Repository: allenai/allenact
Branch: main
Commit: d055fc9d4533
Files: 402
Total size: 2.0 MB
Directory structure:
gitextract_rp45h8jw/
├── .VERSION
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.md
│ │ ├── feature_request.md
│ │ └── support_request.md
│ └── workflows/
│ ├── black.yml
│ ├── codeql.yml
│ ├── publish.yml
│ └── pytest.yml
├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── CNAME
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── ROADMAP.md
├── allenact/
│ ├── __init__.py
│ ├── _constants.py
│ ├── algorithms/
│ │ ├── __init__.py
│ │ ├── offpolicy_sync/
│ │ │ ├── __init__.py
│ │ │ └── losses/
│ │ │ ├── __init__.py
│ │ │ └── abstract_offpolicy_loss.py
│ │ └── onpolicy_sync/
│ │ ├── __init__.py
│ │ ├── engine.py
│ │ ├── losses/
│ │ │ ├── __init__.py
│ │ │ ├── a2cacktr.py
│ │ │ ├── abstract_loss.py
│ │ │ ├── grouped_action_imitation.py
│ │ │ ├── imitation.py
│ │ │ └── ppo.py
│ │ ├── misc.py
│ │ ├── policy.py
│ │ ├── runner.py
│ │ ├── storage.py
│ │ └── vector_sampled_tasks.py
│ ├── base_abstractions/
│ │ ├── __init__.py
│ │ ├── callbacks.py
│ │ ├── distributions.py
│ │ ├── experiment_config.py
│ │ ├── misc.py
│ │ ├── preprocessor.py
│ │ ├── sensor.py
│ │ └── task.py
│ ├── embodiedai/
│ │ ├── __init__.py
│ │ ├── aux_losses/
│ │ │ ├── __init__.py
│ │ │ └── losses.py
│ │ ├── mapping/
│ │ │ ├── __init__.py
│ │ │ ├── mapping_losses.py
│ │ │ ├── mapping_models/
│ │ │ │ ├── __init__.py
│ │ │ │ └── active_neural_slam.py
│ │ │ └── mapping_utils/
│ │ │ ├── __init__.py
│ │ │ ├── map_builders.py
│ │ │ └── point_cloud_utils.py
│ │ ├── models/
│ │ │ ├── __init__.py
│ │ │ ├── aux_models.py
│ │ │ ├── basic_models.py
│ │ │ ├── fusion_models.py
│ │ │ ├── resnet.py
│ │ │ └── visual_nav_models.py
│ │ ├── preprocessors/
│ │ │ ├── __init__.py
│ │ │ └── resnet.py
│ │ ├── sensors/
│ │ │ ├── __init__.py
│ │ │ └── vision_sensors.py
│ │ └── storage/
│ │ ├── __init__.py
│ │ └── vdr_storage.py
│ ├── main.py
│ ├── setup.py
│ └── utils/
│ ├── __init__.py
│ ├── cache_utils.py
│ ├── cacheless_frcnn.py
│ ├── experiment_utils.py
│ ├── inference.py
│ ├── misc_utils.py
│ ├── model_utils.py
│ ├── multi_agent_viz_utils.py
│ ├── spaces_utils.py
│ ├── system.py
│ ├── tensor_utils.py
│ └── viz_utils.py
├── allenact_plugins/
│ ├── __init__.py
│ ├── babyai_plugin/
│ │ ├── __init__.py
│ │ ├── babyai_constants.py
│ │ ├── babyai_models.py
│ │ ├── babyai_tasks.py
│ │ ├── configs/
│ │ │ └── __init__.py
│ │ ├── data/
│ │ │ └── __init__.py
│ │ ├── extra_environment.yml
│ │ ├── extra_requirements.txt
│ │ └── scripts/
│ │ ├── __init__.py
│ │ ├── download_babyai_expert_demos.py
│ │ ├── get_instr_length_percentiles.py
│ │ └── truncate_expert_demos.py
│ ├── clip_plugin/
│ │ ├── __init__.py
│ │ ├── clip_preprocessors.py
│ │ ├── extra_environment.yml
│ │ └── extra_requirements.txt
│ ├── gym_plugin/
│ │ ├── __init__.py
│ │ ├── extra_environment.yml
│ │ ├── extra_requirements.txt
│ │ ├── gym_distributions.py
│ │ ├── gym_environment.py
│ │ ├── gym_models.py
│ │ ├── gym_sensors.py
│ │ └── gym_tasks.py
│ ├── habitat_plugin/
│ │ ├── __init__.py
│ │ ├── data/
│ │ │ └── __init__.py
│ │ ├── extra_environment.yml
│ │ ├── extra_environment_headless.yml
│ │ ├── extra_requirements.txt
│ │ ├── habitat_constants.py
│ │ ├── habitat_environment.py
│ │ ├── habitat_preprocessors.py
│ │ ├── habitat_sensors.py
│ │ ├── habitat_task_samplers.py
│ │ ├── habitat_tasks.py
│ │ ├── habitat_utils.py
│ │ └── scripts/
│ │ ├── __init__.py
│ │ ├── agent_demo.py
│ │ └── make_map.py
│ ├── ithor_plugin/
│ │ ├── __init__.py
│ │ ├── extra_environment.yml
│ │ ├── extra_requirements.txt
│ │ ├── ithor_constants.py
│ │ ├── ithor_environment.py
│ │ ├── ithor_sensors.py
│ │ ├── ithor_task_samplers.py
│ │ ├── ithor_tasks.py
│ │ ├── ithor_util.py
│ │ ├── ithor_viz.py
│ │ └── scripts/
│ │ ├── __init__.py
│ │ ├── make_objectnav_debug_dataset.py
│ │ └── make_pointnav_debug_dataset.py
│ ├── lighthouse_plugin/
│ │ ├── __init__.py
│ │ ├── configs/
│ │ │ └── __init__.py
│ │ ├── data/
│ │ │ └── __init__.py
│ │ ├── extra_environment.yml
│ │ ├── extra_requirements.txt
│ │ ├── lighthouse_environment.py
│ │ ├── lighthouse_models.py
│ │ ├── lighthouse_sensors.py
│ │ ├── lighthouse_tasks.py
│ │ ├── lighthouse_util.py
│ │ └── scripts/
│ │ └── __init__.py
│ ├── manipulathor_plugin/
│ │ ├── __init__.py
│ │ ├── arm_calculation_utils.py
│ │ ├── armpointnav_constants.py
│ │ ├── manipulathor_constants.py
│ │ ├── manipulathor_environment.py
│ │ ├── manipulathor_sensors.py
│ │ ├── manipulathor_task_samplers.py
│ │ ├── manipulathor_tasks.py
│ │ ├── manipulathor_utils.py
│ │ └── manipulathor_viz.py
│ ├── minigrid_plugin/
│ │ ├── __init__.py
│ │ ├── configs/
│ │ │ ├── __init__.py
│ │ │ └── minigrid_nomemory.py
│ │ ├── data/
│ │ │ └── __init__.py
│ │ ├── extra_environment.yml
│ │ ├── extra_requirements.txt
│ │ ├── minigrid_environments.py
│ │ ├── minigrid_models.py
│ │ ├── minigrid_offpolicy.py
│ │ ├── minigrid_sensors.py
│ │ ├── minigrid_tasks.py
│ │ └── scripts/
│ │ └── __init__.py
│ ├── navigation_plugin/
│ │ ├── __init__.py
│ │ ├── objectnav/
│ │ │ ├── __init__.py
│ │ │ └── models.py
│ │ └── pointnav/
│ │ ├── __init__.py
│ │ └── models.py
│ ├── robothor_plugin/
│ │ ├── __init__.py
│ │ ├── configs/
│ │ │ └── __init__.py
│ │ ├── extra_environment.yml
│ │ ├── extra_requirements.txt
│ │ ├── robothor_constants.py
│ │ ├── robothor_distributions.py
│ │ ├── robothor_environment.py
│ │ ├── robothor_models.py
│ │ ├── robothor_preprocessors.py
│ │ ├── robothor_sensors.py
│ │ ├── robothor_task_samplers.py
│ │ ├── robothor_tasks.py
│ │ ├── robothor_viz.py
│ │ └── scripts/
│ │ ├── __init__.py
│ │ ├── make_objectnav_debug_dataset.py
│ │ └── make_pointnav_debug_dataset.py
│ └── setup.py
├── conda/
│ ├── environment-10.1.yml
│ ├── environment-10.2.yml
│ ├── environment-11.1.yml
│ ├── environment-9.2.yml
│ ├── environment-base.yml
│ ├── environment-cpu.yml
│ └── environment-dev.yml
├── constants.py
├── datasets/
│ ├── .gitignore
│ ├── .habitat_datasets_download_info.json
│ ├── .habitat_downloader_helper.py
│ ├── download_habitat_datasets.sh
│ └── download_navigation_datasets.sh
├── dev_requirements.txt
├── docs/
│ ├── .gitignore
│ ├── CNAME
│ ├── FAQ.md
│ ├── css/
│ │ └── extra.css
│ ├── getting_started/
│ │ ├── abstractions.md
│ │ ├── running-your-first-experiment.md
│ │ └── structure.md
│ ├── howtos/
│ │ ├── changing-rewards-and-losses.md
│ │ ├── defining-a-new-model.md
│ │ ├── defining-a-new-task.md
│ │ ├── defining-a-new-training-pipeline.md
│ │ ├── defining-an-experiment.md
│ │ ├── running-a-multi-agent-experiment.md
│ │ └── visualizing-results.md
│ ├── installation/
│ │ ├── download-datasets.md
│ │ ├── installation-allenact.md
│ │ └── installation-framework.md
│ ├── javascripts/
│ │ └── extra.js
│ ├── notebooks/
│ │ └── firstbook.md
│ ├── projects/
│ │ ├── advisor_2020/
│ │ │ └── README.md
│ │ ├── babyai_baselines/
│ │ │ └── README.md
│ │ ├── gym_baselines/
│ │ │ └── README.md
│ │ ├── objectnav_baselines/
│ │ │ └── README.md
│ │ ├── pointnav_baselines/
│ │ │ └── README.md
│ │ └── two_body_problem_2019/
│ │ └── README.md
│ └── tutorials/
│ ├── distributed-objectnav-tutorial.md
│ ├── gym-mujoco-tutorial.md
│ ├── gym-tutorial.md
│ ├── index.md
│ ├── minigrid-tutorial.md
│ ├── offpolicy-tutorial.md
│ ├── running-inference-on-a-pretrained-model.md
│ ├── training-a-pointnav-model.md
│ ├── training-pipelines.md
│ └── transfering-to-a-different-environment-framework.md
├── main.py
├── mkdocs.yml
├── mypy.ini
├── overrides/
│ └── main.html
├── pretrained_model_ckpts/
│ ├── .gitignore
│ └── download_navigation_model_ckpts.sh
├── projects/
│ ├── __init__.py
│ ├── babyai_baselines/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ └── experiments/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── go_to_local/
│ │ │ ├── __init__.py
│ │ │ ├── a2c.py
│ │ │ ├── base.py
│ │ │ ├── bc.py
│ │ │ ├── bc_teacher_forcing.py
│ │ │ ├── dagger.py
│ │ │ ├── distributed_bc_offpolicy.py
│ │ │ ├── distributed_bc_teacher_forcing.py
│ │ │ └── ppo.py
│ │ └── go_to_obj/
│ │ ├── __init__.py
│ │ ├── a2c.py
│ │ ├── base.py
│ │ ├── bc.py
│ │ ├── bc_teacher_forcing.py
│ │ ├── dagger.py
│ │ └── ppo.py
│ ├── gym_baselines/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── experiments/
│ │ │ ├── __init__.py
│ │ │ ├── gym_base.py
│ │ │ ├── gym_humanoid_base.py
│ │ │ ├── gym_humanoid_ddppo.py
│ │ │ ├── gym_mujoco_base.py
│ │ │ ├── gym_mujoco_ddppo.py
│ │ │ └── mujoco/
│ │ │ ├── __init__.py
│ │ │ ├── gym_mujoco_ant_ddppo.py
│ │ │ ├── gym_mujoco_halfcheetah_ddppo.py
│ │ │ ├── gym_mujoco_hopper_ddppo.py
│ │ │ ├── gym_mujoco_humanoid_ddppo.py
│ │ │ ├── gym_mujoco_inverteddoublependulum_ddppo.py
│ │ │ ├── gym_mujoco_invertedpendulum_ddppo.py
│ │ │ ├── gym_mujoco_reacher_ddppo.py
│ │ │ ├── gym_mujoco_swimmer_ddppo.py
│ │ │ └── gym_mujoco_walker2d_ddppo.py
│ │ └── models/
│ │ ├── __init__.py
│ │ └── gym_models.py
│ ├── manipulathor_baselines/
│ │ ├── __init__.py
│ │ └── armpointnav_baselines/
│ │ ├── __init__.py
│ │ ├── experiments/
│ │ │ ├── __init__.py
│ │ │ ├── armpointnav_base.py
│ │ │ ├── armpointnav_mixin_ddppo.py
│ │ │ ├── armpointnav_mixin_simplegru.py
│ │ │ ├── armpointnav_thor_base.py
│ │ │ └── ithor/
│ │ │ ├── __init__.py
│ │ │ ├── armpointnav_depth.py
│ │ │ ├── armpointnav_disjoint_depth.py
│ │ │ ├── armpointnav_ithor_base.py
│ │ │ ├── armpointnav_no_vision.py
│ │ │ ├── armpointnav_rgb.py
│ │ │ └── armpointnav_rgbdepth.py
│ │ └── models/
│ │ ├── __init__.py
│ │ ├── arm_pointnav_models.py
│ │ ├── base_models.py
│ │ ├── disjoint_arm_pointnav_models.py
│ │ └── manipulathor_net_utils.py
│ ├── objectnav_baselines/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── experiments/
│ │ │ ├── __init__.py
│ │ │ ├── clip/
│ │ │ │ ├── __init__.py
│ │ │ │ └── mixins.py
│ │ │ ├── habitat/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── clip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── objectnav_habitat_rgb_clipresnet50gru_ddppo.py
│ │ │ │ │ └── objectnav_habitat_rgb_clipresnet50gru_ddppo_increasingrollouts.py
│ │ │ │ └── objectnav_habitat_base.py
│ │ │ ├── ithor/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── objectnav_ithor_base.py
│ │ │ │ ├── objectnav_ithor_depth_resnet18gru_ddppo.py
│ │ │ │ ├── objectnav_ithor_rgb_resnet18gru_ddppo.py
│ │ │ │ └── objectnav_ithor_rgbd_resnet18gru_ddppo.py
│ │ │ ├── objectnav_base.py
│ │ │ ├── objectnav_thor_base.py
│ │ │ └── robothor/
│ │ │ ├── __init__.py
│ │ │ ├── beta/
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── objectnav_robothor_rgb_resnetgru_ddppo_and_gbc.py
│ │ │ │ └── objectnav_robothor_rgb_unfrozenresnet18gru_vdr_ddppo.py
│ │ │ ├── clip/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── objectnav_robothor_rgb_clipresnet50gru_ddppo.py
│ │ │ │ └── objectnav_robothor_rgb_clipresnet50x16gru_ddppo.py
│ │ │ ├── objectnav_robothor_base.py
│ │ │ ├── objectnav_robothor_depth_resnet18gru_ddppo.py
│ │ │ ├── objectnav_robothor_rgb_resnet18gru_dagger.py
│ │ │ ├── objectnav_robothor_rgb_resnet18gru_ddppo.py
│ │ │ ├── objectnav_robothor_rgb_resnet50gru_ddppo.py
│ │ │ ├── objectnav_robothor_rgb_unfrozenresnet18gru_ddppo.py
│ │ │ └── objectnav_robothor_rgbd_resnet18gru_ddppo.py
│ │ └── mixins.py
│ ├── pointnav_baselines/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── experiments/
│ │ │ ├── __init__.py
│ │ │ ├── habitat/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── clip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── pointnav_habitat_rgb_clipresnet50gru_ddppo.py
│ │ │ │ ├── pointnav_habitat_base.py
│ │ │ │ ├── pointnav_habitat_depth_simpleconvgru_ddppo.py
│ │ │ │ ├── pointnav_habitat_rgb_simpleconvgru_ddppo.py
│ │ │ │ └── pointnav_habitat_rgbd_simpleconvgru_ddppo.py
│ │ │ ├── ithor/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── pointnav_ithor_base.py
│ │ │ │ ├── pointnav_ithor_depth_simpleconvgru_ddppo.py
│ │ │ │ ├── pointnav_ithor_rgb_simpleconvgru_ddppo.py
│ │ │ │ └── pointnav_ithor_rgbd_simpleconvgru_ddppo.py
│ │ │ ├── pointnav_base.py
│ │ │ ├── pointnav_thor_base.py
│ │ │ └── robothor/
│ │ │ ├── __init__.py
│ │ │ ├── pointnav_robothor_base.py
│ │ │ ├── pointnav_robothor_depth_simpleconvgru_ddppo.py
│ │ │ ├── pointnav_robothor_rgb_simpleconvgru_ddppo.py
│ │ │ └── pointnav_robothor_rgbd_simpleconvgru_ddppo.py
│ │ └── mixins.py
│ └── tutorials/
│ ├── __init__.py
│ ├── distributed_objectnav_tutorial.py
│ ├── gym_mujoco_tutorial.py
│ ├── gym_tutorial.py
│ ├── minigrid_offpolicy_tutorial.py
│ ├── minigrid_tutorial.py
│ ├── minigrid_tutorial_conds.py
│ ├── navtopartner_robothor_rgb_ppo.py
│ ├── object_nav_ithor_dagger_then_ppo_one_object.py
│ ├── object_nav_ithor_dagger_then_ppo_one_object_viz.py
│ ├── object_nav_ithor_ppo_one_object.py
│ ├── pointnav_habitat_rgb_ddppo.py
│ ├── pointnav_ithor_rgb_ddppo.py
│ ├── running_inference_tutorial.py
│ └── training_a_pointnav_model.py
├── requirements.txt
├── scripts/
│ ├── auto_format.sh
│ ├── build_docs.py
│ ├── build_docs.sh
│ ├── dcommand.py
│ ├── dconfig.py
│ ├── dkill.py
│ ├── dmain.py
│ ├── literate.py
│ ├── release.py
│ ├── run_tests.sh
│ └── startx.py
└── tests/
├── .gitignore
├── __init__.py
├── hierarchical_policies/
│ ├── __init__.py
│ └── test_minigrid_conditional.py
├── manipulathor_plugin/
│ ├── __init__.py
│ └── test_utils.py
├── mapping/
│ ├── __init__.py
│ └── test_ai2thor_mapping.py
├── multiprocessing/
│ ├── __init__.py
│ └── test_frozen_attribs.py
├── sync_algs_cpu/
│ ├── __init__.py
│ └── test_to_to_obj_trains.py
├── utils/
│ ├── __init__.py
│ ├── test_inference_agent.py
│ └── test_spaces.py
└── vision/
├── __init__.py
└── test_pillow_rescaling.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .VERSION
================================================
0.5.5a
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: bug
assignees: ''
---
## Problem
A clear and concise description of what the bug is.
## Steps to reproduce
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error
## Expected behavior
A clear and concise description of what you expected to happen.
## Screenshots
If applicable, add screenshots to help explain your problem.
## Desktop
Please add the following information:
- OS: [e.g. Ubuntu 16.04.5]
- AllenAct Version: [e.g. current HEAD of main or v0.1.0]
## Additional context
Add any other context about the problem here.
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an enhancement or a new feature
title: ''
labels: enhancement
assignees: ''
---
## Problem
Is your feature request related to a problem? Please provide a clear and concise description of what the problem is:
E.g. I would really like to have better support for my favorite environment X.
## Desired solution
A clear and concise description of what you want to happen.
## Alternative solutions
A description of any alternative solutions or features you've considered.
## Additional context
Add any other context or screenshots about the feature request here.
================================================
FILE: .github/ISSUE_TEMPLATE/support_request.md
================================================
---
name: Support request
about: Request support regarding AllenAct
title: ''
labels: ''
assignees: ''
---
## Problem / Question
What do you need help with? E.g. "I'm having trouble running model X" or "when I run command Y I get error Z."
## Additional context
_(Optional)_ - To provide support it's helpful to have as many details as possible, add additional context here.
================================================
FILE: .github/workflows/black.yml
================================================
name: Lint
on: [push, pull_request]
jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: psf/black@stable
================================================
FILE: .github/workflows/codeql.yml
================================================
name: "CodeQL"
on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]
schedule:
- cron: "13 6 * * 4"
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write
strategy:
fail-fast: false
matrix:
language: [ python ]
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
queries: +security-and-quality
- name: Autobuild
uses: github/codeql-action/autobuild@v2
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
with:
category: "/language:${{ matrix.language }}"
================================================
FILE: .github/workflows/publish.yml
================================================
# This workflow will upload the allenact and allenact_plugins packages using Twine (after manually triggering it)
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
name: Publish PYPI Packages
on:
workflow_dispatch:
jobs:
deploy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.7'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools twine
- name: Build and publish
env:
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python scripts/release.py
twine upload -u __token__ dist/*
================================================
FILE: .github/workflows/pytest.yml
================================================
name: PyTest
on: [push]
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.9]
steps:
- uses: actions/checkout@v2
- uses: ouzi-dev/commit-status-updater@v1.1.0 # Updates the commit status badge to pending
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install pytest wandb
python -m pip install --editable="./allenact"
python -m pip install --editable="./allenact_plugins[all]"
python -m pip install -e "git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd#egg=babyai"
python -m pip install compress_pickle # Needed for some mapping tests
pip list
- name: Test with pytest
run: |
pytest --capture=tee-sys tests
- if: always() # Updates the commit status badge to the result of running the tests above
uses: ouzi-dev/commit-status-updater@v1.1.0
with:
status: "${{ job.status }}"
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
docs/build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pycharm
.idea/
# pytorch
*.pt
# Default output dir
experiment_output
*_out
# PDFs
*.pdf
# PNGs
*.png
# Tensorboard logs
events.out.tfevents.*
# TSV files
*.tsv
# tmp directory
tmp/
# Pickle files
*.pkl
*.pkl.gz
# Zip files
*.zip
# VSCode
.vscode/
# MacOS
.DS_Store
# Docs
docs/index.md
docs/CONTRIBUTING.md
docs/LICENSE.md
# Metrics
metrics__*.json
# Robothor
allenact_plugins/robothor_plugin/data/*
# ithor
allenact_plugins/ithor_plugin/data/*
# Habitat
external_projects/habitat-lab
# Local pip installations
src
.pip_src
# Files created when running training
**/used_configs
*.patch
# Package building
*.egg_info
*.egg-info
# Additional allenact-specific locks and hidden files
*.allenact_last_start_time_string
*.allenact_start_time_string.lock
*.lock
rsync-*
================================================
FILE: .gitmodules
================================================
[submodule "projects/ithor_rearrangement"]
path = projects/ithor_rearrangement
url = https://github.com/allenai/ai2thor-rearrangement.git
branch = active_neural_slam
================================================
FILE: .pre-commit-config.yaml
================================================
repos:
- repo: https://github.com/ambv/black
rev: 19.10b0
hooks:
- id: black
language_version: python3.7
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v0.761' # Use the sha / tag you want to point at
hooks:
- id: mypy
args: [--follow-imports=skip]
================================================
FILE: CNAME
================================================
www.allenact.org
================================================
FILE: CONTRIBUTING.md
================================================
# Contributing
We welcome contributions from the greater community. If you would like to make such a contribution, we recommend first submitting an [issue](https://github.com/allenai/allenact/issues) describing your proposed improvement.
Doing so can ensure we can validate your suggestions before you spend a great deal of time
upon them. Improvements and bug fixes should be made via a pull request
from your fork of the repository at [https://github.com/allenai/allenact](https://github.com/allenai/allenact).
All code in pull requests should adhere to the following guidelines.
## Found a bug or want to suggest an enhancement?
Please submit an [issue](https://github.com/allenai/allenact/issues) in which you note the steps
to reproduce the bug or in which you detail the enhancement.
## Making a pull request?
When making a pull request we require that any code respects several guidelines detailed below.
### Auto-formatting
All python code in this repository should be formatted using [black](https://black.readthedocs.io/en/stable/).
To use `black` auto-formatting across all files, simply run
```bash
bash scripts/auto_format.sh
```
which will run `black` auto-formatting as well as [docformatter](https://pypi.org/project/docformatter/) (used
to auto-format documentation strings).
### Type-checking
Our code makes liberal use of type hints. If you have not had experience with type hinting in python we recommend
reading the [documentation](https://docs.python.org/3/library/typing.html) of the `typing` python module or the
simplified introduction to type hints found [here](https://www.python.org/dev/peps/pep-0483/). All methods should
have typed arguments and output. Furthermore we use [mypy](https://mypy.readthedocs.io/en/stable/) to perform
basic static type checking. Before making a pull request, there should be no warnings or errors when running
```bash
dmypy run -- --follow-imports=skip .
```
Explicitly ignoring type checking (for instance using `# type: ignore`) should only be done when it would otherwise
be an extensive burden.
### Setting up pre-commit hooks (optional)
Pre-commit hooks check that, when you attempt to commit changes, your code adheres to a number of
formatting and type-checking guidelines. Pull requests containing code not adhering to these
guidelines will not be accepted and thus we recommend installing these pre-commit hooks. Assuming you have
installed all of the project requirements, you can install our recommended
pre-commit hooks by running (from this project's root directory)
```bash
pre-commit install
```
After running the above, each time you run `git commit ...` a set of pre-commit checks will
be run.
================================================
FILE: LICENSE
================================================
MIT License
Original work Copyright (c) 2017 Ilya Kostrikov
Original work Copyright (c) Facebook, Inc. and its affiliates.
Modified work Copyright (c) 2020 Allen Institute for Artificial Intelligence
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
An open source framework for research in Embodied AI
[](./LICENSE)
[](https://allenact.org)
[](https://github.com/allenai/allenact/releases/latest)
[](https://www.python.org/downloads/release/python-360/)
[](https://lgtm.com/projects/g/allenai/allenact/context:python)
[](https://github.com/psf/black)
**AllenAct** is a modular and flexible learning framework designed with a focus on the unique requirements of Embodied-AI research. It provides first-class support for a growing collection of embodied environments, tasks and algorithms, provides reproductions of state-of-the-art models and includes extensive documentation, tutorials, start-up code, and pre-trained models.
AllenAct is built and backed by the [Allen Institute for AI (AI2)](https://allenai.org/). AI2 is a non-profit institute with the mission to contribute to humanity through high-impact AI research and engineering.
## Quick Links
- [Website & Docs](https://www.allenact.org/)
- [Github](https://github.com/allenai/allenact)
- [Install](https://www.allenact.org/installation/installation-allenact/)
- [Tutorials](https://www.allenact.org/tutorials/)
- [AllenAct Paper](https://arxiv.org/abs/2008.12760)
- [Citation](#citation)
## Features & Highlights
* _Support for multiple environments_: Support for the [iTHOR](https://ai2thor.allenai.org/ithor/), [RoboTHOR](https://ai2thor.allenai.org/robothor/) and [Habitat](https://aihabitat.org/) embodied environments as well as for grid-worlds including [MiniGrid](https://github.com/maximecb/gym-minigrid).
* _Task Abstraction_: Tasks and environments are decoupled in AllenAct, enabling researchers to easily implement a large variety of tasks in the same environment.
* _Algorithms_: Support for a variety of on-policy algorithms including [PPO](https://arxiv.org/pdf/1707.06347.pdf), [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf), [A2C](https://arxiv.org/pdf/1611.05763.pdf), Imitation Learning and [DAgger](https://www.ri.cmu.edu/pub_files/2011/4/Ross-AISTATS11-NoRegret.pdf) as well as offline training such as offline IL.
* _Sequential Algorithms_: It is trivial to experiment with different sequences of training routines, which are often the key to successful policies.
* _Simultaneous Losses_: Easily combine various losses while training models (e.g. use an external self-supervised loss while optimizing a PPO loss).
* _Multi-agent support_: Support for multi-agent algorithms and tasks.
* _Visualizations_: Out of the box support to easily visualize first and third person views for agents as well as intermediate model tensors, integrated into Tensorboard.
* _Pre-trained models_: Code and models for a number of standard Embodied AI tasks.
* _Tutorials_: Start-up code and extensive tutorials to help ramp up to Embodied AI.
* _First-class PyTorch support_: One of the few RL frameworks to target PyTorch.
* _Arbitrary action spaces_: Supporting both discrete and continuous actions.
|Environments|Tasks|Algorithms|
|------------|-----|----------|
|[iTHOR](https://ai2thor.allenai.org/ithor/), [RoboTHOR](https://ai2thor.allenai.org/robothor/), [Habitat](https://aihabitat.org/), [MiniGrid](https://github.com/maximecb/gym-minigrid), [OpenAI Gym](https://gym.openai.com/)|[PointNav](https://arxiv.org/pdf/1807.06757.pdf), [ObjectNav](https://arxiv.org/pdf/2006.13171.pdf), [MiniGrid tasks](https://github.com/maximecb/gym-minigrid), [Gym Box2D tasks](https://gym.openai.com/envs/#box2d)|[A2C](https://arxiv.org/pdf/1611.05763.pdf), [PPO](https://arxiv.org/pdf/1707.06347.pdf), [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf), [DAgger](https://www.ri.cmu.edu/pub_files/2011/4/Ross-AISTATS11-NoRegret.pdf), Off-policy Imitation|
## Contributions
We welcome contributions from the greater community. If you would like to make such a contribution, we recommend first submitting an [issue](https://github.com/allenai/allenact/issues) describing your proposed improvement. Doing so can ensure we can validate your suggestions before you spend a great deal of time upon them. Improvements and bug fixes should be made via a pull request from your fork of the repository at [https://github.com/allenai/allenact](https://github.com/allenai/allenact).
All code in this repository is subject to formatting, documentation, and type-annotation guidelines. For more details, please see our [contribution guidelines](CONTRIBUTING.md).
## Acknowledgments
This work builds upon the [pytorch-a2c-ppo-acktr](https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail) library of Ilya Kostrikov and uses some data structures from FAIR's [habitat-lab](https://github.com/facebookresearch/habitat-lab). We would like to thank Dustin Schwenk for his help with the public release of the framework.
## License
AllenAct is MIT licensed, as found in the [LICENSE](LICENSE) file.
## Team
AllenAct is an open-source project built by members of the PRIOR research group at the Allen Institute for Artificial Intelligence (AI2).
## Citation
If you use this work, please cite our [paper](https://arxiv.org/abs/2008.12760):
```bibtex
@article{AllenAct,
author = {Luca Weihs and Jordi Salvador and Klemen Kotar and Unnat Jain and Kuo-Hao Zeng and Roozbeh Mottaghi and Aniruddha Kembhavi},
title = {AllenAct: A Framework for Embodied AI Research},
year = {2020},
journal = {arXiv preprint arXiv:2008.12760},
}
```
================================================
FILE: ROADMAP.md
================================================
# Roadmap
Here we track new features/support to be added in the short/mid-term.
## New environments
* [SAPIEN](https://sapien.ucsd.edu/)
* [ThreeDWorld](http://www.threedworld.org/)
## New tasks
* [Room-to-room navigation](https://arxiv.org/pdf/1711.07280.pdf)
* [Furniture Lifting](https://arxiv.org/abs/1904.05879) and [Furniture Moving](https://arxiv.org/abs/2007.04979)
## New training methods
* A3C
* Deep Q-Learning
================================================
FILE: allenact/__init__.py
================================================
# Expose the package version as `allenact.__version__`.
# The `allenact._version` module is treated as optional: when it cannot be
# found, `__version__` falls back to `None` rather than making the package
# import fail.
# NOTE(review): `_version` is presumably generated at packaging/release time
# and absent in a plain source checkout -- confirm against the release script.
try:
    # noinspection PyProtectedMember,PyUnresolvedReferences
    from allenact._version import __version__
except ModuleNotFoundError:
    # Only a missing module is tolerated; any other import error propagates.
    __version__ = None
================================================
FILE: allenact/_constants.py
================================================
import os
from pathlib import Path
# Absolute path of the directory that contains this `_constants.py` file.
ALLENACT_INSTALL_DIR = os.path.abspath(os.path.dirname(Path(__file__)))
================================================
FILE: allenact/algorithms/__init__.py
================================================
================================================
FILE: allenact/algorithms/offpolicy_sync/__init__.py
================================================
================================================
FILE: allenact/algorithms/offpolicy_sync/losses/__init__.py
================================================
================================================
FILE: allenact/algorithms/offpolicy_sync/losses/abstract_offpolicy_loss.py
================================================
"""Defining abstract loss classes for actor critic models."""
import abc
from typing import Dict, Tuple, TypeVar, Generic
import torch
from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.base_abstractions.misc import Loss, Memory
ModelType = TypeVar("ModelType")
class AbstractOffPolicyLoss(Generic[ModelType], Loss):
    """Abstract class representing an off-policy loss function used to train a
    model.

    Subclasses must override `loss`; the implementation here only raises
    `NotImplementedError`.
    """

    # noinspection PyMethodOverriding
    @abc.abstractmethod
    def loss(  # type: ignore
        self,
        *,  # No positional arguments
        step_count: int,
        model: ModelType,
        batch: ObservationType,
        memory: Memory,
        **kwargs,
    ) -> Tuple[torch.FloatTensor, Dict[str, float], Memory, int]:
        """Computes the loss.

        Loss after processing a batch of data with (part of) a model (possibly with memory).
        All arguments must be passed by keyword.

        # Parameters

        step_count: integer step counter supplied by the caller (presumably the number
            of training steps taken so far - TODO confirm against callers).
        model: model to run on data batch (both assumed to be on the same device)
        batch: data to use as input for model (already on the same device as model)
        memory: model memory before processing current data batch
        kwargs: additional keyword arguments; their meaning is defined by subclasses.

        # Returns

        A tuple with:

        current_loss: total loss
        current_info: additional information about the current loss
        memory: model memory after processing current data batch
        bsize: batch size

        # Raises

        NotImplementedError: always, in this abstract base implementation.
        """
        raise NotImplementedError()
================================================
FILE: allenact/algorithms/onpolicy_sync/__init__.py
================================================
================================================
FILE: allenact/algorithms/onpolicy_sync/engine.py
================================================
"""Defines the reinforcement learning `OnPolicyRLEngine`."""
import datetime
import logging
import numbers
import os
import random
import time
import traceback
from functools import partial
from multiprocessing.context import BaseContext
from typing import Any, Dict, List, Optional, Sequence, Union, cast
import filelock
import torch
import torch.distributed as dist # type: ignore
import torch.distributions # type: ignore
import torch.multiprocessing as mp # type: ignore
import torch.nn as nn
import torch.optim as optim
# noinspection PyProtectedMember
from torch._C._distributed_c10d import ReduceOp
from allenact.algorithms.onpolicy_sync.misc import TrackingInfo, TrackingInfoType
from allenact.base_abstractions.sensor import Sensor
from allenact.utils.misc_utils import str2bool
from allenact.utils.model_utils import md5_hash_of_state_dict
try:
# noinspection PyProtectedMember,PyUnresolvedReferences
from torch.optim.lr_scheduler import _LRScheduler
except (ImportError, ModuleNotFoundError):
raise ImportError("`_LRScheduler` was not found in `torch.optim.lr_scheduler`")
from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
AbstractActorCriticLoss,
)
from allenact.algorithms.onpolicy_sync.policy import ActorCriticModel
from allenact.algorithms.onpolicy_sync.storage import (
ExperienceStorage,
MiniBatchStorageMixin,
RolloutStorage,
StreamingStorageMixin,
)
from allenact.algorithms.onpolicy_sync.vector_sampled_tasks import (
COMPLETE_TASK_CALLBACK_KEY,
COMPLETE_TASK_METRICS_KEY,
SingleProcessVectorSampledTasks,
VectorSampledTasks,
)
from allenact.base_abstractions.distributions import TeacherForcingDistr
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.misc import (
ActorCriticOutput,
GenericAbstractLoss,
Memory,
RLStepResult,
)
from allenact.utils import spaces_utils as su
from allenact.utils.experiment_utils import (
LoggingPackage,
PipelineStage,
ScalarMeanTracker,
StageComponent,
TrainingPipeline,
set_deterministic_cudnn,
set_seed,
)
from allenact.utils.system import get_logger
from allenact.utils.tensor_utils import batch_observations, detach_recursively
from allenact.utils.viz_utils import VizSuite
# `DEBUGGING` defaults to true when `pydevd` is importable and to false otherwise;
# in both cases the `ALLENACT_DEBUG` environment variable overrides the default.
try:
    # When debugging we don't want to timeout in the VectorSampledTasks
    # noinspection PyPackageRequirements
    import pydevd

    DEBUGGING = str2bool(os.getenv("ALLENACT_DEBUG", "true"))
except ImportError:
    DEBUGGING = str2bool(os.getenv("ALLENACT_DEBUG", "false"))

# Read timeout used for the vector sampled tasks while `DEBUGGING` is true.
# Parsed from `ALLENACT_DEBUG_VST_TIMEOUT` as an int; `None` when the variable is unset.
DEBUG_VST_TIMEOUT: Optional[int] = (lambda x: int(x) if x is not None else x)(
    os.getenv("ALLENACT_DEBUG_VST_TIMEOUT", None)
)

# The three engine modes accepted by `OnPolicyRLEngine`.
TRAIN_MODE_STR = "train"
VALID_MODE_STR = "valid"
TEST_MODE_STR = "test"
class OnPolicyRLEngine(object):
"""The reinforcement learning primary controller.
This `OnPolicyRLEngine` class handles all training, validation, and
testing as well as logging and checkpointing. You are not expected
to instantiate this class yourself, instead you should define an
experiment which will then be used to instantiate an
`OnPolicyRLEngine` and perform any desired tasks.
"""
def __init__(
    self,
    experiment_name: str,
    config: ExperimentConfig,
    results_queue: mp.Queue,  # to output aggregated results
    checkpoints_queue: Optional[
        mp.Queue
    ],  # to write/read (trainer/evaluator) ready checkpoints
    checkpoints_dir: str,
    mode: str = "train",
    callback_sensors: Optional[Sequence[Sensor]] = None,
    seed: Optional[int] = None,
    deterministic_cudnn: bool = False,
    mp_ctx: Optional[BaseContext] = None,
    worker_id: int = 0,
    num_workers: int = 1,
    device: Union[str, torch.device, int] = "cpu",
    distributed_ip: str = "127.0.0.1",
    distributed_port: int = 0,
    deterministic_agents: bool = False,
    max_sampler_processes_per_worker: Optional[int] = None,
    initial_model_state_dict: Optional[Union[Dict[str, Any], int]] = None,
    try_restart_after_task_error: bool = False,
    **kwargs,
):
    """Initializer.

    # Parameters

    experiment_name : Name of the experiment (stored on the engine).
    config : The ExperimentConfig defining the experiment to run.
    results_queue : Queue to which aggregated results (logging packages) are put.
    checkpoints_queue : Queue used to write/read (trainer/evaluator) ready checkpoints.
    checkpoints_dir : Directory for checkpoints (stored on the engine).
    mode : "train", "valid", or "test" (case and surrounding whitespace are ignored).
    callback_sensors : Optional sensors forwarded to the vector sampled tasks.
    seed : Seed used to encourage deterministic behavior (it is difficult to ensure
        completely deterministic behavior due to CUDA issues and nondeterminism
        in environments).
    deterministic_cudnn : Whether to use deterministic cudnn. If `True` this may lower
        training performance this is necessary (but not sufficient) if you desire
        deterministic behavior.
    mp_ctx : Optional multiprocessing context forwarded to the vector sampled tasks.
    worker_id : Index of this worker (also used as its distributed rank).
    num_workers : Total number of workers; a distributed process group is
        initialized when this is larger than 1.
    device : Device for the model; `-1` is interpreted as the CPU.
    distributed_ip : Host name used for the distributed `TCPStore`.
    distributed_port : Port used for the distributed `TCPStore`.
    deterministic_agents : If `True`, `act` uses the mode of the action distribution
        instead of sampling from it.
    max_sampler_processes_per_worker : `None` or a positive integer, forwarded as
        `max_processes` to the vector sampled tasks.
    initial_model_state_dict : Either a state dict to load into the model or an integer
        md5 hash that the freshly created model's state dict must reproduce.
    try_restart_after_task_error : Stored on the engine (not otherwise used here).
    kwargs : Ignored by this initializer.
    """
    self.config = config
    self.results_queue = results_queue
    self.checkpoints_queue = checkpoints_queue
    self.mp_ctx = mp_ctx
    self.checkpoints_dir = checkpoints_dir
    self.worker_id = worker_id
    self.num_workers = num_workers
    self.device = torch.device("cpu") if device == -1 else torch.device(device)  # type: ignore

    if self.device != torch.device("cpu"):
        torch.cuda.set_device(device)

    self.distributed_ip = distributed_ip
    self.distributed_port = distributed_port
    self.try_restart_after_task_error = try_restart_after_task_error

    self.mode = mode.lower().strip()
    assert self.mode in [
        TRAIN_MODE_STR,
        VALID_MODE_STR,
        TEST_MODE_STR,
    ], f"Only {TRAIN_MODE_STR}, {VALID_MODE_STR}, {TEST_MODE_STR}, modes supported"

    self.callback_sensors = callback_sensors
    self.deterministic_cudnn = deterministic_cudnn
    if self.deterministic_cudnn:
        set_deterministic_cudnn()

    self.seed = seed
    set_seed(self.seed)

    self.experiment_name = experiment_name

    assert (
        max_sampler_processes_per_worker is None
        or max_sampler_processes_per_worker >= 1
    ), "`max_sampler_processes_per_worker` must be either `None` or a positive integer."
    self.max_sampler_processes_per_worker = max_sampler_processes_per_worker

    # `machine_params` may be given either as a `MachineParams` or as its kwargs.
    machine_params = config.machine_params(self.mode)
    self.machine_params: MachineParams
    if isinstance(machine_params, MachineParams):
        self.machine_params = machine_params
    else:
        self.machine_params = MachineParams(**machine_params)

    self.num_samplers_per_worker = self.machine_params.nprocesses
    self.num_samplers = self.num_samplers_per_worker[self.worker_id]

    self._vector_tasks: Optional[
        Union[VectorSampledTasks, SingleProcessVectorSampledTasks]
    ] = None

    self.sensor_preprocessor_graph = None
    self.actor_critic: Optional[ActorCriticModel] = None

    create_model_kwargs = {}
    if self.machine_params.sensor_preprocessor_graph is not None:
        self.sensor_preprocessor_graph = (
            self.machine_params.sensor_preprocessor_graph.to(self.device)
        )
        create_model_kwargs["sensor_preprocessor_graph"] = (
            self.sensor_preprocessor_graph
        )

    # Re-seed right before model creation so that initialization is reproducible.
    set_seed(self.seed)
    self.actor_critic = cast(
        ActorCriticModel,
        self.config.create_model(**create_model_kwargs),
    ).to(self.device)

    if initial_model_state_dict is not None:
        if isinstance(initial_model_state_dict, int):
            # An int is the md5 hash the seeded model initialization must reproduce.
            assert (
                md5_hash_of_state_dict(self.actor_critic.state_dict())
                == initial_model_state_dict
            ), (
                f"Could not reproduce the correct model state dict on worker {self.worker_id} despite seeding."
                f" Please ensure that your model's initialization is reproducable when `set_seed(...)`"
                f"] has been called with a fixed seed before initialization."
            )
        else:
            self.actor_critic.load_state_dict(
                state_dict=cast(
                    "OrderedDict[str, Tensor]", initial_model_state_dict
                )
            )
    else:
        # Check the normalized mode (`self.mode`), not the raw argument, so that
        # e.g. "Train" or " train" cannot bypass this requirement.
        assert self.mode != TRAIN_MODE_STR or self.num_workers == 1, (
            "When training with multiple workers you must pass a,"
            " non-`None` value for the `initial_model_state_dict` argument."
        )

    if get_logger().level == logging.DEBUG:
        model_hash = md5_hash_of_state_dict(self.actor_critic.state_dict())
        get_logger().debug(
            f"[{self.mode} worker {self.worker_id}] model weights hash: {model_hash}"
        )

    self.is_distributed = False
    self.store: Optional[torch.distributed.TCPStore] = None  # type:ignore
    if self.num_workers > 1:
        # The store timeout is derived from the vector sampled tasks read timeout.
        # When debugging without `ALLENACT_DEBUG_VST_TIMEOUT` set, that read timeout
        # is `None` (no timeout), which cannot be multiplied; in that case we fall
        # back to the same long timeout used for the process group when debugging.
        vst_read_timeout = DEBUG_VST_TIMEOUT if DEBUGGING else 1 * 60
        if vst_read_timeout is None:
            store_timeout = datetime.timedelta(minutes=3000)
        else:
            store_timeout = datetime.timedelta(seconds=3 * vst_read_timeout + 300)

        self.store = torch.distributed.TCPStore(  # type:ignore
            host_name=self.distributed_ip,
            port=self.distributed_port,
            world_size=self.num_workers,
            is_master=self.worker_id == 0,
            timeout=store_timeout,
        )
        cpu_device = self.device == torch.device("cpu")  # type:ignore

        # "gloo" required during testing to ensure that `barrier()` doesn't time out.
        backend = "gloo" if cpu_device or self.mode == TEST_MODE_STR else "nccl"
        get_logger().debug(
            f"Worker {self.worker_id}: initializing distributed {backend} backend with device {self.device}."
        )
        dist.init_process_group(  # type:ignore
            backend=backend,
            store=self.store,
            rank=self.worker_id,
            world_size=self.num_workers,
            # During testing, we sometimes found that default timeout was too short
            # resulting in the run terminating surprisingly, we increase it here.
            timeout=(
                datetime.timedelta(minutes=3000)
                if (self.mode == TEST_MODE_STR or DEBUGGING)
                else dist.default_pg_timeout
            ),
        )
        self.is_distributed = True

    self.deterministic_agents = deterministic_agents

    self._is_closing: bool = (
        False  # Useful for letting the RL runner know if this is closing
    )
    self._is_closed: bool = False

    # Keeping track of metrics and losses during training/inference
    self.single_process_metrics: List = []
    self.single_process_task_callback_data: List = []
    self.tracking_info_list: List[TrackingInfo] = []

    # Variables that wil only be instantiated in the trainer
    self.optimizer: Optional[optim.optimizer.Optimizer] = None
    # noinspection PyProtectedMember
    self.lr_scheduler: Optional[_LRScheduler] = None
    self.insufficient_data_for_update: Optional[torch.distributed.PrefixStore] = (
        None
    )

    # Training pipeline will be instantiated during training and inference.
    # During inference however, it will be instantiated anew on each run of `run_eval`
    # and will be set to `None` after the eval run is complete.
    self.training_pipeline: Optional[TrainingPipeline] = None
@property
def vector_tasks(
    self,
) -> Union[VectorSampledTasks, SingleProcessVectorSampledTasks]:
    """The vectorized task samplers of this worker, created on first access.

    Nothing is created (and the current value of `self._vector_tasks` is
    returned) when the tasks already exist or when this worker has no
    samplers.
    """
    if self._vector_tasks is not None or self.num_samplers <= 0:
        return self._vector_tasks

    # TODO this will break the fixed seed for multi-device test
    seed_count = (
        sum(self.num_samplers_per_worker)
        if self.is_distributed
        else self.num_samplers
    )

    # do not update the RNG state (creation might happen after seed resetting)
    sampler_seeds = self.worker_seeds(seed_count, initial_seed=self.seed)

    # TODO: The `self.max_sampler_processes_per_worker == 1` case below would be
    #   great to have but it does not play nicely with us wanting to kill things
    #   using SIGTERM/SIGINT signals. Would be nice to figure out a solution to
    #   this at some point.
    # if self.max_sampler_processes_per_worker == 1:
    #     # No need to instantiate a new task sampler processes if we're
    #     # restricted to one sampler process for this worker.
    #     self._vector_tasks = SingleProcessVectorSampledTasks(
    #         make_sampler_fn=self.config.make_sampler_fn,
    #         sampler_fn_args_list=self.get_sampler_fn_args(seeds),
    #     )
    # else:
    start_method = "forkserver" if self.mp_ctx is None else None
    read_timeout = DEBUG_VST_TIMEOUT if DEBUGGING else 1 * 60
    self._vector_tasks = VectorSampledTasks(
        make_sampler_fn=self.config.make_sampler_fn,
        sampler_fn_args=self.get_sampler_fn_args(sampler_seeds),
        callback_sensors=self.callback_sensors,
        multiprocessing_start_method=start_method,
        mp_ctx=self.mp_ctx,
        max_processes=self.max_sampler_processes_per_worker,
        read_timeout=read_timeout,
    )
    return self._vector_tasks
@staticmethod
def worker_seeds(nprocesses: int, initial_seed: Optional[int]) -> List[int]:
    """Generate `nprocesses` seeds for workers.

    When `initial_seed` is given, the seeds are drawn after seeding
    `random` with it and the previous state of `random` is restored
    afterwards. When it is `None`, the seeds are drawn from (and
    advance) the current state of `random`.
    """
    largest_seed = (2**31) - 1

    if initial_seed is None:
        return [random.randint(0, largest_seed) for _ in range(nprocesses)]

    saved_state = random.getstate()
    random.seed(initial_seed)
    generated: List[int] = []
    for _ in range(nprocesses):
        generated.append(random.randint(0, largest_seed))
    random.setstate(saved_state)
    return generated
def get_sampler_fn_args(self, seeds: Optional[List[int]] = None):
    """Build the list of task sampler arguments, one entry per sampler of this worker.

    The arguments are produced by the experiment config's
    `train_task_sampler_args`, `valid_task_sampler_args` or
    `test_task_sampler_args` method, chosen according to `self.mode`.
    """
    sampler_devices = self.machine_params.sampler_devices

    # Name of the config method producing sampler arguments for each mode.
    args_method_by_mode = {
        TRAIN_MODE_STR: "train_task_sampler_args",
        VALID_MODE_STR: "valid_task_sampler_args",
        TEST_MODE_STR: "test_task_sampler_args",
    }
    if self.mode not in args_method_by_mode:
        raise NotImplementedError(
            f"self.mode must be one of {TRAIN_MODE_STR}, {VALID_MODE_STR}, or {TEST_MODE_STR}."
        )
    make_args = getattr(self.config, args_method_by_mode[self.mode])

    # When distributed, process indices are global across all workers.
    if self.is_distributed:
        total_processes, process_offset = (
            sum(self.num_samplers_per_worker),
            sum(self.num_samplers_per_worker[: self.worker_id]),
        )
    else:
        total_processes, process_offset = self.num_samplers, 0

    use_own_device = (
        self.is_distributed or self.mode == TEST_MODE_STR
    ) and self.device.index is not None

    sampler_devices_as_ints: Optional[List[int]] = None
    if use_own_device:
        sampler_devices_as_ints = [self.device.index]
    elif sampler_devices is not None:
        # Devices without an index are encoded as -1.
        sampler_devices_as_ints = [
            sd.index if sd.index is not None else -1 for sd in sampler_devices
        ]

    all_args = []
    for local_ind in range(self.num_samplers):
        all_args.append(
            make_args(
                process_ind=process_offset + local_ind,
                total_processes=total_processes,
                devices=sampler_devices_as_ints,
                seeds=seeds,
            )
        )
    return all_args
def checkpoint_load(
    self, ckpt: Union[str, Dict[str, Any]], restart_pipeline: bool
) -> Dict[str, Union[Dict[str, Any], torch.Tensor, float, int, str, List]]:
    """Load model (and optionally training pipeline) state from a checkpoint.

    # Parameters

    ckpt : Either a path to a checkpoint file or an already loaded checkpoint dict.
    restart_pipeline : If `True`, the training pipeline state stored in the
        checkpoint (if any) is not loaded.

    # Returns

    The loaded checkpoint dictionary.
    """
    if isinstance(ckpt, str):
        get_logger().info(
            f"[{self.mode} worker {self.worker_id}] Loading checkpoint from {ckpt}"
        )
        # Map location CPU is almost always better than mapping to a CUDA device.
        checkpoint_path = os.path.abspath(ckpt)
        ckpt = torch.load(checkpoint_path, map_location="cpu")

    ckpt = cast(
        Dict[str, Union[Dict[str, Any], torch.Tensor, float, int, str, List]],
        ckpt,
    )

    model_state = ckpt["model_state_dict"]
    self.actor_critic.load_state_dict(model_state)  # type:ignore

    pipeline_key = "training_pipeline_state_dict"
    if pipeline_key in ckpt and not restart_pipeline:
        pipeline_state = cast(Dict[str, Any], ckpt[pipeline_key])
        self.training_pipeline.load_state_dict(pipeline_state)

    return ckpt
# aggregates task metrics currently in queue
def aggregate_task_metrics(
    self,
    logging_pkg: LoggingPackage,
    num_tasks: int = -1,
) -> LoggingPackage:
    """Move all collected task metrics into `logging_pkg`.

    Every entry of `self.single_process_metrics` is added to
    `logging_pkg` and the list is then reset. If `num_tasks` is positive
    and differs from the number of collected metrics, an error is logged.
    A warning is logged when some metrics were rejected by the package.

    # Returns

    The same `logging_pkg` that was passed in.
    """
    num_collected = len(self.single_process_metrics)
    if num_tasks > 0 and num_collected != num_tasks:
        error_msg = "shorter" if num_collected < num_tasks else "longer"
        get_logger().error(
            f"Metrics out is {error_msg} than expected number of tasks."
            " This should only happen if a positive number of `num_tasks` were"
            " set during testing but the queue did not contain this number of entries."
            " Please file an issue at https://github.com/allenai/allenact/issues."
        )

    # `add_metrics_dict` returning a falsy value is counted as an empty task metric.
    num_empty_tasks_dequeued = sum(
        not logging_pkg.add_metrics_dict(single_task_metrics_dict=task_metrics)
        for task_metrics in self.single_process_metrics
    )

    self.single_process_metrics = []

    if num_empty_tasks_dequeued != 0:
        get_logger().warning(
            f"Discarded {num_empty_tasks_dequeued} empty task metrics"
        )

    return logging_pkg
def _preprocess_observations(self, batched_observations):
    """Run the sensor preprocessor graph (if there is one) on batched observations.

    Returns the input unchanged when no preprocessor graph is set.
    """
    graph = self.sensor_preprocessor_graph
    if graph is not None:
        return graph.get_observations(batched_observations)
    return batched_observations
def remove_paused(self, observations):
    """Pause samplers whose observation is `None` and batch the remaining observations.

    # Returns

    A tuple `(number of paused samplers, indices of kept samplers, batch)` where
    `batch` is built from the non-`None` observations on `self.device`.
    """
    indexed = list(enumerate(observations))
    paused = [ind for ind, obs in indexed if obs is None]
    keep = [ind for ind, obs in indexed if obs is not None]
    running = [obs for _, obs in indexed if obs is not None]

    # Pause from the highest index down.
    for paused_ind in paused[::-1]:
        self.vector_tasks.pause_at(paused_ind)

    # Group samplers along new dim:
    batch = batch_observations(running, device=self.device)

    return len(paused), keep, batch
def initialize_storage_and_viz(
    self,
    storage_to_initialize: Optional[Sequence[ExperienceStorage]],
    visualizer: Optional[VizSuite] = None,
):
    """Initialize the given storages and, optionally, the visualizer.

    Initial observations are only fetched from the vector tasks when a
    visualizer is given or when at least one of the storages is a
    `RolloutStorage`; otherwise storages are initialized with empty
    observations and zero samplers.

    # Returns

    The number of paused samplers (asserted to be 0 whenever observations are fetched).
    """
    keep: Optional[List] = None

    needs_observations = visualizer is not None
    if not needs_observations and storage_to_initialize is not None:
        needs_observations = any(
            isinstance(s, RolloutStorage) for s in storage_to_initialize
        )

    if needs_observations:
        raw_observations = self.vector_tasks.get_observations()
        npaused, keep, batch = self.remove_paused(raw_observations)
        if len(keep) > 0:
            observations = self._preprocess_observations(batch)
        else:
            observations = batch

        assert npaused == 0, f"{npaused} samplers are paused during initialization."

        num_samplers = len(keep)
    else:
        observations, num_samplers, npaused = {}, 0, 0

    recurrent_memory_specification = (
        self.actor_critic.recurrent_memory_specification
    )

    for storage in storage_to_initialize if storage_to_initialize is not None else ():
        storage.to(self.device)
        storage.set_partition(index=self.worker_id, num_parts=self.num_workers)
        storage.initialize(
            observations=observations,
            num_samplers=num_samplers,
            recurrent_memory_specification=recurrent_memory_specification,
            action_space=self.actor_critic.action_space,
        )

    if visualizer is not None and num_samplers > 0:
        visualizer.collect(vector_task=self.vector_tasks, alive=keep)

    return npaused
@property
def num_active_samplers(self):
    """Number of unpaused tasks in the vector tasks (0 if there are no vector tasks)."""
    tasks = self.vector_tasks
    return 0 if tasks is None else tasks.num_unpaused_tasks
def act(
    self,
    rollout_storage: RolloutStorage,
    dist_wrapper_class: Optional[type] = None,
):
    """Run the model (without gradients) on the next-step input and pick actions.

    # Parameters

    rollout_storage : Storage providing the agent input for the next step.
    dist_wrapper_class : Optional class used to wrap the action distribution; it is
        called with the keyword arguments `distr` and `obs`.

    # Returns

    A tuple `(actions, actor_critic_output, memory, observations)` where the actions
    are the distribution's mode if `self.deterministic_agents` and a sample otherwise.
    """
    with torch.no_grad():
        agent_input = rollout_storage.agent_input_for_next_step()
        actor_critic_output, memory = self.actor_critic(**agent_input)

        action_distr = actor_critic_output.distributions
        if dist_wrapper_class is not None:
            action_distr = dist_wrapper_class(
                distr=action_distr, obs=agent_input["observations"]
            )

        if self.deterministic_agents:
            actions = action_distr.mode()
        else:
            actions = action_distr.sample()

    return actions, actor_critic_output, memory, agent_input["observations"]
def aggregate_and_send_logging_package(
    self,
    tracking_info_list: List[TrackingInfo],
    logging_pkg: Optional[LoggingPackage] = None,
    send_logging_package: bool = True,
    checkpoint_file_name: Optional[str] = None,
):
    """Collect metrics, callback data and tracking info into a logging package.

    # Parameters

    tracking_info_list : Tracking infos to add to the package; entries with a negative
        `n` are skipped with a warning.
    logging_pkg : Package to fill; a new one is created from the current training
        pipeline state when `None`.
    send_logging_package : Whether to put the package on `self.results_queue`.
    checkpoint_file_name : Only used when a new package is created.

    # Returns

    The filled logging package.
    """
    if logging_pkg is None:
        pipeline = self.training_pipeline
        logging_pkg = LoggingPackage(
            mode=self.mode,
            training_steps=pipeline.total_steps,
            pipeline_stage=pipeline.current_stage_index,
            storage_uuid_to_total_experiences=pipeline.storage_uuid_to_total_experiences,
            checkpoint_file_name=checkpoint_file_name,
        )

    self.aggregate_task_metrics(logging_pkg=logging_pkg)

    callback_data = self.single_process_task_callback_data
    for entry in callback_data:
        logging_pkg.task_callback_data.append(entry)
    self.single_process_task_callback_data = []

    for tracking_info in tracking_info_list:
        if tracking_info.n < 0:
            get_logger().warning(
                f"Obtained a train_info_dict with {tracking_info.n} elements."
                f" Full info: ({tracking_info.type}, {tracking_info.info}, {tracking_info.n})."
            )
            continue

        info_to_add = tracking_info.info
        if tracking_info.type == TrackingInfoType.LOSS:
            # Loss entries are namespaced under "losses/".
            info_to_add = {
                f"losses/{name}": value for name, value in info_to_add.items()
            }

        logging_pkg.add_info_dict(
            info_dict=info_to_add,
            n=tracking_info.n,
            stage_component_uuid=tracking_info.stage_component_uuid,
            storage_uuid=tracking_info.storage_uuid,
        )

    if send_logging_package:
        self.results_queue.put(logging_pkg)

    return logging_pkg
@staticmethod
def _active_memory(memory, keep):
    """Restrict `memory` to the samplers in `keep`; a `None` memory is passed through."""
    if memory is None:
        return memory
    return memory.sampler_select(keep)
def probe(self, dones: List[bool], npaused, period=100000):
    """Debugging util. When called from
    self.collect_step_across_all_task_samplers(...), calls render for the
    0-th task sampler of the 0-th distributed worker for the first
    beginning episode spaced at least period steps from the beginning of
    the previous one.

    For valid, train, it currently renders all episodes for the 0-th task sampler of the
    0-th distributed worker. If this is not wanted, it must be hard-coded for now below.

    # Parameters

    dones : dones list from self.collect_step_across_all_task_samplers(...)
    npaused : number of newly paused tasks returned by self.remove_paused(...)
    period : minimal spacing in sampled steps between the beginning of episodes to be shown.
    """
    sampler_id = 0
    done = dones[sampler_id]
    if self.mode != TRAIN_MODE_STR:
        # Outside of training: accumulate the number of paused samplers and stop
        # probing once all samplers of this worker have been paused.
        setattr(
            self, "_probe_npaused", getattr(self, "_probe_npaused", 0) + npaused
        )
        if self._probe_npaused == self.num_samplers:  # type:ignore
            del self._probe_npaused  # type:ignore
            return
        # A period of 0 makes the rendering condition below always true.
        period = 0
    if self.worker_id == 0:
        if done:
            # Start a new rendering window if none was ever started or if the last
            # one (stored negated once finished) began at least `period` steps ago.
            if period > 0 and (
                getattr(self, "_probe_steps", None) is None
                or (
                    self._probe_steps < 0  # type:ignore
                    and (
                        self.training_pipeline.total_steps
                        + self._probe_steps  # type:ignore
                    )
                    >= period
                )
            ):
                self._probe_steps = self.training_pipeline.total_steps
        if period == 0 or (
            getattr(self, "_probe_steps", None) is not None
            and self._probe_steps >= 0
            and ((self.training_pipeline.total_steps - self._probe_steps) < period)
        ):
            if (
                period == 0
                or not done
                or self._probe_steps == self.training_pipeline.total_steps
            ):
                self.vector_tasks.call_at(sampler_id, "render", ["human"])
            else:
                # The probed episode is done: negate the stored step count to mark
                # the window as finished (see the `< 0` check above).
                # noinspection PyAttributeOutsideInit
                self._probe_steps = -self._probe_steps
def collect_step_across_all_task_samplers(
    self,
    rollout_storage_uuid: str,
    uuid_to_storage: Dict[str, ExperienceStorage],
    visualizer=None,
    dist_wrapper_class=None,
) -> int:
    """Take one step in every task sampler and record the results in all storages.

    # Parameters

    rollout_storage_uuid : Key in `uuid_to_storage` of the rollout storage used to
        produce the agent's input.
    uuid_to_storage : All storages; the step's data is added to each of them.
    visualizer : Optional visualizer whose `collect` method is called with the step's data.
    dist_wrapper_class : Optional distribution wrapper class forwarded to `self.act`.

    # Returns

    The number of task samplers that were paused during this step.

    # Raises

    NotImplementedError: if rewards are not scalar per sampler, or if a sampler is
        paused while in training mode.
    """
    rollout_storage = cast(RolloutStorage, uuid_to_storage[rollout_storage_uuid])
    actions, actor_critic_output, memory, _ = self.act(
        rollout_storage=rollout_storage,
        dist_wrapper_class=dist_wrapper_class,
    )

    # Flatten actions
    flat_actions = su.flatten(self.actor_critic.action_space, actions)

    assert len(flat_actions.shape) == 3, (
        "Distribution samples must include step and task sampler dimensions [step, sampler, ...]. The simplest way"
        "to accomplish this is to pass param tensors (like `logits` in a `CategoricalDistr`) with these dimensions"
        "to the Distribution."
    )

    # Convert flattened actions into list of actions and send them
    outputs: List[RLStepResult] = self.vector_tasks.step(
        su.action_list(self.actor_critic.action_space, flat_actions)
    )

    # Save after task completion metrics
    # (the metrics / callback entries are moved out of each step result's `info`).
    for step_result in outputs:
        if step_result.info is not None:
            if COMPLETE_TASK_METRICS_KEY in step_result.info:
                self.single_process_metrics.append(
                    step_result.info[COMPLETE_TASK_METRICS_KEY]
                )
                del step_result.info[COMPLETE_TASK_METRICS_KEY]
            if COMPLETE_TASK_CALLBACK_KEY in step_result.info:
                self.single_process_task_callback_data.append(
                    step_result.info[COMPLETE_TASK_CALLBACK_KEY]
                )
                del step_result.info[COMPLETE_TASK_CALLBACK_KEY]

    rewards: Union[List, torch.Tensor]
    # Transpose the per-sampler step results into per-field lists.
    observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]

    rewards = torch.tensor(
        rewards,
        dtype=torch.float,
        device=self.device,  # type:ignore
    )

    # We want rewards to have dimensions [sampler, reward]
    if len(rewards.shape) == 1:
        # Rewards are of shape [sampler,]
        rewards = rewards.unsqueeze(-1)
    elif len(rewards.shape) > 1:
        raise NotImplementedError()

    # If done then clean the history of observations.
    masks = (
        1.0
        - torch.tensor(
            dones,
            dtype=torch.float32,
            device=self.device,  # type:ignore
        )
    ).view(
        -1, 1
    )  # [sampler, 1]

    npaused, keep, batch = self.remove_paused(observations)

    if hasattr(self.actor_critic, "sampler_select"):
        self.actor_critic.sampler_select(keep)

    # TODO self.probe(...) can be useful for debugging (we might want to control it from main?)
    # self.probe(dones, npaused)

    if npaused > 0:
        if self.mode == TRAIN_MODE_STR:
            raise NotImplementedError(
                "When trying to get a new task from a task sampler (using the `.next_task()` method)"
                " the task sampler returned `None`. This is not currently supported during training"
                " (and almost certainly a bug in the implementation of the task sampler or in the "
                " initialization of the task sampler for training)."
            )

        # Restrict rollout storages to the samplers that are still running.
        for s in uuid_to_storage.values():
            if isinstance(s, RolloutStorage):
                s.sampler_select(keep)

    # Only data of the samplers in `keep` (i.e. not paused) is stored.
    to_add_to_storage = dict(
        observations=(
            self._preprocess_observations(batch) if len(keep) > 0 else batch
        ),
        memory=self._active_memory(memory, keep),
        actions=flat_actions[0, keep],
        action_log_probs=actor_critic_output.distributions.log_prob(actions)[
            0, keep
        ],
        value_preds=actor_critic_output.values[0, keep],
        rewards=rewards[keep],
        masks=masks[keep],
    )
    for storage in uuid_to_storage.values():
        storage.add(**to_add_to_storage)

    # TODO we always miss tensors for the last action in the last episode of each worker
    if visualizer is not None:
        if len(keep) > 0:
            visualizer.collect(
                rollout=rollout_storage,
                vector_task=self.vector_tasks,
                alive=keep,
                actor_critic=actor_critic_output,
            )
        else:
            visualizer.collect(actor_critic=actor_critic_output)

    return npaused
def distributed_weighted_sum(
    self,
    to_share: Union[torch.Tensor, float, int],
    weight: Union[torch.Tensor, float, int],
):
    """Weighted sum of scalar across distributed workers.

    Each worker contributes `to_share * weight`. When not distributed the
    result is simply this worker's `to_share * weight`; a warning is
    logged in that case if `weight` differs noticeably from 1.
    """
    if not self.is_distributed:
        if abs(1 - weight) > 1e-5:
            get_logger().warning(
                f"Scaling non-distributed value with weight {weight}"
            )
        return torch.tensor(to_share * weight).item()

    weighted = torch.tensor(to_share * weight).to(self.device)
    dist.all_reduce(weighted)
    return weighted.item()
def distributed_reduce(
    self, to_share: Union[torch.Tensor, float, int], op: ReduceOp
):
    """Reduce a scalar across distributed workers with the reduction `op`.

    When not distributed, the value of `to_share` is returned as a Python
    scalar and `op` is not used.
    """
    if not self.is_distributed:
        return torch.tensor(to_share).item()

    reduced = torch.tensor(to_share).to(self.device)
    dist.all_reduce(reduced, op=op)
    return reduced.item()
def backprop_step(
    self,
    total_loss: torch.Tensor,
    max_grad_norm: float,
    local_to_global_batch_size_ratio: float = 1.0,
):
    """Backpropagation step; not implemented in this base engine.

    `compute_losses_track_them_and_backprop` calls this when backprop is not
    skipped and tracks its return value as the total gradient norm.

    # Raises

    NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError
def save_error_data(self, batch: Dict[str, Any]):
    """Save data about a failed batch; not implemented in this base engine.

    `compute_losses_track_them_and_backprop` calls this when the model raises a
    `ValueError` and logs the returned value as the save path.

    # Raises

    NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError
@property
def step_count(self) -> int:
    """Steps taken in the current pipeline stage (0 when there is no current stage)."""
    stage = self.training_pipeline.current_stage
    if stage is None:
        # Might occur during testing when all stages are complete
        return 0
    return stage.steps_taken_in_stage
def compute_losses_track_them_and_backprop(
    self,
    stage: PipelineStage,
    stage_component: StageComponent,
    storage: ExperienceStorage,
    skip_backprop: bool = False,
):
    """Compute the losses of a stage component on batches from `storage`,
    record tracking information and (unless skipped) run backprop steps.

    # Parameters

    stage : The pipeline stage providing loss weights and stream memory.
    stage_component : The component providing loss names and training settings.
    storage : Storage from which batches are drawn; must be a
        `MiniBatchStorageMixin` or a `StreamingStorageMixin`.
    skip_backprop : If `True` losses are computed and tracked but `backprop_step`
        is not called. Must be `True` when not in training mode.

    # Raises

    NotImplementedError: for unsupported storage or loss types, or loss return values
        of unexpected length.
    """
    training = self.mode == TRAIN_MODE_STR

    assert training or skip_backprop

    if training and self.is_distributed:
        # Reset the shared counter of workers lacking data for an update.
        self.insufficient_data_for_update.set(
            "insufficient_data_for_update", str(0)
        )
        dist.barrier(
            device_ids=(
                None if self.device == torch.device("cpu") else [self.device.index]
            )
        )

    training_settings = stage_component.training_settings

    loss_names = stage_component.loss_names
    losses = [self.training_pipeline.get_loss(ln) for ln in loss_names]
    loss_weights = [stage.uuid_to_loss_weight[ln] for ln in loss_names]
    loss_update_repeats_list = training_settings.update_repeats
    if isinstance(loss_update_repeats_list, numbers.Integral):
        loss_update_repeats_list = [loss_update_repeats_list] * len(loss_names)

    if skip_backprop and isinstance(storage, MiniBatchStorageMixin):
        if loss_update_repeats_list != [1] * len(loss_names):
            # Remember what was requested BEFORE overriding so that the warning
            # reports the requested values rather than the overridden ones.
            requested_update_repeats = dict(
                zip(loss_names, loss_update_repeats_list)
            )
            loss_update_repeats_list = [1] * len(loss_names)
            get_logger().warning(
                "Does not make sense to do multiple updates when"
                " skip_backprop is `True` and you are using a storage of type"
                " `MiniBatchStorageMixin`. This is likely a problem caused by"
                " using a custom valid/test stage component that is inheriting its"
                " TrainingSettings from the TrainingPipeline's TrainingSettings. We will override"
                " the requested number of updates repeats (which was"
                f" {requested_update_repeats}) to be 1 for all losses."
            )

    enough_data_for_update = True

    for current_update_repeat_index in range(
        max(loss_update_repeats_list, default=0)
    ):
        if isinstance(storage, MiniBatchStorageMixin):
            batch_iterator = storage.batched_experience_generator(
                num_mini_batch=training_settings.num_mini_batch
            )
        elif isinstance(storage, StreamingStorageMixin):
            assert (
                training_settings.num_mini_batch is None
                or training_settings.num_mini_batch == 1
            )

            def single_batch_generator(streaming_storage: StreamingStorageMixin):
                # Yields a single batch. On end-of-stream during training it
                # yields `None` if the storage is empty and otherwise resets the
                # stream (clearing the stream memory) and yields its first batch.
                try:
                    yield cast(
                        StreamingStorageMixin, streaming_storage
                    ).next_batch()
                except EOFError:
                    if not training:
                        raise

                    if streaming_storage.empty():
                        yield None
                    else:
                        cast(
                            StreamingStorageMixin, streaming_storage
                        ).reset_stream()
                        stage.stage_component_uuid_to_stream_memory[
                            stage_component.uuid
                        ].clear()
                        yield cast(
                            StreamingStorageMixin, streaming_storage
                        ).next_batch()

            batch_iterator = single_batch_generator(streaming_storage=storage)
        else:
            raise NotImplementedError(
                f"Storage {storage} must be a subclass of `MiniBatchStorageMixin` or `StreamingStorageMixin`."
            )

        for batch in batch_iterator:
            if batch is None:
                # This should only happen in a `StreamingStorageMixin` when it cannot
                # generate an initial batch or when we are in testing/validation and
                # we've reached the end of the dataset over which to test/validate.
                if training:
                    assert isinstance(storage, StreamingStorageMixin)
                    get_logger().warning(
                        f"Worker {self.worker_id}: could not run update in {storage}, potentially because"
                        f" not enough data has been accumulated to be able to fill an initial batch."
                    )
                else:
                    pass
                enough_data_for_update = False

            if training and self.is_distributed:
                # All workers skip the update if any one of them lacks data.
                self.insufficient_data_for_update.add(
                    "insufficient_data_for_update",
                    1 * (not enough_data_for_update),
                )
                dist.barrier(
                    device_ids=(
                        None
                        if self.device == torch.device("cpu")
                        else [self.device.index]
                    )
                )

                if (
                    int(
                        self.insufficient_data_for_update.get(
                            "insufficient_data_for_update"
                        )
                    )
                    != 0
                ):
                    enough_data_for_update = False
                    break

            if batch is None:
                # Non-distributed case: there is no batch to compute losses on, so
                # the update must be skipped here as well (the distributed case
                # has already left the loop above).
                break

            info: Dict[str, float] = {}

            bsize: Optional[int] = None
            total_loss: Optional[torch.Tensor] = None
            actor_critic_output_for_batch: Optional[ActorCriticOutput] = None
            batch_memory = Memory()

            for loss, loss_name, loss_weight, max_update_repeats_for_loss in zip(
                losses, loss_names, loss_weights, loss_update_repeats_list
            ):
                if current_update_repeat_index >= max_update_repeats_for_loss:
                    continue

                if isinstance(loss, AbstractActorCriticLoss):
                    bsize = batch["bsize"]

                    # The model output is computed once per batch and shared by
                    # all actor-critic losses.
                    if actor_critic_output_for_batch is None:
                        try:
                            actor_critic_output_for_batch, _ = self.actor_critic(
                                observations=batch["observations"],
                                memory=batch["memory"],
                                prev_actions=batch["prev_actions"],
                                masks=batch["masks"],
                            )
                        except ValueError:
                            save_path = self.save_error_data(batch=batch)
                            get_logger().error(
                                f"Encountered a value error! Likely because of nans in the output/input."
                                f" Saving all error information to {save_path}."
                            )
                            raise

                    loss_return = loss.loss(
                        step_count=self.step_count,
                        batch=batch,
                        actor_critic_output=actor_critic_output_for_batch,
                    )

                    per_epoch_info = {}
                    if len(loss_return) == 2:
                        current_loss, current_info = loss_return
                    elif len(loss_return) == 3:
                        current_loss, current_info, per_epoch_info = loss_return
                    else:
                        raise NotImplementedError

                elif isinstance(loss, GenericAbstractLoss):
                    loss_output = loss.loss(
                        model=self.actor_critic,
                        batch=batch,
                        batch_memory=batch_memory,
                        stream_memory=stage.stage_component_uuid_to_stream_memory[
                            stage_component.uuid
                        ],
                    )
                    current_loss = loss_output.value
                    current_info = loss_output.info
                    per_epoch_info = loss_output.per_epoch_info
                    batch_memory = loss_output.batch_memory
                    stage.stage_component_uuid_to_stream_memory[
                        stage_component.uuid
                    ] = loss_output.stream_memory
                    bsize = loss_output.bsize
                else:
                    raise NotImplementedError(
                        f"Loss of type {type(loss)} is not supported. Losses must be subclasses of"
                        f" `AbstractActorCriticLoss` or `GenericAbstractLoss`."
                    )

                if total_loss is None:
                    total_loss = loss_weight * current_loss
                else:
                    total_loss = total_loss + loss_weight * current_loss

                for key, value in current_info.items():
                    info[f"{loss_name}/{key}"] = value

                if per_epoch_info is not None:
                    for key, value in per_epoch_info.items():
                        if max(loss_update_repeats_list, default=0) > 1:
                            info[
                                f"{loss_name}/{key}_epoch{current_update_repeat_index:02d}"
                            ] = value
                            info[f"{loss_name}/{key}_combined"] = value
                        else:
                            info[f"{loss_name}/{key}"] = value

            assert total_loss is not None, (
                f"No {stage_component.uuid} losses specified for training in stage"
                f" {self.training_pipeline.current_stage_index}"
            )

            total_loss_scalar = total_loss.item()
            info["total_loss"] = total_loss_scalar

            self.tracking_info_list.append(
                TrackingInfo(
                    type=TrackingInfoType.LOSS,
                    info=info,
                    n=bsize,
                    storage_uuid=stage_component.storage_uuid,
                    stage_component_uuid=stage_component.uuid,
                )
            )

            to_track = {
                "rollout_epochs": max(loss_update_repeats_list, default=0),
                "worker_batch_size": bsize,
            }

            aggregate_bsize = None
            if training:
                aggregate_bsize = self.distributed_weighted_sum(bsize, 1)
                to_track["global_batch_size"] = aggregate_bsize
                to_track["lr"] = self.optimizer.param_groups[0]["lr"]

            if training_settings.num_mini_batch is not None:
                to_track["rollout_num_mini_batch"] = (
                    training_settings.num_mini_batch
                )

            for k, v in to_track.items():
                # We need to set the bsize to 1 for `worker_batch_size` below as we're trying to record the
                # average batch size per worker, not the average per worker weighted by the size of the batches
                # of those workers.
                self.tracking_info_list.append(
                    TrackingInfo(
                        type=TrackingInfoType.UPDATE_INFO,
                        info={k: v},
                        n=1 if k == "worker_batch_size" else bsize,
                        storage_uuid=stage_component.storage_uuid,
                        stage_component_uuid=stage_component.uuid,
                    )
                )

            if not skip_backprop:
                # `skip_backprop` can only be `False` when training (asserted
                # above), so `aggregate_bsize` has been set here.
                total_grad_norm = self.backprop_step(
                    total_loss=total_loss,
                    max_grad_norm=training_settings.max_grad_norm,
                    local_to_global_batch_size_ratio=bsize / aggregate_bsize,
                )
                self.tracking_info_list.append(
                    TrackingInfo(
                        type=TrackingInfoType.UPDATE_INFO,
                        info={"total_grad_norm": total_grad_norm},
                        n=bsize,
                        storage_uuid=stage_component.storage_uuid,
                        stage_component_uuid=stage_component.uuid,
                    )
                )

            # Detach the stream memory so that no graph is kept across batches.
            stage.stage_component_uuid_to_stream_memory[stage_component.uuid] = (
                detach_recursively(
                    input=stage.stage_component_uuid_to_stream_memory[
                        stage_component.uuid
                    ],
                    inplace=True,
                )
            )
def close(self, verbose=True):
    """Shut down this engine's vectorized tasks; safe to call repeatedly.

    # Parameters

    verbose : If `True`, progress messages (and the traceback of any
        exception raised while closing the tasks) are sent to the logger.
        If `False`, nothing is logged.

    Exceptions raised by `self._vector_tasks.close()` are swallowed (and
    logged when `verbose`) so that shutdown is best-effort.
    """
    if self.__dict__.get("_is_closed", False):
        # Already closed: nothing to release. Make sure the "closing" flag is
        # not left raised on an engine that is no longer closing.
        self._is_closing = False
        return

    self._is_closing = True

    def logif(s: Union[str, Exception]):
        if not verbose:
            return
        if isinstance(s, str):
            get_logger().info(s)
        elif isinstance(s, Exception):
            # Only ever called from inside an `except` block, so
            # `format_exc` reports the exception currently being handled.
            get_logger().error(traceback.format_exc())
        else:
            raise NotImplementedError()

    try:
        # `self.__dict__` is inspected directly so that we never touch
        # attributes that may not have been created yet.
        if self.__dict__.get("_vector_tasks") is not None:
            try:
                logif(
                    f"[{self.mode} worker {self.worker_id}] Closing OnPolicyRLEngine.vector_tasks."
                )
                self._vector_tasks.close()
                logif(f"[{self.mode} worker {self.worker_id}] Closed.")
            except Exception as e:
                logif(
                    f"[{self.mode} worker {self.worker_id}] Exception raised when closing OnPolicyRLEngine.vector_tasks:"
                )
                logif(e)

        self._is_closed = True
    finally:
        # Always lower the flag, even if something other than an `Exception`
        # (e.g. `KeyboardInterrupt`) interrupts the shutdown.
        self._is_closing = False
@property
def is_closed(self):
    """The `_is_closed` flag, which `close` sets to `True` once it finishes."""
    closed_flag = self._is_closed
    return closed_flag
@property
def is_closing(self):
    """The `_is_closing` flag, which `close` raises while it is running."""
    closing_flag = self._is_closing
    return closing_flag
def __del__(self):
    """Quietly close the engine when the object is finalized."""
    quiet = False
    self.close(verbose=quiet)
def __enter__(self):
    """Enter a `with` block; the engine itself is the managed object."""
    engine = self
    return engine
def __exit__(self, exc_type, exc_val, exc_tb):
    """Leave a `with` block by quietly closing the engine.

    Returns `None`, so any exception raised inside the block propagates.
    """
    quiet = False
    self.close(verbose=quiet)
class OnPolicyTrainer(OnPolicyRLEngine):
def __init__(
    self,
    experiment_name: str,
    config: ExperimentConfig,
    results_queue: mp.Queue,
    checkpoints_queue: Optional[mp.Queue],
    checkpoints_dir: str = "",
    seed: Optional[int] = None,
    deterministic_cudnn: bool = False,
    mp_ctx: Optional[BaseContext] = None,
    worker_id: int = 0,
    num_workers: int = 1,
    device: Union[str, torch.device, int] = "cpu",
    distributed_ip: str = "127.0.0.1",
    distributed_port: int = 0,
    deterministic_agents: bool = False,
    distributed_preemption_threshold: float = 0.7,
    max_sampler_processes_per_worker: Optional[int] = None,
    save_ckpt_after_every_pipeline_stage: bool = True,
    first_local_worker_id: int = 0,
    save_ckpt_at_every_host: bool = False,
    **kwargs,
):
    """Set up a training engine: model in train mode, pipeline, optimizer, scheduler.

    Most arguments are forwarded unchanged to the parent engine's
    `__init__`; `mode` is always forced to `TRAIN_MODE_STR`. The arguments
    handled here are:

    # Parameters

    distributed_preemption_threshold : Stored only when the engine is
        distributed; otherwise the stored threshold is `1.0`.
    save_ckpt_after_every_pipeline_stage : Stored for use when deciding
        whether to checkpoint at the end of a pipeline stage.
    first_local_worker_id : Stored; compared against `worker_id` when
        `save_ckpt_at_every_host` is set.
    save_ckpt_at_every_host : Stored; see `first_local_worker_id`.

    # Raises

    NotImplementedError : If `num_workers != 1` and any pipeline stage
        defines an early stopping criterion.
    """
    kwargs["mode"] = TRAIN_MODE_STR
    super().__init__(
        experiment_name=experiment_name,
        config=config,
        results_queue=results_queue,
        checkpoints_queue=checkpoints_queue,
        checkpoints_dir=checkpoints_dir,
        seed=seed,
        deterministic_cudnn=deterministic_cudnn,
        mp_ctx=mp_ctx,
        worker_id=worker_id,
        num_workers=num_workers,
        device=device,
        distributed_ip=distributed_ip,
        distributed_port=distributed_port,
        deterministic_agents=deterministic_agents,
        max_sampler_processes_per_worker=max_sampler_processes_per_worker,
        **kwargs,
    )

    self.save_ckpt_after_every_pipeline_stage = save_ckpt_after_every_pipeline_stage

    self.actor_critic.train()

    self.training_pipeline: TrainingPipeline = config.training_pipeline()

    if self.num_workers != 1:
        # Ensure that we're only using early stopping criterions in the non-distributed setting.
        if any(
            stage.early_stopping_criterion is not None
            for stage in self.training_pipeline.pipeline_stages
        ):
            raise NotImplementedError(
                "Early stopping criterions are currently only allowed when using a single training worker, i.e."
                " no distributed (multi-GPU) training. If this is a feature you'd like please create an issue"
                " at https://github.com/allenai/allenact/issues or (even better) create a pull request with this "
                " feature and we'll be happy to review it."
            )

    # Only parameters that require gradients are handed to the optimizer.
    self.optimizer: optim.optimizer.Optimizer = (
        self.training_pipeline.optimizer_builder(
            params=[p for p in self.actor_critic.parameters() if p.requires_grad]
        )
    )

    # noinspection PyProtectedMember
    self.lr_scheduler: Optional[_LRScheduler] = None
    if self.training_pipeline.lr_scheduler_builder is not None:
        self.lr_scheduler = self.training_pipeline.lr_scheduler_builder(
            optimizer=self.optimizer
        )

    if self.is_distributed:
        # Tracks how many workers have finished their rollout
        self.num_workers_done = torch.distributed.PrefixStore(  # type:ignore
            "num_workers_done", self.store
        )
        # Tracks the number of steps taken by each worker in current rollout
        self.num_workers_steps = torch.distributed.PrefixStore(  # type:ignore
            "num_workers_steps", self.store
        )
        self.distributed_preemption_threshold = distributed_preemption_threshold
        # Flag for finished worker in current epoch
        self.offpolicy_epoch_done = torch.distributed.PrefixStore(  # type:ignore
            "offpolicy_epoch_done", self.store
        )
        # Flag for finished worker in current epoch with custom component
        self.insufficient_data_for_update = (
            torch.distributed.PrefixStore(  # type:ignore
                "insufficient_data_for_update", self.store
            )
        )
    else:
        self.num_workers_done = None
        self.num_workers_steps = None
        self.distributed_preemption_threshold = 1.0
        self.offpolicy_epoch_done = None
        # Defined here as well so the attribute exists (as `None`) in the
        # non-distributed setting, like the other store handles above.
        self.insufficient_data_for_update = None

    # Keeping track of training state
    self.former_steps: Optional[int] = None
    self.last_log: Optional[int] = None
    self.last_save: Optional[int] = None
    # The `self._last_aggregated_train_task_metrics` attribute defined
    # below is used for early stopping criterion computations
    self._last_aggregated_train_task_metrics: ScalarMeanTracker = (
        ScalarMeanTracker()
    )

    self.first_local_worker_id = first_local_worker_id
    self.save_ckpt_at_every_host = save_ckpt_at_every_host
def advance_seed(
    self, seed: Optional[int], return_same_seed_per_worker=False
) -> Optional[int]:
    """Derive a new seed from `seed` and the pipeline's current total step count.

    # Parameters

    seed : Base seed. `None` is returned unchanged.
    return_same_seed_per_worker : If `True`, the derived seed does not
        depend on `self.worker_id`.

    # Returns

    `None` if `seed` is `None`; otherwise an entry of the list returned by
    `self.worker_seeds`, indexed by `self.worker_id` when in train/test
    mode and a per-worker seed was requested, and the single shared entry
    otherwise.
    """
    if seed is None:
        return None

    # Mix in the step count; the result is the same seed for all workers.
    mixed_seed = (seed ^ (self.training_pipeline.total_steps + 1)) % (2**31 - 1)

    if return_same_seed_per_worker or self.mode not in (
        TRAIN_MODE_STR,
        TEST_MODE_STR,
    ):
        # doesn't modify the current rng state
        return self.worker_seeds(1, mixed_seed)[0]

    # doesn't modify the current rng state
    per_worker_seeds = self.worker_seeds(self.num_workers, mixed_seed)
    return per_worker_seeds[self.worker_id]
def deterministic_seeds(self) -> None:
    """Re-seed this worker and its task samplers; a no-op when `self.seed` is `None`."""
    if self.seed is None:
        return

    # known state for all workers
    set_seed(self.advance_seed(self.seed))

    # use the latest seed for workers and update rng state
    sampler_seeds = self.worker_seeds(self.num_samplers, None)

    if self.vector_tasks is not None:
        self.vector_tasks.set_seeds(sampler_seeds)
def save_error_data(self, batch: Dict[str, Any]) -> str:
    """Dump the training state together with `batch` to an error file.

    The file is written into `self.checkpoints_dir` under a file lock
    (`error.lock`, 60s timeout) and only if a file with the same name does
    not already exist.

    # Parameters

    batch : The batch to store under the `"batch"` key of the saved dict.

    # Returns

    The path of the error file (whether or not this call wrote it).
    """
    pipeline = self.training_pipeline
    file_name = "error_for_exp_{}__stage_{:02d}__steps_{:012d}.pt".format(
        self.experiment_name,
        pipeline.current_stage_index,
        pipeline.total_steps,
    )
    model_path = os.path.join(self.checkpoints_dir, file_name)
    lock_path = os.path.join(self.checkpoints_dir, "error.lock")

    with filelock.FileLock(lock_path, timeout=60):
        if os.path.exists(model_path):
            # The error state for this stage/step has already been written.
            return model_path

        save_dict = {
            "model_state_dict": self.actor_critic.state_dict(),  # type:ignore
            "total_steps": self.training_pipeline.total_steps,  # Total steps including current stage
            "optimizer_state_dict": self.optimizer.state_dict(),  # type: ignore
            "training_pipeline_state_dict": self.training_pipeline.state_dict(),
            "trainer_seed": self.seed,
            "batch": batch,
        }

        scheduler = self.lr_scheduler
        if scheduler is not None:
            save_dict["scheduler_state"] = cast(_LRScheduler, scheduler).state_dict()

        torch.save(save_dict, model_path)

    return model_path
def aggregate_and_send_logging_package(
    self,
    tracking_info_list: List[TrackingInfo],
    logging_pkg: Optional[LoggingPackage] = None,
    send_logging_package: bool = True,
    checkpoint_file_name: Optional[str] = None,
):
    """Delegate to the parent implementation, then record the train metrics.

    All arguments are passed through unchanged to the parent method. In
    train mode, the means and counts of the resulting package's
    `metrics_tracker` are added to `self._last_aggregated_train_task_metrics`.

    # Returns

    The logging package returned by the parent implementation.
    """
    logging_pkg = super().aggregate_and_send_logging_package(
        tracking_info_list=tracking_info_list,
        logging_pkg=logging_pkg,
        send_logging_package=send_logging_package,
        checkpoint_file_name=checkpoint_file_name,
    )

    # Technically self.mode should always be "train" here (as this is the
    # training engine), this guard is defensive.
    if self.mode != TRAIN_MODE_STR:
        return logging_pkg

    tracker = logging_pkg.metrics_tracker
    self._last_aggregated_train_task_metrics.add_scalars(
        scalars=tracker.means(),
        n=tracker.counts(),
    )
    return logging_pkg
def checkpoint_save(self, pipeline_stage_index: Optional[int] = None) -> str:
    """Write a checkpoint of the current training state to `self.checkpoints_dir`.

    # Parameters

    pipeline_stage_index : Stage index to embed in the file name. When
        `None`, the pipeline's `current_stage_index` is used.

    # Returns

    The path of the checkpoint file that was written.
    """
    pipeline = self.training_pipeline

    if pipeline_stage_index is None:
        stage_index = pipeline.current_stage_index
    else:
        stage_index = pipeline_stage_index

    file_name = "exp_{}__stage_{:02d}__steps_{:012d}.pt".format(
        self.experiment_name,
        stage_index,
        pipeline.total_steps,
    )
    model_path = os.path.join(self.checkpoints_dir, file_name)

    save_dict = {
        "model_state_dict": self.actor_critic.state_dict(),  # type:ignore
        "total_steps": self.training_pipeline.total_steps,  # Total steps including current stage
        "optimizer_state_dict": self.optimizer.state_dict(),  # type: ignore
        "training_pipeline_state_dict": self.training_pipeline.state_dict(),
        "trainer_seed": self.seed,
    }

    scheduler = self.lr_scheduler
    if scheduler is not None:
        save_dict["scheduler_state"] = cast(_LRScheduler, scheduler).state_dict()

    torch.save(save_dict, model_path)
    return model_path
def checkpoint_load(
    self, ckpt: Union[str, Dict[str, Any]], restart_pipeline: bool = False
) -> Dict[str, Union[Dict[str, Any], torch.Tensor, float, int, str, List]]:
    """Load a checkpoint and restore (or restart) the training state.

    # Parameters

    ckpt : Either a checkpoint path or an already-loaded checkpoint dict.
    restart_pipeline : If `True`, the training pipeline is restarted and
        the seed, optimizer and scheduler states in the checkpoint are
        NOT restored. If `False`, those states are restored from `ckpt`.

    # Returns

    The checkpoint dict returned by the parent `checkpoint_load`.

    Note: when `restart_pipeline` is set and `ckpt` is a dict, the
    `"training_pipeline_state_dict"` entry is removed from it in place.
    """
    # Only a loaded checkpoint (not a path string) can hold the pipeline
    # state. On a `str`, `in` would be a substring test and `del` would
    # raise `TypeError`, so paths are left untouched here.
    if restart_pipeline and not isinstance(ckpt, str):
        if "training_pipeline_state_dict" in ckpt:
            del ckpt["training_pipeline_state_dict"]

    ckpt = super().checkpoint_load(ckpt, restart_pipeline=restart_pipeline)

    if restart_pipeline:
        self.training_pipeline.restart_pipeline()
    else:
        self.seed = cast(int, ckpt["trainer_seed"])
        self.optimizer.load_state_dict(ckpt["optimizer_state_dict"])  # type: ignore
        # Older checkpoints may not contain a scheduler state.
        if self.lr_scheduler is not None and "scheduler_state" in ckpt:
            self.lr_scheduler.load_state_dict(ckpt["scheduler_state"])  # type: ignore

    self.deterministic_seeds()

    return ckpt
@property
def step_count(self):
    """Number of steps taken in the current pipeline stage."""
    current_stage = self.training_pipeline.current_stage
    return current_stage.steps_taken_in_stage

@step_count.setter
def step_count(self, val: int) -> None:
    """Overwrite the current pipeline stage's `steps_taken_in_stage`."""
    current_stage = self.training_pipeline.current_stage
    current_stage.steps_taken_in_stage = val
@property
def log_interval(self):
    """The current stage's `metric_accumulate_interval` training setting."""
    settings = self.training_pipeline.current_stage.training_settings
    return settings.metric_accumulate_interval
@property
def approx_steps(self):
    """Estimate of the global step count in the current stage.

    When distributed, the steps this worker took since `former_steps` are
    scaled by `num_workers` (the actual number of steps gets synchronized
    after each rollout). Otherwise `step_count` is returned as is.
    """
    if not self.is_distributed:
        return self.step_count  # this is actually accurate

    local_steps_this_rollout = self.step_count - self.former_steps
    return local_steps_this_rollout * self.num_workers + self.former_steps
def act(
    self,
    rollout_storage: RolloutStorage,
    dist_wrapper_class: Optional[type] = None,
):
    """Take one acting step via the parent `act`, then advance `step_count`.

    If the current stage defines `teacher_forcing`, the action distribution
    is wrapped in a `TeacherForcingDistr` whose tracking callback appends
    `TrackingInfo` entries to `self.tracking_info_list`. In that case
    `dist_wrapper_class` must be `None`.

    # Returns

    The `(actions, actor_critic_output, memory, step_observation)` tuple
    produced by the parent `act`.
    """
    teacher_forcing = self.training_pipeline.current_stage.teacher_forcing

    if teacher_forcing is not None:
        assert dist_wrapper_class is None

        def tracking_callback(type: TrackingInfoType, info: Dict[str, Any], n: int):
            tracked = TrackingInfo(
                type=type,
                info=info,
                n=n,
                storage_uuid=self.training_pipeline.rollout_storage_uuid,
                stage_component_uuid=None,
            )
            self.tracking_info_list.append(tracked)

        dist_wrapper_class = partial(
            TeacherForcingDistr,
            action_space=self.actor_critic.action_space,
            num_active_samplers=self.num_active_samplers,
            approx_steps=self.approx_steps,
            teacher_forcing=teacher_forcing,
            tracking_callback=tracking_callback,
        )

    actions, actor_critic_output, memory, step_observation = super().act(
        rollout_storage=rollout_storage,
        dist_wrapper_class=dist_wrapper_class,
    )

    # One step was taken in each active sampler.
    self.step_count += self.num_active_samplers

    return actions, actor_critic_output, memory, step_observation
def advantage_stats(self, advantages: torch.Tensor) -> Dict[str, torch.Tensor]:
    r"""Computes the mean and standard deviation of advantages (possibly over multiple workers).

    For multiple workers, the sums are all-reduced across workers so that
    the statistics are computed as though _all_ workers' versions of the
    tensor were concatenated together.

    # Parameters

    advantages: Tensors to compute statistics over. Assumed to be solely the
        worker's local copy of this tensor.

    # Returns

    A dict with keys `"mean"` and `"std"`.
    """
    # Step count has already been updated with the steps from all workers
    global_rollout_steps = self.step_count - self.former_steps

    if not self.is_distributed:
        # noinspection PyArgumentList
        return {"mean": advantages.mean(), "std": advantages.std()}

    total = advantages.sum()
    dist.all_reduce(total)
    mean = total / global_rollout_steps

    squared_deviations = (advantages - mean).pow(2).sum()
    dist.all_reduce(squared_deviations)
    # Dividing by (n - 1) mirrors the unbiased estimate `Tensor.std` uses.
    std = (squared_deviations / (global_rollout_steps - 1)).sqrt()

    return {"mean": mean, "std": std}
def backprop_step(
    self,
    total_loss: torch.Tensor,
    max_grad_norm: float,
    local_to_global_batch_size_ratio: float = 1.0,
):
    """Backpropagate `total_loss`, clip gradients, and take an optimizer step.

    # Parameters

    total_loss : Loss to backpropagate. If it is not a tensor, no backward
        pass is run (gradients are still zeroed, reduced and applied).
    max_grad_norm : Maximum gradient norm passed to `clip_grad_norm_`.
    local_to_global_batch_size_ratio : When distributed, each local
        gradient is scaled by this ratio before being summed across workers.

    # Returns

    The total gradient norm as a Python float: the value reported by the
    model's `compute_total_grad_norm` if it defines one, otherwise the
    pre-clipping norm returned by `clip_grad_norm_`.
    """
    self.optimizer.zero_grad()  # type: ignore
    if isinstance(total_loss, torch.Tensor):
        total_loss.backward()

    if self.is_distributed:
        # From https://github.com/pytorch/pytorch/issues/43135
        pending_reductions = []
        for p in self.actor_critic.parameters():
            # you can also organize grads to larger buckets to make all_reduce more efficient
            if not p.requires_grad:
                continue
            if p.grad is None:
                # Every worker must take part in every all_reduce, so
                # parameters unused locally contribute a zero gradient.
                p.grad = torch.zeros_like(p.data)
            else:
                p.grad = p.grad * local_to_global_batch_size_ratio
            pending_reductions.append(
                dist.all_reduce(p.grad, async_op=True)  # sum
            )
        # synchronize
        for reduction in pending_reductions:
            reduction.wait()

    has_custom_norm = hasattr(self.actor_critic, "compute_total_grad_norm")
    custom_grad_norm = (
        self.actor_critic.compute_total_grad_norm().item()
        if has_custom_norm
        else None
    )

    # `clip_grad_norm_` returns the total norm of the gradients as measured
    # before clipping; it is used for tracking when the model does not
    # provide its own norm computation (instead of reporting 0.0).
    pre_clip_norm = nn.utils.clip_grad_norm_(
        self.actor_critic.parameters(),
        max_norm=max_grad_norm,  # type: ignore
    )
    total_grad_norm = custom_grad_norm if has_custom_norm else float(pre_clip_norm)

    self.optimizer.step()  # type: ignore
    return total_grad_norm
def _save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter(
    self, pipeline_stage_index: Optional[int] = None
):
    """Re-seed, save a checkpoint from the designated worker(s), and queue it for eval.

    A checkpoint is written by worker 0, and additionally by the first
    local worker of each host when `self.save_ckpt_at_every_host` is set.
    A worker that saves also puts `("eval", model_path)` on
    `self.checkpoints_queue` when that queue exists. Every worker updates
    `self.last_save` to the pipeline's total step count.

    # Parameters

    pipeline_stage_index : Forwarded to `checkpoint_save`.

    # Returns

    The checkpoint path if this worker saved one, else `None`.
    """
    self.deterministic_seeds()

    saves_for_host = (
        self.save_ckpt_at_every_host
        and self.worker_id == self.first_local_worker_id
    )

    model_path = None
    if saves_for_host or self.worker_id == 0:
        model_path = self.checkpoint_save(pipeline_stage_index=pipeline_stage_index)
        queue = self.checkpoints_queue
        if queue is not None:
            queue.put(("eval", model_path))

    self.last_save = self.training_pipeline.total_steps
    return model_path
def run_pipeline(self, valid_on_initial_weights: bool = False):
"""Run the training loop until the training pipeline has no current stage left.

Each loop iteration: (1) calls `training_pipeline.before_rollout` and, if the
pipeline stage changed, optionally saves a checkpoint and sets up the new
stage's settings/storage; (2) collects a rollout (or, when there is no rollout
storage, just bumps `step_count`); (3) runs `before_updates`, the loss/backprop
computation for every stage component, and `after_updates`; (4) updates the
per-storage step counts, steps the LR scheduler, and handles periodic
checkpointing, logging and forced scene advancement.

# Parameters

valid_on_initial_weights : If `True` (and checkpoint saving is enabled and
    `self.checkpoints_queue` is not `None`), a checkpoint of the current
    weights is saved and queued for evaluation before any training happens.
"""
cur_stage_training_settings = (
self.training_pipeline.current_stage.training_settings
)
# Change engine attributes that depend on the current stage
self.training_pipeline.current_stage.change_engine_attributes(self)
rollout_storage = self.training_pipeline.rollout_storage
uuid_to_storage = self.training_pipeline.current_stage_storage
self.initialize_storage_and_viz(
storage_to_initialize=cast(
List[ExperienceStorage], list(uuid_to_storage.values())
)
)
self.tracking_info_list.clear()
self.last_log = self.training_pipeline.total_steps
if self.last_save is None:
self.last_save = self.training_pipeline.total_steps
# Checkpoints are only saved when a checkpoint directory is set and the
# stage's `save_interval` is a positive number.
should_save_checkpoints = (
self.checkpoints_dir != ""
and cur_stage_training_settings.save_interval is not None
and cur_stage_training_settings.save_interval > 0
)
already_saved_checkpoint = False
if (
valid_on_initial_weights
and should_save_checkpoints
and self.checkpoints_queue is not None
):
if (
self.save_ckpt_at_every_host
and self.worker_id == self.first_local_worker_id
) or self.worker_id == 0:
model_path = self.checkpoint_save()
if self.checkpoints_queue is not None:
self.checkpoints_queue.put(("eval", model_path))
while True:
pipeline_stage_changed = self.training_pipeline.before_rollout(
train_metrics=self._last_aggregated_train_task_metrics
) # This is `False` at the very start of training, i.e. pipeline starts with a stage initialized
self._last_aggregated_train_task_metrics.reset()
training_is_complete = self.training_pipeline.current_stage is None
# `training_is_complete` should imply `pipeline_stage_changed`
assert pipeline_stage_changed or not training_is_complete
# Saving checkpoints and initializing storage when the pipeline stage changes
if pipeline_stage_changed:
# Here we handle saving a checkpoint after a pipeline stage ends. We
# do this:
# (1) after every pipeline stage if the `self.save_ckpt_after_every_pipeline_stage`
# boolean is True, and
# (2) when we have reached the end of ALL training (i.e. all stages are complete).
if (
should_save_checkpoints
and ( # Might happen if the `save_interval` was hit just previously, see below
not already_saved_checkpoint
)
and (
self.save_ckpt_after_every_pipeline_stage
or training_is_complete
)
):
self._save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter(
pipeline_stage_index=(
self.training_pipeline.current_stage_index - 1
if not training_is_complete
else len(self.training_pipeline.pipeline_stages) - 1
)
)
# If training is complete, break out
if training_is_complete:
break
# Here we handle updating our training settings after a pipeline stage ends.
# Update the training settings we're using
cur_stage_training_settings = (
self.training_pipeline.current_stage.training_settings
)
# If the pipeline stage changed we must initialize any new custom storage and
# stop updating any custom storage that is no longer in use (this second bit
# is done by simply updating `uuid_to_storage` to the new custom storage objects).
new_uuid_to_storage = self.training_pipeline.current_stage_storage
storage_to_initialize = [
s
for uuid, s in new_uuid_to_storage.items()
if uuid
not in uuid_to_storage # Don't initialize storage already in use
]
self.initialize_storage_and_viz(
storage_to_initialize=storage_to_initialize,
)
uuid_to_storage = new_uuid_to_storage
# Change engine attributes that depend on the current stage
self.training_pipeline.current_stage.change_engine_attributes(self)
already_saved_checkpoint = False
if self.is_distributed:
self.num_workers_done.set("done", str(0))
self.num_workers_steps.set("steps", str(0))
# Ensure all workers are done before incrementing num_workers_{steps, done}
dist.barrier(
device_ids=(
None
if self.device == torch.device("cpu")
else [self.device.index]
)
)
# Snapshot the step/experience counters so that the amount of new
# experience gathered in this iteration can be computed afterwards.
self.former_steps = self.step_count
former_storage_experiences = {
k: v.total_experiences
for k, v in self.training_pipeline.current_stage_storage.items()
}
if self.training_pipeline.rollout_storage_uuid is None:
# In this case we're not expecting to collect storage experiences, i.e. everything
# will be off-policy.
# self.step_count is normally updated by the `self.collect_step_across_all_task_samplers`
# call below, but since we're not collecting onpolicy experiences, we need to update
# it here. The step count here is now just effectively a count of the number of times
# we've called `compute_losses_track_them_and_backprop` below.
self.step_count += 1
before_update_info = dict(
next_value=None,
use_gae=cur_stage_training_settings.use_gae,
gamma=cur_stage_training_settings.gamma,
tau=cur_stage_training_settings.gae_lambda,
adv_stats_callback=self.advantage_stats,
)
else:
vector_tasks_already_restarted = False
# `step` is managed manually (rather than with `range`) because it is
# reset to -1 below when the vector tasks are restarted after an error.
step = -1
while step < cur_stage_training_settings.num_steps - 1:
step += 1
try:
num_paused = self.collect_step_across_all_task_samplers(
rollout_storage_uuid=self.training_pipeline.rollout_storage_uuid,
uuid_to_storage=uuid_to_storage,
)
except (TimeoutError, EOFError) as e:
if (
not self.try_restart_after_task_error
) or self.mode != TRAIN_MODE_STR:
# Apparently you can just call `raise` here and doing so will just raise the exception as though
# it was not caught (so the stacktrace isn't messed up)
raise
elif vector_tasks_already_restarted:
raise RuntimeError(
f"[{self.mode} worker {self.worker_id}] `vector_tasks` has timed out twice in the same"
f" rollout. This suggests that this error was not recoverable. Timeout exception:\n{traceback.format_exc()}"
)
else:
get_logger().warning(
f"[{self.mode} worker {self.worker_id}] `vector_tasks` appears to have crashed during"
f" training due to an {type(e).__name__} error. You have set"
f" `try_restart_after_task_error` to `True` so we will attempt to restart these tasks from"
f" the beginning. USE THIS FEATURE AT YOUR OWN"
f" RISK. Exception:\n{traceback.format_exc()}."
)
self.vector_tasks.close()
self._vector_tasks = None
vector_tasks_already_restarted = True
for (
storage
) in self.training_pipeline.current_stage_storage.values():
storage.after_updates()
self.initialize_storage_and_viz(
storage_to_initialize=cast(
List[ExperienceStorage],
list(uuid_to_storage.values()),
)
)
step = -1
continue
# A more informative error should already have been raised in
# `collect_step_across_all_task_samplers` if `num_paused != 0` here, but this serves
# as a sanity check.
assert num_paused == 0
if self.is_distributed:
# Preempt stragglers
# Each worker will stop collecting steps for the current rollout whenever a
# 100 * distributed_preemption_threshold percentage of workers are finished collecting their
# rollout steps, and we have collected at least 25% but less than 90% of the steps.
num_done = int(self.num_workers_done.get("done"))
if (
num_done
> self.distributed_preemption_threshold * self.num_workers
and 0.25 * cur_stage_training_settings.num_steps
<= step
< 0.9 * cur_stage_training_settings.num_steps
):
get_logger().debug(
f"[{self.mode} worker {self.worker_id}] Preempted after {step}"
f" steps (out of {cur_stage_training_settings.num_steps})"
f" with {num_done} workers done"
)
break
# Forward pass (without gradients) on the input for the next step; its
# `values` are passed as `next_value` to `before_updates` below.
with torch.no_grad():
actor_critic_output, _ = self.actor_critic(
**rollout_storage.agent_input_for_next_step()
)
self.training_pipeline.rollout_count += 1
if self.is_distributed:
# Mark that a worker is done collecting experience
self.num_workers_done.add("done", 1)
self.num_workers_steps.add(
"steps", self.step_count - self.former_steps
)
# Ensure all workers are done before updating step counter
dist.barrier(
device_ids=(
None
if self.device == torch.device("cpu")
else [self.device.index]
)
)
ndone = int(self.num_workers_done.get("done"))
assert (
ndone == self.num_workers
), f"# workers done {ndone} != # workers {self.num_workers}"
# get the actual step_count
self.step_count = (
int(self.num_workers_steps.get("steps")) + self.former_steps
)
before_update_info = dict(
next_value=actor_critic_output.values.detach(),
use_gae=cur_stage_training_settings.use_gae,
gamma=cur_stage_training_settings.gamma,
tau=cur_stage_training_settings.gae_lambda,
adv_stats_callback=self.advantage_stats,
)
# Prepare storage for iteration during updates
for storage in self.training_pipeline.current_stage_storage.values():
storage.before_updates(**before_update_info)
for sc in self.training_pipeline.current_stage.stage_components:
component_storage = uuid_to_storage[sc.storage_uuid]
self.compute_losses_track_them_and_backprop(
stage=self.training_pipeline.current_stage,
stage_component=sc,
storage=component_storage,
)
for storage in self.training_pipeline.current_stage_storage.values():
storage.after_updates()
# We update the storage step counts saved in
# `self.training_pipeline.current_stage.storage_uuid_to_steps_taken_in_stage` here rather than with
# `self.steps` above because some storage step counts may only change after the update calls above.
# This may seem a bit weird but consider a storage that corresponds to a fixed dataset
# used for imitation learning. For such a dataset, the "steps" will only increase as
# new batches are sampled during update calls.
# Note: the storage keys are iterated in sorted order below, presumably so that every worker
# issues its `distributed_weighted_sum` calls in the same order.
# First we calculate the change in counts (possibly aggregating across devices)
change_in_storage_experiences = {}
for k in sorted(self.training_pipeline.current_stage_storage.keys()):
delta = (
self.training_pipeline.current_stage_storage[k].total_experiences
- former_storage_experiences[k]
)
assert delta >= 0
change_in_storage_experiences[k] = self.distributed_weighted_sum(
to_share=delta, weight=1
)
# Then we update `self.training_pipeline.current_stage.storage_uuid_to_steps_taken_in_stage` with the above
# computed changes.
for storage_uuid, delta in change_in_storage_experiences.items():
self.training_pipeline.current_stage.storage_uuid_to_steps_taken_in_stage[
storage_uuid
] += delta
# The scheduler is stepped with the pipeline's total step count as its `epoch`.
if self.lr_scheduler is not None:
self.lr_scheduler.step(epoch=self.training_pipeline.total_steps)
# Here we handle saving a checkpoint every `save_interval` steps, saving after
# a pipeline stage completes is controlled above
checkpoint_file_name = None
if should_save_checkpoints and (
self.training_pipeline.total_steps - self.last_save
>= cur_stage_training_settings.save_interval
):
checkpoint_file_name = (
self._save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter()
)
already_saved_checkpoint = True
# Send a logging package once `log_interval` steps have passed since the
# last log, or when the current stage has just completed.
if (
self.training_pipeline.total_steps - self.last_log >= self.log_interval
or self.training_pipeline.current_stage.is_complete
):
self.aggregate_and_send_logging_package(
tracking_info_list=self.tracking_info_list,
checkpoint_file_name=checkpoint_file_name,
)
self.tracking_info_list.clear()
self.last_log = self.training_pipeline.total_steps
# Every `advance_scene_rollout_period` rollouts (when set), force the task
# samplers to advance scenes and re-initialize the storage.
if (
cur_stage_training_settings.advance_scene_rollout_period is not None
) and (
self.training_pipeline.rollout_count
% cur_stage_training_settings.advance_scene_rollout_period
== 0
):
get_logger().info(
f"[{self.mode} worker {self.worker_id}] Force advance"
f" tasks with {self.training_pipeline.rollout_count} rollouts"
)
self.vector_tasks.next_task(force_advance_scene=True)
self.initialize_storage_and_viz(
storage_to_initialize=cast(
List[ExperienceStorage], list(uuid_to_storage.values())
)
)
def train(
    self,
    checkpoint_file_name: Optional[str] = None,
    restart_pipeline: bool = False,
    valid_on_initial_weights: bool = False,
):
    """Entry point for a training worker: optionally load a checkpoint, run the pipeline, report, close.

    `KeyboardInterrupt` and any `Exception` raised while training are
    logged and swallowed. On success, worker 0 puts `("train_stopped", 0)`
    on `self.results_queue`; on failure the worker puts
    `("train_stopped", 1 + self.worker_id)`. The engine is always closed
    before returning.

    # Parameters

    checkpoint_file_name : Checkpoint to load before training, if any.
    restart_pipeline : Passed to `checkpoint_load` when a checkpoint is given.
    valid_on_initial_weights : Passed to `run_pipeline`.
    """
    assert (
        self.mode == TRAIN_MODE_STR
    ), "train only to be called from a train instance"

    finished_ok = False

    # noinspection PyBroadException
    try:
        if checkpoint_file_name is not None:
            self.checkpoint_load(checkpoint_file_name, restart_pipeline)

        self.run_pipeline(valid_on_initial_weights=valid_on_initial_weights)
    except KeyboardInterrupt:
        get_logger().info(
            f"[{self.mode} worker {self.worker_id}] KeyboardInterrupt, exiting."
        )
    except Exception as e:
        get_logger().error(
            f"[{self.mode} worker {self.worker_id}] Encountered {type(e).__name__}, exiting."
        )
        get_logger().error(traceback.format_exc())
    else:
        # Reached only when neither loading nor the pipeline raised.
        finished_ok = True
    finally:
        if not finished_ok:
            # A non-zero code identifies the worker that stopped abnormally.
            self.results_queue.put(("train_stopped", 1 + self.worker_id))
        else:
            if self.worker_id == 0:
                self.results_queue.put(("train_stopped", 0))
            get_logger().info(
                f"[{self.mode} worker {self.worker_id}] Training finished successfully."
            )
        self.close()
class OnPolicyInference(OnPolicyRLEngine):
def __init__(
    self,
    config: ExperimentConfig,
    results_queue: mp.Queue,  # to output aggregated results
    checkpoints_queue: mp.Queue,  # to write/read (trainer/evaluator) ready checkpoints
    checkpoints_dir: str = "",
    mode: str = "valid",  # or "test"
    seed: Optional[int] = None,
    deterministic_cudnn: bool = False,
    mp_ctx: Optional[BaseContext] = None,
    device: Union[str, torch.device, int] = "cpu",
    deterministic_agents: bool = False,
    worker_id: int = 0,
    num_workers: int = 1,
    distributed_port: int = 0,
    enforce_expert: bool = False,
    **kwargs,
):
    """Set up an inference engine.

    All arguments except `enforce_expert` are forwarded to the parent
    engine's `__init__`, together with an empty `experiment_name`.

    # Parameters

    mode : Defaults to `"valid"`; forwarded to the parent engine.
    enforce_expert : Stored on the instance as `self.enforce_expert`.
    """
    # Inference engines have no experiment name of their own.
    parent_kwargs = dict(
        experiment_name="",
        config=config,
        results_queue=results_queue,
        checkpoints_queue=checkpoints_queue,
        checkpoints_dir=checkpoints_dir,
        mode=mode,
        seed=seed,
        deterministic_cudnn=deterministic_cudnn,
        mp_ctx=mp_ctx,
        deterministic_agents=deterministic_agents,
        device=device,
        worker_id=worker_id,
        num_workers=num_workers,
        distributed_port=distributed_port,
    )
    super().__init__(**parent_kwargs, **kwargs)

    self.enforce_expert = enforce_expert
def run_eval(
    self,
    checkpoint_file_path: str,
    rollout_steps: int = 100,
    visualizer: Optional[VizSuite] = None,
    update_secs: float = 20.0,
    verbose: bool = False,
) -> LoggingPackage:
    """Evaluate the checkpoint at `checkpoint_file_path` and return the results.

    A fresh training pipeline is created from the config (so that new storages
    are used), the checkpoint is loaded, and task samplers are stepped until
    none remain active. If the evaluation stage defines losses they are
    computed (without backprop) either from the rollout storage while rolling
    out, or afterwards from the non-rollout storage. `self.training_pipeline`
    is reset to `None` before returning.

    # Parameters

    checkpoint_file_path : Checkpoint to load; also recorded on the returned package.
    rollout_steps : Every `rollout_steps` collected steps (and when no samplers
        remain active) the rollout storages are processed and `after_updates`
        is called on them.
    visualizer : Optional visualizer; must be empty on entry. Its data is read
        and reset into the returned package's `viz_data`.
    update_secs : Minimum number of seconds between metric aggregation / progress logs.
    verbose : Whether to log progress information.

    # Returns

    The `LoggingPackage` aggregating everything tracked during this evaluation.
    """
    assert self.actor_critic is not None, "called `run_eval` with no actor_critic"

    # Sanity check that we haven't entered an invalid state. During eval the training_pipeline
    # should be only set in this function and always unset at the end of it.
    assert self.training_pipeline is None, (
        "`training_pipeline` should be `None` before calling `run_eval`."
        " This is necessary as we want to initialize new storages."
    )
    self.training_pipeline = self.config.training_pipeline()

    ckpt = self.checkpoint_load(checkpoint_file_path, restart_pipeline=False)
    total_steps = cast(int, ckpt["total_steps"])

    # The stage to evaluate is looked up by mode, i.e. `valid_pipeline_stage`
    # or `test_pipeline_stage` on the training pipeline.
    eval_pipeline_stage = cast(
        PipelineStage,
        getattr(self.training_pipeline, f"{self.mode}_pipeline_stage"),
    )
    assert (
        len(eval_pipeline_stage.stage_components) <= 1
    ), "Only one StageComponent is supported during inference."

    uuid_to_storage = self.training_pipeline.get_stage_storage(eval_pipeline_stage)
    assert len(uuid_to_storage) > 0, (
        "No storage found for eval pipeline stage, this is a bug in AllenAct,"
        " please submit an issue on GitHub (https://github.com/allenai/allenact/issues)."
    )

    # Split the stage's storages into rollout and non-rollout ones.
    uuid_to_rollout_storage = {
        uuid: storage
        for uuid, storage in uuid_to_storage.items()
        if isinstance(storage, RolloutStorage)
    }
    uuid_to_non_rollout_storage = {
        uuid: storage
        for uuid, storage in uuid_to_storage.items()
        if not isinstance(storage, RolloutStorage)
    }

    if len(uuid_to_rollout_storage) > 1 or len(uuid_to_non_rollout_storage) > 1:
        raise NotImplementedError(
            "Only one RolloutStorage and non-RolloutStorage object is allowed within an evaluation pipeline stage."
            " If you'd like to evaluate against multiple storages please"
            " submit an issue on GitHub (https://github.com/allenai/allenact/issues). For the moment you'll need"
            " to evaluate against these storages separately."
        )

    rollout_storage = self.training_pipeline.rollout_storage

    if visualizer is not None:
        assert visualizer.empty()

    num_paused = self.initialize_storage_and_viz(
        storage_to_initialize=cast(
            List[ExperienceStorage], list(uuid_to_storage.values())
        ),
        visualizer=visualizer,
    )
    assert num_paused == 0, f"{num_paused} tasks paused when initializing eval"

    if rollout_storage is not None:
        num_tasks = sum(
            self.vector_tasks.command(
                "sampler_attr", ["length"] * self.num_active_samplers
            )
        ) + (  # We need to add this as the first tasks have already been sampled
            self.num_active_samplers
        )
    else:
        num_tasks = 0

    # get_logger().debug("worker {self.worker_id} number of tasks {num_tasks}")
    steps = 0

    self.actor_critic.eval()

    last_time: float = time.time()
    init_time: float = last_time
    frames: int = 0
    if verbose:
        get_logger().info(
            f"[{self.mode} worker {self.worker_id}] Running evaluation on {num_tasks} tasks"
            f" for ckpt {checkpoint_file_path}"
        )

    # When the expert is enforced, the action distribution is wrapped so that
    # teacher forcing is always applied (`always_enforce=True`).
    if self.enforce_expert:
        dist_wrapper_class = partial(
            TeacherForcingDistr,
            action_space=self.actor_critic.action_space,
            num_active_samplers=None,
            approx_steps=None,
            teacher_forcing=None,
            tracking_callback=None,
            always_enforce=True,
        )
    else:
        dist_wrapper_class = None

    logging_pkg = LoggingPackage(
        mode=self.mode,
        training_steps=total_steps,
        storage_uuid_to_total_experiences=self.training_pipeline.storage_uuid_to_total_experiences,
    )
    # On-policy losses are computed only when the stage defines losses and its
    # (single) stage component reads from the rollout storage.
    should_compute_onpolicy_losses = (
        len(eval_pipeline_stage.loss_names) > 0
        and eval_pipeline_stage.stage_components[0].storage_uuid
        == self.training_pipeline.rollout_storage_uuid
    )
    while self.num_active_samplers > 0:
        frames += self.num_active_samplers
        num_newly_paused = self.collect_step_across_all_task_samplers(
            rollout_storage_uuid=self.training_pipeline.rollout_storage_uuid,
            uuid_to_storage=uuid_to_rollout_storage,
            visualizer=visualizer,
            dist_wrapper_class=dist_wrapper_class,
        )
        steps += 1

        if should_compute_onpolicy_losses and num_newly_paused > 0:
            # The `collect_step_across_all_task_samplers` method will automatically drop
            # parts of the rollout storage that correspond to paused tasks (namely by calling
            # `rollout_storage.sampler_select(UNPAUSED_TASK_INDS)`). This makes sense when you don't need to
            # compute losses for tasks but is a bit limiting here as we're throwing away data before
            # using it to compute losses. As changing this is non-trivial we'll just warn the user
            # for now.
            get_logger().warning(
                f"[{self.mode} worker {self.worker_id}] {num_newly_paused * rollout_storage.step} steps"
                f" will be dropped when computing losses in evaluation. This is a limitation of the current"
                f" implementation of rollout collection in AllenAct. If you'd like to see this"
                f" functionality improved please submit an issue on GitHub"
                f" (https://github.com/allenai/allenact/issues)."
            )

        # End of a rollout segment: either all samplers finished or
        # `rollout_steps` more steps were collected.
        if self.num_active_samplers == 0 or steps % rollout_steps == 0:
            if should_compute_onpolicy_losses and self.num_active_samplers > 0:
                with torch.no_grad():
                    actor_critic_output, _ = self.actor_critic(
                        **rollout_storage.agent_input_for_next_step()
                    )
                before_update_info = dict(
                    next_value=actor_critic_output.values.detach(),
                    use_gae=eval_pipeline_stage.training_settings.use_gae,
                    gamma=eval_pipeline_stage.training_settings.gamma,
                    tau=eval_pipeline_stage.training_settings.gae_lambda,
                    adv_stats_callback=lambda advantages: {
                        "mean": advantages.mean(),
                        "std": advantages.std(),
                    },
                )
                # Prepare storage for iteration during loss computation
                for storage in uuid_to_rollout_storage.values():
                    storage.before_updates(**before_update_info)

                # Compute losses
                with torch.no_grad():
                    for sc in eval_pipeline_stage.stage_components:
                        self.compute_losses_track_them_and_backprop(
                            stage=eval_pipeline_stage,
                            stage_component=sc,
                            storage=uuid_to_rollout_storage[sc.storage_uuid],
                            skip_backprop=True,
                        )

            for storage in uuid_to_rollout_storage.values():
                storage.after_updates()

        cur_time = time.time()
        # Aggregate tracked info when done or once `update_secs` have elapsed.
        if self.num_active_samplers == 0 or cur_time - last_time >= update_secs:
            logging_pkg = self.aggregate_and_send_logging_package(
                tracking_info_list=self.tracking_info_list,
                logging_pkg=logging_pkg,
                send_logging_package=False,
            )
            self.tracking_info_list.clear()

            if verbose:
                npending: int
                lengths: List[int]
                if self.num_active_samplers > 0:
                    lengths = self.vector_tasks.command(
                        "sampler_attr",
                        ["length"] * self.num_active_samplers,
                    )
                    npending = sum(lengths)
                else:
                    lengths = []
                    npending = 0
                # Linear extrapolation from the elapsed time and the fraction of
                # tasks still pending; "???" when no task has completed yet.
                est_time_to_complete = (
                    "{:.2f}".format(
                        (
                            (cur_time - init_time)
                            * (npending / (num_tasks - npending))
                            / 60
                        )
                    )
                    if npending != num_tasks
                    else "???"
                )
                get_logger().info(
                    f"[{self.mode} worker {self.worker_id}]"
                    f" For ckpt {checkpoint_file_path}"
                    f" {frames / (cur_time - init_time):.1f} fps,"
                    f" {npending}/{num_tasks} tasks pending ({lengths})."
                    f" ~{est_time_to_complete} min. to complete."
                )
                if logging_pkg.num_non_empty_metrics_dicts_added != 0:
                    get_logger().info(
                        ", ".join(
                            [
                                f"[{self.mode} worker {self.worker_id}]"
                                f" num_{self.mode}_tasks_complete {logging_pkg.num_non_empty_metrics_dicts_added}",
                                *[
                                    f"{k} {v:.3g}"
                                    for k, v in logging_pkg.metrics_tracker.means().items()
                                ],
                                *[
                                    f"{k0[1]}/{k1} {v1:.3g}"
                                    for k0, v0 in logging_pkg.info_trackers.items()
                                    for k1, v1 in v0.means().items()
                                ],
                            ]
                        )
                    )

                last_time = cur_time

    get_logger().info(
        f"[{self.mode} worker {self.worker_id}] Task evaluation complete, all task samplers paused."
    )

    # Resume, re-seed and reset the samplers after the rollout-based evaluation.
    if rollout_storage is not None:
        self.vector_tasks.resume_all()
        self.vector_tasks.set_seeds(self.worker_seeds(self.num_samplers, self.seed))
        self.vector_tasks.reset_all()

    logging_pkg = self.aggregate_and_send_logging_package(
        tracking_info_list=self.tracking_info_list,
        logging_pkg=logging_pkg,
        send_logging_package=False,
    )
    self.tracking_info_list.clear()

    logging_pkg.viz_data = (
        visualizer.read_and_reset() if visualizer is not None else None
    )

    should_compute_offpolicy_losses = (
        len(eval_pipeline_stage.loss_names) > 0
        and not should_compute_onpolicy_losses
    )
    if should_compute_offpolicy_losses:
        # In this case we are evaluating a non-rollout storage, e.g. some off-policy data
        get_logger().info(
            f"[{self.mode} worker {self.worker_id}] Non-rollout storage detected, will now compute losses"
            f" using this storage."
        )

        offpolicy_eval_done = False
        # Keep computing losses until an `EOFError` is raised during loss
        # computation, which is treated as the end of the data.
        while not offpolicy_eval_done:
            before_update_info = dict(
                next_value=None,
                use_gae=eval_pipeline_stage.training_settings.use_gae,
                gamma=eval_pipeline_stage.training_settings.gamma,
                tau=eval_pipeline_stage.training_settings.gae_lambda,
                adv_stats_callback=lambda advantages: {
                    "mean": advantages.mean(),
                    "std": advantages.std(),
                },
            )
            # Prepare storage for iteration during loss computation
            for storage in uuid_to_non_rollout_storage.values():
                storage.before_updates(**before_update_info)

            # Compute losses
            assert len(eval_pipeline_stage.stage_components) == 1
            try:
                for sc in eval_pipeline_stage.stage_components:
                    with torch.no_grad():
                        self.compute_losses_track_them_and_backprop(
                            stage=eval_pipeline_stage,
                            stage_component=sc,
                            storage=uuid_to_non_rollout_storage[sc.storage_uuid],
                            skip_backprop=True,
                        )
            except EOFError:
                offpolicy_eval_done = True

            for storage in uuid_to_non_rollout_storage.values():
                storage.after_updates()

            # Sum of the batch sizes tracked during this iteration only.
            total_bsize = sum(
                tif.info.get("worker_batch_size", 0)
                for tif in self.tracking_info_list
            )
            logging_pkg = self.aggregate_and_send_logging_package(
                tracking_info_list=self.tracking_info_list,
                logging_pkg=logging_pkg,
                send_logging_package=False,
            )
            self.tracking_info_list.clear()

            cur_time = time.time()
            if verbose and (cur_time - last_time >= update_secs):
                get_logger().info(
                    f"[{self.mode} worker {self.worker_id}]"
                    f" For ckpt {checkpoint_file_path}"
                    f" {total_bsize / (cur_time - init_time):.1f} its/sec."
                )
                # NOTE(review): `info_trackers` is iterated with `.items()` below, so it
                # looks like a mapping; comparing a mapping with `0` is always unequal,
                # which would make this condition always true. Possibly
                # `len(logging_pkg.info_trackers) != 0` was intended - confirm.
                if logging_pkg.info_trackers != 0:
                    get_logger().info(
                        ", ".join(
                            [
                                f"[{self.mode} worker {self.worker_id}]"
                                f" num_{self.mode}_iters_complete {total_bsize}",
                                *[
                                    f"{'/'.join(k0)}/{k1} {v1:.3g}"
                                    for k0, v0 in logging_pkg.info_trackers.items()
                                    for k1, v1 in v0.means().items()
                                ],
                            ]
                        )
                    )

                last_time = cur_time

    # Call after_updates here to reset all storages
    for storage in uuid_to_storage.values():
        storage.after_updates()

    # Set the training pipeline to `None` so that the storages do not
    # persist across calls to `run_eval`
    self.training_pipeline = None

    logging_pkg.checkpoint_file_name = checkpoint_file_path

    return logging_pkg
@staticmethod
def skip_to_latest(checkpoints_queue: mp.Queue, command: Optional[str], data):
    """Drain the queue and return the payload of the newest `command` entry.

    A uniquely time-stamped sentinel is pushed onto the queue and entries are
    consumed until that sentinel comes back; every entry whose command equals
    `command` replaces `data`. After a one second pause the procedure repeats
    for as long as the queue is not empty.

    # Parameters

    checkpoints_queue : Queue holding `(command, data)` pairs; must not be `None`.
    command : The only command (besides the sentinel) allowed in the queue.
    data : Payload returned if no newer `command` entry is found.

    # Returns

    The payload of the last `command` entry consumed, or the given `data`.

    # Raises

    ValueError : If an entry with any other command is found in the queue.
    """
    assert (
        checkpoints_queue is not None
    ), "Attempting to process checkpoints queue but this queue is `None`."

    while True:
        marker_name, marker_stamp = "skip.AUTO.sentinel", time.time()
        # valid since a single valid process is the only consumer
        checkpoints_queue.put((marker_name, marker_stamp))

        while True:
            # block until next command arrives
            incoming_command, incoming_data = checkpoints_queue.get()

            if incoming_command == command:
                # A newer payload for the command of interest.
                data = incoming_data
                continue

            if incoming_command != marker_name:
                raise ValueError(
                    f"Unexpected command {incoming_command} with data {incoming_data}"
                )

            # Our own sentinel came back: everything queued before it was consumed.
            assert (
                incoming_data == marker_stamp
            ), f"Wrong sentinel found: {incoming_data} vs {marker_stamp}"
            break

        time.sleep(1)
        if checkpoints_queue.empty():
            break

    return data
def process_checkpoints(self):
    """Serve commands from the checkpoints queue until told to stop.

    Each `"eval"` command triggers `run_eval` on the received checkpoint path
    and the resulting package is put on the results queue. In validation mode
    queued checkpoints are first skipped up to the latest one. A `"quit"`,
    `"exit"` or `"close"` command ends the loop; any other command raises
    `NotImplementedError` (which, like every other exception, is logged here
    rather than propagated). In test mode a `("test_stopped", code)` message
    is always put on the results queue, with code `0` on a clean stop and
    `worker_id + 1` otherwise. The engine is closed on exit.
    """
    assert (
        self.mode != TRAIN_MODE_STR
    ), "process_checkpoints only to be called from a valid or test instance"

    assert (
        self.checkpoints_queue is not None
    ), "Attempting to process checkpoints queue but this queue is `None`."

    visualizer: Optional[VizSuite] = None

    # Becomes true only when a quit/exit/close command was received.
    finalized = False
    # noinspection PyBroadException
    try:
        while True:
            command: Optional[str]
            ckp_file_path: Any
            (
                command,
                ckp_file_path,
            ) = self.checkpoints_queue.get()  # block until first command arrives
            # get_logger().debug(
            #     "{} {} command {} data {}".format(
            #         self.mode, self.worker_id, command, data
            #     )
            # )

            if command == "eval":
                if self.mode == VALID_MODE_STR:
                    # skip to latest using
                    # 1. there's only one consumer in valid
                    # 2. there's no quit/exit/close message issued by runner nor trainer
                    ckp_file_path = self.skip_to_latest(
                        checkpoints_queue=self.checkpoints_queue,
                        command=command,
                        data=ckp_file_path,
                    )

                # The visualizer is taken from the machine params on first use
                # and then reused for later evaluations.
                if (
                    visualizer is None
                    and self.machine_params.visualizer is not None
                ):
                    visualizer = self.machine_params.visualizer

                eval_package = self.run_eval(
                    checkpoint_file_path=ckp_file_path,
                    visualizer=visualizer,
                    verbose=True,
                    update_secs=20 if self.mode == TEST_MODE_STR else 5 * 60,
                )

                self.results_queue.put(eval_package)

                if self.is_distributed:
                    dist.barrier()
            elif command in ["quit", "exit", "close"]:
                finalized = True
                break
            else:
                raise NotImplementedError()
    except KeyboardInterrupt:
        get_logger().info(
            f"[{self.mode} worker {self.worker_id}] KeyboardInterrupt, exiting."
        )
    except Exception as e:
        # Errors are logged (with traceback) and not re-raised; the `finally`
        # block below reports the failure in test mode.
        get_logger().error(
            f"[{self.mode} worker {self.worker_id}] Encountered {type(e).__name__}, exiting."
        )
        get_logger().error(traceback.format_exc())
    finally:
        if finalized:
            if self.mode == TEST_MODE_STR:
                self.results_queue.put(("test_stopped", 0))
            get_logger().info(
                f"[{self.mode} worker {self.worker_id}] Complete, all checkpoints processed."
            )
        else:
            if self.mode == TEST_MODE_STR:
                # Non-zero code identifies the worker that stopped abnormally.
                self.results_queue.put(("test_stopped", self.worker_id + 1))
        self.close(verbose=self.mode == TEST_MODE_STR)
================================================
FILE: allenact/algorithms/onpolicy_sync/losses/__init__.py
================================================
from .a2cacktr import A2C, ACKTR, A2CACKTR
from .ppo import PPO
================================================
FILE: allenact/algorithms/onpolicy_sync/losses/a2cacktr.py
================================================
"""Implementation of A2C and ACKTR losses."""
from typing import cast, Tuple, Dict, Optional
import torch
from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
AbstractActorCriticLoss,
ObservationType,
)
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput
from allenact.utils.system import get_logger
class A2CACKTR(AbstractActorCriticLoss):
    """Actor-critic loss shared by the A2C and ACKTR variants.

    # Attributes

    acktr : `True` to select the ACKTR flavour (only partially supported, a
        warning is logged whenever the loss is computed), otherwise A2C.
    value_loss_coef : Weight applied to the value loss.
    entropy_coef : Weight applied to the (negated) entropy term.
    entropy_method_name : Name of the distribution method used to compute the
        entropy. Defaults to `entropy`; `conditional_entropy` may be used for
        `SequentialDistr`.
    """

    def __init__(
        self,
        value_loss_coef,
        entropy_coef,
        acktr=False,
        entropy_method_name: str = "entropy",
        *args,
        **kwargs,
    ):
        """Initializer.

        See the class documentation for parameter definitions. Remaining
        positional and keyword arguments are passed to the parent class.
        """
        super().__init__(*args, **kwargs)
        self.acktr = acktr
        # Key under which the total loss is reported.
        self.loss_key = "aktr_total" if acktr else "a2c_total"
        self.value_loss_coef = value_loss_coef
        self.entropy_coef = entropy_coef
        self.entropy_method_name = entropy_method_name

    def loss_per_step(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
    ) -> Dict[str, Tuple[torch.Tensor, Optional[float]]]:
        """Compute the un-reduced value, action and entropy losses.

        # Returns

        A dict mapping `"value"`, `"action"` and `"entropy"` to a pair
        `(loss tensor, weight)`, where a weight of `None` means "unweighted".
        """

        def pad_right(t: torch.Tensor, target_ndim: int) -> torch.Tensor:
            # Append singleton dimensions until `t` has `target_ndim` dimensions.
            return t.view(t.shape + (1,) * (target_ndim - len(t.shape)))

        distributions = actor_critic_output.distributions
        actions = cast(torch.LongTensor, batch["actions"])
        values = actor_critic_output.values

        raw_log_probs = distributions.log_prob(actions)
        advantages = cast(torch.Tensor, batch["adv_targ"])
        action_log_probs = pad_right(raw_log_probs, len(advantages.shape))

        entropy_fn = getattr(distributions, self.entropy_method_name)
        dist_entropy: torch.FloatTensor = pad_right(
            entropy_fn(), len(action_log_probs.shape)
        )

        returns = cast(torch.FloatTensor, batch["returns"])
        value_loss = 0.5 * (returns - values).pow(2)

        # TODO: Decided not to use normalized advantages here,
        #   is this correct? (it's how it's done in Kostrikov's)
        action_loss = -(advantages.detach() * action_log_probs)

        if self.acktr:
            # TODO: Currently acktr doesn't really work because of this natural gradient stuff
            #   that we should figure out how to integrate properly.
            get_logger().warning("acktr is only partially supported.")

        return {
            "value": (value_loss, self.value_loss_coef),
            "action": (action_loss, None),
            "entropy": (dist_entropy.mul_(-1.0), self.entropy_coef),  # type: ignore
        }

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs,
    ):
        """Compute the total loss and the scalar values of its components.

        # Returns

        A pair `(total_loss, info)` where `total_loss` is the weighted sum of
        the mean per-step losses and `info` maps `self.loss_key` and each
        component name to a python float.
        """
        per_step = self.loss_per_step(
            step_count=step_count,
            batch=batch,
            actor_critic_output=actor_critic_output,
        )

        # Reduce every per-step loss to its mean, keeping the weight alongside.
        losses = {}
        for key, (loss, weight) in per_step.items():
            losses[key] = (loss.mean(), weight)

        weighted_terms = [
            loss if weight is None else loss * weight
            for loss, weight in losses.values()
        ]
        total_loss = cast(torch.Tensor, sum(weighted_terms))

        info = {self.loss_key: total_loss.item()}
        for key, (loss, _) in losses.items():
            info[key] = loss.item()

        return total_loss, info
class A2C(A2CACKTR):
    """A2C Loss: `A2CACKTR` with the `acktr` flag fixed to `False`."""

    def __init__(
        self,
        value_loss_coef,
        entropy_coef,
        entropy_method_name: str = "entropy",
        *args,
        **kwargs,
    ):
        """Initializer.

        Parameters have the same meaning as in `A2CACKTR`; extra positional
        and keyword arguments are forwarded to it unchanged.
        """
        super().__init__(
            *args,
            acktr=False,
            value_loss_coef=value_loss_coef,
            entropy_coef=entropy_coef,
            entropy_method_name=entropy_method_name,
            **kwargs,
        )
class ACKTR(A2CACKTR):
    """ACKTR Loss: `A2CACKTR` with the `acktr` flag fixed to `True`.

    This code is not supported as it currently lacks an implementation
    for recurrent models.
    """

    def __init__(
        self,
        value_loss_coef,
        entropy_coef,
        entropy_method_name: str = "entropy",
        *args,
        **kwargs,
    ):
        """Initializer.

        Parameters have the same meaning as in `A2CACKTR`; extra positional
        and keyword arguments are forwarded to it unchanged.
        """
        super().__init__(
            *args,
            acktr=True,
            value_loss_coef=value_loss_coef,
            entropy_coef=entropy_coef,
            entropy_method_name=entropy_method_name,
            **kwargs,
        )
# Default keyword arguments for constructing an A2C loss.
A2CConfig = {
    "value_loss_coef": 0.5,
    "entropy_coef": 0.01,
}
================================================
FILE: allenact/algorithms/onpolicy_sync/losses/abstract_loss.py
================================================
"""Defining abstract loss classes for actor critic models."""
import abc
from typing import Dict, Tuple, Union
import torch
from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import Loss, ActorCriticOutput
class AbstractActorCriticLoss(Loss):
    """Abstract class representing a loss function used to train an
    ActorCriticModel.

    Subclasses must implement `loss`.
    """

    # noinspection PyMethodOverriding
    @abc.abstractmethod
    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs,
    ) -> Union[
        Tuple[torch.FloatTensor, Dict[str, float]],
        Tuple[torch.FloatTensor, Dict[str, float], Dict[str, float]],
    ]:
        """Computes the loss.

        # Parameters

        step_count : An integer step counter supplied by the caller (presumably the number of training steps
            taken so far - confirm against the engine).
        batch : A batch of data corresponding to the information collected when rolling out (possibly many) agents
            over a fixed number of steps. In particular this batch should have the same format as that returned by
            `RolloutStorage.batched_experience_generator`.
        actor_critic_output : The output of calling an ActorCriticModel on the observations in `batch`.
        args : Extra args.
        kwargs : Extra kwargs.

        # Returns

        A (0-dimensional) torch.FloatTensor corresponding to the computed loss. `.backward()` will be called on this
        tensor in order to compute a gradient update to the ActorCriticModel's parameters.
        A Dict[str, float] with scalar values corresponding to sub-losses.
        An optional Dict[str, float] with scalar values corresponding to extra info to be processed per epoch and
        combined across epochs by the engine.

        # Raises

        NotImplementedError : Always, in this abstract base implementation.
        """
        # TODO: The above documentation is missing what the batch dimensions are.

        raise NotImplementedError()
================================================
FILE: allenact/algorithms/onpolicy_sync/losses/grouped_action_imitation.py
================================================
import functools
from typing import Dict, cast, Sequence, Set
import torch
from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
AbstractActorCriticLoss,
)
from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput
class GroupedActionImitation(AbstractActorCriticLoss):
    """Imitation loss where the expert specifies a *group* of acceptable actions.

    The loss is the negative log of the total probability the policy assigns to
    the actions in the expert's group, averaged over all entries of the batch.

    # Attributes

    nactions : Total number of actions.
    action_groups_mask : Float tensor with one row per action group (a 0/1
        membership indicator over the `nactions` actions) followed by one
        final all-ones row.
    """

    def __init__(
        self, nactions: int, action_groups: Sequence[Set[int]], *args, **kwargs
    ):
        """Initializer.

        # Parameters

        nactions : Total number of actions.
        action_groups : Groups of action indices; together they must form a
            partition of `{0, 1, ..., nactions - 1}`.
        args : Extra positional args passed to the parent class.
        kwargs : Extra keyword args passed to the parent class.
        """
        super().__init__(*args, **kwargs)

        # `set().union(...)` (unlike a bare `functools.reduce`) also copes with
        # an empty `action_groups`. Comparing against `range(nactions)` ensures
        # that, besides being disjoint, the groups only hold valid action indices.
        covered_actions = set().union(*action_groups)
        assert (
            sum(len(ag) for ag in action_groups) == nactions
            and covered_actions == set(range(nactions))
        ), f"`action_groups` (==`{action_groups}`) must be a partition of `[0, 1, 2, ..., nactions - 1]`"

        self.nactions = nactions
        # The trailing all-ones row is what an index of -1 selects, in which case
        # the masked probabilities sum over every action.
        self.action_groups_mask = torch.FloatTensor(
            [
                [i in action_group for i in range(nactions)]
                for action_group in action_groups
            ]
            + [[1] * nactions]  # type:ignore
        )

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs,
    ):
        """Compute the grouped-action imitation loss.

        # Parameters

        step_count : Unused by this loss.
        batch : Batch whose `"observations"` must contain an
            `"expert_group_action"` tensor of group indices.
        actor_critic_output : Output of the model; its distribution must expose
            `probs_tensor`.
        args : Extra args. Ignored.
        kwargs : Extra kwargs. Ignored.

        # Returns

        A pair `(total_loss, info)` where `info` maps
        `"grouped_action_cross_entropy"` to the loss value as a float.
        """
        observations = cast(Dict[str, torch.Tensor], batch["observations"])

        assert "expert_group_action" in observations

        expert_group_actions = observations["expert_group_action"]

        # Keep the mask on the same device as the expert actions. `.to(device)`
        # works for both CPU and CUDA targets, whereas `.cuda(get_device())` is
        # only meaningful when the target is a CUDA device.
        if self.action_groups_mask.device != expert_group_actions.device:
            self.action_groups_mask = cast(
                torch.FloatTensor,
                self.action_groups_mask.to(expert_group_actions.device),
            )

        # Look up, for every entry, the membership row of its expert group.
        expert_group_actions_reshaped = expert_group_actions.view(-1, 1)

        expert_group_actions_mask = self.action_groups_mask[
            expert_group_actions_reshaped
        ]

        probs_tensor = actor_critic_output.distributions.probs_tensor
        expert_group_actions_mask = expert_group_actions_mask.view(probs_tensor.shape)

        total_loss = -(
            torch.log((probs_tensor * expert_group_actions_mask).sum(-1))
        ).mean()

        return total_loss, {
            "grouped_action_cross_entropy": total_loss.item(),
        }
================================================
FILE: allenact/algorithms/onpolicy_sync/losses/imitation.py
================================================
"""Defining imitation losses for actor critic type models."""
from collections import OrderedDict
from typing import Dict, cast, Optional, Union
import torch
import allenact.utils.spaces_utils as su
from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
AbstractActorCriticLoss,
ObservationType,
)
from allenact.base_abstractions.distributions import (
Distr,
CategoricalDistr,
SequentialDistr,
ConditionalDistr,
)
from allenact.base_abstractions.misc import ActorCriticOutput
from allenact.base_abstractions.sensor import AbstractExpertSensor
class Imitation(AbstractActorCriticLoss):
    """Expert imitation loss."""

    def __init__(
        self, expert_sensor: Optional[AbstractExpertSensor] = None, *args, **kwargs
    ):
        """Initializer.

        # Parameters

        expert_sensor : Optional expert sensor. When given and its `use_groups`
            attribute is true, `loss` treats `"expert_action"` observations as
            grouped (one expert action per action group).
        args : Extra positional args passed to the parent class.
        kwargs : Extra keyword args passed to the parent class.
        """
        super().__init__(*args, **kwargs)

        self.expert_sensor = expert_sensor

    @staticmethod
    def group_loss(
        distribution: Union[CategoricalDistr, ConditionalDistr],
        expert_actions: torch.Tensor,
        expert_actions_masks: torch.Tensor,
    ):
        """Masked negative log-likelihood of the expert actions.

        # Parameters

        distribution : A `CategoricalDistr`, or a `ConditionalDistr` wrapping one.
        expert_actions : Expert actions whose log-probability is evaluated.
        expert_actions_masks : Mask weighting each log-probability; its sum is
            used as the number of expert successes.

        # Returns

        A pair `(group_loss, expert_successes)`: the masked sum of negative
        log-probabilities divided by the number of successes (clamped to be at
        least 1), and the sum of the mask.
        """
        assert isinstance(distribution, CategoricalDistr) or (
            isinstance(distribution, ConditionalDistr)
            and isinstance(distribution.distr, CategoricalDistr)
        ), "This implementation only supports (groups of) `CategoricalDistr`"

        expert_successes = expert_actions_masks.sum()

        log_probs = distribution.log_prob(cast(torch.LongTensor, expert_actions))
        # The leading dimensions of the log-probs must agree with the mask.
        assert (
            log_probs.shape[: len(expert_actions_masks.shape)]
            == expert_actions_masks.shape
        )

        # Add dimensions to `expert_actions_masks` on the right to allow for masking
        # if necessary.
        len_diff = len(log_probs.shape) - len(expert_actions_masks.shape)
        assert len_diff >= 0
        expert_actions_masks = expert_actions_masks.view(
            *expert_actions_masks.shape, *((1,) * len_diff)
        )

        # Clamping avoids a division by zero when the mask is all zeros.
        group_loss = -(expert_actions_masks * log_probs).sum() / torch.clamp(
            expert_successes, min=1
        )

        return group_loss, expert_successes

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[Distr],
        *args,
        **kwargs,
    ):
        """Computes the imitation loss.

        # Parameters

        step_count : Not used by this implementation.
        batch : A batch of data corresponding to the information collected when rolling out (possibly many) agents
            over a fixed number of steps. In particular this batch should have the same format as that returned by
            `RolloutStorage.batched_experience_generator`.
            Here `batch["observations"]` must contain `"expert_action"` observations
            or `"expert_policy"` observations. See `ExpertActionSensor` (or `ExpertPolicySensor`) for an example of
            a sensor producing such observations.
        actor_critic_output : The output of calling an ActorCriticModel on the observations in `batch`.
        args : Extra args. Ignored.
        kwargs : Extra kwargs. Ignored.

        # Returns

        A (0-dimensional) torch.FloatTensor corresponding to the computed loss. `.backward()` will be called on this
        tensor in order to compute a gradient update to the ActorCriticModel's parameters.
        A dict of scalar sub-losses, which is empty when no expert success was observed in the batch.

        # Raises

        NotImplementedError : If neither `"expert_action"` nor `"expert_policy"` is among the observations, or if
            `"expert_policy"` observations are combined with an expert sensor that uses groups.
        """
        observations = cast(Dict[str, torch.Tensor], batch["observations"])

        losses = OrderedDict()

        # Sub-losses are only reported when at least one expert success occurred.
        should_report_loss = False

        if "expert_action" in observations:
            if self.expert_sensor is None or not self.expert_sensor.use_groups:
                # Ungrouped case: the last dimension holds the pair
                # (expert action, expert success mask).
                expert_actions_and_mask = observations["expert_action"]

                assert expert_actions_and_mask.shape[-1] == 2
                expert_actions_and_mask_reshaped = expert_actions_and_mask.view(-1, 2)

                expert_actions = expert_actions_and_mask_reshaped[:, 0].view(
                    *expert_actions_and_mask.shape[:-1], 1
                )
                expert_actions_masks = (
                    expert_actions_and_mask_reshaped[:, 1]
                    .float()
                    .view(*expert_actions_and_mask.shape[:-1], 1)
                )

                total_loss, expert_successes = self.group_loss(
                    cast(CategoricalDistr, actor_critic_output.distributions),
                    expert_actions,
                    expert_actions_masks,
                )

                should_report_loss = expert_successes.item() != 0
            else:
                # Grouped case: one loss per action group, each conditional
                # distribution being conditioned on the expert actions of the
                # groups that precede it.
                expert_actions = su.unflatten(
                    self.expert_sensor.observation_space, observations["expert_action"]
                )

                total_loss = 0

                ready_actions = OrderedDict()

                for group_name, cd in zip(
                    self.expert_sensor.group_spaces,
                    cast(
                        SequentialDistr, actor_critic_output.distributions
                    ).conditional_distrs,
                ):
                    assert group_name == cd.action_group_name

                    cd.reset()
                    cd.condition_on_input(**ready_actions)

                    expert_action = expert_actions[group_name][
                        AbstractExpertSensor.ACTION_POLICY_LABEL
                    ]
                    expert_action_masks = expert_actions[group_name][
                        AbstractExpertSensor.EXPERT_SUCCESS_LABEL
                    ]

                    ready_actions[group_name] = expert_action

                    current_loss, expert_successes = self.group_loss(
                        cd,
                        expert_action,
                        expert_action_masks,
                    )

                    should_report_loss = (
                        expert_successes.item() != 0 or should_report_loss
                    )

                    cd.reset()

                    # Groups without any expert success do not contribute.
                    if expert_successes.item() != 0:
                        losses[group_name + "_cross_entropy"] = current_loss.item()
                        total_loss = total_loss + current_loss
        elif "expert_policy" in observations:
            if self.expert_sensor is None or not self.expert_sensor.use_groups:
                assert isinstance(
                    actor_critic_output.distributions, CategoricalDistr
                ), "This implementation currently only supports `CategoricalDistr`"

                # The last entry along the final dimension is the expert success
                # mask, the preceding entries are the expert policy.
                expert_policies = cast(Dict[str, torch.Tensor], batch["observations"])[
                    "expert_policy"
                ][..., :-1]
                expert_actions_masks = cast(
                    Dict[str, torch.Tensor], batch["observations"]
                )["expert_policy"][..., -1:]

                expert_successes = expert_actions_masks.sum()
                if expert_successes.item() > 0:
                    should_report_loss = True

                log_probs = cast(
                    CategoricalDistr, actor_critic_output.distributions
                ).log_probs_tensor

                # Add dimensions to `expert_actions_masks` on the right to allow for masking
                # if necessary.
                len_diff = len(log_probs.shape) - len(expert_actions_masks.shape)
                assert len_diff >= 0
                expert_actions_masks = expert_actions_masks.view(
                    *expert_actions_masks.shape, *((1,) * len_diff)
                )

                # Masked cross entropy between the expert policy and the agent's policy.
                total_loss = (
                    -(log_probs * expert_policies) * expert_actions_masks
                ).sum() / torch.clamp(expert_successes, min=1)
            else:
                raise NotImplementedError(
                    "This implementation currently only supports `CategoricalDistr`"
                )
        else:
            raise NotImplementedError(
                "Imitation loss requires either `expert_action` or `expert_policy`"
                " sensor to be active."
            )
        return (
            total_loss,
            (
                {"expert_cross_entropy": total_loss.item(), **losses}
                if should_report_loss
                else {}
            ),
        )
================================================
FILE: allenact/algorithms/onpolicy_sync/losses/ppo.py
================================================
"""Defining the PPO loss for actor critic type models."""
from typing import Dict, Optional, Callable, cast, Tuple
import torch
from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
AbstractActorCriticLoss,
ObservationType,
)
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput
class PPO(AbstractActorCriticLoss):
    """Proximal Policy Optimization (PPO) loss.

    # Attributes

    clip_param : Base clipping parameter.
    value_loss_coef : Weight of the value loss.
    entropy_coef : Weight of the (negated) entropy term.
    use_clipped_value_loss : Whether the value loss is clipped as well.
    clip_decay : Callable mapping the current number of steps to a factor that
        multiplies `clip_param`.
    entropy_method_name : Name of the distribution method used to compute the
        entropy. Defaults to `entropy`; `conditional_entropy` may be used for
        `SequentialDistr`.
    show_ratios : If True, `loss` additionally returns the mean PPO ratios
        (linear, clamped, and used) so the engine can log them.
    adv_key : Batch key of the advantages, `"norm_adv_targ"` when
        `normalize_advantage` is true (the default) and `"adv_targ"` otherwise.
    """

    def __init__(
        self,
        clip_param: float,
        value_loss_coef: float,
        entropy_coef: float,
        use_clipped_value_loss=True,
        clip_decay: Optional[Callable[[int], float]] = None,
        entropy_method_name: str = "entropy",
        normalize_advantage: bool = True,
        show_ratios: bool = False,
        *args,
        **kwargs
    ):
        """Initializer.

        See the class documentation for parameter definitions. Remaining
        positional and keyword arguments are passed to the parent class.
        """
        super().__init__(*args, **kwargs)
        self.clip_param = clip_param
        self.value_loss_coef = value_loss_coef
        self.entropy_coef = entropy_coef
        self.use_clipped_value_loss = use_clipped_value_loss
        # Without a schedule the clip parameter stays constant.
        self.clip_decay = (lambda x: 1.0) if clip_decay is None else clip_decay
        self.entropy_method_name = entropy_method_name
        self.show_ratios = show_ratios
        self.adv_key = "norm_adv_targ" if normalize_advantage else "adv_targ"

    def loss_per_step(
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
    ) -> Tuple[
        Dict[str, Tuple[torch.Tensor, Optional[float]]], Dict[str, torch.Tensor]
    ]:  # TODO tuple output
        """Compute the un-reduced PPO losses and, optionally, the ratios.

        # Returns

        A pair whose first element maps `"value"`, `"action"` and `"entropy"`
        to `(loss tensor, weight)` (a `None` weight meaning "unweighted") and
        whose second element holds the `"ratio"`, `"ratio_clamped"` and
        `"ratio_used"` tensors when `show_ratios` is set (empty otherwise).
        """
        distributions = actor_critic_output.distributions
        actions = cast(torch.LongTensor, batch["actions"])
        values = actor_critic_output.values
        action_log_probs = distributions.log_prob(actions)
        dist_entropy: torch.FloatTensor = getattr(
            distributions, self.entropy_method_name
        )()

        advantages = batch[self.adv_key]
        adv_ndim = len(advantages.shape)

        def add_trailing_dims(t: torch.Tensor):
            # Append singleton dimensions so `t` has as many dims as the advantages.
            missing = adv_ndim - len(t.shape)
            assert missing >= 0
            return t.view(t.shape + ((1,) * missing))

        dist_entropy = add_trailing_dims(dist_entropy)

        clip_param = self.clip_param * self.clip_decay(step_count)

        # Probability ratio between the current and the behaviour policy.
        ratio = add_trailing_dims(
            torch.exp(action_log_probs - batch["old_action_log_probs"])
        )
        clamped_ratio = torch.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param)

        unclipped_objective = ratio * advantages
        clipped_objective = clamped_ratio * advantages

        # Pick the clipped objective wherever it is the smaller of the two.
        use_clamped = clipped_objective < unclipped_objective
        action_loss = -torch.where(
            cast(torch.Tensor, use_clamped), clipped_objective, unclipped_objective
        )

        if self.use_clipped_value_loss:
            old_values = batch["values"]
            value_pred_clipped = old_values + (values - old_values).clamp(
                -clip_param, clip_param
            )
            returns = batch["returns"]
            squared_error = (values - returns).pow(2)
            squared_error_clipped = (value_pred_clipped - returns).pow(2)
            value_loss = 0.5 * torch.max(squared_error, squared_error_clipped)
        else:
            returns = cast(torch.FloatTensor, batch["returns"])
            value_loss = 0.5 * (returns - values).pow(2)

        step_losses = {
            "value": (value_loss, self.value_loss_coef),
            "action": (action_loss, None),
            "entropy": (dist_entropy.mul_(-1.0), self.entropy_coef),  # type: ignore
        }

        ratio_info = {}
        if self.show_ratios:
            ratio_info = {
                "ratio": ratio,
                "ratio_clamped": clamped_ratio,
                "ratio_used": torch.where(
                    cast(torch.Tensor, use_clamped), clamped_ratio, ratio
                ),
            }

        # noinspection PyUnresolvedReferences
        return step_losses, ratio_info

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs
    ):
        """Compute the total PPO loss and the scalar values of its components.

        # Returns

        `(total_loss, info)` where `info` maps `"ppo_total"` and each component
        name to a float. When `show_ratios` is set a third element with the
        mean of each tracked ratio is appended.
        """
        per_step, ratio_info = self.loss_per_step(
            step_count=step_count,
            batch=batch,
            actor_critic_output=actor_critic_output,
        )

        # Reduce every per-step loss to its mean, keeping the weight alongside.
        losses = {}
        for key, (loss, weight) in per_step.items():
            losses[key] = (loss.mean(), weight)

        total_loss = sum(
            loss if weight is None else loss * weight
            for loss, weight in losses.values()
        )

        loss_info = {"ppo_total": cast(torch.Tensor, total_loss).item()}
        for key, (loss, _) in losses.items():
            loss_info[key] = loss.item()

        ratio_means = {
            key: float(value.mean().item()) for key, value in ratio_info.items()
        }

        if self.show_ratios:
            return total_loss, loss_info, ratio_means
        return total_loss, loss_info
class PPOValue(AbstractActorCriticLoss):
    """Value-function part of the Proximal Policy Optimization loss.

    Only the (optionally clipped) squared error between predicted values and
    returns is computed; no action or entropy terms are produced.

    # Attributes

    clip_param : The clipping parameter to use.
    use_clipped_value_loss : Whether or not to also clip the value loss.
    clip_decay : Callable mapping the step count to a multiplier applied to
        `clip_param` (a constant `1.0` when none is given).
    """

    def __init__(
        self,
        clip_param: float,
        use_clipped_value_loss=True,
        clip_decay: Optional[Callable[[int], float]] = None,
        *args,
        **kwargs
    ):
        """Initializer.

        See the class documentation for parameter definitions. Remaining
        positional and keyword arguments are passed to the parent class.
        """
        super().__init__(*args, **kwargs)
        self.clip_param = clip_param
        self.use_clipped_value_loss = use_clipped_value_loss
        if clip_decay is None:
            self.clip_decay = lambda x: 1.0
        else:
            self.clip_decay = clip_decay

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs
    ):
        """Compute the mean value loss.

        # Parameters

        step_count : Passed to `clip_decay` to scale `clip_param`.
        batch : Mapping read at the keys `"returns"` and, when clipping is
            enabled, `"values"`.
        actor_critic_output : Its `values` attribute holds the current
            value predictions.

        # Returns

        A tuple of the scalar loss tensor and a dict `{"value": <float>}`.
        """
        predicted = actor_critic_output.values
        clip_range = self.clip_param * self.clip_decay(step_count)

        if not self.use_clipped_value_loss:
            squared_error = (cast(torch.FloatTensor, batch["returns"]) - predicted).pow(2)
            value_loss = 0.5 * squared_error.mean()
        else:
            # Keep the new prediction within `clip_range` of the stored one
            # and penalize whichever of the two errors is larger.
            clipped_prediction = batch["values"] + (predicted - batch["values"]).clamp(
                -clip_range, clip_range
            )
            plain_error = (predicted - batch["returns"]).pow(2)
            clipped_error = (clipped_prediction - batch["returns"]).pow(2)
            value_loss = 0.5 * torch.max(plain_error, clipped_error).mean()

        return value_loss, {"value": value_loss.item()}
# Ready-made hyperparameter set whose keys match the `clip_param`, `value_loss_coef` and
# `entropy_coef` attributes read by the PPO loss above.
# NOTE(review): presumably meant to be unpacked as `PPO(**PPOConfig)` — confirm against callers.
PPOConfig = dict(clip_param=0.1, value_loss_coef=0.5, entropy_coef=0.01)
================================================
FILE: allenact/algorithms/onpolicy_sync/misc.py
================================================
from enum import Enum
from typing import Dict, Any, Optional
import attr
# Enumeration of the categories a `TrackingInfo` record can be tagged with.
# NOTE(review): the meaning of each member is only suggested by its name/value here —
# confirm against the code that creates `TrackingInfo` objects.
class TrackingInfoType(Enum):
LOSS = "loss"
TEACHER_FORCING = "teacher_forcing"
UPDATE_INFO = "update_info"
# `attr`-generated record (all fields keyword-only) pairing a payload dict with its
# category and some bookkeeping metadata.
@attr.s(kw_only=True)
class TrackingInfo:
# Category of this record (one of the `TrackingInfoType` members).
type: TrackingInfoType = attr.ib()
# Payload: arbitrary values keyed by string.
info: Dict[str, Any] = attr.ib()
# presumably the number of samples `info` was computed over — TODO confirm
n: int = attr.ib()
# Optional string identifier; presumably names the storage the info came from — TODO confirm
storage_uuid: Optional[str] = attr.ib()
# Optional string identifier; presumably names a training-stage component — TODO confirm
stage_component_uuid: Optional[str] = attr.ib()
================================================
FILE: allenact/algorithms/onpolicy_sync/policy.py
================================================
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import abc
from collections import OrderedDict
from typing import TypeVar, Generic, Tuple, Optional, Union, Dict, List, Any
import gym
import torch
from gym.spaces.dict import Dict as SpaceDict
import torch.nn as nn
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput, Memory
# Type variable for the distribution class an `ActorCriticModel` is parameterized by.
DistributionType = TypeVar("DistributionType")
# One named memory dimension: (name, size), where the size may be None (a placeholder,
# e.g. `("sampler", None)`).
MemoryDimType = Tuple[str, Optional[int]]
# Named shape of one memory tensor: a tuple of named dimensions.
MemoryShapeType = Tuple[MemoryDimType, ...]
# Full description of one memory tensor: (named shape, torch dtype).
MemorySpecType = Tuple[MemoryShapeType, torch.dtype]
# Map from memory name to its specification.
FullMemorySpecType = Dict[str, MemorySpecType]
# Observations: string keys mapping to tensors or to nested dictionaries.
ObservationType = Dict[str, Union[torch.Tensor, Dict[str, Any]]]
# Accepted representations of (previous) actions.
ActionType = Union[torch.Tensor, OrderedDict, Tuple, int]
class ActorCriticModel(Generic[DistributionType], nn.Module):
    """Base class for deep (optionally recurrent) actor-critic agents.

    To define a new agent, subclass this class and implement
    `_recurrent_memory_specification` and `forward`.

    # Attributes

    action_space : The space of actions available to the agent. This is of type `gym.spaces.Space`.
    observation_space: The observation space expected by the agent. This is of type `gym.spaces.dict`.
    memory_spec : `None` until `recurrent_memory_specification` is first read; afterwards a
        one-element list caching the value returned by `_recurrent_memory_specification`.
    """

    def __init__(self, action_space: gym.Space, observation_space: SpaceDict):
        """Initializer.

        # Parameters

        action_space : The space of actions available to the agent.
        observation_space: The observation space expected by the agent.
        """
        super().__init__()
        self.action_space = action_space
        self.observation_space = observation_space
        self.memory_spec: Optional[List[Optional[FullMemorySpecType]]] = None

    @property
    def recurrent_memory_specification(self) -> Optional[FullMemorySpecType]:
        """The (cached and validated) memory specification of the model.

        The specification is obtained from `_recurrent_memory_specification`
        on first access and stored in `memory_spec`. On every access each
        entry is checked: no dimension may be named `step` and one
        dimension must be named `sampler`.

        # Returns

        The memory specification from `_recurrent_memory_specification`
        (`None` for a memory-less model).
        """
        if self.memory_spec is None:
            self.memory_spec = [self._recurrent_memory_specification()]

        cached_spec = self.memory_spec[0]
        if cached_spec is not None:
            for memory_key in cached_spec:
                named_dims, _ = cached_spec[memory_key]
                names = [dim[0] for dim in named_dims]

                assert (
                    "step" not in names
                ), "`step` is automatically added and cannot be reused"
                assert "sampler" in names, "`sampler` dim must be defined"

        return cached_spec

    @abc.abstractmethod
    def _recurrent_memory_specification(self) -> Optional[FullMemorySpecType]:
        """Describe the recurrent memory used by this model.

        # Returns

        `None` when the model is memory-less. Otherwise a flat dictionary
        mapping a memory name (string) to a specification tuple made of:

        1. The named shape of the memory, e.g.
           `(("layer", 1), ("sampler", None), ("agent", 2), ("hidden", 32))`
           for a two-agent GRU memory. The `sampler` dimension placeholder is
           mandatory and *always* precedes the optional `agent` dimension;
           when present, `agent` holds the number of agents in the model and
           is *always* the dimension right after `sampler`; `layer` and
           `hidden` follow the standard RNN hidden state parametrization.
        2. The data type, e.g. `torch.float32`.

        For a single-agent model it is often more convenient to omit the
        agent dimension, e.g.
        `(("layer", 1), ("sampler", None), ("hidden", 32))` for a GRU memory.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: ActionType,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Map observations (and the previous hidden state) to an action
        distribution and a state value.

        # Parameters

        observations : Multi-level map from key strings to tensors of shape [steps, samplers, (agents,) ...] with the
                       current observations.
        memory : `Memory` object with recurrent memory. The shape of each tensor is determined by the corresponding
                 entry in `_recurrent_memory_specification`.
        prev_actions : ActionType with tensors of shape [steps, samplers, ...] with the previous actions.
        masks : tensor of shape [steps, samplers, agents, 1] with zeros indicating steps where a new episode/task
                starts.

        # Returns

        A tuple whose first element is an `ActorCriticOutput` holding the
        agents' probability distribution over possible actions (shape
        [steps, samplers, ...]), the agents' value for the state (shape
        [steps, samplers, ..., 1]), and any extra information needed for loss
        computations. The second element is an optional `Memory`, only used
        by models with recurrent memory.
        """
        raise NotImplementedError()
class LinearActorCriticHead(nn.Module):
    """Single linear layer producing both action logits and a value.

    The layer has `num_actions + 1` outputs: the last one is the value, the
    others are the logits. Weights are orthogonally initialized and the
    bias is set to zero.
    """

    def __init__(self, input_size: int, num_actions: int):
        super().__init__()

        self.input_size = input_size
        self.num_actions = num_actions

        joint_layer = nn.Linear(input_size, 1 + num_actions)
        nn.init.orthogonal_(joint_layer.weight)
        nn.init.constant_(joint_layer.bias, 0)
        self.actor_and_critic = joint_layer

    def forward(self, x) -> Tuple[CategoricalDistr, torch.Tensor]:
        """Return `(distribution over actions, values)` for the features `x`."""
        joint_out = self.actor_and_critic(x)

        # The last output channel is the value, everything before it the logits.
        action_logits = joint_out[..., :-1]
        state_values = joint_out[..., -1:]

        # logits are [step, sampler, ...]
        # noinspection PyArgumentList
        distribution = CategoricalDistr(logits=action_logits)
        # values are [step, sampler, flattened]
        flat_values = state_values.view(*state_values.shape[:2], -1)
        return distribution, flat_values
class LinearCriticHead(nn.Module):
    """Linear layer mapping features to a single value per input vector.

    The weight is orthogonally initialized and the bias is set to zero.
    """

    def __init__(self, input_size: int):
        super().__init__()
        value_layer = nn.Linear(input_size, 1)
        nn.init.orthogonal_(value_layer.weight)
        nn.init.constant_(value_layer.bias, 0)
        self.fc = value_layer

    def forward(self, x):
        """Return values of shape [steps, samplers, flattened]."""
        leading_dims = x.shape[:2]
        raw_values = self.fc(x)
        # Keep the first two dimensions and flatten everything after them.
        return raw_values.view(*leading_dims, -1)
class LinearActorHead(nn.Module):
    """Linear layer mapping features to the logits of a categorical
    distribution.

    The weight is orthogonally initialized with gain `0.01` and the bias is
    set to zero.
    """

    def __init__(self, num_inputs: int, num_outputs: int):
        super().__init__()

        logits_layer = nn.Linear(num_inputs, num_outputs)
        nn.init.orthogonal_(logits_layer.weight, gain=0.01)
        nn.init.constant_(logits_layer.bias, 0)
        self.linear = logits_layer

    def forward(self, x: torch.FloatTensor):  # type: ignore
        """Return a `CategoricalDistr` built from the projected features."""
        action_logits = self.linear(x)  # type:ignore

        # logits are [step, sampler, ...]
        # noinspection PyArgumentList
        return CategoricalDistr(logits=action_logits)
================================================
FILE: allenact/algorithms/onpolicy_sync/runner.py
================================================
"""Defines the reinforcement learning `OnPolicyRunner`."""
import copy
import enum
import glob
import importlib.util
import inspect
import itertools
import json
import math
import os
import pathlib
import queue
import random
import signal
import subprocess
import sys
import time
import traceback
from collections import defaultdict
from multiprocessing.context import BaseContext
from multiprocessing.process import BaseProcess
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union, Set
import filelock
import numpy as np
import torch
import torch.multiprocessing as mp
from setproctitle import setproctitle as ptitle
from torch.distributions.utils import lazy_property
from allenact.algorithms.onpolicy_sync.engine import (
TEST_MODE_STR,
TRAIN_MODE_STR,
VALID_MODE_STR,
OnPolicyInference,
OnPolicyRLEngine,
OnPolicyTrainer,
)
from allenact.base_abstractions.callbacks import Callback
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.sensor import Sensor
from allenact.utils.experiment_utils import (
LoggingPackage,
ScalarMeanTracker,
set_deterministic_cudnn,
set_seed,
download_checkpoint_from_wandb,
)
from allenact.utils.misc_utils import (
NumpyJSONEncoder,
all_equal,
get_git_diff_of_project,
)
from allenact.utils.model_utils import md5_hash_of_state_dict
from allenact.utils.system import find_free_port, get_logger
from allenact.utils.tensor_utils import SummaryWriter
from allenact.utils.viz_utils import VizSuite
# String marker constant. It is not referenced in the portion of the file reviewed here;
# NOTE(review): presumably used as a key/label for stored experiment-config kwargs — confirm.
CONFIG_KWARGS_STR = "__CONFIG_KWARGS__"
# Selects how output directories are laid out; consumed by the FLAT/NESTED branches of
# `OnPolicyRunner.checkpoint_dir`, `log_writer_path` and `metric_path`.
class SaveDirFormat(enum.Enum):
"""Directory formats that can be used when saving tensorboard logs,
checkpoints, etc.
during training/evaluation.
FLAT: the first-level directories are logs, checkpoints, metrics, etc; the second-level are time strings of each experiment
NESTED: the opposite to FLAT.
"""
# e.g. checkpoints are placed under <output_dir>/checkpoints/<experiment>/<start time>
FLAT = "FLAT"
# e.g. checkpoints are placed under <output_dir>/<experiment>/<start time>/checkpoints
NESTED = "NESTED"
# Has results queue (aggregated per trainer), checkpoints queue and mp context
# Instantiates train, validate, and test workers
# Logging
# Saves configs, makes folder for trainer models
class OnPolicyRunner(object):
def __init__(
    self,
    config: ExperimentConfig,
    output_dir: str,
    loaded_config_src_files: Optional[Dict[str, str]],
    seed: Optional[int] = None,
    mode: str = "train",
    deterministic_cudnn: bool = False,
    deterministic_agents: bool = False,
    mp_ctx: Optional[BaseContext] = None,
    multiprocessing_start_method: str = "default",
    extra_tag: str = "",
    disable_tensorboard: bool = False,
    disable_config_saving: bool = False,
    distributed_ip_and_port: str = "127.0.0.1:0",
    distributed_preemption_threshold: float = 0.7,
    machine_id: int = 0,
    save_dir_fmt: SaveDirFormat = SaveDirFormat.FLAT,
    callbacks_paths: Optional[str] = None,
):
    """Store the run configuration and seed the random number generators.

    # Parameters

    config : The experiment configuration to run.
    output_dir : Root directory under which outputs are written.
    loaded_config_src_files : Stored as given on the runner.
    seed : Random seed; a random value in `[0, 2**31 - 1]` is drawn when `None`.
    mode : Either the train or the test mode string (case and surrounding
        whitespace are ignored); anything else fails an assertion.
    deterministic_cudnn : If true, `set_deterministic_cudnn()` is called.
    deterministic_agents : Stored and later passed to validation/test workers.
    mp_ctx : Multiprocessing context to use; created from
        `multiprocessing_start_method` when `None`.
    multiprocessing_start_method : Start method name. `"default"` resolves to
        `"forkserver"` when CUDA is available and to `"spawn"` otherwise.
    extra_tag : Optional extra tag appended to the experiment name/paths.
    disable_tensorboard : Stored; when true no visualizer is initialized.
    disable_config_saving : Stored; when true the project state is not
        saved at the start of training.
    distributed_ip_and_port : `"<ip>:<port>"` string used for distributed setup.
    distributed_preemption_threshold : Stored and passed to train workers.
    machine_id : Index of this machine.
    save_dir_fmt : Directory layout used for outputs.
    callbacks_paths : Comma-separated callback specification (see
        `setup_callback_classes`).
    """
    self.config = config
    self.output_dir = output_dir
    self.loaded_config_src_files = loaded_config_src_files
    if seed is None:
        seed = random.randint(0, 2**31 - 1)
    self.seed = seed
    self.deterministic_cudnn = deterministic_cudnn
    self.distributed_preemption_threshold = distributed_preemption_threshold

    if multiprocessing_start_method == "default":
        # Spawn seems to play nicer with cpus and debugging
        multiprocessing_start_method = (
            "forkserver" if torch.cuda.is_available() else "spawn"
        )
    self.mp_ctx = self.init_context(mp_ctx, multiprocessing_start_method)

    self.extra_tag = extra_tag
    self.mode = mode.lower().strip()
    self.visualizer: Optional[VizSuite] = None
    self.deterministic_agents = deterministic_agents
    self.disable_tensorboard = disable_tensorboard
    self.disable_config_saving = disable_config_saving

    supported_modes = [TRAIN_MODE_STR, TEST_MODE_STR]
    assert (
        self.mode in supported_modes
    ), "Only 'train' and 'test' modes supported in runner"

    if self.deterministic_cudnn:
        set_deterministic_cudnn()
    set_seed(self.seed)

    # Runtime state, populated once training/testing is started.
    self.queues: Optional[Dict[str, mp.Queue]] = None
    self.processes: Dict[str, List[Union[BaseProcess, mp.Process]]] = defaultdict(
        list
    )
    self.current_checkpoint = None
    self._local_start_time_str: Optional[str] = None
    self._is_closed: bool = False
    self._collect_valid_results: bool = False

    self.distributed_ip_and_port = distributed_ip_and_port
    self.machine_id = machine_id
    self.save_dir_fmt = save_dir_fmt
    self.callbacks_paths = callbacks_paths
# Callback objects for this runner: the result of calling `setup_callback_classes`
# with `self.callbacks_paths`. Wrapped in torch's `lazy_property`, so the call is made
# when the attribute is first read rather than at construction time.
@lazy_property
def callbacks(self):
return self.setup_callback_classes(self.callbacks_paths)
@property
def local_start_time_str(self) -> str:
if self._local_start_time_str is None:
raise RuntimeError(
"Local start time string does not exist as neither `start_train()` or `start_test()`"
" has been called on this runner."
)
return self._local_start_time_str
@property
def running_validation(self):
    """Whether this runner should run validation.

    Validation runs only on machine `0`, and only if either some validation
    processes are configured or the training pipeline has no rollout
    storage but its validation stage defines at least one loss.
    """
    pipeline = self.config.training_pipeline()
    valid_params = MachineParams.instance_from(
        self.config.machine_params(VALID_MODE_STR)
    )

    if not sum(valid_params.nprocesses) > 0:
        has_storage_only_validation = (
            pipeline.rollout_storage_uuid is None
            and len(pipeline.valid_pipeline_stage.loss_names) > 0
        )
        if not has_storage_only_validation:
            return False

    return self.machine_id == 0
@staticmethod
def init_context(
mp_ctx: Optional[BaseContext] = None,
multiprocessing_start_method: str = "forkserver",
valid_start_methods: Tuple[str, ...] = ("forkserver", "spawn", "fork"),
):
if mp_ctx is None:
assert multiprocessing_start_method in valid_start_methods, (
f"multiprocessing_start_method must be one of {valid_start_methods}."
f" Got '{multiprocessing_start_method}'"
)
mp_ctx = mp.get_context(multiprocessing_start_method)
elif multiprocessing_start_method != mp_ctx.get_start_method():
get_logger().warning(
f"ignoring multiprocessing_start_method '{multiprocessing_start_method}'"
f" and using given context with '{mp_ctx.get_start_method()}'"
)
return mp_ctx
def setup_callback_classes(self, callbacks: Optional[str]) -> Set[Callback]:
    """Build the set of callback objects described by a comma-separated
    list of files, paths, and/or functions.

    After separating `callbacks` into a list of strings (surrounding
    whitespace is ignored and empty entries are skipped), each string
    should either be a:

    1. Name of a function defined on the experiment config that, when called, returns an
       object of type `Callback`.
    2. Path to a python file containing a single class that inherits from `Callback`.
    3. Module path (e.g. `path.to.module`) where this module contains a single class that
       inherits from `Callback`.

    The `Callback` base class itself is not counted when it is merely
    imported into such a file/module.

    # Parameters

    callbacks : The comma-separated specification, or `None`/`""` for no callbacks.

    # Returns

    A set with one callback object per entry, each already set up with the
    experiment name, config, and mode.

    # Raises

    AssertionError : If a file/module does not contain exactly one `Callback` subclass.
    """
    if callbacks == "" or callbacks is None:
        return set()

    setup_dict = dict(
        name=f"{self.experiment_name}/{self.local_start_time_str}",
        config=self.config,
        mode=self.mode,
    )

    callback_objects = set()
    for raw_entry in callbacks.split(","):
        filename = raw_entry.strip()
        if not filename:
            # Tolerate stray commas such as "a,,b" or a trailing ",".
            continue

        # Check if the `filename` is a function on the config
        if not any(k in filename for k in [".", "/"]):
            callback_func = getattr(self.config, filename, None)
            if callback_func is not None:
                callback = callback_func()
                callback.setup(**setup_dict)
                callback_objects.add(callback)
                continue

        # Otherwise find the Callback class in the file or module
        module_path = filename.replace("/", ".")
        if module_path.endswith(".py"):
            module_path = module_path[:-3]
        module = importlib.import_module(module_path)

        # `inspect.getmembers` yields (name, class) pairs; keep the classes that
        # derive from `Callback`, ignoring the imported base class itself.
        callback_classes = [
            member
            for _, member in inspect.getmembers(module, inspect.isclass)
            if issubclass(member, Callback) and member is not Callback
        ]

        assert len(callback_classes) == 1, (
            f"Expected a single callback class in {filename}, but found {len(callback_classes)}."
            f" These classes were found: {callback_classes}."
        )

        # NOTE: initialize the callback class
        callback = callback_classes[0]()
        callback.setup(**setup_dict)
        callback_objects.add(callback)

    return callback_objects
def _acquire_unique_local_start_time_string(self) -> str:
    """Produce a start time string no other run in `output_dir` has used last.

    While holding a file lock inside `self.output_dir`, the most recently
    recorded start time string is read and the current local time
    (second resolution) is polled until it differs from it; the new string
    is then recorded. Consequently, of many experiments started in
    parallel, at most one starts per second.

    # Raises

    filelock.Timeout : If the lock cannot be acquired within 60 seconds.
    """
    os.makedirs(self.output_dir, exist_ok=True)
    start_time_string_lock_path = os.path.abspath(
        os.path.join(self.output_dir, ".allenact_start_time_string.lock")
    )

    def current_time_string() -> str:
        return time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime(time.time()))

    try:
        with filelock.FileLock(start_time_string_lock_path, timeout=60):
            record_path = os.path.join(
                self.output_dir, ".allenact_last_start_time_string"
            )
            # Make sure the record file exists before reading it.
            pathlib.Path(record_path).touch()

            with open(record_path, "r") as f:
                recorded_lines = f.readlines()
            last_recorded = recorded_lines[0].strip() if recorded_lines else None

            candidate_str = current_time_string()
            while last_recorded is not None and last_recorded == candidate_str:
                time.sleep(0.2)
                candidate_str = current_time_string()

            with open(record_path, "w") as f:
                f.write(candidate_str)

    except filelock.Timeout as e:
        get_logger().exception(
            f"Could not acquire the lock for {start_time_string_lock_path} for 60 seconds,"
            " this suggests an unexpected deadlock. Please close all AllenAct training processes,"
            " delete this lockfile, and try again."
        )
        raise e

    assert candidate_str is not None
    return candidate_str
def worker_devices(self, mode: str):
    """Return the devices configured for the workers of `mode`.

    The devices must either all be equal or all have a non-negative index;
    the selection is logged.
    """
    params: MachineParams = MachineParams.instance_from(
        self.config.machine_params(mode)
    )
    devices = params.devices

    homogeneous = all_equal(devices)
    assert homogeneous or all(
        d.index >= 0 for d in devices
    ), f"Cannot have a mix of CPU and GPU devices (`devices == {devices}`)"

    get_logger().info(f"Using {len(devices)} {mode} workers on devices {devices}")
    return devices
def local_worker_ids(self, mode: str):
    """Return (and log) the worker ids that run on this machine for `mode`."""
    params: MachineParams = MachineParams.instance_from(
        self.config.machine_params(mode, machine_id=self.machine_id)
    )
    ids = params.local_worker_ids

    message = f"Using local worker ids {ids} (total {len(ids)} workers in machine {self.machine_id})"
    get_logger().info(message)
    return ids
def init_visualizer(self, mode: str):
    """Set `self.visualizer` from the machine params of `mode`.

    Nothing is done when tensorboard logging is disabled.
    """
    if self.disable_tensorboard:
        return

    # Note: Avoid instantiating anything in machine_params (use Builder if needed)
    params = MachineParams.instance_from(self.config.machine_params(mode))
    self.visualizer = params.visualizer
@staticmethod
def init_process(mode: str, id: int, to_close_on_termination: OnPolicyRLEngine):
    """Name the current process and install its termination handlers.

    The process title becomes `"<mode>-<id>"`. SIGTERM and SIGINT are
    handled as follows: if the engine is already closed the process exits
    with status 0; if it is currently closing the signal is ignored;
    otherwise the engine is closed and the process exits with status 0
    (or 1 if closing raised an exception).

    # Parameters

    mode : Label of the worker kind, used in the title and log messages.
    id : Index of the worker.
    to_close_on_termination : The engine to close when a signal arrives.
    """
    ptitle(f"{mode}-{id}")

    def create_handler(termination_type: str):
        def handler(_signo, _frame):
            prefix = f"{termination_type} signal sent to worker {mode}-{id}."

            if to_close_on_termination.is_closed:
                get_logger().info(
                    f"{prefix} Worker {mode}-{id} is already closed, exiting."
                )
                sys.exit(0)

            if to_close_on_termination.is_closing:
                get_logger().info(
                    f"{prefix} Worker {mode}-{id} is already closing, ignoring this signal."
                )
                return

            get_logger().info(
                f"{prefix} Forcing worker {mode}-{id} to close and exiting."
            )
            # noinspection PyBroadException
            try:
                to_close_on_termination.close(True)
            except Exception:
                get_logger().error(
                    f"Error occurred when closing the RL engine used by work {mode}-{id}."
                    f" We cannot recover from this and will simply exit. The exception:\n"
                    f"{traceback.format_exc()}"
                )
                sys.exit(1)
            sys.exit(0)

        return handler

    handled_signals = (
        (signal.SIGTERM, "Termination"),
        (signal.SIGINT, "Interrupt"),
    )
    for signum, label in handled_signals:
        signal.signal(signum, create_handler(label))
@staticmethod
def init_worker(engine_class, args, kwargs):
mode = kwargs["mode"]
id = kwargs["worker_id"]
worker = None
try:
worker = engine_class(*args, **kwargs)
except Exception:
get_logger().error(f"Encountered Exception. Terminating {mode} worker {id}")
get_logger().exception(traceback.format_exc())
kwargs["results_queue"].put((f"{mode}_stopped", 1 + id))
finally:
return worker
@lazy_property
def _get_callback_sensors(self) -> List[Sensor]:
callback_sensors: List[Sensor] = []
for c in self.callbacks:
sensors = c.callback_sensors()
if sensors is not None:
callback_sensors.extend(sensors)
return callback_sensors
@staticmethod
def train_loop(
    id: int = 0,
    checkpoint: Optional[str] = None,
    restart_pipeline: bool = False,
    valid_on_initial_weights: bool = False,
    *engine_args,
    **engine_kwargs,
):
    """Entry point of a training worker process.

    Creates an `OnPolicyTrainer` from the engine arguments (with `mode` and
    `worker_id` filled in), installs the termination handlers, and runs
    training. Nothing further happens if the trainer could not be created.

    # Parameters

    id : Index of the training worker.
    checkpoint : Checkpoint file name passed to `train`.
    restart_pipeline : Passed to `train`.
    valid_on_initial_weights : Passed to `train`.
    engine_args : Positional arguments for the trainer.
    engine_kwargs : Keyword arguments for the trainer.
    """
    engine_kwargs["mode"] = TRAIN_MODE_STR
    engine_kwargs["worker_id"] = id

    # The initial state dict can be huge, so keep it out of the log message.
    engine_kwargs_for_print = dict(engine_kwargs)
    if "initial_model_state_dict" in engine_kwargs_for_print:
        engine_kwargs_for_print["initial_model_state_dict"] = "[SUPPRESSED]"
    get_logger().info(f"train {id} args {engine_kwargs_for_print}")

    trainer: OnPolicyTrainer = OnPolicyRunner.init_worker(
        engine_class=OnPolicyTrainer, args=engine_args, kwargs=engine_kwargs
    )
    if trainer is None:
        return

    OnPolicyRunner.init_process("Train", id, to_close_on_termination=trainer)
    trainer.train(
        checkpoint_file_name=checkpoint,
        restart_pipeline=restart_pipeline,
        valid_on_initial_weights=valid_on_initial_weights,
    )
@staticmethod
def valid_loop(id: int = 0, *engine_args, **engine_kwargs):
    """Entry point of a validation worker process.

    Creates an `OnPolicyInference` engine in validation mode, installs the
    termination handlers, and processes checkpoints. Nothing further
    happens if the engine could not be created.
    """
    engine_kwargs.update(mode=VALID_MODE_STR, worker_id=id)
    get_logger().info(f"valid {id} args {engine_kwargs}")

    valid = OnPolicyRunner.init_worker(
        engine_class=OnPolicyInference, args=engine_args, kwargs=engine_kwargs
    )
    if valid is None:
        return

    OnPolicyRunner.init_process("Valid", id, to_close_on_termination=valid)
    valid.process_checkpoints()  # gets checkpoints via queue
@staticmethod
def test_loop(id: int = 0, *engine_args, **engine_kwargs):
    """Entry point of a test worker process.

    Creates an `OnPolicyInference` engine in test mode, installs the
    termination handlers, and processes checkpoints. Nothing further
    happens if the engine could not be created.
    """
    engine_kwargs.update(mode=TEST_MODE_STR, worker_id=id)
    get_logger().info(f"test {id} args {engine_kwargs}")

    test = OnPolicyRunner.init_worker(OnPolicyInference, engine_args, engine_kwargs)
    if test is None:
        return

    OnPolicyRunner.init_process("Test", id, to_close_on_termination=test)
    test.process_checkpoints()  # gets checkpoints via queue
def _initialize_start_train_or_start_test(self):
    """Reset the runner state shared by `start_train` and `start_test`.

    Marks the runner as open, verifies that queues left over from a
    previous run are empty, creates fresh `results` and `checkpoints`
    queues, and acquires a new local start time string.

    # Raises

    RuntimeError : If a previously created queue still holds an item.
    """
    self._is_closed = False

    if self.queues is not None:
        for k, leftover_queue in self.queues.items():
            try:
                out = leftover_queue.get(timeout=1)
            except queue.Empty:
                # Nothing pending in this queue, as expected.
                continue
            raise RuntimeError(
                f"{k} queue was not empty before starting new training/testing (contained {out})."
                f" This should not happen, please report how you obtained this error"
                f" by creating an issue at https://github.com/allenai/allenact/issues."
            )

    self.queues = {
        name: self.mp_ctx.Queue() for name in ("results", "checkpoints")
    }

    self._local_start_time_str = self._acquire_unique_local_start_time_string()
def get_port(self):
    """Return the port to use for distributed communication.

    The port is read from `self.distributed_ip_and_port`. A port of `0`
    means "find a free one", which only the runner with `machine_id == 0`
    is allowed to do.
    """
    address_parts = self.distributed_ip_and_port.split(":")
    distributed_port = int(address_parts[1])

    if distributed_port == 0:
        assert (
            self.machine_id == 0
        ), "Only runner with `machine_id` == 0 can search for a free port."
        distributed_port = find_free_port(
            self.distributed_ip_and_port.split(":")[0]
        )

    get_logger().info(
        f"Engines on machine_id == {self.machine_id} using port {distributed_port} and seed {self.seed}"
    )
    return distributed_port
# Launch training: one train process per local worker id and, when validation is enabled,
# one validation process; then block in `log_and_close` until it returns.
#
# Parameters
#   checkpoint : optional checkpoint to start from; a value starting with "wandb://" is first
#       downloaded via `download_checkpoint_from_wandb`.
#   restart_pipeline, valid_on_initial_weights : forwarded to each train process.
#   max_sampler_processes_per_worker, save_ckpt_after_every_pipeline_stage,
#   try_restart_after_task_error, save_ckpt_at_every_host : forwarded to the engines.
#   collect_valid_results : if true, validation metrics are written to JSON files and the
#       validation results are returned as well.
#
# Returns
#   `self.local_start_time_str`, or the tuple `(self.local_start_time_str, valid_results)`
#   when `collect_valid_results` is true.
def start_train(
self,
checkpoint: Optional[str] = None,
restart_pipeline: bool = False,
max_sampler_processes_per_worker: Optional[int] = None,
save_ckpt_after_every_pipeline_stage: bool = True,
collect_valid_results: bool = False,
valid_on_initial_weights: bool = False,
try_restart_after_task_error: bool = False,
save_ckpt_at_every_host: bool = False,
):
# Fresh queues and a new start time string for this run.
self._initialize_start_train_or_start_test()
self._collect_valid_results = collect_valid_results
if not self.disable_config_saving:
self.save_project_state()
devices = self.worker_devices(TRAIN_MODE_STR)
num_workers = len(devices)
# Be extra careful to ensure that all models start
# with the same initializations.
set_seed(self.seed)
initial_model_state_dict = self.config.create_model(
sensor_preprocessor_graph=MachineParams.instance_from(
self.config.machine_params(self.mode)
).sensor_preprocessor_graph
).state_dict()
# A port is only resolved when more than one worker takes part in training.
distributed_port = 0 if num_workers == 1 else self.get_port()
if (
num_workers > 1
and "NCCL_ASYNC_ERROR_HANDLING" not in os.environ
and "NCCL_BLOCKING_WAIT" not in os.environ
):
# This ensures the NCCL distributed backend will throw errors
# if we timeout at a call to `barrier()`
os.environ["NCCL_ASYNC_ERROR_HANDLING"] = "1"
worker_ids = self.local_worker_ids(TRAIN_MODE_STR)
if checkpoint is not None:
if checkpoint[:8] == "wandb://":
# Remote checkpoint: download it and continue with the returned value.
ckpt_dir = "/tmp/wandb_ckpts"
os.makedirs(ckpt_dir, exist_ok=True)
checkpoint = download_checkpoint_from_wandb(
checkpoint, ckpt_dir, only_allow_one_ckpt=True
)
# Stays None while the full state dict can be passed to the workers; once a process
# start fails (see below) it holds the state dict's hash, which is passed instead.
model_hash = None
for trainer_id in worker_ids:
training_kwargs = dict(
id=trainer_id,
checkpoint=checkpoint,
restart_pipeline=restart_pipeline,
experiment_name=self.experiment_name,
config=self.config,
callback_sensors=self._get_callback_sensors,
results_queue=self.queues["results"],
checkpoints_queue=(
self.queues["checkpoints"] if self.running_validation else None
),
checkpoints_dir=self.checkpoint_dir(),
seed=self.seed,
deterministic_cudnn=self.deterministic_cudnn,
mp_ctx=self.mp_ctx,
num_workers=num_workers,
device=devices[trainer_id],
distributed_ip=self.distributed_ip_and_port.split(":")[0],
distributed_port=distributed_port,
max_sampler_processes_per_worker=max_sampler_processes_per_worker,
save_ckpt_after_every_pipeline_stage=save_ckpt_after_every_pipeline_stage,
initial_model_state_dict=(
initial_model_state_dict if model_hash is None else model_hash
),
first_local_worker_id=worker_ids[0],
distributed_preemption_threshold=self.distributed_preemption_threshold,
valid_on_initial_weights=valid_on_initial_weights,
try_restart_after_task_error=try_restart_after_task_error,
save_ckpt_at_every_host=save_ckpt_at_every_host,
)
train: BaseProcess = self.mp_ctx.Process(
target=self.train_loop,
kwargs=training_kwargs,
)
try:
train.start()
except (ValueError, OSError, ConnectionRefusedError, EOFError) as e:
# If the `initial_model_state_dict` is too large we sometimes
# run into errors passing it with multiprocessing. In such cases
# we instead hash the state_dict and confirm, in each engine worker, that
# this hash equals the model the engine worker instantiates.
if (
(isinstance(e, ValueError) and e.args[0] == "too many fds")
or (isinstance(e, OSError) and e.errno == 22)
or (isinstance(e, ConnectionRefusedError) and e.errno == 111)
or isinstance(e, EOFError)
):
model_hash = md5_hash_of_state_dict(initial_model_state_dict)
training_kwargs["initial_model_state_dict"] = model_hash
train = self.mp_ctx.Process(
target=self.train_loop,
kwargs=training_kwargs,
)
train.start()
else:
# Any other failure is unrelated to the state dict size.
raise e
self.processes[TRAIN_MODE_STR].append(train)
get_logger().info(
f"Started {len(self.processes[TRAIN_MODE_STR])} train processes"
)
# Validation
if self.running_validation:
# A single validation process is started, on the first validation device.
device = self.worker_devices(VALID_MODE_STR)[0]
self.init_visualizer(VALID_MODE_STR)
valid: BaseProcess = self.mp_ctx.Process(
target=self.valid_loop,
args=(0,),
kwargs=dict(
config=self.config,
callback_sensors=self._get_callback_sensors,
results_queue=self.queues["results"],
checkpoints_queue=self.queues["checkpoints"],
seed=12345,  # TODO allow same order for randomly sampled tasks? Is this any useful anyway?
deterministic_cudnn=self.deterministic_cudnn,
deterministic_agents=self.deterministic_agents,
mp_ctx=self.mp_ctx,
device=device,
max_sampler_processes_per_worker=max_sampler_processes_per_worker,
),
)
valid.start()
self.processes[VALID_MODE_STR].append(valid)
get_logger().info(
f"Started {len(self.processes[VALID_MODE_STR])} valid processes"
)
else:
get_logger().info(
"No processes allocated to validation, no validation will be run."
)
metrics_file_template: Optional[str] = None
if self._collect_valid_results:
metrics_dir = self.metric_path(self.local_start_time_str)
os.makedirs(metrics_dir, exist_ok=True)
suffix = f"__valid_{self.local_start_time_str}"
metrics_file_template = os.path.join(
metrics_dir, "metrics" + suffix + "{:012d}.json"
)  # template for training steps
get_logger().info(
f"Saving valid metrics with template {metrics_file_template}"
)
# Check output file can be written
with open(metrics_file_template.format(0), "w") as f:
json.dump([], f, indent=4, sort_keys=True, cls=NumpyJSONEncoder)
# Hand control to `log_and_close`; its return value is used as the validation results.
valid_results = self.log_and_close(
start_time_str=self.local_start_time_str,
nworkers=len(worker_ids),  # TODO num_workers once we forward metrics,
metrics_file=metrics_file_template,
)
if not self._collect_valid_results:
return self.local_start_time_str
else:
return self.local_start_time_str, valid_results
# Launch testing: one test process per configured test device, queue every selected
# checkpoint for evaluation, and return whatever `log_and_close` returns.
#
# Parameters
#   checkpoint_path_dir_or_pattern : string passed to `get_checkpoint_files` to select the
#       checkpoints to evaluate.
#   infer_output_dir : with the NESTED directory format, replace `self.output_dir` by the
#       log folder inferred from the first checkpoint path.
#   approx_ckpt_step_interval : passed to `get_checkpoint_files`.
#   max_sampler_processes_per_worker : forwarded to the test engines.
#   inference_expert : forwarded to the engines as `enforce_expert`; also tags the run.
def start_test(
self,
checkpoint_path_dir_or_pattern: str,
infer_output_dir: bool = False,
approx_ckpt_step_interval: Optional[Union[float, int]] = None,
max_sampler_processes_per_worker: Optional[int] = None,
inference_expert: bool = False,
) -> List[Dict]:
# Tester always runs on a single machine
assert (
self.machine_id == 0
), f"Received `machine_id={self.machine_id} for test. Only one machine supported."
assert isinstance(
checkpoint_path_dir_or_pattern, str
), "Must provide a --checkpoint path or pattern to test on."
# Booleans multiply strings as 0/1: "enforced_test_expert" is appended only when
# `inference_expert` is true, preceded by "__" only when `extra_tag` is non-empty.
self.extra_tag += (
"__" * (len(self.extra_tag) > 0) + "enforced_test_expert"
) * inference_expert
self._initialize_start_train_or_start_test()
devices = self.worker_devices(TEST_MODE_STR)
self.init_visualizer(TEST_MODE_STR)
num_testers = len(devices)
# A free port is only looked up when several testers are used.
distributed_port = 0
if num_testers > 1:
distributed_port = find_free_port()
# Tester always runs on a single machine
for tester_it in range(num_testers):
test: BaseProcess = self.mp_ctx.Process(
target=self.test_loop,
args=(tester_it,),
kwargs=dict(
config=self.config,
callback_sensors=self._get_callback_sensors,
results_queue=self.queues["results"],
checkpoints_queue=self.queues["checkpoints"],
seed=12345,  # TODO allow same order for randomly sampled tasks? Is this any useful anyway?
deterministic_cudnn=self.deterministic_cudnn,
deterministic_agents=self.deterministic_agents,
mp_ctx=self.mp_ctx,
num_workers=num_testers,
device=devices[tester_it],
max_sampler_processes_per_worker=max_sampler_processes_per_worker,
distributed_port=distributed_port,
enforce_expert=inference_expert,
),
)
test.start()
self.processes[TEST_MODE_STR].append(test)
get_logger().info(
f"Started {len(self.processes[TEST_MODE_STR])} test processes"
)
checkpoint_paths = self.get_checkpoint_files(
checkpoint_path_dir_or_pattern=checkpoint_path_dir_or_pattern,
approx_ckpt_step_interval=approx_ckpt_step_interval,
)
steps = [self.step_from_checkpoint(cp) for cp in checkpoint_paths]
get_logger().info(f"Running test on {len(steps)} steps {steps}")
for checkpoint_path in checkpoint_paths:
# Make all testers work on each checkpoint
for tester_it in range(num_testers):
self.queues["checkpoints"].put(("eval", checkpoint_path))
# Signal all testers to terminate cleanly
for _ in range(num_testers):
self.queues["checkpoints"].put(("quit", None))
# The metrics file name only carries a start-time suffix in the FLAT layout.
if self.save_dir_fmt == SaveDirFormat.NESTED:
if infer_output_dir:  # NOTE: we change output_dir here
self.output_dir = self.checkpoint_log_folder_str(checkpoint_paths[0])
suffix = ""
elif self.save_dir_fmt == SaveDirFormat.FLAT:
suffix = f"__test_{self.local_start_time_str}"
else:
raise NotImplementedError
metrics_dir = self.metric_path(self.local_start_time_str)
os.makedirs(metrics_dir, exist_ok=True)
metrics_file_path = os.path.join(metrics_dir, "metrics" + suffix + ".json")
get_logger().info(f"Saving test metrics in {metrics_file_path}")
# Check output file can be written
with open(metrics_file_path, "w") as f:
json.dump([], f, indent=4, sort_keys=True, cls=NumpyJSONEncoder)
# The start time passed on is the one encoded in the first checkpoint's path.
return self.log_and_close(
start_time_str=self.checkpoint_start_time_str(checkpoint_paths[0]),
nworkers=num_testers,
test_steps=steps,
metrics_file=metrics_file_path,
)
@staticmethod
def checkpoint_start_time_str(checkpoint_file_name):
    """Return the name of the directory that directly contains the checkpoint.

    The path is split on `os.path.sep` and must have at least two parts;
    the second-to-last part is logged and returned.
    """
    path_parts = checkpoint_file_name.split(os.path.sep)
    assert (
        len(path_parts) > 1
    ), f"{checkpoint_file_name} is not a valid checkpoint path"

    start_time_str = path_parts[-2]
    get_logger().info(f"Using checkpoint start time {start_time_str}")
    return start_time_str
@staticmethod
def checkpoint_log_folder_str(checkpoint_file_name):
    """Return the checkpoint path with its last two components removed.

    The path is split on `os.path.sep` and must have at least two parts;
    the resulting folder is logged and returned.
    """
    path_parts = checkpoint_file_name.split(os.path.sep)
    assert (
        len(path_parts) > 1
    ), f"{checkpoint_file_name} is not a valid checkpoint path"

    # remove checkpoints/*.pt
    kept_parts = path_parts[:-2]
    log_folder_str = os.path.sep.join(kept_parts)
    get_logger().info(f"Using log folder {log_folder_str}")
    return log_folder_str
@property
def experiment_name(self):
    """Experiment tag from the config, suffixed with `_<extra_tag>` when an extra tag is set."""
    if len(self.extra_tag) == 0:
        return self.config.tag()
    return f"{self.config.tag()}_{self.extra_tag}"
def checkpoint_dir(
    self, start_time_str: Optional[str] = None, create_if_none: bool = True
):
    """Build (and optionally create) the directory holding checkpoints.

    # Parameters

    start_time_str : Start time identifying the run; falls back to
        `self.local_start_time_str` when falsy.
    create_if_none : If `True`, the directory is created (no error if it
        already exists).

    # Returns

    The checkpoint folder path. With the NESTED layout this is
    `<output_dir>/<tag>[/<extra_tag>]/<start time>/checkpoints`, with the
    FLAT layout it is `<output_dir>/checkpoints/<tag>[/<extra_tag>]/<start time>`.
    """
    tag = self.config.tag()
    experiment_subdir = (
        tag if self.extra_tag == "" else os.path.join(tag, self.extra_tag)
    )
    run_id = start_time_str or self.local_start_time_str

    if self.save_dir_fmt == SaveDirFormat.NESTED:
        folder = os.path.join(
            self.output_dir, experiment_subdir, run_id, "checkpoints"
        )
    elif self.save_dir_fmt == SaveDirFormat.FLAT:
        folder = os.path.join(
            self.output_dir, "checkpoints", experiment_subdir, run_id
        )
    else:
        raise NotImplementedError

    if create_if_none:
        os.makedirs(folder, exist_ok=True)
    return folder
def log_writer_path(self, start_time_str: str) -> str:
    """Return the directory that the tensorboard writer should log into.

    The result depends on `self.save_dir_fmt` and on whether the runner is
    in test mode:

    * NESTED, test mode: `<output_dir>/test/<tag>/<local start time>`
      (neither `extra_tag` nor `start_time_str` is used here).
    * NESTED, otherwise: `<output_dir>/<tag>[/<extra_tag>]/<start_time_str>/train_tb`.
    * FLAT: `<output_dir>/tb/<tag>[/<extra_tag>]/<start_time_str>`, with
      `test/<local start time>` appended in test mode.

    Raises `NotImplementedError` for any other save format.
    """

    def experiment_subdir() -> str:
        # `<tag>` or `<tag>/<extra_tag>` depending on whether an extra tag is set.
        tag = self.config.tag()
        if self.extra_tag == "":
            return tag
        return os.path.join(tag, self.extra_tag)

    if self.save_dir_fmt == SaveDirFormat.NESTED:
        if self.mode == TEST_MODE_STR:
            return os.path.join(
                self.output_dir,
                "test",
                self.config.tag(),
                self.local_start_time_str,
            )
        return os.path.join(
            self.output_dir, experiment_subdir(), start_time_str, "train_tb"
        )

    if self.save_dir_fmt == SaveDirFormat.FLAT:
        tb_path = os.path.join(
            self.output_dir, "tb", experiment_subdir(), start_time_str
        )
        if self.mode == TEST_MODE_STR:
            tb_path = os.path.join(tb_path, "test", self.local_start_time_str)
        return tb_path

    raise NotImplementedError
def metric_path(self, start_time_str: str) -> str:
    """Return the directory in which metric files are stored.

    * NESTED layout: `<output_dir>/test/<tag>/<start_time_str>`.
    * FLAT layout: `<output_dir>/metrics/<tag>[/<extra_tag>]/<start_time_str>`.

    Raises `NotImplementedError` for any other save format.
    """
    if self.save_dir_fmt == SaveDirFormat.NESTED:
        components = ("test", self.config.tag(), start_time_str)
    elif self.save_dir_fmt == SaveDirFormat.FLAT:
        tag = self.config.tag()
        experiment_subdir = (
            tag if self.extra_tag == "" else os.path.join(tag, self.extra_tag)
        )
        components = ("metrics", experiment_subdir, start_time_str)
    else:
        raise NotImplementedError

    return os.path.join(self.output_dir, *components)
def save_project_state(self):
    """Snapshot the project state (git diff and config sources) to disk.

    Writes into a `used_configs` directory whose location depends on
    `self.save_dir_fmt`:

    1. the current git diff as `<sha>.patch` (a warning is logged instead
       if obtaining the diff raises `subprocess.CalledProcessError`),
    2. every entry of `self.loaded_config_src_files` (the experiment kwargs
       as `config_kwargs.json`, every other entry as a copy of the source
       file prefixed with a header naming its original location),

    and finally notifies every callback via `after_save_project_state`.
    """
    tag = self.config.tag()
    experiment_subdir = (
        tag if self.extra_tag == "" else os.path.join(tag, self.extra_tag)
    )
    run_parts = (experiment_subdir, self.local_start_time_str)

    if self.save_dir_fmt == SaveDirFormat.NESTED:
        base_dir = os.path.join(self.output_dir, *run_parts, "used_configs")
    elif self.save_dir_fmt == SaveDirFormat.FLAT:
        base_dir = os.path.join(self.output_dir, "used_configs", *run_parts)
    else:
        raise NotImplementedError
    os.makedirs(base_dir, exist_ok=True)

    # Saving current git diff
    try:
        sha, diff_str = get_git_diff_of_project()
        patch_path = os.path.join(base_dir, f"{sha}.patch")
        with open(patch_path, "w") as patch_file:
            patch_file.write(diff_str)
        get_logger().info(f"Git diff saved to {base_dir}")
    except subprocess.CalledProcessError:
        get_logger().warning(
            "Failed to get a git diff of the current project."
            f" Is it possible that {os.getcwd()} is not under version control?"
        )

    # Saving configs
    if self.loaded_config_src_files is not None:
        for src_path in self.loaded_config_src_files:
            if src_path == CONFIG_KWARGS_STR:
                # We also save key-word arguments passed to the experiment
                # initializer.
                save_path = os.path.join(base_dir, "config_kwargs.json")
                assert not os.path.exists(
                    save_path
                ), f"{save_path} should not already exist."
                kwargs_payload = json.loads(self.loaded_config_src_files[src_path])
                with open(save_path, "w") as kwargs_file:
                    json.dump(kwargs_payload, kwargs_file)
                continue

            assert os.path.isfile(src_path), f"Config file {src_path} not found"
            src_path = os.path.abspath(src_path)
            file_name = os.path.basename(src_path)

            # To prevent overwriting files with the same name, try the bare
            # file name first and then `namecollision0__`, `namecollision1__`,
            # ... prefixes until an unused destination is found.
            dst_path = os.path.join(base_dir, f"{file_name}")
            collision_index = 0
            while os.path.exists(dst_path):
                dst_path = os.path.join(
                    base_dir,
                    f"namecollision{collision_index}__{file_name}",
                )
                collision_index += 1

            os.makedirs(os.path.dirname(dst_path), exist_ok=True)
            with open(src_path, "r") as src_file:
                file_contents = src_file.read()
            with open(dst_path, "w") as dst_file:
                dst_file.write(
                    f"### THIS FILE ORIGINALLY LOCATED AT '{src_path}'\n\n{file_contents}"
                )

    get_logger().info(f"Config files saved to {base_dir}")

    for callback in self.callbacks:
        callback.after_save_project_state(base_dir=base_dir)
def _update_keys(
    self,
    d: Union[Dict[str, Any], str],
    tag_if_not_a_loss: str,
    mode: str,
    stage_component_uuid: Optional[str] = None,
) -> Union[Dict[str, Any], str]:
    """Prefix a scalar name (or every key of a dict) with mode/stage information.

    # Parameters

    d : A single key or a dictionary whose keys should be converted.
    tag_if_not_a_loss : Group inserted before keys that do not start with
        `"losses/"`.
    mode : Mode string placed at the front of every key.
    stage_component_uuid : Optional identifier placed between the mode and
        the rest of the key.

    # Returns

    The converted key if `d` is a string, otherwise a new dictionary with
    converted keys and the original values.
    """
    if stage_component_uuid is None:
        prefix = f"{mode}-"
    else:
        prefix = f"{mode}-{stage_component_uuid}-"

    def _convert(key: str):
        # Keys under `losses/` keep their group, everything else is filed
        # under `tag_if_not_a_loss`.
        if not key.startswith("losses/"):
            return f"{prefix}{tag_if_not_a_loss}/{key}"
        return f"{prefix}{key}"

    if not isinstance(d, str):
        return {_convert(name): value for name, value in d.items()}
    return _convert(d)
def _process_logging_packages(
    self,
    log_writer: Optional[SummaryWriter],
    pkgs: Union[LoggingPackage, List[LoggingPackage]],
    last_steps: Optional[int],
    last_storage_uuid_to_total_experiences: Optional[Dict[str, int]],
    last_time: Optional[float],
    all_results: Optional[List[Any]] = None,
):
    """Aggregate logging packages, write scalars, log a summary line and notify callbacks.

    # Parameters

    log_writer : Writer that scalars are sent to; nothing is written when `None`.
    pkgs : Logging packages to aggregate. They are indexed (`pkgs[0]`) and
        iterated, and must all share the same mode.
    last_steps : Training steps at the previous training log. Only read when
        the packages are training packages.
    last_storage_uuid_to_total_experiences : Per-storage experience counts at
        the previous training log. Only read when the packages are training
        packages.
    last_time : Time of the previous training log. Only read when the
        packages are training packages.
    all_results : If given, the aggregated means (plus `"training_steps"` and
        `"tasks"` entries) are appended to this list.

    # Returns

    A tuple `(training_steps, storage_uuid_to_total_experiences, current_time)`
    taken from the first package and from `time.time()` at the start of the call.
    """
    mode = pkgs[0].mode
    assert all(
        pkg.mode == mode for pkg in pkgs
    ), "All logging packages must be the same mode."
    # NOTE(review): the two adjacent string literals below are concatenated
    # without a separating space, so the message reads "...maybe of mode...".
    assert mode == self.mode or (
        mode == VALID_MODE_STR and self.mode == TRAIN_MODE_STR
    ), (
        "Logging package mode must match the logger mode except when training where the logging package may"
        "be of mode 'valid'."
    )
    training = mode == TRAIN_MODE_STR  # Are we logging training packages
    current_time = time.time()
    # Step counts are taken from the first package only.
    training_steps = pkgs[0].training_steps
    storage_uuid_to_total_experiences = pkgs[0].storage_uuid_to_total_experiences
    # Everything placed in this dict is handed to the callbacks as `metric_means`.
    callback_metric_means = dict()

    def update_keys_misc(
        key_or_dict: Union[str, Dict[str, Any]],
        stage_component_uuid: Optional[str] = None,
    ):
        # Important to use mode and not self.mode here
        return self._update_keys(
            d=key_or_dict,
            tag_if_not_a_loss="misc",
            mode=mode,
            stage_component_uuid=stage_component_uuid,
        )

    def update_keys_metric(
        key_or_dict: Union[str, Dict[str, Any]],
        stage_component_uuid: Optional[str] = None,
    ):
        # Important to use mode and not self.mode here
        return self._update_keys(
            d=key_or_dict,
            tag_if_not_a_loss="metrics",
            mode=mode,
            stage_component_uuid=stage_component_uuid,
        )

    # The pipeline stage is only written to the log writer while training but
    # is always recorded for the callbacks.
    if training and log_writer is not None:
        log_writer.add_scalar(
            tag=update_keys_misc("pipeline_stage"),
            scalar_value=pkgs[0].pipeline_stage,
            global_step=training_steps,
        )
    callback_metric_means[update_keys_misc("pipeline_stage")] = pkgs[
        0
    ].pipeline_stage

    # Maps each storage uuid to the scalar name under which its total
    # experience count is reported.
    storage_uuid_to_total_experiences_key = {}
    for storage_uuid, val in storage_uuid_to_total_experiences.items():
        total_experiences_key = update_keys_misc(
            f"{storage_uuid}_total_experiences"
        )
        storage_uuid_to_total_experiences_key[storage_uuid] = total_experiences_key
        if training and log_writer is not None:
            log_writer.add_scalar(
                tag=total_experiences_key,
                scalar_value=val,
                global_step=training_steps,
            )
        callback_metric_means[total_experiences_key] = val

    metrics_and_info_tracker = ScalarMeanTracker()
    # For each info scalar: the experience count of the storage it came from
    # (used below as the `global_step` when writing that scalar) ...
    scalar_name_to_total_storage_experience = {}
    # ... and the name of the scalar holding that storage's experience count.
    scalar_name_to_total_experiences_key = {}
    storage_uuid_to_stage_component_uuids = defaultdict(lambda: set())
    metric_dicts_list, render, checkpoint_file_name = [], {}, []
    tasks_callback_data = []
    for pkg in pkgs:
        # Merge the per-package task metrics, weighted by their counts.
        metrics_and_info_tracker.add_scalars(
            scalars=update_keys_metric(pkg.metrics_tracker.means()),
            n=update_keys_metric(pkg.metrics_tracker.counts()),
        )
        tasks_callback_data.extend(pkg.task_callback_data)
        metric_dicts_list.extend(pkg.metric_dicts)
        if pkg.viz_data is not None:
            render.update(pkg.viz_data)
        checkpoint_file_name.append(pkg.checkpoint_file_name)
        for (
            (stage_component_uuid, storage_uuid),
            info_tracker,
        ) in pkg.info_trackers.items():
            if stage_component_uuid is not None:
                storage_uuid_to_stage_component_uuids[storage_uuid].add(
                    stage_component_uuid
                )
            info_means = update_keys_misc(
                info_tracker.means(),
                stage_component_uuid,
            )
            info_counts = update_keys_misc(
                info_tracker.counts(),
                stage_component_uuid,
            )
            metrics_and_info_tracker.add_scalars(
                scalars=info_means,
                n=info_counts,
            )
            total_exp_for_storage = pkg.storage_uuid_to_total_experiences[
                storage_uuid
            ]
            if stage_component_uuid is None:
                assert total_exp_for_storage == training_steps
            for scalar_name in info_means:
                if scalar_name in scalar_name_to_total_storage_experience:
                    # Every package reporting this scalar must agree on the
                    # storage's experience count.
                    assert (
                        total_exp_for_storage
                        == scalar_name_to_total_storage_experience[scalar_name]
                    ), (
                        f"For metric {scalar_name}: there is disagreement between the training steps parameter"
                        f" across different workers ({total_exp_for_storage} !="
                        f" {scalar_name_to_total_storage_experience[scalar_name]}). This suggests an error in "
                        f" AllenAct, please report this issue at https://github.com/allenai/allenact/issues."
                    )
                else:
                    scalar_name_to_total_storage_experience[scalar_name] = (
                        total_exp_for_storage
                    )
                    scalar_name_to_total_experiences_key[scalar_name] = (
                        storage_uuid_to_total_experiences_key[storage_uuid]
                    )

    # Collapse the per-package checkpoint names to a single entry: the last
    # non-`None` name, if any package provided one.
    if any(checkpoint_file_name):
        ckpt_to_store = None
        for ckpt in checkpoint_file_name:
            if ckpt is not None:
                ckpt_to_store = ckpt
        assert ckpt_to_store is not None
        checkpoint_file_name = [ckpt_to_store]
    # assert all_equal(
    #     checkpoint_file_name
    # ), f"All {mode} logging packages must have the same checkpoint_file_name."

    # `message` accumulates the fragments of the single summary line logged below.
    message = [
        f"{mode.upper()}: {training_steps} rollout steps ({pkgs[0].storage_uuid_to_total_experiences})"
    ]
    metrics_and_info_means = metrics_and_info_tracker.means()
    callback_metric_means.update(metrics_and_info_means)
    # Keys are ordered by the number of "/" they contain, then alphabetically.
    for k in sorted(
        metrics_and_info_means.keys(),
        key=lambda mean_key: (mean_key.count("/"), mean_key),
    ):
        if log_writer is not None:
            log_writer.add_scalar(
                tag=k,
                scalar_value=metrics_and_info_means[k],
                global_step=scalar_name_to_total_storage_experience.get(
                    k, training_steps
                ),
            )
        # Drop the leading `<mode>-.../` group from the key in the text log.
        short_key = (
            "/".join(k.split("/")[1:])
            if k.startswith(f"{mode}-") and "/" in k
            else k
        )
        message.append(f"{short_key} {metrics_and_info_means[k]:.3g}")

    if training:
        # Log information about FPS and EPS (experiences per second, for non-rollout storage).
        # Not needed during testing or validation.
        message += [f"elapsed_time {(current_time - last_time):.3g}s"]
        if last_steps > 0:
            fps = (training_steps - last_steps) / (current_time - last_time)
            message += [f"approx_fps {fps:.3g}"]
            approx_fps_key = update_keys_misc("approx_fps")
            if log_writer is not None:
                log_writer.add_scalar(approx_fps_key, fps, training_steps)
            callback_metric_means[approx_fps_key] = fps
        for (
            storage_uuid,
            last_total_exp,
        ) in last_storage_uuid_to_total_experiences.items():
            if storage_uuid in storage_uuid_to_total_experiences:
                cur_total_exp = storage_uuid_to_total_experiences[storage_uuid]
                eps = (cur_total_exp - last_total_exp) / (current_time - last_time)
                message += [f"{storage_uuid}/approx_eps {eps:.3g}"]
                # The same EPS value is reported once per stage component
                # that was seen using this storage.
                for stage_component_uuid in storage_uuid_to_stage_component_uuids[
                    storage_uuid
                ]:
                    approx_eps_key = update_keys_misc(
                        f"approx_eps",
                        stage_component_uuid,
                    )
                    callback_metric_means[approx_eps_key] = eps
                    scalar_name_to_total_experiences_key[approx_eps_key] = (
                        storage_uuid_to_total_experiences_key[storage_uuid]
                    )
                    if log_writer is not None:
                        log_writer.add_scalar(
                            approx_eps_key,
                            eps,
                            cur_total_exp,
                        )

    # Result record for `all_results` and the valid/test callbacks: the
    # aggregated means plus the step count and the raw per-task metric dicts.
    metrics_and_info_means_with_metrics_dicts_list = copy.deepcopy(
        metrics_and_info_means
    )
    metrics_and_info_means_with_metrics_dicts_list.update(
        {"training_steps": training_steps, "tasks": metric_dicts_list}
    )
    if all_results is not None:
        all_results.append(metrics_and_info_means_with_metrics_dicts_list)

    num_tasks = sum([pkg.num_non_empty_metrics_dicts_added for pkg in pkgs])
    num_tasks_completed_key = update_keys_misc("num_tasks_completed_since_last_log")
    if log_writer is not None:
        log_writer.add_scalar(num_tasks_completed_key, num_tasks, training_steps)
    callback_metric_means[num_tasks_completed_key] = num_tasks
    message.append(f"new_tasks_completed {num_tasks}")
    if not training:
        message.append(f"checkpoint {checkpoint_file_name[0]}")
    get_logger().info(" ".join(message))

    # Dispatch to the callback hook matching the package mode. Note that the
    # train hook receives the raw metric dicts whereas the valid/test hooks
    # receive the aggregated result record.
    for callback in self.callbacks:
        if mode == TRAIN_MODE_STR:
            callback.on_train_log(
                metrics=metric_dicts_list,
                metric_means=callback_metric_means,
                step=training_steps,
                checkpoint_file_name=checkpoint_file_name[0],
                tasks_data=tasks_callback_data,
                scalar_name_to_total_experiences_key=scalar_name_to_total_experiences_key,
            )
        if mode == VALID_MODE_STR:
            callback.on_valid_log(
                metrics=metrics_and_info_means_with_metrics_dicts_list,
                metric_means=callback_metric_means,
                step=training_steps,
                checkpoint_file_name=checkpoint_file_name[0],
                tasks_data=tasks_callback_data,
                scalar_name_to_total_experiences_key=scalar_name_to_total_experiences_key,
            )
        if mode == TEST_MODE_STR:
            callback.on_test_log(
                metrics=metrics_and_info_means_with_metrics_dicts_list,
                metric_means=callback_metric_means,
                step=training_steps,
                checkpoint_file_name=checkpoint_file_name[0],
                tasks_data=tasks_callback_data,
                scalar_name_to_total_experiences_key=scalar_name_to_total_experiences_key,
            )

    if self.visualizer is not None:
        self.visualizer.log(
            log_writer=log_writer,
            task_outputs=metric_dicts_list,
            render=render,
            num_steps=training_steps,
        )
    return training_steps, storage_uuid_to_total_experiences, current_time
def process_valid_package(
    self,
    log_writer: Optional[SummaryWriter],
    pkg: LoggingPackage,
    all_results: Optional[List[Any]] = None,
):
    """Process a single validation logging package.

    Wraps `pkg` in a one-element list and forwards it to
    `_process_logging_packages` with all `last_*` arguments set to `None`.
    Returns whatever `_process_logging_packages` returns.
    """
    return self._process_logging_packages(
        log_writer=log_writer,
        pkgs=[pkg],
        last_steps=None,
        last_storage_uuid_to_total_experiences=None,
        last_time=None,
        all_results=all_results,
    )
def process_train_packages(
    self,
    log_writer: Optional[SummaryWriter],
    pkgs: List[LoggingPackage],
    last_steps: int,
    last_storage_uuid_to_total_experiences: Dict[str, int],
    last_time: float,
):
    """Process a batch of training logging packages.

    Forwards all arguments unchanged to `_process_logging_packages` (without
    an `all_results` list) and returns its result.
    """
    return self._process_logging_packages(
        log_writer=log_writer,
        pkgs=pkgs,
        last_steps=last_steps,
        last_storage_uuid_to_total_experiences=last_storage_uuid_to_total_experiences,
        last_time=last_time,
    )
def process_test_packages(
    self,
    log_writer: Optional[SummaryWriter],
    pkgs: List[LoggingPackage],
    all_results: Optional[List[Any]] = None,
):
    """Process a batch of test logging packages.

    Forwards `pkgs` to `_process_logging_packages` with all `last_*`
    arguments set to `None` and returns its result.
    """
    return self._process_logging_packages(
        log_writer=log_writer,
        pkgs=pkgs,
        last_steps=None,
        last_storage_uuid_to_total_experiences=None,
        last_time=None,
        all_results=all_results,
    )
def log_and_close(
    self,
    start_time_str: str,
    nworkers: int,
    test_steps: Sequence[int] = (),
    metrics_file: Optional[str] = None,
) -> List[Dict]:
    """Consume the results queue until the run ends, logging as packages arrive, then close.

    Items are read from `self.queues["results"]`. `LoggingPackage`s are
    buffered/processed according to their mode; tuples are treated as
    worker status messages (`"train_stopped"`, `"valid_stopped"`,
    `"test_stopped"`). On exit the log writer is closed and `self.close()`
    is called.

    # Parameters

    start_time_str : Passed to `self.log_writer_path` to locate the log directory.
    nworkers : Number of packages that are aggregated together for train/test
        logging, and the number of `"test_stopped"` messages awaited in test mode.
    test_steps : Step counts of the checkpoints under test; only used in the
        "Updated ..." log message.
    metrics_file : For test packages, the file the accumulated results are
        dumped to. For validation packages it is used as a format template
        filled with the package's training steps.

    # Returns

    The list of collected evaluation results.
    """
    ptitle(f"AllenAct-Logging-{self.local_start_time_str}")
    finalized = False
    log_writer: Optional[SummaryWriter] = None
    if not self.disable_tensorboard:
        log_writer = SummaryWriter(
            log_dir=self.log_writer_path(start_time_str),
            filename_suffix=f"__{self.mode}_{self.local_start_time_str}",
        )
    # To aggregate/buffer metrics from trainers/testers
    collected: List[LoggingPackage] = []
    last_train_steps = 0
    last_storage_uuid_to_total_experiences = {}
    last_train_time = time.time()
    # test_steps = sorted(test_steps, reverse=True)
    eval_results: List[Dict] = []
    unfinished_workers = nworkers
    try:
        while True:
            try:
                # The 1s timeout lets the `queue.Empty` handler below check
                # whether all worker processes have exited.
                package: Union[
                    LoggingPackage, Union[Tuple[str, Any], Tuple[str, Any, Any]]
                ] = self.queues["results"].get(timeout=1)
                if isinstance(package, LoggingPackage):
                    pkg_mode = package.mode
                    if pkg_mode == TRAIN_MODE_STR:
                        collected.append(package)
                        if len(collected) >= nworkers:
                            # Sort so that packages with the smallest step
                            # counts come first.
                            collected = sorted(
                                collected,
                                key=lambda pkg: (
                                    pkg.training_steps,
                                    *sorted(
                                        pkg.storage_uuid_to_total_experiences.items()
                                    ),
                                ),
                            )
                            if (
                                collected[nworkers - 1].training_steps
                                == collected[0].training_steps
                                and collected[
                                    nworkers - 1
                                ].storage_uuid_to_total_experiences
                                == collected[0].storage_uuid_to_total_experiences
                            ):  # ensure all workers have provided the same training_steps and total_experiences
                                (
                                    last_train_steps,
                                    last_storage_uuid_to_total_experiences,
                                    last_train_time,
                                ) = self.process_train_packages(
                                    log_writer=log_writer,
                                    pkgs=collected[:nworkers],
                                    last_steps=last_train_steps,
                                    last_storage_uuid_to_total_experiences=last_storage_uuid_to_total_experiences,
                                    last_time=last_train_time,
                                )
                                collected = collected[nworkers:]
                            elif len(collected) > 2 * nworkers:
                                get_logger().warning(
                                    f"Unable to aggregate train packages from all {nworkers} workers"
                                    f"after {len(collected)} packages collected"
                                )
                    elif (
                        pkg_mode == VALID_MODE_STR
                    ):  # they all come from a single worker
                        if (
                            package.training_steps is not None
                        ):  # no validation samplers
                            self.process_valid_package(
                                log_writer=log_writer,
                                pkg=package,
                                all_results=(
                                    eval_results
                                    if self._collect_valid_results
                                    else None
                                ),
                            )
                            # NOTE(review): `eval_results[-1]` is only populated
                            # above when `self._collect_valid_results` is true;
                            # presumably callers pass `metrics_file` only in that
                            # case -- confirm.
                            if metrics_file is not None:
                                with open(
                                    metrics_file.format(package.training_steps), "w"
                                ) as f:
                                    json.dump(
                                        eval_results[-1],
                                        f,
                                        indent=4,
                                        sort_keys=True,
                                        cls=NumpyJSONEncoder,
                                    )
                                    get_logger().info(
                                        "Written valid results file {}".format(
                                            metrics_file.format(
                                                package.training_steps
                                            ),
                                        )
                                    )
                        if (
                            finalized and self.queues["checkpoints"].empty()
                        ):  # assume queue is actually empty after trainer finished and no checkpoints in queue
                            break
                    elif pkg_mode == TEST_MODE_STR:
                        collected.append(package)
                        if len(collected) >= nworkers:
                            collected = sorted(
                                collected, key=lambda x: x.training_steps
                            )  # sort by num_steps
                            if (
                                collected[nworkers - 1].training_steps
                                == collected[0].training_steps
                            ):  # ensure nworkers have provided the same num_steps
                                self.process_test_packages(
                                    log_writer=log_writer,
                                    pkgs=collected[:nworkers],
                                    all_results=eval_results,
                                )
                                collected = collected[nworkers:]
                                # The full list of results so far is rewritten
                                # after every processed checkpoint.
                                # NOTE(review): `metrics_file` defaults to `None`,
                                # which `open` would reject -- test callers are
                                # expected to always provide it.
                                with open(metrics_file, "w") as f:
                                    json.dump(
                                        eval_results,
                                        f,
                                        indent=4,
                                        sort_keys=True,
                                        cls=NumpyJSONEncoder,
                                    )
                                    get_logger().info(
                                        f"Updated {metrics_file} up to checkpoint"
                                        f" {test_steps[len(eval_results) - 1]}"
                                    )
                    else:
                        get_logger().error(
                            f"Runner received unknown package of type {pkg_mode}"
                        )
                else:
                    # Status tuple from a worker: element 0 names the event,
                    # element 1 is 0 for a clean stop and non-zero otherwise.
                    pkg_mode = package[0]
                    if pkg_mode == "train_stopped":
                        if package[1] == 0:
                            finalized = True
                            if not self.running_validation:
                                get_logger().info(
                                    "Terminating runner after trainer done (no validation)"
                                )
                                break
                        else:
                            raise Exception(
                                f"Train worker {package[1] - 1} abnormally terminated"
                            )
                    elif pkg_mode == "valid_stopped":
                        raise Exception(
                            f"Valid worker {package[1] - 1} abnormally terminated"
                        )
                    elif pkg_mode == "test_stopped":
                        if package[1] == 0:
                            unfinished_workers -= 1
                            if unfinished_workers == 0:
                                get_logger().info(
                                    "Last tester finished. Terminating"
                                )
                                finalized = True
                                break
                        else:
                            raise RuntimeError(
                                f"Test worker {package[1] - 1} abnormally terminated"
                            )
                    else:
                        get_logger().error(
                            f"Runner received invalid package tuple {package}"
                        )
            except queue.Empty as _:
                # Nothing arrived within the timeout: stop once every worker
                # process has an exit code.
                if all(
                    p.exitcode is not None
                    for p in itertools.chain(*self.processes.values())
                ):
                    break
    except KeyboardInterrupt:
        get_logger().info("KeyboardInterrupt. Terminating runner.")
    except Exception:
        get_logger().error("Encountered Exception. Terminating runner.")
        get_logger().exception(traceback.format_exc())
    finally:
        if finalized:
            get_logger().info("Done")
        if log_writer is not None:
            log_writer.close()
        self.close()
        # NOTE(review): returning from `finally` discards any exception still
        # propagating at this point.
        return eval_results
def get_checkpoint_files(
    self,
    checkpoint_path_dir_or_pattern: str,
    approx_ckpt_step_interval: Optional[int] = None,
):
    """Resolve a checkpoint location into a list of checkpoint paths ordered by step count.

    # Parameters

    checkpoint_path_dir_or_pattern : Either a string starting with
        `wandb://` (handed to `download_checkpoint_from_wandb`), a directory
        (searched for `*.pt` files), or a glob pattern.
    approx_ckpt_step_interval : If given (must be > 0), only the checkpoints
        whose step counts are closest to `0, interval, 2 * interval, ...`
        are kept.

    # Returns

    Checkpoint paths sorted by `(step count, path)`.

    # Raises

    FileNotFoundError : If the pattern matches no files.
    """
    if checkpoint_path_dir_or_pattern.startswith("wandb://"):
        eval_dir = f"/tmp/wandb_ckpts_to_eval/{self.local_start_time_str}"
        os.makedirs(eval_dir, exist_ok=True)
        return download_checkpoint_from_wandb(
            checkpoint_path_dir_or_pattern, eval_dir, only_allow_one_ckpt=False
        )

    pattern = checkpoint_path_dir_or_pattern
    if os.path.isdir(pattern):
        # The fragment is a path to a directory, lets use this directory
        # as the base dir to search for checkpoints
        pattern = os.path.join(pattern, "*.pt")

    matches = glob.glob(pattern, recursive=True)
    if len(matches) == 0:
        raise FileNotFoundError(
            f"Could not find any checkpoints at {os.path.abspath(pattern)}, is it possible"
            f" the path has been mispecified?"
        )

    ordered_pairs = sorted((self.step_from_checkpoint(p), p) for p in matches)
    ckpts_paths = [ckpt for _, ckpt in ordered_pairs]
    step_counts = np.array([count for count, _ in ordered_pairs])
    if approx_ckpt_step_interval is None:
        return ckpts_paths

    assert (
        approx_ckpt_step_interval > 0
    ), "`approx_ckpt_step_interval` must be >0"
    # One target per multiple of the interval, up to and including the first
    # multiple at or beyond the largest step count.
    num_targets = math.ceil(ordered_pairs[-1][0] / approx_ckpt_step_interval) + 1
    inds_to_eval = {
        int(np.argmin(np.abs(step_counts - multiple * approx_ckpt_step_interval)))
        for multiple in range(num_targets)
    }
    return [ckpts_paths[ind] for ind in sorted(inds_to_eval)]
@staticmethod
def step_from_checkpoint(ckpt_path: str) -> int:
    """Determine the training step count of a checkpoint.

    The file name is split on `"__"`; the first segment that contains
    `"steps_"` and whose text after the last `"_"` (up to the first `"."`)
    consists of digits provides the step count. If no segment qualifies, a
    warning is logged and the checkpoint is loaded onto the CPU with
    `torch.load` so that its `"total_steps"` entry can be returned.
    """
    file_name = os.path.basename(ckpt_path)
    for segment in file_name.split("__"):
        if "steps_" not in segment:
            continue
        candidate = segment.rsplit("_", 1)[-1].split(".", 1)[0]
        if candidate.isdigit():
            return int(candidate)

    get_logger().warning(
        f"The checkpoint {os.path.basename(ckpt_path)} does not follow the checkpoint naming convention"
        f" used by AllenAct. As a fall back we must load the checkpoint into memory to find the"
        f" training step count, this may increase startup time if the checkpoints are large or many"
        f" must be loaded in sequence."
    )
    loaded = torch.load(ckpt_path, map_location="cpu")
    return loaded["total_steps"]
def close(self, verbose=True):
    """Terminate and join all worker processes; does nothing if already closed.

    Every live process in `self.processes` is sent a termination signal,
    then every process is joined with a one second timeout. Afterwards
    `self.processes` is cleared and the runner is marked as closed.

    # Parameters

    verbose : Whether progress (and any exception raised while joining) is logged.
    """
    if self._is_closed:
        return

    def logif(s: Union[str, Exception]):
        if not verbose:
            return
        if isinstance(s, str):
            get_logger().info(s)
        elif isinstance(s, Exception):
            get_logger().exception(traceback.format_exc())
        else:
            raise NotImplementedError()

    # First send termination signals
    for process_type, workers in self.processes.items():
        for it, process in enumerate(workers):
            if not process.is_alive():
                continue
            logif(f"Terminating {process_type} {it}")
            process.terminate()

    # Now join processes
    for process_type, workers in self.processes.items():
        for it, process in enumerate(workers):
            try:
                logif(f"Joining {process_type} {it}")
                process.join(1)
                logif(f"Closed {process_type} {it}")
            except Exception as e:
                logif(f"Exception raised when closing {process_type} {it}")
                logif(e)

    self.processes.clear()
    self._is_closed = True
def __del__(self):
    """Close the runner (verbosely) when the object is finalized."""
    self.close(verbose=True)
def __enter__(self):
    """Enter the context manager; returns the runner itself."""
    return self
def __exit__(self, exc_type, exc_val, exc_tb):
    """Close the runner (verbosely) on leaving the context; the exception details are not inspected."""
    self.close(verbose=True)
================================================
FILE: allenact/algorithms/onpolicy_sync/storage.py
================================================
# Original work Copyright (c) Facebook, Inc. and its affiliates.
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import abc
import random
from typing import (
Union,
List,
Dict,
Tuple,
Sequence,
cast,
Optional,
Callable,
Any,
Generator,
)
import gym
import numpy as np
import torch
import allenact.utils.spaces_utils as su
from allenact.algorithms.onpolicy_sync.policy import (
FullMemorySpecType,
ObservationType,
ActionType,
)
from allenact.base_abstractions.misc import Memory
from allenact.utils.system import get_logger
class ExperienceStorage(abc.ABC):
    """Abstract interface for objects that store experience.

    Subclasses must implement `initialize`, `add`, `to`, `set_partition` and
    the `total_experiences` property. `before_updates` and `after_updates`
    are optional hooks that do nothing by default.
    """

    @abc.abstractmethod
    def initialize(self, *, observations: ObservationType, **kwargs):
        """Initialize the storage given a first set of `observations` (keyword-only)."""
        raise NotImplementedError

    @abc.abstractmethod
    def add(
        self,
        observations: ObservationType,
        memory: Optional[Memory],
        actions: torch.Tensor,
        action_log_probs: torch.Tensor,
        value_preds: torch.Tensor,
        rewards: torch.Tensor,
        masks: torch.Tensor,
    ):
        """
        # Parameters
        observations : Observations after taking `actions`
        memory: Memory after having observed the last set of observations.
        actions: Actions taken to reach the current state, i.e. taking these actions has led to a new state with
            new `observations`.
        action_log_probs : Log probs of `actions`
        value_preds : Value predictions corresponding to the last observations
            (i.e. the states before taking `actions`).
        rewards : Rewards from taking `actions` in the last set of states.
        masks : Masks corresponding to the current states, having 0 entries where `observations` correspond to
            observations from the beginning of a new episode.
        """
        raise NotImplementedError

    def before_updates(self, **kwargs):
        """Optional hook; the default implementation does nothing."""
        pass

    def after_updates(self, **kwargs) -> int:
        """Optional hook; the default implementation does nothing.

        NOTE(review): the return annotation is `int` but this default
        implementation returns `None`.
        """
        pass

    @abc.abstractmethod
    def to(self, device: torch.device):
        """Move the storage to `device`."""
        pass

    @abc.abstractmethod
    def set_partition(self, index: int, num_parts: int):
        """Set the partition `index` out of `num_parts` partitions for this storage."""
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def total_experiences(self) -> int:
        """Total number of experiences, as an integer."""
        raise NotImplementedError
class RolloutStorage(ExperienceStorage, abc.ABC):
    """Abstract `ExperienceStorage` specialization for rollouts.

    Narrows the `initialize` signature and adds the abstract methods
    `agent_input_for_next_step` and `sampler_select`.
    """

    # noinspection PyMethodOverriding
    @abc.abstractmethod
    def initialize(
        self,
        *,
        observations: ObservationType,
        num_samplers: int,
        recurrent_memory_specification: FullMemorySpecType,
        action_space: gym.Space,
        **kwargs,
    ):
        """Initialize the storage.

        All arguments are keyword-only: the initial `observations`, the
        number of samplers, the recurrent memory specification and the
        action space.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def agent_input_for_next_step(self) -> Dict[str, Any]:
        """Return a dictionary of inputs for the agent's next step."""
        raise NotImplementedError

    @abc.abstractmethod
    def sampler_select(self, keep_list: Sequence[int]):
        """Restrict the storage to the samplers whose indices are in `keep_list`.

        NOTE(review): semantics inferred from the names only -- confirm
        against a concrete implementation.
        """
        raise NotImplementedError
class StreamingStorageMixin(abc.ABC):
    """Abstract mixin for storage that is consumed as a stream of batches."""

    @abc.abstractmethod
    def next_batch(self) -> Dict[str, Any]:
        """Return the next batch as a dictionary."""
        raise NotImplementedError

    # Unlike its siblings this method is not marked abstract, so subclasses
    # can be instantiated without overriding it; calling it then raises.
    def reset_stream(self):
        """Reset the stream; raises `NotImplementedError` unless overridden."""
        raise NotImplementedError

    @abc.abstractmethod
    def empty(self) -> bool:
        """Return whether the storage is empty."""
        raise NotImplementedError
class MiniBatchStorageMixin(abc.ABC):
    """Abstract mixin for storage whose experience is consumed in mini-batches."""

    @abc.abstractmethod
    def batched_experience_generator(
        self,
        num_mini_batch: int,
    ) -> Generator[Dict[str, Any], None, None]:
        """Return a generator yielding batches (dictionaries) of experience.

        `num_mini_batch` is presumably the number of mini-batches to split
        the stored experience into -- confirm against an implementation.
        """
        raise NotImplementedError
class RolloutBlockStorage(RolloutStorage, MiniBatchStorageMixin):
"""Class for storing rollout information for RL trainers."""
FLATTEN_SEPARATOR: str = "._AUTOFLATTEN_."
def __init__(self, init_size: int = 50):
    """Create an empty storage.

    # Parameters

    init_size : Initial value of `self.full_size`.

    No tensors are allocated here; all `*_full` tensor attributes start out
    as `None` and only an empty `Memory` is created for the observations.
    """
    # --- Capacity, position and bookkeeping ---------------------------------
    self.full_size = init_size
    self.step = 0
    self._total_steps = 0
    self._before_update_called = False
    self.device = torch.device("cpu")
    self.dim_names = ["step", "sampler", None]

    # --- Name maps between flattened and nested keys ------------------------
    self.flattened_to_unflattened: Dict[str, Dict[str, List[str]]] = {
        "memory": {},
        "observations": {},
    }
    self.unflattened_to_flattened: Dict[str, Dict[Tuple[str, ...], str]] = {
        "memory": {},
        "observations": {},
    }

    # --- Specifications (set later) -----------------------------------------
    self.memory_specification: Optional[FullMemorySpecType] = None
    self.action_space: Optional[gym.Space] = None

    # --- Stored data --------------------------------------------------------
    self.memory_first_last: Optional[Memory] = None
    self._observations_full: Memory = Memory()
    self._value_preds_full: Optional[torch.Tensor] = None
    self._returns_full: Optional[torch.Tensor] = None
    self._rewards_full: Optional[torch.Tensor] = None
    self._action_log_probs_full: Optional[torch.Tensor] = None
    self._masks_full: Optional[torch.Tensor] = None
    self._actions_full: Optional[torch.Tensor] = None
    self._prev_actions_full: Optional[torch.Tensor] = None

    # self._advantages and self._normalized_advantages are only computed
    # when `before_updates` is called
    self._advantages: Optional[torch.Tensor] = None
    self._normalized_advantages: Optional[torch.Tensor] = None
def initialize(
    self,
    *,
    observations: ObservationType,
    num_samplers: int,
    recurrent_memory_specification: FullMemorySpecType,
    action_space: gym.Space,
    **kwargs,
):
    """Prepare the storage and insert the initial observations at time step 0.

    When no memory specification has been recorded yet, the specification
    and action space are stored, the first/last memory is created, and the
    masks, actions and previous-actions tensors are allocated on
    `self.device`. In every case the storage must be at step 0; the given
    observations are then written at time step 0 and the first previous
    actions and masks are zeroed.
    """
    # One-time allocation, skipped once a memory specification is set.
    if self.memory_specification is None:
        self.memory_specification = recurrent_memory_specification or {}
        self.action_space = action_space
        self.memory_first_last: Memory = self.create_memory(
            spec=self.memory_specification,
            num_samplers=num_samplers,
        ).to(self.device)
        # Memory keys are not nested: each maps to itself in both directions.
        for key in self.memory_specification:
            self.flattened_to_unflattened["memory"][key] = [key]
            self.unflattened_to_flattened["memory"][(key,)] = key
        # Masks and previous actions hold one more step than actions.
        self._masks_full = torch.zeros(
            self.full_size + 1, num_samplers, 1, device=self.device
        )
        action_flat_dim = su.flatdim(self.action_space)
        self._actions_full = torch.zeros(
            self.full_size, num_samplers, action_flat_dim, device=self.device
        )
        self._prev_actions_full = torch.zeros(
            self.full_size + 1, num_samplers, action_flat_dim, device=self.device
        )
    assert self.step == 0, "Must call `after_updates` before calling `initialize`"
    self.insert_observations(observations=observations, time_step=0)
    self.prev_actions[0].zero_()  # Have to zero previous actions
    self.masks[0].zero_()  # Have to zero masks
@property
def total_experiences(self) -> int:
    """The running experience counter stored in `self._total_steps`."""
    return self._total_steps

@total_experiences.setter
def total_experiences(self, value: int):
    # Overwrites the running counter with `value`.
    self._total_steps = value
def set_partition(self, index: int, num_parts: int):
    """No-op: this storage ignores partition information."""
    pass
@property
def value_preds(self) -> torch.Tensor:
    """Slice of `_value_preds_full` holding its first `self.step + 1` entries."""
    return self._value_preds_full[: self.step + 1]
@property
def rewards(self) -> torch.Tensor:
    """Slice of `_rewards_full` holding its first `self.step` entries."""
    return self._rewards_full[: self.step]
@property
def returns(self) -> torch.Tensor:
    """Slice of `_returns_full` holding its first `self.step + 1` entries."""
    return self._returns_full[: self.step + 1]
@property
def action_log_probs(self) -> torch.Tensor:
    """Slice of `_action_log_probs_full` holding its first `self.step` entries."""
    return self._action_log_probs_full[: self.step]
@property
def actions(self) -> torch.Tensor:
    """Slice of `_actions_full` holding its first `self.step` entries."""
    return self._actions_full[: self.step]
@property
def prev_actions(self) -> torch.Tensor:
    """Slice of `_prev_actions_full` holding its first `self.step + 1` entries."""
    return self._prev_actions_full[: self.step + 1]
@property
def masks(self) -> torch.Tensor:
    """Slice of `_masks_full` holding its first `self.step + 1` entries."""
    return self._masks_full[: self.step + 1]
@property
def observations(self) -> Memory:
    """`_observations_full` sliced along dim 0 from 0 up to (excluding) `self.step + 1`."""
    return self._observations_full.slice(dim=0, start=0, stop=self.step + 1)
@staticmethod
def create_memory(
    spec: Optional[FullMemorySpecType],
    num_samplers: int,
) -> Memory:
    """Build a zero-initialized `Memory` from a memory specification.

    # Parameters

    spec : Mapping from memory key to `(dims_template, dtype)`, where
        `dims_template` is a sequence of `(dimension name, size)` pairs that
        must include a dimension named `"sampler"`. If `None`, an empty
        `Memory` is returned.
    num_samplers : Size used for the `"sampler"` dimension.

    # Returns

    A `Memory` with one zero tensor per key. Each tensor has a leading
    `"step"` dimension of size 2 followed by the template dimensions.
    """
    memory = Memory()
    if spec is None:
        return memory

    for key in spec:
        dims_template, dtype = spec[key]

        # Prepend the step dimension (size 2) to the template dimensions.
        names = ["step", *[dim[0] for dim in dims_template]]
        sampler_dim = names.index("sampler")
        sizes = [2, *[dim[1] for dim in dims_template]]
        sizes[sampler_dim] = num_samplers

        memory.check_append(
            key=key,
            tensor=torch.zeros(*sizes, dtype=dtype),
            sampler_dim=sampler_dim,
        )
    return memory
def to(self, device: torch.device):
    """Move every allocated storage attribute to `device` and record the new device.

    Attributes that are still `None` are left untouched.
    """
    movable_attrs = (
        "_observations_full",
        "memory_first_last",
        "_actions_full",
        "_prev_actions_full",
        "_masks_full",
        "_rewards_full",
        "_value_preds_full",
        "_returns_full",
        "_action_log_probs_full",
    )
    for attr_name in movable_attrs:
        current = getattr(self, attr_name)
        if current is None:
            continue
        setattr(self, attr_name, current.to(device))
    self.device = device
def insert_observations(
    self,
    observations: ObservationType,
    time_step: int,
):
    """Write `observations` into the observation storage at `time_step`.

    Delegates to `insert_tensors` with `storage_name="observations"`.
    """
    self.insert_tensors(
        storage=self._observations_full,
        storage_name="observations",
        unflattened=observations,
        time_step=time_step,
    )
def insert_memory(
    self,
    memory: Optional[Memory],
    time_step: int,
):
    """Write `memory` into the first/last memory storage.

    If `memory` is `None` nothing is written; in that case the stored
    first/last memory must be empty.
    """
    if memory is not None:
        # Only two memories are kept, so every time step >= 1 maps to slot 1:
        # * the first memory is used for loss computation when the agent model
        #   has to compute all its outputs again given the full batch.
        # * the last memory is used by the agent when collecting rollouts.
        slot = min(time_step, 1)
        self.insert_tensors(
            storage=self.memory_first_last,
            storage_name="memory",
            unflattened=memory,
            time_step=slot,
        )
        return

    assert len(self.memory_first_last) == 0
def insert_tensors(
    self,
    storage: Memory,
    storage_name: str,
    unflattened: Union[ObservationType, Memory],
    prefix: str = "",
    path: Sequence[str] = (),
    time_step: int = 0,
):
    """Recursively copy (possibly nested) tensors into a flat storage.

    Nested dictionaries are traversed depth first; the flattened name of a
    leaf is the names along its path joined with `FLATTEN_SEPARATOR`.

    # Parameters

    storage : Destination, indexed by flattened name; each entry is a
        `(tensor, sampler_dim)` pair.
    storage_name : Either `"observations"` or `"memory"`. Unknown
        observation names are allocated on first use, whereas memory entries
        must already exist.
    unflattened : The (possibly nested) data to insert. A leaf may be a
        tensor or a `(tensor, sampler_dim)` tuple.
    prefix : Flattened-name prefix accumulated during recursion.
    path : Nested key path accumulated during recursion.
    time_step : Index along the first dimension at which the data is written.

    # Raises

    Any exception raised while copying is logged together with the
    offending flattened name and then re-raised.
    """
    path = list(path)
    for name in unflattened:
        current_data = unflattened[name]

        if isinstance(current_data, Dict):
            # Recurse into nested dictionaries, extending prefix and path.
            self.insert_tensors(
                storage=storage,
                storage_name=storage_name,
                unflattened=cast(ObservationType, current_data),
                prefix=prefix + name + self.FLATTEN_SEPARATOR,
                path=path + [name],
                time_step=time_step,
            )
            continue

        # Default sampler dimension, overridden when the leaf carries its own.
        sampler_dim = self.dim_names.index("sampler")
        if isinstance(current_data, tuple):
            sampler_dim = current_data[1]
            current_data = current_data[0]

        flatten_name = prefix + name
        if flatten_name not in storage:
            assert storage_name == "observations"
            # Allocate zeros with a new leading dimension of `full_size + 1`.
            storage[flatten_name] = (
                torch.zeros_like(current_data)  # type:ignore
                .repeat(
                    self.full_size + 1,  # required for observations (and memory)
                    *(1 for _ in range(len(current_data.shape))),
                )
                .to(self.device),
                sampler_dim,
            )
            assert (
                flatten_name not in self.flattened_to_unflattened[storage_name]
            ), f"new flattened name {flatten_name} already existing in flattened spaces[{storage_name}]"
            self.flattened_to_unflattened[storage_name][flatten_name] = path + [
                name
            ]
            self.unflattened_to_flattened[storage_name][
                tuple(path + [name])
            ] = flatten_name

        try:
            if storage_name == "observations":
                # current_data has a step dimension
                assert time_step >= 0
                storage[flatten_name][0][time_step : time_step + 1].copy_(
                    current_data
                )
            elif storage_name == "memory":
                # current_data does not have a step dimension
                storage[flatten_name][0][time_step].copy_(current_data)
            else:
                raise NotImplementedError
        except Exception:
            # Narrowed from a bare `except:` so that only genuine errors are
            # annotated with the offending name before being re-raised.
            get_logger().error(
                f"Error while inserting data in storage for name {flatten_name}"
            )
            raise
def create_tensor_storage(
    self, num_steps: int, template: torch.Tensor
) -> torch.Tensor:
    """Return zeros shaped like `template` repeated `num_steps` times along dim 0.

    The result lives on `self.device` and has the dtype of `template`.
    """
    zero_block = torch.zeros_like(template).to(self.device)
    return torch.cat([zero_block for _ in range(num_steps)])
def _double_storage_size(self):
    """Grow every allocated storage tensor by `self.full_size` zero rows and double `self.full_size`.

    Existing contents are kept at the front of each tensor. Tensors that are
    still `None` are left as `None`.
    """

    def pad_tensor_with_zeros(old_t: Optional[torch.Tensor]):
        if old_t is None:
            return None
        num_rows = old_t.shape[0]
        assert num_rows in [self.full_size, self.full_size + 1]
        grown = torch.zeros(
            num_rows + self.full_size,
            *old_t.shape[1:],
            dtype=old_t.dtype,
            device=old_t.device,
        )
        grown[:num_rows] = old_t
        return grown

    # Observations are stored as `(tensor, sampler_dim)` pairs.
    for key in list(self._observations_full.keys()):
        obs_tensor, sampler_dim = self._observations_full[key]
        self._observations_full[key] = (
            pad_tensor_with_zeros(obs_tensor),
            sampler_dim,
        )

    for attr_name in (
        "_actions_full",
        "_prev_actions_full",
        "_masks_full",
        "_rewards_full",
        "_value_preds_full",
        "_returns_full",
        "_action_log_probs_full",
    ):
        setattr(self, attr_name, pad_tensor_with_zeros(getattr(self, attr_name)))

    # Only update the capacity once all tensors have been grown, since the
    # padding helper reads the old value.
    self.full_size *= 2
def add(
    self,
    observations: ObservationType,
    memory: Optional[Memory],
    actions: torch.Tensor,
    action_log_probs: torch.Tensor,
    value_preds: torch.Tensor,
    rewards: torch.Tensor,
    masks: torch.Tensor,
):
    """See `ExperienceStorage.add` documentation."""
    assert (
        len(masks.shape) == 2 and masks.shape[1] == 1
    ), f"Can only add a single step worth of data at a time (mask shape = {masks.shape})."

    self.total_experiences += masks.shape[0]

    # Grow the buffers when they are exactly full; being past the end is a bug.
    if self.step > self.full_size:
        raise RuntimeError
    if self.step == self.full_size:
        self._double_storage_size()

    cur_step = self.step
    next_step = cur_step + 1

    # Observations, memory, previous actions and masks describe the *input*
    # of the next step and are therefore written one step ahead.
    self.insert_observations(observations, time_step=next_step)
    self.insert_memory(memory, time_step=next_step)

    assert actions.shape == self._actions_full.shape[1:]

    self._actions_full[cur_step].copy_(actions)  # type:ignore
    self._prev_actions_full[next_step].copy_(actions)  # type:ignore
    self._masks_full[next_step].copy_(masks)  # type:ignore

    if self._rewards_full is None:
        # The storage for `rewards`, `value_preds`, `action_log_probs` and `returns`
        # is created lazily as their shapes are not known a priori. For instance,
        # in a multi-agent setting there may be many rewards (one for each agent).
        capacity = self.full_size
        self._rewards_full = self.create_tensor_storage(
            capacity, rewards.unsqueeze(0)  # add step
        )

        value_template = value_preds.unsqueeze(0)  # add step
        self._value_preds_full = self.create_tensor_storage(
            capacity + 1, value_template
        )
        self._returns_full = self.create_tensor_storage(capacity + 1, value_template)

        self._action_log_probs_full = self.create_tensor_storage(
            capacity, action_log_probs.unsqueeze(0)
        )

    self._value_preds_full[cur_step].copy_(value_preds)  # type:ignore
    self._rewards_full[cur_step].copy_(rewards)  # type:ignore
    self._action_log_probs_full[cur_step].copy_(action_log_probs)  # type:ignore

    self.step = next_step

    self._before_update_called = False
    # We set the below to be None just for extra safety.
    self._advantages = None
    self._normalized_advantages = None
def sampler_select(self, keep_list: Sequence[int]):
    """Keep only the data of the samplers whose indices are in `keep_list`.

    Nothing is copied when `keep_list` has as many entries as there are
    samplers in `self._actions_full` (dimension 1).

    # Parameters

    keep_list : indices (along the sampler dimension) of the samplers to keep.
    """
    keep_list = list(keep_list)
    if self._actions_full.shape[1] == len(keep_list):  # samplers dim
        return  # we are keeping everything, no need to copy

    self._observations_full = self._observations_full.sampler_select(keep_list)
    self.memory_first_last = self.memory_first_last.sampler_select(keep_list)
    self._actions_full = self._actions_full[:, keep_list]
    self._prev_actions_full = self._prev_actions_full[:, keep_list]
    self._masks_full = self._masks_full[:, keep_list]

    # The buffers below are only instantiated by the first call to `add`
    # (their shapes are not known before that) and may therefore still be
    # `None`; indexing `None` would raise a `TypeError`.
    for attr_name in (
        "_action_log_probs_full",
        "_value_preds_full",
        "_rewards_full",
        "_returns_full",
    ):
        tensor = getattr(self, attr_name)
        if tensor is not None:
            setattr(self, attr_name, tensor[:, keep_list])
def before_updates(
    self,
    *,
    next_value: torch.Tensor,
    use_gae: bool,
    gamma: float,
    tau: float,
    adv_stats_callback: Callable[[torch.Tensor], Dict[str, torch.Tensor]],
    **kwargs,
):
    """Compute returns and (normalized) advantages prior to an update.

    # Parameters

    next_value : value estimate forwarded to `compute_returns`.
    use_gae : forwarded to `compute_returns`.
    gamma : forwarded to `compute_returns`.
    tau : forwarded to `compute_returns`.
    adv_stats_callback : called with the raw advantages, must return a
        dictionary with `"mean"` and `"std"` entries used for normalization.
    kwargs : must be empty.
    """
    assert len(kwargs) == 0

    self.compute_returns(
        next_value=next_value,
        use_gae=use_gae,
        gamma=gamma,
        tau=tau,
    )

    # The last entry of returns / value predictions has no matching action.
    advantages = self.returns[:-1] - self.value_preds[:-1]
    self._advantages = advantages

    stats = adv_stats_callback(advantages)
    centered = advantages - stats["mean"]
    self._normalized_advantages = centered / (stats["std"] + 1e-5)

    self._before_update_called = True
def after_updates(self, **kwargs):
    """Reset the storage after an update so that a new rollout can start.

    The last stored observations, memory, masks and previous actions are
    copied into the first position, the cached advantages are dropped and
    the step counter is set back to zero.
    """
    assert len(kwargs) == 0

    for storage in (self.observations, self.memory_first_last):
        for key in storage:
            tensor = storage[key][0]
            tensor[0].copy_(tensor[-1])

    if self._masks_full is not None:
        current_masks = self.masks
        current_masks[0].copy_(current_masks[-1])

    if self._prev_actions_full is not None:
        current_prev_actions = self.prev_actions
        current_prev_actions[0].copy_(current_prev_actions[-1])

    self._before_update_called = False
    self._advantages = None
    self._normalized_advantages = None
    self.step = 0
@staticmethod
def _extend_tensor_with_ones(stored_tensor: torch.Tensor, desired_num_dims: int):
    """Return a view of `stored_tensor` with trailing singleton dimensions.

    Size-one dimensions are appended until the view has `desired_num_dims`
    dimensions so that it broadcasts against tensors of that rank. The
    shape is unchanged if the tensor already has at least that many
    dimensions.
    """
    num_missing = desired_num_dims - len(stored_tensor.shape)
    trailing_ones = [1] * num_missing  # empty when nothing is missing
    return stored_tensor.view(*stored_tensor.shape, *trailing_ones)
def compute_returns(
    self, next_value: torch.Tensor, use_gae: bool, gamma: float, tau: float
):
    """Fill `self.returns` by iterating backwards over the stored steps.

    # Parameters

    next_value : value written into the last slot of `self.value_preds`
        (when `use_gae` is true) or of `self.returns` (otherwise).
    use_gae : if true, accumulate the `gamma * tau` discounted one-step
        errors and add the value predictions; otherwise compute discounted
        sums of rewards.
    gamma : discount factor.
    tau : additional discount applied to the accumulated errors
        (only used when `use_gae` is true).
    """
    num_value_dims = len(self.value_preds.shape)
    masks = self._extend_tensor_with_ones(self.masks, desired_num_dims=num_value_dims)
    rewards = self._extend_tensor_with_ones(
        self.rewards, desired_num_dims=num_value_dims
    )
    num_steps = rewards.shape[0]

    if not use_gae:
        self.returns[-1] = next_value
        for t in reversed(range(num_steps)):
            self.returns[t] = (
                self.returns[t + 1] * gamma * masks[t + 1] + rewards[t]
            )
        return

    self.value_preds[-1] = next_value
    gae = 0
    for t in reversed(range(num_steps)):
        # A zero mask at t + 1 cuts the bootstrap from the following step.
        not_done = masks[t + 1]
        td_error = (
            rewards[t]
            + gamma * self.value_preds[t + 1] * not_done
            - self.value_preds[t]
        )
        gae = td_error + gamma * tau * not_done * gae  # type:ignore
        self.returns[t] = gae + self.value_preds[t]
def batched_experience_generator(
    self,
    num_mini_batch: int,
):
    """Yield mini-batches of rollout data, each covering a group of samplers.

    The samplers are split into `num_mini_batch` contiguous groups which
    are visited in random order. Every yielded dictionary contains all
    stored steps of the samplers in one group.

    # Parameters

    num_mini_batch : number of mini batches, must not exceed the number
        of samplers.
    """
    assert self._before_update_called, (
        "self._before_update_called() must be called before"
        " attempting to generated batched rollouts."
    )
    num_samplers = self.rewards.shape[1]
    assert num_samplers >= num_mini_batch, (
        f"The number of task samplers ({num_samplers}) "
        f"must be greater than or equal to the number of "
        f"mini batches ({num_mini_batch})."
    )

    # Boundaries of the contiguous sampler groups.
    boundaries = np.round(
        np.linspace(0, num_samplers, num_mini_batch + 1, endpoint=True)
    ).astype(np.int32)
    groups = list(zip(boundaries[:-1], boundaries[1:]))
    random.shuffle(groups)

    for start_ind, end_ind in groups:
        cur_samplers = list(range(start_ind, end_ind))

        def gather(source: torch.Tensor) -> torch.Tensor:
            # Select the current samplers and stack them along dimension 1.
            return torch.stack([source[:, ind] for ind in cur_samplers], 1)

        memory_batch = self.memory_first_last.step_squeeze(0).sampler_select(
            cur_samplers
        )
        observations_batch = self.unflatten_observations(
            self.observations.slice(dim=0, stop=-1).sampler_select(cur_samplers)
        )

        actions_batch = gather(self.actions)
        prev_actions_batch = gather(self.prev_actions[:-1])
        value_preds_batch = gather(self.value_preds[:-1])
        return_batch = gather(self.returns[:-1])
        masks_batch = gather(self.masks[:-1])
        old_action_log_probs_batch = gather(self.action_log_probs)
        adv_targ = gather(self._advantages)
        norm_adv_targ = gather(self._normalized_advantages)

        yield {
            "observations": observations_batch,
            "memory": memory_batch,
            "actions": su.unflatten(self.action_space, actions_batch),
            "prev_actions": su.unflatten(self.action_space, prev_actions_batch),
            "values": value_preds_batch,
            "returns": return_batch,
            "masks": masks_batch,
            "old_action_log_probs": old_action_log_probs_batch,
            "adv_targ": adv_targ,
            "norm_adv_targ": norm_adv_targ,
            "bsize": int(np.prod(masks_batch.shape[:2])),
        }
def unflatten_observations(self, flattened_batch: Memory) -> ObservationType:
    """Rebuild the nested observation dictionary from a flat batch.

    Every flattened name is mapped back to its key path (recorded in
    `self.flattened_to_unflattened["observations"]`) and the first element
    of the stored entry is placed at that path.
    """
    nested: ObservationType = {}
    for flat_name in flattened_batch:
        key_path = self.flattened_to_unflattened["observations"][flat_name]

        # Walk (and create when missing) the intermediate dictionaries.
        node = nested
        for parent in key_path[:-1]:
            if parent not in node:
                node[parent] = {}
            node = cast(ObservationType, node[parent])

        leaf = key_path[-1]
        node[leaf] = flattened_batch[flat_name][0]
    return nested
def pick_observation_step(self, step: int) -> ObservationType:
    """Return the (unflattened) observations stored at `step`."""
    flat_step = self.observations.step_select(step)
    return self.unflatten_observations(flat_step)
def pick_memory_step(self, step: int) -> Memory:
    """Return the first or the last stored memory.

    `step` must be `0`, `self.step` or `-1`; any positive step is mapped to
    index `1` of `self.memory_first_last`.
    """
    assert step in [0, self.step, -1], "Can only access the first or last memory."
    slot = step if step < 1 else 1
    return self.memory_first_last.step_squeeze(slot)
def pick_prev_actions_step(self, step: int) -> ActionType:
    """Return the (unflattened) previous actions stored at `step`.

    The step dimension is kept (with size one). `step == -1` selects the
    last stored step: the plain slice `[-1:0]` would be empty, so the
    slice is left open-ended in that case.
    """
    stop = step + 1
    if stop == 0:
        selected = self.prev_actions[step:]
    else:
        selected = self.prev_actions[step:stop]
    return su.unflatten(self.action_space, selected)
def agent_input_for_next_step(self) -> Dict[str, Any]:
    """Collect the data stored at the current step (`self.step`).

    The result has the keys `"observations"`, `"memory"`, `"prev_actions"`
    and `"masks"`; the masks keep a step dimension of size one.
    """
    agent_input: Dict[str, Any] = {}
    agent_input["observations"] = self.pick_observation_step(self.step)
    agent_input["memory"] = self.pick_memory_step(self.step)
    agent_input["prev_actions"] = self.pick_prev_actions_step(self.step)
    agent_input["masks"] = self.masks[self.step : self.step + 1]
    return agent_input
================================================
FILE: allenact/algorithms/onpolicy_sync/vector_sampled_tasks.py
================================================
# Original work Copyright (c) Facebook, Inc. and its affiliates.
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os
import signal
import time
import traceback
from multiprocessing.connection import Connection
from multiprocessing.context import BaseContext
from multiprocessing.process import BaseProcess
from threading import Thread
from typing import (
Any,
Callable,
Dict,
Generator,
Iterator,
List,
Optional,
Sequence,
Set,
Tuple,
Union,
cast,
)
import numpy as np
from gym.spaces.dict import Dict as SpaceDict
from setproctitle import setproctitle as ptitle
from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import SensorSuite, Sensor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.misc_utils import partition_sequence
from allenact.utils.system import get_logger
from allenact.utils.tensor_utils import tile_images
try:
# Use torch.multiprocessing if we can.
# We have yet to find a reason to not use it and
# you are required to use it when sending a torch.Tensor
# between processes
import torch.multiprocessing as mp
except ImportError:
import multiprocessing as mp # type: ignore
# Name of a multiprocessing start method. NOTE(review): not referenced in the
# visible part of this file (the `VectorSampledTasks` default is spelled out
# literally); presumably used by other modules -- confirm.
DEFAULT_MP_CONTEXT_TYPE = "forkserver"

# Special keys; NOTE(review): not referenced in the visible part of this file,
# presumably used to tag metrics / callback data of completed tasks -- confirm.
COMPLETE_TASK_METRICS_KEY = "__AFTER_TASK_METRICS__"
COMPLETE_TASK_CALLBACK_KEY = "__AFTER_TASK_CALLBACK__"

# Identifiers of the commands sent over the pipes between `VectorSampledTasks`
# and its worker processes. A message is either `(command(s), data)` for all
# task samplers of a worker or `(sampler_index, command, data)` for one of them.
STEP_COMMAND = "step"
NEXT_TASK_COMMAND = "next_task"
RENDER_COMMAND = "render"
CLOSE_COMMAND = "close"
OBSERVATION_SPACE_COMMAND = "observation_space"
ACTION_SPACE_COMMAND = "action_space"
CALL_COMMAND = "call"
SAMPLER_COMMAND = "call_sampler"
ATTR_COMMAND = "attr"
SAMPLER_ATTR_COMMAND = "sampler_attr"
RESET_COMMAND = "reset"
SEED_COMMAND = "seed"
PAUSE_COMMAND = "pause"
RESUME_COMMAND = "resume"
class DelaySignalHandling:
    """Context manager delaying the handling of SIGINT and SIGTERM.

    While the context is active, received SIGINT / SIGTERM signals are only
    recorded. When the context exits, the previous handlers are restored and
    any recorded signal is handed to them.

    Entering the context returns the manager itself, so the recorded signals
    can be inspected via `with DelaySignalHandling() as delayed: ...`.
    """

    # Modified from https://stackoverflow.com/a/21919644
    def __init__(self):
        # `(sig, frame)` of the last delayed signal of each kind (if any).
        self.int_signal_received: Optional[Any] = None
        self.term_signal_received: Optional[Any] = None

        # Handlers that were installed before entering the context.
        self.old_int_handler = None
        self.old_term_handler = None

    def __enter__(self):
        """Install the delaying handlers and return `self`."""
        self.int_signal_received: Optional[Any] = None
        self.term_signal_received: Optional[Any] = None

        self.old_int_handler = signal.signal(signal.SIGINT, self.int_handler)
        self.old_term_handler = signal.signal(signal.SIGTERM, self.term_handler)
        return self

    def int_handler(self, sig, frame):
        """Record a SIGINT instead of raising `KeyboardInterrupt`."""
        self.int_signal_received = (sig, frame)
        get_logger().debug("SIGINT received. Delaying KeyboardInterrupt.")

    def term_handler(self, sig, frame):
        """Record a SIGTERM instead of terminating."""
        self.term_signal_received = (sig, frame)
        get_logger().debug("SIGTERM received. Delaying termination.")

    def __exit__(self, type, value, traceback):
        """Restore the previous handlers and deliver any delayed signal."""
        signal.signal(signal.SIGINT, self.old_int_handler)
        signal.signal(signal.SIGTERM, self.old_term_handler)

        if self.term_signal_received:
            # For some reason there appear to be cases where the original termination
            # handler is not callable. It is unclear to me exactly why this is the case
            # but here we add a guard to double check that the handler is callable and,
            # if it's not, we re-send the termination signal to the process and let
            # the python internals handle it (note that we've already reset the termination
            # handler to what it was originally above in the signal.signal(...) code).
            if callable(self.old_term_handler):
                self.old_term_handler(*self.term_signal_received)
            else:
                get_logger().debug(
                    "Termination handler could not be called after delaying signal handling."
                    f" Resending the SIGTERM signal. Last (sig, frame) == ({self.term_signal_received})."
                )
                os.kill(os.getpid(), signal.SIGTERM)

        if self.int_signal_received:
            if callable(self.old_int_handler):
                self.old_int_handler(*self.int_signal_received)
            elif self.old_int_handler != signal.SIG_IGN:
                # A SIGINT that was being ignored before entering the context
                # stays ignored (mirroring what re-sending does for SIGTERM);
                # otherwise fall back to raising `KeyboardInterrupt`.
                signal.default_int_handler(*self.int_signal_received)
class VectorSampledTasks:
    """Vectorized collection of tasks. Creates multiple processes where each
    process runs its own TaskSampler(s). Each task sampler generates one Task
    at a time and this class allows for interacting with these tasks in a
    vectorized manner. When a task on a process completes, the process
    samples another task from its task sampler. All the tasks are
    synchronized (for step and new_task methods).

    # Attributes

    make_sampler_fn : function which creates a single TaskSampler.
    sampler_fn_args : sequence of dictionaries describing the args
        to pass to make_sampler_fn on each individual process.
    auto_resample_when_done : automatically sample a new Task from the TaskSampler when
        the Task completes. If False, a new Task will not be resampled until all
        Tasks on all processes have completed. This functionality is provided for seamless training
        of vectorized Tasks.
    multiprocessing_start_method : the multiprocessing method used to
        spawn worker processes. Valid methods are
        ``{'spawn', 'forkserver', 'fork'}``. ``'forkserver'`` is the
        recommended method as it works well with CUDA. If
        ``'fork'`` is used, the subprocess must be started before
        any other GPU usage.
    """

    observation_space: SpaceDict
    _workers: List[Union[mp.Process, Thread, BaseProcess]]
    _is_waiting: bool
    _num_task_samplers: int
    _auto_resample_when_done: bool
    _mp_ctx: BaseContext
    _connection_read_fns: List[Callable[[], Any]]
    _connection_write_fns: List[Callable[[Any], None]]
    # NOTE(review): `__init__` stores the timeout as `self.read_timeout`
    # (without the leading underscore).
    _read_timeout: Optional[float]
def __init__(
    self,
    make_sampler_fn: Callable[..., TaskSampler],
    sampler_fn_args: Sequence[Dict[str, Any]] = None,
    callback_sensors: Optional[Sequence[Sensor]] = None,
    auto_resample_when_done: bool = True,
    multiprocessing_start_method: Optional[str] = "forkserver",
    mp_ctx: Optional[BaseContext] = None,
    should_log: bool = True,
    max_processes: Optional[int] = None,
    read_timeout: Optional[
        float
    ] = 60,  # Seconds to wait for a task to return a response before timing out
) -> None:
    """Spawn the worker processes and query their observation/action spaces.

    # Parameters

    make_sampler_fn : function which creates a single TaskSampler.
    sampler_fn_args : one dictionary of arguments per task sampler, must be
        non-empty. NOTE: every dictionary is modified in place (an `"mp_ctx"`
        entry is added).
    callback_sensors : sensors wrapped into a `SensorSuite` that is handed
        to the workers.
    auto_resample_when_done : forwarded to the workers.
    multiprocessing_start_method : name of the start method used to create
        the multiprocessing context. Exactly one of this and `mp_ctx` must
        be not `None`.
    mp_ctx : an existing multiprocessing context to use.
    should_log : whether to log information about the workers.
    max_processes : upper bound on the number of worker processes; by
        default one process per task sampler is used.
    read_timeout : seconds to wait for a worker response before raising a
        `TimeoutError` (`None` disables the timeout).
    """
    self._is_waiting = False
    # Considered closed until the workers are up, this makes `close` (which
    # is also called by `__del__`) a no-op if the initialization fails early.
    self._is_closed = True
    self.should_log = should_log
    self.max_processes = max_processes
    self.read_timeout = read_timeout

    assert (
        sampler_fn_args is not None and len(sampler_fn_args) > 0
    ), "number of processes to be created should be greater than 0"

    self._num_task_samplers = len(sampler_fn_args)
    # Never use more processes than there are task samplers.
    self._num_processes = (
        self._num_task_samplers
        if max_processes is None
        else min(max_processes, self._num_task_samplers)
    )

    self._auto_resample_when_done = auto_resample_when_done

    assert (multiprocessing_start_method is None) != (
        mp_ctx is None
    ), "Exactly one of `multiprocessing_start_method`, and `mp_ctx` must be not None."
    if multiprocessing_start_method is not None:
        assert multiprocessing_start_method in self._valid_start_methods, (
            "multiprocessing_start_method must be one of {}. Got '{}'"
        ).format(self._valid_start_methods, multiprocessing_start_method)
        self._mp_ctx = mp.get_context(multiprocessing_start_method)
    else:
        self._mp_ctx = cast(BaseContext, mp_ctx)

    # Number of paused task samplers on each process.
    self.npaused_per_process = [0] * self._num_processes
    # Maps the index of an (unpaused) task sampler to
    # `[process index, index of the sampler within that process]`.
    self.sampler_index_to_process_ind_and_subprocess_ind: Optional[
        List[List[int]]
    ] = None
    self._reset_sampler_index_to_process_ind_and_subprocess_ind()

    self._workers: Optional[List[Union[mp.Process, Thread, BaseProcess]]] = None
    # The multiprocessing context is passed on to the task samplers.
    for args in sampler_fn_args:
        args["mp_ctx"] = self._mp_ctx
    (
        connection_poll_fns,
        connection_read_fns,
        self._connection_write_fns,
    ) = self._spawn_workers(  # noqa
        make_sampler_fn=make_sampler_fn,
        sampler_fn_args_list=[
            args_list for args_list in self._partition_to_processes(sampler_fn_args)
        ],
        callback_sensor_suite=(
            SensorSuite(callback_sensors)
            if isinstance(callback_sensors, Sequence)
            else callback_sensors
        ),
    )
    # Wrap the raw `recv` functions so that reads can time out.
    self._connection_read_fns = [
        self._create_read_function_with_timeout(
            read_fn=read_fn, poll_fn=poll_fn, timeout=self.read_timeout
        )
        for read_fn, poll_fn in zip(connection_read_fns, connection_poll_fns)
    ]

    self._is_closed = False

    for write_fn in self._connection_write_fns:
        write_fn((OBSERVATION_SPACE_COMMAND, None))

    # Note that we increase the read timeout below as initialization can take some time
    observation_spaces = [
        space
        for read_fn in self._connection_read_fns
        for space in read_fn(timeout_to_use=5 * self.read_timeout if self.read_timeout is not None else None)  # type: ignore
    ]

    if any(os is None for os in observation_spaces):
        raise NotImplementedError(
            "It appears that the `all_observation_spaces_equal`"
            " is not True for some task sampler created by"
            " VectorSampledTasks. This is not currently supported."
        )

    if any(observation_spaces[0] != os for os in observation_spaces):
        raise NotImplementedError(
            "It appears that the observation spaces of the samplers"
            " created in VectorSampledTasks are not equal."
            " This is not currently supported."
        )

    self.observation_space = observation_spaces[0]
    for write_fn in self._connection_write_fns:
        write_fn((ACTION_SPACE_COMMAND, None))
    # One action space per task sampler (in contrast to the observation
    # space these are not required to be equal here).
    self.action_spaces = [
        space for read_fn in self._connection_read_fns for space in read_fn()
    ]
@staticmethod
def _create_read_function_with_timeout(
    *,
    read_fn: Callable[[], Any],
    poll_fn: Callable[[float], bool],
    timeout: Optional[float],
) -> Callable[[], Any]:
    """Wrap `read_fn` so that waiting for data can time out.

    # Parameters

    read_fn : function returning the next available piece of data.
    poll_fn : function called as `poll_fn(timeout=...)` returning whether
        data became available within the timeout.
    timeout : default number of seconds to wait, `None` skips polling.

    # Returns

    A function accepting an optional `timeout_to_use` (defaulting to
    `timeout`) that raises a `TimeoutError` when polling reports no data
    and otherwise returns `read_fn()`.
    """

    def read_with_timeout(timeout_to_use: Optional[float] = timeout):
        # Without a timeout we skip polling and rely on `read_fn` alone.
        # noinspection PyArgumentList
        data_ready = timeout_to_use is None or poll_fn(timeout=timeout_to_use)
        if not data_ready:
            raise TimeoutError(
                f"Did not receive output from `VectorSampledTask` worker for {timeout_to_use} seconds."
            )
        return read_fn()

    return read_with_timeout
def _reset_sampler_index_to_process_ind_and_subprocess_ind(self):
    """Rebuild the map from sampler index to `[process, subprocess]` index.

    The task samplers are distributed over the processes with
    `partition_sequence`; entry `k` of the resulting list holds the process
    of the `k`-th sampler and its position within that process.
    """
    parts = partition_sequence([1] * self._num_task_samplers, self._num_processes)

    index_pairs = []
    for process_ind, part in enumerate(parts):
        index_pairs.extend(
            [process_ind, subprocess_ind] for subprocess_ind in range(len(part))
        )

    self.sampler_index_to_process_ind_and_subprocess_ind = index_pairs
def _partition_to_processes(self, seq: Union[Iterator, Sequence]):
    """Split `seq` (one item per unpaused sampler) into per-process lists.

    # Parameters

    seq : items ordered by sampler index, its length must equal the number
        of entries in `self.sampler_index_to_process_ind_and_subprocess_ind`.

    # Returns

    A list with one list of items per process, ordered by subprocess index.
    """
    per_process: List[List] = [[] for _ in range(self._num_processes)]

    items = list(seq)
    index_map = self.sampler_index_to_process_ind_and_subprocess_ind
    assert len(items) == len(index_map)

    for item, (process_ind, subprocess_ind) in zip(items, index_map):
        bucket = per_process[process_ind]
        # Items must arrive in subprocess order for every process.
        assert len(bucket) == subprocess_ind
        bucket.append(item)

    return per_process
@property
def is_closed(self) -> bool:
    """Whether the vector task has been closed."""
    closed = self._is_closed
    return closed
@property
def num_unpaused_tasks(self) -> int:
    """Number of unpaused task samplers.

    # Returns

    The total number of task samplers minus the number of paused ones
    (summed over all processes).
    """
    total = self._num_task_samplers
    num_paused = sum(self.npaused_per_process)
    return total - num_paused
@property
def mp_ctx(self):
    """Get the multiprocessing context used by the vector task.

    # Returns

    The multiprocessing context.
    """
    context = self._mp_ctx
    return context
@staticmethod
def _task_sampling_loop_worker(
    worker_id: Union[int, str],
    connection_read_fn: Callable,
    connection_write_fn: Callable,
    make_sampler_fn: Callable[..., TaskSampler],
    sampler_fn_args_list: List[Dict[str, Any]],
    callback_sensor_suite: Optional[SensorSuite],
    auto_resample_when_done: bool,
    should_log: bool,
    child_pipe: Optional[Connection] = None,
    parent_pipe: Optional[Connection] = None,
) -> None:
    """Process worker for creating and interacting with the
    Tasks/TaskSampler.

    Builds a `SingleProcessVectorSampledTasks` from the given arguments and
    then serves the messages read with `connection_read_fn` until a close
    command arrives. Messages of length three, `(sampler_index, command,
    data)`, address a single task sampler; all other messages are unpacked
    as `(commands, data_list)` and address all task samplers of this worker.
    Every served message other than the close command is answered with
    exactly one `connection_write_fn` call.

    # Parameters

    worker_id : identifier used in the process title and in log messages.
    connection_read_fn : function returning the next message.
    connection_write_fn : function used to send back results.
    make_sampler_fn : forwarded to `SingleProcessVectorSampledTasks`.
    sampler_fn_args_list : forwarded to `SingleProcessVectorSampledTasks`.
    callback_sensor_suite : forwarded to `SingleProcessVectorSampledTasks`.
    auto_resample_when_done : forwarded to `SingleProcessVectorSampledTasks`.
    should_log : whether to log (non-error) worker events.
    child_pipe : the worker end of the pipe, closed when the loop ends.
    parent_pipe : the parent end of the pipe, closed right away.
    """

    ptitle(f"VectorSampledTask: {worker_id}")

    sp_vector_sampled_tasks = SingleProcessVectorSampledTasks(
        make_sampler_fn=make_sampler_fn,
        sampler_fn_args_list=sampler_fn_args_list,
        callback_sensor_suite=callback_sensor_suite,
        auto_resample_when_done=auto_resample_when_done,
        should_log=should_log,
    )

    if parent_pipe is not None:
        parent_pipe.close()  # Means this pipe will close when the calling process closes it
    try:
        while True:
            read_input = connection_read_fn()

            # TODO: Was the below necessary?
            # with DelaySignalHandling():
            #     # Delaying signal handling here is necessary to ensure that we don't
            #     # (when processing a SIGTERM/SIGINT signal) attempt to send data to
            #     # a generator while it is already processing other data.
            if len(read_input) == 3:
                # Command addressed to a single task sampler.
                sampler_index, command, data = read_input

                assert command != CLOSE_COMMAND, "Must close all processes at once."
                assert (
                    command != RESUME_COMMAND
                ), "Must resume all task samplers at once."

                if command == PAUSE_COMMAND:
                    sp_vector_sampled_tasks.pause_at(sampler_index=sampler_index)
                    connection_write_fn("done")
                else:
                    connection_write_fn(
                        sp_vector_sampled_tasks.command_at(
                            sampler_index=sampler_index,
                            command=command,
                            data=data,
                        )
                    )
            else:
                # Command(s) addressed to all task samplers of this worker.
                commands, data_list = read_input

                assert (
                    commands != PAUSE_COMMAND
                ), "Cannot pause all task samplers at once."

                if commands == CLOSE_COMMAND:
                    # Will close the `sp_vector_sampled_tasks` in the `finally` clause below
                    break

                elif commands == RESUME_COMMAND:
                    sp_vector_sampled_tasks.resume_all()
                    connection_write_fn("done")
                else:
                    # A single command name is broadcast to all unpaused tasks.
                    if isinstance(commands, str):
                        commands = [
                            commands
                        ] * sp_vector_sampled_tasks.num_unpaused_tasks

                    connection_write_fn(
                        sp_vector_sampled_tasks.command(
                            commands=commands, data_list=data_list
                        )
                    )
    except KeyboardInterrupt:
        if should_log:
            get_logger().info(f"Worker {worker_id} KeyboardInterrupt")
    except Exception as e:
        get_logger().error(
            f"Worker {worker_id} encountered an exception:\n{traceback.format_exc()}"
        )
        raise e
    finally:
        # Best-effort cleanup, errors while closing are deliberately ignored.
        try:
            sp_vector_sampled_tasks.close()
        except Exception:
            pass

        if child_pipe is not None:
            child_pipe.close()
        if should_log:
            get_logger().info(f"Worker {worker_id} closing.")
def _spawn_workers(
    self,
    make_sampler_fn: Callable[..., TaskSampler],
    sampler_fn_args_list: Sequence[Sequence[Dict[str, Any]]],
    callback_sensor_suite: Optional[SensorSuite],
) -> Tuple[
    List[Callable[[], bool]], List[Callable[[], Any]], List[Callable[[Any], None]]
]:
    """Start one daemon worker process per entry of `sampler_fn_args_list`.

    # Parameters

    make_sampler_fn : function which creates a single TaskSampler.
    sampler_fn_args_list : for every process, the argument dictionaries of
        the task samplers it should run.
    callback_sensor_suite : forwarded to the workers.

    # Returns

    Three lists holding the `poll`, `recv` and `send` functions of the
    parent ends of the pipes to the workers.
    """
    pipes = [self._mp_ctx.Pipe(duplex=True) for _ in range(self._num_processes)]
    parent_connections, worker_connections = zip(*pipes)

    self._workers = []
    first_sampler_index = 0
    for process_ind, (worker_conn, parent_conn, process_args) in enumerate(
        zip(worker_connections, parent_connections, sampler_fn_args_list)
    ):
        # Workers running several samplers are identified by the range of
        # (global) sampler indices they cover.
        worker_id: Union[int, str] = process_ind
        num_samplers = len(process_args)
        if num_samplers != 1:
            last_sampler_index = first_sampler_index + num_samplers - 1
            worker_id = f"{process_ind}({first_sampler_index}-{last_sampler_index})"
        first_sampler_index += num_samplers

        if self.should_log:
            get_logger().info(
                f"Starting {worker_id}-th VectorSampledTask worker with args {process_args}"
            )

        worker_kwargs = dict(
            worker_id=worker_id,
            connection_read_fn=worker_conn.recv,
            connection_write_fn=worker_conn.send,
            make_sampler_fn=make_sampler_fn,
            sampler_fn_args_list=process_args,
            callback_sensor_suite=callback_sensor_suite,
            auto_resample_when_done=self._auto_resample_when_done,
            should_log=self.should_log,
            child_pipe=worker_conn,
            parent_pipe=parent_conn,
        )
        worker = self._mp_ctx.Process(  # type: ignore
            target=self._task_sampling_loop_worker,
            kwargs=worker_kwargs,
        )
        self._workers.append(worker)

        worker.daemon = True
        worker.start()
        worker_conn.close()  # Means this pipe will close when the child process closes it
        time.sleep(
            0.1
        )  # Useful to ensure things don't lock up when spawning many envs

    poll_fns = [conn.poll for conn in parent_connections]
    read_fns = [conn.recv for conn in parent_connections]
    write_fns = [conn.send for conn in parent_connections]
    return (poll_fns, read_fns, write_fns)
def next_task(self, **kwargs):
    """Move to the next Task for all (unpaused) TaskSamplers.

    # Parameters

    kwargs : key word arguments passed to the `next_task` function of the samplers.

    # Returns

    List of initial observations for each of the new tasks.
    """
    per_task_kwargs = [kwargs] * self.num_unpaused_tasks
    return self.command(commands=NEXT_TASK_COMMAND, data_list=per_task_kwargs)
def get_observations(self):
    """Get observations for all unpaused tasks.

    # Returns

    List of observations for each of the unpaused tasks.
    """
    function_names = ["get_observations"] * self.num_unpaused_tasks
    return self.call(function_names)
def command_at(
    self, sampler_index: int, command: str, data: Optional[Any] = None
) -> Any:
    """Runs the command on the selected task and returns the result.

    # Parameters

    sampler_index : index of the (unpaused) task sampler to address.
    command : the command to run.
    data : optional data accompanying the command.

    # Returns

    Result of the command.
    """
    self._is_waiting = True

    index_map = self.sampler_index_to_process_ind_and_subprocess_ind
    process_ind, subprocess_ind = index_map[sampler_index]

    # Three-element messages address a single sampler of the process.
    message = (subprocess_ind, command, data)
    self._connection_write_fns[process_ind](message)
    response = self._connection_read_fns[process_ind]()

    self._is_waiting = False
    return response
def call_at(
    self,
    sampler_index: int,
    function_name: str,
    function_args: Optional[List[Any]] = None,
) -> Any:
    """Calls a function (which is passed by name) on the selected task and
    returns the result.

    # Parameters

    sampler_index : Which task to call the function on.
    function_name : The name of the function to call on the task.
    function_args : Optional function args.

    # Returns

    Result of calling the function.
    """
    call_spec = (function_name, function_args)
    return self.command_at(
        sampler_index=sampler_index, command=CALL_COMMAND, data=call_spec
    )
def next_task_at(self, sampler_index: int) -> List[RLStepResult]:
    """Move to the next Task of the TaskSampler with index `sampler_index`.

    # Parameters

    sampler_index : Index of the task sampler to be moved to its next task.

    # Returns

    List of length one containing the observations of the newly sampled task.
    """
    result = self.command_at(
        sampler_index=sampler_index, command=NEXT_TASK_COMMAND, data=None
    )
    return [result]
def step_at(self, sampler_index: int, action: Any) -> List[RLStepResult]:
    """Step in the task of the sampler with index `sampler_index`.

    # Parameters

    sampler_index : Index of the sampler whose task should take the step.
    action : The action to take.

    # Returns

    List (of length one) containing the output of the step method of the
    selected task.
    """
    result = self.command_at(
        sampler_index=sampler_index, command=STEP_COMMAND, data=action
    )
    return [result]
def async_step(self, actions: Sequence[Any]) -> None:
    """Asynchronously step in the vectorized Tasks.

    The actions are sent to the workers without waiting for the results,
    use `wait_step` to collect them.

    # Parameters

    actions : actions to be performed in the vectorized Tasks (one per
        unpaused task).
    """
    self._is_waiting = True
    actions_per_process = self._partition_to_processes(actions)
    for write_fn, process_actions in zip(
        self._connection_write_fns, actions_per_process
    ):
        write_fn((STEP_COMMAND, process_actions))
def wait_step(self) -> List[Dict[str, Any]]:
    """Wait until all the asynchronized processes have synchronized.

    # Returns

    The concatenation (in process order) of the results read from all
    workers.
    """
    observations = [
        output for read_fn in self._connection_read_fns for output in read_fn()
    ]
    self._is_waiting = False
    return observations
def step(self, actions: Sequence[Any]):
    """Perform actions in the vectorized tasks.

    # Parameters

    actions: Sequence containing the action to be taken in each (unpaused) task.

    # Returns

    List of outputs from the step method of tasks.
    """
    self.async_step(actions)
    step_results = self.wait_step()
    return step_results
def reset_all(self):
    """Reset all task samplers to their initial state (except for the RNG
    seed).

    The results returned by the workers are discarded.
    """
    self.command(data_list=None, commands=RESET_COMMAND)
def set_seeds(self, seeds: List[int]):
    """Sets new tasks' RNG seeds.

    # Parameters

    seeds: List containing one new RNG seed per (unpaused) task sampler.
    """
    self.command(data_list=seeds, commands=SEED_COMMAND)
def close(self) -> None:
    """Close all workers (does nothing if already closed).

    Pending results are drained without waiting, every worker is asked to
    close and given a short time to do so; workers still alive afterwards
    are killed. Errors raised while draining, notifying or joining are
    ignored.
    """
    if self._is_closed:
        return

    def attempt(action: Callable[[], Any]) -> None:
        # Best-effort execution, failures must not prevent the shutdown.
        try:
            action()
        except Exception:
            pass

    if self._is_waiting:
        for read_fn in self._connection_read_fns:
            # noinspection PyArgumentList
            attempt(lambda: read_fn(0))  # Time out immediately

    for write_fn in self._connection_write_fns:
        attempt(lambda: write_fn((CLOSE_COMMAND, None)))

    for process in self._workers:
        attempt(lambda: process.join(timeout=0.1))

    for process in self._workers:
        if process.is_alive():
            process.kill()

    self._is_closed = True
def pause_at(self, sampler_index: int) -> None:
    """Pauses computation on the Task of the sampler with index
    `sampler_index` without destroying the Task. This is useful for not
    needing to call steps on all Tasks when only some are active (for
    example during the last samples of running eval).

    # Parameters

    sampler_index : which task sampler to pause. All indexes after this
        one will be shifted down by one.
    """
    # Collect any outstanding results before sending a new command.
    if self._is_waiting:
        for read_fn in self._connection_read_fns:
            read_fn()

    index_map = self.sampler_index_to_process_ind_and_subprocess_ind
    process_ind = index_map[sampler_index][0]

    self.command_at(sampler_index=sampler_index, command=PAUSE_COMMAND, data=None)

    # The samplers following the paused one on the same process move up by
    # one position within that process.
    for later_index in range(sampler_index + 1, len(index_map)):
        later_entry = index_map[later_index]
        if later_entry[0] != process_ind:
            break
        later_entry[1] -= 1

    index_map.pop(sampler_index)

    self.npaused_per_process[process_ind] += 1
def resume_all(self) -> None:
    """Resumes any paused task samplers.

    Every worker is told to resume and its acknowledgement is awaited;
    afterwards the sampler index map and the per-process pause counts are
    reset.
    """
    self._is_waiting = True
    for write_fn in self._connection_write_fns:
        write_fn((RESUME_COMMAND, None))

    for read_fn in self._connection_read_fns:
        read_fn()

    self._is_waiting = False

    self._reset_sampler_index_to_process_ind_and_subprocess_ind()

    # Zero the counts in place so that the list object itself is kept.
    self.npaused_per_process[:] = [0] * len(self.npaused_per_process)
def command(
    self, commands: Union[List[str], str], data_list: Optional[List]
) -> List[Any]:
    """Runs command(s) on all unpaused tasks and returns the results.

    # Parameters

    commands : one command per unpaused task or a single command that is
        used for all of them.
    data_list : one piece of data per unpaused task, `None` sends `None`
        to every task.

    # Returns

    The concatenation (in process order) of the results read from all
    workers.
    """
    self._is_waiting = True

    if isinstance(commands, str):
        commands = [commands] * self.num_unpaused_tasks

    if data_list is None:
        data_list = [None] * self.num_unpaused_tasks

    commands_per_process = self._partition_to_processes(commands)
    data_per_process = self._partition_to_processes(data_list)
    for write_fn, process_commands, process_data in zip(
        self._connection_write_fns, commands_per_process, data_per_process
    ):
        write_fn((process_commands, process_data))

    results = [
        result for read_fn in self._connection_read_fns for result in read_fn()
    ]
    self._is_waiting = False
    return results
def call(
    self,
    function_names: Union[str, List[str]],
    function_args_list: Optional[List[Any]] = None,
) -> List[Any]:
    """Calls a list of functions (which are passed by name) on the
    corresponding task (by index).

    # Parameters

    function_names : The name of the functions to call on the tasks. A
        single name is used for all unpaused tasks.
    function_args_list : List of function args for each function.
        If provided, len(function_args_list) should be as long as len(function_names).

    # Returns

    List of results of calling the functions.
    """
    self._is_waiting = True

    if isinstance(function_names, str):
        function_names = [function_names] * self.num_unpaused_tasks

    if function_args_list is None:
        function_args_list = [None] * len(function_names)

    assert len(function_names) == len(function_args_list)

    names_and_args = zip(function_names, function_args_list)
    calls_per_process = self._partition_to_processes(names_and_args)
    for write_fn, process_calls in zip(self._connection_write_fns, calls_per_process):
        write_fn((CALL_COMMAND, process_calls))

    results = [
        result for read_fn in self._connection_read_fns for result in read_fn()
    ]
    self._is_waiting = False
    return results
def attr_at(self, sampler_index: int, attr_name: str) -> Any:
    """Gets the attribute (specified by name) on the selected task and
    returns it.

    # Parameters

    sampler_index : Which task to get the attribute from.
    attr_name : The name of the attribute to get.

    # Returns

    The value of the attribute.
    """
    value = self.command_at(sampler_index, command=ATTR_COMMAND, data=attr_name)
    return value
def attr(self, attr_names: Union[List[str], str]) -> List[Any]:
    """Read attributes (specified by name) from the tasks.

    # Parameters

    attr_names : The attribute name to read for each task. A single string
        is repeated `num_unpaused_tasks` times.

    # Returns

    List with the result of the attribute command for each task.
    """
    per_task_names = (
        [attr_names] * self.num_unpaused_tasks
        if isinstance(attr_names, str)
        else attr_names
    )
    return self.command(commands=ATTR_COMMAND, data_list=per_task_names)
def render(
    self, mode: str = "human", *args, **kwargs
) -> Union[np.ndarray, None, List[np.ndarray]]:
    """Render all unpaused Tasks as a tiled image or a list of images.

    Every task is always asked to render with `mode="rgb"`; the `mode`
    argument of this method only selects what is done with the images:

    * `"raw_rgb_list"` returns the list of per-task images untouched,
    * `"rgb_array"` returns the tiled image,
    * `"human"` shows the tiled image in an OpenCV window and returns `None`.

    Any other mode raises `NotImplementedError` (after the images were
    rendered and tiled).
    """
    render_request = (args, {"mode": "rgb", **kwargs})
    images = self.command(
        commands=RENDER_COMMAND,
        data_list=[render_request] * self.num_unpaused_tasks,
    )
    if mode == "raw_rgb_list":
        return images

    tile = tile_images(images)
    if mode == "rgb_array":
        return tile
    if mode != "human":
        raise NotImplementedError

    # Imported lazily so OpenCV is only needed for on-screen display.
    import cv2

    # The last axis is reversed before display (presumably RGB -> BGR, the
    # channel order OpenCV expects).
    cv2.imshow("vectask", tile[:, :, ::-1])
    cv2.waitKey(1)
    return None
@property
def _valid_start_methods(self) -> Set[str]:
    """Names of the start methods this class accepts.

    Presumably these are `multiprocessing` start-method names; how the set
    is used is not visible from this block.
    """
    return {"forkserver", "spawn", "fork"}
def __del__(self):
    """Call `close()` when the object is finalized."""
    self.close()
def __enter__(self):
    """Enter a `with` block; the object itself is the context value."""
    return self
def __exit__(self, exc_type, exc_val, exc_tb):
    """Leave a `with` block by calling `close()`.

    The exception arguments are ignored and `None` is returned, so an
    exception raised inside the block is not suppressed.
    """
    self.close()
class SingleProcessVectorSampledTasks(object):
    """Vectorized collection of tasks, all driven from the calling process.

    Simultaneously handles the state of multiple TaskSamplers and their associated tasks.
    Allows for interacting with these tasks in a vectorized manner. When a task completes,
    another task is sampled from the appropriate task sampler. All the tasks are
    synchronized (for step and new_task methods).

    Every task sampler lives inside its own generator (see
    `_task_sampling_loop_generator_fn`). A command is delivered with
    `generator.send((command, data))` and the generator yields the result.

    # Attributes

    make_sampler_fn : function which creates a single TaskSampler.
    sampler_fn_args : sequence of dictionaries describing the args
        to pass to make_sampler_fn for each individual task sampler.
    auto_resample_when_done : automatically sample a new Task from the TaskSampler when
        the Task completes. If False, a finished task is kept until a new one is
        requested explicitly (e.g. through `next_task`).
    """

    observation_space: SpaceDict
    _vector_task_generators: List[Generator]
    _num_task_samplers: int
    _auto_resample_when_done: bool

    def __init__(
        self,
        make_sampler_fn: Callable[..., TaskSampler],
        sampler_fn_args_list: Optional[Sequence[Dict[str, Any]]] = None,
        callback_sensor_suite: Optional[SensorSuite] = None,
        auto_resample_when_done: bool = True,
        should_log: bool = True,
    ) -> None:
        """Create one generator per entry of `sampler_fn_args_list`.

        # Parameters

        make_sampler_fn : Callable creating a single TaskSampler.
        sampler_fn_args_list : Keyword arguments for `make_sampler_fn`, one
            dictionary per task sampler. Must be non-empty. `mp_ctx=None` is
            added to every dictionary unless it already has an `mp_ctx` key.
        callback_sensor_suite : Optional sensor suite queried when a task
            finishes; its observations are attached to the step result.
        auto_resample_when_done : Whether a finished task is immediately
            replaced by the sampler's next task.
        should_log : Whether start/close messages are logged.

        # Raises

        AssertionError : if `sampler_fn_args_list` is `None` or empty.
        NotImplementedError : if a task reports `None` as observation space or
            the observation spaces of the tasks differ.
        """
        # Start out "closed" so that `__del__` -> `close()` is a no-op when
        # construction fails before any generator exists.
        self._is_closed = True
        # Initialised up-front because `close()` reads it, and `close()` may
        # run (through `__del__`) even when the rest of `__init__` raises.
        self._paused: List[Tuple[int, Generator]] = []

        assert (
            sampler_fn_args_list is not None and len(sampler_fn_args_list) > 0
        ), "number of processes to be created should be greater than 0"

        self._num_task_samplers = len(sampler_fn_args_list)
        self._auto_resample_when_done = auto_resample_when_done

        self.should_log = should_log

        self._vector_task_generators: List[Generator] = self._create_generators(
            make_sampler_fn=make_sampler_fn,
            sampler_fn_args=[{"mp_ctx": None, **args} for args in sampler_fn_args_list],
            callback_sensor_suite=callback_sensor_suite,
        )

        self._is_closed = False

        observation_spaces = [
            vsi.send((OBSERVATION_SPACE_COMMAND, None))
            for vsi in self._vector_task_generators
        ]

        if any(space is None for space in observation_spaces):
            raise NotImplementedError(
                "It appears that the `all_observation_spaces_equal`"
                " is not True for some task sampler created by"
                " VectorSampledTasks. This is not currently supported."
            )

        if any(observation_spaces[0] != space for space in observation_spaces):
            raise NotImplementedError(
                "It appears that the observation spaces of the samplers"
                " created in VectorSampledTasks are not equal."
                " This is not currently supported."
            )

        self.observation_space = observation_spaces[0]
        self.action_spaces = [
            vsi.send((ACTION_SPACE_COMMAND, None))
            for vsi in self._vector_task_generators
        ]

    @property
    def is_closed(self) -> bool:
        """Has the vector task been closed."""
        return self._is_closed

    @property
    def mp_ctx(self) -> Optional[BaseContext]:
        """Always `None`: this implementation uses no multiprocessing context."""
        return None

    @property
    def num_unpaused_tasks(self) -> int:
        """Number of unpaused task samplers.

        # Returns

        Total number of task samplers minus the number of paused ones.
        """
        return self._num_task_samplers - len(self._paused)

    @staticmethod
    def _task_sampling_loop_generator_fn(
        worker_id: int,
        make_sampler_fn: Callable[..., TaskSampler],
        sampler_fn_args: Dict[str, Any],
        callback_sensor_suite: Optional[SensorSuite],
        auto_resample_when_done: bool,
        should_log: bool,
    ) -> Generator:
        """Generator for working with Tasks/TaskSampler.

        Creates the task sampler and its first task, yields `"started"`, and
        then serves `(command, data)` tuples received through `send` until
        the close command arrives. The result of every command is yielded
        back to the sender. The task sampler is closed when the generator
        finishes for any reason.

        # Raises

        RuntimeError : if the sampler has no first task, or has no task
            after a reset.
        NotImplementedError : if an unknown command is received.
        """
        task_sampler = make_sampler_fn(**sampler_fn_args)
        current_task = task_sampler.next_task()

        if current_task is None:
            raise RuntimeError(
                "Newly created task sampler had `None` as it's first task. This likely means that"
                " it was not provided with any tasks to generate. This can happen if, e.g., during testing"
                " you have started more processes than you had tasks to test. Currently this is not supported:"
                " every task sampler must be able to generate at least one task."
            )

        try:
            command, data = yield "started"

            while command != CLOSE_COMMAND:
                if command == STEP_COMMAND:
                    step_result: RLStepResult = current_task.step(data)
                    if current_task.is_done():
                        # Attach the final metrics (and optional callback
                        # data) of the finished task to the step's `info`.
                        metrics = current_task.metrics()
                        if metrics is not None and len(metrics) != 0:
                            if step_result.info is None:
                                step_result = step_result.clone({"info": {}})
                            step_result.info[COMPLETE_TASK_METRICS_KEY] = metrics

                        if callback_sensor_suite is not None:
                            task_callback_data = callback_sensor_suite.get_observations(
                                env=current_task.env, task=current_task
                            )
                            if step_result.info is None:
                                step_result = step_result.clone({"info": {}})
                            step_result.info[COMPLETE_TASK_CALLBACK_KEY] = (
                                task_callback_data
                            )

                        if auto_resample_when_done:
                            # The returned observation is replaced by the
                            # first observation of the next task, or by
                            # `None` when the sampler has no more tasks.
                            current_task = task_sampler.next_task()
                            if current_task is None:
                                step_result = step_result.clone({"observation": None})
                            else:
                                step_result = step_result.clone(
                                    {"observation": current_task.get_observations()}
                                )

                    command, data = yield step_result

                elif command == NEXT_TASK_COMMAND:
                    if data is not None:
                        current_task = task_sampler.next_task(**data)
                    else:
                        current_task = task_sampler.next_task()
                    observations = current_task.get_observations()

                    command, data = yield observations

                elif command == RENDER_COMMAND:
                    # `data` is an `(args, kwargs)` pair.
                    command, data = yield current_task.render(*data[0], **data[1])

                elif (
                    command == OBSERVATION_SPACE_COMMAND
                    or command == ACTION_SPACE_COMMAND
                ):
                    # The command string doubles as the attribute name.
                    res = getattr(current_task, command)
                    command, data = yield res

                elif command == CALL_COMMAND:
                    function_name, function_args = data
                    if function_args is None or len(function_args) == 0:
                        result = getattr(current_task, function_name)()
                    else:
                        result = getattr(current_task, function_name)(*function_args)
                    command, data = yield result

                elif command == SAMPLER_COMMAND:
                    function_name, function_args = data
                    if function_args is None or len(function_args) == 0:
                        result = getattr(task_sampler, function_name)()
                    else:
                        result = getattr(task_sampler, function_name)(*function_args)

                    command, data = yield result

                elif command == ATTR_COMMAND:
                    property_name = data
                    result = getattr(current_task, property_name)

                    command, data = yield result

                elif command == SAMPLER_ATTR_COMMAND:
                    property_name = data
                    result = getattr(task_sampler, property_name)

                    command, data = yield result

                elif command == RESET_COMMAND:
                    task_sampler.reset()
                    current_task = task_sampler.next_task()

                    if current_task is None:
                        raise RuntimeError(
                            "After resetting the task sampler it seems to have"
                            " no new tasks (the `task_sampler.next_task()` call"
                            " returned `None` after the reset). This suggests that"
                            " the task sampler's reset method was not implemented"
                            f" correctly (task sampler type is {type(task_sampler)})."
                        )

                    command, data = yield "done"

                elif command == SEED_COMMAND:
                    task_sampler.set_seed(data)

                    command, data = yield "done"

                else:
                    raise NotImplementedError()

        except KeyboardInterrupt:
            if should_log:
                get_logger().info(
                    "SingleProcessVectorSampledTask {} KeyboardInterrupt".format(
                        worker_id
                    )
                )
        except Exception:
            get_logger().error(traceback.format_exc())
            # Bare `raise` re-raises the active exception unchanged.
            raise
        finally:
            if should_log:
                get_logger().info(
                    "SingleProcessVectorSampledTask {} closing.".format(worker_id)
                )
            task_sampler.close()

    def _create_generators(
        self,
        make_sampler_fn: Callable[..., TaskSampler],
        sampler_fn_args: Sequence[Dict[str, Any]],
        callback_sensor_suite: Optional[SensorSuite],
    ) -> List[Generator]:
        """Create and start one task-sampling generator per args dictionary.

        # Raises

        RuntimeError : if a generator's first yielded value is not `"started"`.
        """
        generators = []
        for worker_id, current_sampler_fn_args in enumerate(sampler_fn_args):
            if self.should_log:
                get_logger().info(
                    f"Starting {worker_id}-th SingleProcessVectorSampledTasks generator with args {current_sampler_fn_args}."
                )
            generators.append(
                self._task_sampling_loop_generator_fn(
                    worker_id=worker_id,
                    make_sampler_fn=make_sampler_fn,
                    sampler_fn_args=current_sampler_fn_args,
                    callback_sensor_suite=callback_sensor_suite,
                    auto_resample_when_done=self._auto_resample_when_done,
                    should_log=self.should_log,
                )
            )

            # Advance to the first `yield` so the generator accepts `send`.
            if next(generators[-1]) != "started":
                raise RuntimeError("Generator failed to start.")

        return generators

    def next_task(self, **kwargs):
        """Move to the the next Task for all unpaused TaskSamplers.

        # Parameters

        kwargs : key word arguments passed to the `next_task` function of the samplers.

        # Returns

        List of initial observations for each of the new tasks.
        """
        return [
            g.send((NEXT_TASK_COMMAND, kwargs)) for g in self._vector_task_generators
        ]

    def get_observations(self):
        """Get observations for all unpaused tasks.

        # Returns

        List of observations for each of the unpaused tasks.
        """
        return self.call(
            ["get_observations"] * self.num_unpaused_tasks,
        )

    def next_task_at(self, index_process: int) -> List[Any]:
        """Move to the the next Task from the TaskSampler at position
        `index_process` in the vector.

        # Parameters

        index_process : Index of the generator to be reset.

        # Returns

        List of length one containing the observations the newly sampled task.
        """
        return [
            self._vector_task_generators[index_process].send((NEXT_TASK_COMMAND, None))
        ]

    def step_at(self, index_process: int, action: int) -> RLStepResult:
        """Step in the task at position `index_process` in the vector.

        # Parameters

        index_process : Index of the generator whose task takes the step.
        action : The action to take.

        # Returns

        The step result yielded by the indexed generator (a single result,
        not a list).
        """
        return self._vector_task_generators[index_process].send((STEP_COMMAND, action))

    def step(self, actions: List[List[int]]):
        """Perform actions in the vectorized tasks.

        # Parameters

        actions: One action per unpaused task, in generator order.

        # Returns

        List of outputs from the step method of tasks.
        """
        return [
            g.send((STEP_COMMAND, action))
            for g, action in zip(self._vector_task_generators, actions)
        ]

    def reset_all(self):
        """Reset all unpaused task samplers to their initial state (except
        for the RNG seed)."""
        return [g.send((RESET_COMMAND, None)) for g in self._vector_task_generators]

    def set_seeds(self, seeds: List[int]):
        """Sets new tasks' RNG seeds.

        # Parameters

        seeds: One RNG seed per unpaused task sampler, in generator order.
        """
        return [
            g.send((SEED_COMMAND, seed))
            for g, seed in zip(self._vector_task_generators, seeds)
        ]

    def close(self) -> None:
        """Send the close command to every generator, paused ones included.

        Does nothing if the vector task is already closed. Paused generators
        are closed as well so that their task samplers are released
        explicitly rather than whenever the generator gets finalized.
        """
        if self._is_closed:
            return

        paused_generators = [generator for _, generator in self._paused]
        for g in [*self._vector_task_generators, *paused_generators]:
            try:
                try:
                    g.send((CLOSE_COMMAND, None))
                except StopIteration:
                    # Expected: the generator returns after handling close.
                    pass
            except KeyboardInterrupt:
                pass

        self._is_closed = True

    def pause_at(self, sampler_index: int) -> None:
        """Pauses computation on the Task at position `sampler_index` without
        destroying the Task. This is useful for not needing to call steps on
        all Tasks when only some are active (for example during the last
        samples of running eval).

        # Parameters

        sampler_index : which task sampler to pause. All indexes after this
            one will be shifted down by one.
        """
        generator = self._vector_task_generators.pop(sampler_index)
        self._paused.append((sampler_index, generator))

    def resume_all(self) -> None:
        """Resumes any paused task samplers."""
        # Re-insert in reverse pausing order: each stored index is relative
        # to the list as it was when that generator was removed.
        for index, generator in reversed(self._paused):
            self._vector_task_generators.insert(index, generator)
        self._paused = []

    def command_at(
        self, sampler_index: int, command: str, data: Optional[Any] = None
    ) -> Any:
        """Sends a command (with optional data) to the selected generator and
        returns the result.

        # Parameters

        sampler_index : Which generator to send the command to.
        command : The command to send.
        data : Optional data accompanying the command.

        # Returns

        The value yielded by the generator in response.
        """
        return self._vector_task_generators[sampler_index].send((command, data))

    def command(
        self, commands: Union[List[str], str], data_list: Optional[List]
    ) -> List[Any]:
        """Sends one command (with data) to every unpaused generator.

        # Parameters

        commands : The command for each generator. A single string is
            repeated `num_unpaused_tasks` times.
        data_list : The data for each command. If `None`, every command is
            sent with `None` as data.

        # Returns

        List of the values yielded by the generators in response.
        """
        if isinstance(commands, str):
            commands = [commands] * self.num_unpaused_tasks

        if data_list is None:
            data_list = [None] * self.num_unpaused_tasks

        return [
            g.send((command, data))
            for g, command, data in zip(
                self._vector_task_generators, commands, data_list
            )
        ]

    def call_at(
        self,
        sampler_index: int,
        function_name: str,
        function_args: Optional[List[Any]] = None,
    ) -> Any:
        """Calls a function (which is passed by name) on the selected task and
        returns the result.

        # Parameters

        sampler_index : Which task to call the function on.
        function_name : The name of the function to call on the task.
        function_args : Optional function args.

        # Returns

        Result of calling the function.
        """
        return self._vector_task_generators[sampler_index].send(
            (CALL_COMMAND, (function_name, function_args))
        )

    def call(
        self,
        function_names: Union[str, List[str]],
        function_args_list: Optional[List[Any]] = None,
    ) -> List[Any]:
        """Calls a list of functions (which are passed by name) on the
        corresponding task (by index).

        # Parameters

        function_names : The name of the functions to call on the tasks. A
            single string is repeated `num_unpaused_tasks` times.
        function_args_list : List of function args for each function.
            If provided, len(function_args_list) should be as long as len(function_names).

        # Returns

        List of results of calling the functions.
        """
        if isinstance(function_names, str):
            function_names = [function_names] * self.num_unpaused_tasks

        if function_args_list is None:
            function_args_list = [None] * len(function_names)

        assert len(function_names) == len(function_args_list)

        return [
            g.send((CALL_COMMAND, args))
            for g, args in zip(
                self._vector_task_generators, zip(function_names, function_args_list)
            )
        ]

    def attr_at(self, sampler_index: int, attr_name: str) -> Any:
        """Gets the attribute (specified by name) on the selected task and
        returns it.

        # Parameters

        sampler_index : Which task to read the attribute from.
        attr_name : The name of the attribute to read.

        # Returns

        The value of the attribute.
        """
        return self._vector_task_generators[sampler_index].send(
            (ATTR_COMMAND, attr_name)
        )

    def attr(self, attr_names: Union[List[str], str]) -> List[Any]:
        """Gets the attributes (specified by name) on the tasks.

        # Parameters

        attr_names : The name of the attribute to read for each task. A
            single string is repeated `num_unpaused_tasks` times.

        # Returns

        List of attribute values.
        """
        if isinstance(attr_names, str):
            attr_names = [attr_names] * self.num_unpaused_tasks

        return [
            g.send((ATTR_COMMAND, attr_name))
            for g, attr_name in zip(self._vector_task_generators, attr_names)
        ]

    def render(
        self, mode: str = "human", *args, **kwargs
    ) -> Union[np.ndarray, None, List[np.ndarray]]:
        """Render observations from all Tasks in a tiled image or a list of
        images.

        Tasks are always rendered with `mode="rgb"`. With
        `mode="raw_rgb_list"` the images of the unpaused tasks are returned
        as a list. Otherwise an all-zero image is inserted at the position of
        every paused task and the images are tiled; `"rgb_array"` returns the
        tile, `"human"` shows it in an OpenCV window and returns `None`. Any
        other mode raises `NotImplementedError`.
        """
        images = [
            g.send((RENDER_COMMAND, (args, {"mode": "rgb", **kwargs})))
            for g in self._vector_task_generators
        ]

        if mode == "raw_rgb_list":
            return images

        for index, _ in reversed(self._paused):
            images.insert(index, np.zeros_like(images[0]))

        tile = tile_images(images)
        if mode == "human":
            import cv2

            cv2.imshow("vectask", tile[:, :, ::-1])
            cv2.waitKey(1)
            return None
        elif mode == "rgb_array":
            return tile
        else:
            raise NotImplementedError

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
================================================
FILE: allenact/base_abstractions/__init__.py
================================================
================================================
FILE: allenact/base_abstractions/callbacks.py
================================================
import abc
from typing import List, Dict, Any, Sequence, Optional
from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.base_abstractions.sensor import Sensor
try:
from typing import Literal
except ImportError:
from typing_extensions import Literal
class Callback(abc.ABC):
    """Base class for user callbacks.

    All hooks below are no-ops, so a subclass only needs to override the
    ones it is interested in.
    """

    def setup(
        self,
        name: str,
        config: ExperimentConfig,
        mode: Literal["train", "valid", "test"],
        **kwargs,
    ) -> None:
        """Called once before training begins."""

    def on_train_log(
        self,
        *,
        metrics: List[Dict[str, Any]],
        metric_means: Dict[str, float],
        tasks_data: List[Any],
        step: int,
        scalar_name_to_total_experiences_key: Dict[str, str],
        checkpoint_file_name: str,
        **kwargs,
    ) -> None:
        """Called once train is supposed to log.

        All arguments are keyword-only. `tasks_data` holds the data produced
        by the sensors returned from `callback_sensors`.
        """

    def on_valid_log(
        self,
        *,
        metrics: Dict[str, Any],
        metric_means: Dict[str, float],
        tasks_data: List[Any],
        step: int,
        scalar_name_to_total_experiences_key: Dict[str, str],
        checkpoint_file_name: str,
        **kwargs,
    ) -> None:
        """Called after validation ends.

        All arguments are keyword-only. `tasks_data` holds the data produced
        by the sensors returned from `callback_sensors`.
        """

    def on_test_log(
        self,
        *,
        metrics: Dict[str, Any],
        metric_means: Dict[str, float],
        tasks_data: List[Any],
        step: int,
        scalar_name_to_total_experiences_key: Dict[str, str],
        checkpoint_file_name: str,
        **kwargs,
    ) -> None:
        """Called after test ends.

        All arguments are keyword-only. `tasks_data` holds the data produced
        by the sensors returned from `callback_sensors`.
        """

    def after_save_project_state(self, base_dir: str) -> None:
        """Called after saving the project state in base_dir."""

    def callback_sensors(self) -> Optional[Sequence[Sensor]]:
        """Determines the data returned to the `tasks_data` parameter in the
        above *_log functions.

        The base implementation returns `None`.
        """
        return None
================================================
FILE: allenact/base_abstractions/distributions.py
================================================
import abc
from collections import OrderedDict
from typing import Any, Union, Callable, TypeVar, Dict, Optional, cast, Protocol
import gym
import torch
import torch.nn as nn
from torch.distributions.utils import lazy_property
from allenact.algorithms.onpolicy_sync.misc import TrackingInfoType
from allenact.base_abstractions.sensor import AbstractExpertActionSensor as Expert
from allenact.utils import spaces_utils as su
from allenact.utils.misc_utils import all_unique
# Type variable used to annotate the `teacher_forcing` argument of
# `TeacherForcingDistr`, which that class calls with its `approx_steps` value.
TeacherForcingAnnealingType = TypeVar("TeacherForcingAnnealingType")
"""
Modify standard PyTorch distributions so they are compatible with this code.
"""
class Distr(abc.ABC):
    """Abstract interface of an action distribution.

    Subclasses must implement `log_prob`, `entropy` and `sample`; `mode` is
    optional and raises `NotImplementedError` unless overridden.
    """

    @abc.abstractmethod
    def log_prob(self, actions: Any):
        """Return the log probability/ies of the provided action/s."""
        raise NotImplementedError()

    @abc.abstractmethod
    def entropy(self):
        """Return the entropy or entropies."""
        raise NotImplementedError()

    @abc.abstractmethod
    def sample(self, sample_shape=torch.Size()):
        """Sample actions."""
        raise NotImplementedError()

    def mode(self):
        """If available, return the action(s) with highest probability.

        It will only be called if using deterministic agents.
        """
        raise NotImplementedError()
class CategoricalDistr(torch.distributions.Categorical, Distr):
    """A categorical distribution extending PyTorch's Categorical.

    probs or logits are assumed to be passed with step and sampler
    dimensions as in: [step, samplers, ...]
    """

    def mode(self):
        """Return the index of the largest parameter along the last axis."""
        # `keepdim=False` so the result matches sample()'s shape.
        return self._param.argmax(dim=-1, keepdim=False)

    def log_prob(self, value: torch.Tensor):
        """Return the log probability of `value` without any broadcasting.

        `value` must have the shape of the logits without their last axis,
        optionally followed by one trailing axis of size 1 (which is then
        kept in the result). Any other shape raises `NotImplementedError`.
        """
        batch_shape = self.logits.shape[:-1]

        if value.shape == batch_shape:
            return super().log_prob(value=value)

        if value.shape == batch_shape + (1,):
            squeezed = value.squeeze(-1)
            return super().log_prob(value=squeezed).unsqueeze(-1)

        raise NotImplementedError(
            "Broadcasting in categorical distribution is disabled as it often leads"
            f" to unexpected results. We have that `value.shape == {value.shape}` but"
            f" expected a shape of "
            f" `self.logits.shape[:-1] == {batch_shape}` or"
            f" `self.logits.shape[:-1] + (1,) == {batch_shape + (1,)}`"
        )

    @lazy_property
    def log_probs_tensor(self):
        """Log-softmax of the logits over the last axis."""
        return torch.log_softmax(self.logits, dim=-1)

    @lazy_property
    def probs_tensor(self):
        """Softmax of the logits over the last axis."""
        return torch.softmax(self.logits, dim=-1)
class ConditionalDistr(Distr):
    """Action distribution that may be conditioned on other information
    (i.e. one part of a hierarchical distribution).

    # Attributes

    action_group_name : the identifier of the group of actions (`OrderedDict`) produced by this `ConditionalDistr`
    """

    action_group_name: str

    def __init__(
        self,
        distr_conditioned_on_input_fn_or_instance: Union[Callable, Distr],
        action_group_name: str,
        *distr_conditioned_on_input_args,
        **distr_conditioned_on_input_kwargs,
    ):
        """Initialize a ConditionalDistr.

        # Parameters

        distr_conditioned_on_input_fn_or_instance : Either a ready `Distr`
            instance, or a callable that builds the distribution once the
            previously sampled actions are known.
        action_group_name : the identifier of the group of actions (`OrderedDict`) produced by this `ConditionalDistr`
        distr_conditioned_on_input_args : positional arguments for Callable `distr_conditioned_on_input_fn_or_instance`
        distr_conditioned_on_input_kwargs : keyword arguments for Callable `distr_conditioned_on_input_fn_or_instance`
        """
        source = distr_conditioned_on_input_fn_or_instance
        already_a_distr = isinstance(source, Distr)

        # Exactly one of `distr` / `distr_conditioned_on_input_fn` starts
        # out as non-`None`.
        self.distr: Optional[Distr] = source if already_a_distr else None
        self.distr_conditioned_on_input_fn: Optional[Callable] = (
            None if already_a_distr else source
        )

        self.distr_conditioned_on_input_args = distr_conditioned_on_input_args
        self.distr_conditioned_on_input_kwargs = distr_conditioned_on_input_kwargs

        self.action_group_name = action_group_name

    def log_prob(self, actions):
        """Delegate to the underlying distribution's `log_prob`."""
        return self.distr.log_prob(actions)

    def entropy(self):
        """Delegate to the underlying distribution's `entropy`."""
        return self.distr.entropy()

    def condition_on_input(self, **ready_actions):
        """Build the underlying distribution if it does not exist yet.

        The stored callable receives the stored positional and keyword
        arguments plus `ready_actions`, whose keys must not clash with the
        stored keyword arguments. Does nothing when a distribution is
        already available.
        """
        if self.distr is not None:
            return

        stored_kwargs = self.distr_conditioned_on_input_kwargs
        assert all(key not in stored_kwargs for key in ready_actions)

        self.distr = self.distr_conditioned_on_input_fn(
            *self.distr_conditioned_on_input_args,
            **stored_kwargs,
            **ready_actions,
        )

    def reset(self):
        """Drop the built distribution so it can be conditioned again.

        A distribution passed in as an instance (no builder callable) is
        kept.
        """
        if self.distr_conditioned_on_input_fn is not None:
            self.distr = None

    def sample(self, sample_shape=torch.Size()) -> OrderedDict:
        """Sample and return the result keyed by `action_group_name`."""
        sampled = OrderedDict()
        sampled[self.action_group_name] = self.distr.sample(sample_shape)
        return sampled

    def mode(self) -> OrderedDict:
        """Return the mode keyed by `action_group_name`."""
        most_likely = OrderedDict()
        most_likely[self.action_group_name] = self.distr.mode()
        return most_likely
class SequentialDistr(Distr):
    """Chain of `ConditionalDistr`s evaluated in order.

    Each conditional distribution is conditioned on the actions produced by
    the ones before it; results are collected in an `OrderedDict` keyed by
    `action_group_name`.
    """

    def __init__(self, *conditional_distrs: ConditionalDistr):
        """Store the conditional distributions; their names must be unique."""
        action_group_names = [cd.action_group_name for cd in conditional_distrs]
        assert all_unique(
            action_group_names
        ), f"All conditional distribution `action_group_name`, must be unique, given names {action_group_names}"
        self.conditional_distrs = conditional_distrs

    def sample(self, sample_shape=torch.Size()):
        """Sample every action group in order, feeding earlier samples to
        later distributions."""
        sampled = OrderedDict()
        for conditional in self.conditional_distrs:
            conditional.condition_on_input(**sampled)
            sampled.update(conditional.sample(sample_shape=sample_shape))
        return sampled

    def mode(self):
        """Take the mode of every action group in order, feeding earlier
        modes to later distributions."""
        chosen = OrderedDict()
        for conditional in self.conditional_distrs:
            conditional.condition_on_input(**chosen)
            chosen.update(conditional.mode())
        return chosen

    def conditional_entropy(self):
        """Sum of the entropies of all conditional distributions (`0` if
        there are none)."""
        return sum(conditional.entropy() for conditional in self.conditional_distrs)

    def entropy(self):
        """Not supported; use `conditional_entropy` instead."""
        raise NotImplementedError(
            "Please use 'conditional_entropy' instead of 'entropy' as the `entropy_method_name` "
            "parameter in your loss when using `SequentialDistr`."
        )

    def log_prob(
        self, actions: Dict[str, Any], return_dict: bool = False
    ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
        """Log probability of `actions`, one entry per action group.

        # Parameters

        actions : Mapping from `action_group_name` to the actions of that
            group; must have as many entries as there are conditional
            distributions.
        return_dict : If `True`, return an `OrderedDict` with one log
            probability per action group; otherwise return their sum.
        """
        assert len(actions) == len(
            self.conditional_distrs
        ), f"{len(self.conditional_distrs)} conditional distributions for {len(actions)} action groups"

        if return_dict:
            per_group = OrderedDict()
            for conditional in self.conditional_distrs:
                conditional.condition_on_input(**actions)
                group = conditional.action_group_name
                per_group[group] = conditional.log_prob(actions[group])
            return per_group

        total = 0
        for conditional in self.conditional_distrs:
            conditional.condition_on_input(**actions)
            total = total + conditional.log_prob(actions[conditional.action_group_name])
        return total
class TrackingCallback(Protocol):
    """Structural type of a tracking callback: any callable accepting
    `type`, `info` and `n`."""

    def __call__(self, type: TrackingInfoType, info: Dict[str, Any], n: int): ...
class TeacherForcingDistr(Distr):
    """Wraps a `Distr` and overrides (some of) its samples with expert actions.

    `log_prob`, `entropy` and `conditional_entropy` are forwarded to the
    wrapped distribution unchanged; only `sample` is affected.
    """

    def __init__(
        self,
        distr: Distr,
        obs: Dict[str, Any],
        action_space: gym.spaces.Space,
        num_active_samplers: Optional[int],
        approx_steps: Optional[int],
        teacher_forcing: Optional[TeacherForcingAnnealingType],
        tracking_callback: Optional[TrackingCallback],
        always_enforce: bool = False,
    ):
        """Initializer.

        # Parameters

        distr : The distribution to wrap.
        obs : Observations; must contain an `"expert_action"` entry.
        action_space : Action space the samples are flattened against.
        num_active_samplers : If not `None`, the size the sampler axis of
            the flattened actions is asserted to have.
        approx_steps : Value passed to `teacher_forcing`. If `None`, no
            teacher forcing info is recorded in `sample`.
        teacher_forcing : Callable mapping `approx_steps` to the probability
            of forcing the expert action (unused if `always_enforce`).
        tracking_callback : Optional callback receiving the teacher forcing
            info after each `sample`.
        always_enforce : If `True`, the expert action is used whenever it
            exists.
        """
        self.distr = distr
        self.is_sequential = isinstance(self.distr, SequentialDistr)

        # action_space is a gym.spaces.Dict for SequentialDistr, or any gym.Space for other Distr
        self.action_space = action_space
        self.num_active_samplers = num_active_samplers
        self.approx_steps = approx_steps
        self.teacher_forcing = teacher_forcing
        self.tracking_callback = tracking_callback
        self.always_enforce = always_enforce

        assert (
            "expert_action" in obs
        ), "When using teacher forcing, obs must contain an `expert_action` uuid"

        obs_space = Expert.flagged_space(
            self.action_space, use_dict_as_groups=self.is_sequential
        )
        self.expert = su.unflatten(obs_space, obs["expert_action"])

    def enforce(
        self,
        sample: Any,
        action_space: gym.spaces.Space,
        teacher: OrderedDict,
        teacher_force_info: Optional[Dict[str, Any]],
        action_name: Optional[str] = None,
    ):
        """Replace sampled actions by expert actions where forcing applies.

        # Parameters

        sample : Actions sampled from the wrapped distribution.
        action_space : Space used to flatten/unflatten `sample`.
        teacher : Expert data holding the expert actions and a mask that
            tells where an expert action exists.
        teacher_force_info : If not `None`, the mean of the forcing mask is
            stored in it under a `teacher_ratio/sampled...` key.
        action_name : Optional suffix for that key.

        # Returns

        The (partially overridden) actions, unflattened with `action_space`.
        """
        actions = su.flatten(action_space, sample)

        assert (
            len(actions.shape) == 3
        ), f"Got flattened actions with shape {actions.shape} (it should be [1 x `samplers` x `flatdims`])"

        if self.num_active_samplers is not None:
            assert actions.shape[1] == self.num_active_samplers

        expert_actions = su.flatten(action_space, teacher[Expert.ACTION_POLICY_LABEL])
        assert (
            expert_actions.shape == actions.shape
        ), f"expert actions shape {expert_actions.shape} doesn't match the model's {actions.shape}"

        # expert_success is 0 if the expert action could not be computed and otherwise equals 1.
        expert_action_exists_mask = teacher[Expert.EXPERT_SUCCESS_LABEL]

        if not self.always_enforce:
            # Force with the probability given by the annealing schedule,
            # but only where an expert action exists.
            teacher_forcing_mask = (
                torch.distributions.bernoulli.Bernoulli(
                    torch.tensor(self.teacher_forcing(self.approx_steps))
                )
                .sample(expert_action_exists_mask.shape)
                .long()
                .to(actions.device)
            ) * expert_action_exists_mask
        else:
            teacher_forcing_mask = expert_action_exists_mask

        if teacher_force_info is not None:
            teacher_force_info[
                "teacher_ratio/sampled{}".format(
                    f"_{action_name}" if action_name is not None else ""
                )
            ] = (teacher_forcing_mask.float().mean().item())

        # Append trailing singleton axes so the mask broadcasts over the
        # remaining axes of the flattened actions.
        extended_shape = teacher_forcing_mask.shape + (1,) * (
            len(actions.shape) - len(teacher_forcing_mask.shape)
        )

        # `torch.where` expects a boolean condition; a uint8 (`.byte()`)
        # condition is deprecated.
        actions = torch.where(
            teacher_forcing_mask.bool().view(extended_shape), expert_actions, actions
        )

        return su.unflatten(action_space, actions)

    def log_prob(self, actions: Any):
        """Log probability under the wrapped distribution."""
        return self.distr.log_prob(actions)

    def entropy(self):
        """Entropy of the wrapped distribution."""
        return self.distr.entropy()

    def conditional_entropy(self):
        """Conditional entropy of the wrapped distribution, if it defines
        `conditional_entropy`; otherwise raises `NotImplementedError`."""
        if hasattr(self.distr, "conditional_entropy"):
            return self.distr.conditional_entropy()

        raise NotImplementedError(
            f"`conditional_entropy` is not defined for {self.distr}."
        )

    def sample(self, sample_shape=torch.Size()):
        """Sample from the wrapped distribution and apply teacher forcing.

        For a `SequentialDistr` each action group is sampled and enforced in
        turn, and later groups are conditioned on the enforced actions of
        earlier ones. If a tracking callback is set and
        `num_active_samplers` is not `None`, the callback is invoked with the
        collected info (which is `None` when `approx_steps` is `None`).
        """
        teacher_force_info: Optional[Dict[str, Any]] = None
        if self.approx_steps is not None:
            teacher_force_info = {
                "teacher_ratio/enforced": self.teacher_forcing(self.approx_steps),
            }

        if self.is_sequential:
            res = OrderedDict()
            for cd in cast(SequentialDistr, self.distr).conditional_distrs:
                cd.condition_on_input(**res)
                action_group_name = cd.action_group_name
                res[action_group_name] = self.enforce(
                    cd.sample(sample_shape)[action_group_name],
                    cast(gym.spaces.Dict, self.action_space)[action_group_name],
                    self.expert[action_group_name],
                    teacher_force_info,
                    action_group_name,
                )
        else:
            res = self.enforce(
                self.distr.sample(sample_shape),
                self.action_space,
                self.expert,
                teacher_force_info,
            )

        if self.tracking_callback is not None and self.num_active_samplers is not None:
            self.tracking_callback(
                type=TrackingInfoType.TEACHER_FORCING,
                info=teacher_force_info,
                n=self.num_active_samplers,
            )

        return res
class AddBias(nn.Module):
    """Module that adds a learnable bias to its input."""

    def __init__(self, bias: torch.FloatTensor):
        """Initializer.

        # Parameters

        bias : data to use as the initial values of the bias. It is stored
            as a trainable parameter with an extra trailing axis of size 1.
        """
        super().__init__()
        initial_column = bias.unsqueeze(1)
        self._bias = nn.Parameter(initial_column, requires_grad=True)

    def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:  # type: ignore
        """Adds the stored bias parameters to `x`.

        `x` must have 2 or 4 dimensions; the bias is broadcast along
        dimension 1 of `x`.
        """
        ndim = x.dim()
        assert ndim in [2, 4]

        # Put the bias values on axis 1 and keep every other axis at size 1.
        broadcast_shape = (1, -1) if ndim == 2 else (1, -1, 1, 1)
        bias = self._bias.t().view(*broadcast_shape)

        return x + bias  # type:ignore
================================================
FILE: allenact/base_abstractions/experiment_config.py
================================================
"""Defines the `ExperimentConfig` abstract class used as the basis of all
experiments."""
import abc
from typing import Dict, Any, Optional, List, Union, Sequence, Tuple, cast
import torch
import torch.nn as nn
from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import TrainingPipeline, Builder
from allenact.utils.system import get_logger
from allenact.utils.viz_utils import VizSuite
def split_processes_onto_devices(nprocesses: int, ndevices: int):
    """Distribute `nprocesses` processes over `ndevices` devices as evenly
    as possible.

    Processes are assigned round-robin, so the first
    `nprocesses % ndevices` devices get one process more than the rest.

    # Parameters

    nprocesses : Total number of processes. Must be `0` or at least
        `ndevices`.
    ndevices : Number of devices.

    # Returns

    List of length `ndevices` with the number of processes per device.

    # Raises

    AssertionError : if `0 < nprocesses < ndevices`.
    ValueError : if processes are to be distributed over fewer than one
        device.
    """
    assert (
        nprocesses == 0 or nprocesses >= ndevices
    ), "NUM_PROCESSES {} < ndevices {}".format(nprocesses, ndevices)

    if nprocesses <= 0:
        # Nothing to distribute: every device gets zero processes.
        return [0] * ndevices

    if ndevices <= 0:
        raise ValueError(
            "Cannot split {} processes onto {} devices.".format(nprocesses, ndevices)
        )

    # Closed form of the round-robin assignment.
    per_device, leftover = divmod(nprocesses, ndevices)
    return [per_device + 1] * leftover + [per_device] * (ndevices - leftover)
class MachineParams(object):
def __init__(
self,
nprocesses: Union[int, Sequence[int]],
devices: Union[
None, int, str, torch.device, Sequence[Union[int, str, torch.device]]
] = None,
sensor_preprocessor_graph: Optional[
Union[SensorPreprocessorGraph, Builder[SensorPreprocessorGraph]]
] = None,
sampler_devices: Union[
None, int, str, torch.device, Sequence[Union[int, str, torch.device]]
] = None,
visualizer: Optional[Union[VizSuite, Builder[VizSuite]]] = None,
gpu_ids: Union[int, Sequence[int]] = None,
local_worker_ids: Optional[List[int]] = None,
):
assert (
gpu_ids is None or devices is None
), "only one of `gpu_ids` or `devices` should be set."
if gpu_ids is not None:
get_logger().warning(
"The `gpu_ids` parameter will be deprecated, use `devices` instead."
)
devices = gpu_ids
self.nprocesses = (
nprocesses if isinstance(nprocesses, Sequence) else (nprocesses,)
)
self.devices: Tuple[torch.device, ...] = self._standardize_devices(
devices=devices, nworkers=len(self.nprocesses)
)
self._sensor_preprocessor_graph_maybe_builder = sensor_preprocessor_graph
self.sampler_devices: Tuple[torch.device, ...] = (
None
if sampler_devices is None
else self._standardize_devices(
devices=sampler_devices, nworkers=len(self.nprocesses)
)
)
self._visualizer_maybe_builder = visualizer
self._sensor_preprocessor_graph_cached: Optional[SensorPreprocessorGraph] = None
self._visualizer_cached: Optional[VizSuite] = None
self.local_worker_ids: Optional[List[int]] = None
self.set_local_worker_ids(local_worker_ids)
def set_local_worker_ids(self, local_worker_ids: Optional[List[int]]):
self.local_worker_ids = local_worker_ids or list(range(len(self.devices)))
assert all(0 <= id < len(self.devices) for id in self.local_worker_ids), (
f"Passed {len(self.local_worker_ids)} local worker ids {self.local_worker_ids}"
f" for {len(self.devices)} total devices (workers)"
)
@classmethod
def instance_from(
cls, machine_params: Union["MachineParams", Dict[str, Any]]
) -> "MachineParams":
if isinstance(machine_params, cls):
return machine_params
assert isinstance(machine_params, Dict)
return cls(**machine_params)
@staticmethod
def _standardize_devices(
devices: Optional[
Union[int, str, torch.device, Sequence[Union[int, str, torch.device]]]
],
nworkers: int,
) -> Tuple[torch.device, ...]:
if devices is None or (isinstance(devices, Sequence) and len(devices) == 0):
devices = torch.device("cpu")
if not isinstance(devices, Sequence):
devices = (devices,) * nworkers
assert len(devices) == nworkers, (
f"The number of devices (len({devices})={len(devices)})"
f" must equal the number of workers ({nworkers})"
)
devices = tuple(
torch.device("cpu") if d == -1 else torch.device(d) for d in devices # type: ignore
)
for d in devices:
if d != torch.device("cpu"):
try:
torch.cuda.get_device_capability(d) # type: ignore
except Exception:
raise RuntimeError(
f"It appears the cuda device {d} is not available on your system."
)
return cast(Tuple[torch.device, ...], devices)
@property
def sensor_preprocessor_graph(self) -> Optional[SensorPreprocessorGraph]:
    """The sensor preprocessor graph, built on first access when needed.

    # Returns

    `None` when no graph (or builder) was provided. Otherwise the cached
    graph: on first access a `Builder` is called to create it, while any
    other object is cached as given.
    """
    graph_or_builder = self._sensor_preprocessor_graph_maybe_builder
    if graph_or_builder is None:
        return None

    if self._sensor_preprocessor_graph_cached is None:
        self._sensor_preprocessor_graph_cached = (
            graph_or_builder()
            if isinstance(graph_or_builder, Builder)
            else graph_or_builder
        )

    return self._sensor_preprocessor_graph_cached
def set_visualizer(self, viz: VizSuite):
    """Replace the visualizer unless one has already been instantiated.

    # Parameters

    viz : The visualizer to use. It is ignored, with a logged warning, when a
        visualizer has already been cached.
    """
    if self._visualizer_cached is not None:
        get_logger().warning("Ignoring viz (already instantiated)")
        return
    self._visualizer_maybe_builder = viz
@property
def visualizer(self) -> Optional[VizSuite]:
    """The visualizer, built on first access when needed.

    # Returns

    `None` when no visualizer (or builder) was provided. Otherwise the cached
    visualizer: on first access a `Builder` is called to create it, while any
    other object is cached as given.
    """
    viz_or_builder = self._visualizer_maybe_builder
    if viz_or_builder is None:
        return None

    if self._visualizer_cached is None:
        self._visualizer_cached = (
            viz_or_builder()
            if isinstance(viz_or_builder, Builder)
            else viz_or_builder
        )

    return self._visualizer_cached
class FrozenClassVariables(abc.ABCMeta):
    """Metaclass that forbids assigning to class-level attributes.

    It is used by `ExperimentConfig` so that configuration values cannot be
    changed on the class itself; instances of such classes can still have
    their attributes modified. The attributes that `abc` maintains on classes
    (`__abstractmethods__` and anything prefixed with `_abc_`) stay writable.
    """

    def __setattr__(cls, attr, value):
        # Let through the bookkeeping attributes that `abc.ABCMeta` sets.
        if (
            not isinstance(cls, type)
            or attr == "__abstractmethods__"
            or attr.startswith("_abc_")
        ):
            super().__setattr__(attr, value)
            return

        raise RuntimeError(
            "Cannot edit class-level attributes.\n"
            "Changing the values of class-level attributes is disabled in ExperimentConfig classes.\n"
            "This is to prevent problems that can occur otherwise when using multiprocessing.\n"
            "If you wish to change the value of a configuration, please do so for an instance of that"
            " configuration.\nTriggered by attempting to modify {}".format(
                cls.__name__
            )
        )
class ExperimentConfig(metaclass=FrozenClassVariables):
    """Base class from which every experiment definition derives.

    Experiments are described by Python classes rather than yaml or text
    files: to define an experiment, subclass `ExperimentConfig` and implement
    the methods below, which are then called when the experiment is run.
    """

    @abc.abstractmethod
    def tag(self) -> str:
        """Return a string describing the experiment."""
        raise NotImplementedError()

    @abc.abstractmethod
    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Build the training pipeline of the experiment.

        # Parameters

        kwargs : Extra keyword arguments (documented as currently unused).

        # Returns

        An instantiated `TrainingPipeline` object.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def machine_params(
        self, mode="train", **kwargs
    ) -> Union[MachineParams, Dict[str, Any]]:
        """Describe the machine resources to use for the given mode.

        Machine information includes at least (1) the number of processes to
        run with and (2) the devices to use.

        # Parameters

        mode : Which set of machine parameters is requested: "train",
            "valid", or "test".
        kwargs : Extra keyword arguments.

        # Returns

        Either a `MachineParams` object or a dictionary of keyword arguments
        from which one can be constructed.
        NOTE(review): earlier documentation described the dictionary as
        `{"nprocesses": ..., "gpu_ids": ..., ...}` with an empty `gpu_ids`
        meaning cpu-only -- confirm the accepted keys against
        `MachineParams.__init__`.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def create_model(self, **kwargs) -> nn.Module:
        """Build and return the neural model."""
        raise NotImplementedError()

    @abc.abstractmethod
    def make_sampler_fn(self, **kwargs) -> TaskSampler:
        """Build a `TaskSampler` from keyword arguments.

        The `kwargs` are produced by one of
        `ExperimentConfig.train_task_sampler_args`,
        `ExperimentConfig.valid_task_sampler_args`, or
        `ExperimentConfig.test_task_sampler_args`, according to whether the
        user is training, validating, or testing.
        """
        raise NotImplementedError()

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for the `process_ind`th training
        process.

        The returned dictionary is meant to be passed, as keyword arguments,
        to `ExperimentConfig.make_sampler_fn` in order to create a task
        sampler.

        # Parameters

        process_ind : The unique index of the training process
            (`0 ≤ process_ind < total_processes`).
        total_processes : The total number of training processes.
        devices : Gpu devices (if any) to use.
        seeds : The seeds to use, if any.
        deterministic_cudnn : Whether or not to use deterministic cudnn.

        # Returns

        The keyword arguments for `make_sampler_fn`.
        """
        raise NotImplementedError()

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for the `process_ind`th validation
        process.

        The parameters have the same meaning as in
        `ExperimentConfig.train_task_sampler_args`.
        """
        raise NotImplementedError()

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for the `process_ind`th test process.

        The parameters have the same meaning as in
        `ExperimentConfig.train_task_sampler_args`.
        """
        raise NotImplementedError()
================================================
FILE: allenact/base_abstractions/misc.py
================================================
import abc
from typing import (
Dict,
Any,
TypeVar,
Sequence,
NamedTuple,
Optional,
List,
Union,
Generic,
)
import attr
import torch
# Type variables used to parameterize generic classes and annotations over the
# environment type, the action-distribution type, and the model type.
EnvType = TypeVar("EnvType")
DistributionType = TypeVar("DistributionType")
ModelType = TypeVar("ModelType")
# Alias for observation dictionaries: string keys mapping either to a tensor or
# to a nested dictionary.
ObservationType = Dict[str, Union[torch.Tensor, Dict[str, Any]]]
class RLStepResult(NamedTuple):
    """Result of a single step: observation, reward, done flag, and info.

    Every field may be `None`, which `merge` interprets as "not provided".
    """

    observation: Optional[Any]
    reward: Optional[Union[float, List[float]]]
    done: Optional[bool]
    info: Optional[Dict[str, Any]]

    def clone(self, new_info: Dict[str, Any]):
        """Return a copy in which the fields named in `new_info` are replaced.

        # Parameters

        new_info : Maps field names (`"observation"`, `"reward"`, `"done"`,
            `"info"`) to replacement values. Fields whose name is missing keep
            their current value; other keys are ignored.

        # Returns

        A new `RLStepResult`.
        """
        return RLStepResult(
            observation=new_info.get("observation", self.observation),
            reward=new_info.get("reward", self.reward),
            done=new_info.get("done", self.done),
            info=new_info.get("info", self.info),
        )

    def merge(self, other: "RLStepResult"):
        """Combine this result with `other`, giving priority to `other`.

        For `observation`, `reward`, and `done`, the value from `other` is
        used unless it is `None`. The `info` dictionaries are merged, with
        entries from `other` overriding those of `self`; an `info` that is
        `None` is treated as empty.

        # Parameters

        other : The result whose non-`None` fields take precedence.

        # Returns

        A new `RLStepResult` whose `info` is always a dictionary.
        """
        return RLStepResult(
            observation=(
                self.observation if other.observation is None else other.observation
            ),
            reward=self.reward if other.reward is None else other.reward,
            done=self.done if other.done is None else other.done,
            info={
                **(self.info if self.info is not None else {}),
                # It is `other.info` (not `other`) that may be `None` here.
                **(other.info if other.info is not None else {}),
            },
        )
class ActorCriticOutput(tuple, Generic[DistributionType]):
    """Tuple `(distributions, values, extras)` with named attribute access.

    The three entries can be unpacked or indexed as in any tuple and are also
    available as the attributes `distributions`, `values`, and `extras`.
    """

    distributions: DistributionType
    values: torch.FloatTensor
    extras: Dict[str, Any]

    # noinspection PyTypeChecker
    def __new__(
        cls,
        distributions: DistributionType,
        values: torch.FloatTensor,
        extras: Dict[str, Any],
    ):
        """Create the tuple and mirror its entries as attributes."""
        instance = tuple.__new__(cls, (distributions, values, extras))
        instance.distributions = distributions
        instance.values = values
        instance.extras = extras
        return instance

    def __repr__(self) -> str:
        """Return a string listing the three stored fields."""
        description = (
            f"Group(distributions={self.distributions},"
            f" values={self.values},"
            f" extras={self.extras})"
        )
        return description
class Memory(Dict):
    """Dictionary of named memory tensors paired with their sampler dimension.

    Each value is a `(tensor, sampler_dim)` tuple, where `sampler_dim` is the
    index of the tensor dimension along which samplers are laid out.
    """

    def __init__(self, *args, **kwargs):
        """Build the memory from a single positional argument or from kwargs.

        # Parameters

        args : Either empty, or one item that is a sequence of
            `(key, (tensor, sampler_dim))` pairs or a dict mapping keys to
            `(tensor, sampler_dim)` pairs.
        kwargs : Entries given as `key=(tensor, sampler_dim)`; only used when
            no positional argument is passed.
        """
        super().__init__()
        if args:
            assert len(args) == 1, (
                "Only one of Sequence[Tuple[str, Tuple[torch.Tensor, int]]]"
                "or Dict[str, Tuple[torch.Tensor, int]] accepted as unnamed args"
            )
            source = args[0]
            if isinstance(source, Sequence):
                for key, tensor_dim in source:
                    assert (
                        len(tensor_dim) == 2
                    ), "Only Tuple[torch.Tensor, int]] accepted as second item in Tuples"
                    tensor, dim = tensor_dim
                    self.check_append(key, tensor, dim)
            elif isinstance(source, Dict):
                for key, tensor_dim in source.items():
                    assert (
                        len(tensor_dim) == 2
                    ), "Only Tuple[torch.Tensor, int]] accepted as values in Dict"
                    tensor, dim = tensor_dim
                    self.check_append(key, tensor, dim)
        elif kwargs:
            for key, tensor_dim in kwargs.items():
                assert (
                    len(tensor_dim) == 2
                ), "Only Tuple[torch.Tensor, int]] accepted as keyword arg"
                tensor, dim = tensor_dim
                self.check_append(key, tensor, dim)

    def check_append(
        self, key: str, tensor: torch.Tensor, sampler_dim: int
    ) -> "Memory":
        """Validate and add a new memory entry.

        # Parameters

        key : String identifier of the memory type; must not be present yet.
        tensor : The memory tensor.
        sampler_dim : Index of the sampler dimension within `tensor`.

        # Returns

        This memory (updated in place).
        """
        assert isinstance(key, str), "key {} must be str".format(key)
        assert isinstance(
            tensor, torch.Tensor
        ), "tensor {} must be torch.Tensor".format(tensor)
        assert isinstance(sampler_dim, int), "sampler_dim {} must be int".format(
            sampler_dim
        )
        assert key not in self, "Reused key {}".format(key)
        assert (
            0 <= sampler_dim < len(tensor.shape)
        ), "Got sampler_dim {} for tensor with shape {}".format(
            sampler_dim, tensor.shape
        )

        self[key] = (tensor, sampler_dim)
        return self

    def tensor(self, key: str) -> torch.Tensor:
        """Return the tensor stored for the memory type `key`.

        # Parameters

        key : String identifier of the memory type.

        # Returns

        The memory tensor.
        """
        assert key in self, "Missing key {}".format(key)
        stored_tensor, _ = self[key]
        return stored_tensor

    def sampler_dim(self, key: str) -> int:
        """Return the sampler dimension stored for the memory type `key`.

        # Parameters

        key : String identifier of the memory type.

        # Returns

        The sampler dimension.
        """
        assert key in self, "Missing key {}".format(key)
        _, stored_dim = self[key]
        return stored_dim

    def sampler_select(self, keep: Sequence[int]) -> "Memory":
        """Select sampler indices along each entry's sampler dimension (as
        with PyTorch's `index_select`).

        Only entries whose sampler dimension is larger than `len(keep)` are
        selected and placed in the result. If no entry qualifies, this memory
        itself is returned.

        # Parameters

        keep : The sampler indices to keep.

        # Returns

        The selected memory.
        """
        selected = Memory()
        for name in self:
            dim = self.sampler_dim(name)
            current = self.tensor(name)
            assert len(keep) == 0 or (
                0 <= min(keep) and max(keep) < current.shape[dim]
            ), "Got min(keep)={} max(keep)={} for memory type {} with shape {}, dim {}".format(
                min(keep), max(keep), name, current.shape, dim
            )
            if current.shape[dim] > len(keep):
                indices = torch.as_tensor(
                    list(keep), dtype=torch.int64, device=current.device
                )
                selected.check_append(
                    name, current.index_select(dim=dim, index=indices), dim
                )

        # `selected` is non-empty exactly when at least one entry was selected.
        return selected if len(selected) > 0 else self

    def set_tensor(self, key: str, tensor: torch.Tensor) -> "Memory":
        """Replace the tensor of an existing entry by one of identical shape.

        # Parameters

        key : Identifier of the memory type to update.
        tensor : The updated tensor.

        # Returns

        This memory (updated in place).
        """
        assert key in self, "Missing key {}".format(key)
        previous, dim = self[key]
        assert (
            tensor.shape == previous.shape
        ), "setting tensor with shape {} for former {}".format(
            tensor.shape, previous.shape
        )
        self[key] = (tensor, dim)
        return self

    def step_select(self, step: int) -> "Memory":
        """Keep a single step, slicing with length 1 along the first (step)
        dimension.

        # Parameters

        step : The step to keep.

        # Returns

        Memory holding, for each key, the length-1 slice for `step`.
        """
        # `step + 1` would be 0 for `step == -1`, so leave the slice open-ended.
        stop = None if step == -1 else step + 1
        selected = Memory()
        for key in self:
            current = self.tensor(key)
            assert (
                current.shape[0] > step
            ), "attempting to access step {} for memory type {} of shape {}".format(
                step, key, current.shape
            )
            selected.check_append(
                key, self.tensor(key)[step:stop, ...], self.sampler_dim(key)
            )
        return selected

    def step_squeeze(self, step: int) -> "Memory":
        """Keep a single step by indexing the first (step) dimension, which is
        thereby removed.

        # Parameters

        step : The step to keep.

        # Returns

        Memory holding the indexed tensors; sampler dimensions are decreased
        by one to account for the removed step dimension.
        """
        squeezed = Memory()
        for key in self:
            current = self.tensor(key)
            assert (
                current.shape[0] > step
            ), "attempting to access step {} for memory type {} of shape {}".format(
                step, key, current.shape
            )
            squeezed.check_append(
                key, self.tensor(key)[step, ...], self.sampler_dim(key) - 1
            )
        return squeezed

    def slice(
        self,
        dim: int,
        start: Optional[int] = None,
        stop: Optional[int] = None,
        step: int = 1,
    ) -> "Memory":
        """Slice along a dimension that has the same extent in all entries.

        Negative values are accepted for `start` and `stop`.

        # Parameters

        dim : The dimension to slice.
        start : Index of the first item to keep (default 0 if `None`).
        stop : Index of the first item to discard (default: the tensor size
            along `dim` if `None`).
        step : The increment between consecutive indices (default 1).

        # Returns

        The sliced memory. When no slicing is requested the original tensors
        are stored unchanged in the result.
        """
        must_slice = not (start is None and stop is None and step == 1)
        extent: Optional[int] = None
        sliced = Memory()
        for key in self:
            current = self.tensor(key)
            assert (
                len(current.shape) > dim
            ), f"attempting to access dim {dim} for memory type {key} of shape {current.shape}"

            # The first entry fixes the extent all other entries must match.
            if extent is None:
                extent = current.shape[dim]

            assert (
                extent == current.shape[dim]
            ), f"attempting to slice along non-uniform dimension {dim}"

            if must_slice:
                leading = (slice(None),) * dim
                trailing = (slice(None),) * (len(current.shape) - (1 + dim))
                index = leading + (slice(start, stop, step),) + trailing
                sliced.check_append(
                    key=key,
                    tensor=current[index],
                    sampler_dim=self.sampler_dim(key),
                )
            else:
                sliced.check_append(
                    key,
                    current,
                    self.sampler_dim(key),
                )

        return sliced

    def to(self, device: torch.device) -> "Memory":
        """Move, in place, every tensor that is not yet on `device`.

        # Parameters

        device : The target device.

        # Returns

        This memory.
        """
        for key in self:
            current = self.tensor(key)
            if current.device != device:
                self.set_tensor(key, current.to(device))
        return self
class Loss(abc.ABC):
    """Common base class of all losses; it defines no members of its own."""
@attr.s(kw_only=True)
class LossOutput:
    """Keyword-only container for the values produced by a loss computation."""

    # The loss value.
    value: torch.Tensor = attr.ib()
    # Additional numeric information about the current loss.
    info: Dict[str, Union[float, int]] = attr.ib()
    # NOTE(review): presumably information aggregated per epoch -- confirm with callers.
    per_epoch_info: Dict[str, Union[float, int]] = attr.ib()
    # The batch memory after processing the current batch.
    batch_memory: Memory = attr.ib()
    # The stream memory after processing the current batch.
    stream_memory: Memory = attr.ib()
    # The batch size.
    bsize: int = attr.ib()
class GenericAbstractLoss(Loss):
    """Abstract loss computed from a model, a batch of data, and two kinds of
    memory that allow losses to share computation."""

    # noinspection PyMethodOverriding
    @abc.abstractmethod
    def loss(  # type: ignore
        self,
        *,  # No positional arguments
        model: ModelType,
        batch: ObservationType,
        batch_memory: Memory,
        stream_memory: Memory,
    ) -> LossOutput:
        """Compute the loss for a batch of data.

        The loss is obtained after processing a batch of data with (part of) a
        model, possibly using memory. Two types of memory are supported,
        `batch_memory` and `stream_memory`; both can be used to compute losses
        and to share computation.

        ## `batch_memory`

        During the update phase of training, the following steps happen in
        order:

        1. A `batch` of data is sampled from an `ExperienceStorage` (which
           stores data possibly collected during previous rollout steps).
        2. This `batch` is passed to each of the specified
           `GenericAbstractLoss`'s and is used, along with the `model`, to
           compute each such loss.
        3. The losses are summed together, gradients are computed by
           backpropagation, and an update step is taken.
        4. The process loops back to (1) with a new batch.

        Now suppose that some computation performed by one
        `GenericAbstractLoss` (`LossA`) could be shared with others (`LossB`,
        ...). For instance, `LossA` might run the visual encoder of `model` on
        all the images contained in `batch` in order to compute a
        classification loss, while `LossB` would like to run the same visual
        encoder on the same images to compute a depth-prediction loss. Without
        some sort of memory, the visual encoder would have to be rerun on all
        images several times, wasting computational resources. This is where
        `batch_memory` comes in: `LossA` can store the visual representations
        it computed in `batch_memory`, and `LossB` can then access them. Note
        that `batch_memory` is reinitialized after each new `batch` is
        sampled.

        ## `stream_memory`

        As described above, `batch_memory` treats each batch as its own
        independent collection of data. But what if your `ExperienceStorage`
        samples its batches in a streaming fashion? E.g. your
        `ExperienceStorage` might be a fixed collection of expert trajectories
        for use with imitation learning. In this case you can't simply treat
        each batch independently: you might want to save information from one
        batch to use in another. The simplest case is an agent `model` that
        uses an RNN and produces a recurrent hidden state: the hidden state
        from the end of one batch should be used at the start of computations
        for the next batch. `stream_memory` allows for this: it is not cleared
        across batches but, **importantly**, it is detached from the
        computation graph after each backpropagation step so that the size of
        the computation graph does not grow unboundedly.

        # Parameters

        model : The model to run on the data batch (both assumed to be on the
            same device).
        batch : The data to use as input for the model (already on the same
            device as the model).
        batch_memory : See above.
        stream_memory : See above.

        # Returns

        A `LossOutput` holding the total loss (`value`), additional
        information about the current loss (`info`), the `batch_memory` and
        `stream_memory` after processing the current data batch (see above),
        and the batch size (`bsize`).
        """
        raise NotImplementedError()
================================================
FILE: allenact/base_abstractions/preprocessor.py
================================================
import abc
from typing import List, Any, Dict
from typing import Sequence
from typing import Union
import gym
import networkx as nx
import torch
from gym.spaces import Dict as SpaceDict
from allenact.utils.experiment_utils import Builder
class Preprocessor(abc.ABC):
    """Transforms data coming from sensors or other preprocessors into inputs
    for agents or for further preprocessors.

    Subclasses must implement `process` (and `to`) and must make sure that
    the attributes below are set.

    # Attributes:
        input_uuids : List of input universally unique ids.
        uuid : Universally unique id.
        observation_space : ``gym.Space`` object corresponding to processed observation spaces.
    """

    input_uuids: List[str]
    uuid: str
    observation_space: gym.Space

    def __init__(
        self,
        input_uuids: List[str],
        output_uuid: str,
        observation_space: gym.Space,
        **kwargs: Any
    ) -> None:
        """Store the input uuids, the output uuid, and the observation space.

        # Parameters

        input_uuids : The uuids of the observations this preprocessor consumes.
        output_uuid : The uuid under which the processed output is exposed
            (stored as `self.uuid`).
        observation_space : The space of the processed observation.
        kwargs : Accepted but not used by this initializer.
        """
        self.input_uuids = input_uuids
        self.uuid = output_uuid
        self.observation_space = observation_space

    @abc.abstractmethod
    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:
        """Compute the processed observation.

        # Parameters

        obs : Dict with available observations and processed observations.

        # Returns

        Processed observation.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def to(self, device: torch.device) -> "Preprocessor":
        """Move this preprocessor to `device`; annotated to return a
        `Preprocessor`."""
        raise NotImplementedError()
class SensorPreprocessorGraph:
    """Represents a graph of preprocessors, with each preprocessor being
    identified through a universally unique id.

    Allows for the construction of observations that are a function of
    sensor readings. For instance, perhaps rather than giving your agent
    a raw RGB image, you'd rather first pass that image through a pre-trained
    convolutional network and only give your agent the resulting features
    (see e.g. the `ResNetPreprocessor` class).

    # Attributes

    preprocessors : Dictionary mapping the (unique) output uuid of each
        preprocessor to that preprocessor.
    observation_spaces: The observation spaces of the values returned when calling `get_observations`.
        By default (see the `additional_output_uuids` parameter to change this default) the observations
        returned by the `SensorPreprocessorGraph` **include only the sink nodes** of the graph (i.e.
        those that are not used by any other preprocessor).
        Thus if one of the input preprocessors takes as input the `'YOUR_SENSOR_UUID'` sensor, then
        `'YOUR_SENSOR_UUID'` will not be returned when calling `get_observations`.
    device: The `torch.device` upon which the preprocessors are run.
    compute_order: The uuids of all graph nodes, ordered so that every node
        appears after all of the nodes it takes as input.
    """

    preprocessors: Dict[str, Preprocessor]
    observation_spaces: SpaceDict
    device: torch.device

    def __init__(
        self,
        source_observation_spaces: SpaceDict,
        preprocessors: Sequence[Union[Preprocessor, Builder[Preprocessor]]],
        additional_output_uuids: Sequence[str] = tuple(),
    ) -> None:
        """Initializer.

        # Parameters

        source_observation_spaces : The observation spaces of all sensors before preprocessing.
            This generally should be the output of `SensorSuite.observation_spaces`.
        preprocessors : The preprocessors that will be included in the graph
            (`Builder`s are called to instantiate them).
        additional_output_uuids: As described in the documentation for this class, the observations
            returned when calling `get_observations` only include, by default, those observations
            that are not consumed by any preprocessor. If you'd like to include observations that
            would otherwise not be included, the uuids of these observations should be included as
            a sequence of strings here.
        """
        self.device: torch.device = torch.device("cpu")

        obs_spaces: Dict[str, gym.Space] = {
            k: source_observation_spaces[k] for k in source_observation_spaces
        }

        self.preprocessors: Dict[str, Preprocessor] = {}
        for preprocessor in preprocessors:
            if isinstance(preprocessor, Builder):
                preprocessor = preprocessor()

            assert (
                preprocessor.uuid not in self.preprocessors
            ), "'{}' is duplicated preprocessor uuid".format(preprocessor.uuid)

            self.preprocessors[preprocessor.uuid] = preprocessor
            obs_spaces[preprocessor.uuid] = preprocessor.observation_space

        # Nodes are observation uuids; an edge j -> k means that preprocessor
        # `k` takes observation `j` as input.
        g = nx.DiGraph()
        for k in obs_spaces:
            g.add_node(k)
        for k in self.preprocessors:
            for j in self.preprocessors[k].input_uuids:
                g.add_edge(j, k)

        assert nx.is_directed_acyclic_graph(
            g
        ), "preprocessors do not form a direct acyclic graph"

        # noinspection PyCallingNonCallable
        self.observation_spaces = SpaceDict(
            spaces={
                uuid: obs_spaces[uuid]
                for uuid in obs_spaces
                if uuid in additional_output_uuids or g.out_degree(uuid) == 0
            }
        )

        # Ensure dependencies are precomputed. A topological order is needed
        # for this: a depth-first preorder can list a preprocessor with
        # several inputs before some of those inputs have been visited.
        self.compute_order = list(nx.topological_sort(g))

    def get(self, uuid: str) -> Preprocessor:
        """Return preprocessor with the given `uuid`.

        # Parameters

        uuid : The unique id of the preprocessor.

        # Returns

        The preprocessor with unique id `uuid`.
        """
        return self.preprocessors[uuid]

    def to(self, device: torch.device) -> "SensorPreprocessorGraph":
        """Move every preprocessor to `device` and record it as `self.device`.

        # Parameters

        device : The target device.

        # Returns

        This graph.
        """
        for k, v in self.preprocessors.items():
            self.preprocessors[k] = v.to(device)
        self.device = device
        return self

    def get_observations(
        self, obs: Dict[str, Any], *args: Any, **kwargs: Any
    ) -> Dict[str, Any]:
        """Get processed observations.

        Every uuid of `compute_order` that is missing from `obs` is computed
        by the corresponding preprocessor and stored into `obs` (i.e. the
        input dictionary is updated in place).

        # Parameters

        obs : The available observations, keyed by uuid.

        # Returns

        A dictionary with the observations whose uuids are in
        `observation_spaces`.
        """
        for uuid in self.compute_order:
            if uuid not in obs:
                obs[uuid] = self.preprocessors[uuid].process(obs)

        return {uuid: obs[uuid] for uuid in self.observation_spaces}
class PreprocessorGraph(SensorPreprocessorGraph):
    """Deprecated name for `SensorPreprocessorGraph`; instantiating it raises
    a `DeprecationWarning`."""

    def __init__(self, *args, **kwargs):
        """Run the parent initializer, then raise `DeprecationWarning`."""
        super().__init__(*args, **kwargs)
        deprecation_message = "`PreprocessorGraph` has been deprecated, use `SensorPreprocessorGraph` instead."
        raise DeprecationWarning(deprecation_message)
class ObservationSet:
    """Deprecated class kept only so that instantiating it raises a
    `DeprecationWarning` pointing to `SensorPreprocessorGraph`."""

    def __init__(self, *args, **kwargs) -> None:
        """Always raise `DeprecationWarning`; all arguments are ignored."""
        deprecation_message = "`ObservationSet` has been deprecated. Use `SensorPreprocessorGraph` instead."
        raise DeprecationWarning(deprecation_message)
================================================
FILE: allenact/base_abstractions/sensor.py
================================================
# Original work Copyright (c) Facebook, Inc. and its affiliates.
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from collections import OrderedDict
from typing import (
Generic,
Dict,
Any,
Optional,
TYPE_CHECKING,
TypeVar,
Sequence,
Union,
Tuple,
cast,
)
import abc
import gym
import gym.spaces as gyms
import numpy as np
from torch.distributions.utils import lazy_property
from allenact.base_abstractions.misc import EnvType
from allenact.utils import spaces_utils as su
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact.utils.system import get_logger
# `SubTaskType` is imported from the task module only while type checking
# (presumably to avoid a circular import at runtime -- TODO confirm); at runtime
# a type variable with the same name is used in its place.
if TYPE_CHECKING:
    from allenact.base_abstractions.task import SubTaskType
else:
    SubTaskType = TypeVar("SubTaskType", bound="Task")
# Short alias for gym's dictionary space.
SpaceDict = gyms.Dict
class Sensor(Generic[EnvType, SubTaskType]):
    """A sensor providing data from the environment to the agent.

    Subclasses must implement `get_observation` and must make sure that the
    attributes below are set.

    # Attributes

    uuid : universally unique id.
    observation_space : ``gym.Space`` object corresponding to observation of
        sensor.
    """

    uuid: str
    observation_space: gym.Space

    def __init__(self, uuid: str, observation_space: gym.Space, **kwargs: Any) -> None:
        """Store the sensor's uuid and observation space.

        # Parameters

        uuid : The unique id of the sensor.
        observation_space : The space of the observations the sensor returns.
        kwargs : Accepted but not used by this initializer.
        """
        self.observation_space = observation_space
        self.uuid = uuid

    def get_observation(
        self, env: EnvType, task: Optional[SubTaskType], *args: Any, **kwargs: Any
    ) -> Any:
        """Return the sensor's observation from the environment (or task).

        # Parameters

        env : The environment the sensor is used upon.
        task : (Optionally) a Task from which the sensor should get data.

        # Returns

        Current observation for Sensor.
        """
        raise NotImplementedError()
class SensorSuite(Generic[EnvType]):
    """A collection of sensors, each identified by its unique id.

    # Attributes

    sensors : Ordered dictionary mapping each sensor's uuid to the sensor;
        uuids must be unique.
    observation_spaces : Dictionary space with, for each uuid, the
        observation space of the corresponding sensor.
    """

    sensors: Dict[str, Sensor[EnvType, Any]]
    observation_spaces: gyms.Dict

    def __init__(self, sensors: Sequence[Sensor]) -> None:
        """Initializer.

        # Parameters

        sensors : The sensors that will be included in the suite.
        """
        self.sensors = OrderedDict()
        for sensor in sensors:
            sensor_uuid = sensor.uuid
            assert (
                sensor_uuid not in self.sensors
            ), "'{}' is duplicated sensor uuid".format(sensor_uuid)
            self.sensors[sensor_uuid] = sensor

        # Observation spaces are collected in the order the sensors were given.
        spaces: OrderedDict[str, gym.Space] = OrderedDict(
            (sensor_uuid, sensor.observation_space)
            for sensor_uuid, sensor in self.sensors.items()
        )
        self.observation_spaces = SpaceDict(spaces=spaces)

    def get(self, uuid: str) -> Sensor:
        """Look up the sensor with the given `uuid`.

        # Parameters

        uuid : The unique id of the sensor

        # Returns

        The sensor with unique id `uuid`.
        """
        return self.sensors[uuid]

    def get_observations(
        self, env: EnvType, task: Optional[SubTaskType], **kwargs: Any
    ) -> Dict[str, Any]:
        """Query every sensor of the suite for its observation.

        # Parameters

        env : The environment from which to get the observation.
        task : (Optionally) the task from which to get the observation.
        kwargs : Extra keyword arguments forwarded to each sensor.

        # Returns

        Data from all sensors packaged inside a Dict.
        """
        observations: Dict[str, Any] = {}
        for sensor_uuid, sensor in self.sensors.items():
            observations[sensor_uuid] = sensor.get_observation(
                env=env, task=task, **kwargs
            )  # type: ignore
        return observations
class AbstractExpertSensor(Sensor[EnvType, SubTaskType], abc.ABC):
    """Base class for sensors that obtain the expert action for a given task
    (if available).

    Subclasses must implement `flagged_group_space` (which defines the
    observation space of a single group) and `query_expert` (which obtains the
    expert output for a task).
    """

    # Key under which the expert action (or policy) is stored in an observation.
    ACTION_POLICY_LABEL: str = "action_or_policy"
    # Key under which the expert success flag is stored in an observation.
    EXPERT_SUCCESS_LABEL: str = "expert_success"
    # Name of the single placeholder group used when action groups are not in use.
    _NO_GROUPS_LABEL: str = "__dummy_expert_group__"

    def __init__(
        self,
        action_space: Optional[Union[gym.Space, int]] = None,
        uuid: str = "expert_sensor_type_uuid",
        expert_args: Optional[Dict[str, Any]] = None,
        nactions: Optional[int] = None,
        use_dict_as_groups: bool = True,
        **kwargs: Any,
    ) -> None:
        """Initialize an `ExpertSensor`.

        # Parameters
        action_space : The action space of the agent. This is necessary in order for this sensor
            to know what its output observation space is. An `int` is converted to
            `gym.spaces.Discrete(action_space)`.
        uuid : A string specifying the unique ID of this sensor.
        expert_args : This sensor obtains an expert action from the task by calling the `query_expert`
            method of the task. `expert_args` are any keyword arguments that should be passed to the
            `query_expert` method when called. The key `expert_sensor_group_name` is reserved.
        nactions : [DEPRECATED] The number of actions available to the agent, corresponds to an `action_space`
            of `gym.spaces.Discrete(nactions)`. Only used when `action_space` is `None`.
        use_dict_as_groups : Whether to use the top-level action_space of type `gym.spaces.Dict` as action groups.
        """
        if isinstance(action_space, int):
            action_space = gym.spaces.Discrete(action_space)
        elif action_space is None:
            assert (
                nactions is not None
            ), "One of `action_space` or `nactions` must be not `None`."
            get_logger().warning(
                "The `nactions` parameter to `AbstractExpertSensor` is deprecated and will be removed, please use"
                " the `action_space` parameter instead."
            )
            action_space = gym.spaces.Discrete(nactions)

        self.action_space = action_space

        # Groups are only used for `Dict` action spaces, and only when requested.
        self.use_groups = (
            isinstance(action_space, gym.spaces.Dict) and use_dict_as_groups
        )

        # Without groups, the whole action space forms one placeholder group.
        self.group_spaces = (
            self.action_space
            if self.use_groups
            else OrderedDict(
                [
                    (
                        self._NO_GROUPS_LABEL,
                        self.action_space,
                    )
                ]
            )
        )

        self.expert_args: Dict[str, Any] = expert_args or {}

        assert (
            "expert_sensor_group_name" not in self.expert_args
        ), "`expert_sensor_group_name` is reserved for `AbstractExpertSensor`"

        # NOTE(review): `observation_space` is presumably picked up from `locals()`
        # by `prepare_locals_for_super` and forwarded to `Sensor.__init__` -- confirm
        # before renaming or adding local variables in this method.
        observation_space = self._get_observation_space()

        super().__init__(**prepare_locals_for_super(locals()))

    @classmethod
    @abc.abstractmethod
    def flagged_group_space(cls, group_space: gym.spaces.Space) -> gym.spaces.Dict:
        """gym space resulting from wrapping the given action space (or a
        derived space, as in `AbstractExpertPolicySensor`) together with a
        binary action space corresponding to an expert success flag, in a Dict
        space.

        # Parameters
        group_space : The source action space to be (optionally used to derive a policy space,) flagged and wrapped
        """
        raise NotImplementedError

    @classmethod
    def flagged_space(
        cls, action_space: gym.spaces.Space, use_dict_as_groups: bool = True
    ) -> gym.spaces.Dict:
        """gym space resulting from wrapping the given action space (or every
        highest-level entry in a Dict action space), together with binary
        action space corresponding to an expert success flag, in a Dict space.

        # Parameters
        action_space : The agent's action space (to be flagged and wrapped)
        use_dict_as_groups : Flag enabling every highest-level entry in a Dict action space to be independently flagged.
        """
        use_groups = isinstance(action_space, gym.spaces.Dict) and use_dict_as_groups

        if not use_groups:
            return cls.flagged_group_space(action_space)
        else:
            # Here `group_space` iterates over the group names (keys) of the
            # `Dict` action space; each group's space is flagged independently.
            return gym.spaces.Dict(
                [
                    (
                        group_space,
                        cls.flagged_group_space(action_space[group_space]),
                    )
                    for group_space in cast(gym.spaces.Dict, action_space)
                ]
            )

    def _get_observation_space(self) -> gym.spaces.Dict:
        """The observation space of the expert sensor.

        For the most basic discrete agent's ExpertActionSensor, it will
        equal `gym.spaces.Dict([(self.ACTION_POLICY_LABEL, self.action_space),
        (self.EXPERT_SUCCESS_LABEL, gym.spaces.Discrete(2))])`, where the
        first entry hosts the expert action index and the second equals 0 if
        and only if the expert failed to generate a true expert action.
        """
        return self.flagged_space(self.action_space, use_dict_as_groups=self.use_groups)

    @lazy_property
    def _zeroed_observation(self) -> Union[OrderedDict, Tuple]:
        """An all-zeros point of the observation space, converted to numpy."""
        # AllenAct-style flattened space (to easily generate an all-zeroes action as an array)
        flat_space = su.flatten_space(self.observation_space)
        # torch point to correctly unflatten `Discrete` for zeroed output
        flat_zeroed = su.torch_point(flat_space, np.zeros_like(flat_space.sample()))
        # unflatten zeroed output and convert to numpy
        return su.numpy_point(
            self.observation_space, su.unflatten(self.observation_space, flat_zeroed)
        )

    def flatten_output(self, unflattened):
        """Flatten a point of the observation space.

        `unflattened` is converted to a torch point of the observation space,
        flattened, and returned as a numpy array (after being moved to the
        cpu).
        """
        return (
            su.flatten(
                self.observation_space,
                su.torch_point(self.observation_space, unflattened),
            )
            .cpu()
            .numpy()
        )

    @abc.abstractmethod
    def query_expert(
        self,
        task: SubTaskType,
        expert_sensor_group_name: Optional[str],
    ) -> Tuple[Any, bool]:
        """Query the expert for the given task (and optional group name).

        # Returns

        A tuple (x, y) where x is the expert action or policy and y is False
        if the expert could not determine the optimal action (otherwise True). Here y
        is used for masking. Even when y is False, x should still lie in the space of
        possible values (e.g. if x is the expert policy then x should be the correct length,
        sum to 1, and have non-negative entries).
        """
        raise NotImplementedError

    def get_observation(
        self, env: EnvType, task: SubTaskType, *args: Any, **kwargs: Any
    ) -> Union[OrderedDict, Tuple]:
        """Return the flattened expert output for every group.

        If the task is done, the flattened all-zeros observation is returned.
        Otherwise `query_expert` is called once per group and the collected
        results are flattened with `flatten_output`.

        NOTE(review): `flatten_output` ends with `.cpu().numpy()`, so the value
        actually returned looks like a numpy array rather than the annotated
        `Union[OrderedDict, Tuple]` -- confirm.
        """
        # If the task is completed, we needn't (perhaps can't) find the expert
        # action from the (current) terminal state.
        if task.is_done():
            return self.flatten_output(self._zeroed_observation)

        actions_or_policies = OrderedDict()
        for group_name in self.group_spaces:
            action_or_policy, expert_was_successful = self.query_expert(
                task=task, expert_sensor_group_name=group_name
            )

            actions_or_policies[group_name] = OrderedDict(
                [
                    (self.ACTION_POLICY_LABEL, action_or_policy),
                    (self.EXPERT_SUCCESS_LABEL, expert_was_successful),
                ]
            )

        # Without groups, only the entry of the placeholder group is flattened.
        return self.flatten_output(
            actions_or_policies
            if self.use_groups
            else actions_or_policies[self._NO_GROUPS_LABEL]
        )
class AbstractExpertActionSensor(AbstractExpertSensor, abc.ABC):
    """Abstract expert sensor whose per-group space is the action space itself
    paired with an expert success flag (see `flagged_group_space`)."""

    def __init__(
        self,
        action_space: Optional[Union[gym.Space, int]] = None,
        uuid: str = "expert_action",
        expert_args: Optional[Dict[str, Any]] = None,
        nactions: Optional[int] = None,
        use_dict_as_groups: bool = True,
        **kwargs: Any,
    ) -> None:
        # The parent arguments are derived from this frame's `locals()`, so
        # no additional local names may be bound before this call.
        super().__init__(**prepare_locals_for_super(locals()))

    @classmethod
    def flagged_group_space(cls, group_space: gym.spaces.Space) -> gym.spaces.Dict:
        """Wrap an action space, together with a binary expert success flag
        space, in a gym `Dict` space.

        # Parameters

        group_space : The action space to be flagged and wrapped
        """
        labelled_spaces = [
            (cls.ACTION_POLICY_LABEL, group_space),
            (cls.EXPERT_SUCCESS_LABEL, gym.spaces.Discrete(2)),
        ]
        return gym.spaces.Dict(labelled_spaces)
class ExpertActionSensor(AbstractExpertActionSensor):
    """(Deprecated) A sensor that obtains the expert action from a given task
    (if available)."""

    def query_expert(
        self, task: SubTaskType, expert_sensor_group_name: Optional[str]
    ) -> Tuple[Any, bool]:
        """Delegate the query to `task.query_expert`, forwarding
        `self.expert_args` and the group name as keyword arguments."""
        return task.query_expert(
            expert_sensor_group_name=expert_sensor_group_name, **self.expert_args
        )
class AbstractExpertPolicySensor(AbstractExpertSensor, abc.ABC):
    """Abstract expert sensor whose per-group space is the policy space derived
    from the action space, paired with an expert success flag (see
    `flagged_group_space`)."""

    def __init__(
        self,
        action_space: Optional[Union[gym.Space, int]] = None,
        uuid: str = "expert_policy",
        expert_args: Optional[Dict[str, Any]] = None,
        nactions: Optional[int] = None,
        use_dict_as_groups: bool = True,
        **kwargs: Any,
    ) -> None:
        # The parent arguments are derived from this frame's `locals()`, so
        # no additional local names may be bound before this call.
        super().__init__(**prepare_locals_for_super(locals()))

    @classmethod
    def flagged_group_space(cls, group_space: gym.spaces.Space) -> gym.spaces.Dict:
        """Wrap the policy space given by
        `allenact.utils.spaces_utils.policy_space(group_space)`, together with
        a binary expert success flag space, in a gym `Dict` space.

        # Parameters

        group_space : The source action space to be used to derive a policy space, flagged and wrapped
        """
        labelled_spaces = [
            (cls.ACTION_POLICY_LABEL, su.policy_space(group_space)),
            (cls.EXPERT_SUCCESS_LABEL, gym.spaces.Discrete(2)),
        ]
        return gym.spaces.Dict(labelled_spaces)
class ExpertPolicySensor(AbstractExpertPolicySensor):
    """(Deprecated) A sensor that obtains the expert policy from a given task
    (if available)."""

    def query_expert(
        self, task: SubTaskType, expert_sensor_group_name: Optional[str]
    ) -> Tuple[Any, bool]:
        """Delegate the query to `task.query_expert`, forwarding
        `self.expert_args` and the group name as keyword arguments."""
        return task.query_expert(
            expert_sensor_group_name=expert_sensor_group_name, **self.expert_args
        )
================================================
FILE: allenact/base_abstractions/task.py
================================================
# Original work Copyright (c) Facebook, Inc. and its affiliates.
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Defines the primary data structures by which agents interact with their
environment."""
import abc
from typing import Any, Dict, Generic, List, Optional, Sequence, Tuple, TypeVar, Union
import gym
import numpy as np
from gym.spaces.dict import Dict as SpaceDict
from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor, SensorSuite
from allenact.utils.misc_utils import deprecated
# Type variable for the environment type that a `Task` is generic over.
EnvType = TypeVar("EnvType")
class Task(Generic[EnvType]):
    """An abstract, goal directed, 'task' through which agents interact with
    their environment.

    Agents take a `step` in the task and receive, in return, new
    observations, rewards, and (potentially) other useful information.

    A Task is a helpful generalization of the OpenAI gym's `Env` class
    and allows for multiple tasks (e.g. point and object navigation) to
    be defined on a single environment (e.g. AI2-THOR).

    # Attributes

    env : The environment.
    sensor_suite: Collection of sensors formed from the `sensors` argument in the initializer.
    task_info : Dictionary of (k, v) pairs defining task goals and other task information.
    max_steps : The maximum number of steps an agent can take in the task before it is considered failed.
    observation_space: The observation space returned on each step from the sensors.
    """

    env: EnvType
    sensor_suite: SensorSuite[EnvType]
    task_info: Dict[str, Any]
    max_steps: int
    observation_space: SpaceDict

    def __init__(
        self,
        env: EnvType,
        sensors: Union[SensorSuite, Sequence[Sensor]],
        task_info: Dict[str, Any],
        max_steps: int,
        **kwargs
    ) -> None:
        """Store the environment and task description and reset the step and
        reward counters.

        `sensors` is wrapped in a `SensorSuite` unless it already is one.
        """
        self.env = env
        self.sensor_suite = (
            sensors if isinstance(sensors, SensorSuite) else SensorSuite(sensors)
        )
        self.task_info = task_info
        self.max_steps = max_steps
        self.observation_space = self.sensor_suite.observation_spaces
        self._num_steps_taken = 0
        self._total_reward: Union[float, List[float]] = 0.0

    def get_observations(self, **kwargs) -> Any:
        """Collect observations from the sensor suite for this task and its
        environment."""
        return self.sensor_suite.get_observations(env=self.env, task=self, **kwargs)

    @property
    @abc.abstractmethod
    def action_space(self) -> gym.Space:
        """Task's action space.

        # Returns

        The action space for the task.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Render the current task state.

        Rendered task state can come in any supported modes.

        # Parameters

        mode : The mode in which to render. For example, you might have a 'rgb'
            mode that renders the agent's egocentric viewpoint or a 'dev' mode
            returning additional information.
        args : Extra args.
        kwargs : Extra kwargs.

        # Returns

        A numpy array corresponding to the requested render.
        """
        raise NotImplementedError()

    def _increment_num_steps_taken(self) -> None:
        """Helper function that increases the number of steps counter by
        one."""
        self._num_steps_taken += 1

    def step(self, action: Any) -> RLStepResult:
        """Take an action in the environment (one per agent).

        Takes the action in the environment and returns
        observations (& rewards and any additional information)
        corresponding to the agent's new state. Note that this function
        should not be overwritten without care (instead
        implement the `_step` function).

        # Parameters

        action : The action to take, should be of the same form as specified by `self.action_space`.

        # Returns

        A `RLStepResult` object encoding the new observations, reward, and
        (possibly) additional information.
        """
        assert not self.is_done()

        step_result = self._step(action=action)
        reward = step_result.reward

        # A Sequence reward is assumed to follow the same order imposed by
        # the spaces' flatten operation.
        if not isinstance(reward, Sequence):
            self._total_reward += float(reward)  # type:ignore
        elif isinstance(self._total_reward, Sequence):
            for agent_ind, agent_reward in enumerate(reward):
                self._total_reward[agent_ind] += float(agent_reward)
        else:
            self._total_reward = [float(agent_reward) for agent_reward in reward]

        self._increment_num_steps_taken()

        # TODO: We need a better solution to the below. It's not a good idea
        #   to pre-increment the step counter as this might play poorly with `_step`
        #   if it relies on some aspect of the current number of steps taken.
        done = step_result.done or self.is_done()
        return step_result.clone({"done": done})

    @abc.abstractmethod
    def _step(self, action: Any) -> RLStepResult:
        """Helper function called by `step` to take a step by each agent in the
        environment.

        Takes the action in the environment and returns
        observations (& rewards and any additional information)
        corresponding to the agent's new state. This function is called
        by the (public) `step` function and is what should be implemented
        when defining your new task. Keeping `_step` separate from `step`
        lets `step` perform bookkeeping (e.g. keeping track of the number of
        steps); without a separate `_step`, everyone implementing `step`
        would need to copy this bookkeeping code.

        # Parameters

        action : The action to take.

        # Returns

        A `RLStepResult` object encoding the new observations, reward, and
        (possibly) additional information.
        """
        raise NotImplementedError()

    def reached_max_steps(self) -> bool:
        """Has the agent reached the maximum number of steps."""
        return self.num_steps_taken() >= self.max_steps

    @abc.abstractmethod
    def reached_terminal_state(self) -> bool:
        """Has the agent reached a terminal state (excluding reaching the
        maximum number of steps)."""
        raise NotImplementedError()

    def is_done(self) -> bool:
        """Did the agent reach a terminal state or perform the maximum number
        of steps."""
        return self.reached_terminal_state() or self.reached_max_steps()

    def num_steps_taken(self) -> int:
        """Number of steps taken by the agent in the task so far."""
        return self._num_steps_taken

    @deprecated
    def action_names(self) -> Tuple[str, ...]:
        """Action names of the Task instance.

        This function has been deprecated and will be removed.

        This function is a hold-over from when the `Task`
        abstraction only considered `gym.space.Discrete` action spaces (in which
        case it makes sense to name these actions).

        This implementation of `action_names` requires that a `class_action_names`
        method has been defined. This method should be overwritten if `class_action_names`
        requires key word arguments to determine the number of actions.
        """
        if not hasattr(self, "class_action_names"):
            raise NotImplementedError(
                "`action_names` requires that a function `class_action_names` be defined."
                " This said, please do not use this functionality as it has been deprecated and will be removed."
                " If you would like an `action_names` function for your task, feel free to define one"
                " with the knowledge that the AllenAct internals will ignore it."
            )
        return self.class_action_names()

    @abc.abstractmethod
    def close(self) -> None:
        """Closes the environment and any other files opened by the Task (if
        applicable)."""
        raise NotImplementedError()

    def metrics(self) -> Dict[str, Any]:
        """Computes metrics related to the task after the task's completion.

        By default this function is automatically called during training
        and the reported metrics logged to tensorboard.

        # Returns

        A dictionary where every key is a string (the metric's
        name) and the value is the value of the metric.
        """
        task_metrics = {
            "ep_length": self.num_steps_taken(),
            "reward": self.cumulative_reward,
            "task_info": self.task_info,
        }
        return task_metrics

    def query_expert(self, **kwargs) -> Tuple[Any, bool]:
        """(Deprecated) Query the expert policy for this task.

        The new correct way to include this functionality is through the definition of a class
        derived from `allenact.base_abstractions.sensor.AbstractExpertActionSensor` or
        `allenact.base_abstractions.sensor.AbstractExpertPolicySensor`, where a
        `query_expert` method must be defined.

        # Returns

        A tuple (x, y) where x is the expert action (or policy) and y is False
        if the expert could not determine the optimal action (otherwise True).
        Here y is used for masking. Even when y is False, x should still lie
        in the space of possible values (e.g. if x is the expert policy then x
        should be the correct length, sum to 1, and have non-negative entries).
        This default implementation always returns `(None, False)`.
        """
        return None, False

    @property
    def cumulative_reward(self) -> float:
        """Mean per-agent total cumulative reward in the task so far.

        # Returns

        Mean per-agent cumulative reward as a float.
        """
        total = self._total_reward
        if isinstance(total, Sequence):
            # Per-agent totals: report their mean as a python float.
            return np.mean(total).item()
        return total
# Type variable bound to `Task`, i.e. standing for `Task` or any of its subclasses.
SubTaskType = TypeVar("SubTaskType", bound=Task)
class TaskSampler(abc.ABC):
    """Abstract interface describing how new tasks are sampled."""

    @property
    @abc.abstractmethod
    def length(self) -> Union[int, float]:
        """Number of tasks that can still be sampled.

        # Returns

        Number of total tasks remaining that can be sampled. Can be
        float('inf').
        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def last_sampled_task(self) -> Optional[Task]:
        """The task that was sampled most recently.

        # Returns

        The most recently sampled Task.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def next_task(self, force_advance_scene: bool = False) -> Optional[Task]:
        """Produce the next task in the sampler's stream.

        # Parameters

        force_advance_scene : Used to (if applicable) force the task sampler to
            use a new scene for the next task. This is useful if, during training,
            you would like to train with one scene for some number of steps and
            then explicitly control when you begin training with the next scene.

        # Returns

        The next Task in the sampler's stream if a next task exists. Otherwise None.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def close(self) -> None:
        """Close any open environments or streams.

        Should be run when done sampling.
        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def all_observation_spaces_equal(self) -> bool:
        """Whether every task this sampler can produce shares one observation
        space.

        This will almost always simply return `True`. A case in which it should
        return `False` includes, for example, a setting where you design
        a `TaskSampler` that can generate different types of tasks, i.e.
        point navigation tasks and object navigation tasks. In this case, these
        different tasks may output different types of observations.

        # Returns

        True if all Tasks that can be sampled by this sampler have the
        same observation space. Otherwise False.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def reset(self) -> None:
        """Reset the task sampler to its original state (except for any seed)."""
        raise NotImplementedError

    @abc.abstractmethod
    def set_seed(self, seed: int) -> None:
        """Set a new RNG seed.

        # Parameters

        seed : New seed.
        """
        raise NotImplementedError
================================================
FILE: allenact/embodiedai/__init__.py
================================================
================================================
FILE: allenact/embodiedai/aux_losses/__init__.py
================================================
================================================
FILE: allenact/embodiedai/aux_losses/losses.py
================================================
# Original work Copyright (c) Facebook, Inc. and its affiliates.
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Defining the auxiliary loss for actor critic type models.
Several of the losses defined in this file are modified versions of those found in
https://github.com/joel99/habitat-pointnav-aux/blob/master/habitat_baselines/
"""
import abc
from typing import Dict, cast, Tuple, Sequence
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
AbstractActorCriticLoss,
ObservationType,
)
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput
def _bernoulli_subsample_mask_like(masks, p=0.1):
    """Random 0/1 float tensor shaped like `masks`.

    An entry is 1.0 where an independent uniform draw (from
    `torch.rand_like(masks)`) is `<= p` and 0.0 elsewhere.
    """
    uniform_draws = torch.rand_like(masks)
    selected = uniform_draws <= p
    return selected.float()
class MultiAuxTaskNegEntropyLoss(AbstractActorCriticLoss):
    """Negative entropy loss over the task weights, for the setting with
    multiple auxiliary tasks."""

    UUID = "multitask_entropy"  # make sure this is unique

    def __init__(self, task_names: Sequence[str], *args, **kwargs):
        """Remember the task names (and how many there are); remaining
        arguments are forwarded to the parent class."""
        super().__init__(*args, **kwargs)
        self.num_tasks = len(task_names)
        self.task_names = task_names

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs,
    ) -> Tuple[torch.FloatTensor, Dict[str, float]]:
        """Mean negative entropy of the task weights stored under `self.UUID`
        in `actor_critic_output.extras`.

        # Returns

        The loss together with a dictionary holding the loss value
        ("entropy_loss") and the mean weight of every task
        ("weight_<task name>").
        """
        weights = actor_critic_output.extras[self.UUID].view(-1, self.num_tasks)

        neg_entropy = (-CategoricalDistr(weights).entropy()).mean()
        mean_weights = weights.mean(dim=0)  # (K)

        info = {"entropy_loss": cast(torch.Tensor, neg_entropy).item()}
        for task_ind, weight in enumerate(mean_weights):
            info["weight_" + self.task_names[task_ind]] = cast(
                torch.Tensor, weight
            ).item()

        return neg_entropy, info
class AuxiliaryLoss(AbstractActorCriticLoss):
    """Common base for auxiliary task losses.

    Concrete auxiliary losses subclass this and implement `get_aux_loss`.
    """

    def __init__(self, auxiliary_uuid: str, *args, **kwargs):
        """Record the key under which this loss finds its inputs in the
        actor-critic output's `extras`."""
        super().__init__(*args, **kwargs)
        self.auxiliary_uuid = auxiliary_uuid

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs,
    ) -> Tuple[torch.Tensor, Dict[str, float]]:
        """Call `get_aux_loss` with the entries stored under
        `self.auxiliary_uuid` in `actor_critic_output.extras`, plus the
        batch's observations, actions and masks."""
        aux_inputs = actor_critic_output.extras[self.auxiliary_uuid]
        return self.get_aux_loss(
            observations=batch["observations"],
            actions=batch["actions"],
            masks=batch["masks"],
            **aux_inputs,
        )

    @abc.abstractmethod
    def get_aux_loss(
        self,
        aux_model: nn.Module,
        observations: ObservationType,
        obs_embeds: torch.Tensor,
        actions: torch.Tensor,
        beliefs: torch.Tensor,
        masks: torch.Tensor,
        *args,
        **kwargs,
    ):
        """Compute the auxiliary loss; must be implemented by subclasses."""
        raise NotImplementedError
def _propagate_final_beliefs_to_all_steps(
    beliefs: torch.Tensor,
    masks: torch.Tensor,
    num_sampler: int,
    num_steps: int,
):
    """Replace every belief by the last belief of the segment it belongs to.

    For each sampler `i`, a zero at `masks[t, i]` with `t >= 1` starts a new
    segment at step `t` (the previous segment then ends at `t - 1`). The
    first segment starts at step 0 and the last one ends at `num_steps - 1`.

    # Returns

    A tuple `(final_beliefs, start_locs_list, end_locs_list)`: a tensor
    shaped like `beliefs` in which every step holds the belief found at its
    segment's end, and, per sampler, tensors with the (inclusive) start and
    end steps of its segments.
    """
    propagated = torch.zeros_like(beliefs)  # (T, B, *)
    all_starts, all_ends = [], []

    for sampler_ind in range(num_sampler):
        # Step 0 is skipped: index e below means masks[e + 1] == 0, i.e. a
        # segment ends at step e. May be empty; dtype is long.
        boundaries = torch.where(masks[1:, sampler_ind] == 0)[0]

        segment_starts = torch.cat([boundaries.new_tensor([0]), boundaries + 1])
        segment_ends = torch.cat(
            [boundaries, boundaries.new_tensor([num_steps - 1])]
        )
        all_starts.append(segment_starts)
        all_ends.append(segment_ends)

        for first, last in zip(segment_starts, segment_ends):
            propagated[first : last + 1, sampler_ind] = beliefs[last, sampler_ind]

    return propagated, all_starts, all_ends
class InverseDynamicsLoss(AuxiliaryLoss):
    """Inverse Dynamics auxiliary task.

    From Auxiliary Tasks Speed Up Learning PointGoal Navigation (Ye, 2020)
    https://arxiv.org/abs/2007.04561, originally from Curiosity-driven
    Exploration by Self-supervised Prediction (Pathak, 2017)
    https://arxiv.org/abs/1705.05363.
    """

    UUID = "InvDyn"

    def __init__(
        self, subsample_rate: float = 0.2, subsample_min_num: int = 10, *args, **kwargs
    ):
        """Valid samples are subsampled at rate `subsample_rate`, unless there
        are fewer than `subsample_min_num` of them (then all are used)."""
        super().__init__(*args, auxiliary_uuid=self.UUID, **kwargs)
        self.cross_entropy_loss = nn.CrossEntropyLoss(reduction="none")
        self.subsample_rate = subsample_rate
        self.subsample_min_num = subsample_min_num

    def get_aux_loss(
        self,
        aux_model: nn.Module,
        observations: ObservationType,
        obs_embeds: torch.FloatTensor,
        actions: torch.FloatTensor,
        beliefs: torch.FloatTensor,
        masks: torch.FloatTensor,
        *args,
        **kwargs,
    ):
        """Cross entropy between the actions taken and the actions that
        `aux_model` predicts from consecutive observation embeddings and the
        final belief of the corresponding segment.

        # Returns

        The average loss over the selected transitions and a dictionary
        reporting it under "total".
        """
        num_steps, num_sampler = actions.shape  # T, B

        # The last action in the batch has no successor observation: drop it.
        target_actions = cast(torch.LongTensor, actions)[:-1]  # (T-1, B)

        # Final beliefs are located through the masks (no loss is computed
        # here as model.forward is compute-heavy).
        masks = masks.squeeze(-1)  # (T, B)
        final_beliefs = _propagate_final_beliefs_to_all_steps(
            beliefs, masks, num_sampler, num_steps
        )[0]

        # Per-transition cross entropy.
        transition_features = torch.cat(
            [obs_embeds[:-1], obs_embeds[1:], final_beliefs[:-1]], dim=2
        )  # (T-1, B, *)
        action_logits = aux_model(transition_features)  # (T-1, B, A)
        # The cross entropy loss requires the class dim at position 1.
        per_transition_loss = self.cross_entropy_loss(
            action_logits.view((num_steps - 1) * num_sampler, -1),  # ((T-1)*B, A)
            target_actions.flatten(),  # ((T-1)*B,)
        ).view(
            num_steps - 1, num_sampler
        )  # (T-1, B)

        # A transition t -> t+1 only counts when masks[t+1] is non-zero.
        transition_masks = masks[1:]
        if torch.count_nonzero(transition_masks) < self.subsample_min_num:
            keep_rate = 1.0  # too few valid transitions: don't subsample
        else:
            keep_rate = self.subsample_rate

        loss_masks = transition_masks * _bernoulli_subsample_mask_like(
            transition_masks, keep_rate
        )
        num_selected = torch.count_nonzero(loss_masks)
        avg_loss = (per_transition_loss * loss_masks).sum() / torch.clamp(
            num_selected, min=1.0
        )

        return avg_loss, {"total": cast(torch.Tensor, avg_loss).item()}
class TemporalDistanceLoss(AuxiliaryLoss):
    """Auxiliary task of Temporal Distance from Auxiliary Tasks Speed Up
    Learning PointGoal Navigation (Ye, 2020)
    https://arxiv.org/abs/2007.04561.

    For every episode segment found in the rollout, `num_pairs` pairs of
    steps are sampled and `aux_model` is asked to predict the (signed) number
    of steps separating the two members of each pair.
    """

    UUID = "TempDist"

    def __init__(self, num_pairs: int = 8, epsiode_len_min: int = 5, *args, **kwargs):
        """
        # Parameters

        num_pairs : Number of step pairs sampled per episode segment.
        epsiode_len_min : Segments whose `end - start` does not exceed this
            value get a zero normalizer and so do not contribute to the loss.
        """
        super().__init__(auxiliary_uuid=self.UUID, *args, **kwargs)
        self.num_pairs = num_pairs
        # Stored as a float because it is compared against a float tensor below.
        self.epsiode_len_min = float(epsiode_len_min)

    def get_aux_loss(
        self,
        aux_model: nn.Module,
        observations: ObservationType,
        obs_embeds: torch.FloatTensor,
        actions: torch.FloatTensor,
        beliefs: torch.FloatTensor,
        masks: torch.FloatTensor,
        *args,
        **kwargs,
    ):
        """Half the mean squared, length-normalized error of the predicted
        temporal distance between sampled pairs of steps.

        # Returns

        The average loss and a dictionary reporting it under "total".
        """
        # `actions` is only used to read off the rollout dimensions.
        num_steps, num_sampler = actions.shape  # T, B

        ## find the final belief state based on masks
        # we did not compute loss here as model.forward is compute-heavy
        masks = masks.squeeze(-1)  # (T, B)

        (
            final_beliefs,
            start_locs_list,
            end_locs_list,
        ) = _propagate_final_beliefs_to_all_steps(
            beliefs,
            masks,
            num_sampler,
            num_steps,
        )

        ## also find the locs_batch of shape (M, 3)
        # the last dim: [0] is on num_sampler loc, [1] and [2] is start and end locs
        # of one episode
        # in other words: at locs_batch[m, 0] in num_sampler dim, there exists one episode
        # starting from locs_batch[m, 1], ends at locs_batch[m, 2] (included)
        locs_batch = []
        for i in range(num_sampler):
            locs_batch.append(
                torch.stack(
                    [
                        i * torch.ones_like(start_locs_list[i]),
                        start_locs_list[i],
                        end_locs_list[i],
                    ],
                    dim=-1,
                )
            )  # shape (M[i], 3)
        locs_batch = torch.cat(locs_batch)  # shape (M, 3)

        temporal_dist_max = (
            locs_batch[:, 2] - locs_batch[:, 1]
        ).float()  # end - start, (M)
        # create normalizer that ignores too short episode, otherwise 1/T
        # (entries with `end - start <= epsiode_len_min` are set to 0)
        normalizer = torch.where(
            temporal_dist_max > self.epsiode_len_min,
            1.0 / temporal_dist_max,
            torch.tensor([0]).to(temporal_dist_max),
        )  # (M)

        # sample valid pairs: sampled_pairs shape (M, num_pairs, 2)
        # where M is the num of total episodes in the batch
        # Every sampled step lies in [start, end] of its own episode; the two
        # members of a pair are drawn independently, so they are not ordered.
        locs = locs_batch.cpu().numpy()  # as torch.randint only support int, not tensor
        sampled_pairs = np.random.randint(
            np.repeat(locs[:, [1]], 2 * self.num_pairs, axis=-1),  # (M, 2*k)
            np.repeat(locs[:, [2]] + 1, 2 * self.num_pairs, axis=-1),  # (M, 2*k)
        ).reshape(
            (-1, self.num_pairs, 2)
        )  # (M, k, 2)
        sampled_pairs_batch = torch.from_numpy(sampled_pairs).to(
            locs_batch
        )  # (M, k, 2)

        # sampler index of every sampled step, broadcast to the pairs' shape
        num_sampler_batch = locs_batch[:, [0]].expand(
            -1, 2 * self.num_pairs
        )  # (M, 1) -> (M, 2*k)
        num_sampler_batch = num_sampler_batch.reshape(
            -1, self.num_pairs, 2
        )  # (M, k, 2)

        # index with (step, sampler) to gather the features of each pair member
        sampled_obs_embeds = obs_embeds[
            sampled_pairs_batch, num_sampler_batch
        ]  # (M, k, 2, H1)
        sampled_final_beliefs = final_beliefs[
            sampled_pairs_batch, num_sampler_batch
        ]  # (M, k, 2, H2)
        # both observation embeddings, but only the first member's final belief
        features = torch.cat(
            [
                sampled_obs_embeds[:, :, 0],
                sampled_obs_embeds[:, :, 1],
                sampled_final_beliefs[:, :, 0],
            ],
            dim=-1,
        )  # (M, k, 2*H1 + H2)

        pred_temp_dist = aux_model(features).squeeze(-1)  # (M, k)
        # signed distance: second member's step minus first member's step
        true_temp_dist = (
            sampled_pairs_batch[:, :, 1] - sampled_pairs_batch[:, :, 0]
        ).float()  # (M, k)

        pred_error = (pred_temp_dist - true_temp_dist) * normalizer.unsqueeze(1)
        loss = 0.5 * (pred_error).pow(2)
        avg_loss = loss.mean()

        return (
            avg_loss,
            {
                "total": cast(torch.Tensor, avg_loss).item(),
            },
        )
class CPCALoss(AuxiliaryLoss):
    """Auxiliary task of CPC|A from Auxiliary Tasks Speed Up Learning PointGoal
    Navigation (Ye, 2020) https://arxiv.org/abs/2007.04561 originally from
    Neural Predictive Belief Representations (Guo, 2018)
    https://arxiv.org/abs/1811.06407.

    Future contexts, rolled out from the beliefs with the actions taken, are
    scored by a binary classifier against the true future observation
    embeddings (positives) and against randomly permuted ones (negatives).
    """

    UUID = "CPCA"

    def __init__(
        self, planning_steps: int = 8, subsample_rate: float = 0.2, *args, **kwargs
    ):
        """
        # Parameters

        planning_steps : Number of future steps (k) predicted from each step.
        subsample_rate : Rate at which the valid loss entries are subsampled.
        """
        super().__init__(auxiliary_uuid=self.UUID, *args, **kwargs)
        self.planning_steps = planning_steps
        self.subsample_rate = subsample_rate
        # Binary classification on logits, kept per-element so it can be masked.
        self.cross_entropy_loss = nn.BCEWithLogitsLoss(reduction="none")

    def get_aux_loss(
        self,
        aux_model: nn.Module,
        observations: ObservationType,
        obs_embeds: torch.Tensor,
        actions: torch.Tensor,
        beliefs: torch.Tensor,
        masks: torch.Tensor,
        *args,
        **kwargs,
    ):
        """Sum of the masked and subsampled average positive and negative
        classification losses.

        `aux_model` must provide `action_embedder`, `context_model` and
        `classifier`.

        # Returns

        The average loss and a dictionary with entries "total",
        "positive_loss" and "negative_loss".
        """
        # prepare for autoregressive inputs: c_{t+1:t+k} = GRU(b_t, a_{t:t+k-1}) <-> z_{t+k}
        ## where b_t = RNN(b_{t-1}, z_t, a_{t-1}), prev action is optional
        num_steps, num_sampler, obs_embed_size = obs_embeds.shape  # T, N, H_O
        assert 0 < self.planning_steps <= num_steps

        ## prepare positive and negatives that sample from all the batch
        positives = obs_embeds  # (T, N, -1)
        # negatives are the positives, randomly permuted over all T*N entries
        negative_inds = torch.randperm(num_steps * num_sampler).to(positives.device)
        negatives = torch.gather(  # input[index[i,j]][j]
            positives.view(num_steps * num_sampler, -1),
            dim=0,
            index=negative_inds.view(num_steps * num_sampler, 1).expand(
                num_steps * num_sampler, positives.shape[-1]
            ),
        ).view(
            num_steps, num_sampler, -1
        )  # (T, N, -1)

        ## prepare action sequences and initial beliefs
        action_embedding = aux_model.action_embedder(actions)  # (T, N, -1)
        action_embed_size = action_embedding.size(-1)
        # zero-pad so that every step has a full window of k actions
        action_padding = torch.zeros(
            self.planning_steps - 1, num_sampler, action_embed_size
        ).to(
            action_embedding
        )  # (k-1, N, -1)
        action_padded = torch.cat(
            (action_embedding, action_padding), dim=0
        )  # (T+k-1, N, -1)
        ## unfold function will create consecutive action sequences
        action_seq = (
            action_padded.unfold(dimension=0, size=self.planning_steps, step=1)
            .permute(3, 0, 1, 2)
            .view(self.planning_steps, num_steps * num_sampler, action_embed_size)
        )  # (k, T*N, -1)

        ## beliefs GRU output
        beliefs = beliefs.view(num_steps * num_sampler, -1).unsqueeze(0)  # (1, T*N, -1)

        # get future contexts c_{t+1:t+k} = GRU(b_t, a_{t:t+k-1})
        future_contexts_all, _ = aux_model.context_model(
            action_seq, beliefs
        )  # (k, T*N, -1)
        ## NOTE: future_contexts_all starting from next step t+1 to t+k, not t to t+k-1
        future_contexts_all = future_contexts_all.view(
            self.planning_steps, num_steps, num_sampler, -1
        ).permute(
            1, 0, 2, 3
        )  # (T, k, N, -1)

        # get all the classifier scores I(c_{t+1:t+k}; z_{t+1:t+k})
        # targets start at step 1 (hence `positives[1:]`) and are zero-padded
        # so that every step has a full window of k targets
        positives_padding = torch.zeros(
            self.planning_steps, num_sampler, obs_embed_size
        ).to(
            positives
        )  # (k, N, -1)
        positives_padded = torch.cat(
            (positives[1:], positives_padding), dim=0
        )  # (T+k-1, N, -1)
        positives_expanded = positives_padded.unfold(
            dimension=0, size=self.planning_steps, step=1
        ).permute(
            0, 3, 1, 2
        )  # (T, k, N, -1)
        positives_logits = aux_model.classifier(
            torch.cat([positives_expanded, future_contexts_all], -1)
        )  # (T, k, N, 1)
        positive_loss = self.cross_entropy_loss(
            positives_logits, torch.ones_like(positives_logits)
        )  # (T, k, N, 1)

        # same construction for the negatives, which are classified as 0
        negatives_padding = torch.zeros(
            self.planning_steps, num_sampler, obs_embed_size
        ).to(
            negatives
        )  # (k, N, -1)
        negatives_padded = torch.cat(
            (negatives[1:], negatives_padding), dim=0
        )  # (T+k-1, N, -1)
        negatives_expanded = negatives_padded.unfold(
            dimension=0, size=self.planning_steps, step=1
        ).permute(
            0, 3, 1, 2
        )  # (T, k, N, -1)
        negatives_logits = aux_model.classifier(
            torch.cat([negatives_expanded, future_contexts_all], -1)
        )  # (T, k, N, 1)
        negative_loss = self.cross_entropy_loss(
            negatives_logits, torch.zeros_like(negatives_logits)
        )  # (T, k, N, 1)

        # Masking to get valid scores
        ## masks: Note which timesteps [1, T+k+1] could have valid queries, at distance (k) (note offset by 1)
        ## we will extract the **diagonals** as valid_masks from masks later as below
        ## in pred_masks the first axis is (absolute) real timesteps and the
        ## second axis is (relative) planning timesteps
        masks = masks.squeeze(-1)  # (T, N)
        pred_masks = torch.ones(
            num_steps + self.planning_steps,
            self.planning_steps,
            num_sampler,
            1,
            dtype=torch.bool,
        ).to(
            beliefs.device
        )  # (T+k, k, N, 1)

        pred_masks[num_steps - 1 :] = (
            False  # GRU(b_t, a_{t:t+k-1}) is invalid when t >= T, as we don't have real z_{t+1}
        )
        for j in range(1, self.planning_steps + 1):  # for j-step predictions
            pred_masks[: j - 1, j - 1] = (
                False  # Remove the upper triangle above the diagonal (but I think this is unnecessary for valid_masks)
            )
            for n in range(num_sampler):
                has_zeros_batch = torch.where(masks[:, n] == 0)[0]
                # in j-step prediction, timesteps z -> z + j are disallowed as those are the first j timesteps of a new episode
                # z-> z-1 because of pred_masks being offset by 1
                # NOTE(review): for z == 0 the slice starts at index -1 and so
                # selects nothing; confirm that this is intended.
                for z in has_zeros_batch:
                    pred_masks[z - 1 : z - 1 + j, j - 1, n] = (
                        False  # can affect j timesteps
                    )

        # instead of the whole range, we actually are only comparing a window i:i+k for each query/target i - for each, select the appropriate k
        # we essentially gather diagonals from this full mask, t of them, k long
        valid_diagonals = [
            torch.diagonal(pred_masks, offset=-i) for i in range(num_steps)
        ]  # pull the appropriate k per timestep
        valid_masks = (
            torch.stack(valid_diagonals, dim=0).permute(0, 3, 1, 2).float()
        )  # (T, N, 1, k) -> (T, k, N, 1)
        # print(valid_masks.int().squeeze(-1)); print(masks) # verify its correctness

        # randomly keep a fraction of the valid entries
        loss_masks = valid_masks * _bernoulli_subsample_mask_like(
            valid_masks, self.subsample_rate
        )  # (T, k, N, 1)
        num_valid_losses = torch.count_nonzero(loss_masks)
        # the clamp avoids a division by zero when no entry was kept
        avg_positive_loss = (positive_loss * loss_masks).sum() / torch.clamp(
            num_valid_losses, min=1.0
        )
        avg_negative_loss = (negative_loss * loss_masks).sum() / torch.clamp(
            num_valid_losses, min=1.0
        )

        avg_loss = avg_positive_loss + avg_negative_loss

        return (
            avg_loss,
            {
                "total": cast(torch.Tensor, avg_loss).item(),
                "positive_loss": cast(torch.Tensor, avg_positive_loss).item(),
                "negative_loss": cast(torch.Tensor, avg_negative_loss).item(),
            },
        )
class CPCASoftMaxLoss(AuxiliaryLoss):
    """Auxiliary task of CPC|A with multi class softmax.

    Every future context, rolled out from a belief with the actions taken,
    has to pick the matching future observation embedding among all `T*N`
    observation embeddings of the rollout.
    """

    UUID = "cpcA_SOFTMAX"

    def __init__(
        self,
        planning_steps: int = 8,
        subsample_rate: float = 1,
        allow_skipping: bool = True,
        *args,
        **kwargs,
    ):
        """
        # Parameters

        planning_steps : Number of future steps (k) predicted from each step.
        subsample_rate : Rate at which the valid loss entries are subsampled.
        allow_skipping : If True, a rollout shorter than `planning_steps` makes
            `get_aux_loss` return `(0, {})` instead of raising.
        """
        super().__init__(auxiliary_uuid=self.UUID, *args, **kwargs)
        self.planning_steps = planning_steps
        self.subsample_rate = subsample_rate
        self.cross_entropy_loss = nn.CrossEntropyLoss(
            reduction="none"
        )  # nn.BCEWithLogitsLoss(reduction="none")
        self.allow_skipping = allow_skipping

    def get_aux_loss(
        self,
        aux_model: nn.Module,
        observations: ObservationType,
        obs_embeds: torch.Tensor,
        actions: torch.Tensor,
        beliefs: torch.Tensor,
        masks: torch.Tensor,
        *args,
        **kwargs,
    ):
        """Masked and subsampled average multi-class cross entropy.

        `aux_model` must provide `action_embedder`, `visual_mlp`,
        `context_model` and `belief_mlp`; the outputs of `visual_mlp` and
        `belief_mlp` must have the same size in their last dimension.

        # Returns

        The average loss and a dictionary reporting it under "total", or
        `(0, {})` when the rollout is too short and skipping is allowed.

        # Raises

        RuntimeError : If `planning_steps` is not in `(0, num_steps]` and
            skipping is not allowed.
        """
        # prepare for autoregressive inputs: c_{t+1:t+k} = GRU(b_t, a_{t:t+k-1}) <-> z_{t+k}
        ## where b_t = RNN(b_{t-1}, z_t, a_{t-1}), prev action is optional
        num_steps, num_samplers, _ = obs_embeds.shape  # T, N, H_O

        if not (0 < self.planning_steps <= num_steps):
            if self.allow_skipping:
                return 0, {}
            else:
                raise RuntimeError(
                    f"Insufficient planning steps: self.planning_steps {self.planning_steps} must"
                    f" be greater than zero and less than or equal to num_steps {num_steps}."
                )

        ## prepare action sequences and initial beliefs
        action_embedding = aux_model.action_embedder(actions)  # (T, N, -1)
        action_embed_size = action_embedding.size(-1)
        # zero-pad so that every step has a full window of k actions
        action_padding = torch.zeros(
            self.planning_steps - 1,
            num_samplers,
            action_embed_size,
            device=action_embedding.device,
        )  # (k-1, N, -1)
        action_padded = torch.cat(
            (action_embedding, action_padding), dim=0
        )  # (T+k-1, N, -1)
        ## unfold function will create consecutive action sequences
        action_seq = (
            action_padded.unfold(dimension=0, size=self.planning_steps, step=1)
            .permute(3, 0, 1, 2)
            .view(self.planning_steps, num_steps * num_samplers, action_embed_size)
        )  # (k, T*N, -1)

        ## project the observation embeddings, flatten the beliefs (GRU output)
        obs_embeds = aux_model.visual_mlp(obs_embeds)  # (T, N, D)
        beliefs = beliefs.view(1, num_steps * num_samplers, -1)  # (1, T*N, -1)

        # get future contexts c_{t+1:t+k} = GRU(b_t, a_{t:t+k-1})
        future_contexts_all, _ = aux_model.context_model(
            action_seq, beliefs
        )  # (k, T*N, -1)

        future_contexts_all = aux_model.belief_mlp(future_contexts_all)  # (k, T*N, D)
        # Flatten using the actual projection size rather than a hard-coded
        # one, so that models with a different projection size work as well.
        future_contexts_all = future_contexts_all.view(
            -1, future_contexts_all.shape[-1]
        )  # (k*T*N, D)

        obs_embeds = obs_embeds.view(
            num_steps * num_samplers, obs_embeds.shape[-1]
        ).permute(
            1, 0
        )  # (D, T*N)

        # score every future context against every observation embedding
        visual_logits = torch.matmul(future_contexts_all, obs_embeds)
        visual_log_probs = F.log_softmax(visual_logits, dim=1)  ## (k*T*N, T*N)

        target = torch.zeros(
            (self.planning_steps, num_steps, num_samplers),
            dtype=torch.long,
            device=beliefs.device,
        )  # (k, T, N)
        loss_mask = torch.zeros(
            (self.planning_steps, num_steps, num_samplers), device=beliefs.device
        )  # (k, T, N)

        # Read the masks on the host: a single transfer instead of one
        # device synchronization per (step, sampler) in the loops below.
        masks_host = masks.detach().cpu()

        num_valid_before = 0
        for j in range(num_samplers):
            for i in range(num_steps):
                # column of observation (i, j) in the (T*N) flattening
                index = i * num_samplers + j

                # step 0 and steps with a zero mask have no usable predecessor
                if i == 0 or masks_host[i, j].item() == 0:
                    num_valid_before = 0
                    continue

                num_valid_before += 1
                # observation (i, j) is the target of the (back+1)-step
                # prediction made at step i - (back + 1)
                for back in range(min(num_valid_before, self.planning_steps)):
                    target[back, i - (back + 1), j] = index
                    loss_mask[back, i - (back + 1), j] = 1.0

        target = target.view(-1)  # (k*T*N,)

        # NOTE: the inputs are already log-probabilities; the log-softmax
        # applied inside the cross entropy loss leaves them unchanged (up to
        # floating point error).
        loss_value = self.cross_entropy_loss(visual_log_probs, target)
        loss_value = loss_value.view(
            self.planning_steps, num_steps, num_samplers, 1
        )  # (k, T, N, 1)

        loss_mask = loss_mask.unsqueeze(-1)  # (k, T, N, 1)
        loss_valid_masks = loss_mask * _bernoulli_subsample_mask_like(
            loss_mask, self.subsample_rate
        )  # (k, T, N, 1)

        num_valid_losses = torch.count_nonzero(loss_valid_masks)
        # the clamp avoids a division by zero when no entry was kept
        avg_multi_class_loss = (loss_value * loss_valid_masks).sum() / torch.clamp(
            num_valid_losses, min=1.0
        )

        return (
            avg_multi_class_loss,
            {
                "total": cast(torch.Tensor, avg_multi_class_loss).item(),
            },
        )
######## CPCA Softmax variants ######
class CPCA1SoftMaxLoss(CPCASoftMaxLoss):
    """`CPCASoftMaxLoss` preset with `planning_steps` fixed to 1."""

    UUID = "cpcA_SOFTMAX_1"

    def __init__(self, subsample_rate: float = 1, *args, **kwargs):
        # Any extra positional/keyword arguments are forwarded to the parent untouched.
        super().__init__(
            *args, planning_steps=1, subsample_rate=subsample_rate, **kwargs
        )
class CPCA2SoftMaxLoss(CPCASoftMaxLoss):
    """`CPCASoftMaxLoss` preset with `planning_steps` fixed to 2."""

    UUID = "cpcA_SOFTMAX_2"

    def __init__(self, subsample_rate: float = 1, *args, **kwargs):
        # Any extra positional/keyword arguments are forwarded to the parent untouched.
        super().__init__(
            *args, planning_steps=2, subsample_rate=subsample_rate, **kwargs
        )
class CPCA4SoftMaxLoss(CPCASoftMaxLoss):
    """`CPCASoftMaxLoss` preset with `planning_steps` fixed to 4."""

    UUID = "cpcA_SOFTMAX_4"

    def __init__(self, subsample_rate: float = 1, *args, **kwargs):
        # Any extra positional/keyword arguments are forwarded to the parent untouched.
        super().__init__(
            *args, planning_steps=4, subsample_rate=subsample_rate, **kwargs
        )
class CPCA8SoftMaxLoss(CPCASoftMaxLoss):
    """`CPCASoftMaxLoss` preset with `planning_steps` fixed to 8."""

    UUID = "cpcA_SOFTMAX_8"

    def __init__(self, subsample_rate: float = 1, *args, **kwargs):
        # Any extra positional/keyword arguments are forwarded to the parent untouched.
        super().__init__(
            *args, planning_steps=8, subsample_rate=subsample_rate, **kwargs
        )
class CPCA16SoftMaxLoss(CPCASoftMaxLoss):
    """`CPCASoftMaxLoss` preset with `planning_steps` fixed to 16."""

    UUID = "cpcA_SOFTMAX_16"

    def __init__(self, subsample_rate: float = 1, *args, **kwargs):
        # Any extra positional/keyword arguments are forwarded to the parent untouched.
        super().__init__(
            *args, planning_steps=16, subsample_rate=subsample_rate, **kwargs
        )
###########
class CPCA1Loss(CPCALoss):
    """`CPCALoss` preset with `planning_steps` fixed to 1."""

    UUID = "CPCA_1"

    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):
        # Any extra positional/keyword arguments are forwarded to the parent untouched.
        super().__init__(
            *args, planning_steps=1, subsample_rate=subsample_rate, **kwargs
        )
class CPCA2Loss(CPCALoss):
    """`CPCALoss` preset with `planning_steps` fixed to 2."""

    UUID = "CPCA_2"

    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):
        # Any extra positional/keyword arguments are forwarded to the parent untouched.
        super().__init__(
            *args, planning_steps=2, subsample_rate=subsample_rate, **kwargs
        )
class CPCA4Loss(CPCALoss):
    """`CPCALoss` preset with `planning_steps` fixed to 4."""

    UUID = "CPCA_4"

    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):
        # Any extra positional/keyword arguments are forwarded to the parent untouched.
        super().__init__(
            *args, planning_steps=4, subsample_rate=subsample_rate, **kwargs
        )
class CPCA8Loss(CPCALoss):
    """`CPCALoss` preset with `planning_steps` fixed to 8."""

    UUID = "CPCA_8"

    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):
        # Any extra positional/keyword arguments are forwarded to the parent untouched.
        super().__init__(
            *args, planning_steps=8, subsample_rate=subsample_rate, **kwargs
        )
class CPCA16Loss(CPCALoss):
    """`CPCALoss` preset with `planning_steps` fixed to 16."""

    UUID = "CPCA_16"

    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):
        # Any extra positional/keyword arguments are forwarded to the parent untouched.
        super().__init__(
            *args, planning_steps=16, subsample_rate=subsample_rate, **kwargs
        )
================================================
FILE: allenact/embodiedai/mapping/__init__.py
================================================
================================================
FILE: allenact/embodiedai/mapping/mapping_losses.py
================================================
import torch
from torch.nn import functional as F
from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
AbstractActorCriticLoss,
)
from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput
class BinnedPointCloudMapLoss(AbstractActorCriticLoss):
    """Binary cross entropy loss for training metric map (free space)
    predictions against binned point cloud maps."""

    def __init__(
        self,
        binned_pc_uuid: str,
        map_logits_uuid: str,
    ):
        """Initializer.

        # Parameters

        binned_pc_uuid : The uuid of a sensor returning a dictionary with an
            "egocentric_update" key with the same format as returned by
            `allenact.embodiedai.mapping.mapping_utils.map_builders.BinnedPointCloudMapBuilder`.
            Such a sensor can be found in the `allenact_plugins` library: see
            `allenact_plugins.ithor_plugin.ithor_sensors.BinnedPointCloudMapTHORSensor`.
        map_logits_uuid : Key used to index into `actor_critic_output.extras`
            (returned by the model) whose value should be a tensor of the same
            shape as the tensor corresponding to the above "egocentric_update" key.
        """
        super().__init__()

        self.map_logits_uuid = map_logits_uuid
        self.binned_pc_uuid = binned_pc_uuid

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs,
    ):
        """Compute the mean BCE between predicted map logits and the
        thresholded ground-truth egocentric map.

        # Returns

        A tuple `(loss, info)` where `info` has the single key "binned_pc_map_ce".
        """
        sensor_output = batch["observations"][self.binned_pc_uuid]
        raw_gt = sensor_output["egocentric_update"].float()

        # Collapse all leading dims into one batch dim and go channels-last -> channels-first.
        h, w, c = raw_gt.shape[-3:]
        channels_first_gt = raw_gt.view(-1, h, w, c).permute(0, 3, 1, 2).contiguous()

        ego_map_logits = actor_critic_output.extras[self.map_logits_uuid]
        side = ego_map_logits.shape[-1]
        ego_map_logits = ego_map_logits.view(-1, c, side, side)

        assert channels_first_gt.shape == ego_map_logits.shape

        # Ground truth entries above 0.5 are treated as positive targets.
        ego_map_gt_thresholded = (channels_first_gt > 0.5).float()
        total_loss = F.binary_cross_entropy_with_logits(
            ego_map_logits, ego_map_gt_thresholded
        )

        info = {"binned_pc_map_ce": total_loss.item()}
        return total_loss, info

        # FOR DEBUGGING: Save all the ground-truth & predicted maps side by side
        # import numpy as np
        # import imageio
        # for i in range(ego_map_gt_thresholded.shape[0]):
        #     a = ego_map_gt_thresholded[i].permute(1, 2, 0).flip(0).detach().numpy()
        #     b = torch.sigmoid(ego_map_logits)[i].permute(1, 2, 0).flip(0).detach().numpy()
        #
        #     imageio.imwrite(
        #         f"z_occupancy_maps/{i}.png",
        #         np.concatenate((a, 1 + 0 * a[:, :10], b), axis=1),
        #     )
class SemanticMapFocalLoss(AbstractActorCriticLoss):
    """A (focal-loss based) loss for training semantic map predictions.

    As semantic maps tend to be quite sparse this loss uses the focal
    loss (https://arxiv.org/abs/1708.02002) rather than binary cross
    entropy (BCE). If the `gamma` parameter is 0.0 then this is just the
    normal BCE, larger values of `gamma` result in less and less emphasis
    being paid to examples that are already well classified.
    """

    def __init__(
        self, semantic_map_uuid: str, map_logits_uuid: str, gamma: float = 2.0
    ):
        """Initializer.

        # Parameters

        semantic_map_uuid : The uuid of a sensor returning a dictionary with an
            "egocentric_update" key with the same format as returned by
            `allenact.embodiedai.mapping.mapping_utils.map_builders.SemanticMapBuilder`.
            Such a sensor can be found in the `allenact_plugins` library: see
            `allenact_plugins.ithor_plugin.ithor_sensors.SemanticMapTHORSensor`.
        map_logits_uuid : Key used to index into `actor_critic_output.extras`
            (returned by the model) whose value should be a tensor of the same
            shape as the tensor corresponding to the above "egocentric_update" key.
        gamma : Focusing exponent of the focal loss, must be >= 0.
        """
        super().__init__()
        assert gamma >= 0, f"`gamma` (=={gamma}) must be >= 0"

        self.gamma = gamma
        self.map_logits_uuid = map_logits_uuid
        self.semantic_map_uuid = semantic_map_uuid

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs,
    ):
        """Compute the mean focal loss between predicted map logits and the
        ground-truth egocentric semantic map.

        # Returns

        A tuple `(loss, info)` where `info` has the single key "sem_map_focal_loss".
        """
        sensor_output = batch["observations"][self.semantic_map_uuid]
        ego_map_gt = sensor_output["egocentric_update"]
        # Collapse all leading dims into one batch dim and go channels-last -> channels-first.
        trailing_dims = ego_map_gt.shape[-3:]
        ego_map_gt = ego_map_gt.view(-1, *trailing_dims).permute(0, 3, 1, 2).contiguous()

        ego_map_logits = actor_critic_output.extras[self.map_logits_uuid]
        ego_map_logits = ego_map_logits.view(-1, *ego_map_logits.shape[-3:])

        assert ego_map_gt.shape == ego_map_logits.shape

        # (1 - p) and log(1 - p) are computed from the negated logits rather
        # than by subtracting from one.
        p = torch.sigmoid(ego_map_logits)
        one_minus_p = torch.sigmoid(-ego_map_logits)
        log_p = F.logsigmoid(ego_map_logits)
        log_one_minus_p = F.logsigmoid(-ego_map_logits)

        ego_map_gt = ego_map_gt.float()
        positive_term = ego_map_gt * (log_p * (one_minus_p**self.gamma))
        negative_term = (1 - ego_map_gt) * (log_one_minus_p * (p**self.gamma))
        total_loss = -(positive_term + negative_term).mean()

        info = {"sem_map_focal_loss": total_loss.item()}
        return total_loss, info

        # FOR DEBUGGING: Save all the ground-truth & predicted maps side by side
        # import numpy as np
        # import imageio
        # from allenact.embodiedai.mapping.mapping_utils.map_builders import SemanticMapBuilder
        #
        # print("\n" * 3)
        # for i in range(ego_map_gt.shape[0]):
        #     pred_sem_map = torch.sigmoid(ego_map_logits)[i].permute(1, 2, 0).flip(0).detach()
        #     a = SemanticMapBuilder.randomly_color_semantic_map(ego_map_gt[i].permute(1, 2, 0).flip(0).detach())
        #     b = SemanticMapBuilder.randomly_color_semantic_map(pred_sem_map)
        #     imageio.imwrite(
        #         f"z_semantic_maps/{i}.png",
        #         np.concatenate((a, 255 + a[:, :10] * 0, b), axis=1),
        #     )
        #
================================================
FILE: allenact/embodiedai/mapping/mapping_models/__init__.py
================================================
================================================
FILE: allenact/embodiedai/mapping/mapping_models/active_neural_slam.py
================================================
# MIT License
#
# Original Copyright (c) 2020 Devendra Chaplot
#
# Modified work Copyright (c) 2021 Allen Institute for Artificial Intelligence
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import math
from typing import Optional, Tuple, Dict, Any, cast
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from allenact.utils.model_utils import simple_conv_and_linear_weights_init
# Multiplicative factor converting an angle in degrees to radians.
DEGREES_TO_RADIANS = np.pi / 180.0
# Multiplicative factor converting an angle in radians to degrees.
RADIANS_TO_DEGREES = 180.0 / np.pi
def _inv_sigmoid(x: torch.Tensor):
return torch.log(x) - torch.log1p(-x)
class ActiveNeuralSLAM(nn.Module):
"""Active Neural SLAM module.
This is an implementation of the Active Neural SLAM module
from:
```
Chaplot, D.S., Gandhi, D., Gupta, S., Gupta, A. and Salakhutdinov, R., 2020.
Learning To Explore Using Active Neural SLAM.
In International Conference on Learning Representations (ICLR).
```
Note that this is purely the mapping component and does not include the planning
components from the above paper.
This implementation is adapted from `https://github.com/devendrachaplot/Neural-SLAM`,
we have extended this implementation to allow for an arbitrary number of output map
channels (enabling semantic mapping).
At a high level, this model takes as input RGB egocentric images and outputs metric
map tensors of shape (# channels) x height x width where height/width correspond to the
ground plane of the environment.
"""
def __init__(
    self,
    frame_height: int,
    frame_width: int,
    n_map_channels: int,
    resolution_in_cm: int = 5,
    map_size_in_cm: int = 2400,
    vision_range_in_cm: int = 300,
    use_pose_estimation: bool = False,
    pretrained_resnet: bool = True,
    freeze_resnet_batchnorm: bool = True,
    use_resnet_layernorm: bool = False,
):
    """Initialize an Active Neural SLAM module.

    # Parameters

    frame_height : The height of the RGB images given to this module on calls to `forward`.
    frame_width : The width of the RGB images given to this module on calls to `forward`.
    n_map_channels : The number of output channels in the output maps.
    resolution_in_cm : The resolution of the output map, see `map_size_in_cm`.
    map_size_in_cm : The height & width of the map in centimeters. The size of the map
        tensor returned on calls to forward will be `map_size_in_cm/resolution_in_cm`. Note
        that `map_size_in_cm` must be divisible by `resolution_in_cm`.
    vision_range_in_cm : Given an RGB image input, this module will transform this image into
        an "egocentric map" with height and width equaling `vision_range_in_cm/resolution_in_cm`.
        This egocentric map corresponds to the area of the world directly in front of the agent.
        This "egocentric map" will be rotated/translated into the allocentric reference frame and
        used to update the larger, allocentric, map whose
        height and width equal `map_size_in_cm/resolution_in_cm`. Thus this parameter controls
        how much of the map will be updated on every step. The quantity
        `vision_range_in_cm // resolution_in_cm` must be divisible by 8 (this is asserted below).
    use_pose_estimation : Whether or not we should estimate the agent's change in position/rotation.
        If `False`, you'll need to provide the ground truth changes in position/rotation.
    pretrained_resnet : Whether or not to use ImageNet pre-trained model weights for the ResNet18
        backbone.
    freeze_resnet_batchnorm : Whether or not the batch normalization layers in the ResNet18 backbone
        should be frozen and batchnorm updates disabled. You almost certainly want this to be `True`
        as using batch normalization during RL training results in all sorts of issues unless you're
        very careful.
    use_resnet_layernorm : If you've enabled `freeze_resnet_batchnorm` (recommended) you'll likely want
        to normalize the output from the ResNet18 model as we've found that these values can otherwise
        grow quite large harming learning. Requires `freeze_resnet_batchnorm` to be `True`.
    """
    super(ActiveNeuralSLAM, self).__init__()
    self.frame_height = frame_height
    self.frame_width = frame_width
    self.n_map_channels = n_map_channels
    self.resolution_in_cm = resolution_in_cm
    self.map_size_in_cm = map_size_in_cm
    self.input_channels = 3
    self.vision_range_in_cm = vision_range_in_cm
    self.dropout = 0.5
    self.use_pose_estimation = use_pose_estimation
    self.freeze_resnet_batchnorm = freeze_resnet_batchnorm

    self.max_abs_map_logit_value = 20

    # Visual Encoding
    resnet = models.resnet18(pretrained=pretrained_resnet)
    # Keep only the first 8 children of the ResNet (presumably everything
    # before the pooling / classification head -- confirm against torchvision).
    self.resnet_l5 = nn.Sequential(*list(resnet.children())[0:8])
    self.conv = nn.Sequential(
        nn.Conv2d(512, 64, (1, 1), stride=(1, 1)),
        nn.ReLU(),
    )
    self.bn_modules = [
        module
        for module in self.resnet_l5.modules()
        if "BatchNorm" in type(module).__name__
    ]
    if freeze_resnet_batchnorm:
        for bn in self.bn_modules:
            bn.momentum = 0

    # Layernorm (if requested)
    self.use_resnet_layernorm = use_resnet_layernorm
    if self.use_resnet_layernorm:
        assert (
            self.freeze_resnet_batchnorm
        ), "When using layernorm, we require that set `freeze_resnet_batchnorm` to True."
        # NOTE(review): the normalized shape is hard-coded to a 512 x 7 x 7
        # feature map, so this presumably only works for frame sizes whose
        # ResNet features are 7 x 7 -- confirm.
        self.resnet_normalizer = nn.Sequential(
            nn.Conv2d(512, 512, 1),
            nn.LayerNorm(
                normalized_shape=[512, 7, 7],
                elementwise_affine=True,
            ),
        )
        self.resnet_normalizer.apply(simple_conv_and_linear_weights_init)
    else:
        self.resnet_normalizer = nn.Identity()

    # convolution output size
    input_test = torch.randn(
        1, self.input_channels, self.frame_height, self.frame_width
    )
    # The probe below only exists to discover the flattened feature size. Run
    # it in eval mode and without autograd so that it cannot alter batchnorm
    # running statistics; `self.train()` at the end of this initializer
    # restores the training mode.
    self.resnet_l5.eval()
    with torch.no_grad():
        # Have to explicitly call .forward to get past LGTM checks as it thinks nn.Sequential isn't callable
        conv_output = self.conv.forward(self.resnet_l5.forward(input_test))

    self.conv_output_size = conv_output.numel()

    # projection layer
    self.proj1 = nn.Linear(self.conv_output_size, 1024)
    # Three stride-2 transposed convolutions follow, hence the factor of 8.
    assert self.vision_range % 8 == 0
    self.deconv_in_height = self.vision_range // 8
    self.deconv_in_width = self.deconv_in_height
    self.n_input_channels_for_deconv = 64
    proj2_out_size = 64 * self.deconv_in_height * self.deconv_in_width
    self.proj2 = nn.Linear(1024, proj2_out_size)

    if self.dropout > 0:
        self.dropout1 = nn.Dropout(self.dropout)
        self.dropout2 = nn.Dropout(self.dropout)

    # Deconv layers to predict map
    self.deconv = nn.Sequential(
        nn.ConvTranspose2d(
            self.n_input_channels_for_deconv,
            32,
            (4, 4),
            stride=(2, 2),
            padding=(1, 1),
        ),
        nn.ReLU(),
        nn.ConvTranspose2d(32, 16, (4, 4), stride=(2, 2), padding=(1, 1)),
        nn.ReLU(),
        nn.ConvTranspose2d(
            16, self.n_map_channels, (4, 4), stride=(2, 2), padding=(1, 1)
        ),
    )

    # Pose Estimator
    self.pose_conv = nn.Sequential(
        nn.Conv2d(2 * self.n_map_channels, 64, (4, 4), stride=(2, 2)),
        nn.ReLU(inplace=True),
        nn.Conv2d(64, 32, (4, 4), stride=(2, 2)),
        nn.ReLU(inplace=True),
        nn.Conv2d(32, 16, (3, 3), stride=(1, 1)),
        nn.ReLU(inplace=True),
        nn.Flatten(),
    )

    # Shape probe only, so no autograd graph is needed.
    with torch.no_grad():
        pose_probe = self.pose_conv.forward(
            torch.zeros(
                1, 2 * self.n_map_channels, self.vision_range, self.vision_range
            )
        )
    self.pose_conv_output_dim = pose_probe.numel()

    # projection layer
    self.pose_proj1 = nn.Linear(self.pose_conv_output_dim, 1024)
    self.pose_proj2_x = nn.Linear(1024, 128)
    self.pose_proj2_z = nn.Linear(1024, 128)
    self.pose_proj2_o = nn.Linear(1024, 128)
    self.pose_proj3_x = nn.Linear(128, 1)
    # NOTE: named `_y` although it produces the z prediction; the name is kept
    # because it is part of the module's state dict keys.
    self.pose_proj3_y = nn.Linear(128, 1)
    self.pose_proj3_o = nn.Linear(128, 1)

    if self.dropout > 0:
        self.pose_dropout1 = nn.Dropout(self.dropout)

    self.train()
@property
def device(self):
d = self.pose_proj1.weight.get_device()
if d < 0:
return torch.device("cpu")
return torch.device(d)
def train(self, mode: bool = True):
super().train(mode=mode)
if mode and self.freeze_resnet_batchnorm:
for module in self.bn_modules:
module.eval()
@property
def map_size(self):
return self.map_size_in_cm // self.resolution_in_cm
@property
def vision_range(self):
return self.vision_range_in_cm // self.resolution_in_cm
def image_to_egocentric_map_logits(
self,
images: Optional[torch.Tensor],
resnet_image_features: Optional[torch.Tensor] = None,
):
if resnet_image_features is None:
bs, _, _, _ = images.size()
resnet_image_features = self.resnet_normalizer(
self.resnet_l5(images[:, :3, :, :])
)
else:
bs = resnet_image_features.shape[0]
conv_output = self.conv(resnet_image_features)
proj1 = F.relu(self.proj1(conv_output.reshape(-1, self.conv_output_size)))
if self.dropout > 0:
proj1 = self.dropout1(proj1)
proj3 = F.relu(self.proj2(proj1))
deconv_input = proj3.view(
bs,
self.n_input_channels_for_deconv,
self.deconv_in_height,
self.deconv_in_width,
)
deconv_output = self.deconv(deconv_input)
return deconv_output
def allocentric_map_to_egocentric_view(
self, allocentric_map: torch.Tensor, xzr: torch.Tensor, padding_mode: str
):
# Index the egocentric viewpoints at the given xzr locations
with torch.no_grad():
allocentric_map = allocentric_map.float()
xzr = xzr.float()
theta = xzr[:, 2].float() * float(np.pi / 180)
# Here form the rotation matrix
cos_theta = torch.cos(theta)
sin_theta = torch.sin(theta)
rot_mat = torch.stack(
(
torch.stack((cos_theta, -sin_theta), -1),
torch.stack((sin_theta, cos_theta), -1),
),
1,
)
scaler = 2 * (100 / (self.resolution_in_cm * self.map_size))
offset_to_center_the_agent = scaler * xzr[:, :2].unsqueeze(-1) - 1
offset_to_top_of_image = rot_mat @ torch.FloatTensor([0, 1.0]).unsqueeze(
1
).to(self.device)
rotation_and_translate_mat = torch.cat(
(
rot_mat,
offset_to_top_of_image + offset_to_center_the_agent,
),
dim=-1,
)
ego_map = F.grid_sample(
allocentric_map,
F.affine_grid(
rotation_and_translate_mat.to(self.device),
allocentric_map.shape,
),
padding_mode=padding_mode,
align_corners=False,
)
vr = self.vision_range
half_vr = vr // 2
center = self.map_size_in_cm // (2 * self.resolution_in_cm)
cropped = ego_map[:, :, :vr, (center - half_vr) : (center + half_vr)]
return cropped
def estimate_egocentric_dx_dz_dr(
self,
map_probs_egocentric: torch.Tensor,
last_map_probs_egocentric: torch.Tensor,
):
assert last_map_probs_egocentric.shape == map_probs_egocentric.shape
pose_est_input = torch.cat(
(map_probs_egocentric.detach(), last_map_probs_egocentric.detach()), dim=1
)
pose_conv_output = self.pose_conv(pose_est_input)
proj1 = F.relu(self.pose_proj1(pose_conv_output))
if self.dropout > 0:
proj1 = self.pose_dropout1(proj1)
proj2_x = F.relu(self.pose_proj2_x(proj1))
pred_dx = self.pose_proj3_x(proj2_x)
proj2_z = F.relu(self.pose_proj2_z(proj1))
pred_dz = self.pose_proj3_y(proj2_z)
proj2_o = F.relu(self.pose_proj2_o(proj1))
pred_do = self.pose_proj3_o(proj2_o)
return torch.cat((pred_dx, pred_dz, pred_do), dim=1)
@staticmethod
def update_allocentric_xzrs_with_egocentric_movement(
    last_xzrs_allocentric: torch.Tensor,
    dx_dz_drs_egocentric: torch.Tensor,
):
    """Apply an egocentric movement to allocentric (x, z, rotation) poses.

    The egocentric (dx, dz) is rotated by the previous allocentric rotation
    (column 2, in degrees) before being added to the position, the rotation
    change is added to the rotation, and the resulting rotation is wrapped
    with two `fmod` passes (roughly into [-180, 180)).

    # Parameters

    last_xzrs_allocentric : Tensor with columns (x, z, rotation in degrees).
        It is cloned, not modified in place.
    dx_dz_drs_egocentric : Tensor with columns (dx, dz, d-rotation in degrees).

    # Returns

    A new tensor of updated allocentric (x, z, rotation) values.
    """
    updated = last_xzrs_allocentric.clone()

    heading = updated[:, 2] * DEGREES_TO_RADIANS
    sin_heading = torch.sin(heading)
    cos_heading = torch.cos(heading)
    rotation = torch.stack(
        [cos_heading, -sin_heading, sin_heading, cos_heading], dim=-1
    ).view(-1, 2, 2)

    egocentric_shift = dx_dz_drs_egocentric[:, :2].unsqueeze(-1)
    updated[:, :2] += torch.matmul(rotation, egocentric_shift).squeeze(-1)

    updated[:, 2] += dx_dz_drs_egocentric[:, 2]
    # `fmod` keeps the sign of its first argument, hence the two passes.
    updated[:, 2] = torch.fmod(updated[:, 2] - 180.0, 360.0) + 180.0
    updated[:, 2] = torch.fmod(updated[:, 2] + 180.0, 360.0) - 180.0

    return updated
def forward(
self,
images: Optional[torch.Tensor],
last_map_probs_allocentric: Optional[torch.Tensor],
last_xzrs_allocentric: Optional[torch.Tensor],
dx_dz_drs_egocentric: Optional[torch.Tensor],
last_map_logits_egocentric: Optional[torch.Tensor],
return_allocentric_maps=True,
resnet_image_features: Optional[torch.Tensor] = None,
) -> Dict[str, Any]:
"""Create allocentric/egocentric maps predictions given RGB image
inputs.
Here it is assumed that `last_xzrs_allocentric` has been re-centered so that (x, z) == (0,0)
corresponds to the top left of the returned map (with increasing x/z moving to the bottom right of the map).
Note that all maps are oriented so that:
* **Increasing x values** correspond to **increasing columns** in the map(s).
* **Increasing z values** correspond to **increasing rows** in the map(s).
Note that this may seem a bit weird as:
* "north" is pointing downwards in the map,
* if you picture yourself as the agent facing north (i.e. down) in the map, then moving to the right from
the agent's perspective will correspond to **increasing** which column the agent is at:
```
agent facing downwards - - > (dir. to the right of the agent, i.e. moving right corresponds to +cols)
|
|
v (dir. agent faces, i.e. moving ahead corresponds to +rows)
```
This may be the opposite of what you expect.
# Parameters
images : A (# batches) x 3 x height x width tensor of RGB images. These should be
normalized for use with a resnet model. See [here](https_DOC_COLON_//pytorch.org/vision/stable/models.html)
for information (see also the `use_resnet_normalization` parameter of the
`allenact.base_abstractions.sensor.RGBSensor` sensor).
last_map_probs_allocentric : A (# batches) x (map channels) x (map height) x (map width)
tensor representing the colllection of allocentric maps to be updated.
last_xzrs_allocentric : A (# batches) x 3 tensor where `last_xzrs_allocentric[_DOC_COLON_, 0]`
are the agent's (allocentric) x-coordinates on the previous step,
`last_xzrs_allocentric[_DOC_COLON_, 1]` are the agent's (allocentric) z-coordinates from the previous
step, and `last_xzrs_allocentric[_DOC_COLON_, 2]` are the agent's rotations (allocentric, in degrees)
from the prevoius step.
dx_dz_drs_egocentric : A (# batches) x 3 tensor representing the agent's change in x (in meters), z (in meters),
and rotation (in degrees) from the previous step. Note that these changes are "egocentric" so that if the
agent moved 1 meter ahead from it's perspective this should correspond to a dz of +1.0 regardless of
the agent's orientation (similarly moving right would result in a dx of +1.0). This
is ignored (and thus can be `None`) if you are using pose estimation
(i.e. `self.use_pose_estimation` is `True`) or if `return_allocentric_maps` is `False`.
last_map_logits_egocentric : The "egocentric_update" output when calling this function
on the last agent's step. I.e. this should be the egocentric map view of the agent
from the last step. This is used to compute the change in the agent's position rotation.
This is ignored (and thus can be `None`) if you do not wish to estimate the agent's pose
(i.e. `self.use_pose_estimation` is `False`).
return_allocentric_maps : Whether or not to generate new allocentric maps given `last_map_probs_allocentric`
and the new map estimates. Creating these new allocentric maps is expensive so better avoided when
not needed.
resnet_image_features : Sometimes you may wish to compute the ResNet image features yourself for use
in another part of your model. Rather than having to recompute them multiple times, you can
instead compute them once and pass them into this forward call (in this case the input `images`
parameter is ignored). Note that if you're using the `self.resnet_l5` module to compute these
features, be sure to also normalize them with `self.resnet_normalizer` if you have opted to
`use_resnet_layernorm` when initializing this module).
# Returns
A dictionary with keys/values:
* "egocentric_update" - The egocentric map view for the given RGB image. This is what should
be used for computing losses in general.
* "map_logits_probs_update_no_grad" - The egocentric map view after it has been
rotated, translated, and moved into a full-sized allocentric map. This map has been
detached from the computation graph and so should not be used for gradient computations.
This will be `None` if `return_allocentric_maps` was `False`.
* "map_logits_probs_no_grad" - The newly updated allocentric map, this corresponds to
performing a pointwise maximum between `last_map_probs_allocentric` and the
above returned `map_probs_allocentric_update_no_grad`.
This will be `None` if `return_allocentric_maps` was `False`.
* "dx_dz_dr_egocentric_preds" - The predicted change in x, z, and rotation of the agent (from the
egocentric perspective of the agent).
* "xzr_allocentric_preds" - The (predicted if `self.use_pose_estimation == True`) allocentric
(x, z) position and rotation of the agent. This will equal `None` if `self.use_pose_estimation == False`
and `dx_dz_drs_egocentric` is `None`.
"""
# TODO: For consistency we should update things so that:
# "Furthermore, the rotation component of `last_xzrs_allocentric` and `dx_dz_drs_egocentric`
# should be specified in **degrees* with positive rotation corresponding to a **CLOCKWISE**
# rotation (this is the default used by the many game engines)."
map_logits_egocentric = self.image_to_egocentric_map_logits(
images=images, resnet_image_features=resnet_image_features
)
map_probs_egocentric = torch.sigmoid(map_logits_egocentric)
dx_dz_dr_egocentric_preds = None
if last_map_logits_egocentric is not None:
dx_dz_dr_egocentric_preds = self.estimate_egocentric_dx_dz_dr(
map_probs_egocentric=map_probs_egocentric,
last_map_probs_egocentric=torch.sigmoid(last_map_logits_egocentric),
)
if self.use_pose_estimation:
updated_xzrs_allocentrc = (
self.update_allocentric_xzrs_with_egocentric_movement(
last_xzrs_allocentric=last_xzrs_allocentric,
dx_dz_drs_egocentric=dx_dz_dr_egocentric_preds,
)
)
elif dx_dz_drs_egocentric is not None:
updated_xzrs_allocentrc = (
self.update_allocentric_xzrs_with_egocentric_movement(
last_xzrs_allocentric=last_xzrs_allocentric,
dx_dz_drs_egocentric=dx_dz_drs_egocentric,
)
)
else:
updated_xzrs_allocentrc = None
if return_allocentric_maps:
# Aggregate egocentric map prediction in the allocentric map
# using the predicted pose (if `self.use_pose_estimation`) or the ground
# truth pose (if not `self.use_pose_estimation`)
with torch.no_grad():
# Rotate and translate the egocentric map view, we do this grid sampling
# at the level of probabilities as bad results can occur at the logit level
full_size_allocentric_map_probs_update = (
_move_egocentric_map_view_into_allocentric_position(
map_probs_egocentric=map_probs_egocentric,
xzrs_allocentric=updated_xzrs_allocentrc,
allocentric_map_height_width=(self.map_size, self.map_size),
resolution_in_cm=self.resolution_in_cm,
)
)
map_probs_allocentric = torch.max(
last_map_probs_allocentric, full_size_allocentric_map_probs_update
)
else:
full_size_allocentric_map_probs_update = None
map_probs_allocentric = None
return {
"egocentric_update": map_logits_egocentric,
"map_probs_allocentric_update_no_grad": full_size_allocentric_map_probs_update,
"map_probs_allocentric_no_grad": map_probs_allocentric,
"dx_dz_dr_egocentric_preds": dx_dz_dr_egocentric_preds,
"xzr_allocentric_preds": updated_xzrs_allocentrc,
}
def _move_egocentric_map_view_into_allocentric_position(
map_probs_egocentric: torch.Tensor,
xzrs_allocentric: torch.Tensor,
allocentric_map_height_width: Tuple[int, int],
resolution_in_cm: float,
):
"""Translate/rotate an egocentric map view into an allocentric map.
Let's say you have a collection of egocentric maps in a tensor of shape
`(# batches) x (# channels) x (# ego rows) x (# ego columns)`
where these are "egocentric" as we assume the agent is always
at the center of the map and facing "downwards", namely
* **ahead** of the agent should correspond to **increasing rows** in the map(s).
* **right** of the agent should correspond to **increasing columns** in the map(s).
Note that the above is a bit weird as, if you picture yourself as the agent facing
downwards in the map, then moving to the right from the agent perspective. Here's how things
should look if you plotted one of these egocentric maps:
```
center of map - - > (dir. to the right of the agent, i.e. moving right corresponds to +cols)
|
|
v (dir. agent faces, i.e. moving ahead corresponds to +rows)
```
This function is used to translate/rotate the above ego maps so that
they are in the right position/rotation in an allocentric map of size
`(# batches) x (# channels) x (# allocentric_map_height_width[0]) x (# allocentric_map_height_width[1])`.
Adapted from the get_grid function in https://github.com/devendrachaplot/Neural-SLAM.
# Parameters
map_probs_egocentric : Egocentric map views.
xzrs_allocentric : (# batches)x3 tensor with `xzrs_allocentric[:, 0]` being the x-coordinates (in meters),
`xzrs_allocentric[:, 1]` being the z-coordinates (in meters), and `xzrs_allocentric[:, 2]` being the rotation
(in degrees) of the agent in the allocentric reference frame. Here it is assumed that `xzrs_allocentric` has
been re-centered so that (x, z) == (0,0) corresponds to the top left of the returned map (with increasing
x/z moving to the bottom right of the map). Note that positive rotations are in the counterclockwise direction.
allocentric_map_height_width : Height/width of the allocentric map to be returned
resolution_in_cm : Resolution (in cm) of map to be returned (and of map_probs_egocentric). I.e.
`map_probs_egocentric[0,0,0:1,0:1]` should correspond to a `resolution_in_cm x resolution_in_cm`
square on the ground plane in the world.
# Returns
`(# batches) x (# channels) x (# allocentric_map_height_width[0]) x (# allocentric_map_height_width[1])`
tensor where the input `map_probs_egocentric` maps have been rotated/translated so that they
are in the positions specified by `xzrs_allocentric`.
"""
# TODO: For consistency we should update the rotations so they are in the clockwise direction.
# First we place the egocentric map view into the center
# of a map that has the same size as the allocentric map
nbatch, c, ego_h, ego_w = cast(
Tuple[int, int, int, int], map_probs_egocentric.shape
)
allo_h, allo_w = allocentric_map_height_width
max_view_range = math.sqrt((ego_w / 2.0) ** 2 + ego_h**2)
if min(allo_h, allo_w) / 2.0 < max_view_range:
raise NotImplementedError(
f"The shape of your egocentric view (ego_h, ego_w)==({ego_h, ego_w})"
f" is too large relative the size of the allocentric map (allo_h, allo_w)==({allo_h}, {allo_w})."
f" The height/width of your allocentric map should be at least {2 * max_view_range} to allow"
f" for no information to be lost when rotating the egocentric map."
)
full_size_ego_map_update_probs = map_probs_egocentric.new(
nbatch, c, *allocentric_map_height_width
).fill_(0)
assert (ego_h % 2, ego_w % 2, allo_h % 2, allo_w % 2) == (
0,
) * 4, "All map heights/widths should be divisible by 2."
x1 = allo_w // 2 - ego_w // 2
x2 = x1 + ego_w
z1 = allo_h // 2
z2 = z1 + ego_h
full_size_ego_map_update_probs[:, :, z1:z2, x1:x2] = map_probs_egocentric
# Now we'll rotate and translate `full_size_ego_map_update_probs`
# so that the egocentric map view is positioned where it should be
# in the allocentric coordinate frame
# To do this we first need to rescale our allocentric xz coordinates
# so that the center of the map is (0,0) and the top left corner is (-1, -1)
# as this is what's expected by the `affine_grid` function below.
rescaled_xzrs_allocentric = xzrs_allocentric.clone().detach().float()
rescaled_xzrs_allocentric[:, :2] *= (
100.0 / resolution_in_cm
) # Put x / z into map units rather than meters
rescaled_xzrs_allocentric[:, 0] /= allo_w / 2 # x corresponds to columns
rescaled_xzrs_allocentric[:, 1] /= allo_h / 2 # z corresponds to rows
rescaled_xzrs_allocentric[:, :2] -= 1.0 # Re-center
x = rescaled_xzrs_allocentric[:, 0]
z = rescaled_xzrs_allocentric[:, 1]
theta = (
-rescaled_xzrs_allocentric[:, 2] * DEGREES_TO_RADIANS
) # Notice the negative sign
cos_theta = theta.cos()
sin_theta = theta.sin()
zeroes = torch.zeros_like(cos_theta)
ones = torch.ones_like(cos_theta)
theta11 = torch.stack([cos_theta, -sin_theta, zeroes], 1)
theta12 = torch.stack([sin_theta, cos_theta, zeroes], 1)
theta1 = torch.stack([theta11, theta12], 1)
theta21 = torch.stack([ones, zeroes, x], 1)
theta22 = torch.stack([zeroes, ones, z], 1)
theta2 = torch.stack([theta21, theta22], 1)
grid_size = [nbatch, c, allo_h, allo_w]
rot_grid = F.affine_grid(theta1, grid_size)
trans_grid = F.affine_grid(theta2, grid_size)
return F.grid_sample(
F.grid_sample(
full_size_ego_map_update_probs,
rot_grid,
padding_mode="zeros",
align_corners=False,
),
trans_grid,
padding_mode="zeros",
align_corners=False,
)
================================================
FILE: allenact/embodiedai/mapping/mapping_utils/__init__.py
================================================
================================================
FILE: allenact/embodiedai/mapping/mapping_utils/map_builders.py
================================================
# MIT License
#
# Original Copyright (c) 2020 Devendra Chaplot
#
# Modified work Copyright (c) 2021 Allen Institute for Artificial Intelligence
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import random
from typing import Optional, Sequence, Union, Dict
import cv2
import numpy as np
import torch
import torch.nn.functional as F
from allenact.embodiedai.mapping.mapping_utils.point_cloud_utils import (
depth_frame_to_world_space_xyz,
project_point_cloud_to_map,
)
class BinnedPointCloudMapBuilder(object):
    """Class used to iteratively construct a map of "free space" based on input
    depth maps (i.e. pointclouds).

    Adapted from https://github.com/devendrachaplot/Neural-SLAM

    This class can be used to (iteratively) construct a metric map of free space in an environment as
    an agent moves around. After every step the agent takes, you should call the `update` function and
    pass the agent's egocentric depth image along with the agent's new position. This depth map will
    be converted into a pointcloud, binned along the up/down axis, and then projected
    onto a 3-dimensional tensor of shape (HxWxC) where HxW represent the ground plane
    and where C equals the number of bins the up-down coordinate was binned into. This 3d map counts the
    number of points in each bin. Thus a lack of points within a region can be used to infer that
    that region is free space.

    # Attributes

    fov : FOV of the camera used to produce the depth images given when calling `update`.
    vision_range_in_map_units : The maximum distance (in number of rows/columns) that will
        be updated when calling `update`, points outside of this map vision range are ignored.
        Computed as `vision_range_in_cm // resolution_in_cm` from the constructor arguments.
    map_size_in_cm : Total map size in cm.
    resolution_in_cm : Number of cm per row/column in the map.
    height_bins : The bins used to bin the up-down coordinate (for us the y-coordinate). For example,
        if `height_bins = [0.1, 1]` then
        all y-values < 0.1 will be mapped to 0, all y values in [0.1, 1) will be mapped to 1, and
        all y-values >= 1 will be mapped to 2.
        **Importantly:** these y-values will first be recentered by the `min_xyz` value passed when
        calling `reset(...)`.
    return_egocentric_local_context : If `True`, `update` additionally returns an
        `"egocentric_local_context"` entry: the accumulated map resampled into the agent's
        reference frame and cropped to the agent's vision range.
    device : A `torch.device` on which to run computations. If this device is a GPU you can potentially
        obtain significant speed-ups.
    """

    def __init__(
        self,
        fov: float,
        vision_range_in_cm: int,
        map_size_in_cm: int,
        resolution_in_cm: int,
        height_bins: Sequence[float],
        return_egocentric_local_context: bool = False,
        device: torch.device = torch.device("cpu"),
    ):
        assert vision_range_in_cm % resolution_in_cm == 0

        self.fov = fov
        self.vision_range_in_map_units = vision_range_in_cm // resolution_in_cm
        self.map_size_in_cm = map_size_in_cm
        self.resolution_in_cm = resolution_in_cm
        self.height_bins = height_bins
        self.device = device
        self.return_egocentric_local_context = return_egocentric_local_context

        self.binned_point_cloud_map = np.zeros(
            (
                self.map_size_in_cm // self.resolution_in_cm,
                self.map_size_in_cm // self.resolution_in_cm,
                len(self.height_bins) + 1,
            ),
            dtype=np.float32,
        )

        self.min_xyz: Optional[np.ndarray] = None

    def _prepare_depth_frame(self, depth_frame: np.ndarray) -> torch.Tensor:
        """Copy `depth_frame` onto `self.device` and NaN-out far away depths.

        The frame is copied before being wrapped in a tensor: `torch.from_numpy`
        shares memory with its input (and rejects negatively strided arrays),
        so without the copy the masking below would overwrite the caller's array.

        # Parameters

        depth_frame : Depth values in meters.

        # Returns

        A tensor on `self.device` where every depth greater than the vision range
        (`vision_range_in_map_units * resolution_in_cm / 100` meters) is NaN.
        """
        depth_tensor = torch.from_numpy(depth_frame.copy()).to(self.device)
        max_depth_in_meters = (
            self.vision_range_in_map_units * self.resolution_in_cm / 100
        )
        # `float("nan")` rather than `np.NaN`: the latter alias was removed in NumPy 2.0.
        depth_tensor[depth_tensor > max_depth_in_meters] = float("nan")
        return depth_tensor

    def update(
        self,
        depth_frame: np.ndarray,
        camera_xyz: np.ndarray,
        camera_rotation: float,
        camera_horizon: float,
    ) -> Dict[str, np.ndarray]:
        """Updates the map with the input depth frame from the agent.

        See the `allenact.embodiedai.mapping.mapping_utils.point_cloud_utils.project_point_cloud_to_map`
        function for more information input parameter definitions. **We assume that the input
        `depth_frame` has depths recorded in meters**. The input `depth_frame` is not modified.

        # Parameters

        depth_frame : The agent's egocentric depth image (in meters).
        camera_xyz : The camera's world-space xyz position, `min_xyz` (see `reset`) is subtracted
            from it before use.
        camera_rotation : The world-space rotation of the camera in degrees.
        camera_horizon : The horizon of the camera in degrees.

        # Returns

        Let `map_size = self.map_size_in_cm // self.resolution_in_cm`. Returns a dictionary with keys-values:

        * `"egocentric_update"` - A tensor of shape
            `(vision_range_in_map_units)x(vision_range_in_map_units)x(len(self.height_bins) + 1)` corresponding
            to the binned pointcloud after having been centered on the agent and rotated so that
            points ahead of the agent correspond to larger row indices and points further to the right of the agent
            correspond to larger column indices. Note that by "centered" we mean that one can picture
             the agent as being positioned at (0, vision_range_in_map_units/2) and facing downward. Each entry in this tensor
             is a count equaling the number of points in the pointcloud that, once binned, fell into this
            entry. This is likely the output you want to use if you want to build a model to predict free space from an image.
        * `"allocentric_update"` - A `(map_size)x(map_size)x(len(self.height_bins) + 1)` corresponding
            to `"egocentric_update"` but rotated to the world-space coordinates. This `allocentric_update`
             is what is used to update the internally stored representation of the map.
        *  `"map"` -  A `(map_size)x(map_size)x(len(self.height_bins) + 1)` tensor corresponding
            to the sum of all `"allocentric_update"` values since the last `reset()`.
        * `"egocentric_local_context"` - Only present when `self.return_egocentric_local_context`
            is `True`. The accumulated `"map"` rotated into the agent's orientation and cropped
            in the same way as `"egocentric_update"`.
        """
        with torch.no_grad():
            assert self.min_xyz is not None, "Please call `reset` before `update`."

            camera_xyz = (
                torch.from_numpy(camera_xyz - self.min_xyz).float().to(self.device)
            )

            depth_frame = self._prepare_depth_frame(depth_frame)

            world_space_point_cloud = depth_frame_to_world_space_xyz(
                depth_frame=depth_frame,
                camera_world_xyz=camera_xyz,
                rotation=camera_rotation,
                horizon=camera_horizon,
                fov=self.fov,
            )

            world_binned_map_update = project_point_cloud_to_map(
                xyz_points=world_space_point_cloud,
                bin_axis="y",
                bins=self.height_bins,
                map_size=self.binned_point_cloud_map.shape[0],
                resolution_in_cm=self.resolution_in_cm,
                flip_row_col=True,
            )

            # Center the cloud on the agent
            recentered_point_cloud = world_space_point_cloud - (
                torch.FloatTensor([1.0, 0.0, 1.0]).to(self.device) * camera_xyz
            ).reshape((1, 1, 3))
            # Rotate the cloud so that positive-z is the direction the agent is looking
            theta = (
                np.pi * camera_rotation / 180
            )  # No negative since THOR rotations are already backwards
            cos_theta = np.cos(theta)
            sin_theta = np.sin(theta)
            rotation_transform = torch.FloatTensor(
                [
                    [cos_theta, 0, -sin_theta],
                    [0, 1, 0],  # unchanged
                    [sin_theta, 0, cos_theta],
                ]
            ).to(self.device)
            rotated_point_cloud = recentered_point_cloud @ rotation_transform.T
            # Shift along x so the agent sits at the horizontal center of the map
            xoffset = (self.map_size_in_cm / 100) / 2
            agent_centric_point_cloud = rotated_point_cloud + torch.FloatTensor(
                [xoffset, 0, 0]
            ).to(self.device)

            allocentric_update_numpy = world_binned_map_update.cpu().numpy()
            self.binned_point_cloud_map = (
                self.binned_point_cloud_map + allocentric_update_numpy
            )

            agent_centric_binned_map = project_point_cloud_to_map(
                xyz_points=agent_centric_point_cloud,
                bin_axis="y",
                bins=self.height_bins,
                map_size=self.binned_point_cloud_map.shape[0],
                resolution_in_cm=self.resolution_in_cm,
                flip_row_col=True,
            )
            # Crop to the rows/columns within the agent's vision range
            vr = self.vision_range_in_map_units
            vr_div_2 = self.vision_range_in_map_units // 2
            width_div_2 = agent_centric_binned_map.shape[1] // 2
            agent_centric_binned_map = agent_centric_binned_map[
                :vr, (width_div_2 - vr_div_2) : (width_div_2 + vr_div_2), :
            ]

            to_return = {
                "egocentric_update": agent_centric_binned_map.cpu().numpy(),
                "allocentric_update": allocentric_update_numpy,
                "map": self.binned_point_cloud_map,
            }

            if self.return_egocentric_local_context:
                # See the update function of the semantic map sensor for in depth comments regarding the below
                # Essentially we are simply rotating the full map into the orientation of the agent and then
                # selecting a smaller region around the agent.
                theta = -np.pi * camera_rotation / 180
                cos_theta = np.cos(theta)
                sin_theta = np.sin(theta)
                rot_mat = torch.FloatTensor(
                    [[cos_theta, -sin_theta], [sin_theta, cos_theta]]
                ).to(self.device)

                move_back_offset = (
                    -0.5
                    * (self.vision_range_in_map_units * self.resolution_in_cm / 100)
                ) * (
                    rot_mat
                    @ torch.tensor(
                        [0, 1], dtype=torch.float, device=self.device
                    ).unsqueeze(-1)
                )

                map_size = self.binned_point_cloud_map.shape[0]
                scaler = 2 * (100 / (self.resolution_in_cm * map_size))
                offset_to_center_the_agent = (
                    scaler
                    * (
                        torch.tensor(
                            [
                                camera_xyz[0],
                                camera_xyz[2],
                            ],
                            dtype=torch.float,
                            device=self.device,
                        ).unsqueeze(-1)
                        + move_back_offset
                    )
                    - 1
                )
                offset_to_top_of_image = rot_mat @ torch.FloatTensor(
                    [0, 1.0]
                ).unsqueeze(1).to(self.device)
                rotation_and_translate_mat = torch.cat(
                    (
                        rot_mat,
                        offset_to_top_of_image + offset_to_center_the_agent,
                    ),
                    dim=1,
                )

                full_map_tensor = (
                    torch.tensor(
                        self.binned_point_cloud_map,
                        dtype=torch.float,
                        device=self.device,
                    )
                    .unsqueeze(0)
                    .permute(0, 3, 1, 2)
                )
                full_ego_map = (
                    F.grid_sample(
                        full_map_tensor,
                        F.affine_grid(
                            rotation_and_translate_mat.to(self.device).unsqueeze(0),
                            full_map_tensor.shape,
                            align_corners=False,
                        ),
                        align_corners=False,
                    )
                    .squeeze(0)
                    .permute(1, 2, 0)
                )

                egocentric_local_context = full_ego_map[
                    :vr, (width_div_2 - vr_div_2) : (width_div_2 + vr_div_2), :
                ]

                to_return["egocentric_local_context"] = (
                    egocentric_local_context.cpu().numpy()
                )

            return to_return

    def reset(self, min_xyz: np.ndarray):
        """Reset the map.

        Resets the internally stored map.

        # Parameters
        min_xyz : An array of size (3,) corresponding to the minimum possible x, y, and z values that will be observed
            as a point in a pointcloud when calling `.update(...)`. The (world-space) maps returned by calls to `update`
            will have been normalized so the (0,0,:) entry corresponds to these minimum values.
        """
        self.min_xyz = min_xyz
        self.binned_point_cloud_map = np.zeros_like(self.binned_point_cloud_map)
class ObjectHull2d:
    def __init__(
        self,
        object_id: str,
        object_type: str,
        hull_points: Union[np.ndarray, Sequence[Sequence[float]]],
    ):
        """Container for the 2d convex hull of an object after projection
        onto the ground plane.

        # Parameters
        object_id : A unique id for the object.
        object_type : The type of the object.
        hull_points : A Nx2 matrix with `hull_points[:, 0]` being the x coordinates and `hull_points[:, 1]` being
            the `z` coordinates (this is using the Unity game engine conventions where the `y` axis is up/down).
            Anything that is not already a numpy array is converted to one.
        """
        # Arrays are stored as given (no copy); other sequences are converted.
        if not isinstance(hull_points, np.ndarray):
            hull_points = np.array(hull_points)

        self.hull_points = hull_points
        self.object_type = object_type
        self.object_id = object_id
class SemanticMapBuilder(object):
    """Class used to iteratively construct a semantic map based on input depth
    maps (i.e. pointclouds).

    Adapted from https://github.com/devendrachaplot/Neural-SLAM

    This class can be used to (iteratively) construct a semantic map of objects in the environment.

    This map is similar to that generated by `BinnedPointCloudMapBuilder` (see its documentation for
    more information) but the various channels correspond to different object types. Thus
    if the `(i,j,k)` entry of a map generated by this function is `True`, this means that an
    object of type `k` is present in position `i,j` in the map. In particular, by "present" we mean that,
    after projecting the object to the ground plane and taking the convex hull of the resulting
    2d object, a non-trivial portion of this convex hull overlaps the `i,j` position.

    For attribute information, see the documentation of the `BinnedPointCloudMapBuilder` class. The
    only attribute present in this class that is not present in `BinnedPointCloudMapBuilder` is
    `ordered_object_types` which corresponds to a list of unique object types where
    object type `ordered_object_types[i]` will correspond to the `i`th channel of the map
    generated by this class.
    """

    def __init__(
        self,
        fov: float,
        vision_range_in_cm: int,
        map_size_in_cm: int,
        resolution_in_cm: int,
        ordered_object_types: Sequence[str],
        device: torch.device = torch.device("cpu"),
    ):
        self.fov = fov
        self.vision_range_in_map_units = vision_range_in_cm // resolution_in_cm
        self.map_size_in_cm = map_size_in_cm
        self.resolution_in_cm = resolution_in_cm
        self.ordered_object_types = tuple(ordered_object_types)
        self.device = device

        # Maps each object type to the map channel it is drawn into.
        self.object_type_to_index = {
            ot: i for i, ot in enumerate(self.ordered_object_types)
        }

        # One uint8 channel per object type; nonzero where an object's hull was drawn.
        self.ground_truth_semantic_map = np.zeros(
            (
                self.map_size_in_cm // self.resolution_in_cm,
                self.map_size_in_cm // self.resolution_in_cm,
                len(self.ordered_object_types),
            ),
            dtype=np.uint8,
        )
        # Cells that have been observed by some call to `update` since the last `reset`.
        self.explored_mask = np.zeros(
            (
                self.map_size_in_cm // self.resolution_in_cm,
                self.map_size_in_cm // self.resolution_in_cm,
                1,
            ),
            dtype=bool,
        )

        self.min_xyz: Optional[np.ndarray] = None

    @staticmethod
    def randomly_color_semantic_map(
        map: Union[np.ndarray, torch.Tensor], threshold: float = 0.5, seed: int = 1
    ) -> np.ndarray:
        """Render a semantic map as an RGB image with one random color per channel.

        # Parameters
        map : A (height x width x channels) semantic map.
        threshold : A channel is considered active at a position when its value is `>= threshold`.
        seed : Seed for the random colors so that repeated calls color channels identically.

        # Returns
        A (height x width x 3) `uint8` image. Positions where no channel is active are black,
        when several channels are active the color of the highest-index one is used.
        """
        if not isinstance(map, np.ndarray):
            map = np.array(map)

        rnd = random.Random(seed)
        num_channels = map.shape[-1]

        # 0 where nothing is active, otherwise (index of the last active channel) + 1.
        semantic_int_mat = (
            (map >= threshold) * np.arange(1, num_channels + 1).reshape((1, 1, -1))
        ).max(-1)

        # `randint` includes both endpoints, so the upper bound must be 255: a value
        # of 256 does not fit in a uint8 and would wrap around to 0.
        palette = np.array(
            [(0, 0, 0)]
            + [
                tuple(rnd.randint(0, 255) for _ in range(3))
                for _ in range(num_channels)
            ]
        )
        # noinspection PyTypeChecker
        return np.uint8(palette[semantic_int_mat])

    def _xzs_to_colrows(self, xzs: np.ndarray):
        """Convert world-space (x, z) points (in meters) to integer (col, row)
        map indices, clipped so they lie inside the map."""
        height, width, _ = self.ground_truth_semantic_map.shape
        return np.clip(
            np.int32(
                (
                    (100 / self.resolution_in_cm)
                    * (xzs - np.array([[self.min_xyz[0], self.min_xyz[2]]]))
                )
            ),
            a_min=0,
            a_max=np.array(
                [width - 1, height - 1]
            ),  # width then height as we're returning cols then rows
        )

    def build_ground_truth_map(self, object_hulls: Sequence["ObjectHull2d"]):
        """Redraw `self.ground_truth_semantic_map` from the given object hulls.

        Hulls whose `object_type` is not in `self.ordered_object_types` are ignored.
        """
        self.ground_truth_semantic_map.fill(0)

        for object_hull in object_hulls:
            ot = object_hull.object_type

            if ot in self.object_type_to_index:
                ind = self.object_type_to_index[ot]

                self.ground_truth_semantic_map[:, :, ind : (ind + 1)] = (
                    cv2.fillConvexPoly(
                        img=np.array(
                            self.ground_truth_semantic_map[:, :, ind : (ind + 1)],
                            dtype=np.uint8,
                        ),
                        points=self._xzs_to_colrows(np.array(object_hull.hull_points)),
                        color=255,
                    )
                )

    def _prepare_depth_frame(self, depth_frame: np.ndarray) -> torch.Tensor:
        """Copy `depth_frame` onto `self.device` and NaN-out far away depths.

        The frame is copied before being wrapped in a tensor: `torch.from_numpy`
        shares memory with its input (and rejects negatively strided arrays),
        so without the copy the masking below would overwrite the caller's array.

        # Parameters

        depth_frame : Depth values in meters.

        # Returns

        A tensor on `self.device` where every depth greater than the vision range
        (`vision_range_in_map_units * resolution_in_cm / 100` meters) is NaN.
        """
        depth_tensor = torch.from_numpy(depth_frame.copy()).to(self.device)
        max_depth_in_meters = (
            self.vision_range_in_map_units * self.resolution_in_cm / 100
        )
        # `float("nan")` rather than `np.NaN`: the latter alias was removed in NumPy 2.0.
        depth_tensor[depth_tensor > max_depth_in_meters] = float("nan")
        return depth_tensor

    def update(
        self,
        depth_frame: np.ndarray,
        camera_xyz: np.ndarray,
        camera_rotation: float,
        camera_horizon: float,
    ) -> Dict[str, np.ndarray]:
        """Updates the map with the input depth frame from the agent.

        See the documentation for `BinnedPointCloudMapBuilder.update`,
        the inputs and outputs are similar except that channels are used
        to represent the presence/absence of objects of given types.
        Unlike `BinnedPointCloudMapBuilder.update`, this function also
        returns two masks with keys `"egocentric_mask"` and `"explored_mask"`
        that can be used to determine what portions of the map have been
        observed by the agent so far in the egocentric and world-space
        reference frames respectively. The input `depth_frame` is not modified.

        # Returns

        A dictionary with keys `"egocentric_update"`, `"egocentric_mask"`,
        `"explored_mask"`, and `"map"`.
        """
        with torch.no_grad():
            assert self.min_xyz is not None
            # Cast to float32 so every later computation runs in a single precision.
            camera_xyz = (
                torch.from_numpy(camera_xyz - self.min_xyz).float().to(self.device)
            )
            map_size = self.ground_truth_semantic_map.shape[0]

            depth_frame = self._prepare_depth_frame(depth_frame)

            world_space_point_cloud = depth_frame_to_world_space_xyz(
                depth_frame=depth_frame,
                camera_world_xyz=camera_xyz,
                rotation=camera_rotation,
                horizon=camera_horizon,
                fov=self.fov,
            )

            # With no bins every point lands in a single channel, so this marks
            # the map cells that received at least one point.
            world_newly_explored = (
                project_point_cloud_to_map(
                    xyz_points=world_space_point_cloud,
                    bin_axis="y",
                    bins=[],
                    map_size=map_size,
                    resolution_in_cm=self.resolution_in_cm,
                    flip_row_col=True,
                )
                > 0.001
            )
            world_update_and_mask = torch.cat(
                (
                    torch.logical_and(
                        torch.from_numpy(self.ground_truth_semantic_map).to(
                            self.device
                        ),
                        world_newly_explored,
                    ),
                    world_newly_explored,
                ),
                dim=-1,
            ).float()
            world_update_and_mask_for_sample = world_update_and_mask.unsqueeze(
                0
            ).permute(0, 3, 1, 2)

            # We now use grid sampling to rotate world_update_for_sample into the egocentric coordinate
            # frame of the agent so that the agent's forward direction is downwards in the tensor
            # (and its right side is to the right in the image, this means that right/left
            # when taking the perspective of the agent in the image). This convention aligns with
            # what's expected by grid_sample where +x corresponds to +cols and +z corresponds to +rows.
            # Here also the rows/cols have been normalized so that the center of the image is at (0,0)
            # and the bottom right is at (1,1).

            # Mentally you can think of the output from the F.affine_grid function as you wanting
            # rotating/translating an axis-aligned square on the image-to-be-sampled and then
            # copying whatever is in this square to a new image. Note that the translation always
            # happens in the global reference frame after the rotation. We'll start by rotating
            # the square so that the agent's z direction is downwards in the image.
            # Since the global axis of the map and the grid sampling are aligned, this requires
            # rotating the square by the rotation of the agent. As rotation is negative the usual
            # standard in THOR, we need to negate the rotation of the agent.
            theta = -np.pi * camera_rotation / 180

            # Here form the rotation matrix
            cos_theta = np.cos(theta)
            sin_theta = np.sin(theta)
            rot_mat = torch.FloatTensor(
                [[cos_theta, -sin_theta], [sin_theta, cos_theta]]
            ).to(self.device)

            # Now we need to figure out the translation. For an intuitive understanding, we break this
            # translation into two different "offsets". The first offset centers the square on the
            # agent's current location:
            scaler = 2 * (100 / (self.resolution_in_cm * map_size))
            offset_to_center_the_agent = (
                scaler
                * torch.FloatTensor([camera_xyz[0], camera_xyz[2]])
                .unsqueeze(-1)
                .to(self.device)
                - 1
            )
            # The second offset moves the square in the direction of the agent's z direction
            # so that the output image will have the agent's view starting directly at the
            # top of the image.
            offset_to_top_of_image = rot_mat @ torch.FloatTensor([0, 1.0]).unsqueeze(
                1
            ).to(self.device)
            rotation_and_translate_mat = torch.cat(
                (
                    rot_mat,
                    offset_to_top_of_image + offset_to_center_the_agent,
                ),
                dim=1,
            )

            ego_update_and_mask = F.grid_sample(
                world_update_and_mask_for_sample.to(self.device),
                F.affine_grid(
                    rotation_and_translate_mat.to(self.device).unsqueeze(0),
                    world_update_and_mask_for_sample.shape,
                    align_corners=False,
                ),
                align_corners=False,
            )

            # All that's left now is to crop out the portion of the transformed tensor that we actually
            # care about (i.e. the portion corresponding to the agent's `self.vision_range_in_map_units`.
            vr = self.vision_range_in_map_units
            half_vr = vr // 2
            center = self.map_size_in_cm // (2 * self.resolution_in_cm)
            cropped = ego_update_and_mask[
                :, :, :vr, (center - half_vr) : (center + half_vr)
            ]

            np.logical_or(
                self.explored_mask,
                world_newly_explored.cpu().numpy(),
                out=self.explored_mask,
            )

            return {
                "egocentric_update": cropped[0, :-1].permute(1, 2, 0).cpu().numpy(),
                "egocentric_mask": (cropped[0, -1:].view(vr, vr, 1) > 0.001)
                .cpu()
                .numpy(),
                "explored_mask": np.array(self.explored_mask),
                "map": np.logical_and(
                    self.explored_mask, (self.ground_truth_semantic_map > 0)
                ),
            }

    def reset(self, min_xyz: np.ndarray, object_hulls: Sequence["ObjectHull2d"]):
        """Reset the map.

        Resets the internally stored map: the ground truth map is rebuilt from
        `object_hulls` and the record of explored cells is cleared.

        # Parameters
        min_xyz : An array of size (3,) corresponding to the minimum possible x, y, and z values that will be observed
            as a point in a pointcloud when calling `.update(...)`. The (world-space) maps returned by calls to `update`
            will have been normalized so the (0,0,:) entry corresponds to these minimum values.
        object_hulls : The object hulls corresponding to objects in the scene. These will be used to
            construct the map.
        """
        self.min_xyz = min_xyz
        # Without this, cells explored before the reset would still be reported as
        # explored (and hence revealed in the returned `"map"`) afterwards.
        self.explored_mask.fill(False)
        self.build_ground_truth_map(object_hulls=object_hulls)
================================================
FILE: allenact/embodiedai/mapping/mapping_utils/point_cloud_utils.py
================================================
# MIT License
#
# Original Copyright (c) 2020 Devendra Chaplot
#
# Modified work Copyright (c) 2021 Allen Institute for Artificial Intelligence
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import math
from typing import Optional, Sequence, cast
import numpy as np
import torch
from allenact_plugins.ithor_plugin.ithor_util import vertical_to_horizontal_fov
def camera_space_xyz_to_world_xyz(
    camera_space_xyzs: torch.Tensor,
    camera_world_xyz: torch.Tensor,
    rotation: float,
    horizon: float,
) -> torch.Tensor:
    """Maps points from the camera's coordinate frame into the world-space
    (global) xyz frame.

    This code has been adapted from https://github.com/devendrachaplot/Neural-SLAM.

    **IMPORTANT:** We use the conventions from the Unity game engine. In particular:

    * A rotation of 0 corresponds to facing north.
    * Positive rotations correspond to CLOCKWISE rotations. That is a rotation of 90 degrees corresponds
        to facing east. **THIS IS THE OPPOSITE CONVENTION OF THE ONE GENERALLY USED IN MATHEMATICS.**
    * When facing NORTH (rotation==0) moving ahead by 1 meter results in the z coordinate
        increasing by 1. Moving to the right by 1 meter corresponds to increasing the x coordinate by 1.
         Finally moving upwards by 1 meter corresponds to increasing the y coordinate by 1.
         **Having x,z as the ground plane in this way is common in computer graphics but is different than
         the usual mathematical convention of having z be "up".**
    * The horizon corresponds to how far below the horizontal the camera is facing. I.e. a horizon
        of 30 corresponds to the camera being angled downwards at an angle of 30 degrees.

    # Parameters
    camera_space_xyzs : A 3xN matrix of xyz coordinates in the camera's reference frame.
        Here `x, y, z = camera_space_xyzs[:, i]` should equal the xyz coordinates for the ith point.
    camera_world_xyz : The camera's xyz position in the world reference frame.
    rotation : The world-space rotation (in degrees) of the camera.
    horizon : The horizon (in degrees) of the camera.

    # Returns
    3xN tensor with entry [:, i] is the xyz world-space coordinate corresponding to the camera-space
    coordinate camera_space_xyzs[:, i]
    """
    # Adapted from https://github.com/devendrachaplot/Neural-SLAM.

    # Both angles are negated before being converted to radians.
    horizon_rads = -horizon * np.pi / 180
    rotation_rads = -rotation * np.pi / 180

    cos_h, sin_h = np.cos(horizon_rads), np.sin(horizon_rads)
    cos_r, sin_r = np.cos(rotation_rads), np.sin(rotation_rads)

    # Tilt caused by the camera's horizon: acts on y and z, leaves x untouched.
    # fmt: off
    tilt_matrix = camera_space_xyzs.new(
        [
            [1, 0, 0],
            [0, cos_h, sin_h],
            [0, -sin_h, cos_h],
        ],
    )
    # fmt: on

    # Rotation of the agent about the y-axis: acts on x and z, leaves y untouched.
    # fmt: off
    yaw_matrix = camera_space_xyzs.new(
        [
            [cos_r, 0, -sin_r],
            [0, 1, 0],
            [sin_r, 0, cos_r],
        ],
    )
    # fmt: on

    # The tilt is applied first, then the rotation about the y-axis.
    combined_transform = yaw_matrix @ tilt_matrix
    rotated_points = combined_transform @ camera_space_xyzs

    # Finally shift by the camera's world-space position.
    return rotated_points + camera_world_xyz[:, None]
def depth_frame_to_camera_space_xyz(
    depth_frame: torch.Tensor, mask: Optional[torch.Tensor], fov: float = 90
) -> torch.Tensor:
    """Converts a depth map into a point cloud (a collection of xyz points)
    expressed in the camera's coordinate frame.

    # Parameters
    depth_frame : A depth map whose first two dimensions are (height, width), entry
        `depth_frame[i, j]` equals the distance from the camera to nearest surface at pixel (i,j).
    mask : An optional boolean mask of the same size as the input depth. Only values
        where this mask are true will be included in the returned matrix of xyz coordinates. If
        `None` then no pixels will be masked out (so the returned matrix of xyz points will have
        one column per pixel).
    fov: The field of view of the camera in degrees. It is used directly for the vertical
        direction and converted with `vertical_to_horizontal_fov` for the horizontal direction.

    # Returns

    A 3xN matrix with entry [:, i] equalling the xyz coordinates (in the camera's coordinate
        frame) of a point in the point cloud corresponding to the input depth frame.
    """
    height, width = depth_frame.shape[:2]
    if mask is None:
        mask = torch.ones_like(depth_frame, dtype=torch.bool)

    # Indices of the selected pixels (first row holds the image rows, the rest the
    # image columns), shifted by 0.5 so that they refer to the pixel centers.
    pixel_centers = torch.stack(torch.where(mask)) + 0.5

    # Measure offsets from the image center.
    row_offsets = pixel_centers[:1] - height / 2.0
    col_offsets = pixel_centers[1:] - width / 2.0

    # Image rows grow downwards, flip them so that "up" in y is positive.
    row_offsets = row_offsets * -1

    # Scale the offsets so that the points lie on the clipping plane.
    vertical_scale = (2.0 / height) * math.tan((fov / 2) / 180 * math.pi)
    horizontal_fov = vertical_to_horizontal_fov(fov, height=height, width=width)
    horizontal_scale = (2.0 / width) * math.tan((horizontal_fov / 2) / 180 * math.pi)

    ys = row_offsets * vertical_scale
    xs = col_offsets * horizontal_scale

    # noinspection PyArgumentList
    unit_depth_xyz = torch.cat([xs, ys, torch.ones_like(ys)], dim=0)

    # Multiplying by the depth of each selected pixel moves the points off the plane.
    return unit_depth_xyz * depth_frame[mask][None, :]
def depth_frame_to_world_space_xyz(
    depth_frame: torch.Tensor,
    camera_world_xyz: torch.Tensor,
    rotation: float,
    horizon: float,
    fov: float,
):
    """Converts a depth map into a point cloud expressed in the world-space
    coordinate frame.

    This chains `depth_frame_to_camera_space_xyz` (with no mask, so every pixel is used)
    and `camera_space_xyz_to_world_xyz`.

    **IMPORTANT:** We use the conventions from the Unity game engine. In particular:

    * A rotation of 0 corresponds to facing north.
    * Positive rotations correspond to CLOCKWISE rotations. That is a rotation of 90 degrees corresponds
        to facing east. **THIS IS THE OPPOSITE CONVENTION OF THE ONE GENERALLY USED IN MATHEMATICS.**
    * When facing NORTH (rotation==0) moving ahead by 1 meter results in the z coordinate
        increasing by 1. Moving to the right by 1 meter corresponds to increasing the x coordinate by 1.
         Finally moving upwards by 1 meter corresponds to increasing the y coordinate by 1.
         **Having x,z as the ground plane in this way is common in computer graphics but is different than
         the usual mathematical convention of having z be "up".**
    * The horizon corresponds to how far below the horizontal the camera is facing. I.e. a horizon
        of 30 corresponds to the camera being angled downwards at an angle of 30 degrees.

    # Parameters
    depth_frame : A depth map with entry `depth_frame[i, j]` equaling
        the distance from the camera to nearest surface at pixel (i,j).
    camera_world_xyz : The camera's xyz position in the world reference frame.
    rotation : The world-space rotation (in degrees) of the camera.
    horizon : The horizon (in degrees) of the camera.
    fov: The field of view of the camera.

    # Returns

    A tensor of shape (height x width x 3) whose [i, j, :] entry holds the xyz coordinates
        (in the world coordinate frame) of the point corresponding to pixel (i, j) of the
        input depth frame.
    """
    points_in_camera_frame = depth_frame_to_camera_space_xyz(
        depth_frame=depth_frame, mask=None, fov=fov
    )

    points_in_world_frame = camera_space_xyz_to_world_xyz(
        camera_space_xyzs=points_in_camera_frame,
        camera_world_xyz=camera_world_xyz,
        rotation=rotation,
        horizon=horizon,
    )

    # 3xN -> 3 x (frame shape) -> frame shape x 3, i.e. one xyz triple per pixel.
    per_pixel_points = points_in_world_frame.view(3, *depth_frame.shape)
    return per_pixel_points.permute(1, 2, 0)
def project_point_cloud_to_map(
    xyz_points: torch.Tensor,
    bin_axis: str,
    bins: Sequence[float],
    map_size: int,
    resolution_in_cm: int,
    flip_row_col: bool,
):
    """Counts, for every map cell and bin, how many points of a point cloud
    fall into it (the bins become the channels of the map).

    This code has been adapted from https://github.com/devendrachaplot/Neural-SLAM.

    # Parameters
    xyz_points : (x,y,z) pointcloud(s) as a torch.Tensor of shape (... x height x width x 3).
        All operations are vectorized across the `...` dimensions.
    bin_axis : Either "x", "y", or "z", the axis which should be binned by the values in `bins`.
        If you have generated your point clouds with any of the other functions in the `point_cloud_utils`
        module you almost certainly want this to be "y" as this is the default upwards dimension.
    bins: The values by which to bin along `bin_axis`, they are used as the `boundaries`
        of `torch.bucketize`.
    map_size : The axes not specified by `bin_axis` will be divided by `resolution_in_cm / 100`
        and then rounded to the nearest integer. Points whose resulting values are not
        within the interval [0, ..., map_size - 1] are not counted.
    resolution_in_cm: The resolution_in_cm, in cm, of the map output from this function. Every
        grid square of the map corresponds to a (`resolution_in_cm`x`resolution_in_cm`) square
        in space.
    flip_row_col: Should the rows/cols of the map be flipped? See the 'Returns' section below for more
        info.

    # Returns
    A collection of maps of shape (... x map_size x map_size x (len(bins)+1)), note that bin_axis
    has been moved to the last index of this returned map, the other two axes stay in their original
    order unless `flip_row_col` is `True` in which case they are reversed (useful as often
    rows should correspond to y or z instead of x). Points that are NaN or fall outside
    of the map are ignored.
    """
    binned_axis = ["x", "y", "z"].index(bin_axis)

    original_shape = xyz_points.shape
    clouds = xyz_points.reshape([-1, *original_shape[-3:]])
    cloud_count, rows, cols, _ = clouds.shape

    # Put the two map axes first (reversed if requested) and the binned axis last.
    candidate_axes = [2, 1, 0] if flip_row_col else [0, 1, 2]
    axis_order = [axis for axis in candidate_axes if axis != binned_axis]
    axis_order.append(binned_axis)

    reordered = cast(
        torch.Tensor, torch.stack([clouds[..., axis] for axis in axis_order], dim=-1)
    )

    channel_count = len(bins) + 1

    not_nan = ~torch.isnan(clouds[..., 0])

    # Map axes: meters -> (rounded) cell index. Binned axis: value -> bin index.
    cell_indices = torch.round(100 * reordered[..., :-1] / resolution_in_cm).long()
    bin_indices = torch.bucketize(
        reordered[..., -1:].contiguous(), boundaries=reordered.new(bins)
    )
    binned: torch.Tensor = torch.cat((cell_indices, bin_indices), dim=-1)

    upper_bounds = torch.tensor(
        [map_size, map_size, channel_count], dtype=torch.long, device=clouds.device
    ).reshape((1, 1, 1, 3))

    in_bounds = torch.logical_and(
        (binned >= 0).all(-1),
        (binned < upper_bounds).all(-1),
    )
    valid = torch.logical_and(in_bounds, not_nan)
    flat_valid = valid.reshape(-1)

    # Prefix every point with the index of the cloud it belongs to.
    cloud_ids = torch.repeat_interleave(
        torch.arange(0, cloud_count).to(clouds.device), rows * cols
    ).reshape(-1, 1)
    indexed_points = torch.cat((cloud_ids, binned.reshape(-1, 3)), dim=1)

    # Invalid points are redirected to index 0, they get weight 0 below so they
    # never contribute to the counts.
    indexed_points[~flat_valid, :] = 0

    flat_index = (
        indexed_points[:, 0] * (map_size * map_size * channel_count)
        + indexed_points[:, 1] * (map_size * channel_count)
        + indexed_points[:, 2] * channel_count
        + indexed_points[:, 3]
    )
    flat_index[~flat_valid] = 0

    counts = torch.bincount(
        flat_index.view(-1),
        flat_valid.long(),
        minlength=cloud_count * map_size * map_size * channel_count,
    )

    return counts.view(*original_shape[:-3], map_size, map_size, channel_count)
#################
# FOR DEBUGGING #
#################
# The below functions are versions of the above which, because of their reliance on
# numpy functions, cannot use GPU acceleration. These are possibly useful for debugging,
# performance comparisons, or for validating that the above GPU variants work properly.
def _cpu_only_camera_space_xyz_to_world_xyz(
    camera_space_xyzs: np.ndarray,
    camera_world_xyz: np.ndarray,
    rotation: float,
    horizon: float,
):
    """Numpy variant of `camera_space_xyz_to_world_xyz`.

    # Parameters
    camera_space_xyzs : A 3xN matrix of xyz coordinates in the camera's reference frame.
    camera_world_xyz : The camera's xyz position (shape (3,)) in the world reference frame.
    rotation : The world-space rotation (in degrees) of the camera.
    horizon : The horizon (in degrees) of the camera.

    # Returns
    A 3xN array of world-space xyz coordinates.
    """
    # Adapted from https://github.com/devendrachaplot/Neural-SLAM.

    # view_position = 3, world_points = 3 x N
    # NOTE: camera_position is not equal to agent_position!!

    # Both angles are negated before being converted to radians.
    horizon_rads = -horizon * np.pi / 180
    rotation_rads = -rotation * np.pi / 180

    cos_h, sin_h = np.cos(horizon_rads), np.sin(horizon_rads)
    cos_r, sin_r = np.cos(rotation_rads), np.sin(rotation_rads)

    # Tilt caused by the camera's horizon: acts on y and z, leaves x untouched.
    # fmt: off
    tilt_matrix = np.array(
        [
            [1, 0, 0],
            [0, cos_h, sin_h],
            [0, -sin_h, cos_h],
        ],
        np.float64,
    )
    # fmt: on

    # Rotation of the agent about the y-axis: acts on x and z, leaves y untouched.
    # fmt: off
    yaw_matrix = np.array(
        [
            [cos_r, 0, -sin_r],
            [0, 1, 0],
            [sin_r, 0, cos_r],
        ],
        np.float64,
    )
    # fmt: on

    # The tilt is applied first, then the rotation about the y-axis.
    combined_transform = yaw_matrix @ tilt_matrix
    rotated_points = combined_transform @ camera_space_xyzs

    # Finally shift by the camera's world-space position.
    return rotated_points + camera_world_xyz[:, None]
def _cpu_only_depth_frame_to_camera_space_xyz(
depth_frame: np.ndarray, mask: Optional[np.ndarray], fov: float = 90
):
""""""
assert (
len(depth_frame.shape) == 2 and depth_frame.shape[0] == depth_frame.shape[1]
), f"depth has shape {depth_frame.shape}, we only support (N, N) shapes for now."
resolution = depth_frame.shape[0]
if mask is None:
mask = np.ones(depth_frame.shape, dtype=bool)
# pixel centers
camera_space_yx_offsets = (
np.stack(np.where(mask))
+ 0.5 # Offset by 0.5 so that we are in the middle of the pixel
)
# Subtract center
camera_space_yx_offsets -= resolution / 2.0
# Make "up" in y be positive
camera_space_yx_offsets[0, :] *= -1
# Put points on the clipping plane
camera_space_yx_offsets *= (2.0 / resolution) * math.tan((fov / 2) / 180 * math.pi)
camera_space_xyz = np.concatenate(
[
camera_space_yx_offsets[1:, :], # This is x
camera_space_yx_offsets[:1, :], # This is y
np.ones_like(camera_space_yx_offsets[:1, :]),
],
axis=0,
)
return camera_space_xyz * depth_frame[mask][None, :]
def _cpu_only_depth_frame_to_world_space_xyz(
    depth_frame: np.ndarray,
    camera_world_xyz: np.ndarray,
    rotation: float,
    horizon: float,
    fov: float,
):
    """Unproject a full depth frame into world-space points (numpy only).

    Every pixel of `depth_frame` is first unprojected into camera space and
    then transformed into world space.

    # Parameters
    depth_frame : A square `(N, N)` depth image.
    camera_world_xyz : Position of the camera in world space.
    rotation : The agent's rotation about the y-axis (in degrees).
    horizon : The camera's horizon (in degrees).
    fov : The field of view of the camera, in degrees.

    # Returns
    An array of shape `(N, N, 3)` holding the world-space (x, y, z) coordinates
    of each pixel of the depth frame.
    """
    # No mask is given so that all pixels are unprojected.
    points_in_camera_space = _cpu_only_depth_frame_to_camera_space_xyz(
        depth_frame=depth_frame, mask=None, fov=fov
    )
    points_in_world_space = _cpu_only_camera_space_xyz_to_world_xyz(
        camera_space_xyzs=points_in_camera_space,
        camera_world_xyz=camera_world_xyz,
        rotation=rotation,
        horizon=horizon,
    )

    # Points come back as `3 x (N * N)`, restore the image layout and move the
    # coordinate axis to the end.
    as_image = points_in_world_space.reshape((3, *depth_frame.shape))
    return as_image.transpose((1, 2, 0))
def _cpu_only_project_point_cloud_to_map(
xyz_points: np.ndarray,
bin_axis: str,
bins: Sequence[float],
map_size: int,
resolution_in_cm: int,
flip_row_col: bool,
):
"""Bins points into bins.
Adapted from https://github.com/devendrachaplot/Neural-SLAM.
# Parameters
xyz_points : (x,y,z) point clouds as a np.ndarray of shape (... x height x width x 3). (x,y,z)
should be coordinates specified in meters.
bin_axis : Either "x", "y", or "z", the axis which should be binned by the values in `bins`
bins: The values by which to bin along `bin_axis`, see the `bins` parameter of `np.digitize`
for more info.
map_size : The axes not specified by `bin_axis` will be be divided by `resolution_in_cm / 100`
and then rounded to the nearest integer. They are then expected to have their values
within the interval [0, ..., map_size - 1].
resolution_in_cm: The resolution_in_cm, in cm, of the map output from this function. Every
grid square of the map corresponds to a (`resolution_in_cm`x`resolution_in_cm`) square
in space.
flip_row_col: Should the rows/cols of the map be flipped
# Returns
A collection of maps of shape (... x map_size x map_size x (len(bins)+1)), note that bin_axis
has been moved to the last index of this returned map, the other two axes stay in their original
order unless `flip_row_col` has been called in which case they are reversed (useful if you give
points as often rows should correspond to y or z instead of x).
"""
bin_dim = ["x", "y", "z"].index(bin_axis)
start_shape = xyz_points.shape
xyz_points = xyz_points.reshape([-1, *start_shape[-3:]])
num_clouds, h, w, _ = xyz_points.shape
if not flip_row_col:
new_order = [i for i in [0, 1, 2] if i != bin_dim] + [bin_dim]
else:
new_order = [i for i in [2, 1, 0] if i != bin_dim] + [bin_dim]
uvw_points: np.ndarray = np.stack([xyz_points[..., i] for i in new_order], axis=-1)
num_bins = len(bins) + 1
isnotnan = ~np.isnan(xyz_points[..., 0])
uvw_points_binned = np.concatenate(
(
np.round(100 * uvw_points[..., :-1] / resolution_in_cm).astype(np.int32),
np.digitize(uvw_points[..., -1:], bins=bins).astype(np.int32),
),
axis=-1,
)
maxes = np.array([map_size, map_size, num_bins]).reshape((1, 1, 1, 3))
isvalid = np.logical_and.reduce(
(
(uvw_points_binned >= 0).all(-1),
(uvw_points_binned < maxes).all(-1),
isnotnan,
)
)
uvw_points_binned_with_index_mat = np.concatenate(
(
np.repeat(np.arange(0, num_clouds), h * w).reshape(-1, 1),
uvw_points_binned.reshape(-1, 3),
),
axis=1,
)
uvw_points_binned_with_index_mat[~isvalid.reshape(-1), :] = 0
ind = np.ravel_multi_index(
uvw_points_binned_with_index_mat.transpose(),
(num_clouds, map_size, map_size, num_bins),
)
ind[~isvalid.reshape(-1)] = 0
count = np.bincount(
ind.ravel(),
isvalid.ravel().astype(np.int32),
minlength=num_clouds * map_size * map_size * num_bins,
)
return count.reshape([*start_shape[:-3], map_size, map_size, num_bins])
================================================
FILE: allenact/embodiedai/models/__init__.py
================================================
================================================
FILE: allenact/embodiedai/models/aux_models.py
================================================
# Original work Copyright (c) Facebook, Inc. and its affiliates.
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Several of the models defined in this file are modified versions of those
found in
https://github.com/joel99/habitat-pointnav-aux/blob/master/habitat_baselines/"""
import torch
import torch.nn as nn
from allenact.embodiedai.aux_losses.losses import (
InverseDynamicsLoss,
TemporalDistanceLoss,
CPCALoss,
CPCASoftMaxLoss,
)
from allenact.utils.model_utils import FeatureEmbedding
class AuxiliaryModel(nn.Module):
    """Container for the networks used by the self-supervised auxiliary tasks.

    Which sub-networks are created depends on the auxiliary loss UUID given
    at construction time, see `initialize_model_given_aux_uuid`.
    """

    def __init__(
        self,
        aux_uuid: str,
        action_dim: int,
        obs_embed_dim: int,
        belief_dim: int,
        action_embed_size: int = 4,
        cpca_classifier_hidden_dim: int = 32,
        cpca_softmax_dim: int = 128,
    ):
        """Store the sizes and build the networks required by `aux_uuid`.

        # Parameters
        aux_uuid : UUID of the auxiliary loss this model serves.
        action_dim : Number of actions.
        obs_embed_dim : Size of the observation embeddings.
        belief_dim : Size of the belief vectors.
        action_embed_size : Size of the action embeddings (CPCA variants only).
        cpca_classifier_hidden_dim : Hidden size of the MLPs of the CPCA variants.
        cpca_softmax_dim : Output size of the MLPs of the CPCA softmax variant.
        """
        super().__init__()

        self.aux_uuid = aux_uuid
        self.action_dim = action_dim
        self.obs_embed_dim = obs_embed_dim
        self.belief_dim = belief_dim
        self.action_embed_size = action_embed_size
        self.cpca_classifier_hidden_dim = cpca_classifier_hidden_dim
        self.cpca_softmax_dim = cpca_softmax_dim

        self.initialize_model_given_aux_uuid(self.aux_uuid)

    def initialize_model_given_aux_uuid(self, aux_uuid: str):
        """Build the sub-networks matching `aux_uuid`.

        Raises a `ValueError` if `aux_uuid` matches no known auxiliary loss.
        """
        if aux_uuid == InverseDynamicsLoss.UUID:
            build = self.init_inverse_dynamics
        elif aux_uuid == TemporalDistanceLoss.UUID:
            build = self.init_temporal_distance
        elif CPCALoss.UUID in aux_uuid:
            # Substring match: the CPCA family with various k.
            build = self.init_cpca
        elif CPCASoftMaxLoss.UUID in aux_uuid:
            build = self.init_cpca_softmax
        else:
            raise ValueError("Unknown Auxiliary Loss UUID")
        build()

    def init_inverse_dynamics(self):
        """Linear decoder producing one output per action."""
        decoder_input_dim = 2 * self.obs_embed_dim + self.belief_dim
        self.decoder = nn.Linear(decoder_input_dim, self.action_dim)

    def init_temporal_distance(self):
        """Linear decoder producing a single scalar output."""
        decoder_input_dim = 2 * self.obs_embed_dim + self.belief_dim
        self.decoder = nn.Linear(decoder_input_dim, 1)

    def init_cpca(self):
        """Action embedder, GRU context model and an MLP classifier."""
        # Auto-regressive model to predict future context.
        # NOTE: add extra 1 in embedding dict cuz we will pad zero actions?
        num_embeddings = self.action_dim + 1
        self.action_embedder = FeatureEmbedding(num_embeddings, self.action_embed_size)
        self.context_model = nn.GRU(self.action_embed_size, self.belief_dim)

        # Classifier to estimate mutual information.
        classifier_input_dim = self.belief_dim + self.obs_embed_dim
        self.classifier = nn.Sequential(
            nn.Linear(classifier_input_dim, self.cpca_classifier_hidden_dim),
            nn.ReLU(),
            nn.Linear(self.cpca_classifier_hidden_dim, 1),
        )

    def init_cpca_softmax(self):
        """Same as CPCA but with extra MLPs for the contrastive losses."""
        # NOTE: add extra 1 in embedding dict cuz we will pad zero actions?
        num_embeddings = self.action_dim + 1
        self.action_embedder = FeatureEmbedding(num_embeddings, self.action_embed_size)
        self.context_model = nn.GRU(self.action_embed_size, self.belief_dim)

        # One MLP for the observation embeddings and one for the beliefs,
        # both ending in a vector of size `cpca_softmax_dim`.
        hidden_dim = self.cpca_classifier_hidden_dim
        self.visual_mlp = nn.Sequential(
            nn.Linear(self.obs_embed_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, self.cpca_softmax_dim),
        )
        self.belief_mlp = nn.Sequential(
            nn.Linear(self.belief_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, self.cpca_softmax_dim),
        )

    def forward(self, features: torch.FloatTensor):
        """Apply the linear decoder to `features`.

        Only available for the inverse dynamics and temporal distance models,
        all other models raise a `NotImplementedError`.
        """
        decoder_only_uuids = (InverseDynamicsLoss.UUID, TemporalDistanceLoss.UUID)
        if self.aux_uuid not in decoder_only_uuids:
            raise NotImplementedError(
                f"Auxiliary model with UUID {self.aux_uuid} does not support `forward` call."
            )
        return self.decoder(features)
================================================
FILE: allenact/embodiedai/models/basic_models.py
================================================
"""Basic building block torch networks that can be used across a variety of
tasks."""
from typing import (
Sequence,
Dict,
Union,
cast,
List,
Callable,
Optional,
Tuple,
Any,
)
import gym
import numpy as np
import torch
from gym.spaces.dict import Dict as SpaceDict
import torch.nn as nn
from allenact.algorithms.onpolicy_sync.policy import ActorCriticModel, DistributionType
from allenact.base_abstractions.distributions import CategoricalDistr, Distr
from allenact.base_abstractions.misc import ActorCriticOutput, Memory
from allenact.utils.model_utils import make_cnn, compute_cnn_output
from allenact.utils.system import get_logger
class SimpleCNN(nn.Module):
    """A simple N-conv CNN followed by a fully connected layer.

    Takes in observations (of type gym.spaces.dict) and produces an embedding
    of the `rgb_uuid` and/or `depth_uuid` components.

    # Attributes
    observation_space : The observation_space of the agent, should have `rgb_uuid` or `depth_uuid` as
        a component (otherwise it is a blind model).
    output_size : The size of the embedding vector to produce.
    """

    def __init__(
        self,
        observation_space: SpaceDict,
        output_size: int,
        rgb_uuid: Optional[str],
        depth_uuid: Optional[str],
        layer_channels: Sequence[int] = (32, 64, 32),
        kernel_sizes: Sequence[Tuple[int, int]] = ((8, 8), (4, 4), (3, 3)),
        layers_stride: Sequence[Tuple[int, int]] = ((4, 4), (2, 2), (1, 1)),
        paddings: Sequence[Tuple[int, int]] = ((0, 0), (0, 0), (0, 0)),
        dilations: Sequence[Tuple[int, int]] = ((1, 1), (1, 1), (1, 1)),
        flatten: bool = True,
        output_relu: bool = True,
    ):
        """Initializer.

        # Parameters
        observation_space : See class attributes documentation.
        output_size : See class attributes documentation.
        rgb_uuid : Key of the rgb observation, `None` if rgb is unused.
        depth_uuid : Key of the depth observation, `None` if depth is unused.
        layer_channels : Number of channels of each convolutional layer.
        kernel_sizes : Kernel size of each convolutional layer.
        layers_stride : Stride of each convolutional layer.
        paddings : Padding of each convolutional layer.
        dilations : Dilation of each convolutional layer.
        flatten : Forwarded to `make_cnn`.
        output_relu : Forwarded to `make_cnn`.
        """
        super().__init__()

        def count_channels(uuid: Optional[str]) -> int:
            # An unused modality contributes no channels, otherwise the
            # channel count is the third entry of the observation's shape.
            if uuid is None:
                return 0
            assert uuid in observation_space.spaces
            num_channels = observation_space.spaces[uuid].shape[2]
            assert num_channels >= 0
            return num_channels

        self.rgb_uuid = rgb_uuid
        self._n_input_rgb = count_channels(self.rgb_uuid)

        self.depth_uuid = depth_uuid
        self._n_input_depth = count_channels(self.depth_uuid)

        if self.is_blind:
            # Nothing to encode, no networks are built.
            return

        # hyperparameters for layers
        self._cnn_layers_channels = list(layer_channels)
        self._cnn_layers_kernel_size = list(kernel_sizes)
        self._cnn_layers_stride = list(layers_stride)
        self._cnn_layers_paddings = list(paddings)
        self._cnn_layers_dilations = list(dilations)

        def build_encoder(uuid: str, num_channels: int) -> nn.Module:
            # The first two entries of the observation's shape are used as
            # the spatial input dimensions of the CNN.
            spatial_dims = np.array(
                observation_space.spaces[uuid].shape[:2], dtype=np.float32
            )
            return self.make_cnn_from_params(
                output_size=output_size,
                input_dims=spatial_dims,
                input_channels=num_channels,
                flatten=flatten,
                output_relu=output_relu,
            )

        if self._n_input_rgb > 0:
            self.rgb_cnn = build_encoder(self.rgb_uuid, self._n_input_rgb)

        if self._n_input_depth > 0:
            self.depth_cnn = build_encoder(self.depth_uuid, self._n_input_depth)

    def make_cnn_from_params(
        self,
        output_size: int,
        input_dims: np.ndarray,
        input_channels: int,
        flatten: bool,
        output_relu: bool,
    ) -> nn.Module:
        """Build and initialize a CNN using the stored layer hyperparameters.

        The spatial size left after all convolutional layers is computed
        layer by layer and handed to `make_cnn` together with the layer
        hyperparameters. The resulting network is passed through `layer_init`
        before being returned.
        """
        spatial_dims = input_dims
        per_layer_settings = zip(
            self._cnn_layers_kernel_size,
            self._cnn_layers_stride,
            self._cnn_layers_paddings,
            self._cnn_layers_dilations,
        )
        for kernel_size, stride, padding, dilation in per_layer_settings:
            # noinspection PyUnboundLocalVariable
            spatial_dims = self._conv_output_dim(
                dimension=spatial_dims,
                padding=np.array(padding, dtype=np.float32),
                dilation=np.array(dilation, dtype=np.float32),
                kernel_size=np.array(kernel_size, dtype=np.float32),
                stride=np.array(stride, dtype=np.float32),
            )

        final_height, final_width = spatial_dims[0], spatial_dims[1]

        # noinspection PyUnboundLocalVariable
        cnn = make_cnn(
            input_channels=input_channels,
            layer_channels=self._cnn_layers_channels,
            kernel_sizes=self._cnn_layers_kernel_size,
            strides=self._cnn_layers_stride,
            paddings=self._cnn_layers_paddings,
            dilations=self._cnn_layers_dilations,
            output_height=final_height,
            output_width=final_width,
            output_channels=output_size,
            flatten=flatten,
            output_relu=output_relu,
        )
        self.layer_init(cnn)
        return cnn

    @staticmethod
    def _conv_output_dim(
        dimension: Sequence[int],
        padding: Sequence[int],
        dilation: Sequence[int],
        kernel_size: Sequence[int],
        stride: Sequence[int],
    ) -> Tuple[int, ...]:
        """Calculates the output height and width based on the input height and
        width to the convolution layer. For parameter definitions see
        [here](https://pytorch.org/docs/master/nn.html#torch.nn.Conv2d).

        # Parameters
        dimension : See above link.
        padding : See above link.
        dilation : See above link.
        kernel_size : See above link.
        stride : See above link.
        """
        assert len(dimension) == 2

        def output_size_along(axis: int) -> int:
            covered = (
                dimension[axis]
                + 2 * padding[axis]
                - dilation[axis] * (kernel_size[axis] - 1)
                - 1
            )
            return int(np.floor((covered / stride[axis]) + 1))

        return tuple(output_size_along(axis) for axis in (0, 1))

    @staticmethod
    def layer_init(cnn) -> None:
        """Initialize layer parameters using Kaiming normal."""
        for layer in cnn:
            if not isinstance(layer, (nn.Conv2d, nn.Linear)):
                continue
            # NOTE(review): the second positional argument of
            # `kaiming_normal_` is `a` (the negative slope), so the ReLU gain
            # is used as a slope here. Kept as-is so that the initialization
            # of existing models does not change.
            nn.init.kaiming_normal_(layer.weight, nn.init.calculate_gain("relu"))
            if layer.bias is not None:
                nn.init.constant_(layer.bias, val=0)

    @property
    def is_blind(self):
        """True if the model receives neither rgb nor depth input channels."""
        return self._n_input_rgb + self._n_input_depth == 0

    def forward(self, observations: Dict[str, torch.Tensor]):  # type: ignore
        """Embed the rgb and/or depth observations.

        Returns `None` for a blind model, otherwise the outputs of the
        individual CNNs concatenated along the channel dimension.
        """
        if self.is_blind:
            return None

        cnn_output_list: List[torch.Tensor] = []
        use_agent: Optional[bool] = None

        for uuid, cnn_name in (
            (self.rgb_uuid, "rgb_cnn"),
            (self.depth_uuid, "depth_cnn"),
        ):
            if uuid is None:
                continue

            # Observations with 6 dimensions are treated as having an agent
            # dimension, all used modalities have to agree on this.
            has_agent_dim = len(observations[uuid].shape) == 6
            if use_agent is not None:
                assert (
                    use_agent is has_agent_dim
                ), "rgb and depth must both use an agent dim or none"
            use_agent = has_agent_dim

            cnn_output_list.append(
                compute_cnn_output(getattr(self, cnn_name), observations[uuid])
            )

        # [step, sampler, agent, channel (, height, width)] with an agent
        # dimension, [step, sampler, channel (, height, width)] without.
        channels_dim = 3 if use_agent else 2

        return torch.cat(cnn_output_list, dim=channels_dim)
class RNNStateEncoder(nn.Module):
    """A simple RNN-based model playing a role in many baseline embodied-
    navigation agents.

    See `seq_forward` for more details of how this model is used.
    """

    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        num_layers: int = 1,
        rnn_type: str = "GRU",
        trainable_masked_hidden_state: bool = False,
    ):
        """An RNN for encoding the state in RL. Supports masking the hidden
        state during various timesteps in the forward pass.

        # Parameters
        input_size : The input size of the RNN.
        hidden_size : The hidden size.
        num_layers : The number of recurrent layers.
        rnn_type : The RNN cell type. Must be GRU or LSTM.
        trainable_masked_hidden_state : If `True` the initial hidden state (used at the start of a Task)
            is trainable (as opposed to being a vector of zeros).
        """
        super().__init__()

        self._num_recurrent_layers = num_layers
        self._rnn_type = rnn_type

        # The RNN class is looked up by name in `torch.nn`.
        rnn_class = getattr(torch.nn, rnn_type)
        self.rnn = rnn_class(
            input_size=input_size, hidden_size=hidden_size, num_layers=num_layers
        )

        self.trainable_masked_hidden_state = trainable_masked_hidden_state
        if trainable_masked_hidden_state:
            self.init_hidden_state = nn.Parameter(
                0.1 * torch.randn((num_layers, 1, hidden_size)), requires_grad=True
            )

        self.layer_init()

    def layer_init(self):
        """Initialize the RNN parameters in the model.

        Weights are initialized orthogonally and biases are set to 0.
        """
        for param_name, param in self.rnn.named_parameters():
            if "weight" in param_name:
                nn.init.orthogonal_(param)
            elif "bias" in param_name:
                nn.init.constant_(param, 0)

    @property
    def num_recurrent_layers(self) -> int:
        """The number of recurrent layers in the network.

        For an LSTM this is twice the number of layers as the two kinds of
        LSTM states are stacked into a single tensor (see `_pack_hidden`).
        """
        if "LSTM" in self._rnn_type:
            return self._num_recurrent_layers * 2
        return self._num_recurrent_layers * 1

    def _pack_hidden(
        self, hidden_states: Union[torch.FloatTensor, Sequence[torch.FloatTensor]]
    ) -> torch.FloatTensor:
        """Stacks hidden states in an LSTM together (if using a GRU rather than
        an LSTM this is just the identity).

        # Parameters
        hidden_states : The hidden states to (possibly) stack.
        """
        if "LSTM" not in self._rnn_type:
            return cast(torch.FloatTensor, hidden_states)

        first, second = hidden_states[0], hidden_states[1]
        stacked = cast(torch.FloatTensor, torch.cat([first, second], dim=0))
        return cast(torch.FloatTensor, stacked)

    def _unpack_hidden(
        self, hidden_states: torch.FloatTensor
    ) -> Union[torch.FloatTensor, Tuple[torch.FloatTensor, torch.FloatTensor]]:
        """Partial inverse of `_pack_hidden` (exact if there are 2 hidden
        layers)."""
        if "LSTM" not in self._rnn_type:
            return cast(torch.FloatTensor, hidden_states)

        split_at = self._num_recurrent_layers
        pair = (hidden_states[0:split_at], hidden_states[split_at:])
        return cast(Tuple[torch.FloatTensor, torch.FloatTensor], pair)

    def _mask_hidden(
        self,
        hidden_states: Union[Tuple[torch.FloatTensor, ...], torch.FloatTensor],
        masks: torch.FloatTensor,
    ) -> Union[Tuple[torch.FloatTensor, ...], torch.FloatTensor]:
        """Mask input hidden states given `masks`. Useful when masks represent
        steps on which a task has completed.

        # Parameters
        hidden_states : The hidden states.
        masks : Masks to apply to hidden states (see seq_forward).

        # Returns
        Masked hidden states. Here masked hidden states will be replaced with
        either all zeros (if `trainable_masked_hidden_state` was False) and will
        otherwise be a learnable collection of parameters.
        """
        is_tuple = isinstance(hidden_states, tuple)

        if self.trainable_masked_hidden_state:
            # Masked entries are replaced by the learnable initial state,
            # repeated along the second dimension of the hidden state.
            if is_tuple:
                # noinspection PyTypeChecker
                return tuple(
                    v * masks  # type:ignore
                    + (1.0 - masks) * (self.init_hidden_state.repeat(1, v.shape[1], 1))  # type: ignore
                    for v in hidden_states  # type:ignore
                )  # type: ignore
            # noinspection PyTypeChecker
            return masks * hidden_states + (1 - masks) * (  # type: ignore
                self.init_hidden_state.repeat(1, hidden_states.shape[1], 1)
            )

        # Without a trainable initial state masked entries become zeros.
        if is_tuple:
            return tuple(cast(torch.FloatTensor, v * masks) for v in hidden_states)
        return cast(torch.FloatTensor, masks * hidden_states)

    def single_forward(
        self,
        x: torch.FloatTensor,
        hidden_states: torch.FloatTensor,
        masks: torch.FloatTensor,
    ) -> Tuple[
        torch.FloatTensor, Union[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]
    ]:
        """Forward for a single-step input."""
        adapted = self.adapt_input(x, hidden_states, masks)
        x, hidden_states, masks = adapted[:3]
        # (mem_agent, obs_agent, nsteps, nsamplers, nagents)
        layout = adapted[3:]

        # The masks of the (only) step decide which hidden states are reset.
        step_masks = cast(torch.FloatTensor, masks[0].view(1, -1, 1))
        initial_states = self._mask_hidden(
            self._unpack_hidden(hidden_states), step_masks
        )
        x, final_states = self.rnn(x, initial_states)

        return self.adapt_result(x, self._pack_hidden(final_states), *layout)

    def adapt_input(
        self,
        x: torch.FloatTensor,
        hidden_states: torch.FloatTensor,
        masks: torch.FloatTensor,
    ) -> Tuple[
        torch.FloatTensor,
        torch.FloatTensor,
        torch.FloatTensor,
        bool,
        bool,
        int,
        int,
        int,
    ]:
        """Flatten the sampler and agent dimensions of the inputs.

        # Returns
        The reshaped `x`, `hidden_states` and `masks` followed by
        `mem_agent` (whether `hidden_states` had an agent dimension),
        `obs_agent` (whether `x` had an agent dimension), the number of steps,
        the number of samplers, and the number of agents.
        """
        nsteps, nsamplers = masks.shape[:2]

        assert len(hidden_states.shape) in [
            3,
            4,
        ], "hidden_states must be [layer, sampler, hidden] or [layer, sampler, agent, hidden]"

        assert len(x.shape) in [
            3,
            4,
        ], "observations must be [step, sampler, data] or [step, sampler, agent, data]"

        # [layer, sampler, agent, hidden] vs. [layer, sampler, hidden]
        mem_agent: bool = len(hidden_states.shape) == 4
        nagents = hidden_states.shape[2] if mem_agent else 1

        # [step, sampler, agent, dims] vs. [step, sampler, dims]
        obs_agent: bool = len(x.shape) == 4

        flat_batch = nsamplers * nagents

        # Flatten (nsamplers, nagents)
        x = x.view(nsteps, flat_batch, -1)  # type:ignore
        masks = masks.expand(-1, -1, nagents).reshape(  # type:ignore
            nsteps, flat_batch
        )

        # Flatten (nsamplers, nagents) and remove step dim
        hidden_states = hidden_states.view(  # type:ignore
            self.num_recurrent_layers, flat_batch, -1
        )

        # noinspection PyTypeChecker
        return x, hidden_states, masks, mem_agent, obs_agent, nsteps, nsamplers, nagents

    def adapt_result(
        self,
        outputs: torch.FloatTensor,
        hidden_states: torch.FloatTensor,
        mem_agent: bool,
        obs_agent: bool,
        nsteps: int,
        nsamplers: int,
        nagents: int,
    ) -> Tuple[
        torch.FloatTensor,
        torch.FloatTensor,
    ]:
        """Undo the flattening done by `adapt_input`.

        The agent dimension is restored for the outputs if `obs_agent` is
        `True` and for the hidden states if `mem_agent` is `True`.
        """
        output_tail = (nagents, -1) if obs_agent else (-1,)
        hidden_tail = (nagents, -1) if mem_agent else (-1,)

        output_dims = (nsteps, nsamplers) + output_tail
        hidden_dims = (self.num_recurrent_layers, nsamplers) + hidden_tail

        outputs = cast(torch.FloatTensor, outputs.view(*output_dims))
        hidden_states = cast(
            torch.FloatTensor,
            hidden_states.view(*hidden_dims),
        )

        return outputs, hidden_states

    def seq_forward(  # type: ignore
        self,
        x: torch.FloatTensor,
        hidden_states: torch.FloatTensor,
        masks: torch.FloatTensor,
    ) -> Tuple[
        torch.FloatTensor, Union[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]
    ]:
        """Forward for a sequence of length T.

        # Parameters
        x : (Steps, Samplers, Agents, -1) tensor.
        hidden_states : The starting hidden states.
        masks : A (Steps, Samplers, Agents) tensor.
            The masks to be applied to hidden state at every timestep, equal to 0 whenever the previous step finalized
            the task, 1 elsewhere.
        """
        adapted = self.adapt_input(x, hidden_states, masks)
        x, hidden_states, masks = adapted[:3]
        # (mem_agent, obs_agent, nsteps, nsamplers, nagents)
        layout = adapted[3:]
        nsteps = layout[2]

        # steps in sequence which have zero for any episode. Assume t=0 has
        # a zero in it.
        reset_steps = (masks[1:] == 0.0).any(dim=-1).nonzero().squeeze().cpu()

        # +1 to correct the masks[1:]
        if reset_steps.dim() == 0:
            # handle scalar
            reset_steps = [reset_steps.item() + 1]  # type: ignore
        else:
            reset_steps = (reset_steps + 1).numpy().tolist()

        # add t=0 and t=T to the list
        boundaries = cast(List[int], [0] + reset_steps + [nsteps])

        current_states = self._unpack_hidden(cast(torch.FloatTensor, hidden_states))

        segment_outputs = []
        for segment_start, segment_end in zip(boundaries[:-1], boundaries[1:]):
            # process steps that don't have any zeros in masks together
            start_idx = int(segment_start)
            end_idx = int(segment_end)

            # Hidden states are only (possibly) reset at the segment's start.
            start_masks = cast(torch.FloatTensor, masks[start_idx].view(1, -1, 1))

            # noinspection PyTypeChecker
            rnn_scores, current_states = self.rnn(
                x[start_idx:end_idx],
                self._mask_hidden(current_states, start_masks),
            )
            segment_outputs.append(rnn_scores)

        return self.adapt_result(
            cast(torch.FloatTensor, torch.cat(segment_outputs, dim=0)),
            self._pack_hidden(current_states),
            *layout,
        )

    def forward(  # type: ignore
        self,
        x: torch.FloatTensor,
        hidden_states: torch.FloatTensor,
        masks: torch.FloatTensor,
    ) -> Tuple[
        torch.FloatTensor, Union[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]
    ]:
        """Dispatch to `single_forward` for one step, else to `seq_forward`."""
        is_single_step = masks.shape[0] == 1
        if is_single_step:
            return self.single_forward(x, hidden_states, masks)
        return self.seq_forward(x, hidden_states, masks)
class LinearActorCritic(ActorCriticModel[CategoricalDistr]):
    """Actor-critic model made of a single linear layer.

    The layer maps the one dimensional observation stored under `input_uuid`
    to `action_space.n + 1` values: the first `action_space.n` are used as the
    logits of a categorical action distribution and the last one as the value
    estimate. The model has no recurrent memory.
    """

    def __init__(
        self,
        input_uuid: str,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
    ):
        """Initializer.

        # Parameters
        input_uuid : Key of the (only) observation used by the model, it must
            correspond to a one dimensional `gym.spaces.Box` space.
        action_space : The discrete action space of the agent.
        observation_space : The observation space, must contain `input_uuid`.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        assert (
            input_uuid in observation_space.spaces
        ), "LinearActorCritic expects only a single observational input."
        self.input_uuid = input_uuid

        box_space: gym.spaces.Box = observation_space[self.input_uuid]

        # The leading space of the second literal matters: adjacent string
        # literals are concatenated without any separator.
        assert isinstance(box_space, gym.spaces.Box), (
            "LinearActorCritic requires that"
            " observation space corresponding to the input uuid is a Box space."
        )
        assert len(box_space.shape) == 1
        self.in_dim = box_space.shape[0]

        # One output per action (the logits) plus one for the value.
        self.linear = nn.Linear(self.in_dim, action_space.n + 1)

        nn.init.orthogonal_(self.linear.weight)
        nn.init.constant_(self.linear.bias, 0)

    # noinspection PyMethodMayBeStatic
    def _recurrent_memory_specification(self):
        """This model is not recurrent and so it specifies no memory."""
        return None

    def forward(self, observations, memory, prev_actions, masks):
        """Compute the action distribution and value from the observation.

        `memory`, `prev_actions`, and `masks` are not used.

        # Returns
        A tuple of the `ActorCriticOutput` and `None` (there is no memory).
        """
        out = self.linear(observations[self.input_uuid])

        # noinspection PyArgumentList
        return (
            ActorCriticOutput(
                # ensure [steps, samplers, ...]
                distributions=CategoricalDistr(logits=out[..., :-1]),
                # ensure [steps, samplers, flattened]
                values=cast(torch.FloatTensor, out[..., -1:].view(*out.shape[:2], -1)),
                extras={},
            ),
            None,
        )
class RNNActorCritic(ActorCriticModel[Distr]):
    """Actor-critic model made of an RNN followed by a non-recurrent head.

    The one dimensional observation stored under `input_uuid` is encoded by an
    `RNNStateEncoder` whose output is given to the head (built from
    `head_type`) as the observation with key `"rnn_" + input_uuid`. The
    recurrent state is kept in memory under the key `"rnn"`.
    """

    def __init__(
        self,
        input_uuid: str,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        hidden_size: int = 128,
        num_layers: int = 1,
        rnn_type: str = "GRU",
        head_type: Callable[..., ActorCriticModel[Distr]] = LinearActorCritic,
    ):
        """Initializer.

        # Parameters
        input_uuid : Key of the (only) observation used by the model, it must
            correspond to a one dimensional `gym.spaces.Box` space.
        action_space : The discrete action space of the agent.
        observation_space : The observation space, must contain `input_uuid`.
        hidden_size : Hidden size of the RNN (and input size of the head).
        num_layers : Number of recurrent layers.
        rnn_type : The RNN cell type, see `RNNStateEncoder`.
        head_type : Callable building the non-recurrent head, it is called with
            the keyword arguments `input_uuid`, `action_space`, and
            `observation_space`.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)
        self.hidden_size = hidden_size
        self.rnn_type = rnn_type

        assert (
            input_uuid in observation_space.spaces
        ), "RNNActorCritic expects only a single observational input."
        self.input_uuid = input_uuid

        box_space: gym.spaces.Box = observation_space[self.input_uuid]

        # The leading space of the second literal matters: adjacent string
        # literals are concatenated without any separator.
        assert isinstance(box_space, gym.spaces.Box), (
            "RNNActorCritic requires that"
            " observation space corresponding to the input uuid is a Box space."
        )
        assert len(box_space.shape) == 1
        self.in_dim = box_space.shape[0]

        self.state_encoder = RNNStateEncoder(
            input_size=self.in_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            rnn_type=rnn_type,
            trainable_masked_hidden_state=True,
        )

        # The head only ever sees the RNN's output, exposed to it as a
        # `hidden_size` dimensional Box observation.
        self.head_uuid = "{}_{}".format("rnn", input_uuid)

        self.ac_nonrecurrent_head: ActorCriticModel[Distr] = head_type(
            input_uuid=self.head_uuid,
            action_space=action_space,
            observation_space=SpaceDict(
                {
                    self.head_uuid: gym.spaces.Box(
                        low=np.float32(0.0), high=np.float32(1.0), shape=(hidden_size,)
                    )
                }
            ),
        )

        self.memory_key = "rnn"

    @property
    def recurrent_hidden_state_size(self) -> int:
        """The size of the RNN's hidden state."""
        return self.hidden_size

    @property
    def num_recurrent_layers(self) -> int:
        """The number of recurrent layers as reported by the state encoder."""
        return self.state_encoder.num_recurrent_layers

    def _recurrent_memory_specification(self):
        """Specify a float32 memory with (layer, sampler, hidden) dimensions."""
        return {
            self.memory_key: (
                (
                    ("layer", self.num_recurrent_layers),
                    ("sampler", None),
                    ("hidden", self.recurrent_hidden_state_size),
                ),
                torch.float32,
            )
        }

    def forward(  # type:ignore
        self,
        observations: Dict[str, Union[torch.FloatTensor, Dict[str, Any]]],
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Encode the observation with the RNN and apply the head.

        If `memory` has no entry for the RNN state, a warning is logged and an
        all zeros state is added to it.

        # Returns
        A tuple of the head's output and the memory updated with the new RNN
        state.
        """
        if self.memory_key not in memory:
            get_logger().warning(
                f"Key {self.memory_key} not found in memory,"
                f" initializing this as all zeros."
            )

            obs = observations[self.input_uuid]
            # The second dimension of the observation is used as the number
            # of samplers.
            memory.check_append(
                key=self.memory_key,
                tensor=obs.new(
                    self.num_recurrent_layers,
                    obs.shape[1],
                    self.recurrent_hidden_state_size,
                )
                .float()
                .zero_(),
                sampler_dim=1,
            )

        rnn_out, mem_return = self.state_encoder(
            x=observations[self.input_uuid],
            hidden_states=memory.tensor(self.memory_key),
            masks=masks,
        )

        # The head's own memory output is discarded.
        # noinspection PyCallingNonCallable
        out, _ = self.ac_nonrecurrent_head(
            observations={self.head_uuid: rnn_out},
            memory=None,
            prev_actions=prev_actions,
            masks=masks,
        )

        # noinspection PyArgumentList
        return (
            out,
            memory.set_tensor(self.memory_key, mem_return),
        )
================================================
FILE: allenact/embodiedai/models/fusion_models.py
================================================
# Original work Copyright (c) Facebook, Inc. and its affiliates.
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Adapted from https://github.com/joel99/habitat-pointnav-aux/blob/master/habitat_baselines/
import math
from typing import Tuple
import torch
import torch.nn as nn
class Fusion(nn.Module):
    """Base class of the belief fusion models from Auxiliary Tasks Speed Up
    Learning PointGoal Navigation (Ye, 2020).

    Child classes should implement `get_belief_weights` to generate the
    weights used to fuse the beliefs from all the auxiliary tasks into one.
    """

    def __init__(self, hidden_size, obs_embed_size, num_tasks):
        super().__init__()
        self.hidden_size = hidden_size  # H
        self.obs_embed_size = obs_embed_size  # Z
        self.num_tasks = num_tasks  # k

    def forward(
        self,
        all_beliefs: torch.FloatTensor,  # (T, N, H, K)
        obs_embeds: torch.FloatTensor,  # (T, N, Z)
    ) -> Tuple[torch.FloatTensor, torch.FloatTensor]:  # (T, N, H), (T, N, K)
        """Fuse the per-task beliefs into a single belief.

        # Returns
        The fused beliefs, i.e. the sum of the per-task beliefs weighted by
        the output of `get_belief_weights`, and the weights themselves.
        """
        num_steps, num_samplers, _, _ = all_beliefs.shape
        batch_size = num_steps * num_samplers

        # Merge the step and sampler dimensions into a single batch dimension.
        flat_beliefs = all_beliefs.view(batch_size, self.hidden_size, self.num_tasks)
        flat_obs_embeds = obs_embeds.view(batch_size, -1)

        flat_weights = self.get_belief_weights(
            all_beliefs=flat_beliefs,  # (T*N, H, K)
            obs_embeds=flat_obs_embeds,  # (T*N, Z)
        )  # (T*N, K)

        # Weighted sum over tasks: (T*N, H, K) x (T*N, K, 1) -> (T*N, H, 1)
        fused = torch.bmm(flat_beliefs, flat_weights.unsqueeze(-1))

        beliefs = fused.squeeze(-1).view(num_steps, num_samplers, self.hidden_size)
        weights = flat_weights.view(num_steps, num_samplers, self.num_tasks)
        return beliefs, weights

    def get_belief_weights(
        self,
        all_beliefs: torch.FloatTensor,  # (T*N, H, K)
        obs_embeds: torch.FloatTensor,  # (T*N, Z)
    ) -> torch.FloatTensor:  # (T*N, K)
        """Return one weight per task, must be implemented by child classes."""
        raise NotImplementedError()
class AverageFusion(Fusion):
    """Fusion giving the same weight, `1 / num_tasks`, to every task."""

    UUID = "avg"

    def get_belief_weights(
        self,
        all_beliefs: torch.FloatTensor,  # (T*N, H, K)
        obs_embeds: torch.FloatTensor,  # (T*N, Z)
    ) -> torch.FloatTensor:  # (T*N, K)
        """Return uniform weights, the inputs only determine the number of
        rows and the dtype/device of the result."""
        num_rows = all_beliefs.shape[0]
        uniform = torch.ones(num_rows, self.num_tasks).to(all_beliefs)
        uniform /= self.num_tasks
        return uniform
class SoftmaxFusion(Fusion):
    """Situational Fusion of Visual Representation for Visual Navigation
    https://arxiv.org/abs/1908.09073.

    The weights are a softmax over a linear function of the observation
    embeddings.
    """

    UUID = "smax"

    def __init__(self, hidden_size, obs_embed_size, num_tasks):
        super().__init__(hidden_size, obs_embed_size, num_tasks)
        # Maps the observation embedding to one score per task, the beliefs
        # themselves are ignored.
        self.linear = nn.Linear(obs_embed_size, num_tasks)

    def get_belief_weights(
        self,
        all_beliefs: torch.Tensor,  # (T*N, H, K)
        obs_embeds: torch.Tensor,  # (T*N, Z)
    ) -> torch.Tensor:  # (T*N, K)
        """Return the softmax (over tasks) of the per-task scores."""
        task_scores = self.linear(obs_embeds)  # (T*N, K)
        return torch.softmax(task_scores, dim=-1)
class AttentiveFusion(Fusion):
    """Attention is All You Need https://arxiv.org/abs/1706.03762 i.e. scaled
    dot-product attention.

    The observation embedding is turned into a query which is compared with
    the belief of every task.
    """

    UUID = "attn"

    def __init__(self, hidden_size, obs_embed_size, num_tasks):
        super().__init__(hidden_size, obs_embed_size, num_tasks)
        # Projects the observation embedding to the size of the beliefs.
        self.linear = nn.Linear(obs_embed_size, hidden_size)

    def get_belief_weights(
        self,
        all_beliefs: torch.Tensor,  # (T*N, H, K)
        obs_embeds: torch.Tensor,  # (T*N, Z)
    ) -> torch.Tensor:  # (T*N, K)
        """Return the softmax (over tasks) of the scaled dot products between
        the query and the beliefs."""
        query = self.linear(obs_embeds)  # (T*N, H)
        query = query.unsqueeze(1)  # (T*N, 1, H)

        dot_products = torch.bmm(query, all_beliefs)  # (T*N, 1, K)
        scores = dot_products.squeeze(1)  # (T*N, K)

        scaled_scores = scores / math.sqrt(self.hidden_size)
        return torch.softmax(scaled_scores, dim=-1)  # (T*N, K)
================================================
FILE: allenact/embodiedai/models/resnet.py
================================================
# Original work Copyright (c) Facebook, Inc. and its affiliates.
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Adapted from https://github.com/joel99/habitat-pointnav-aux/blob/master/habitat_baselines/
from typing import Optional
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from gym.spaces.dict import Dict as SpaceDict
from allenact.utils.model_utils import Flatten
from allenact.utils.system import get_logger
def conv3x3(in_planes, out_planes, stride=1, groups=1):
    """Build a bias-free 3x3 convolution with a padding of 1.

    # Parameters
    in_planes : Number of input channels.
    out_planes : Number of output channels.
    stride : Stride of the convolution.
    groups : Number of groups of the convolution.
    """
    conv_settings = dict(
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
        groups=groups,
    )
    return nn.Conv2d(in_planes, out_planes, **conv_settings)
def conv1x1(in_planes, out_planes, stride=1):
    """Build a bias-free 1x1 convolution.

    # Parameters
    in_planes : Number of input channels.
    out_planes : Number of output channels.
    stride : Stride of the convolution.
    """
    pointwise = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
    return pointwise
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by a group
    normalization, with an in-place ReLU in between and after the residual
    sum."""

    expansion = 1
    resneXt = False

    def __init__(
        self,
        inplanes,
        planes,
        ngroups,
        stride=1,
        downsample=None,
        cardinality=1,
    ):
        """Initializer.

        # Parameters
        inplanes : Number of input channels.
        planes : Number of output channels.
        ngroups : Number of groups of the group normalizations.
        stride : Stride of the first convolution.
        downsample : Optional module applied to the input before it is added
            to the output of the convolutions.
        cardinality : Number of groups of the convolutions.
        """
        super().__init__()

        first_conv = conv3x3(inplanes, planes, stride, groups=cardinality)
        first_norm = nn.GroupNorm(ngroups, planes)
        second_conv = conv3x3(planes, planes, groups=cardinality)
        second_norm = nn.GroupNorm(ngroups, planes)
        self.convs = nn.Sequential(
            first_conv,
            first_norm,
            nn.ReLU(True),
            second_conv,
            second_norm,
        )

        self.downsample = downsample
        self.relu = nn.ReLU(True)

    def forward(self, x):
        """Apply the convolutions and add the (possibly downsampled) input."""
        out = self.convs(x)
        shortcut = x if self.downsample is None else self.downsample(x)
        return self.relu(out + shortcut)
def _build_bottleneck_branch(inplanes, planes, ngroups, stride, expansion, groups=1):
    """Build the 1x1 -> 3x3 -> 1x1 main branch of a bottleneck block.

    Each convolution is followed by a GroupNorm; the first two stages also
    end with an in-place ReLU. `stride` and `groups` apply to the 3x3
    convolution only, and the last stage widens to `planes * expansion`.
    """
    widened = planes * expansion
    stages = [
        # channel reduction
        conv1x1(inplanes, planes),
        nn.GroupNorm(ngroups, planes),
        nn.ReLU(True),
        # spatial (possibly strided / grouped) convolution
        conv3x3(planes, planes, stride, groups=groups),
        nn.GroupNorm(ngroups, planes),
        nn.ReLU(True),
        # channel expansion, no activation here
        conv1x1(planes, widened),
        nn.GroupNorm(ngroups, widened),
    ]
    return nn.Sequential(*stages)
class SE(nn.Module):
    """Squeeze-and-excitation gate.

    Global-average-pools a (B, C, H, W) input to (B, C), runs it through a
    two-layer MLP with a hidden width of `int(planes / r)` and a final
    sigmoid, and returns the per-channel gates shaped (B, C, 1, 1).
    """

    def __init__(self, planes, r=16):
        super().__init__()
        hidden_width = int(planes / r)
        self.squeeze = nn.AdaptiveAvgPool2d(1)
        self.excite = nn.Sequential(
            nn.Linear(planes, hidden_width),
            nn.ReLU(True),
            nn.Linear(hidden_width, planes),
            nn.Sigmoid(),
        )

    def forward(self, x):
        # Unpacking four values keeps the requirement of a 4-D input.
        batch, channels, _height, _width = x.size()
        pooled = self.squeeze(x).view(batch, channels)
        gates = self.excite(pooled)
        return gates.view(batch, channels, 1, 1)
def _build_se_branch(planes, r=16):
"""Return an `SE` module built with `planes` channels and reduction ratio `r`."""
return SE(planes, r)
class Bottleneck(nn.Module):
    """Bottleneck residual block (1x1 -> 3x3 -> 1x1) with GroupNorm.

    Subclasses customize the computation by overriding `_impl`; `forward`
    only delegates to it.
    """

    # Output channels are `planes * expansion`.
    expansion = 4
    # Read by `GroupNormResNet` to decide whether to double `base_planes`.
    resneXt = False

    def __init__(
        self,
        inplanes,
        planes,
        ngroups,
        stride=1,
        downsample=None,
        cardinality=1,
    ):
        super().__init__()
        # Registration order: convs -> relu -> downsample.
        self.convs = _build_bottleneck_branch(
            inplanes, planes, ngroups, stride, self.expansion, groups=cardinality
        )
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def _impl(self, x):
        out = self.convs(x)
        # Use the raw input as the shortcut unless a projection was given.
        shortcut = x if self.downsample is None else self.downsample(x)
        return self.relu(out + shortcut)

    def forward(self, x):
        return self._impl(x)
class SEBottleneck(Bottleneck):
    """`Bottleneck` whose main-branch output is rescaled by an SE gate."""

    def __init__(
        self,
        inplanes,
        planes,
        ngroups,
        stride=1,
        downsample=None,
        cardinality=1,
    ):
        super().__init__(inplanes, planes, ngroups, stride, downsample, cardinality)
        # The gate operates on the widened output of the main branch.
        gated_channels = planes * self.expansion
        self.se = _build_se_branch(gated_channels)

    def _impl(self, x):
        features = self.convs(x)
        # Per-channel gates of shape (B, C, 1, 1) broadcast over H and W.
        gated = self.se(features) * features
        shortcut = x if self.downsample is None else self.downsample(x)
        return self.relu(gated + shortcut)
class SEResNeXtBottleneck(SEBottleneck):
# ResNeXt flavour of `SEBottleneck`: same computation, different class constants.
# `expansion` sets the block's output width to `planes * 2`.
expansion = 2
# `GroupNormResNet.__init__` doubles `base_planes` when this flag is True.
resneXt = True
class ResNeXtBottleneck(Bottleneck):
# ResNeXt flavour of `Bottleneck`: same computation, different class constants.
# `expansion` sets the block's output width to `planes * 2`.
expansion = 2
# `GroupNormResNet.__init__` doubles `base_planes` when this flag is True.
resneXt = True
class GroupNormResNet(nn.Module):
    """ResNet trunk that uses GroupNorm in place of BatchNorm.

    The network is a 7x7 stride-2 stem, a stride-2 max-pool, and four
    residual stages (`layer1` .. `layer4`) whose widths double from stage
    to stage; stages 2-4 use stride 2.

    # Attributes

    final_channels : Number of channels produced by `layer4`.
    final_spatial_compress : Constant `1 / 32` describing the overall
        spatial reduction of the trunk.
    """

    def __init__(self, in_channels, base_planes, ngroups, block, layers, cardinality=1):
        super().__init__()
        stem_conv = nn.Conv2d(
            in_channels,
            base_planes,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
        )
        self.conv1 = nn.Sequential(
            stem_conv,
            nn.GroupNorm(ngroups, base_planes),
            nn.ReLU(True),
        )
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.cardinality = cardinality
        # Running input width for the next block; updated by `_make_layer`.
        self.inplanes = base_planes
        if block.resneXt:
            base_planes *= 2

        # (width multiplier, stride) for layer1 .. layer4, built in order.
        stage_plan = ((1, 1), (2, 2), (4, 2), (8, 2))
        for idx, (width_mult, stride) in enumerate(stage_plan):
            stage = self._make_layer(
                block, ngroups, base_planes * width_mult, layers[idx], stride=stride
            )
            setattr(self, "layer{}".format(idx + 1), stage)

        self.final_channels = self.inplanes
        self.final_spatial_compress = 1.0 / 32

    def _make_layer(self, block, ngroups, planes, blocks, stride=1):
        """Stack `blocks` instances of `block` into one `nn.Sequential` stage."""
        out_planes = planes * block.expansion

        # A projection shortcut is needed whenever the first block changes
        # the spatial size or the channel count.
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, out_planes, stride),
                nn.GroupNorm(ngroups, out_planes),
            )

        first_block = block(
            self.inplanes,
            planes,
            ngroups,
            stride,
            downsample,
            cardinality=self.cardinality,
        )
        self.inplanes = out_planes

        # NOTE(review): the follow-up blocks are built without `cardinality`,
        # so they use the block's default of 1. Kept as is; changing it would
        # alter the shapes of the grouped convolution weights.
        remaining_blocks = [
            block(self.inplanes, planes, ngroups) for _ in range(1, blocks)
        ]
        return nn.Sequential(first_block, *remaining_blocks)

    def forward(self, x):
        pipeline = (
            self.conv1,
            self.maxpool,
            self.layer1,
            self.layer2,
            self.layer3,
            self.layer4,
        )
        for stage in pipeline:
            x = stage(x)
        return x
def gnresnet18(in_channels, base_planes, ngroups):
    """Build a `GroupNormResNet` of `BasicBlock`s with stage depths [2, 2, 2, 2]."""
    return GroupNormResNet(
        in_channels,
        base_planes,
        ngroups,
        block=BasicBlock,
        layers=[2, 2, 2, 2],
    )
def gnresnet50(in_channels, base_planes, ngroups):
    """Build a `GroupNormResNet` of `Bottleneck` blocks with stage depths [3, 4, 6, 3]."""
    return GroupNormResNet(
        in_channels,
        base_planes,
        ngroups,
        block=Bottleneck,
        layers=[3, 4, 6, 3],
    )
def gnresneXt50(in_channels, base_planes, ngroups):
    """Build a `GroupNormResNet` of `ResNeXtBottleneck` blocks, depths [3, 4, 6, 3].

    The cardinality handed to the network is `int(base_planes / 2)`.
    """
    num_conv_groups = int(base_planes / 2)
    return GroupNormResNet(
        in_channels,
        base_planes,
        ngroups,
        block=ResNeXtBottleneck,
        layers=[3, 4, 6, 3],
        cardinality=num_conv_groups,
    )
def se_gnresnet50(in_channels, base_planes, ngroups):
    """Build a `GroupNormResNet` of `SEBottleneck` blocks with stage depths [3, 4, 6, 3]."""
    return GroupNormResNet(
        in_channels,
        base_planes,
        ngroups,
        block=SEBottleneck,
        layers=[3, 4, 6, 3],
    )
def se_gnresneXt50(in_channels, base_planes, ngroups):
    """Build a `GroupNormResNet` of `SEResNeXtBottleneck` blocks, depths [3, 4, 6, 3].

    The cardinality handed to the network is `int(base_planes / 2)`.
    """
    num_conv_groups = int(base_planes / 2)
    return GroupNormResNet(
        in_channels,
        base_planes,
        ngroups,
        block=SEResNeXtBottleneck,
        layers=[3, 4, 6, 3],
        cardinality=num_conv_groups,
    )
def se_gnresneXt101(in_channels, base_planes, ngroups):
    """Build a `GroupNormResNet` of `SEResNeXtBottleneck` blocks, depths [3, 4, 23, 3].

    The cardinality handed to the network is `int(base_planes / 2)`.
    """
    num_conv_groups = int(base_planes / 2)
    return GroupNormResNet(
        in_channels,
        base_planes,
        ngroups,
        block=SEResNeXtBottleneck,
        layers=[3, 4, 23, 3],
        cardinality=num_conv_groups,
    )
class GroupNormResNetEncoder(nn.Module):
    """Encode RGB and/or depth observations with a GroupNorm ResNet.

    The configured inputs are concatenated along the channel axis,
    downsampled 2x by average pooling, passed through the backbone, a 3x3
    compression convolution, and a flatten + linear + ReLU head that
    produces `output_size` features.

    # Parameters

    observation_space : Space whose `spaces` dict holds the image spaces.
    rgb_uuid : Key of the RGB observation, or `None` to ignore RGB.
    depth_uuid : Key of the depth observation, or `None` to ignore depth.
    output_size : Feature size produced by the head.
    baseplanes : Forwarded to `make_backbone` as the base width.
    ngroups : Forwarded to `make_backbone` as the GroupNorm group count.
    make_backbone : Callable `(input_channels, baseplanes, ngroups)`
        returning the backbone; it is called whenever at least one input
        is configured.
    """

    def __init__(
        self,
        observation_space: SpaceDict,
        rgb_uuid: Optional[str],
        depth_uuid: Optional[str],
        output_size: int,
        baseplanes=32,
        ngroups=32,
        make_backbone=None,
    ):
        super().__init__()

        # Observation keys that will be fed to the CNN, in channel order.
        self._inputs = []

        self.rgb_uuid = rgb_uuid
        if self.rgb_uuid is None:
            self._n_input_rgb = 0
        else:
            assert self.rgb_uuid in observation_space.spaces
            # Channel count is read from the third dimension of the space.
            self._n_input_rgb = observation_space.spaces[self.rgb_uuid].shape[2]
            assert self._n_input_rgb >= 0
            self._inputs.append(self.rgb_uuid)

        self.depth_uuid = depth_uuid
        if self.depth_uuid is None:
            self._n_input_depth = 0
        else:
            assert self.depth_uuid in observation_space.spaces
            self._n_input_depth = observation_space.spaces[self.depth_uuid].shape[2]
            assert self._n_input_depth >= 0
            self._inputs.append(self.depth_uuid)

        if not self.is_blind:
            # `forward` halves the resolution before the backbone.
            first_space = observation_space.spaces[self._inputs[0]]
            spatial_size = first_space.shape[0] // 2  # H (=W) / 2

            # RGBD into one model
            input_channels = self._n_input_rgb + self._n_input_depth  # C
            self.backbone = make_backbone(input_channels, baseplanes, ngroups)

            final_spatial = int(
                np.ceil(spatial_size * self.backbone.final_spatial_compress)
            )  # fix bug in habitat that uses int()

            # Pick the channel count so the flattened map has ~2048 values.
            after_compression_flat_size = 2048
            num_compression_channels = int(
                round(after_compression_flat_size / (final_spatial**2))
            )
            compress_conv = nn.Conv2d(
                self.backbone.final_channels,
                num_compression_channels,
                kernel_size=3,
                padding=1,
                bias=False,
            )
            self.compression = nn.Sequential(
                compress_conv,
                nn.GroupNorm(1, num_compression_channels),
                nn.ReLU(True),
            )

            self.output_shape = (
                num_compression_channels,
                final_spatial,
                final_spatial,
            )

            self.head = nn.Sequential(
                Flatten(),
                nn.Linear(np.prod(self.output_shape), output_size),
                nn.ReLU(True),
            )

            self.layer_init()

    @property
    def is_blind(self):
        """True when neither an RGB nor a depth input contributes channels."""
        return self._n_input_rgb + self._n_input_depth == 0

    def layer_init(self):
        """Re-initialize every `nn.Conv2d` / `nn.Linear` in this module."""
        # NOTE(review): the ReLU gain is passed as the second positional
        # argument of `kaiming_normal_`; per the PyTorch docs that slot is
        # `a` (leaky-ReLU negative slope), not a gain. Kept unchanged so the
        # initialization stays the same - confirm whether this is intended.
        relu_gain = nn.init.calculate_gain("relu")
        for module in self.modules():
            if not isinstance(module, (nn.Conv2d, nn.Linear)):
                continue
            nn.init.kaiming_normal_(module.weight, relu_gain)
            if module.bias is not None:
                nn.init.constant_(module.bias, val=0)
        get_logger().debug("Initializing resnet encoder")

    def forward(self, observations):
        """Encode a batch of observations.

        Every configured input must be shaped
        [STEP, SAMPLER, (AGENT,) dim1, dim2, dim3]. The result keeps those
        leading dimensions and ends with the head's feature dimension.
        Returns `None` when the encoder is blind.
        """
        if self.is_blind:
            return None

        # TODO: the reshape follows compute_cnn_output()
        # but it's hard to make the forward as a nn.Module as cnn param
        nagents: Optional[int] = None
        nsteps: Optional[int] = None
        nsamplers: Optional[int] = None

        assert len(self._inputs) > 0

        per_input = []
        for uuid in self._inputs:
            frames = observations[uuid]
            assert len(frames.shape) in [
                5,
                6,
            ], "CNN input must have shape [STEP, SAMPLER, (AGENT,) dim1, dim2, dim3]"
            if len(frames.shape) == 6:
                nsteps, nsamplers, nagents = frames.shape[:3]
            else:
                nsteps, nsamplers = frames.shape[:2]

            # Make FLAT_BATCH = nsteps * nsamplers (* nagents)
            num_leading = 3 if nagents is not None else 2
            frames = frames.view((-1,) + frames.shape[num_leading:])
            # permute tensor to dimension [BATCH x CHANNEL x HEIGHT X WIDTH]
            per_input.append(frames.permute(0, 3, 1, 2))

        x = torch.cat(per_input, dim=1)
        x = F.avg_pool2d(x, 2)  # 2x downsampling

        # Shape comments below are the original author's example values.
        x = self.backbone(x)  # (256, 4, 4)
        x = self.compression(x)  # (128, 4, 4)
        x = self.head(x)  # (2048) -> (hidden_size)

        # Restore the leading [STEP, SAMPLER, (AGENT)] dimensions.
        if nagents is None:
            leading = (nsteps, nsamplers)
        else:
            leading = (nsteps, nsamplers, nagents)
        return x.reshape(leading + x.shape[1:])
================================================
FILE: allenact/embodiedai/models/visual_nav_models.py
================================================
from collections import OrderedDict
from typing import Tuple, Dict, Optional, List, Sequence
from typing import TypeVar
import gym
import torch
import torch.nn as nn
from gym.spaces.dict import Dict as SpaceDict
from allenact.algorithms.onpolicy_sync.policy import (
ActorCriticModel,
LinearCriticHead,
LinearActorHead,
ObservationType,
DistributionType,
)
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput, Memory
from allenact.embodiedai.aux_losses.losses import MultiAuxTaskNegEntropyLoss
from allenact.embodiedai.models.aux_models import AuxiliaryModel
from allenact.embodiedai.models.basic_models import RNNStateEncoder
from allenact.embodiedai.models.fusion_models import Fusion
from allenact.utils.model_utils import FeatureEmbedding
from allenact.utils.system import get_logger
# Type variable bounded by `Fusion`; used to annotate the `beliefs_fusion` argument below.
FusionType = TypeVar("FusionType", bound=Fusion)
class VisualNavActorCritic(ActorCriticModel[CategoricalDistr]):
    """Base class of visual navigation / manipulation (or broadly, embodied
    AI) models.

    Subclasses must implement `forward_encoder`. The `create_*` methods
    build the recurrent state encoders, the actor/critic heads and the
    optional auxiliary models; `__init__` only sets placeholders for them.
    """

    action_space: gym.spaces.Discrete

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        hidden_size=512,
        multiple_beliefs=False,
        beliefs_fusion: Optional[FusionType] = None,
        auxiliary_uuids: Optional[List[str]] = None,
        auxiliary_model_class=AuxiliaryModel,
    ):
        super().__init__(action_space=action_space, observation_space=observation_space)
        self._hidden_size = hidden_size

        # A fusion model must be given exactly when multiple beliefs are used.
        assert multiple_beliefs == (beliefs_fusion is not None)
        self.multiple_beliefs = multiple_beliefs
        self.beliefs_fusion = beliefs_fusion

        # An empty list of auxiliary uuids is treated like "no auxiliary tasks".
        is_empty_list = isinstance(auxiliary_uuids, list) and len(auxiliary_uuids) == 0
        self.auxiliary_uuids = None if is_empty_list else auxiliary_uuids

        # Define the placeholders in init function
        self.state_encoders: Optional[nn.ModuleDict] = None
        self.aux_models: Optional[nn.ModuleDict] = None
        self.actor: Optional[LinearActorHead] = None
        self.critic: Optional[LinearCriticHead] = None
        self.prev_action_embedder: Optional[FeatureEmbedding] = None

        self.fusion_model: Optional[nn.Module] = None
        self.belief_names: Optional[Sequence[str]] = None
        self.auxiliary_model_class = auxiliary_model_class

    def create_state_encoders(
        self,
        obs_embed_size: int,
        prev_action_embed_size: int,
        num_rnn_layers: int,
        rnn_type: str,
        add_prev_actions: bool,
        add_prev_action_null_token: bool,
        trainable_masked_hidden_state=False,
    ):
        """Build the previous-action embedder and the belief RNN(s).

        With multiple beliefs one `RNNStateEncoder` is created per auxiliary
        uuid together with the fusion model; otherwise a single encoder
        named `"single_belief"` is created.
        """
        # The embedder is always created; its output is empty (size 0) when
        # previous actions are not used.
        action_embed_size = prev_action_embed_size if add_prev_actions else 0
        self.prev_action_embedder = FeatureEmbedding(
            input_size=int(add_prev_action_null_token) + self.action_space.n,
            output_size=action_embed_size,
        )
        rnn_input_size = obs_embed_size + action_embed_size

        def build_encoder():
            return RNNStateEncoder(
                rnn_input_size,
                self._hidden_size,
                num_layers=num_rnn_layers,
                rnn_type=rnn_type,
                trainable_masked_hidden_state=trainable_masked_hidden_state,
            )

        state_encoders = OrderedDict()  # preserve insertion order in py3.6
        if self.multiple_beliefs:  # multiple belief model
            for aux_uuid in self.auxiliary_uuids:
                state_encoders[aux_uuid] = build_encoder()

            # create fusion model
            self.fusion_model = self.beliefs_fusion(
                hidden_size=self._hidden_size,
                obs_embed_size=obs_embed_size,
                num_tasks=len(self.auxiliary_uuids),
            )
        else:  # single belief model
            state_encoders["single_belief"] = build_encoder()

        self.state_encoders = nn.ModuleDict(state_encoders)
        self.belief_names = list(self.state_encoders.keys())

        get_logger().info(
            "there are {} belief models: {}".format(
                len(self.belief_names), self.belief_names
            )
        )

    def load_state_dict(self, state_dict, **kwargs):
        """Load weights after translating legacy parameter names.

        Keys containing an old prefix are rewritten to the current naming
        before being handed to the parent implementation.
        """
        # (old substring, replacement); the first match wins.
        legacy_renames = (
            ("state_encoder.", "state_encoders.single_belief."),  # old key name
            ("goal_visual_encoder.embed_class", "goal_visual_encoder.embed_goal"),
        )

        new_state_dict = OrderedDict()
        for key, value in state_dict.items():
            new_key = key
            for old_part, new_part in legacy_renames:
                if old_part in key:
                    new_key = key.replace(old_part, new_part)
                    break
            new_state_dict[new_key] = value

        return super().load_state_dict(new_state_dict, **kwargs)  # compatible in keys

    def create_actorcritic_head(self):
        """Create the linear actor and critic heads on top of the belief."""
        belief_size = self._hidden_size
        self.actor = LinearActorHead(belief_size, self.action_space.n)
        self.critic = LinearCriticHead(belief_size)

    def create_aux_models(self, obs_embed_size: int, action_embed_size: int):
        """Instantiate one auxiliary model per auxiliary uuid (if any)."""
        if self.auxiliary_uuids is None:
            return

        self.aux_models = nn.ModuleDict(
            OrderedDict(
                (
                    aux_uuid,
                    self.auxiliary_model_class(
                        aux_uuid=aux_uuid,
                        action_dim=self.action_space.n,
                        obs_embed_dim=obs_embed_size,
                        belief_dim=self._hidden_size,
                        action_embed_size=action_embed_size,
                    ),
                )
                for aux_uuid in self.auxiliary_uuids
            )
        )

    @property
    def num_recurrent_layers(self):
        """Number of recurrent hidden layers."""
        encoders = list(self.state_encoders.values())
        # Every encoder is built with the same settings; read the first one.
        return encoders[0].num_recurrent_layers

    @property
    def recurrent_hidden_state_size(self):
        """The recurrent hidden state size of a single model."""
        return self._hidden_size

    def _recurrent_memory_specification(self):
        """Describe one (layer, sampler, hidden) float32 memory per belief."""
        specification = {}
        for memory_key in self.belief_names:
            dims = (
                ("layer", self.num_recurrent_layers),
                ("sampler", None),
                ("hidden", self.recurrent_hidden_state_size),
            )
            specification[memory_key] = (dims, torch.float32)
        return specification

    def forward_encoder(self, observations: ObservationType) -> torch.FloatTensor:
        """Embed the observations; must be provided by subclasses."""
        raise NotImplementedError("Obs Encoder Not Implemented")

    def fuse_beliefs(
        self,
        beliefs_dict: Dict[str, torch.FloatTensor],
        obs_embeds: torch.FloatTensor,
    ) -> Tuple[torch.FloatTensor, Optional[torch.FloatTensor]]:
        """Combine the per-task beliefs into a single belief.

        Returns the fusion model's output for multi-belief models, and
        `(belief, None)` for single-belief models.
        """
        stacked = torch.stack(list(beliefs_dict.values()), dim=-1)  # (T, N, H, k)

        if not self.multiple_beliefs:
            # single belief: drop the trailing k == 1 axis -> (T, N, H)
            return stacked.squeeze(-1), None

        # call the fusion model
        return self.fusion_model(all_beliefs=stacked, obs_embeds=obs_embeds)

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Processes input batched observations to produce new actor and critic
        values. Processes input batched observations (along with prior hidden
        states, previous actions, and masks denoting which recurrent hidden
        states should be masked) and returns an `ActorCriticOutput` object
        containing the model's policy (distribution over actions) and
        evaluation of the current state (value).

        # Parameters

        observations : Batched input observations.
        memory : `Memory` containing the hidden states from initial timepoints.
        prev_actions : Tensor of previous actions taken.
        masks : Masks applied to hidden states. See `RNNStateEncoder`.

        # Returns

        Tuple of the `ActorCriticOutput` and recurrent hidden state.
        """
        # 1.1 use perception model (i.e. encoder) to get observation embeddings
        obs_embeds = self.forward_encoder(observations)

        # 1.2 use embedding model to get prev_action embeddings
        embedder = self.prev_action_embedder
        if embedder.input_size == self.action_space.n + 1:
            # In this case we have a unique embedding for the start of an episode:
            # index 0 is used where the mask is zero and every real action is
            # shifted up by one.
            action_tokens = torch.where(
                condition=0 != masks.view(*prev_actions.shape),
                input=prev_actions + 1,
                other=torch.zeros_like(prev_actions),
            )
            prev_actions_embeds = embedder(action_tokens)
        else:
            prev_actions_embeds = embedder(prev_actions)

        joint_embeds = torch.cat((obs_embeds, prev_actions_embeds), dim=-1)  # (T, N, *)

        # 2. use RNNs to get single/multiple beliefs
        beliefs_dict = {}
        for key, encoder in self.state_encoders.items():
            belief, rnn_hidden_states = encoder(joint_embeds, memory.tensor(key), masks)
            beliefs_dict[key] = belief
            memory.set_tensor(key, rnn_hidden_states)  # update memory here

        # 3. fuse beliefs for multiple belief models
        beliefs, task_weights = self.fuse_beliefs(beliefs_dict, obs_embeds)

        # 4. prepare output
        extras = {}
        if self.auxiliary_uuids is not None:
            for aux_uuid in self.auxiliary_uuids:
                extras[aux_uuid] = {
                    # Each auxiliary task sees its own belief when there are
                    # several, and the shared belief otherwise.
                    "beliefs": (
                        beliefs_dict[aux_uuid] if self.multiple_beliefs else beliefs
                    ),
                    "obs_embeds": obs_embeds,
                    "aux_model": (
                        self.aux_models[aux_uuid]
                        if aux_uuid in self.aux_models
                        else None
                    ),
                }

        if self.multiple_beliefs:
            extras[MultiAuxTaskNegEntropyLoss.UUID] = task_weights

        actor_critic_output = ActorCriticOutput(
            distributions=self.actor(beliefs),
            values=self.critic(beliefs),
            extras=extras,
        )

        return actor_critic_output, memory
================================================
FILE: allenact/embodiedai/preprocessors/__init__.py
================================================
================================================
FILE: allenact/embodiedai/preprocessors/resnet.py
================================================
from typing import List, Callable, Optional, Any, cast, Dict
import gym
import numpy as np
import torch
import torch.nn as nn
from torchvision import models
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.misc_utils import prepare_locals_for_super
class ResNetEmbedder(nn.Module):
    """Gradient-free feature extractor built around a given ResNet.

    The wrapped model is run from `conv1` through `layer4`. With `pool`
    enabled the result is additionally average-pooled by the model's
    `avgpool` and flattened from dimension 1 on. The module is switched to
    eval mode on construction.
    """

    # Attributes of the wrapped model applied, in order, to the input.
    _TRUNK_STAGES = (
        "conv1",
        "bn1",
        "relu",
        "maxpool",
        "layer1",
        "layer2",
        "layer3",
        "layer4",
    )

    def __init__(self, resnet, pool=True):
        super().__init__()
        self.model = resnet
        self.pool = pool
        self.eval()

    def forward(self, x):
        with torch.no_grad():
            for stage_name in self._TRUNK_STAGES:
                x = getattr(self.model, stage_name)(x)

            if self.pool:
                x = torch.flatten(self.model.avgpool(x), 1)
            return x
class ResNetPreprocessor(Preprocessor):
"""Preprocess RGB or depth image using a ResNet model.

The ResNet is created lazily on first access of `resnet` and wrapped in a
`ResNetEmbedder`. Exactly one input uuid is supported.
"""
def __init__(
self,
input_uuids: List[str],
output_uuid: str,
input_height: int,
input_width: int,
output_height: int,
output_width: int,
output_dims: int,
pool: bool,
torchvision_resnet_model: Callable[..., models.ResNet] = models.resnet18,
device: Optional[torch.device] = None,
device_ids: Optional[List[torch.device]] = None,
**kwargs: Any,
):
"""Initializer.

# Parameters

input_uuids : Single-element list with the uuid of the observation to embed.
output_uuid : Uuid of the produced observation.
input_height : Stored as `self.input_height`; not otherwise used in this class.
input_width : Stored as `self.input_width`; not otherwise used in this class.
output_height : Height entry of the declared output space shape.
output_width : Width entry of the declared output space shape.
output_dims : Leading (channel) entry of the declared output space shape.
pool : Forwarded to `ResNetEmbedder`; whether to average-pool and flatten.
torchvision_resnet_model : Factory called with `pretrained=True` to build the ResNet.
device : Device the model is placed on (CPU when `None`).
device_ids : Stored as `self.device_ids`; defaults to all visible CUDA device indices.
kwargs : Extra kwargs handed on through `prepare_locals_for_super`.
"""
# NOTE(review): `f` and `optf` are never called in this method, but they are
# locals at the time `locals()` is captured below - presumably they are
# forwarded to the parent initializer; verify before removing them.
def f(x, k):
assert k in x, "{} must be set in ResNetPreprocessor".format(k)
return x[k]
def optf(x, k, default):
return x[k] if k in x else default
self.input_height = input_height
self.input_width = input_width
self.output_height = output_height
self.output_width = output_width
self.output_dims = output_dims
self.pool = pool
self.make_model = torchvision_resnet_model
self.device = torch.device("cpu") if device is None else device
# Falls back to the indices of all CUDA devices when `device_ids` is falsy.
self.device_ids = device_ids or cast(
List[torch.device], list(range(torch.cuda.device_count()))
)
# Built on demand by the `resnet` property.
self._resnet: Optional[ResNetEmbedder] = None
# The output space is an unbounded box of shape (dims, height, width).
low = -np.inf
high = np.inf
shape = (self.output_dims, self.output_height, self.output_width)
assert (
len(input_uuids) == 1
), "resnet preprocessor can only consume one observation type"
observation_space = gym.spaces.Box(low=low, high=high, shape=shape)
# The helper receives every local defined above (including `observation_space`).
super().__init__(**prepare_locals_for_super(locals()))
@property
def resnet(self) -> ResNetEmbedder:
"""The wrapped embedder, created with pretrained weights on first access."""
if self._resnet is None:
self._resnet = ResNetEmbedder(
self.make_model(pretrained=True).to(self.device), pool=self.pool
)
return self._resnet
def to(self, device: torch.device) -> "ResNetPreprocessor":
"""Move the embedder to `device`, remember the device, and return `self`."""
self._resnet = self.resnet.to(device)
self.device = device
return self
def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:
"""Embed the single configured observation with the ResNet."""
x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2) # bhwc -> bchw
# If the input is depth, repeat it across all 3 channels
if x.shape[1] == 1:
x = x.repeat(1, 3, 1, 1)
return self.resnet(x.to(self.device))
================================================
FILE: allenact/embodiedai/sensors/__init__.py
================================================
================================================
FILE: allenact/embodiedai/sensors/vision_sensors.py
================================================
from abc import abstractmethod, ABC
from typing import Optional, Tuple, Any, cast, Union, Sequence
import PIL
import gym
import numpy as np
from torchvision import transforms
from allenact.base_abstractions.misc import EnvType
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import SubTaskType
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact.utils.tensor_utils import ScaleBothSides
# Per-channel mean / standard deviation used as the default RGB normalization
# in `RGBSensor` (described there as the standard resnet normalization).
IMAGENET_RGB_MEANS: Tuple[float, float, float] = (0.485, 0.456, 0.406)
IMAGENET_RGB_STDS: Tuple[float, float, float] = (0.229, 0.224, 0.225)
class VisionSensor(Sensor[EnvType, SubTaskType]):
# Base class for image sensors: subclasses provide `frame_from_env`, and this
# class optionally rescales and normalizes the returned frame.
def __init__(
self,
mean: Union[Sequence[float], np.ndarray, None] = None,
stdev: Union[Sequence[float], np.ndarray, None] = None,
height: Optional[int] = None,
width: Optional[int] = None,
uuid: str = "vision",
output_shape: Optional[Tuple[int, ...]] = None,
output_channels: Optional[int] = None,
unnormalized_infimum: float = -np.inf,
unnormalized_supremum: float = np.inf,
scale_first: bool = True,
**kwargs: Any
):
"""Initializer.
# Parameters
mean : The images will be normalized with the given mean
stdev : The images will be normalized with the given standard deviations.
height : If it's a non-negative integer and `width` is also non-negative integer, the image returned from the
environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.
width : If it's a non-negative integer and `height` is also non-negative integer, the image returned from the
environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.
uuid : The universally unique identifier for the sensor.
output_shape : Optional observation space shape (alternative to `output_channels`).
output_channels : Optional observation space number of channels (alternative to `output_shape`).
unnormalized_infimum : Lower limit(s) for the observation space range.
unnormalized_supremum : Upper limit(s) for the observation space range.
scale_first : Whether to scale image before normalization (if needed).
kwargs : Extra kwargs. Currently unused.
"""
self._norm_means = np.array(mean) if mean is not None else None
self._norm_sds = np.array(stdev) if stdev is not None else None
assert (self._norm_means is None) == (self._norm_sds is None), (
"In VisionSensor's config, "
"either both mean/stdev must be None or neither."
)
# Normalization is enabled exactly when mean and stdev were provided.
self._should_normalize = self._norm_means is not None
self._height = height
self._width = width
assert (self._width is None) == (self._height is None), (
"In VisionSensor's config, "
"either both height/width must be None or neither."
)
self._scale_first = scale_first
# A scaler is only created when a target size was given.
self.scaler: Optional[ScaleBothSides] = None
if self._width is not None:
self.scaler = ScaleBothSides(
width=cast(int, self._width), height=cast(int, self._height)
)
self.to_pil = transforms.ToPILImage() # assumes mode="RGB" for 3 channels
self._observation_space = self._make_observation_space(
output_shape=output_shape,
output_channels=output_channels,
unnormalized_infimum=unnormalized_infimum,
unnormalized_supremum=unnormalized_supremum,
)
# Only Pillow major version 7 is rejected by this check.
assert int(PIL.__version__.split(".")[0]) != 7, (
"We found that Pillow version >=7.* has broken scaling,"
" please downgrade to version 6.2.1 or upgrade to >=8.0.0"
)
observation_space = self._get_observation_space()
# NOTE(review): `locals()` is captured here, so the set of local names in
# this method is presumably part of what the parent initializer receives -
# verify before adding, renaming or removing locals.
super().__init__(**prepare_locals_for_super(locals()))
def _make_observation_space(
self,
output_shape: Optional[Tuple[int, ...]],
output_channels: Optional[int],
unnormalized_infimum: float,
unnormalized_supremum: float,
) -> gym.spaces.Box:
"""Build the `gym.spaces.Box` describing this sensor's output.

The shape is `output_shape` if given, otherwise (height, width,
output_channels) when both a size and a channel count are known, and `None`
otherwise. When normalization is enabled and the shape has more than one
dimension, the bounds are the normalized infimum/supremum tiled over the
spatial dimensions; otherwise the raw bounds are used.
"""
assert output_shape is None or output_channels is None, (
"In VisionSensor's config, "
"only one of output_shape and output_channels can be not None."
)
shape: Optional[Tuple[int, ...]] = None
if output_shape is not None:
shape = output_shape
elif self._height is not None and output_channels is not None:
shape = (
cast(int, self._height),
cast(int, self._width),
cast(int, output_channels),
)
if not self._should_normalize or shape is None or len(shape) == 1:
return gym.spaces.Box(
low=np.float32(unnormalized_infimum),
high=np.float32(unnormalized_supremum),
shape=shape,
)
else:
# Tile the normalized bound over every dimension except the last one.
out_shape = shape[:-1] + (1,)
low = np.tile(
(unnormalized_infimum - cast(np.ndarray, self._norm_means))
/ cast(np.ndarray, self._norm_sds),
out_shape,
)
high = np.tile(
(unnormalized_supremum - cast(np.ndarray, self._norm_means))
/ cast(np.ndarray, self._norm_sds),
out_shape,
)
return gym.spaces.Box(low=np.float32(low), high=np.float32(high))
def _get_observation_space(self):
"""Return the observation space built in `__init__`."""
return self._observation_space
@property
def height(self) -> Optional[int]:
"""Height that input image will be rescaled to have.
# Returns
The height as a non-negative integer or `None` if no rescaling is done.
"""
return self._height
@property
def width(self) -> Optional[int]:
"""Width that input image will be rescaled to have.
# Returns
The width as a non-negative integer or `None` if no rescaling is done.
"""
return self._width
@abstractmethod
def frame_from_env(self, env: EnvType, task: Optional[SubTaskType]) -> np.ndarray:
"""Return the raw frame for this sensor; must be implemented by subclasses."""
raise NotImplementedError
def process_img(self, img: np.ndarray):
"""Rescale and normalize a raw frame.

The frame must be either float32 with one channel (2-D, or trailing
dimension 1) or uint8 with three channels. uint8 frames are converted to
float32 and divided by 255. Rescaling happens before normalization when
`scale_first` is set and after it otherwise, and only if the frame's size
differs from the target size.
"""
assert (
np.issubdtype(img.dtype, np.float32)
and (len(img.shape) == 2 or img.shape[-1] == 1)
) or (img.shape[-1] == 3 and np.issubdtype(img.dtype, np.uint8)), (
"Input frame must either have 3 channels and be of"
" type np.uint8 or have one channel and be of type np.float32"
)
if (
self._scale_first
and self.scaler is not None
and img.shape[:2] != (self._height, self._width)
):
img = np.array(self.scaler(self.to_pil(img)), dtype=img.dtype) # hwc
elif np.issubdtype(img.dtype, np.float32):
# Copy so the in-place normalization below does not modify the input frame.
img = img.copy()
assert img.dtype in [np.uint8, np.float32]
if np.issubdtype(img.dtype, np.uint8):
img = img.astype(np.float32) / 255.0
if self._should_normalize:
img -= self._norm_means
img /= self._norm_sds
if (
(not self._scale_first)
and self.scaler is not None
and img.shape[:2] != (self._height, self._width)
):
img = np.array(self.scaler(self.to_pil(img)), dtype=np.float32) # hwc
return img
def get_observation(
self, env: EnvType, task: Optional[SubTaskType], *args: Any, **kwargs: Any
) -> Any:
"""Fetch the frame from the environment and run it through `process_img`."""
return self.process_img(self.frame_from_env(env=env, task=task))
class RGBSensor(VisionSensor[EnvType, SubTaskType], ABC):
# Abstract RGB flavour of `VisionSensor`: 3 output channels, values in [0, 1]
# before normalization, and optional normalization with the ImageNet statistics.
def __init__(
self,
use_resnet_normalization: bool = False,
mean: Optional[Union[np.ndarray, Sequence[float]]] = IMAGENET_RGB_MEANS,
stdev: Optional[Union[np.ndarray, Sequence[float]]] = IMAGENET_RGB_STDS,
height: Optional[int] = None,
width: Optional[int] = None,
uuid: str = "rgb",
output_shape: Optional[Tuple[int, ...]] = None,
output_channels: int = 3,
unnormalized_infimum: float = 0.0,
unnormalized_supremum: float = 1.0,
scale_first: bool = True,
**kwargs: Any
):
"""Initializer.
# Parameters
use_resnet_normalization : Whether to apply image normalization with the given `mean` and `stdev`.
mean : The images will be normalized with the given mean if `use_resnet_normalization` is True (default
`[0.485, 0.456, 0.406]`, i.e. the standard resnet normalization mean).
stdev : The images will be normalized with the given standard deviation if `use_resnet_normalization` is True
(default `[0.229, 0.224, 0.225]`, i.e. the standard resnet normalization standard deviation).
height: If it's a non-negative integer and `width` is also non-negative integer, the image returned from the
environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.
width: If it's a non-negative integer and `height` is also non-negative integer, the image returned from the
environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.
uuid: The universally unique identifier for the sensor.
output_shape: Optional observation space shape (alternative to `output_channels`).
output_channels: Optional observation space number of channels (alternative to `output_shape`).
unnormalized_infimum: Lower limit(s) for the observation space range.
unnormalized_supremum: Upper limit(s) for the observation space range.
scale_first: Whether to scale image before normalization (if needed).
kwargs : Extra kwargs. Currently unused.
"""
# Without normalization the statistics are dropped entirely.
if not use_resnet_normalization:
mean, stdev = None, None
# Only tuples are reshaped to (1, 1, C) float32 arrays; other sequence
# types and arrays are passed on unchanged.
if isinstance(mean, tuple):
mean = np.array(mean, dtype=np.float32).reshape((1, 1, len(mean)))
if isinstance(stdev, tuple):
stdev = np.array(stdev, dtype=np.float32).reshape((1, 1, len(stdev)))
# NOTE(review): `locals()` is captured here, so the (possibly rebound)
# arguments above are presumably what the parent initializer receives -
# verify before adding, renaming or removing locals.
super().__init__(**prepare_locals_for_super(locals()))
class DepthSensor(VisionSensor[EnvType, SubTaskType], ABC):
# Abstract depth flavour of `VisionSensor`: 1 output channel, values in
# [0, 5] before normalization, and a trailing channel axis added to each frame.
def __init__(
self,
use_normalization: bool = False,
mean: Optional[Union[np.ndarray, float]] = 0.5,
stdev: Optional[Union[np.ndarray, float]] = 0.25,
height: Optional[int] = None,
width: Optional[int] = None,
uuid: str = "depth",
output_shape: Optional[Tuple[int, ...]] = None,
output_channels: int = 1,
unnormalized_infimum: float = 0.0,
unnormalized_supremum: float = 5.0,
scale_first: bool = True,
**kwargs: Any
):
"""Initializer.
# Parameters
use_normalization : Whether to apply image normalization with the given `mean` and `stdev`.
mean : The images will be normalized with the given mean if `use_normalization` is True (default 0.5).
stdev : The images will be normalized with the given standard deviation if `use_normalization` is True
(default 0.25).
height: If it's a non-negative integer and `width` is also non-negative integer, the image returned from the
environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.
width: If it's a non-negative integer and `height` is also non-negative integer, the image returned from the
environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.
uuid: The universally unique identifier for the sensor.
output_shape: Optional observation space shape (alternative to `output_channels`).
output_channels: Optional observation space number of channels (alternative to `output_shape`).
unnormalized_infimum: Lower limit(s) for the observation space range.
unnormalized_supremum: Upper limit(s) for the observation space range.
scale_first: Whether to scale image before normalization (if needed).
kwargs : Extra kwargs. Currently unused.
"""
# Without normalization the statistics are dropped entirely.
if not use_normalization:
mean, stdev = None, None
# Only Python floats are turned into (1, 1) float32 arrays; arrays are
# passed on unchanged.
if isinstance(mean, float):
mean = np.array(mean, dtype=np.float32).reshape(1, 1)
if isinstance(stdev, float):
stdev = np.array(stdev, dtype=np.float32).reshape(1, 1)
# NOTE(review): `locals()` is captured here, so the (possibly rebound)
# arguments above are presumably what the parent initializer receives -
# verify before adding, renaming or removing locals.
super().__init__(**prepare_locals_for_super(locals()))
def get_observation( # type: ignore
self, env: EnvType, task: Optional[SubTaskType], *args: Any, **kwargs: Any
) -> Any:
"""Return the processed depth frame with a new axis inserted at position 2."""
depth = super().get_observation(env, task, *args, **kwargs)
depth = np.expand_dims(depth, 2)
return depth
================================================
FILE: allenact/embodiedai/storage/__init__.py
================================================
================================================
FILE: allenact/embodiedai/storage/vdr_storage.py
================================================
import math
import random
from collections import defaultdict
from typing import Union, Tuple, Optional, Dict, Callable, cast, Sequence
import torch
import torch.nn.functional as F
from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.algorithms.onpolicy_sync.storage import (
MiniBatchStorageMixin,
ExperienceStorage,
)
from allenact.base_abstractions.misc import (
GenericAbstractLoss,
ModelType,
Memory,
LossOutput,
)
from allenact.utils.misc_utils import unzip, partition_sequence
def _index_recursive(d: Dict, key: Union[str, Tuple[str, ...]]):
    """Look up `key` in the (possibly nested) mapping `d`.

    A string is used as a single key; any other `key` is treated as a path
    of keys that is followed one level at a time.
    """
    # A string must not be iterated character by character.
    path = (key,) if isinstance(key, str) else key
    node = d
    for part in path:
        node = node[part]
    return node
class InverseDynamicsVDRLoss(GenericAbstractLoss):
    """Cross-entropy loss for predicting the action between two images.

    # Parameters

    compute_action_logits_fn : Callable invoked as
        `fn(model=..., img0=..., img1=...)` that returns the action logits.
    img0_key : Batch key of the first image.
    img1_key : Batch key of the second image.
    action_key : Batch key of the target actions.
    """

    def __init__(
        self,
        compute_action_logits_fn: Callable,
        img0_key: str,
        img1_key: str,
        action_key: str,
    ):
        self.compute_action_logits_fn = compute_action_logits_fn
        self.img0_key, self.img1_key = img0_key, img1_key
        self.action_key = action_key

    def loss(
        self,
        *,
        model: ModelType,
        batch: ObservationType,
        batch_memory: Memory,
        stream_memory: Memory,
    ) -> LossOutput:
        """Compute the cross-entropy between predicted and stored actions.

        Both memories are passed through unchanged; `bsize` is the size of
        the first dimension of the first image tensor.
        """
        first_imgs = batch[self.img0_key]
        second_imgs = batch[self.img1_key]
        targets = batch[self.action_key]

        action_logits = self.compute_action_logits_fn(
            model=model, img0=first_imgs, img1=second_imgs
        )
        cross_entropy = F.cross_entropy(action_logits, target=targets)

        return LossOutput(
            value=cross_entropy,
            info={"cross_entropy": cross_entropy.item()},
            per_epoch_info={},
            batch_memory=batch_memory,
            stream_memory=stream_memory,
            bsize=int(first_imgs.shape[0]),
        )
class DiscreteVisualDynamicsReplayStorage(ExperienceStorage, MiniBatchStorageMixin):
    """Storage of (previous image, action, current image) transitions.

    Transitions are kept in one list per discrete action (at most
    `num_to_store_per_action` entries each; once a list is full a random
    entry is overwritten) and are served back in shuffled mini-batches by
    `batched_experience_generator`.
    """

    def __init__(
        self,
        image_uuid: Union[str, Tuple[str, ...]],
        action_success_uuid: Optional[Union[str, Tuple[str, ...]]],
        nactions: int,
        num_to_store_per_action: int,
        max_to_save_per_episode: int,
        target_batch_size: int,
        extra_targets: Optional[Sequence] = None,
    ):
        """Create an empty storage.

        # Parameters

        image_uuid : Key (or key path) of the image inside the observations.
        action_success_uuid : Key (or key path) of the per-sampler action
            success flag inside the observations. If `None`, every action is
            treated as successful.
        nactions : Number of discrete actions (one transition list per action).
        num_to_store_per_action : Maximum number of transitions kept per action.
        max_to_save_per_episode : Bound on the number of distinct actions saved
            per task sampler before its per-episode set is cleared.
        target_batch_size : Batch size used to decide how many batches the
            stored transitions are split into.
        extra_targets : Optional observation keys that are read on every `add`.
        """
        self.image_uuid = image_uuid
        self.action_success_uuid = action_success_uuid
        self.nactions = nactions
        self.num_to_store_per_action = num_to_store_per_action
        self.max_to_save_per_episode = max_to_save_per_episode
        self.target_batch_size = target_batch_size
        self.extra_targets = extra_targets if extra_targets is not None else []

        self._prev_imgs: Optional[torch.Tensor] = None

        self.action_to_saved_transitions = {i: [] for i in range(nactions)}
        self.action_to_num_seen = {i: 0 for i in range(nactions)}
        self.task_sampler_to_actions_already_sampled = defaultdict(lambda: set())

        self.device = torch.device("cpu")

        self._total_samples_returned_in_batches = 0

    @property
    def total_experiences(self):
        """Total number of samples yielded so far by `batched_experience_generator`."""
        return self._total_samples_returned_in_batches

    def set_partition(self, index: int, num_parts: int):
        """Shrink the per-action capacity and the batch size by `num_parts`.

        Both values are divided by `num_parts` and rounded up; `index` is not
        used.
        """
        self.num_to_store_per_action = math.ceil(
            self.num_to_store_per_action / num_parts
        )
        self.target_batch_size = math.ceil(self.target_batch_size / num_parts)

    def initialize(self, *, observations: ObservationType, **kwargs):
        """Forget the previous images and record `observations` as the new ones."""
        self._prev_imgs = None
        self.add(observations=observations, actions=None, masks=None)

    def batched_experience_generator(self, num_mini_batch: int):
        """Yield all stored transitions, shuffled, as dictionaries of tensors.

        Each yielded dictionary has the keys `"img0"`, `"action"` and `"img1"`
        with tensors moved to `self.device`. Nothing is yielded when the
        storage is empty. `num_mini_batch` is not used: the number of batches
        is `ceil(num transitions / target_batch_size)`.
        """
        triples = [
            (i0, a, i1)
            for a, v in self.action_to_saved_transitions.items()
            for (i0, i1) in v
        ]
        random.shuffle(triples)

        if len(triples) == 0:
            return

        parts = partition_sequence(
            triples, math.ceil(len(triples) / self.target_batch_size)
        )
        for part in parts:
            img0s, actions, img1s = unzip(part, n=3)

            img0 = torch.stack([i0.to(self.device) for i0 in img0s], 0)
            action = torch.tensor(actions, device=self.device)
            img1 = torch.stack([i1.to(self.device) for i1 in img1s], 0)

            self._total_samples_returned_in_batches += img0.shape[0]
            yield {"img0": img0, "action": action, "img1": img1}

    def add(
        self,
        *,
        observations: ObservationType,
        actions: Optional[torch.Tensor],
        masks: Optional[torch.Tensor],
        **kwargs,
    ):
        """Record the transitions from the previously seen images to the new ones.

        On the first call after `initialize` (no previous images) only the
        current images are remembered. Afterwards, for every sampler `i` a
        transition `(prev_img[i], cur_img[i])` is stored under action
        `actions[i]` when the mask is non-zero, the action succeeded, the
        per-episode bound has not been exceeded and the action has not
        already been saved for that sampler; otherwise the sampler's set of
        already saved actions is cleared.

        # Parameters

        observations : Current observations, must contain the image (and, if
            configured, the action success flag and the extra targets).
        actions : Actions taken to reach `observations` (flattened to one per sampler).
        masks : Per-sampler masks, `0` is treated as "do not save".
        kwargs : Ignored.
        """
        cur_imgs = cast(
            torch.Tensor, _index_recursive(d=observations, key=self.image_uuid).cpu()
        )

        if self._prev_imgs is not None:
            actions = actions.view(-1).cpu().numpy()
            masks = masks.view(-1).cpu().numpy()

            if self.action_success_uuid is not None:
                # `action_success_uuid` may be a key path (tuple), just like
                # `image_uuid`, so it has to be resolved recursively as well.
                action_successes = (
                    _index_recursive(d=observations, key=self.action_success_uuid)
                    .cpu()
                    .view(-1)
                    .numpy()
                )
            else:
                action_successes = [True] * actions.shape[0]

            # NOTE(review): `extra` is filled but never read afterwards; the
            # loop is kept because it still fails loudly on a missing target.
            extra = {}
            for et in self.extra_targets:
                extra[et] = observations[et][0].cpu().numpy()

            nsamplers = actions.shape[0]
            assert nsamplers == masks.shape[0]

            for i, (a, m, action_success) in enumerate(
                zip(actions, masks, action_successes)
            ):
                actions_already_sampled_in_ep = (
                    self.task_sampler_to_actions_already_sampled[i]
                )

                # NOTE(review): `<=` admits one more action than
                # `max_to_save_per_episode` - confirm whether `<` was intended.
                if (
                    m != 0
                    and action_success
                    and (
                        len(actions_already_sampled_in_ep)
                        <= self.max_to_save_per_episode
                    )
                    and a not in actions_already_sampled_in_ep
                ):  # Not the start of a new episode/task -> self._prev_imgs[i] corresponds to cur_imgs[i]
                    saved_transitions = self.action_to_saved_transitions[a]

                    if len(saved_transitions) < self.num_to_store_per_action:
                        saved_transitions.append((self._prev_imgs[i], cur_imgs[i]))
                    else:
                        # Storage for this action is full: overwrite a random entry.
                        saved_transitions[
                            random.randint(0, len(saved_transitions) - 1)
                        ] = (
                            self._prev_imgs[i],
                            cur_imgs[i],
                        )

                    # Reservoir sampling transitions
                    # a = int(a)
                    # saved_transitions = self.action_to_saved_transitions[a]
                    # num_seen = self.action_to_num_seen[a]
                    # if num_seen < self.triples_to_save_per_action:
                    #     saved_transitions.append((self._prev_imgs[i], cur_imgs[i]))
                    # else:
                    #     index = random.randint(0, num_seen)
                    #     if index < self.triples_to_save_per_action:
                    #         saved_transitions[index] = (self._prev_imgs[i], cur_imgs[i])

                    actions_already_sampled_in_ep.add(a)

                    self.action_to_num_seen[a] += 1
                else:
                    actions_already_sampled_in_ep.clear()

        self._prev_imgs = cur_imgs

    def before_updates(self, **kwargs):
        """No-op."""
        pass

    def after_updates(self, **kwargs):
        """No-op."""
        pass

    def to(self, device: torch.device):
        """Set the device on which the generated batches are placed."""
        self.device = device
================================================
FILE: allenact/main.py
================================================
"""Entry point to training/validating/testing for a user given experiment
name."""
import os
# Necessary to order GPUs correctly in some cases; a value that is already
# present in the environment is left untouched.
os.environ.setdefault("CUDA_DEVICE_ORDER", "PCI_BUS_ID")
import argparse
import ast
import importlib
import inspect
import json
from typing import Dict, List, Optional, Tuple, Type
from setproctitle import setproctitle as ptitle
from allenact import __version__
from allenact.algorithms.onpolicy_sync.runner import (
CONFIG_KWARGS_STR,
OnPolicyRunner,
SaveDirFormat,
)
from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.utils.system import HUMAN_LOG_LEVELS, get_logger, init_logging
def get_argument_parser():
    """Creates the argument parser.

    # Returns

    An `argparse.ArgumentParser` declaring the positional `experiment`
    argument, all supported optional flags and, at the end, the deprecated
    flags (their `help` text doubles as the error message raised by
    `get_args` when one of them is used).
    """

    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        description="allenact",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument(
        "experiment",
        type=str,
        help="the path to experiment config file relative to the 'experiment_base' directory"
        " (see the `--experiment_base` flag).",
    )

    parser.add_argument(
        "--eval",
        dest="eval",
        action="store_true",
        required=False,
        help="if you pass the `--eval` flag, AllenAct will run inference on your experiment configuration."
        " You will need to specify which experiment checkpoints to run evaluation using the `--checkpoint`"
        " flag.",
    )
    parser.set_defaults(eval=False)

    parser.add_argument(
        "--config_kwargs",
        type=str,
        default=None,
        required=False,
        help="sometimes it is useful to be able to pass additional key-word arguments"
        " to `__init__` when initializing an experiment configuration. This flag can be used"
        " to pass such key-word arguments by specifying them with json, e.g."
        '\n\t--config_kwargs \'{"gpu_id": 0, "my_important_variable": [1,2,3]}\''
        "\nTo see which arguments are supported for your experiment see the experiment"
        " config's `__init__` function. If the value passed to this function is a file path"
        " then we will try to load this file path as a json object and use this json object"
        " as key-word arguments.",
    )

    parser.add_argument(
        "--extra_tag",
        type=str,
        default="",
        required=False,
        help="Add an extra tag to the experiment when trying out new ideas (will be used"
        " as a subdirectory of the tensorboard path so you will be able to"
        " search tensorboard logs using this extra tag). This can also be used to add an extra"
        " organization when running evaluation (e.g. `--extra_tag running_eval_on_great_idea_12`)",
    )

    parser.add_argument(
        "-o",
        "--output_dir",
        required=False,
        type=str,
        default="experiment_output",
        help="experiment output folder",
    )

    parser.add_argument(
        "--save_dir_fmt",
        required=False,
        # The (string) default is passed through `type` as well, so the parsed
        # value is always a `SaveDirFormat` member.
        type=lambda s: SaveDirFormat[s.upper()],
        default="flat",
        help="The file structure to use when saving results from allenact."
        " See documentation of `SaveDirFormat` for more details."
        " Allowed values are ('flat' and 'nested'). Default: 'flat'.",
    )

    parser.add_argument(
        "-s",
        "--seed",
        required=False,
        default=None,
        type=int,
        help="random seed",
    )
    parser.add_argument(
        "-b",
        "--experiment_base",
        required=False,
        default=os.getcwd(),
        type=str,
        help="experiment configuration base folder (default: working directory)",
    )
    parser.add_argument(
        "-c",
        "--checkpoint",
        required=False,
        default=None,
        type=str,
        help="optional checkpoint file name to resume training on or run testing with. When testing (see the `--eval` flag) this"
        " argument can be used very flexibly as:"
        "\n(1) the path to a particular individual checkpoint file,"
        "\n(2) the path to a directory of checkpoint files all of which you'd like to be evaluated"
        " (checkpoints are expected to have a `.pt` file extension),"
        '\n(3) a "glob" pattern (https://tldp.org/LDP/abs/html/globbingref.html) that will be expanded'
        " using python's `glob.glob` function and should return a collection of checkpoint files."
        "\nIf you'd like to only evaluate a subset of the checkpoints specified by the above directory/glob"
        " (e.g. every checkpoint saved after 5mil steps) you'll likely want to use the `--approx_ckpt_step_interval`"
        " flag.",
    )
    parser.add_argument(
        "--infer_output_dir",
        dest="infer_output_dir",
        action="store_true",
        required=False,
        help="applied when evaluating checkpoint(s) in nested save_dir_fmt: if specified, the output dir will be inferred from checkpoint path.",
    )
    parser.add_argument(
        "--approx_ckpt_step_interval",
        required=False,
        default=None,
        type=float,
        help="if running tests on a collection of checkpoints (see the `--checkpoint` flag) this argument can be"
        " used to skip checkpoints. In particular, if this value is specified and equals `n` then we will"
        " only evaluate checkpoints whose step count is closest to each of `0*n`, `1*n`, `2*n`, `3*n`, ... "
        " n * ceil(max training steps in ckpts / n). Note that 'closest to' is important here as AllenAct does"
        " not generally save checkpoints at exact intervals (doing so would result in performance degradation"
        " in distributed training).",
    )
    parser.add_argument(
        "-r",
        "--restart_pipeline",
        dest="restart_pipeline",
        action="store_true",
        required=False,
        help="for training, if checkpoint is specified, DO NOT continue the training pipeline from where"
        " training had previously ended. Instead restart the training pipeline from scratch but"
        " with the model weights from the checkpoint.",
    )
    parser.set_defaults(restart_pipeline=False)

    parser.add_argument(
        "-d",
        "--deterministic_cudnn",
        dest="deterministic_cudnn",
        action="store_true",
        required=False,
        help="sets CuDNN to deterministic mode",
    )
    parser.set_defaults(deterministic_cudnn=False)

    parser.add_argument(
        "-m",
        "--max_sampler_processes_per_worker",
        required=False,
        default=None,
        type=int,
        help="maximal number of sampler processes to spawn for each worker",
    )

    parser.add_argument(
        "-e",
        "--deterministic_agents",
        dest="deterministic_agents",
        action="store_true",
        required=False,
        help="enable deterministic agents (i.e. always taking the mode action) during validation/testing",
    )
    parser.set_defaults(deterministic_agents=False)

    parser.add_argument(
        "-l",
        "--log_level",
        default="info",
        type=str,
        required=False,
        help="sets the log_level. it must be one of {}.".format(
            ", ".join(HUMAN_LOG_LEVELS)
        ),
    )

    parser.add_argument(
        "-i",
        "--disable_tensorboard",
        dest="disable_tensorboard",
        action="store_true",
        required=False,
        help="disable tensorboard logging",
    )
    parser.set_defaults(disable_tensorboard=False)

    parser.add_argument(
        "-a",
        "--disable_config_saving",
        dest="disable_config_saving",
        action="store_true",
        required=False,
        help="disable saving the used config in the output directory",
    )
    parser.set_defaults(disable_config_saving=False)

    parser.add_argument(
        "--collect_valid_results",
        dest="collect_valid_results",
        action="store_true",
        required=False,
        help="enables returning and saving valid results during training",
    )
    parser.set_defaults(collect_valid_results=False)

    parser.add_argument(
        "--valid_on_initial_weights",
        dest="valid_on_initial_weights",
        action="store_true",
        required=False,
        help="enables running validation on the model with initial weights",
    )
    parser.set_defaults(valid_on_initial_weights=False)

    parser.add_argument(
        "--test_expert",
        dest="test_expert",
        action="store_true",
        required=False,
        help="use expert during test",
    )
    parser.set_defaults(test_expert=False)

    parser.add_argument(
        "--version", action="version", version=f"allenact {__version__}"
    )

    parser.add_argument(
        "--distributed_ip_and_port",
        dest="distributed_ip_and_port",
        required=False,
        type=str,
        default="127.0.0.1:0",
        help="IP address and port of listener for distributed process with rank 0."
        " Port number 0 lets runner choose a free port. For more details, please follow the"
        " tutorial https://allenact.org/tutorials/distributed-objectnav-tutorial/.",
    )

    parser.add_argument(
        "--machine_id",
        dest="machine_id",
        required=False,
        type=int,
        default=0,
        help="ID for machine in distributed runs. For more details, please follow the"
        " tutorial https://allenact.org/tutorials/distributed-objectnav-tutorial/",
    )

    parser.add_argument(
        "--save_ckpt_at_every_host",
        dest="save_ckpt_at_every_host",
        action="store_true",
        required=False,
        help="if you pass the `--save_ckpt_at_every_host` flag, AllenAct will save checkpoints at every host as"
        " the training progresses in distributed training mode.",
    )
    parser.set_defaults(save_ckpt_at_every_host=False)

    parser.add_argument(
        "--callbacks",
        dest="callbacks",
        required=False,
        type=str,
        default="",
        help="Comma-separated list of files with Callback classes to use.",
    )

    parser.add_argument(
        "--enable_crash_recovery",
        dest="enable_crash_recovery",
        default=False,
        action="store_true",
        required=False,
        help="Whether or not to try recovering when a task crashes (use at your own risk).",
    )

    ### DEPRECATED FLAGS
    parser.add_argument(
        "-t",
        "--test_date",
        default=None,
        type=str,
        required=False,
        help="`--test_date` has been deprecated. Please use `--eval` instead.",
    )
    parser.add_argument(
        "--approx_ckpt_steps_count",
        required=False,
        default=None,
        type=float,
        help="`--approx_ckpt_steps_count` has been deprecated."
        " Please specify the checkpoint directly using the '--checkpoint' flag.",
    )
    parser.add_argument(
        "-k",
        "--skip_checkpoints",
        required=False,
        default=0,
        type=int,
        # Point to the supported replacement, not to another deprecated flag.
        help="`--skip_checkpoints` has been deprecated. Please use `--approx_ckpt_step_interval` instead.",
    )
    ### END DEPRECATED FLAGS

    return parser
def get_args():
    """Creates the argument parser and parses any input arguments.

    # Returns

    The parsed arguments.

    # Raises

    RuntimeError : If any deprecated flag was given a value that is not its
        default; the message is the help text of that flag.
    """
    arg_parser = get_argument_parser()
    parsed = arg_parser.parse_args()

    # check for deprecated
    for flag_name in ("test_date", "skip_checkpoints", "approx_ckpt_steps_count"):
        action = arg_parser._option_string_actions[f"--{flag_name}"]
        current_value = getattr(parsed, flag_name)
        if current_value is not action.default:
            raise RuntimeError(action.help)

    return parsed
def _config_source(config_type: Type) -> Dict[str, str]:
    """Collect the source files defining `config_type` and its base classes.

    # Parameters

    config_type : The class to inspect.

    # Returns

    A dictionary mapping source file paths to module dot paths. The walk
    stops at `ExperimentConfig` (which contributes nothing), and an empty
    dictionary is returned when `inspect.getfile` raises a `TypeError`
    for `config_type`.
    """
    if config_type is ExperimentConfig:
        return {}

    try:
        collected = {inspect.getfile(config_type): config_type.__module__}
        for base_type in config_type.__bases__:
            collected.update(_config_source(base_type))
    except TypeError:
        return {}
    return collected
def find_sub_modules(path: str, module_list: Optional[List] = None):
    """Recursively collect the absolute paths of `.py` files below `path`.

    # Parameters

    path : A file or directory path. A path ending in `.py` is recorded
        directly; a directory is only descended into when it contains an
        `__init__.py` or a `setup.py`.
    module_list : Optional list to append to (a new list is created if `None`).

    # Returns

    The list (the same object as `module_list` when one was given) with the
    discovered paths appended.
    """
    if module_list is None:
        module_list = []

    path = os.path.abspath(path)
    if path.endswith(".py"):
        module_list.append(path)
    elif os.path.isdir(path):
        # List the directory once and reuse the result for both the marker
        # check and the recursion.
        contents = os.listdir(path)
        if "__init__.py" in contents or "setup.py" in contents:
            for entry in contents:
                find_sub_modules(os.path.join(path, entry), module_list)
    return module_list
def load_config(args) -> Tuple[ExperimentConfig, Dict[str, str]]:
    """Import the experiment module and instantiate its experiment config.

    # Parameters

    args : Parsed command line arguments; `experiment_base`, `experiment`
        and `config_kwargs` are read.

    # Returns

    A tuple of the instantiated experiment config and a dictionary of its
    sources (file path -> module dot path, plus the json-dumped config
    kwargs stored under `CONFIG_KWARGS_STR`).

    # Raises

    AssertionError : If `experiment_base` does not exist, if the module does
        not define exactly one `ExperimentConfig` subclass, or if the config
        kwargs do not evaluate to a dictionary.
    ModuleNotFoundError : If the experiment module cannot be imported.
    """
    assert os.path.exists(
        args.experiment_base
    ), "The path '{}' does not seem to exist (your current working directory is '{}').".format(
        args.experiment_base, os.getcwd()
    )
    rel_base_dir = os.path.relpath(  # Normalizing string representation of path
        os.path.abspath(args.experiment_base), os.getcwd()
    )
    # `os.path.relpath` uses the platform separator, which is not always "/".
    rel_base_dot_path = rel_base_dir.replace(os.sep, ".").replace("/", ".")
    if rel_base_dot_path == ".":
        rel_base_dot_path = ""

    exp_dot_path = args.experiment
    if exp_dot_path[-3:] == ".py":
        exp_dot_path = exp_dot_path[:-3]
    exp_dot_path = exp_dot_path.replace(os.sep, ".").replace("/", ".")

    module_path = (
        f"{rel_base_dot_path}.{exp_dot_path}"
        if len(rel_base_dot_path) != 0
        else exp_dot_path
    )

    try:
        importlib.invalidate_caches()
        module = importlib.import_module(module_path)
    except ModuleNotFoundError as e:
        # Only rewrite the error if it is the experiment module itself that is
        # missing (and not one of the modules it imports).
        if not any(isinstance(arg, str) and module_path in arg for arg in e.args):
            raise e
        all_sub_modules = set(find_sub_modules(os.getcwd()))
        desired_config_name = module_path.split(".")[-1]
        relevant_submodules = [
            sm for sm in all_sub_modules if desired_config_name in os.path.basename(sm)
        ]
        raise ModuleNotFoundError(
            f"Could not import experiment '{module_path}', are you sure this is the right path?"
            f" Possibly relevant files include {relevant_submodules}."
            f" Note that the experiment must be reachable along your `PYTHONPATH`, it might"
            f" be helpful for you to run `export PYTHONPATH=$PYTHONPATH:$PWD` in your"
            f" project's top level directory."
        ) from e

    # Classes defined in the module itself (not merely imported into it) that
    # are experiment configs.
    experiments = [
        m[1]
        for m in inspect.getmembers(module, inspect.isclass)
        if m[1].__module__ == module.__name__ and issubclass(m[1], ExperimentConfig)
    ]
    assert (
        len(experiments) == 1
    ), "Too many or too few experiments defined in {}".format(module_path)

    config_kwargs = {}
    if args.config_kwargs is not None:
        if os.path.exists(args.config_kwargs):
            with open(args.config_kwargs, "r") as f:
                config_kwargs = json.load(f)
        else:
            try:
                config_kwargs = json.loads(args.config_kwargs)
            except json.JSONDecodeError:
                get_logger().warning(
                    f"The input for --config_kwargs ('{args.config_kwargs}')"
                    f" does not appear to be valid json. Often this is due to"
                    f" json requiring very specific syntax (e.g. double quoted strings)"
                    f" we'll try to get around this by evaluating with `ast.literal_eval`"
                    f" (a safer version of the standard `eval` function)."
                )
                config_kwargs = ast.literal_eval(args.config_kwargs)

        assert isinstance(
            config_kwargs, Dict
        ), "`--config_kwargs` must be a json string (or a path to a .json file) that evaluates to a dictionary."

    config = experiments[0](**config_kwargs)
    sources = _config_source(config_type=experiments[0])
    sources[CONFIG_KWARGS_STR] = json.dumps(config_kwargs)
    return config, sources
def main():
    """Parse the command line arguments and start training or evaluation.

    An `OnPolicyRunner` is built from the loaded experiment config; without
    `--eval` it is run with `start_train` (mode `"train"`), with `--eval` it
    is run with `start_test` (mode `"test"`).
    """
    args = get_args()

    init_logging(args.log_level)

    get_logger().info("Running with args {}".format(args))

    # `args.eval` is a boolean flag (it is never `None`).
    ptitle("Master: {}".format("Evaluation" if args.eval else "Training"))

    cfg, srcs = load_config(args)

    runner = OnPolicyRunner(
        config=cfg,
        output_dir=args.output_dir,
        save_dir_fmt=args.save_dir_fmt,
        loaded_config_src_files=srcs,
        seed=args.seed,
        mode="test" if args.eval else "train",
        deterministic_cudnn=args.deterministic_cudnn,
        deterministic_agents=args.deterministic_agents,
        extra_tag=args.extra_tag,
        disable_tensorboard=args.disable_tensorboard,
        disable_config_saving=args.disable_config_saving,
        distributed_ip_and_port=args.distributed_ip_and_port,
        machine_id=args.machine_id,
        callbacks_paths=args.callbacks,
    )

    if not args.eval:
        runner.start_train(
            checkpoint=args.checkpoint,
            restart_pipeline=args.restart_pipeline,
            max_sampler_processes_per_worker=args.max_sampler_processes_per_worker,
            collect_valid_results=args.collect_valid_results,
            valid_on_initial_weights=args.valid_on_initial_weights,
            try_restart_after_task_error=args.enable_crash_recovery,
            save_ckpt_at_every_host=args.save_ckpt_at_every_host,
        )
    else:
        runner.start_test(
            checkpoint_path_dir_or_pattern=args.checkpoint,
            infer_output_dir=args.infer_output_dir,
            approx_ckpt_step_interval=args.approx_ckpt_step_interval,
            max_sampler_processes_per_worker=args.max_sampler_processes_per_worker,
            inference_expert=args.test_expert,
        )
if __name__ == "__main__":
main()
================================================
FILE: allenact/setup.py
================================================
import os
from pathlib import Path
from setuptools import find_packages, setup
def parse_req_file(fname, initial=None):
    """Reads requires.txt file generated by setuptools and outputs a
    new/updated dict of extras as keys and corresponding lists of dependencies
    as values.

    The input file's contents are similar to a `ConfigParser` file, e.g.

        pkg_1
        pkg_2
        pkg_3

        [extras1]
        pkg_4
        pkg_5

        [extras2]
        pkg_6
        pkg_7

    # Parameters

    fname : Path of the file to read.
    initial : Optional dict to update in place (a new dict is created if `None`).

    # Returns

    The dict of extras. Dependencies listed before the first `[section]`
    header are not included.
    """
    reqs = {} if initial is None else initial
    cline = None
    with open(fname, "r") as f:
        for line in f:
            # `strip` removes the line terminator (if any); unconditionally
            # dropping the last character would corrupt a final line that has
            # no trailing newline.
            line = line.strip()
            if len(line) == 0:
                continue
            if line[0] == "[":
                # Add new key for current extras (if missing in dict)
                cline = line[1:-1].strip()
                if cline not in reqs:
                    reqs[cline] = []
            else:
                # Only keep dependencies from extras
                if cline is not None:
                    reqs[cline].append(line)
    return reqs
def get_version(fname):
    """Reads PKG-INFO file generated by setuptools and extracts the Version
    number.

    # Parameters

    fname : Path of the PKG-INFO file.

    # Returns

    The text following the first line that starts with `Version:`.

    # Raises

    ValueError : If no such line exists or its value is empty.
    """
    prefix = "Version:"
    res = "UNK"
    with open(fname, "r") as f:
        for line in f:
            if line.startswith(prefix):
                # `strip` also removes the line terminator, if there is one;
                # the final line of a file need not end with a newline.
                res = line[len(prefix) :].strip()
                break
    if res in ["UNK", ""]:
        raise ValueError(f"Missing Version number in {fname}")
    return res
def _do_setup():
    """Run `setuptools.setup` for the `allenact` package.

    When `allenact.egg-info/dependency_links.txt` does not exist next to this
    file, the working directory is changed to the parent directory, the
    version is read from `.VERSION` and the `dev` extras from
    `dev_requirements.txt`. Otherwise the version and the extras are read
    from the `allenact.egg-info` metadata.
    """
    base_dir = os.path.abspath(os.path.dirname(Path(__file__)))

    if not os.path.exists(
        os.path.join(base_dir, "allenact.egg-info/dependency_links.txt")
    ):
        # Build mode for sdist
        os.chdir(os.path.join(base_dir, ".."))

        with open(".VERSION", "r") as f:
            version = f.readline().strip()

        # Extra dependencies for development (actually unnecessary)
        with open("dev_requirements.txt", "r") as f:
            extras = {"dev": [req.strip() for req in f if req.strip() != ""]}
    else:
        # Install mode from sdist
        version = get_version(os.path.join(base_dir, "allenact.egg-info/PKG-INFO"))
        extras = parse_req_file(
            os.path.join(base_dir, "allenact.egg-info/requires.txt")
        )

    setup(
        name="allenact",
        version=version,
        description="AllenAct framework",
        long_description=(
            "AllenAct is a modular and flexible learning framework designed with"
            " a focus on the unique requirements of Embodied-AI research."
        ),
        classifiers=[
            "Intended Audience :: Science/Research",
            "Development Status :: 3 - Alpha",
            "License :: OSI Approved :: MIT License",
            "Topic :: Scientific/Engineering :: Artificial Intelligence",
            "Programming Language :: Python",
            "Programming Language :: Python :: 3.6",
            "Programming Language :: Python :: 3.7",
            "Programming Language :: Python :: 3.8",
            "Programming Language :: Python :: 3.9",
            "Programming Language :: Python :: 3.10",
        ],
        keywords=["reinforcement learning", "embodied-AI", "AI", "RL", "SLAM"],
        url="https://github.com/allenai/allenact",
        author="Allen Institute for Artificial Intelligence",
        author_email="lucaw@allenai.org",
        license="MIT",
        packages=find_packages(include=["allenact", "allenact.*"]),
        install_requires=[
            "gym==0.17.*",  # Newer versions of gym are now broken with updates to setuptools
            "torch>=1.6.0,!=1.8.0",
            "torchvision>=0.7.0,<=0.16.2",
            "tensorboardx>=2.1",
            "setproctitle",
            "moviepy>=1.0.3",
            "filelock",
            "numpy>=1.19.1",
            "Pillow>=8.2.0,<10.3.0",
            "matplotlib>=3.3.1",
            "networkx",
            "opencv-python",
            "wheel>=0.36.2",
            "attrs>=21.4.0",
            "scipy>=1.5.4",
        ],
        setup_requires=["pytest-runner"],
        tests_require=["pytest", "pytest-cov", "compress_pickle"],
        entry_points={"console_scripts": ["allenact=allenact.main:main"]},
        extras_require=extras,
    )
if __name__ == "__main__":
_do_setup()
================================================
FILE: allenact/utils/__init__.py
================================================
================================================
FILE: allenact/utils/cache_utils.py
================================================
import math
from typing import Dict, Any, Union, Callable, Optional
from allenact.utils.system import get_logger
def pos_to_str_for_cache(pos: Dict[str, float]) -> str:
    """Encode a position as the string `"<x>_<y>_<z>"` (each value via `str`)."""
    return "_".join(str(pos[axis]) for axis in ("x", "y", "z"))
def str_to_pos_for_cache(s: str) -> Dict[str, float]:
    """Decode a `"<x>_<y>_<z>"` string into a position dictionary of floats.

    Only the first three `_`-separated fields are used.
    """
    fields = s.split("_")
    return {axis: float(fields[i]) for i, axis in enumerate(("x", "y", "z"))}
def get_distance(
    cache: Dict[str, Any], pos: Dict[str, float], target: Dict[str, float]
) -> float:
    """Look up the shortest path distance from `pos` to `target` in `cache`.

    The lookup is attempted, in order, for the four corners of the 0.25 grid
    cell that contains `pos` (x/z rounded with ceil/ceil, floor/ceil,
    ceil/floor and floor/floor), then for the cached point nearest to `pos`,
    and finally with `target` also replaced by its nearest cached point.

    # Parameters

    cache : Nested mapping `position string -> target string -> {"distance": ...}`.
    pos : Query position with keys `"x"`, `"y"` and `"z"`.
    target : Target position with keys `"x"`, `"y"` and `"z"`.

    # Returns

    The first distance found.

    # Raises

    RuntimeError : If none of the lookups succeeds.
    """
    # Every candidate is derived from the *original* position: re-rounding an
    # already rounded position would yield the same grid point again.
    rounders = (
        (math.ceil, math.ceil),
        (math.floor, math.ceil),
        (math.ceil, math.floor),
        (math.floor, math.floor),
    )
    for round_x, round_z in rounders:
        candidate = {
            "x": 0.25 * round_x(pos["x"] / 0.25),
            "y": pos["y"],
            "z": 0.25 * round_z(pos["z"] / 0.25),
        }
        sp = _get_shortest_path_distance_from_cache(cache, candidate, target)
        if sp != -1.0:
            return sp

    # Fall back to the nearest cached start point, then additionally to the
    # nearest cached target point.
    nearest_pos = find_nearest_point_in_cache(cache, pos)
    sp = _get_shortest_path_distance_from_cache(cache, nearest_pos, target)
    if sp == -1.0:
        target = find_nearest_point_in_cache(cache, target)
        sp = _get_shortest_path_distance_from_cache(cache, nearest_pos, target)
    if sp == -1.0:
        raise RuntimeError("Your cache is incomplete!")
    return sp
def get_distance_to_object(
    cache: Dict[str, Any], pos: Dict[str, float], target_class: str
) -> float:
    """Interpolate the cached distance from `pos` to an object class.

    The distance is looked up for the four corners of the 0.25 grid cell
    that contains `pos`; the distances that were found are averaged with
    weights inversely proportional to the (x, z) distance between `pos` and
    the respective corner.

    # Parameters

    cache : Nested mapping `position string -> target class -> {"distance": ...}`.
    pos : Query position with keys `"x"`, `"y"` and `"z"`.
    target_class : Name of the target object class.

    # Returns

    The weighted average of the distances found.

    # Raises

    RuntimeError : If no corner has a cached distance.
    """
    dists = []
    weights = []
    for rounder_func_0 in [math.ceil, math.floor]:
        for rounder_func_1 in [math.ceil, math.floor]:
            rounded_pos = {
                "x": 0.25 * rounder_func_0(pos["x"] / 0.25),
                "y": pos["y"],
                "z": 0.25 * rounder_func_1(pos["z"] / 0.25),
            }
            dist = _get_shortest_path_distance_to_object_from_cache(
                cache, rounded_pos, target_class
            )
            if dist >= 0:
                dists.append(dist)
                # The small epsilon only guards against a division by zero
                # when `pos` coincides with a grid point; it must not dominate
                # the distance term.
                weights.append(
                    1.0
                    / (
                        math.sqrt(
                            (pos["x"] - rounded_pos["x"]) ** 2
                            + (pos["z"] - rounded_pos["z"]) ** 2
                        )
                        + 1e-6
                    )
                )

    if len(dists) == 0:
        raise RuntimeError("Your cache is incomplete!")

    total_weight = sum(weights)
    weights = [w / total_weight for w in weights]

    return sum(d * w for d, w in zip(dists, weights))
def _get_shortest_path_distance_from_cache(
    cache: Dict[str, Any], position: Dict[str, float], target: Dict[str, float]
) -> float:
    """Return the cached distance from `position` to `target`.

    `-1.0` is returned when any lookup along the way raises a `KeyError`.
    """
    try:
        entries_for_position = cache[pos_to_str_for_cache(position)]
        entry = entries_for_position[pos_to_str_for_cache(target)]
        return entry["distance"]
    except KeyError:
        return -1.0
def _get_shortest_path_distance_to_object_from_cache(
    cache: Dict[str, Any], position: Dict[str, float], target_class: str
) -> float:
    """Return the cached distance from `position` to `target_class`.

    `-1.0` is returned when any lookup along the way raises a `KeyError`.
    """
    try:
        entries_for_position = cache[pos_to_str_for_cache(position)]
        return entries_for_position[target_class]["distance"]
    except KeyError:
        return -1.0
def find_nearest_point_in_cache(
    cache: Dict[str, Any], point: Dict[str, float]
) -> Dict[str, float]:
    """Find the cached position with the smallest L1 distance to `point`.

    # Parameters

    cache : Mapping whose keys are position strings.
    point : Query position with keys `"x"`, `"y"` and `"z"`.

    # Returns

    The decoded nearest position (the first one encountered on ties), or an
    empty dictionary if no key yields a distance below infinity.
    """
    nearest: Dict[str, float] = {}
    smallest_l1 = float("inf")
    for key in cache:
        candidate = str_to_pos_for_cache(key)
        l1 = sum(abs(point[axis] - candidate[axis]) for axis in ("x", "y", "z"))
        if l1 < smallest_l1:
            nearest, smallest_l1 = candidate, l1
    return nearest
class DynamicDistanceCache(object):
    """Cache of distances that is filled lazily on first request.

    Distances are stored in a two-level dictionary keyed by a string built
    from the scene name and position, and by a string for the target.
    """

    def __init__(self, rounding: Optional[int] = None):
        """Create an empty cache.

        # Parameters

        rounding : If truthy, number of decimals the position values are
            rounded to before being used as a cache key.
        """
        self.cache: Dict[str, Any] = {}
        self.rounding = rounding
        self.hits = 0
        self.misses = 0
        self.num_accesses = 0

    def find_distance(
        self,
        scene_name: str,
        position: Dict[str, Any],
        target: Union[Dict[str, Any], str],
        native_distance_function: Callable[
            [Dict[str, Any], Union[Dict[str, Any], str]], float
        ],
    ) -> float:
        """Return the distance from `position` to `target`, computing it on a miss.

        # Parameters

        scene_name : Name of the scene, used as a prefix of the position key.
        position : Start position.
        target : Either a target position or a string identifying the target.
        native_distance_function : Called as `fn(position, target)` (with the
            unrounded arguments) when the distance is not cached yet.

        # Returns

        The cached (or freshly computed and cached) distance.
        """
        # Convert the position to its rounded string representation
        position_str = scene_name + self._pos_to_str(position)
        # If the target is also a position, convert it to its rounded string representation
        if isinstance(target, str):
            target_str = target
        else:
            target_str = self._pos_to_str(target)

        if position_str not in self.cache:
            self.cache[position_str] = {}
        if target_str not in self.cache[position_str]:
            self.cache[position_str][target_str] = native_distance_function(
                position, target
            )
            self.misses += 1
        else:
            self.hits += 1
        self.num_accesses += 1
        if self.num_accesses % 1000 == 0:
            # Without any hit so far the ratio is undefined; report infinity
            # so that logging can never raise a `ZeroDivisionError`.
            ratio = self.misses / self.hits if self.hits != 0 else float("inf")
            get_logger().debug("Cache Miss-Hit Ratio: %.4f" % ratio)
        return self.cache[position_str][target_str]

    def invalidate(self):
        """Drop all cached distances (the hit/miss counters are kept)."""
        # Must stay a dictionary: `find_distance` indexes it by string keys.
        self.cache = {}

    def _pos_to_str(self, pos: Dict[str, Any]) -> str:
        """Return `str(pos)`, with the values rounded first if rounding is enabled."""
        # NOTE(review): `rounding=0` is falsy and therefore disables rounding.
        if self.rounding:
            pos = {k: round(v, self.rounding) for k, v in pos.items()}
        return str(pos)
================================================
FILE: allenact/utils/cacheless_frcnn.py
================================================
from typing import List, Any
import torch
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.faster_rcnn import FasterRCNN
# noinspection PyProtectedMember
from torchvision.models.detection.faster_rcnn import model_urls
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.utils import load_state_dict_from_url
class CachelessAnchorGenerator(AnchorGenerator):
    """Anchor generator whose `forward` rebuilds the anchors on every call.

    NOTE(review): judging by the name this presumably avoids a cache kept by
    the parent `AnchorGenerator` - confirm against the torchvision version
    in use.
    """

    def forward(self, image_list: Any, feature_maps: Any):
        """Compute the anchors for every image in `image_list`.

        # Parameters

        image_list : Object exposing `tensors` and `image_sizes`.
        feature_maps : Sequence of feature map tensors.

        # Returns

        A list with one tensor per entry of `image_list.image_sizes`, each
        being the concatenation of the anchors of all feature maps.
        """
        # Spatial size (last two dimensions) of every feature map.
        grid_sizes = list([feature_map.shape[-2:] for feature_map in feature_maps])
        image_size = image_list.tensors.shape[-2:]
        # Integer ratio between the image size and each feature map size.
        strides = [
            [int(image_size[0] / g[0]), int(image_size[1] / g[1])] for g in grid_sizes
        ]
        dtype, device = feature_maps[0].dtype, feature_maps[0].device
        self.set_cell_anchors(dtype, device)
        anchors_over_all_feature_maps = self.grid_anchors(grid_sizes, strides)
        anchors = torch.jit.annotate(List[List[torch.Tensor]], [])  # type:ignore
        # The image sizes themselves are not used: every image receives the
        # same per-feature-map anchors.
        for i, (image_height, image_width) in enumerate(image_list.image_sizes):
            anchors_in_image = []
            for anchors_per_feature_map in anchors_over_all_feature_maps:
                anchors_in_image.append(anchors_per_feature_map)
            anchors.append(anchors_in_image)
        anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors]

        return anchors
def fasterrcnn_resnet50_fpn(
    pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, **kwargs
):
    """Build a `FasterRCNN` that uses a `CachelessAnchorGenerator`.

    # Parameters

    pretrained : If true, the state dict stored under the
        `"fasterrcnn_resnet50_fpn_coco"` entry of `model_urls` is loaded into
        the model (and the backbone is requested without pretrained weights).
    progress : Forwarded to `load_state_dict_from_url`.
    num_classes : Forwarded to `FasterRCNN`.
    pretrained_backbone : Forwarded to `resnet_fpn_backbone` unless
        `pretrained` is set.
    kwargs : Extra keyword arguments forwarded to `FasterRCNN`.

    # Returns

    The constructed model.
    """
    # no need to download the backbone if pretrained is set
    use_pretrained_backbone = False if pretrained else pretrained_backbone
    fpn_backbone = resnet_fpn_backbone("resnet50", use_pretrained_backbone)

    sizes = ((32,), (64,), (128,), (256,), (512,))
    ratios = ((0.5, 1.0, 2.0),) * len(sizes)
    anchor_generator = CachelessAnchorGenerator(sizes, ratios)
    detector = FasterRCNN(
        fpn_backbone, num_classes, rpn_anchor_generator=anchor_generator, **kwargs
    )

    # min_size = 300
    # max_size = 400
    # anchor_sizes = ((12,), (24,), (48,), (96,), (192,))
    # aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
    # rpn_anchor_generator = CachelessAnchorGenerator(
    #     anchor_sizes, aspect_ratios
    # )
    # model = FasterRCNN(backbone, num_classes, rpn_anchor_generator=rpn_anchor_generator, min_size=min_size, max_size=max_size, **kwargs)

    if pretrained:
        weights = load_state_dict_from_url(
            model_urls["fasterrcnn_resnet50_fpn_coco"], progress=progress
        )
        detector.load_state_dict(weights)
    return detector
================================================
FILE: allenact/utils/experiment_utils.py
================================================
"""Utility classes and functions for running and designing experiments."""
import abc
import collections.abc
import copy
import numbers
import random
from collections import OrderedDict, defaultdict
from typing import (
Callable,
NamedTuple,
Dict,
Any,
Union,
Iterator,
Optional,
List,
cast,
Sequence,
TypeVar,
Generic,
Tuple,
)
import attr
import numpy as np
import torch
import torch.optim as optim
import wandb
import shutil
from allenact.algorithms.offpolicy_sync.losses.abstract_offpolicy_loss import Memory
from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
AbstractActorCriticLoss,
)
from allenact.algorithms.onpolicy_sync.storage import (
ExperienceStorage,
RolloutStorage,
RolloutBlockStorage,
)
from allenact.base_abstractions.misc import Loss, GenericAbstractLoss
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact.utils.system import get_logger
try:
# noinspection PyProtectedMember,PyUnresolvedReferences
from torch.optim.lr_scheduler import _LRScheduler
except (ImportError, ModuleNotFoundError):
raise ImportError("`_LRScheduler` was not found in `torch.optim.lr_scheduler`")
_DEFAULT_ONPOLICY_UUID = "onpolicy"
def evenly_distribute_count_into_bins(count: int, nbins: int) -> List[int]:
    """Distribute a count into a number of bins.

    # Parameters

    count: A positive integer to be distributed, should be `>= nbins`.
    nbins: The number of bins.

    # Returns

    A list of positive integers which sum to `count`. These values will be
    as close to equal as possible (may differ by at most 1); any remainder
    is given to the lowest-indexed bins.
    """
    assert count >= nbins, f"count ({count}) < nbins ({nbins})"

    if count <= 0:
        # Nothing to hand out. The assertion above implies `nbins <= 0` here,
        # so this evaluates to an empty list.
        return [0] * nbins

    # Every bin receives `base` items and the first `extra` bins receive one
    # more. This matches dealing the items out round-robin, but costs
    # O(nbins) instead of O(count).
    base, extra = divmod(count, nbins)
    return [base + (1 if i < extra else 0) for i in range(nbins)]
def recursive_update(
    original: Union[Dict, collections.abc.MutableMapping],
    update: Union[Dict, collections.abc.MutableMapping],
):
    """Recursively updates original dictionary with entries from update dict.

    Nested mappings in `update` are merged into the corresponding nested
    mapping of `original` rather than replacing it wholesale. If `original`
    holds a non-mapping value (e.g. `None` or a number) where `update` holds
    a mapping, the non-mapping value is replaced by a fresh dictionary
    containing the entries of the update.

    # Parameters

    original : Original dictionary to be updated (modified in place).
    update : Dictionary with additional or replacement entries.

    # Returns

    Updated original dictionary.
    """
    for k, v in update.items():
        if isinstance(v, collections.abc.MutableMapping):
            existing = original.get(k)
            if not isinstance(existing, collections.abc.MutableMapping):
                # `k` is either missing or bound to something that cannot be
                # merged into (merging into e.g. `None` would raise a
                # `TypeError`), so start from a new dictionary. This also
                # avoids aliasing the nested mappings of `update`.
                existing = {}
            original[k] = recursive_update(existing, v)
        else:
            original[k] = v
    return original
ToBuildType = TypeVar("ToBuildType")


class Builder(tuple, Generic[ToBuildType]):
    """Used to instantiate a given class with (default) parameters.

    Helper class that stores a class, default parameters for that
    class, and key word arguments that (possibly) overwrite the defaults.

    When calling an object of the Builder class it generates
    an object of type `class_type` with parameters specified by
    the attributes `default` and `kwargs` (and possibly additional, overwriting,
    keyword arguments).

    A `Builder` is also a 3-tuple `(class_type, kwargs, default)`.

    # Attributes

    class_type : The class to be instantiated when calling the object.
    kwargs : Keyword arguments used to instantiate an object of type `class_type`.
    default : Default parameters used when instantiating the class.
    """

    class_type: ToBuildType
    kwargs: Dict[str, Any]
    default: Dict[str, Any]

    # noinspection PyTypeChecker
    def __new__(
        cls,
        class_type: ToBuildType,
        kwargs: Optional[Dict[str, Any]] = None,
        default: Optional[Dict[str, Any]] = None,
    ):
        """Create a new Builder.

        For parameter descriptions see the class documentation. Note
        that `kwargs` and `default` can be None in which case they are
        set to be empty dictionaries.
        """
        self = tuple.__new__(
            cls,
            (
                class_type,
                kwargs if kwargs is not None else {},
                default if default is not None else {},
            ),
        )
        # Mirror the tuple entries as attributes so that the attribute and the
        # tuple element refer to the very same dictionary objects.
        self.class_type = class_type
        self.kwargs = self[1]
        self.default = self[2]
        return self

    def __repr__(self) -> str:
        # Report the actual class name (previously this printed "Group").
        return (
            f"{type(self).__name__}(class_type={self.class_type},"
            f" kwargs={self.kwargs},"
            f" default={self.default})"
        )

    def __call__(self, **kwargs) -> ToBuildType:
        """Build and return a new object.

        # Parameters

        kwargs : additional keyword arguments to use when instantiating
            the object. These overwrite all arguments already in the `self.kwargs`
            and `self.default` attributes.

        # Returns

        Object of type `self.class_type` with parameters
        taken from `self.default`, `self.kwargs`, and
        any keyword arguments additionally passed to `__call__`.
        """
        # Deep copy the defaults so that the recursive merges below never
        # mutate the dictionaries stored on this builder.
        allkwargs = copy.deepcopy(self.default)
        recursive_update(allkwargs, self.kwargs)
        recursive_update(allkwargs, kwargs)
        return cast(Callable, self.class_type)(**allkwargs)
class ScalarMeanTracker(object):
    """Maintain running, optionally weighted, means of named scalars."""

    def __init__(self) -> None:
        self._sums: Dict[str, float] = OrderedDict()
        self._counts: Dict[str, int] = OrderedDict()

    def add_scalars(
        self, scalars: Dict[str, Union[float, int]], n: Union[int, Dict[str, int]] = 1
    ) -> None:
        """Accumulate new scalar observations.

        # Parameters

        scalars : A dictionary of `scalar key -> value` pairs.
        n : The weight (count) of each value: either one integer applied to
            every key or a dictionary of `scalar key -> count` pairs.
        """
        if isinstance(n, Dict):
            weights = n
        else:
            # The same weight is used for every key.
            weights = defaultdict(lambda: n)
        weights = cast(Dict[str, int], weights)  # type: ignore

        for key, value in scalars.items():
            weight = weights[key]
            if key in self._sums:
                self._sums[key] += weight * value
                self._counts[key] += weight
            else:
                self._sums[key] = weight * value
                self._counts[key] = weight

    def pop_and_reset(self) -> Dict[str, float]:
        """Return the current means and forget everything tracked so far.

        # Returns

        A dictionary of `scalar key -> current mean` pairs corresponding to those
        values added with `add_scalars`.
        """
        current_means = self.means()
        self.reset()
        return current_means

    def reset(self):
        """Discard all tracked sums and counts."""
        self._counts = OrderedDict()
        self._sums = OrderedDict()

    def sums(self):
        """Return a shallow copy of the `scalar key -> weighted sum` mapping."""
        return copy.copy(self._sums)

    def counts(self) -> Dict[str, int]:
        """Return a shallow copy of the `scalar key -> total count` mapping."""
        return copy.copy(self._counts)

    def means(self) -> Dict[str, float]:
        """Return the `scalar key -> mean` mapping without resetting."""
        return OrderedDict(
            (key, float(total / self._counts[key]))
            for key, total in self._sums.items()
        )

    @property
    def empty(self):
        """`True` if no scalars are currently being tracked."""
        num_sums = len(self._sums)
        num_counts = len(self._counts)
        assert (
            num_sums == num_counts
        ), "Mismatched length of _sums {} and _counts {}".format(num_sums, num_counts)
        return num_sums == 0
class LoggingPackage:
    """Container bundling everything that is sent to the logger at once."""

    def __init__(
        self,
        mode: str,
        training_steps: Optional[int],
        storage_uuid_to_total_experiences: Dict[str, int],
        pipeline_stage: Optional[int] = None,
        checkpoint_file_name: Optional[str] = None,
    ) -> None:
        # Values supplied by the caller.
        self.mode = mode
        self.training_steps: int = training_steps
        self.storage_uuid_to_total_experiences: Dict[str, int] = (
            storage_uuid_to_total_experiences
        )
        self.pipeline_stage = pipeline_stage
        self.checkpoint_file_name: Optional[str] = checkpoint_file_name

        # Accumulators, filled in through the `add_*` methods.
        self.metrics_tracker = ScalarMeanTracker()
        self.info_trackers: Dict[Tuple[str, str], ScalarMeanTracker] = {}
        self.metric_dicts: List[Any] = []
        self.num_empty_metrics_dicts_added: int = 0

        self.viz_data: Optional[Dict[str, List[Dict[str, Any]]]] = None
        self.task_callback_data: List[Any] = []

    @property
    def num_non_empty_metrics_dicts_added(self) -> int:
        """Number of metrics dictionaries that were accepted (i.e. non-empty)."""
        return len(self.metric_dicts)

    @staticmethod
    def _metrics_dict_is_empty(
        single_task_metrics_dict: Dict[str, Union[float, int]]
    ) -> bool:
        """Whether a metrics dictionary carries no usable metrics.

        That is the case when it has no entries, when its only entry is
        `"task_info"`, or when it has a `"success"` entry equal to `None`.
        """
        num_entries = len(single_task_metrics_dict)
        if num_entries == 0:
            return True
        if num_entries == 1 and "task_info" in single_task_metrics_dict:
            return True
        return (
            "success" in single_task_metrics_dict
            and single_task_metrics_dict["success"] is None
        )

    def add_metrics_dict(
        self, single_task_metrics_dict: Dict[str, Union[float, int]]
    ) -> bool:
        """Record the metrics of a single task.

        # Returns

        `False` if the dictionary was considered empty (only the empty counter
        is incremented), `True` if it was stored and its scalars (everything
        but `"task_info"`) were added to the metrics tracker.
        """
        if self._metrics_dict_is_empty(single_task_metrics_dict):
            self.num_empty_metrics_dicts_added += 1
            return False

        self.metric_dicts.append(single_task_metrics_dict)
        scalar_metrics = {
            name: value
            for name, value in single_task_metrics_dict.items()
            if name != "task_info"
        }
        self.metrics_tracker.add_scalars(scalar_metrics)
        return True

    def add_info_dict(
        self,
        info_dict: Dict[str, Union[int, float]],
        n: int,
        stage_component_uuid: str,
        storage_uuid: str,
    ):
        """Add `info_dict`, weighted by `n`, to the tracker associated with the
        `(stage_component_uuid, storage_uuid)` pair (created on first use)."""
        tracker_key = (stage_component_uuid, storage_uuid)
        if tracker_key not in self.info_trackers:
            self.info_trackers[tracker_key] = ScalarMeanTracker()

        assert n >= 0
        tracker = self.info_trackers[tracker_key]
        tracker.add_scalars(scalars=info_dict, n=n)
class LinearDecay(object):
    """Linearly decay between two values over some number of steps.

    Obtain the value corresponding to the `i`-th step by calling
    an instance of this class with the value `i`.

    # Parameters

    steps : The number of steps over which to decay.
    startp : The starting value.
    endp : The ending value.
    """

    def __init__(self, steps: int, startp: float = 1.0, endp: float = 0.0) -> None:
        """Initializer.

        See class documentation for parameter definitions.
        """
        self.steps = steps
        self.startp = startp
        self.endp = endp

    def __call__(self, epoch: int) -> float:
        """Get the decayed value for `epoch` number of steps.

        # Parameters

        epoch : The number of steps. Values outside `[0, steps]` are clamped
            to that range.

        # Returns

        Decayed value for `epoch` number of steps.
        """
        if self.steps == 0:
            # A decay over zero steps is already finished; returning the end
            # value avoids the division by zero below.
            return float(self.endp)

        epoch = max(min(epoch, self.steps), 0)
        return self.startp + (self.endp - self.startp) * (epoch / float(self.steps))
class MultiLinearDecay(object):
    """Container for multiple stages of LinearDecay.

    Obtain the value corresponding to the `i`-th step by calling
    an instance of this class with the value `i`.

    # Parameters

    stages: List of `LinearDecay` objects to be sequentially applied
        for the number of steps in each stage.
    """

    def __init__(self, stages: Sequence["LinearDecay"]) -> None:
        """Initializer.

        See class documentation for parameter definitions.
        """
        self.stages = stages
        # `self.steps[i]` is the total number of steps up to and including stage `i`.
        self.steps = np.cumsum([stage.steps for stage in self.stages])
        self.total_steps = self.steps[-1]

        # Cursor caching the stage that served the most recent call; the
        # active stage covers the steps in `[min_steps, max_steps)`.
        self.stage_idx = -1
        self.min_steps = 0
        self.max_steps = 0
        self.stage = None

    def __call__(self, epoch: int) -> float:
        """Get the decayed value factor for `epoch` number of steps.

        # Parameters

        epoch : The number of steps. Values outside `[0, total_steps]` are
            clamped to that range.

        # Returns

        Decayed value for `epoch` number of steps.
        """
        epoch = max(min(epoch, self.total_steps), 0)

        if epoch < self.min_steps:
            # The cached stage starts after `epoch` (calls are not required to
            # be monotonically increasing), so restart the search from the
            # first stage instead of evaluating the wrong stage.
            self.stage_idx = -1
            self.min_steps = 0
            self.max_steps = 0
            self.stage = None

        # Advance the cursor until the active stage contains `epoch` (or the
        # final cumulative step count has been reached).
        while epoch >= self.max_steps and self.max_steps < self.total_steps:
            self.stage_idx += 1
            assert self.stage_idx < len(self.stages)

            self.min_steps = self.max_steps
            self.max_steps = self.steps[self.stage_idx]
            self.stage = self.stages[self.stage_idx]

        # Each stage counts its steps from its own start.
        return self.stage(epoch - self.min_steps)
# noinspection PyTypeHints,PyUnresolvedReferences
def set_deterministic_cudnn() -> None:
    """Switch cudnn to deterministic mode (a no-op without CUDA).

    Disables cudnn benchmarking and enables its deterministic flag.
    This may slow down computations.
    """
    if not torch.cuda.is_available():
        return

    torch.backends.cudnn.benchmark = False  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
def set_seed(seed: Optional[int] = None) -> None:
    """Seed the random number generators of `random`, `numpy`, and `torch`.

    # Parameters

    seed : The seed to set. If set to None, keep using the current seed.
    """
    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)  # seeds the RNG for all devices (CPU and GPUs)
class EarlyStoppingCriterion(abc.ABC):
    """Abstract base class for objects that decide whether training should
    stop early in a particular pipeline stage."""

    @abc.abstractmethod
    def __call__(
        self,
        stage_steps: int,
        total_steps: int,
        training_metrics: ScalarMeanTracker,
    ) -> bool:
        """Decide whether training should be stopped early.

        # Parameters

        stage_steps: Total number of steps taken in the current pipeline stage.
        total_steps: Total number of steps taken during training so far (includes steps
            taken in prior pipeline stages).
        training_metrics: Training metrics recovered over some fixed number of steps
            (see the `metric_accumulate_interval` attribute in the `TrainingPipeline` class).

        # Returns

        `True` if training should be stopped early, `False` otherwise.
        """
        raise NotImplementedError()
class NeverEarlyStoppingCriterion(EarlyStoppingCriterion):
    """An `EarlyStoppingCriterion` that never requests an early stop."""

    def __call__(
        self,
        stage_steps: int,
        total_steps: int,
        training_metrics: ScalarMeanTracker,
    ) -> bool:
        """Always returns `False`; all arguments are ignored."""
        return False
class OffPolicyPipelineComponent(NamedTuple):
    """An off-policy component for a `PipelineStage`.

    # Attributes

    data_iterator_builder: A function to instantiate a Data Iterator (with a `__next__(self)` method).
    loss_names: list of unique names assigned to off-policy losses.
    updates: number of off-policy updates between on-policy rollout collections.
    loss_weights : A list of floating point numbers describing the relative weights
        applied to the losses referenced by `loss_names`. Should be the same length
        as `loss_names`. If this is `None`, all weights will be assumed to be one.
    data_iterator_kwargs_generator: Optional generator of keyword arguments for
        `data_iterator_builder` (useful for distributed training). It takes
        a `cur_worker` int value,
        a `rollouts_per_worker` list of number of samplers per training worker,
        and an optional random `seed` shared by all workers, which can be None.
        The default generator returns an empty dictionary.
    """

    data_iterator_builder: Callable[..., Iterator]
    loss_names: List[str]
    updates: int
    loss_weights: Optional[Sequence[float]] = None
    data_iterator_kwargs_generator: Callable[
        [int, Sequence[int], Optional[int]], Dict
    ] = lambda cur_worker, rollouts_per_worker, seed: {}
class TrainingSettings:
    """Class defining parameters used for training (within a stage or the
    entire pipeline).

    Settings are read as attributes. A setting whose value is `None` is
    looked up in the `TrainingSettings` registered through `set_defaults`
    (if any), which allows stage-level settings to fall back to pipeline-level
    ones.

    # Attributes

    num_mini_batch : The number of mini-batches to break a rollout into.
    update_repeats : The number of times we will cycle through the mini-batches corresponding
        to a single rollout doing gradient updates.
    max_grad_norm : The maximum "inf" norm of any gradient step (gradients are clipped to not exceed this).
    num_steps : Total number of steps a single agent takes in a rollout.
    gamma : Discount factor applied to rewards (should be in [0, 1]).
    use_gae : Whether or not to use generalized advantage estimation (GAE).
    gae_lambda : The additional parameter used in GAE.
    advance_scene_rollout_period: Optional number of rollouts before enforcing an advance scene in all samplers.
    save_interval : The frequency with which to save (in total agent steps taken). If `None` then *no*
        checkpoints will be saved. Otherwise, in addition to the checkpoints being saved every
        `save_interval` steps, a checkpoint will *always* be saved at the end of each pipeline stage.
        If `save_interval <= 0` then checkpoints will only be saved at the end of each pipeline stage.
    metric_accumulate_interval : The frequency with which training/validation metrics are accumulated
        (in total agent steps). Metrics accumulated in an interval are logged (if `should_log` is `True`)
        and used by the stage's early stopping criterion (if any).
    """

    num_mini_batch: Optional[int]
    update_repeats: Optional[Union[int, Sequence[int]]]
    max_grad_norm: Optional[float]
    num_steps: Optional[int]
    gamma: Optional[float]
    use_gae: Optional[bool]
    gae_lambda: Optional[float]
    advance_scene_rollout_period: Optional[int]
    save_interval: Optional[int]
    metric_accumulate_interval: Optional[int]

    # noinspection PyUnresolvedReferences
    def __init__(
        self,
        num_mini_batch: Optional[int] = None,
        update_repeats: Optional[Union[int, Sequence[int]]] = None,
        max_grad_norm: Optional[float] = None,
        num_steps: Optional[int] = None,
        gamma: Optional[float] = None,
        use_gae: Optional[bool] = None,
        gae_lambda: Optional[float] = None,
        advance_scene_rollout_period: Optional[int] = None,
        save_interval: Optional[int] = None,
        metric_accumulate_interval: Optional[int] = None,
    ):
        # NOTE: this must stay the first statement so that `locals()` holds
        # nothing but the constructor arguments.
        self._key_to_setting = prepare_locals_for_super(locals(), ignore_kwargs=True)
        self._training_setting_keys = tuple(sorted(self._key_to_setting.keys()))

        self._defaults: Optional["TrainingSettings"] = None

    def keys(self) -> Tuple[str, ...]:
        """Sorted names of all settings."""
        return self._training_setting_keys

    def has_key(self, key: str) -> bool:
        """Whether `key` is the name of a setting."""
        return key in self._key_to_setting

    def set_defaults(self, defaults: "TrainingSettings"):
        """Register the settings consulted for values that are `None` here.

        Can only be called once per instance.
        """
        assert self._defaults is None, "Defaults can only be set once."
        self._defaults = defaults

    def __getattr__(self, item: str):
        """Resolve a setting, falling back to the defaults when it is `None`.

        Only invoked when regular attribute lookup fails.

        # Raises

        AttributeError : if `item` is not a setting.
        """
        # Read the instance dictionary directly: going through
        # `self._key_to_setting` would re-enter `__getattr__` without end
        # whenever that attribute does not exist yet, e.g. while `copy` or
        # `pickle` reconstruct an instance without calling `__init__`.
        instance_dict = self.__dict__
        key_to_setting = instance_dict.get("_key_to_setting")

        if key_to_setting is not None and item in key_to_setting:
            val = key_to_setting[item]
            defaults = instance_dict.get("_defaults")
            if val is None and defaults is not None:
                val = getattr(defaults, item)
            return val

        # Not a setting: let the standard machinery raise the `AttributeError`.
        return super(TrainingSettings, self).__getattribute__(item)
@attr.s(kw_only=True)
class StageComponent:
    """A custom component for a PipelineStage, possibly including overrides to
    the `TrainingSettings` from the `TrainingPipeline` and `PipelineStage`.

    # Attributes

    uuid: the name of this component
    storage_uuid: the name of the `ExperienceStorage` that will be used with this component.
    loss_names: list of unique names assigned to off-policy losses
    training_settings: Instance of `TrainingSettings`. Only settings that are not
        rejected by the validator below may be overridden here; the remaining ones must
        be left as `None` and be given to the `PipelineStage` or `TrainingPipeline`.
    """

    uuid: str = attr.ib()
    storage_uuid: str = attr.ib()
    loss_names: Sequence[str] = attr.ib()
    training_settings: TrainingSettings = attr.ib(
        default=attr.Factory(TrainingSettings)
    )

    @training_settings.validator
    def _validate_training_settings(self, attribute, value: TrainingSettings):
        """Reject settings that would be ignored at the component level."""
        must_be_none = [
            "num_steps",
            "gamma",
            "use_gae",
            "gae_lambda",
            "advance_scene_rollout_period",
            "save_interval",
            "metric_accumulate_interval",
        ]
        for key in must_be_none:
            # The message is a plain string; a trailing comma inside the
            # parentheses previously turned it into a 1-tuple.
            assert getattr(value, key) is None, (
                f"`{key}` must be `None` in `TrainingSettings` passed to"
                f" `StageComponent` (as such values will be ignored). Pass such"
                f" settings to the `PipelineStage` or `TrainingPipeline` objects instead."
            )
class PipelineStage:
    """A single stage in a training pipeline, possibly including overrides to
    the global `TrainingSettings` in `TrainingPipeline`.

    # Attributes

    loss_names : A collection of unique names assigned to losses. These will
        reference the `Loss` objects in a `TrainingPipeline` instance.
    max_stage_steps : Either the total number of steps agents should take in this stage or
        a Callable object (e.g. a function).
        NOTE(review): `is_complete` compares this value with `>=`, so a callable is not
        handled by this class itself - confirm how callers resolve it.
    loss_weights : A list of floating point numbers describing the relative weights
        applied to the losses referenced by `loss_names`. Should be the same length
        as `loss_names`. If this is `None`, all weights will be assumed to be one.
    teacher_forcing : If applicable, defines the probability an agent will take the
        expert action (as opposed to its own sampled action) at a given time point.
    stage_components : Optional `StageComponent`s to register with this stage (see
        `add_stage_component`).
    early_stopping_criterion: An `EarlyStoppingCriterion` object which determines if
        training in this stage should be stopped early. If `None` then no early stopping
        occurs. If `early_stopping_criterion` is not `None` then we do not guarantee
        reproducibility when restarting a model from a checkpoint (as the
        `EarlyStoppingCriterion` object may store internal state which is not
        saved in the checkpoint). Currently, AllenAct only supports using early stopping
        criterion when **not** using distributed training.
    training_settings: Instance of `TrainingSettings`.
    callback_to_change_engine_attributes : Optional dictionary mapping the name of an engine
        attribute to a dictionary with the entries `"func"` and `"args"`. When
        `change_engine_attributes` is called, the attribute is set to `func(engine, **args)`.
    training_settings_kwargs: For backwards compatibility: arguments to instantiate TrainingSettings when
        `training_settings` is `None`.
    """

    def __init__(
        self,
        *,  # Disables positional arguments. Please provide arguments as keyword arguments.
        max_stage_steps: Union[int, Callable],
        loss_names: List[str],
        loss_weights: Optional[Sequence[float]] = None,
        teacher_forcing: Optional[Callable[[int], float]] = None,
        stage_components: Optional[Sequence["StageComponent"]] = None,
        early_stopping_criterion: Optional["EarlyStoppingCriterion"] = None,
        training_settings: Optional["TrainingSettings"] = None,
        callback_to_change_engine_attributes: Optional[Dict[str, Any]] = None,
        **training_settings_kwargs,
    ):
        self.callback_to_change_engine_attributes = callback_to_change_engine_attributes

        # Populate TrainingSettings members
        # THIS MUST COME FIRST IN `__init__`: `__setattr__` consults
        # `self.training_settings` for every attribute other than the two assigned so far.
        assert training_settings is None or len(training_settings_kwargs) == 0
        if training_settings is None:
            training_settings = TrainingSettings(**training_settings_kwargs)
        self.training_settings = training_settings
        assert self.training_settings.update_repeats is None or isinstance(
            self.training_settings.update_repeats, numbers.Integral
        ), (
            "`training_settings` passed to `PipelineStage` must have `training_settings.update_repeats`"
            " equal to `None` or an integer. If you'd like to specify per-loss `update_repeats` then please"
            " do so in the training settings of a `StageComponent`."
        )

        self.loss_names = loss_names
        self.max_stage_steps = max_stage_steps

        self.loss_weights = (
            [1.0] * len(loss_names) if loss_weights is None else loss_weights
        )
        assert len(self.loss_weights) == len(self.loss_names)

        self.teacher_forcing = teacher_forcing
        self.early_stopping_criterion = early_stopping_criterion

        # Progress bookkeeping, cleared by `reset`.
        self.steps_taken_in_stage: int = 0
        self.rollout_count = 0
        self.early_stopping_criterion_met = False

        self.uuid_to_loss_weight: Dict[str, float] = {
            loss_uuid: loss_weight
            for loss_uuid, loss_weight in zip(loss_names, self.loss_weights)
        }

        self._stage_components: List[StageComponent] = []
        self.uuid_to_stage_component: Dict[str, StageComponent] = {}
        self.storage_uuid_to_steps_taken_in_stage: Dict[str, int] = {}
        self.stage_component_uuid_to_stream_memory: Dict[str, Memory] = {}

        if stage_components is not None:
            for stage_component in stage_components:
                self.add_stage_component(stage_component)

        # Sanity check
        for key in training_settings.keys():
            assert not hasattr(
                self, key
            ), f"`{key}` should be defined in `TrainingSettings`, not in `PipelineStage`."

    def reset(self):
        """Reset all progress counters and clear the stream memories."""
        self.steps_taken_in_stage: int = 0
        self.rollout_count = 0
        self.early_stopping_criterion_met = False

        for k in self.storage_uuid_to_steps_taken_in_stage:
            self.storage_uuid_to_steps_taken_in_stage[k] = 0

        for memory in self.stage_component_uuid_to_stream_memory.values():
            memory.clear()

    # TODO: Replace Any with the correct type
    def change_engine_attributes(self, engine: Any):
        """Apply `callback_to_change_engine_attributes` (if any) to `engine`.

        Every listed attribute must already exist on `engine`; it is replaced by
        the value returned from `func(engine, **args)`.
        """
        if self.callback_to_change_engine_attributes is not None:
            for key, value in self.callback_to_change_engine_attributes.items():
                # check if the engine has the attribute
                assert hasattr(engine, key)

                func = value["func"]
                args = value["args"]
                setattr(engine, key, func(engine, **args))

    @property
    def stage_components(self) -> Tuple["StageComponent", ...]:
        """The registered stage components, in order of registration."""
        return tuple(self._stage_components)

    def add_stage_component(self, stage_component: "StageComponent"):
        """Register `stage_component` with this stage.

        The component's training settings receive this stage's training settings
        as defaults.

        # Raises

        AssertionError : if a component with the same uuid was already added.
        NotImplementedError : if another component already uses the same storage.
        """
        assert stage_component.uuid not in self.uuid_to_stage_component

        # Validate before mutating anything so that a rejected component
        # leaves neither this stage nor the component half-registered.
        if stage_component.storage_uuid in self.storage_uuid_to_steps_taken_in_stage:
            raise NotImplementedError(
                "Cannot have multiple stage components which"
                f" use the same storage (reused storage uuid: '{stage_component.storage_uuid}'."
            )

        # Setting default training settings for the `stage_component`
        sc_ts = stage_component.training_settings
        sc_ts.set_defaults(self.training_settings)

        # Handling the case where different losses should be updated different
        # numbers of times
        # NOTE(review): with `TrainingSettings` falling back to its defaults,
        # `sc_ts.update_repeats` should only be `None` here if the stage's value
        # is `None` too, so this branch looks unreachable - confirm.
        stage_update_repeats = self.training_settings.update_repeats
        if stage_update_repeats is not None and sc_ts.update_repeats is None:
            if isinstance(stage_update_repeats, Sequence):
                # Only a sequence can be paired up with the loss names (zipping
                # the names with an integer would raise a `TypeError`).
                loss_to_update_repeats = dict(
                    zip(self.loss_names, stage_update_repeats)
                )
                sc_ts.update_repeats = [
                    loss_to_update_repeats[uuid] for uuid in stage_component.loss_names
                ]
            else:
                sc_ts.update_repeats = stage_update_repeats

        self._stage_components.append(stage_component)
        self.uuid_to_stage_component[stage_component.uuid] = stage_component
        self.storage_uuid_to_steps_taken_in_stage[stage_component.storage_uuid] = 0
        self.stage_component_uuid_to_stream_memory[stage_component.uuid] = Memory()

    def __setattr__(self, key: str, value: Any):
        """Set an attribute, refusing names that belong to `TrainingSettings`.

        # Raises

        NotImplementedError : if `key` is the name of a training setting.
        """
        if key not in [
            "training_settings",
            "callback_to_change_engine_attributes",
        ] and self.training_settings.has_key(key):
            # Instances have no `__name__`; the class name lives on the type.
            class_name = type(self).__name__
            raise NotImplementedError(
                f"Cannot set {key} in {class_name}, update the"
                f" `training_settings` attribute of {class_name} instead."
            )
        else:
            return super(PipelineStage, self).__setattr__(key, value)

    @property
    def is_complete(self):
        """Whether the stage ended, through early stopping or by reaching
        `max_stage_steps`."""
        return (
            self.early_stopping_criterion_met
            or self.steps_taken_in_stage >= self.max_stage_steps
        )
class TrainingPipeline:
"""Class defining the stages (and global training settings) in a training
pipeline.
The training pipeline can be used as an iterator to go through the pipeline
stages in, for instance, a loop.
# Parameters
named_losses : Dictionary mapping a the name of a loss to either an instantiation
of that loss or a `Builder` that, when called, will return that loss.
pipeline_stages : A list of PipelineStages. Each of these define how the agent
will be trained and are executed sequentially.
optimizer_builder : Builder object to instantiate the optimizer to use during training.
    named_storages: Map of storage names to corresponding `ExperienceStorage` instances or `Builder` objects.
        If this is `None` or empty then a new `Builder[RolloutBlockStorage]` will be created and added
        (under the uuid `onpolicy`) by default. No default storage is added when storages are given
        but none of them is of (sub)type `RolloutStorage`.
rollout_storage_uuid: Optional name of `RolloutStorage`, if `None` given, it will be assigned to the
`ExperienceStorage` of subclass `RolloutStorage` in `named_storages`. Note that this assumes that there
is only a single `RolloutStorage` object in the values of `named_storages`.
should_log: `True` if metrics accumulated during training should be logged to the console as well
as to a tensorboard file.
lr_scheduler_builder : Optional builder object to instantiate the learning rate scheduler used
through the pipeline.
training_settings: Instance of `TrainingSettings`
training_settings_kwargs: For backwards compatability: arguments to instantiate TrainingSettings when
`training_settings` is `None`.
"""
# noinspection PyUnresolvedReferences
def __init__(
self,
*,
named_losses: Dict[str, Union[Loss, Builder[Loss]]],
pipeline_stages: List[PipelineStage],
optimizer_builder: Builder[optim.Optimizer], # type: ignore
named_storages: Optional[
Dict[str, Union[ExperienceStorage, Builder[ExperienceStorage]]]
] = None,
rollout_storage_uuid: Optional[str] = None,
should_log: bool = True,
lr_scheduler_builder: Optional[Builder[_LRScheduler]] = None, # type: ignore
training_settings: Optional[TrainingSettings] = None,
valid_pipeline_stage: Optional[PipelineStage] = None,
test_pipeline_stage: Optional[PipelineStage] = None,
**training_settings_kwargs,
):
"""Initializer.
See class docstring for parameter definitions.
"""
# Populate TrainingSettings members
assert training_settings is None or len(training_settings_kwargs) == 0
if training_settings is None:
training_settings = TrainingSettings(**training_settings_kwargs)
self.training_settings = training_settings
assert self.training_settings.update_repeats is None or isinstance(
self.training_settings.update_repeats, numbers.Integral
), (
"`training_settings` passed to `TrainingPipeline` must have `training_settings.update_repeats`"
" equal to `None` or an integer. If you'd like to specify per-loss `update_repeats` then please"
" do so in the training settings of a `StageComponent`."
)
self.training_settings = training_settings
self.optimizer_builder = optimizer_builder
self.lr_scheduler_builder = lr_scheduler_builder
self._named_losses = named_losses
self._named_storages = self._initialize_named_storages(
named_storages=named_storages
)
self.rollout_storage_uuid = self._initialize_rollout_storage_uuid(
rollout_storage_uuid
)
if self.rollout_storage_uuid is None:
get_logger().warning(
f"No rollout storage was specified in the TrainingPipeline. This need not be an issue"
f" if you are performing off-policy training but, otherwise, please ensure you have"
f" defined a rollout storage in the `named_storages` argument of the TrainingPipeline."
)
self.should_log = should_log
self.pipeline_stages = pipeline_stages
def if_none_then_empty_stage(stage: Optional[PipelineStage]) -> PipelineStage:
return (
stage
if stage is not None
else PipelineStage(max_stage_steps=-1, loss_names=[])
)
self.valid_pipeline_stage = if_none_then_empty_stage(valid_pipeline_stage)
self.test_pipeline_stage = if_none_then_empty_stage(test_pipeline_stage)
assert (
len(self.pipeline_stages) == len(set(id(ps) for ps in pipeline_stages))
and self.valid_pipeline_stage not in self.pipeline_stages
and self.test_pipeline_stage not in self.pipeline_stages
), (
"Duplicate `PipelineStage` object instances found in the pipeline stages input"
" to `TrainingPipeline`. `PipelineStage` objects are not immutable, if you'd"
" like to have multiple pipeline stages of the same type, please instantiate"
" multiple separate instances."
)
self._ensure_pipeline_stages_all_have_at_least_one_stage_component()
self._current_stage: Optional[PipelineStage] = None
self.rollout_count = 0
self._refresh_current_stage(force_stage_search_from_start=True)
def _initialize_rollout_storage_uuid(
self, rollout_storage_uuid: Optional[str]
) -> str:
if rollout_storage_uuid is None:
rollout_storage_uuids = self._get_uuids_of_rollout_storages(
self._named_storages
)
assert len(rollout_storage_uuids) <= 1, (
f"`rollout_storage_uuid` cannot be automatically inferred as there are multiple storages defined"
f" (ids: {rollout_storage_uuids}) of type `RolloutStorage`."
)
rollout_storage_uuid = next(iter(rollout_storage_uuids), None)
assert (
rollout_storage_uuid is None or rollout_storage_uuid in self._named_storages
)
return rollout_storage_uuid
def _ensure_pipeline_stages_all_have_at_least_one_stage_component(self):
rollout_storages_uuids = self._get_uuids_of_rollout_storages(
self._named_storages
)
named_pipeline_stages = {
f"{i}th": ps for i, ps in enumerate(self.pipeline_stages)
}
named_pipeline_stages["valid"] = self.valid_pipeline_stage
named_pipeline_stages["test"] = self.test_pipeline_stage
for stage_name, stage in named_pipeline_stages.items():
# Forward default `TrainingSettings` to all `PipelineStage`s settings:
stage.training_settings.set_defaults(defaults=self.training_settings)
if len(stage.stage_components) == 0:
assert len(rollout_storages_uuids) <= 1, (
f"In {stage_name} pipeline stage: you have several storages specified ({rollout_storages_uuids}) which"
f" are subclasses of `RolloutStorage`. This is only allowed when stage components are explicitly"
f" defined in every `PipelineStage` instance. You have `PipelineStage`s for which stage components"
f" are not specified."
)
if len(rollout_storages_uuids) > 0:
stage.add_stage_component(
StageComponent(
uuid=rollout_storages_uuids[0],
storage_uuid=rollout_storages_uuids[0],
loss_names=stage.loss_names,
training_settings=TrainingSettings(),
)
)
for sc in stage.stage_components:
assert sc.storage_uuid in self._named_storages, (
f"In {stage_name} pipeline stage: storage with name '{sc.storage_uuid}' not found in collection of"
f" defined storages names: {list(self._named_storages.keys())}"
)
if (
self.rollout_storage_uuid is not None
and self.rollout_storage_uuid
not in stage.storage_uuid_to_steps_taken_in_stage
):
stage.storage_uuid_to_steps_taken_in_stage[
self.rollout_storage_uuid
] = 0
@classmethod
def _get_uuids_of_rollout_storages(
cls,
named_storages: Dict[str, Union[Builder[ExperienceStorage], ExperienceStorage]],
) -> List[str]:
return [
uuid
for uuid, storage in named_storages.items()
if isinstance(storage, RolloutStorage)
or (
isinstance(storage, Builder)
and issubclass(storage.class_type, RolloutStorage)
)
]
@classmethod
def _initialize_named_storages(
cls,
named_storages: Optional[
Dict[str, Union[Builder[ExperienceStorage], ExperienceStorage]]
],
) -> Dict[str, Union[Builder[ExperienceStorage], ExperienceStorage]]:
named_storages = {} if named_storages is None else {**named_storages}
rollout_storages_uuids = cls._get_uuids_of_rollout_storages(named_storages)
if len(named_storages) == 0:
assert (
_DEFAULT_ONPOLICY_UUID not in named_storages
), f"Storage uuid '{_DEFAULT_ONPOLICY_UUID}' is reserved, please pick a different uuid."
named_storages[_DEFAULT_ONPOLICY_UUID] = Builder(RolloutBlockStorage)
rollout_storages_uuids.append(_DEFAULT_ONPOLICY_UUID)
return named_storages
def _refresh_current_stage(
self, force_stage_search_from_start: bool = False
) -> Optional[PipelineStage]:
if force_stage_search_from_start:
self._current_stage = None
if self._current_stage is None or self._current_stage.is_complete:
if self._current_stage is None:
start_index = 0
else:
start_index = self.pipeline_stages.index(self._current_stage) + 1
self._current_stage = None
for ps in self.pipeline_stages[start_index:]:
if not ps.is_complete:
self._current_stage = ps
break
return self._current_stage
@property
def total_steps(self) -> int:
return sum(ps.steps_taken_in_stage for ps in self.pipeline_stages)
@property
def storage_uuid_to_total_experiences(self) -> Dict[str, int]:
totals = {k: 0 for k in self._named_storages}
for ps in self.pipeline_stages:
for k in ps.storage_uuid_to_steps_taken_in_stage:
totals[k] += ps.storage_uuid_to_steps_taken_in_stage[k]
for k in totals:
split = k.split("__")
if len(split) == 2 and split[1] in ["valid", "test"]:
assert totals[k] == 0, (
"Total experiences should be 0 for validation/test storages, i.e."
" storages who have `__valid` or `__test` as their suffix. These storages"
" will copy their `total_experiences` from the corresponding training"
" storage i.e.:\n"
" 1. the storage without the above suffix if it exists, else\n"
" 2. the total number of steps."
)
totals[k] = totals.get(split[0], self.total_steps)
return totals
@property
def current_stage(self) -> Optional[PipelineStage]:
return self._current_stage
@property
def current_stage_index(self) -> Optional[int]:
if self.current_stage is None:
return None
return self.pipeline_stages.index(self.current_stage)
def before_rollout(self, train_metrics: Optional[ScalarMeanTracker] = None) -> bool:
    """Evaluate early stopping and refresh the current stage before a rollout.

    # Parameters
    train_metrics : Training metrics handed to the current stage's early
        stopping criterion. When `None`, the criterion is not evaluated.

    # Returns
    `True` if refreshing changed which stage is current, else `False`.
    """
    should_check_early_stopping = (
        train_metrics is not None
        and self.current_stage.early_stopping_criterion is not None
    )
    if should_check_early_stopping:
        criterion_met = self.current_stage.early_stopping_criterion(
            stage_steps=self.current_stage.steps_taken_in_stage,
            total_steps=self.total_steps,
            training_metrics=train_metrics,
        )
        self.current_stage.early_stopping_criterion_met = criterion_met

    if self.current_stage.early_stopping_criterion_met:
        get_logger().debug(
            f"Early stopping criterion met after {self.total_steps} total steps "
            f"({self.current_stage.steps_taken_in_stage} in current stage, stage index {self.current_stage_index})."
        )

    # Capture the stage before refreshing so the two can be compared by identity.
    stage_before = self.current_stage
    stage_after = self._refresh_current_stage(force_stage_search_from_start=False)
    return stage_before is not stage_after
def restart_pipeline(self):
    """Reset every stage and select the first incomplete stage again.

    The validation and test stages are reset too, when they are set
    (truthy).
    """
    for stage in self.pipeline_stages:
        stage.reset()

    for attr_name in ("valid_pipeline_stage", "test_pipeline_stage"):
        extra_stage = getattr(self, attr_name)
        if extra_stage:
            extra_stage.reset()

    self._current_stage = None
    self._refresh_current_stage(force_stage_search_from_start=True)
def state_dict(self):
    """Return a dict snapshot of the pipeline's progress.

    # Returns
    A dict with `stage_info_list` (one progress dict per pipeline stage,
    in order) and the pipeline-level `rollout_count`.
    """

    def describe(stage):
        # Per-stage progress counters that `load_state_dict` reads back.
        return {
            "early_stopping_criterion_met": stage.early_stopping_criterion_met,
            "steps_taken_in_stage": stage.steps_taken_in_stage,
            "storage_uuid_to_steps_taken_in_stage": stage.storage_uuid_to_steps_taken_in_stage,
            "rollout_count": stage.rollout_count,
        }

    stage_info_list = [describe(stage) for stage in self.pipeline_stages]
    return {
        "stage_info_list": stage_info_list,
        "rollout_count": self.rollout_count,
    }
def load_state_dict(self, state_dict: Dict[str, Any]):
    """Restore pipeline progress from a dict produced by `state_dict`.

    # Parameters
    state_dict : Saved state. A legacy `off_policy_epochs` key only
        triggers a warning; per-stage `storage_uuid_to_steps_taken_in_stage`
        entries are optional.
    """
    if "off_policy_epochs" in state_dict:
        get_logger().warning(
            "Loaded state dict was saved using an older version of AllenAct."
            " If you are attempting to restart training for a model that had an off-policy component, be aware"
            " that logging for the off-policy component will not behave as it previously did."
            " Additionally, while the total step count will remain accurate, step counts"
            " associated with losses will be reset to step 0."
        )

    saved_stage_infos = state_dict["stage_info_list"]
    for stage, saved in zip(self.pipeline_stages, saved_stage_infos):
        stage.early_stopping_criterion_met = saved["early_stopping_criterion_met"]
        stage.steps_taken_in_stage = saved["steps_taken_in_stage"]

        # Older checkpoints may lack the per-storage step counts.
        if "storage_uuid_to_steps_taken_in_stage" in saved:
            stage.storage_uuid_to_steps_taken_in_stage = saved[
                "storage_uuid_to_steps_taken_in_stage"
            ]

        stage.rollout_count = saved["rollout_count"]

    self.rollout_count = state_dict["rollout_count"]
    self._refresh_current_stage(force_stage_search_from_start=True)
@property
def rollout_storage(self) -> Optional[RolloutStorage]:
    """The rollout storage registered under `rollout_storage_uuid`, if any.

    A storage still held as a `Builder` is built on first access and the
    built instance replaces the builder in `_named_storages`.
    """
    uuid = self.rollout_storage_uuid
    if uuid is None:
        return None

    storage = self._named_storages[uuid]
    if isinstance(storage, Builder):
        storage = storage()
        self._named_storages[uuid] = storage
    return cast(RolloutStorage, storage)
def get_stage_storage(
    self, stage: PipelineStage
) -> "OrderedDict[str, ExperienceStorage]":
    """Collect the storages used by `stage`, building any that are still builders.

    # Parameters
    stage : The stage whose components name the storages of interest.

    # Returns
    An `OrderedDict` from storage uuid (in sorted order) to storage. The
    pipeline's rollout storage uuid is always included when it is defined.
    """
    wanted_uuids = {component.storage_uuid for component in stage.stage_components}

    # Always include self.rollout_storage_uuid in the current stage storage (when the uuid is defined)
    if self.rollout_storage_uuid is not None:
        wanted_uuids.add(self.rollout_storage_uuid)

    stage_storage = OrderedDict()
    for uuid in sorted(wanted_uuids):
        storage = self._named_storages[uuid]
        if isinstance(storage, Builder):
            # Build once and keep the instance for later lookups.
            storage = cast(Builder["ExperienceStorage"], storage)()
            self._named_storages[uuid] = storage
        stage_storage[uuid] = storage
    return stage_storage
@property
def current_stage_storage(self) -> "OrderedDict[str, ExperienceStorage]":
    """Storages for the current stage; see `get_stage_storage`."""
    active_stage = self.current_stage
    return self.get_stage_storage(active_stage)
def get_loss(self, uuid: str):
    """Return the loss registered under `uuid`, building it first if it is still a `Builder`.

    The built loss replaces the builder in `_named_losses`.
    """
    loss = self._named_losses[uuid]
    if isinstance(loss, Builder):
        loss = cast(
            Builder[Union["AbstractActorCriticLoss", "GenericAbstractLoss"]],
            loss,
        )()
        self._named_losses[uuid] = loss
    return loss
@property
def current_stage_losses(
    self,
) -> Dict[str, Union[AbstractActorCriticLoss, GenericAbstractLoss]]:
    """Map each loss name of the current stage to its loss object.

    Losses still held as a `Builder` are built and stored back into
    `_named_losses` before being returned.
    """
    stage_losses = {}
    for loss_name in self.current_stage.loss_names:
        loss = self._named_losses[loss_name]
        if isinstance(loss, Builder):
            loss = cast(
                Builder[Union["AbstractActorCriticLoss", "GenericAbstractLoss"]],
                loss,
            )()
            self._named_losses[loss_name] = loss
        stage_losses[loss_name] = cast(
            Union[AbstractActorCriticLoss, GenericAbstractLoss], loss
        )
    return stage_losses
def download_checkpoint_from_wandb(
    checkpoint_path_dir_or_pattern, all_ckpt_dir, only_allow_one_ckpt=False
):
    """Download checkpoint artifacts from Weights & Biases.

    # Parameters
    checkpoint_path_dir_or_pattern : String split on `//`; piece 1 is the
        run token and the remaining pieces are checkpoint step identifiers
        (a trailing empty piece is ignored).
    all_ckpt_dir : Directory the artifacts are downloaded into.
    only_allow_one_ckpt : If `True`, exactly one step must be given and a
        single path is returned instead of a list.

    # Returns
    The path of the downloaded checkpoint (`only_allow_one_ckpt=True`) or a
    list of such paths, one per requested step.
    """
    api = wandb.Api()

    pattern_pieces = checkpoint_path_dir_or_pattern.split("//")
    run_token = pattern_pieces[1]
    ckpt_steps = pattern_pieces[2:]
    if ckpt_steps[-1] == "":
        ckpt_steps = ckpt_steps[:-1]

    def fetch(step):
        # Download one artifact, then rename it so that checkpoints for
        # different steps do not overwrite each other.
        artifact = api.artifact("{}-step-{}:latest".format(run_token, step))
        artifact.download(all_ckpt_dir)
        target = "{}/ckpt-{}.pt".format(all_ckpt_dir, step)
        shutil.move("{}/ckpt.pt".format(all_ckpt_dir), target)
        return target

    if only_allow_one_ckpt:
        assert len(ckpt_steps) == 1
        return fetch(ckpt_steps[0])

    return [fetch(step) for step in ckpt_steps]
================================================
FILE: allenact/utils/inference.py
================================================
from typing import Optional, cast, Tuple, Any, Dict
import attr
import torch
from allenact.algorithms.onpolicy_sync.policy import ActorCriticModel
from allenact.algorithms.onpolicy_sync.storage import RolloutStorage
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.misc import (
Memory,
ObservationType,
ActorCriticOutput,
DistributionType,
)
from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.utils import spaces_utils as su
from allenact.utils.tensor_utils import batch_observations
@attr.s(kw_only=True)
class InferenceAgent:
    """Wraps an actor-critic model so it can act on one observation at a time.

    Observations are batched (batch of size 1), optionally preprocessed, and
    written into `rollout_storage`; the storage then supplies the model
    input for the next step. Call `reset` between tasks and `act` once per
    step.
    """

    # Model queried for actions; switched to eval mode and moved to `device` after init.
    actor_critic: ActorCriticModel = attr.ib()
    # Storage used to assemble the model's input for each step.
    rollout_storage: RolloutStorage = attr.ib()
    # Device the model, memory, preprocessors and storage are moved to.
    device: torch.device = attr.ib()
    # Optional preprocessing applied to the batched observations in `act`.
    sensor_preprocessor_graph: Optional[SensorPreprocessorGraph] = attr.ib()
    # `rollout_storage.after_updates()` is called every this-many steps in `act`.
    steps_before_rollout_refresh: int = attr.ib(default=128)
    # Memory returned by the last model call; cleared by `reset`.
    memory: Optional[Memory] = attr.ib(default=None)
    # Number of `act` calls since the last `reset`.
    steps_taken_in_task: int = attr.ib(default=0)
    # Flattened form of the last sampled action; fed back into the storage on the next step.
    last_action_flat: Optional = attr.ib(default=None)
    # Becomes True on the first `act` call that initializes the storage.
    has_initialized: Optional = attr.ib(default=False)

    def __attrs_post_init__(self):
        """Put the model in eval mode and move all components to `self.device`."""
        self.actor_critic.eval()
        self.actor_critic.to(device=self.device)
        if self.memory is not None:
            self.memory.to(device=self.device)
        if self.sensor_preprocessor_graph is not None:
            self.sensor_preprocessor_graph.to(self.device)

        self.rollout_storage.to(self.device)
        self.rollout_storage.set_partition(index=0, num_parts=1)

    @classmethod
    def from_experiment_config(
        cls,
        exp_config: ExperimentConfig,
        device: torch.device,
        checkpoint_path: Optional[str] = None,
        model_state_dict: Optional[Dict[str, Any]] = None,
        mode: str = "test",
    ):
        """Build an agent from an experiment config.

        # Parameters
        exp_config : Config providing the training pipeline (for its rollout
            storage), the machine params for `mode`, and the model.
        device : Device the agent's components are moved to.
        checkpoint_path : Optional checkpoint file; its `"model_state_dict"`
            entry is loaded into the model.
        model_state_dict : Optional weights, given either directly or as a
            dict holding them under `"model_state_dict"`.
        mode : Passed to `exp_config.machine_params`.

        At most one of `checkpoint_path` and `model_state_dict` may be given.
        """
        assert (
            checkpoint_path is None or model_state_dict is None
        ), "Cannot have `checkpoint_path` and `model_state_dict` both non-None."
        rollout_storage = exp_config.training_pipeline().rollout_storage

        machine_params = exp_config.machine_params(mode)
        # `machine_params` may also come back as a plain mapping of keyword arguments.
        if not isinstance(machine_params, MachineParams):
            machine_params = MachineParams(**machine_params)

        sensor_preprocessor_graph = machine_params.sensor_preprocessor_graph

        actor_critic = cast(
            ActorCriticModel,
            exp_config.create_model(
                sensor_preprocessor_graph=sensor_preprocessor_graph
            ),
        )

        if checkpoint_path is not None:
            actor_critic.load_state_dict(
                torch.load(checkpoint_path, map_location="cpu")["model_state_dict"]
            )
        elif model_state_dict is not None:
            # Accept both raw weights and a checkpoint-style wrapper dict.
            actor_critic.load_state_dict(
                model_state_dict
                if "model_state_dict" not in model_state_dict
                else model_state_dict["model_state_dict"]
            )

        return cls(
            actor_critic=actor_critic,
            rollout_storage=rollout_storage,
            device=device,
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    def reset(self):
        """Prepare for a new task: zero the step counter and drop the memory.

        If the storage was initialized by an earlier `act` call,
        `rollout_storage.after_updates()` is called first.
        """
        if self.has_initialized:
            self.rollout_storage.after_updates()
        self.steps_taken_in_task = 0
        self.memory = None

    def act(self, observations: ObservationType):
        """Sample an action for `observations` and advance the agent by one step.

        # Parameters
        observations : Observations for the current step (un-batched).

        # Returns
        The first element of `su.action_list` applied to the sampled,
        flattened action.
        """
        # Batch of size 1
        obs_batch = batch_observations([observations], device=self.device)
        if self.sensor_preprocessor_graph is not None:
            obs_batch = self.sensor_preprocessor_graph.get_observations(obs_batch)

        if self.steps_taken_in_task == 0:
            # First step of a task: (re)initialize the storage from this observation.
            self.has_initialized = True
            self.rollout_storage.initialize(
                observations=obs_batch,
                num_samplers=1,
                recurrent_memory_specification=self.actor_critic.recurrent_memory_specification,
                action_space=self.actor_critic.action_space,
            )
            self.rollout_storage.after_updates()
        else:
            # Later steps: record the previous action together with the new observation.
            dummy_val = torch.zeros((1, 1), device=self.device)  # Unused dummy value
            self.rollout_storage.add(
                observations=obs_batch,
                memory=self.memory,
                actions=self.last_action_flat[0],
                action_log_probs=dummy_val,
                value_preds=dummy_val,
                rewards=dummy_val,
                masks=torch.ones(
                    (1, 1), device=self.device
                ),  # Always == 1 as we're in a single task until `reset`
            )

        agent_input = self.rollout_storage.agent_input_for_next_step()

        actor_critic_output, self.memory = cast(
            Tuple[ActorCriticOutput[DistributionType], Optional[Memory]],
            self.actor_critic(**agent_input),
        )

        action = actor_critic_output.distributions.sample()
        self.last_action_flat = su.flatten(self.actor_critic.action_space, action)

        self.steps_taken_in_task += 1

        # Periodically refresh the storage (every `steps_before_rollout_refresh` steps).
        if self.steps_taken_in_task % self.steps_before_rollout_refresh == 0:
            self.rollout_storage.after_updates()

        return su.action_list(self.actor_critic.action_space, self.last_action_flat)[0]
================================================
FILE: allenact/utils/misc_utils.py
================================================
import copy
import functools
import hashlib
import inspect
import json
import math
import os
import pdb
import random
import subprocess
import sys
import urllib
import urllib.request
from collections import Counter
from contextlib import contextmanager
from typing import Sequence, List, Optional, Tuple, Hashable
import filelock
import numpy as np
import torch
from scipy.special import comb
from allenact.utils.system import get_logger
# Ten colors given as (red, green, blue) triples of integers in the 0-255 range.
# NOTE(review): going by the name these are the "Tableau 10" palette colors - confirm.
TABLEAU10_RGB = (
    (31, 119, 180),
    (255, 127, 14),
    (44, 160, 44),
    (214, 39, 40),
    (148, 103, 189),
    (140, 86, 75),
    (227, 119, 194),
    (127, 127, 127),
    (188, 189, 34),
    (23, 190, 207),
)
def multiprocessing_safe_download_file_from_url(url: str, save_path: str):
    """Download `url` to `save_path` unless the file already exists.

    The existence check and the download both happen while holding a file
    lock at `save_path + ".lock"`.
    """
    lock_path = save_path + ".lock"
    with filelock.FileLock(lock_path):
        if os.path.isfile(save_path):
            get_logger().debug(f"{save_path} exists - skipping download.")
            return

        get_logger().info(f"Downloading file from {url} to {save_path}.")
        urllib.request.urlretrieve(url, save_path)
def experimental_api(to_decorate):
    """Decorate a function to note that it is part of the experimental API.

    A warning is logged the first time the decorated function is called;
    later calls go straight through.
    """
    name = f"{inspect.getmodule(to_decorate).__name__}.{to_decorate.__qualname__}"
    if to_decorate.__name__ == "__init__":
        # Report constructors under the class name rather than `Class.__init__`.
        name = name.replace(".__init__", "")

    already_warned = False

    @functools.wraps(to_decorate)
    def decorated(*args, **kwargs):
        nonlocal already_warned
        if not already_warned:
            get_logger().warning(
                f"'{name}' is a part of AllenAct's experimental API."
                f" This means: (1) there are likely bugs present and (2)"
                f" we may remove/change this functionality without warning."
                f" USE AT YOUR OWN RISK.",
            )
            already_warned = True
        return to_decorate(*args, **kwargs)

    return decorated
def deprecated(to_decorate):
    """Decorate a function to note that it has been deprecated.

    A warning is logged the first time the decorated function is called;
    later calls go straight through.
    """
    name = f"{inspect.getmodule(to_decorate).__name__}.{to_decorate.__qualname__}"
    if to_decorate.__name__ == "__init__":
        # Report constructors under the class name rather than `Class.__init__`.
        name = name.replace(".__init__", "")

    already_warned = False

    @functools.wraps(to_decorate)
    def decorated(*args, **kwargs):
        nonlocal already_warned
        if not already_warned:
            get_logger().warning(
                f"'{name}' has been deprecated and will soon be removed from AllenAct's API."
                f" Please discontinue your use of this function.",
            )
            already_warned = True
        return to_decorate(*args, **kwargs)

    return decorated
class NumpyJSONEncoder(json.JSONEncoder):
    """JSON encoder for numpy objects.

    Based off the stackoverflow answer by Jie Yang here: https://stackoverflow.com/a/57915246.
    The license for this code is [BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/).
    """

    def default(self, obj):
        """Convert numpy scalars/arrays to plain Python values; defer anything else to the base class."""
        if isinstance(obj, np.void):
            return None
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
@contextmanager
def tensor_print_options(**print_opts):
    """Temporarily apply `print_opts` to both torch and numpy printing.

    On exit, the torch options named in `print_opts` are set back to their
    earlier values and numpy's print options are restored in full.
    """
    saved_torch_opts = copy.deepcopy(torch._tensor_str.PRINT_OPTS)
    saved_numpy_opts = np.get_printoptions()
    try:
        torch.set_printoptions(**print_opts)
        np.set_printoptions(**print_opts)
        yield None
    finally:
        # Only the torch options that were overridden are put back.
        restored_torch_opts = {}
        for option_name in print_opts:
            restored_torch_opts[option_name] = getattr(saved_torch_opts, option_name)
        torch.set_printoptions(**restored_torch_opts)
        np.set_printoptions(**saved_numpy_opts)
def md5_hash_str_as_int(to_hash: str):
    """Return the MD5 digest of `to_hash` (encoded with the default codec) as an integer."""
    hex_digest = hashlib.md5(to_hash.encode()).hexdigest()
    return int(hex_digest, 16)
def get_git_diff_of_project() -> Tuple[str, str]:
    """Return `(short_sha, diff)` for the git project in the working directory.

    `short_sha` is the stripped output of `git describe --always`; `diff`
    is the output of `git diff <short_sha>`.
    """

    def run_git(*git_args):
        # Run a git sub-command and decode its stdout.
        return subprocess.check_output(["git", *git_args]).decode("utf-8")

    short_sha = run_git("describe", "--always").strip()
    diff = run_git("diff", short_sha)
    return short_sha, diff
class HashableDict(dict):
    """A dictionary which is hashable so long as all of its values are
    hashable.

    A HashableDict object will allow setting / deleting of items until
    the first time that `__hash__()` is called on it after which
    attempts to set or delete items will throw `RuntimeError`
    exceptions.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._hash_has_been_called = False

    def __key(self):
        # Canonical form: (key, value) pairs ordered by key, so that insertion
        # order affects neither the hash nor equality.
        return tuple((k, self[k]) for k in sorted(self))

    def __hash__(self):
        # From here on `__setitem__` / `__delitem__` refuse to mutate.
        self._hash_has_been_called = True
        return hash(self.__key())

    def __eq__(self, other):
        # `__key` is name-mangled and so only exists on HashableDict
        # instances. Defer to Python's fallback comparison for anything else
        # instead of raising AttributeError (e.g. when compared with `None`,
        # a plain dict, or an unrelated key during a dict lookup).
        if not isinstance(other, HashableDict):
            return NotImplemented
        return self.__key() == other.__key()

    def __setitem__(self, *args, **kwargs):
        if not self._hash_has_been_called:
            return super(HashableDict, self).__setitem__(*args, **kwargs)
        raise RuntimeError("Cannot set item in HashableDict after having called hash.")

    def __delitem__(self, *args, **kwargs):
        if not self._hash_has_been_called:
            return super(HashableDict, self).__delitem__(*args, **kwargs)
        raise RuntimeError(
            "Cannot delete item in HashableDict after having called hash."
        )
def partition_sequence(seq: Sequence, parts: int) -> List:
    """Split `seq` into `parts` contiguous slices of near-equal length.

    Slice lengths differ by at most one, with the longer slices first.

    # Parameters
    seq : The sequence to split.
    parts : Number of slices; must satisfy `0 < parts <= len(seq)`.

    # Returns
    A list of `parts` slices of `seq`, in order.
    """
    assert 0 < parts, f"parts [{parts}] must be greater > 0"
    assert parts <= len(seq), f"parts [{parts}] > len(seq) [{len(seq)}]"

    base_size, num_longer = divmod(len(seq), parts)

    chunks = []
    start = 0
    for part_index in range(parts):
        # The first `num_longer` slices each take one extra element.
        stop = start + base_size + (1 if part_index < num_longer else 0)
        chunks.append(seq[start:stop])
        start = stop
    return chunks
def unzip(seq: Sequence[Tuple], n: Optional[int]):
    """Undoes a `zip` operation.

    # Parameters
    seq: The sequence of tuples that should be unzipped
    n: The number of items in each tuple. May be `None` when `seq` is
        non-empty (it is then taken from the first tuple); required when
        `len(seq) == 0`. When given, every tuple is checked against it.

    # Returns
    A list of `n` lists where the ith list contains all the ith elements
    from the tuples in the input `seq`.
    """
    assert n is not None or len(seq) != 0
    if n is None:
        n = len(seq[0])

    columns = [[] for _ in range(n)]
    for row in seq:
        assert len(row) == n
        for column, item in zip(columns, row):
            column.append(item)
    return columns
def uninterleave(seq: Sequence, parts: int) -> List:
    """Deal the items of `seq` round-robin into `parts` lists.

    List `i` receives the items at indices `i, i + parts, i + 2 * parts, ...`.
    Requires `0 < parts <= len(seq)`.
    """
    assert 0 < parts <= len(seq)
    total = len(seq)
    dealt = []
    for offset in range(parts):
        dealt.append([seq[index] for index in range(offset, total, parts)])
    return dealt
@functools.lru_cache(maxsize=10000)
def cached_comb(n: int, m: int):
    """Memoized `scipy.special.comb(n, m)` (at most 10000 cached argument pairs)."""
    num_combinations = comb(n, m)
    return num_combinations
def expected_max_of_subset_statistic(vals: List[float], m: int):
    """Expected maximum of a size-`m` subset drawn from `vals`.

    Values are rounded to 8 decimals and grouped. For each distinct value
    (in increasing order) the number of size-`m` subsets whose maximum is
    that value is counted and weighted by `1 / comb(len(vals), m)`.

    # Parameters
    vals : The values to draw from.
    m : Subset size; must not exceed `len(vals)`.
    """
    n = len(vals)
    assert m <= n

    value_counts = sorted(Counter(round(val, 8) for val in vals).items())
    log_num_subsets = math.log(comb(n, m))

    num_seen = 0
    expectation = 0.0
    for val, multiplicity in value_counts:
        num_seen += multiplicity
        if num_seen < m:
            # Too few values <= val to fill a subset of size m.
            continue

        num_smaller = num_seen - multiplicity
        num_subsets_with_max = 0
        # Take i >= 1 copies of `val`, the remaining m - i from smaller values.
        for i in range(1, min(multiplicity, m) + 1):
            num_subsets_with_max += cached_comb(multiplicity, i) * cached_comb(
                num_smaller, m - i
            )

        # The ratio is formed in log space.
        expectation += val * math.exp(math.log(num_subsets_with_max) - log_num_subsets)

    return expectation
def bootstrap_max_of_subset_statistic(
    vals: List[float], m: int, reps=1000, seed: Optional[int] = None
):
    """Bootstrap `expected_max_of_subset_statistic`.

    # Parameters
    vals : The values to resample (with replacement, same size as `vals`).
    m : Subset size passed on to `expected_max_of_subset_statistic`.
    reps : Number of bootstrap resamples.
    seed : If given, `random` is seeded with it for the resampling and its
        prior state is put back afterwards.

    # Returns
    A list with one statistic per resample.
    """
    use_seed = seed is not None

    saved_state = None
    if use_seed:
        saved_state = random.getstate()
        random.seed(seed)

    results = [
        expected_max_of_subset_statistic(random.choices(vals, k=len(vals)), m)
        for _ in range(reps)
    ]

    if use_seed:
        random.setstate(saved_state)
    return results
def rand_float(low: float, high: float, shape):
    """Sample `np.random.rand` values rescaled to lie between `low` and `high`.

    `shape` is first unpacked as positional dimensions; if that raises a
    `TypeError` it is passed to `np.random.rand` as a single argument.
    """
    assert low <= high
    try:
        return low + (high - low) * np.random.rand(*shape) if False else np.random.rand(*shape) * (high - low) + low
    except TypeError as _:
        return np.random.rand(shape) * (high - low) + low
def all_unique(seq: Sequence[Hashable]):
    """Return `True` if no item of `seq` occurs more than once (stops at the first repeat)."""
    observed = set()
    for item in seq:
        size_before = len(observed)
        observed.add(item)
        if len(observed) == size_before:
            # Adding did not grow the set, so `item` was already present.
            return False
    return True
def all_equal(s: Sequence):
    """Return `True` if every item of `s` equals its first item (trivially so for length <= 1)."""
    if len(s) <= 1:
        return True
    first = s[0]
    for other in s[1:]:
        if not (first == other):
            return False
    return True
def prepare_locals_for_super(
    local_vars, args_name="args", kwargs_name="kwargs", ignore_kwargs=False
):
    """Turn a `locals()`-style mapping into keyword arguments.

    `self` and every name containing `__` are dropped. If an entry named
    `kwargs_name` remains, it is either discarded (`ignore_kwargs=True`) or
    updated in place with the other entries and returned.

    # Parameters
    local_vars : Mapping of local variable names to values.
    args_name : Name that must NOT appear in `local_vars`.
    kwargs_name : Name of the entry holding extra keyword arguments.
    ignore_kwargs : Whether to drop the `kwargs_name` entry.

    # Returns
    The resulting keyword-argument dict.
    """
    assert (
        args_name not in local_vars
    ), "`prepare_locals_for_super` does not support {}.".format(args_name)

    new_locals = {}
    for key, value in local_vars.items():
        if key == "self" or "__" in key:
            continue
        new_locals[key] = value

    if kwargs_name not in new_locals:
        return new_locals

    extra_kwargs = new_locals.pop(kwargs_name)
    if ignore_kwargs:
        return new_locals

    # Named locals take precedence over same-named entries in the kwargs dict.
    extra_kwargs.update(new_locals)
    return extra_kwargs
def partition_limits(num_items: int, num_parts: int):
    """Return `num_parts + 1` integer boundaries evenly spaced over `[0, num_items]`.

    Boundaries are taken from `np.linspace`, rounded with `np.round`, and
    returned as a plain list of ints.
    """
    boundaries = np.linspace(0, num_items, num_parts + 1, endpoint=True)
    rounded = np.round(boundaries).astype(np.int32)
    return rounded.tolist()
def str2bool(v: str):
    """Parse a yes/no style string (case-insensitive, surrounding whitespace ignored).

    # Raises
    ValueError : If the string is not one of the recognized spellings.
    """
    v = v.lower().strip()
    if v in ("yes", "true", "t", "y", "1"):
        return True
    if v in ("no", "false", "f", "n", "0"):
        return False
    raise ValueError(f"{v} cannot be converted to a bool")
class ForkedPdb(pdb.Pdb):
    """A Pdb subclass that may be used from a forked multiprocessing child."""

    def interaction(self, *args, **kwargs):
        """Run the debugger interaction with `sys.stdin` pointed at `/dev/stdin`.

        `sys.stdin` is restored afterwards, and the temporarily opened
        stream is closed so that repeated interactions do not leak file
        handles.
        """
        _stdin = sys.stdin
        try:
            with open("/dev/stdin") as forked_stdin:
                sys.stdin = forked_stdin
                pdb.Pdb.interaction(self, *args, **kwargs)
        finally:
            sys.stdin = _stdin
================================================
FILE: allenact/utils/model_utils.py
================================================
"""Functions used to initialize and manipulate pytorch models."""
import hashlib
from typing import Sequence, Tuple, Union, Optional, Dict, Any, Callable
import numpy as np
import torch
import torch.nn as nn
from allenact.utils.misc_utils import md5_hash_str_as_int
def md5_hash_of_state_dict(state_dict: Dict[str, Any]):
    """Compute an integer MD5-based hash of a state dict.

    Items are visited in sorted order. For array-like values (numpy arrays,
    torch tensors/parameters) the key and the MD5 of the raw bytes are
    recorded; for any other value the MD5 of the string form of the
    `(key, value)` pair is recorded. The final hash is the MD5 of the
    string form of the recorded list.
    """
    hashables = []
    for key, value in sorted(state_dict.items()):
        if not isinstance(value, (np.ndarray, torch.Tensor, nn.Parameter)):
            hashables.append(md5_hash_str_as_int(str((key, value))))
            continue

        hashables.append(key)
        if isinstance(value, np.ndarray):
            array = value
        else:
            # Torch values are brought to the CPU and viewed as numpy first.
            array = value.data.cpu().numpy()
        hashables.append(int(hashlib.md5(array.tobytes()).hexdigest(), 16))

    return md5_hash_str_as_int(str(hashables))
class Flatten(nn.Module):
    """Flatten input tensor so that it is of shape (FLATTENED_BATCH x -1)."""

    # noinspection PyMethodMayBeStatic
    def forward(self, x):
        """Flatten input tensor.

        # Parameters
        x : Tensor of size (FLATTENED_BATCH x ...) to flatten to size (FLATTENED_BATCH x -1)

        # Returns
        Flattened tensor.
        """
        batch_size = x.size(0)
        return x.reshape(batch_size, -1)
def init_linear_layer(
    module: nn.Linear, weight_init: Callable, bias_init: Callable, gain=1
):
    """Initialize a torch.nn.Linear layer.

    # Parameters
    module : A torch linear layer.
    weight_init : Function used to initialize the weight parameters of the linear layer. It is called
        with the weight data tensor and `gain` (as a keyword argument).
    bias_init : Function used to initialize the bias parameters of the linear layer. It is called
        with the bias data tensor only.
    gain : The gain passed to `weight_init`.

    # Returns
    The initialized linear layer (the same object as `module`).
    """
    weights = module.weight.data
    weight_init(weights, gain=gain)

    biases = module.bias.data
    bias_init(biases)

    return module
def grad_norm(parameters, norm_type=2):
    """Compute the overall norm of the gradients of `parameters`.

    # Parameters
    parameters : A single tensor or an iterable of tensors. Entries whose
        `.grad` is `None` are ignored.
    norm_type : Order of the norm; anything accepted by `float(...)`,
        including `"inf"` / `float("inf")` for the infinity norm.

    # Returns
    The norm as a Python float (`0.0` when no parameter has a gradient).
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    parameters = [p for p in parameters if p.grad is not None]

    norm_type = float(norm_type)
    # `norm_type` is a float at this point, so it has to be compared with the
    # float infinity: comparing it with the string "inf" can never be true.
    if norm_type == float("inf"):
        if not parameters:
            return 0.0
        return max(p.grad.data.abs().max().item() for p in parameters)

    total_norm = 0
    for p in parameters:
        param_norm = p.grad.data.norm(norm_type)
        total_norm += param_norm.item() ** norm_type
    return total_norm ** (1.0 / norm_type)
def make_cnn(
    input_channels: int,
    layer_channels: Sequence[int],
    kernel_sizes: Sequence[Union[int, Tuple[int, int]]],
    strides: Sequence[Union[int, Tuple[int, int]]],
    paddings: Sequence[Union[int, Tuple[int, int]]],
    dilations: Sequence[Union[int, Tuple[int, int]]],
    output_height: int,
    output_width: int,
    output_channels: int,
    flatten: bool = True,
    output_relu: bool = True,
) -> nn.Module:
    """Build a sequential CNN, optionally followed by a flatten + linear head.

    # Parameters
    input_channels : Number of channels of the input.
    layer_channels : Output channels of each convolution, in order.
    kernel_sizes, strides, paddings, dilations : Per-layer convolution
        settings; each must have the same length as `layer_channels`.
    output_height, output_width : Spatial size of the last convolution's
        output, used to size the linear layer.
    output_channels : Output size of the linear layer.
    flatten : Whether to append the `flatten` and `fc` modules.
    output_relu : Whether to append a final ReLU.

    # Returns
    An `nn.Sequential` with modules named `conv_i` / `relu_i` (a ReLU
    follows every convolution except the last), then optionally `flatten`,
    `fc` and `out_relu`.
    """
    assert (
        len(layer_channels)
        == len(kernel_sizes)
        == len(strides)
        == len(paddings)
        == len(dilations)
    ), "Mismatched sizes: layers {} kernels {} strides {} paddings {} dilations {}".format(
        layer_channels, kernel_sizes, strides, paddings, dilations
    )

    # Input channels of layer i are the output channels of layer i - 1.
    in_channels_per_layer = [input_channels] + list(layer_channels)
    last_layer_index = len(layer_channels) - 1

    net = nn.Sequential()
    for it, out_channels in enumerate(layer_channels):
        conv = nn.Conv2d(
            in_channels=in_channels_per_layer[it],
            out_channels=out_channels,
            kernel_size=kernel_sizes[it],
            stride=strides[it],
            padding=paddings[it],
            dilation=dilations[it],
        )
        net.add_module("conv_{}".format(it), conv)
        if it < last_layer_index:
            net.add_module("relu_{}".format(it), nn.ReLU(inplace=True))

    if flatten:
        net.add_module("flatten", Flatten())
        fc_in_features = layer_channels[-1] * output_width * output_height
        net.add_module("fc", nn.Linear(fc_in_features, output_channels))

    if output_relu:
        net.add_module("out_relu", nn.ReLU(True))

    return net
def compute_cnn_output(
    cnn: nn.Module,
    cnn_input: torch.Tensor,
    permute_order: Optional[Tuple[int, ...]] = (
        0,  # FLAT_BATCH (flattening steps, samplers and agents)
        3,  # CHANNEL
        1,  # ROW
        2,  # COL
    ),  # from [FLAT_BATCH x ROW x COL x CHANNEL] flattened input
):
    """Computes CNN outputs for given inputs.

    # Parameters
    cnn : A torch CNN.
    cnn_input: A torch Tensor with inputs, of shape
        [STEP, SAMPLER, (AGENT,) dim1, dim2, dim3]. It does not need to be
        contiguous.
    permute_order: A permutation Tuple to provide PyTorch dimension order, default (0, 3, 1, 2), where 0 corresponds to
        the flattened batch dimensions (combining step, sampler and agent). Pass `None` to skip permuting.

    # Returns
    CNN output with dimensions [STEP, SAMPLER, (AGENT,) ...], where `...`
    is whatever the CNN outputs per flattened batch element.
    """
    assert len(cnn_input.shape) in [
        5,
        6,
    ], "CNN input must have shape [STEP, SAMPLER, (AGENT,) dim1, dim2, dim3]"

    # The trailing three dimensions are the per-sample dims; everything before
    # them (2 or 3 dimensions) is batch structure to restore on the output.
    num_batch_dims = len(cnn_input.shape) - 3
    batch_shape = tuple(cnn_input.shape[:num_batch_dims])

    # Make FLAT_BATCH = nsteps * nsamplers (* nagents). `reshape` rather than
    # `view` so that non-contiguous inputs are accepted as well.
    cnn_input = cnn_input.reshape((-1,) + tuple(cnn_input.shape[num_batch_dims:]))

    if permute_order is not None:
        cnn_input = cnn_input.permute(*permute_order)
    cnn_output = cnn(cnn_input)

    return cnn_output.reshape(batch_shape + tuple(cnn_output.shape[1:]))
def simple_conv_and_linear_weights_init(m):
    """Initialize convolutional and linear modules; leave all other modules alone.

    Convolution weights are drawn uniformly from `[-b, b]` with
    `b = sqrt(6 / (fan_in + fan_out))` computed from the weight shape, and
    biases (when present) are zeroed. Linear modules are handed to
    `simple_linear_weights_init`. Only exact type matches are handled.
    """
    module_type = type(m)

    if module_type == nn.Linear:
        simple_linear_weights_init(m)
        return

    conv_types = (
        nn.Conv1d,
        nn.Conv2d,
        nn.Conv3d,
        nn.ConvTranspose1d,
        nn.ConvTranspose2d,
        nn.ConvTranspose3d,
    )
    if module_type not in conv_types:
        return

    weight_shape = list(m.weight.data.size())
    fan_in = np.prod(weight_shape[1:4])
    fan_out = np.prod(weight_shape[2:4]) * weight_shape[0]
    w_bound = np.sqrt(6.0 / (fan_in + fan_out))
    m.weight.data.uniform_(-w_bound, w_bound)

    if m.bias is not None:
        m.bias.data.fill_(0)
def simple_linear_weights_init(m):
    """Initialize an `nn.Linear` module in place; other module types are left untouched.

    Weights are drawn uniformly from `[-b, b]` with
    `b = sqrt(6 / (in_features + out_features))`; the bias, when present,
    is set to zero. Only exact `nn.Linear` instances are handled.
    """
    if type(m) != nn.Linear:
        return

    fan_out, fan_in = list(m.weight.data.size())[:2]
    w_bound = np.sqrt(6.0 / (fan_in + fan_out))
    m.weight.data.uniform_(-w_bound, w_bound)

    if m.bias is not None:
        m.bias.data.fill_(0)
class FeatureEmbedding(nn.Module):
    """An `nn.Embedding` wrapper that also supports a zero-sized output.

    Used for extracting features for actions/rewards. With
    `output_size == 0` the module holds no embedding table and `forward`
    returns an empty tensor regardless of its input.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size

        if self.output_size == 0:
            # Registered as a (non-persistent) buffer so that it is moved
            # across devices together with the module.
            self.null_embedding: torch.Tensor
            self.register_buffer(
                "null_embedding",
                torch.zeros(
                    0,
                ),
                persistent=False,
            )
        else:
            self.fc = nn.Embedding(input_size, output_size)

    def forward(self, inputs):
        """Embed `inputs`, or return the empty buffer when `output_size == 0`."""
        if self.output_size == 0:
            return self.null_embedding
        return self.fc(inputs)
================================================
FILE: allenact/utils/multi_agent_viz_utils.py
================================================
from typing import Sequence, Any
import numpy as np
from matplotlib import pyplot as plt, markers
from matplotlib.collections import LineCollection
from allenact.utils.viz_utils import TrajectoryViz
class MultiTrajectoryViz(TrajectoryViz):
    """Trajectory visualization that draws several agents' paths in one figure.

    For each agent suffix, the last entry of the trajectory path prefix has
    the suffix appended to locate that agent's trajectory in the episode.
    """

    def __init__(
        self,
        path_to_trajectory_prefix: Sequence[str] = ("task_info", "followed_path"),
        agent_suffixes: Sequence[str] = ("1", "2"),
        label: str = "trajectories",
        trajectory_plt_colormaps: Sequence[str] = ("cool", "spring"),
        marker_plt_colors: Sequence[Any] = ("blue", "orange"),
        axes_equal: bool = True,
        **other_base_kwargs,
    ):
        """
        # Parameters
        path_to_trajectory_prefix : Path to the trajectory within an episode;
            each agent suffix is appended to its last entry.
        agent_suffixes : One suffix per agent.
        label : Forwarded to the base class constructor.
        trajectory_plt_colormaps : Colormap used for each agent's path.
        marker_plt_colors : Color of each agent's start and stop markers.
        axes_equal : Whether to force an equal aspect ratio on the axes.
        other_base_kwargs : Remaining keyword arguments for the base class.
        """
        super().__init__(label=label, **other_base_kwargs)
        self.path_to_trajectory_prefix = list(path_to_trajectory_prefix)
        self.agent_suffixes = list(agent_suffixes)
        self.trajectory_plt_colormaps = list(trajectory_plt_colormaps)
        self.marker_plt_colors = marker_plt_colors
        self.axes_equal = axes_equal

    def make_fig(self, episode, episode_id):
        """Draw every agent's trajectory for `episode` and return the figure.

        Agents, colormaps and marker colors are paired up with `zip`.
        `episode_id` is used as the figure title.
        """

        # From https://nbviewer.jupyter.org/github/dpsanders/matplotlib-examples/blob/master/colorline.ipynb
        def colorline(
            x,
            y,
            z=None,
            cmap=plt.get_cmap("cool"),
            norm=plt.Normalize(0.0, 1.0),
            linewidth=2,
            alpha=1.0,
            zorder=1,
        ):
            """Plot a colored line with coordinates x and y.

            Optionally specify colors in the array z

            Optionally specify a colormap, a norm function and a line width.

            The line collection is added to the current axes and returned.
            """

            def make_segments(x, y):
                """Create list of line segments from x and y coordinates, in
                the correct format for LineCollection:

                an array of the form numlines x (points per line) x 2
                (x and y) array
                """
                points = np.array([x, y]).T.reshape(-1, 1, 2)
                # Pair every point with its successor to form the segments.
                segments = np.concatenate([points[:-1], points[1:]], axis=1)

                return segments

            # Default colors equally spaced on [0,1]:
            if z is None:
                z = np.linspace(0.0, 1.0, len(x))

            # Special case if a single number:
            if not hasattr(
                z, "__iter__"
            ):  # to check for numerical input -- this is a hack
                z = np.array([z])

            z = np.asarray(z)

            segments = make_segments(x, y)

            lc = LineCollection(
                segments,
                array=z,
                cmap=cmap,
                norm=norm,
                linewidth=linewidth,
                alpha=alpha,
                zorder=zorder,
            )

            ax = plt.gca()
            ax.add_collection(lc)

            return lc

        # `figsize`, `_access`, `x`, `y`, the marker settings and `fontsize`
        # are not defined in this class - presumably they come from the
        # `TrajectoryViz` base class (not shown here).
        fig, ax = plt.subplots(figsize=self.figsize)
        for agent, cmap, marker_color in zip(
            self.agent_suffixes, self.trajectory_plt_colormaps, self.marker_plt_colors
        ):
            # Copy the prefix so that appending the suffix does not modify it.
            path = self.path_to_trajectory_prefix[:]
            path[-1] = path[-1] + agent
            trajectory = self._access(episode, path)

            x, y = [], []
            for xy in trajectory:
                x.append(float(self._access(xy, self.x)))
                y.append(float(self._access(xy, self.y)))

            colorline(x, y, zorder=1, cmap=cmap)

            start_marker = markers.MarkerStyle(marker=self.start_marker_shape)
            if self.path_to_rot_degrees is not None:
                # Rotate the start marker by the rotation stored at the first trajectory point.
                rot_degrees = float(
                    self._access(trajectory[0], self.path_to_rot_degrees)
                )
                if self.adapt_rotation is not None:
                    rot_degrees = self.adapt_rotation(rot_degrees)
                start_marker._transform = start_marker.get_transform().rotate_deg(
                    rot_degrees
                )

            ax.scatter(
                [x[0]],
                [y[0]],
                marker=start_marker,
                zorder=2,
                s=self.start_marker_scale,
                color=marker_color,
            )
            ax.scatter(
                [x[-1]], [y[-1]], marker="s", color=marker_color
            )  # stop (square)

        if self.axes_equal:
            ax.set_aspect("equal", "box")
        ax.set_title(episode_id, fontsize=self.fontsize)
        ax.tick_params(axis="x", labelsize=self.fontsize)
        ax.tick_params(axis="y", labelsize=self.fontsize)

        return fig
================================================
FILE: allenact/utils/spaces_utils.py
================================================
# Original work Copyright (c) 2016 OpenAI (https://openai.com).
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from typing import Union, Tuple, List, cast, Iterable, Callable
from collections import OrderedDict
import numpy as np
import torch
from gym import spaces as gym
ActionType = Union[torch.Tensor, OrderedDict, Tuple, int]
def flatdim(space):
    """Return the number of dimensions a flattened equivalent of this space
    would have.

    Accepts a space and returns an integer. Raises
    ``NotImplementedError`` if the space is not defined in
    ``gym.spaces``.
    """
    if isinstance(space, gym.Box):
        return int(np.prod(space.shape))
    if isinstance(space, gym.Discrete):
        return 1  # we do not expand to one-hot
    if isinstance(space, gym.Tuple):
        return int(sum([flatdim(sub) for sub in space.spaces]))
    if isinstance(space, gym.Dict):
        return int(sum([flatdim(sub) for sub in space.spaces.values()]))
    if isinstance(space, gym.MultiBinary):
        return int(space.n)
    if isinstance(space, gym.MultiDiscrete):
        return int(np.prod(space.shape))
    raise NotImplementedError
def flatten(space, torch_x):
    """Flatten data points from a space.

    The trailing dimensions of `torch_x` that correspond to the space's
    shape are collapsed into one; composite spaces are flattened part by
    part and concatenated along the last dimension. Raises
    ``NotImplementedError`` for unsupported spaces.
    """
    if isinstance(space, gym.Box):
        if len(space.shape) > 0:
            leading_shape = torch_x.shape[: -len(space.shape)]
        else:
            leading_shape = torch_x.shape
        return torch_x.view(leading_shape + (-1,))

    if isinstance(space, gym.Discrete):
        # Assume tensor input does NOT contain a dimension for action
        if isinstance(torch_x, torch.Tensor):
            return torch_x.unsqueeze(-1)
        return torch.tensor(torch_x).view(1)

    if isinstance(space, gym.Tuple):
        parts = [flatten(sub, x_part) for x_part, sub in zip(torch_x, space.spaces)]
        return torch.cat(parts, dim=-1)

    if isinstance(space, gym.Dict):
        parts = [flatten(sub, torch_x[key]) for key, sub in space.spaces.items()]
        return torch.cat(parts, dim=-1)

    if isinstance(space, gym.MultiBinary):
        return torch_x.view(torch_x.shape[: -len(space.shape)] + (-1,))

    if isinstance(space, gym.MultiDiscrete):
        return torch_x.view(torch_x.shape[: -len(space.shape)] + (-1,))

    raise NotImplementedError
def unflatten(space, torch_x):
    """Unflatten a concatenated data points tensor from a space.

    The last dimension of `torch_x` is reshaped to the space's shape (and
    cast to the dtype used for that kind of space); composite spaces are
    split along the last dimension by the flat size of each sub-space.
    Raises ``NotImplementedError`` for unsupported spaces.
    """
    if isinstance(space, gym.Box):
        return torch_x.view(torch_x.shape[:-1] + space.shape).float()

    if isinstance(space, gym.Discrete):
        res = torch_x.view(torch_x.shape[:-1] + space.shape).long()
        # A zero-dimensional result is returned as a plain Python number.
        if len(res.shape) > 0:
            return res
        return res.item()

    if isinstance(space, gym.Tuple):
        sizes = [flatdim(sub) for sub in space.spaces]
        pieces = torch.split(torch_x, sizes, dim=-1)
        return tuple([unflatten(sub, piece) for piece, sub in zip(pieces, space.spaces)])

    if isinstance(space, gym.Dict):
        sizes = [flatdim(sub) for sub in space.spaces.values()]
        pieces = torch.split(torch_x, sizes, dim=-1)
        return OrderedDict(
            [
                (key, unflatten(sub, piece))
                for piece, (key, sub) in zip(pieces, space.spaces.items())
            ]
        )

    if isinstance(space, gym.MultiBinary):
        return torch_x.view(torch_x.shape[:-1] + space.shape).byte()

    if isinstance(space, gym.MultiDiscrete):
        return torch_x.view(torch_x.shape[:-1] + space.shape).long()

    raise NotImplementedError
def torch_point(space, np_x):
    """Convert numpy space point into torch.

    Discrete points are returned unchanged; composite spaces are converted
    part by part. Raises ``NotImplementedError`` for unsupported spaces.
    """
    if isinstance(space, gym.Box):
        return torch.from_numpy(np_x)
    if isinstance(space, gym.Discrete):
        return np_x
    if isinstance(space, gym.Tuple):
        converted = [torch_point(sub, x_part) for x_part, sub in zip(np_x, space.spaces)]
        return tuple(converted)
    if isinstance(space, gym.Dict):
        converted = [(key, torch_point(sub, np_x[key])) for key, sub in space.spaces.items()]
        return OrderedDict(converted)
    if isinstance(space, gym.MultiBinary):
        return torch.from_numpy(np_x)
    if isinstance(space, gym.MultiDiscrete):
        return torch.from_numpy(np.asarray(np_x))
    raise NotImplementedError
def numpy_point(
    space: gym.Space, torch_x: Union[int, torch.Tensor, OrderedDict, Tuple]
):
    """Turn a torch point of `space` into its numpy counterpart.

    Tensors for `Box`, `MultiBinary` and `MultiDiscrete` spaces are moved to
    the CPU and converted with `.numpy()`; `Discrete` points are returned
    untouched; `Tuple` / `Dict` points are converted recursively (as a
    tuple / `OrderedDict`).

    # Raises

    NotImplementedError : If `space` is of any other type.
    """

    def tensor_to_numpy(value):
        return cast(torch.Tensor, value).cpu().numpy()

    if isinstance(space, gym.Box):
        return tensor_to_numpy(torch_x)

    if isinstance(space, gym.Discrete):
        return torch_x

    if isinstance(space, gym.Tuple):
        parts = cast(Iterable, torch_x)
        return tuple(numpy_point(sub, part) for part, sub in zip(parts, space.spaces))

    if isinstance(space, gym.Dict):
        converted = OrderedDict()
        for key, sub in space.spaces.items():
            converted[key] = numpy_point(sub, cast(torch.Tensor, torch_x)[key])
        return converted

    if isinstance(space, gym.MultiBinary):
        return tensor_to_numpy(torch_x)

    if isinstance(space, gym.MultiDiscrete):
        return tensor_to_numpy(torch_x)

    raise NotImplementedError
def flatten_space(space: gym.Space):
    """Build a one-dimensional `gym.Box` covering the flattened `space`.

    `Tuple` and `Dict` spaces are flattened member by member and the
    resulting bounds are concatenated in member order.

    # Raises

    NotImplementedError : If `space` is of an unsupported type.
    """

    def merged_box(subspaces):
        # Flatten each member, then join the lower / upper bounds.
        flat_members = [flatten_space(member) for member in subspaces]
        return gym.Box(
            low=np.concatenate([box.low for box in flat_members]),
            high=np.concatenate([box.high for box in flat_members]),
        )

    if isinstance(space, gym.Box):
        return gym.Box(space.low.flatten(), space.high.flatten())

    if isinstance(space, gym.Discrete):
        return gym.Box(low=0, high=space.n, shape=(1,))

    if isinstance(space, gym.Tuple):
        return merged_box(space.spaces)

    if isinstance(space, gym.Dict):
        return merged_box(space.spaces.values())

    if isinstance(space, gym.MultiBinary):
        return gym.Box(low=0, high=1, shape=(space.n,))

    if isinstance(space, gym.MultiDiscrete):
        upper = space.nvec
        return gym.Box(
            low=np.zeros_like(upper),
            high=upper,
        )

    raise NotImplementedError
def policy_space(
    action_space: gym.Space,
    box_space_to_policy: Callable[[gym.Box], gym.Space] = None,
) -> gym.Space:
    """Derive the space in which a policy over `action_space` lives.

    # Parameters

    action_space : The action space to build a policy space for.
    box_space_to_policy : Optional mapping applied to `Box` action spaces.
        When omitted, a `Box` action space is its own policy space (the
        policy is the mean).

    # Returns

    The policy space: unit-interval boxes of probabilities for the discrete
    families, and `Tuple` / `Dict` spaces of sub-policies for composite ones.

    # Raises

    NotImplementedError : If `action_space` is of an unsupported type.
    """

    def unit_box(shape):
        # Probabilities live in [0, 1].
        return gym.Box(low=np.float32(0.0), high=np.float32(1.0), shape=shape)

    if isinstance(action_space, gym.Box):
        # policy = mean (default), unless a custom mapping is supplied
        if box_space_to_policy is not None:
            return box_space_to_policy(action_space)
        return action_space

    if isinstance(action_space, gym.Discrete):
        # policy = prob of each option
        return unit_box((action_space.n,))

    if isinstance(action_space, gym.Tuple):
        # policy = tuple of sub-policies
        return gym.Tuple(
            [policy_space(sub, box_space_to_policy) for sub in action_space.spaces]
        )

    if isinstance(action_space, gym.Dict):
        # policy = dict of sub-policies
        named_policies = []
        for name, sub in action_space.spaces.items():
            named_policies.append((name, policy_space(sub, box_space_to_policy)))
        return gym.Dict(named_policies)

    if isinstance(action_space, gym.MultiBinary):
        # policy = prob of 0, 1 in each entry
        return unit_box((action_space.n, 2))

    if isinstance(action_space, gym.MultiDiscrete):
        # policy = Tuple of prob of each option for each discrete
        return gym.Tuple([unit_box((n,)) for n in action_space.nvec])

    raise NotImplementedError
def action_list(
    action_space: gym.Space, flat_actions: torch.Tensor
) -> List[ActionType]:
    """Convert flattened actions into a list of plain Python actions.

    Assumes `flat_actions` are of shape `[step, sampler, flatdim]`. Only the
    entries of the first step (`flat_actions[0]`) are converted, one list
    element per sampler.
    """

    def to_python(value):
        # Tensors become (nested) lists; containers are converted member-wise.
        if isinstance(value, torch.Tensor):
            return value.tolist()
        if isinstance(value, Tuple):
            return tuple(to_python(member) for member in value)
        if isinstance(value, OrderedDict):
            return OrderedDict((key, to_python(value[key])) for key in value.keys())
        # else, it's a scalar
        return value

    per_sampler = []
    for flat_action in flat_actions[0]:
        per_sampler.append(to_python(unflatten(action_space, flat_action)))
    return per_sampler
================================================
FILE: allenact/utils/system.py
================================================
import io
import logging
import os
import socket
import sys
from contextlib import closing
from typing import cast, Optional, Tuple
from torch import multiprocessing as mp
from allenact._constants import ALLENACT_INSTALL_DIR
# Names accepted by `_human_log_level_to_int` (input is lower-cased and
# stripped before being checked against this tuple).
HUMAN_LOG_LEVELS: Tuple[str, ...] = ("debug", "info", "warning", "error", "none")
"""
Available log levels: "debug", "info", "warning", "error", "none"
"""
# Module-wide logger shared by the helpers below; it stays `None` until
# `_new_logger` assigns it from `mp.get_logger()`.
_LOGGER: Optional[logging.Logger] = None
class ColoredFormatter(logging.Formatter):
    """Format a log string with colors.

    This implementation taken (with modifications) from
    https://stackoverflow.com/a/384125.

    The level name of each record is wrapped in an ANSI color escape
    sequence chosen from `COLORS`; records whose level is not listed there
    (or any record when `use_color` is false) are formatted unchanged.
    """

    BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)

    RESET_SEQ = "\033[0m"
    COLOR_SEQ = "\033[1;%dm"
    BOLD_SEQ = "\033[1m"

    COLORS = {
        "WARNING": YELLOW,
        "INFO": GREEN,
        "DEBUG": BLUE,
        "ERROR": RED,
        "CRITICAL": MAGENTA,
    }

    def __init__(self, fmt: str, datefmt: Optional[str] = None, use_color=True):
        """Create the formatter.

        # Parameters

        fmt : Format string handed to `logging.Formatter`.
        datefmt : Optional date format handed to `logging.Formatter`.
        use_color : Whether level names should be colorized.
        """
        super().__init__(fmt=fmt, datefmt=datefmt)
        self.use_color = use_color

    def format(self, record: logging.LogRecord) -> str:
        """Return the formatted record, colorizing its level name if enabled.

        The record's `levelname` is only modified for the duration of the
        call: it is restored afterwards, even when formatting raises, because
        the same `record` object might be used elsewhere (e.g. by other
        handlers).
        """
        levelname = record.levelname
        if not (self.use_color and levelname in self.COLORS):
            return logging.Formatter.format(self, record)

        record.levelname = (
            self.COLOR_SEQ % (30 + self.COLORS[levelname])
            + levelname
            + self.RESET_SEQ
        )
        try:
            return logging.Formatter.format(self, record)
        finally:
            # Resetting levelname as `record` might be used elsewhere
            record.levelname = levelname
def get_logger() -> logging.Logger:
    """Return the module-wide `logging.Logger`, creating it on first use.

    It can be called whenever we wish to log some message. Messages can get
    mixed-up
    (https://docs.python.org/3.6/library/multiprocessing.html#logging), but it
    works well in most cases.

    When this call is the one that creates the logger, the level is set to
    `logging.DEBUG` if the current process is named "MainProcess", and the
    log formatter is installed.

    # Returns

    logger: the `logging.Logger` object
    """
    created_now = _new_logger()
    if created_now:
        in_main_process = mp.current_process().name == "MainProcess"
        if in_main_process:
            _new_logger(logging.DEBUG)
        _set_log_formatter()
    return _LOGGER
def _human_log_level_to_int(human_log_level):
    """Translate a human-readable level name into a `logging` level number.

    The name is lower-cased and stripped first. "none" maps to a value one
    above `logging.CRITICAL`.

    # Raises

    AssertionError : If the normalized name is not in `HUMAN_LOG_LEVELS`.
    NotImplementedError : If the name has no known numeric level (only
        reachable when assertions are disabled).
    """
    human_log_level = human_log_level.lower().strip()
    assert human_log_level in HUMAN_LOG_LEVELS, "unknown human_log_level {}".format(
        human_log_level
    )

    numeric_levels = {
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warning": logging.WARNING,
        "error": logging.ERROR,
        "none": logging.CRITICAL + 1,
    }
    if human_log_level not in numeric_levels:
        raise NotImplementedError(f"Unknown log level {human_log_level}.")
    return numeric_levels[human_log_level]
def init_logging(human_log_level: str = "info") -> None:
    """Initialize the module-wide `logging.Logger`.

    Intended to be called once in the app (e.g. in `main`). Sets the logger's
    level to the one named by `human_log_level` (one of `HUMAN_LOG_LEVELS`)
    and then installs the log formatter / handler.
    """
    numeric_level = _human_log_level_to_int(human_log_level)
    _new_logger(numeric_level)
    _set_log_formatter()
def update_log_level(logger, human_log_level: str):
    """Set `logger`'s level to the one named by `human_log_level`."""
    numeric_level = _human_log_level_to_int(human_log_level)
    logger.setLevel(numeric_level)
def find_free_port(address: str = "127.0.0.1") -> int:
    """Finds a free port for distributed training.

    A TCP socket is bound to port 0 on `address`, the port number it ended up
    with is read back, and the socket is closed again.

    # Returns

    port: port number that can be used to listen
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with closing(sock):
        sock.bind((address, 0))
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        bound_address = sock.getsockname()
        port = bound_address[1]
    return port
def _new_logger(log_level: Optional[int] = None):
    """Make sure the module-wide logger exists and optionally set its level.

    # Parameters

    log_level : If given, the logger's level is set to this value.

    # Returns

    `True` if this call created the logger (from `mp.get_logger()`),
    `False` if it already existed.
    """
    global _LOGGER

    created = _LOGGER is None
    if created:
        _LOGGER = mp.get_logger()

    if log_level is not None:
        get_logger().setLevel(log_level)

    return created
def _set_log_formatter():
    """Attach a formatted stream handler to the module-wide logger.

    Nothing is installed when the logger's effective level is above
    `logging.CRITICAL`. Otherwise a `logging.StreamHandler` with a (colored)
    formatter and the AllenAct message filter is added to the logger,
    `sys.excepthook` is replaced by `_excepthook` and `sys.stdout` is
    replaced by a `_StreamToLogger`.

    # Returns

    The module-wide logger.
    """
    assert _LOGGER is not None

    if _LOGGER.getEffectiveLevel() > logging.CRITICAL:
        return _LOGGER

    add_style_to_logs = True  # In case someone wants to turn this off manually.
    log_format = "default"
    short_date_format = "%m/%d %H:%M:%S"

    if add_style_to_logs:
        styled_template = "$BOLD[%(asctime)s$RESET %(levelname)s$BOLD:]$RESET %(message)s\t[%(filename)s: %(lineno)d]"
        default_format = styled_template.replace(
            "$BOLD", ColoredFormatter.BOLD_SEQ
        ).replace("$RESET", ColoredFormatter.RESET_SEQ)
    else:
        default_format = (
            "%(asctime)s %(levelname)s: %(message)s\t[%(filename)s: %(lineno)d]"
        )

    # Pick the format / date format pair for the selected log format.
    if log_format == "default":
        fmt, datefmt = default_format, short_date_format
    elif log_format == "defaultMilliseconds":
        fmt, datefmt = default_format, None
    else:
        fmt, datefmt = log_format, short_date_format

    if add_style_to_logs:
        formatter = ColoredFormatter(
            fmt=fmt,
            datefmt=datefmt,
        )
    else:
        formatter = logging.Formatter(fmt=fmt, datefmt=datefmt)

    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    handler.addFilter(cast(logging.Filter, _AllenActMessageFilter(os.getcwd())))
    _LOGGER.addHandler(handler)

    sys.excepthook = _excepthook
    sys.stdout = cast(io.TextIOWrapper, _StreamToLogger())

    return _LOGGER
class _StreamToLogger:
    """Minimal writable stream that forwards text to the module-wide logger.

    Text is buffered until a chunk ending in a newline is seen; each such
    chunk is logged (right-stripped) at INFO level.
    """

    def __init__(self):
        # Text received so far that has not been logged yet.
        self.linebuf = ""

    def write(self, buf):
        """Log every newline-terminated chunk of `buf`; buffer the rest."""
        pending = self.linebuf + buf
        self.linebuf = ""
        for chunk in pending.splitlines(True):
            if chunk[-1] != "\n":
                # Not newline-terminated: keep it for a later write / flush.
                self.linebuf += chunk
                continue
            cast(logging.Logger, _LOGGER).info(chunk.rstrip())

    def flush(self):
        """Log whatever is still buffered, if anything."""
        if self.linebuf == "":
            return
        cast(logging.Logger, _LOGGER).info(self.linebuf.rstrip())
        self.linebuf = ""
def _excepthook(*args):
    """Log an uncaught exception through the module-wide logger.

    `args` is forwarded unchanged as the `exc_info` of the log call.
    """
    logger = get_logger()
    # noinspection PyTypeChecker
    logger.error(msg="Uncaught exception:", exc_info=args)
class _AllenActMessageFilter:
    """Log filter keeping only records whose source path looks relevant.

    A record passes when its `pathname` contains the working directory given
    at construction, `ALLENACT_INSTALL_DIR`, or the substring "main".
    """

    def __init__(self, working_directory: str):
        self.working_directory = working_directory

    # noinspection PyMethodMayBeStatic
    def filter(self, record):
        """Return 1 if `record` should be emitted and 0 otherwise."""
        # TODO: Does this work when pip-installing AllenAct?
        source_path = record.pathname
        if self.working_directory in source_path:
            return 1
        if ALLENACT_INSTALL_DIR in source_path:
            return 1
        return int("main" in source_path)
class ImportChecker:
    """Context manager that extends the message of a `ModuleNotFoundError`.

    When the managed block raises exactly `ModuleNotFoundError` and a `msg`
    was supplied, `msg` is appended to the exception's own `msg`. The
    exception is never suppressed.
    """

    def __init__(self, msg=None):
        self.msg = msg

    def __enter__(self):
        return None

    def __exit__(self, exc_type, value, traceback):
        have_hint = self.msg is not None
        if have_hint and exc_type == ModuleNotFoundError:
            value.msg = value.msg + self.msg
        # Truthy only when nothing was raised, so exceptions always propagate.
        no_exception = exc_type is None
        return no_exception
================================================
FILE: allenact/utils/tensor_utils.py
================================================
"""Functions used to manipulate pytorch tensors and numpy arrays."""
import numbers
import os
import tempfile
from collections import defaultdict
from typing import List, Dict, Optional, DefaultDict, Union, Any, cast
import PIL
import numpy as np
import torch
from PIL import Image
from moviepy import editor as mpy
from moviepy.editor import concatenate_videoclips
from tensorboardX import SummaryWriter as TBXSummaryWriter, summary as tbxsummary
from tensorboardX.proto.summary_pb2 import Summary as TBXSummary
# noinspection PyProtectedMember
from tensorboardX.utils import _prepare_video as tbx_prepare_video
from tensorboardX.x2num import make_np as tbxmake_np
from allenact.utils.system import get_logger
def to_device_recursively(
    input: Any, device: Union[str, torch.device, int], inplace: bool = True
):
    """Recursively places tensors on the appropriate device.

    # Parameters

    input : A tensor, or a (possibly nested) tuple / list / dict / set of
        supported values. `None`, numpy arrays, numpy scalars and strings are
        returned unchanged. Any other object must provide a
        `to(device=..., inplace=...)` method, which is then called.
    device : The device tensors should be moved to.
    inplace : If true, lists, dicts and sets are updated in place and the
        very same container object is returned; otherwise new containers are
        built. Tuples are always rebuilt.

    # Returns

    The input structure with every tensor moved to `device`.

    # Raises

    NotImplementedError : If a value of an unsupported type is encountered.
    """
    if input is None:
        return input
    elif isinstance(input, torch.Tensor):
        return input.to(device)  # type: ignore
    elif isinstance(input, tuple):
        return tuple(
            to_device_recursively(input=subinput, device=device, inplace=inplace)
            for subinput in input
        )
    elif isinstance(input, list):
        if inplace:
            for i in range(len(input)):
                input[i] = to_device_recursively(
                    input=input[i], device=device, inplace=inplace
                )
            return input
        else:
            return [
                to_device_recursively(input=subpart, device=device, inplace=inplace)
                for subpart in input
            ]
    elif isinstance(input, dict):
        if inplace:
            for key in input:
                input[key] = to_device_recursively(
                    input=input[key], device=device, inplace=inplace
                )
            return input
        else:
            return {
                k: to_device_recursively(input=input[k], device=device, inplace=inplace)
                for k in input
            }
    elif isinstance(input, set):
        if inplace:
            # Iterate over a snapshot as the set is modified while looping.
            for element in list(input):
                input.remove(element)
                input.add(
                    to_device_recursively(element, device=device, inplace=inplace)
                )
            # Return the updated set, like the list and dict branches do
            # (previously this branch fell through and returned `None`).
            return input
        else:
            return set(
                to_device_recursively(k, device=device, inplace=inplace) for k in input
            )
    elif isinstance(input, np.ndarray) or np.isscalar(input) or isinstance(input, str):
        return input
    elif hasattr(input, "to"):
        # noinspection PyCallingNonCallable
        return input.to(device=device, inplace=inplace)
    else:
        raise NotImplementedError(
            "Sorry, value of type {} is not supported.".format(type(input))
        )
def detach_recursively(input: Any, inplace=True):
    """Recursively detaches tensors in some data structure from their
    computation graph.

    # Parameters

    input : A tensor, or a (possibly nested) tuple / list / dict / set of
        supported values. `None`, numpy arrays, numpy scalars and strings are
        returned unchanged. Any other object must provide a
        `detach_recursively(inplace=...)` method, which is then called.
    inplace : If true, lists, dicts and sets are updated in place and the
        very same container object is returned; otherwise new containers are
        built. Tuples are always rebuilt.

    # Returns

    The input structure with every tensor replaced by its detached version.

    # Raises

    NotImplementedError : If a value of an unsupported type is encountered.
    """
    if input is None:
        return input
    elif isinstance(input, torch.Tensor):
        return input.detach()
    elif isinstance(input, tuple):
        return tuple(
            detach_recursively(input=subinput, inplace=inplace) for subinput in input
        )
    elif isinstance(input, list):
        if inplace:
            for i in range(len(input)):
                input[i] = detach_recursively(input[i], inplace=inplace)
            return input
        else:
            return [
                detach_recursively(input=subinput, inplace=inplace)
                for subinput in input
            ]
    elif isinstance(input, dict):
        if inplace:
            for key in input:
                input[key] = detach_recursively(input[key], inplace=inplace)
            return input
        else:
            return {k: detach_recursively(input[k], inplace=inplace) for k in input}
    elif isinstance(input, set):
        if inplace:
            # Iterate over a snapshot as the set is modified while looping.
            for element in list(input):
                input.remove(element)
                input.add(detach_recursively(element, inplace=inplace))
            # Return the updated set, like the list and dict branches do
            # (previously this branch fell through and returned `None`).
            return input
        else:
            return set(detach_recursively(k, inplace=inplace) for k in input)
    elif isinstance(input, np.ndarray) or np.isscalar(input) or isinstance(input, str):
        return input
    elif hasattr(input, "detach_recursively"):
        # noinspection PyCallingNonCallable
        return input.detach_recursively(inplace=inplace)
    else:
        raise NotImplementedError(
            "Sorry, hidden state of type {} is not supported.".format(type(input))
        )
def batch_observations(
    observations: List[Dict], device: Optional[torch.device] = None
) -> Dict[str, Union[Dict, torch.Tensor]]:
    """Transpose a batch of observation dicts to a dict of batched
    observations.

    Nested dicts are handled recursively; every leaf value is converted with
    `to_tensor` and the leaves collected for a sensor are stacked along a new
    first dimension.

    # Arguments

    observations : List of dicts of observations.
    device : The torch.device to put the resulting tensors on.
        Will not move the tensors if None.

    # Returns

    Transposed dict of lists of observations. An empty `observations` input
    is returned as-is.
    """

    def start_batch(first_observation: Dict[str, Any]) -> Dict[str, Union[Dict, List]]:
        # Lay out the (nested) structure using the first observation.
        collected: DefaultDict = defaultdict(list)
        for sensor, value in first_observation.items():
            if isinstance(value, Dict):
                collected[sensor] = start_batch(value)
            else:
                collected[sensor].append(to_tensor(value))
        return collected

    def extend_batch(collected: Any, observation: Dict[str, Any]) -> None:
        # Append one more observation to an already laid-out batch.
        for sensor, value in observation.items():
            if isinstance(value, Dict):
                extend_batch(collected[sensor], value)
            else:
                collected[sensor].append(to_tensor(value))

    def stack_batch(collected: Any) -> None:
        # Replace every list of tensors by a single stacked tensor.
        for sensor in collected:
            entry = collected[sensor]
            if isinstance(entry, Dict):
                stack_batch(entry)
            else:
                moved = [tensor.to(device=device) for tensor in entry]
                collected[sensor] = torch.stack(moved, dim=0)

    if len(observations) == 0:
        return cast(Dict[str, Union[Dict, torch.Tensor]], observations)

    first, *remaining = observations
    batch = start_batch(first)
    for observation in remaining:
        extend_batch(batch, observation)

    stack_batch(batch)

    return cast(Dict[str, Union[Dict, torch.Tensor]], batch)
def to_tensor(v) -> torch.Tensor:
    """Return a torch.Tensor version of the input.

    Tensors are returned as-is and numpy arrays are wrapped with
    `torch.from_numpy`. Anything else is passed to `torch.tensor`, using
    `torch.int64` when `v` is a `numbers.Integral` and `torch.float`
    otherwise.

    # Parameters

    v : Input values that can be coerced into being a tensor.

    # Returns

    A tensor version of the input.
    """
    if torch.is_tensor(v):
        return v

    if isinstance(v, np.ndarray):
        return torch.from_numpy(v)

    dtype = torch.int64 if isinstance(v, numbers.Integral) else torch.float
    return torch.tensor(v, dtype=dtype)
def tile_images(images: List[np.ndarray]) -> np.ndarray:
    """Tile multiple images into single image.

    The images are laid out row by row on a grid with
    `ceil(sqrt(n_images))` rows and just enough columns to hold them all;
    unused grid cells are filled with `images[0] * 0`.

    # Parameters

    images : list of images where each image has dimension
        (height x width x channels)

    # Returns

    Tiled image of shape
    (grid_rows * height x grid_cols * width x channels).
    """
    assert len(images) > 0, "empty list of images"

    n_images, height, width, n_channels = np.asarray(images).shape

    grid_rows = int(np.ceil(np.sqrt(n_images)))
    grid_cols = int(np.ceil(float(n_images) / grid_rows))

    # pad with empty images to complete the rectangle
    blanks = [images[0] * 0 for _ in range(n_images, grid_rows * grid_cols)]
    grid = np.array(images + blanks)

    # (rows, cols, h, w, c) -> (rows, h, cols, w, c) -> (rows * h, cols * w, c)
    grid = grid.reshape((grid_rows, grid_cols, height, width, n_channels))
    grid = grid.transpose(0, 2, 1, 3, 4)
    return grid.reshape((grid_rows * height, grid_cols * width, n_channels))
class SummaryWriter(TBXSummaryWriter):
    """tensorboardX summary writer with helpers for pre-encoded videos and
    images converted by this module's `image` function."""

    @staticmethod
    def _video(tag, vid):
        """Wrap an already encoded video `vid` into a summary under `tag`."""
        # noinspection PyProtectedMember
        clean_tag = tbxsummary._clean_tag(tag)
        video_value = TBXSummary.Value(tag=clean_tag, image=vid)
        return TBXSummary(value=[video_value])

    def add_vid(self, tag, vid, global_step=None, walltime=None):
        """Write the encoded video `vid` to the event file under `tag`."""
        file_writer = self._get_file_writer()
        file_writer.add_summary(self._video(tag, vid), global_step, walltime)

    def add_image(
        self, tag, img_tensor, global_step=None, walltime=None, dataformats="CHW"
    ):
        """Write `img_tensor` (laid out as `dataformats`) to the event file."""
        file_writer = self._get_file_writer()
        file_writer.add_summary(
            image(tag, img_tensor, dataformats=dataformats), global_step, walltime
        )
def image(tag, tensor, rescale=1, dataformats="CHW"):
    """Build a `Summary` protocol buffer holding a single image.

    `tensor` is converted to a numpy array, rearranged to
    `[height, width, channels]` according to `dataformats` and, unless it is
    already `uint8`, multiplied by 255 and cast to `uint8`.

    # Parameters

    tag: A name for the generated node. Will also serve as a series name in
        TensorBoard.
    tensor: The image data, laid out as described by `dataformats`.
        'tensor' can either have values in [0, 1] (float32) or [0, 255] (uint8).
        The image() function will scale the image values to [0, 255] by applying
        a scale factor of either 1 (uint8) or 255 (float32).
    rescale: The scale, forwarded to tensorboardX's `make_image`.
    dataformats: Input image shape format.

    # Returns

    A `Summary` protocol buffer with one value carrying the encoded image.
    """
    # noinspection PyProtectedMember
    clean_tag = tbxsummary._clean_tag(tag)

    pixels = convert_to_HWC(tbxmake_np(tensor), dataformats)
    # Do not assume that user passes in values in [0, 255], use data type to detect
    if pixels.dtype != np.uint8:
        pixels = (pixels * 255.0).astype(np.uint8)

    encoded = tbxsummary.make_image(pixels, rescale=rescale)
    image_value = TBXSummary.Value(tag=clean_tag, image=encoded)
    return TBXSummary(value=[image_value])
def convert_to_HWC(tensor, input_format):  # tensor: numpy array
    """Rearrange a numpy image (batch) into a three-channel HWC array.

    # Parameters

    tensor : Numpy array with as many dimensions as `input_format` has
        characters.
    input_format : Dimension shorthand (case-insensitive) built from the
        letters of "NCHW" (4-D input, tiled into a grid with `make_grid`),
        "HWC" (3-D input) or "HW" (2-D input). Single-channel 3-D input and
        2-D input are replicated to three channels.

    # Returns

    The rearranged array; `None` when `input_format` has a length other than
    2, 3 or 4.
    """
    assert len(set(input_format)) == len(input_format), (
        "You can not use the same dimension shordhand twice. "
        "input_format: {}".format(input_format)
    )
    assert len(tensor.shape) == len(input_format), (
        "size of input tensor and input format are different. "
        "tensor shape: {}, input_format: {}".format(tensor.shape, input_format)
    )

    layout = input_format.upper()
    rank = len(layout)

    def axis_order(target):
        # Position of every target axis within the given layout.
        return [layout.find(axis) for axis in target]

    if rank == 4:
        batch_nchw = tensor.transpose(axis_order("NCHW"))
        grid_chw = make_grid(batch_nchw)
        # noinspection PyTypeChecker
        return grid_chw.transpose(1, 2, 0)

    if rank == 3:
        image_hwc = tensor.transpose(axis_order("HWC"))
        if image_hwc.shape[2] == 1:
            image_hwc = np.concatenate([image_hwc, image_hwc, image_hwc], 2)
        return image_hwc

    if rank == 2:
        plane = tensor.transpose(axis_order("HW"))
        return np.stack([plane, plane, plane], 2)
def make_grid(I, ncols=8):
    """Tile a batch of images into a single grid image.

    # Parameters

    I : Numpy array of shape N1HW, N3HW or N4HW. Single-channel input is
        replicated to three channels.
    ncols : Maximum number of images per grid row.

    # Returns

    Array of shape `(channels, H * nrows, W * ncols)` with the images placed
    row by row; unused cells are left at zero.

    # Raises

    AssertionError : If `I` is not a numpy array, is not 4-dimensional, or
        does not have 3 or 4 channels (after grayscale expansion).
    """
    # I: N1HW or N3HW
    assert isinstance(I, np.ndarray), "plugin error, should pass numpy array here"
    if I.shape[1] == 1:
        I = np.concatenate([I, I, I], 1)
    # Both conditions must hold. The former `A and B or C` form let arrays of
    # the wrong rank through whenever the second dimension happened to be 4.
    assert I.ndim == 4 and I.shape[1] in (3, 4)

    nimg = I.shape[0]
    H = I.shape[2]
    W = I.shape[3]
    ncols = min(nimg, ncols)
    nrows = int(np.ceil(float(nimg) / ncols))
    canvas = np.zeros((I.shape[1], H * nrows, W * ncols), dtype=I.dtype)

    for i in range(nimg):
        # Row-major placement of image `i` on the grid.
        y, x = divmod(i, ncols)
        canvas[:, y * H : (y + 1) * H, x * W : (x + 1) * W] = I[i]
    return canvas
def tensor_to_video(tensor, fps=4):
    """Encode `tensor` as a video summary using tensorboardX's `make_video`.

    The input is converted to a numpy array and passed through
    tensorboardX's `_prepare_video`; non-`uint8` data is multiplied by 255
    and cast to `uint8` before encoding at `fps` frames per second.
    """
    frames = tbx_prepare_video(tbxmake_np(tensor))
    # If user passes in uint8, then we don't need to rescale by 255
    needs_rescale = frames.dtype != np.uint8
    if needs_rescale:
        frames = (frames * 255.0).astype(np.uint8)

    return tbxsummary.make_video(frames, fps)
def tensor_to_clip(tensor, fps=4):
    """Turn `tensor` into a moviepy `ImageSequenceClip`.

    The input is converted to a numpy array and passed through
    tensorboardX's `_prepare_video`; non-`uint8` data is multiplied by 255
    and cast to `uint8`.

    # Returns

    A pair `(clip, (h, w, c))` where `h`, `w`, `c` are the last three
    dimensions of the prepared frame array.
    """
    frames = tbx_prepare_video(tbxmake_np(tensor))
    # If user passes in uint8, then we don't need to rescale by 255
    if frames.dtype != np.uint8:
        frames = (frames * 255.0).astype(np.uint8)

    _n_frames, height, width, channels = frames.shape
    clip = mpy.ImageSequenceClip(list(frames), fps=fps)

    return clip, (height, width, channels)
def clips_to_video(clips, h, w, c):
    """Concatenate `clips` and encode them as a gif wrapped in a summary image.

    The concatenated clip is written to a temporary `.gif` file, read back
    as bytes and the temporary file is removed again, also when writing or
    reading fails.

    # Parameters

    clips : The moviepy clips to concatenate.
    h : Frame height stored in the returned summary image.
    w : Frame width stored in the returned summary image.
    c : Value stored as `colorspace` in the returned summary image.

    # Returns

    A `TBXSummary.Image` whose `encoded_image_string` is the gif content.
    """
    # encode sequence of images into gif string
    clip = concatenate_videoclips(clips)

    # Only the path is needed: close the handle right away so moviepy can
    # freely (re)open the file.
    with tempfile.NamedTemporaryFile(suffix=".gif", delete=False) as tmp_file:
        filename = tmp_file.name

    try:
        # moviepy >= 1.0.0 use logger=None to suppress output.
        try:
            clip.write_gif(filename, verbose=False, logger=None)
        except TypeError:
            get_logger().warning(
                "Upgrade to moviepy >= 1.0.0 to suppress the progress bar."
            )
            clip.write_gif(filename, verbose=False)

        with open(filename, "rb") as f:
            tensor_string = f.read()
    finally:
        # Clean up on success and on failure alike, so that e.g. a
        # `MemoryError` while encoding does not leave the gif behind.
        try:
            os.remove(filename)
        except OSError:
            get_logger().warning(
                "The temporary file used by moviepy cannot be deleted."
            )

    return TBXSummary.Image(
        height=h, width=w, colorspace=c, encoded_image_string=tensor_string
    )
def process_video(render, max_clip_len=500, max_video_len=-1, fps=4):
    """Encode a sequence of frames as a video summary image.

    # Parameters

    render : Sequence of frames; each chunk of frames is stacked and treated
        as a `T, H, W, C` array.
    max_clip_len : Number of frames converted into a clip at a time.
    max_video_len : If positive, only the first `max_video_len` frames are
        kept.
    fps : Frames per second of the generated clips.

    # Returns

    The result of `clips_to_video` for the generated clips, or `None` when
    there are no frames or a `MemoryError` occurred.
    """
    if len(render) == 0:
        get_logger().warning("Calling process_video with 0 frames")
        return None

    if len(render) > max_video_len > 0:
        get_logger().warning(
            "Clipping video to first {} frames out of {} original frames".format(
                max_video_len, len(render)
            )
        )
        render = render[:max_video_len]

    output = []
    hwc = None
    for clipstart in range(0, len(render), max_clip_len):
        clip = render[clipstart : clipstart + max_clip_len]
        try:
            stacked = np.stack(clip, axis=0)  # T, H, W, C
            stacked = stacked.transpose((0, 3, 1, 2))  # T, C, H, W
            batched = np.expand_dims(stacked, axis=0)  # 1, T, C, H, W
            current, cur_hwc = tensor_to_clip(batched, fps=fps)

            if hwc is not None:
                assert (
                    hwc == cur_hwc
                ), "Inconsistent clip shape: previous {} current {}".format(
                    hwc, cur_hwc
                )
            else:
                hwc = cur_hwc

            output.append(current)
        except MemoryError:
            get_logger().error(
                "Skipping video due to memory error with clip of length {}".format(
                    len(clip)
                )
            )
            return None

    assert len(output) > 0, "No clips to concatenate"
    assert hwc is not None, "No tensor dims assigned"

    try:
        return clips_to_video(output, *hwc)
    except MemoryError:
        get_logger().error("Skipping video due to memory error calling clips_to_video")
        return None
class ScaleBothSides(object):
    """Callable that resizes a PIL image to a fixed `width` and `height`.

    Attributes
        width: new width
        height: new height
        interpolation: resampling filter passed to `resize`.
            Default: PIL.Image.BILINEAR
    """

    def __init__(self, width: int, height: int, interpolation=Image.BILINEAR):
        self.width, self.height = width, height
        self.interpolation = interpolation

    def __call__(self, img: PIL.Image) -> PIL.Image:
        target_size = (self.width, self.height)
        return img.resize(target_size, self.interpolation)
================================================
FILE: allenact/utils/viz_utils.py
================================================
import abc
import json
import os
import sys
from collections import defaultdict
from typing import (
Dict,
Any,
Union,
Optional,
List,
Tuple,
Sequence,
Callable,
cast,
Set,
)
import numpy as np
from allenact.utils.experiment_utils import Builder
from allenact.utils.tensor_utils import SummaryWriter, tile_images, process_video
try:
# Tensorflow not installed for testing
from tensorflow.core.util import event_pb2
from tensorflow.python.lib.io import tf_record
_TF_AVAILABLE = True
except ImportError as _:
event_pb2 = None
tf_record = None
_TF_AVAILABLE = False
import matplotlib
try:
# When debugging we don't want to use the interactive version of matplotlib
# as it causes all sorts of problems.
# noinspection PyPackageRequirements
import pydevd
matplotlib.use("agg")
except ImportError as _:
pass
import matplotlib.pyplot as plt
import matplotlib.markers as markers
import cv2
from allenact.utils.system import get_logger
class AbstractViz:
    """Base class for visualizations.

    Stores which data sources a visualization needs (vector task sources,
    rollout sources, actor critic output) and how episodes are identified and
    grouped. Subclasses implement `log`.
    """

    def __init__(
        self,
        label: Optional[str] = None,
        vector_task_sources: Sequence[Tuple[str, Dict[str, Any]]] = (),
        rollout_sources: Sequence[Union[str, Sequence[str]]] = (),
        actor_critic_source: bool = False,
        **kwargs,  # accepts `max_episodes_in_group`
    ):
        """Create the visualization.

        # Parameters

        label : Name of the visualization.
        vector_task_sources : Sequence of `(name, kwargs)` pairs.
        rollout_sources : Sequence of sources; a plain string is treated as
            a one-element path, any other entry is converted to a list.
        actor_critic_source : Stored as-is in `self.actor_critic_source`.
        kwargs : Only `max_episodes_in_group` is read; it defaults to 8.
        """
        self.label = label
        self.vector_task_sources = list(vector_task_sources)
        self.rollout_sources = [
            [entry] if isinstance(entry, str) else list(entry)
            for entry in rollout_sources
        ]
        self.actor_critic_source = actor_critic_source

        self.mode: Optional[str] = None
        self.path_to_id: Optional[Sequence[str]] = None
        self.episode_ids: Optional[List[Sequence[str]]] = None

        # Remember whether the group size was chosen explicitly: `_setup`
        # only overrides it when it was not (or when forced).
        self.assigned_max_eps_in_group = "max_episodes_in_group" in kwargs
        self.max_episodes_in_group = kwargs.get("max_episodes_in_group", 8)

    @staticmethod
    def _source_to_str(source, is_vector_task):
        """Build the string identifier used for the data of `source`."""
        source_type = "vector_task" if is_vector_task else "rollout_or_actor_critic"
        return "{}__{}".format(
            source_type,
            "__{}_sep__".format(source_type).join(["{}".format(s) for s in source]),
        )

    @staticmethod
    def _access(dictionary, path):
        """Follow the keys in `path`, one nesting level per key.

        `path` may be any sequence of keys (list, tuple, ...) and is not
        modified; an empty path returns `dictionary` itself.
        """
        for key in path:
            dictionary = dictionary[key]
        return dictionary

    def _auto_viz_order(self, task_outputs):
        """Determine which episodes are visualized together.

        # Returns

        A pair `(viz_order, all_episodes)`: `viz_order` is a list of groups
        of episode ids (the explicitly configured `episode_ids` if any,
        otherwise all episodes in chunks of `max_episodes_in_group`) and
        `all_episodes` maps episode id to task output. Both are `None` when
        `task_outputs` is `None`.
        """
        if task_outputs is None:
            return None, None

        all_episodes = {
            self._access(episode, self.path_to_id): episode for episode in task_outputs
        }

        if self.episode_ids is None:
            all_episode_keys = list(all_episodes.keys())
            group_size = self.max_episodes_in_group
            viz_order = [
                all_episode_keys[page_start : page_start + group_size]
                for page_start in range(0, len(all_episode_keys), group_size)
            ]
            get_logger().debug("visualizing with order {}".format(viz_order))
        else:
            viz_order = self.episode_ids

        return viz_order, all_episodes

    def _setup(
        self,
        mode: str,
        path_to_id: Sequence[str],
        episode_ids: Optional[Sequence[Union[Sequence[str], str]]],
        max_episodes_in_group: int,
        force: bool = False,
    ):
        """Configure the visualization.

        `mode` and `path_to_id` are always stored. `episode_ids` and
        `max_episodes_in_group` only replace the current values when none
        were set explicitly before, or when `force` is true. A flat sequence
        of episode id strings is treated as a single group.
        """
        self.mode = mode
        self.path_to_id = list(path_to_id)
        if (self.episode_ids is None or force) and episode_ids is not None:
            self.episode_ids = (
                list(episode_ids)
                if not isinstance(episode_ids[0], str)
                else [list(cast(List[str], episode_ids))]
            )
        if not self.assigned_max_eps_in_group or force:
            self.max_episodes_in_group = max_episodes_in_group

    @abc.abstractmethod
    def log(
        self,
        log_writer: SummaryWriter,
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        """Write the visualization to `log_writer`; implemented by subclasses."""
        raise NotImplementedError()
class TrajectoryViz(AbstractViz):
    """Visualization plotting the 2D path followed in each episode.

    For every group of episodes one list of matplotlib figures is logged via
    `log_writer.add_figure`.
    """

    def __init__(
        self,
        path_to_trajectory: Sequence[str] = ("task_info", "followed_path"),
        path_to_target_location: Optional[Sequence[str]] = (
            "task_info",
            "target_position",
        ),
        path_to_x: Sequence[str] = ("x",),
        path_to_y: Sequence[str] = ("z",),
        path_to_rot_degrees: Optional[Sequence[str]] = ("rotation", "y"),
        adapt_rotation: Optional[Callable[[float], float]] = None,
        label: str = "trajectory",
        figsize: Tuple[float, float] = (2, 2),
        fontsize: float = 5,
        start_marker_shape: str = r"$\spadesuit$",
        start_marker_scale: int = 100,
        **other_base_kwargs,
    ):
        """Create the visualization.

        # Parameters

        path_to_trajectory : Keys leading from an episode's task output to
            its sequence of locations.
        path_to_target_location : Keys leading from an episode's task output
            to the target location; `None` disables the target marker.
        path_to_x : Keys leading from a location to the plotted x value.
        path_to_y : Keys leading from a location to the plotted y value.
        path_to_rot_degrees : Keys leading from a location to a rotation in
            degrees used to rotate the start marker; `None` disables this.
        adapt_rotation : Optional function applied to the rotation before use.
        label : Name of the visualization.
        figsize : Size of each figure.
        fontsize : Font size of titles and tick labels.
        start_marker_shape : Marker used for the first location.
        start_marker_scale : Size (`s`) of the start marker.
        other_base_kwargs : Forwarded to `AbstractViz`.
        """
        super().__init__(label, **other_base_kwargs)

        def optional_list(path):
            return None if path is None else list(path)

        self.path_to_trajectory = list(path_to_trajectory)
        self.path_to_target_location = optional_list(path_to_target_location)
        self.adapt_rotation = adapt_rotation
        self.x = list(path_to_x)
        self.y = list(path_to_y)
        self.path_to_rot_degrees = optional_list(path_to_rot_degrees)
        self.figsize = figsize
        self.fontsize = fontsize
        self.start_marker_shape = start_marker_shape
        self.start_marker_scale = start_marker_scale

    def log(
        self,
        log_writer: SummaryWriter,
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        """Log one group of trajectory figures per page of episodes.

        Episodes listed in the visualization order but missing from
        `task_outputs` are skipped with a warning; pages without any figure
        are not logged. `render` is not used.
        """
        viz_order, all_episodes = self._auto_viz_order(task_outputs)
        if viz_order is None:
            get_logger().debug("trajectory viz returning without visualizing")
            return

        for page, current_ids in enumerate(viz_order):
            figs = []
            for episode_id in current_ids:
                # assert episode_id in all_episodes
                if episode_id in all_episodes:
                    figs.append(self.make_fig(all_episodes[episode_id], episode_id))
                else:
                    get_logger().warning(
                        "skipping viz for missing episode {}".format(episode_id)
                    )

            if len(figs) > 0:
                tag = "{}/{}_group{}".format(self.mode, self.label, page)
                log_writer.add_figure(
                    tag,
                    figs,
                    global_step=num_steps,
                )
                # close all current figures (SummaryWriter already closes all
                # figures we log)
                plt.close("all")

    def make_fig(self, episode, episode_id):
        """Create the figure showing the trajectory of a single episode.

        The path is drawn as a line colored along its length, with markers
        for the first location, the last location and (if configured) the
        target location. The figure is titled with `episode_id`.
        """
        # Evaluated before the figure is created, exactly like default
        # arguments of a locally defined helper would be.
        line_cmap = plt.get_cmap("cool")
        line_norm = plt.Normalize(0.0, 1.0)

        # From https://nbviewer.jupyter.org/github/dpsanders/matplotlib-examples/blob/master/colorline.ipynb
        def draw_colored_line(xs, ys):
            """Plot a line through (xs, ys) on the current axes, colored by
            values equally spaced on [0, 1]."""
            colors = np.asarray(np.linspace(0.0, 1.0, len(xs)))

            # Segments in the format expected by LineCollection:
            # numlines x (points per line) x 2 (x and y)
            points = np.array([xs, ys]).T.reshape(-1, 1, 2)
            segments = np.concatenate([points[:-1], points[1:]], axis=1)

            collection = matplotlib.collections.LineCollection(
                segments,
                array=colors,
                cmap=line_cmap,
                norm=line_norm,
                linewidth=2,
                alpha=1.0,
                zorder=1,
            )
            plt.gca().add_collection(collection)
            return collection

        trajectory = self._access(episode, self.path_to_trajectory)

        x, y = [], []
        for location in trajectory:
            x.append(float(self._access(location, self.x)))
            y.append(float(self._access(location, self.y)))

        fig, ax = plt.subplots(figsize=self.figsize)
        draw_colored_line(x, y)

        start_marker = markers.MarkerStyle(marker=self.start_marker_shape)
        if self.path_to_rot_degrees is not None:
            rot_degrees = float(self._access(trajectory[0], self.path_to_rot_degrees))
            if self.adapt_rotation is not None:
                rot_degrees = self.adapt_rotation(rot_degrees)
            rotated = start_marker.get_transform().rotate_deg(rot_degrees)
            start_marker._transform = rotated

        # start
        ax.scatter(
            [x[0]], [y[0]], marker=start_marker, zorder=2, s=self.start_marker_scale
        )
        ax.scatter([x[-1]], [y[-1]], marker="s")  # stop

        if self.path_to_target_location is not None:
            target = self._access(episode, self.path_to_target_location)
            target_x = float(self._access(target, self.x))
            target_y = float(self._access(target, self.y))
            ax.scatter(
                [target_x],
                [target_y],
                marker="*",
            )

        ax.set_title(episode_id, fontsize=self.fontsize)
        for axis_name in ("x", "y"):
            ax.tick_params(axis=axis_name, labelsize=self.fontsize)

        return fig
class AgentViewViz(AbstractViz):
    """Visualization logging videos of the frames rendered in each episode.

    The frames of all episodes in a group are tiled into a single video, each
    frame being labeled with its episode id.
    """

    def __init__(
        self,
        label: str = "agent_view",
        max_clip_length: int = 100,  # control memory used when converting groups of images into clips
        max_video_length: int = -1,  # no limit, if > 0, limit the maximum video length (discard last frames)
        vector_task_source: Tuple[str, Dict[str, Any]] = (
            "render",
            {"mode": "raw_rgb_list"},
        ),
        episode_ids: Optional[Sequence[Union[Sequence[str], str]]] = None,
        fps: int = 4,
        max_render_size: int = 400,
        **other_base_kwargs,
    ):
        """Create the visualization.

        # Parameters

        label : Name of the visualization.
        max_clip_length : Number of frames converted into a clip at a time.
        max_video_length : If positive, maximum number of frames per video.
        vector_task_source : `(name, kwargs)` pair identifying the rendered
            data to visualize.
        episode_ids : Optional episode ids to visualize; a flat sequence of
            strings is treated as a single group.
        fps : Frames per second of the logged videos.
        max_render_size : Stored in `self.max_render_size`.
        other_base_kwargs : Forwarded to `AbstractViz`.
        """
        super().__init__(
            label,
            vector_task_sources=[vector_task_source],
            **other_base_kwargs,
        )
        self.max_clip_length = max_clip_length
        self.max_video_length = max_video_length
        self.fps = fps
        self.max_render_size = max_render_size

        self.episode_ids = (
            (
                list(episode_ids)
                if not isinstance(episode_ids[0], str)
                else [list(cast(List[str], episode_ids))]
            )
            if episode_ids is not None
            else None
        )

    def log(
        self,
        log_writer: SummaryWriter,
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        """Log one video per group of episodes.

        Nothing is logged when `render` is `None` or no visualization order
        can be determined. Episodes missing from `render` are skipped with a
        warning; groups without any episode are not logged.
        """
        if render is None:
            return

        datum_id = self._source_to_str(self.vector_task_sources[0], is_vector_task=True)

        viz_order, _ = self._auto_viz_order(task_outputs)
        if viz_order is None:
            get_logger().debug("agent view viz returning without visualizing")
            return

        for page, current_ids in enumerate(viz_order):
            images = []  # list of lists of rgb frames
            for episode_id in current_ids:
                # assert episode_id in render
                if episode_id not in render:
                    get_logger().warning(
                        "skipping viz for missing episode {}".format(episode_id)
                    )
                    continue
                images.append(
                    [
                        self._overlay_label(step[datum_id], episode_id)
                        for step in render[episode_id]
                    ]
                )
            if len(images) == 0:
                continue
            vid = self.make_vid(images)
            if vid is not None:
                log_writer.add_vid(
                    f"{self.mode}/{self.label}_group{page}",
                    vid,
                    global_step=num_steps,
                )

    @staticmethod
    def _overlay_label(
        img,
        text,
        pos=(0, 0),
        bg_color=(255, 255, 255),
        fg_color=(0, 0, 0),
        scale=0.4,
        thickness=1,
        margin=2,
        font_face=cv2.FONT_HERSHEY_SIMPLEX,
    ):
        """Draw `text` on a filled background rectangle onto `img`.

        The drawing calls are applied to `img` itself, which is also the
        returned object.
        """
        txt_size = cv2.getTextSize(text, font_face, scale, thickness)

        end_x = pos[0] + txt_size[0][0] + margin
        end_y = pos[1]

        # Text origin: shifted down by the text height plus the margin.
        pos = (pos[0], pos[1] + txt_size[0][1] + margin)

        cv2.rectangle(img, pos, (end_x, end_y), bg_color, cv2.FILLED)
        cv2.putText(
            img=img,
            text=text,
            org=pos,
            fontFace=font_face,
            fontScale=scale,
            color=fg_color,
            thickness=thickness,
            lineType=cv2.LINE_AA,
        )
        return img

    def make_vid(self, images):
        """Tile the frames of several episodes into a single video.

        # Parameters

        images : One list of frames per episode.

        # Returns

        The result of `process_video` for the tiled frames, or `None` when
        no episode has any frame.

        Episodes shorter than the longest one keep showing a copy of their
        last frame in which the first and third channels are overwritten
        with the second one; episodes without any frame are shown as an
        all-zero image.
        """
        max_length = max([len(ep) for ep in images])

        if max_length == 0:
            return None

        # Any available frame: used as the template for all-zero images.
        valid_im = None
        for ep in images:
            if len(ep) > 0:
                valid_im = ep[0]
                break

        frames = []
        for it in range(max_length):
            current_images = []
            for ep in images:
                if it < len(ep):
                    current_images.append(ep[it])
                elif len(ep) == 0:
                    # No frame at all for this episode. Checking the episode
                    # length (rather than `it == 0`) avoids indexing
                    # `ep[-1]` of an empty episode on later iterations.
                    current_images.append(np.zeros_like(valid_im))
                else:
                    gray = ep[-1].copy()
                    gray[:, :, 0] = gray[:, :, 2] = gray[:, :, 1]
                    current_images.append(gray)
            frames.append(tile_images(current_images))

        return process_video(
            frames, self.max_clip_length, self.max_video_length, fps=self.fps
        )
class AbstractTensorViz(AbstractViz):
    """Base class for visualizers that plot one rollout source per episode.

    Subclasses implement `make_fig` to turn the list of per-step arrays of an
    episode into a matplotlib figure.
    """

    def __init__(
        self,
        rollout_source: Union[str, Sequence[str]],
        label: Optional[str] = None,
        figsize: Tuple[float, float] = (3, 3),
        **other_base_kwargs,
    ):
        """Register `rollout_source`; the label defaults to the source name
        (or the `/`-joined path when a sequence is given)."""
        if label is None:
            label = (
                rollout_source[:]
                if isinstance(rollout_source, str)
                else "/".join(rollout_source)
            )

        super().__init__(label, rollout_sources=[rollout_source], **other_base_kwargs)

        self.figsize = figsize
        # Key under which this source is stored in each step's data
        self.datum_id = self._source_to_str(
            self.rollout_sources[0], is_vector_task=False
        )

    def log(
        self,
        log_writer: SummaryWriter,
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        """Log one group of figures per page of the visualization order.

        Returns early when `render` is `None` or no order is available.
        Episodes that are missing or have no steps are skipped with a warning;
        episodes whose steps never contain this source produce no figure.
        """
        if render is None:
            return

        viz_order, _ = self._auto_viz_order(task_outputs)
        if viz_order is None:
            get_logger().debug("tensor viz returning without visualizing")
            return

        for page, current_ids in enumerate(viz_order):
            figs = []
            for episode_id in current_ids:
                has_steps = episode_id in render and len(render[episode_id]) != 0
                if not has_steps:
                    get_logger().warning(
                        "skipping viz for missing or 0-length episode {}".format(
                            episode_id
                        )
                    )
                    continue

                episode_src = []
                for step in render[episode_id]:
                    if self.datum_id in step:
                        episode_src.append(step[self.datum_id])

                # If the last episode for an inference worker is of length 1, there's no captured rollout sources
                if len(episode_src) == 0:
                    continue
                figs.append(self.make_fig(episode_src, episode_id))

            if len(figs) == 0:
                continue

            tag = "{}/{}_group{}".format(self.mode, self.label, page)
            log_writer.add_figure(tag, figs, global_step=num_steps)
            # close all current figures (SummaryWriter already closes all figures we log)
            plt.close("all")

    @abc.abstractmethod
    def make_fig(
        self, episode_src: Sequence[np.ndarray], episode_id: str
    ) -> matplotlib.figure.Figure:
        """Build the figure for one episode from its per-step arrays."""
        raise NotImplementedError()
class TensorViz1D(AbstractTensorViz):
    """Plots a rollout source holding one value per step as a line plot."""

    def __init__(
        self,
        rollout_source: Union[str, Sequence[str]] = "action_log_probs",
        label: Optional[str] = None,
        figsize: Tuple[float, float] = (3, 3),
        **other_base_kwargs,
    ):
        super().__init__(
            rollout_source=rollout_source,
            label=label,
            figsize=figsize,
            **other_base_kwargs,
        )

    def make_fig(self, episode_src, episode_id):
        """Return a figure plotting the episode's per-step values.

        Asserts that the first step's array holds exactly one element.
        """
        assert episode_src[0].size == 1

        # Concatenate along step axis (0), then remove all singleton dims
        stacked = np.concatenate(episode_src, axis=0)
        values = stacked.squeeze()

        figure, axes = plt.subplots(figsize=self.figsize)
        axes.plot(values)
        axes.set_title(episode_id)
        axes.set_aspect("auto")
        plt.tight_layout()

        return figure
class TensorViz2D(AbstractTensorViz):
    """Plots a rollout source as a (step x feature) matrix per episode."""

    def __init__(
        self,
        rollout_source: Union[str, Sequence[str]] = ("memory_first_last", "rnn"),
        label: Optional[str] = None,
        figsize: Tuple[float, float] = (10, 10),
        fontsize: float = 5,
        **other_base_kwargs,
    ):
        super().__init__(rollout_source, label, figsize, **other_base_kwargs)
        self.fontsize = fontsize

    def make_fig(self, episode_src, episode_id):
        """Return a `matshow` figure of the episode's stacked per-step arrays.

        The arrays are concatenated along axis 0 (the step axis). Singleton
        axes other than the step axis are then removed until the result is
        two-dimensional; anything that is still not 2D raises an
        `AssertionError`.
        """
        # Concatenate along step axis (0)
        seq = np.concatenate(episode_src, axis=0)

        # Remove singleton non-step axes (e.g. num_layers if it's equal to 1),
        # but never the step axis itself: a blanket `squeeze()` would also
        # collapse the step axis of a single-step episode.
        squeezable = [d for d in range(1, seq.ndim) if seq.shape[d] == 1]
        num_to_drop = min(len(squeezable), max(seq.ndim - 2, 0))
        if num_to_drop > 0:
            seq = seq.squeeze(axis=tuple(squeezable[:num_to_drop]))

        assert len(seq.shape) == 2, "No support for higher-dimensions"

        # get_logger().debug("basic {} h render {}".format(episode_id, seq[:10, 0]))

        fig, ax = plt.subplots(figsize=self.figsize)
        ax.matshow(seq)
        ax.set_xlabel(episode_id, fontsize=self.fontsize)
        ax.tick_params(axis="x", labelsize=self.fontsize)
        ax.tick_params(axis="y", labelsize=self.fontsize)
        ax.tick_params(bottom=False)
        ax.set_aspect("auto")
        plt.tight_layout()

        return fig
class ActorViz(AbstractViz):
    """Plots, per episode, the stored `actor_probs` of every step as a matrix."""

    def __init__(
        self,
        label: str = "action_probs",
        action_names_path: Optional[Sequence[str]] = ("task_info", "action_names"),
        figsize: Tuple[float, float] = (1, 5),
        fontsize: float = 5,
        **other_base_kwargs,
    ):
        """Request the actor-critic source and remember where action names live.

        `action_names_path` is the access path used on the first task output
        to look up the action names; pass `None` to leave columns unlabeled.
        """
        super().__init__(label, actor_critic_source=True, **other_base_kwargs)

        self.action_names_path: Optional[Sequence[str]] = (
            list(action_names_path) if action_names_path is not None else None
        )
        self.figsize = figsize
        self.fontsize = fontsize
        # Filled lazily from the first available task output in `log`
        self.action_names: Optional[List[str]] = None

    def log(
        self,
        log_writer: SummaryWriter,
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        """Log one group of action-probability figures per page.

        Returns early when `render` is `None` or no visualization order is
        available. Episodes missing from `render` are skipped with a warning.
        Asserts that every step of a visualized episode holds `actor_probs`.
        """
        if render is None:
            return

        if (
            self.action_names is None
            and task_outputs is not None
            and len(task_outputs) > 0
            and self.action_names_path is not None
        ):
            self.action_names = list(
                self._access(task_outputs[0], self.action_names_path)
            )

        viz_order, _ = self._auto_viz_order(task_outputs)
        if viz_order is None:
            get_logger().debug("actor viz returning without visualizing")
            return

        for page, current_ids in enumerate(viz_order):
            figs = []
            for episode_id in current_ids:
                # assert episode_id in render
                if episode_id not in render:
                    get_logger().warning(
                        "skipping viz for missing episode {}".format(episode_id)
                    )
                    continue
                episode_src = [
                    step["actor_probs"]
                    for step in render[episode_id]
                    if "actor_probs" in step
                ]
                assert len(episode_src) == len(render[episode_id])
                figs.append(self.make_fig(episode_src, episode_id))
            if len(figs) == 0:
                continue
            log_writer.add_figure(
                "{}/{}_group{}".format(self.mode, self.label, page),
                figs,
                global_step=num_steps,
            )
            plt.close(
                "all"
            )  # close all current figures (SummaryWriter already closes all figures we log)

    def make_fig(self, episode_src, episode_id):
        """Return a `matshow` figure of the episode's concatenated action probabilities.

        When action names are known, asserts that there is one name per entry
        of the last axis and uses the names as column tick labels.
        """
        # Concatenate along step axis (0, reused from kept sampler axis)
        mat = np.concatenate(episode_src, axis=0)

        fig, ax = plt.subplots(figsize=self.figsize)
        ax.matshow(mat)

        if self.action_names is not None:
            assert len(self.action_names) == mat.shape[-1]
            # Fix the major tick positions (one per column) before assigning
            # labels, instead of relying on wherever the automatic locator
            # happens to place its ticks.
            ax.set_xticks(np.arange(mat.shape[-1]))
            ax.set_xticklabels(self.action_names, rotation="vertical")

        ax.set_xlabel(episode_id, fontsize=self.fontsize)
        ax.tick_params(axis="x", labelsize=self.fontsize)
        ax.tick_params(axis="y", labelsize=self.fontsize)
        ax.tick_params(bottom=False)

        # Gridlines based on minor ticks
        ax.set_yticks(np.arange(-0.5, mat.shape[0], 1), minor=True)
        ax.set_xticks(np.arange(-0.5, mat.shape[1], 1), minor=True)
        ax.grid(which="minor", color="w", linestyle="-", linewidth=0.05)
        ax.tick_params(
            axis="both", which="minor", left=False, top=False, right=False, bottom=False
        )

        ax.set_aspect("auto")
        plt.tight_layout()
        return fig
class VizSuite(AbstractViz):
    """Collects per-episode data for a set of visualizers and dispatches logging.

    `collect` is meant to be called by the engine to gather data from the
    actor-critic output, the rollout storage and the vector task; `log` is
    meant to be called by the logger and forwards to every visualizer.
    Collected data is kept in `self.data` (episode id -> list of per-step
    dicts) until `read_and_reset` is called.
    """

    def __init__(
        self,
        episode_ids: Optional[Sequence[Union[Sequence[str], str]]] = None,
        path_to_id: Sequence[str] = ("task_info", "id"),
        mode: str = "valid",
        force_episodes_and_max_episodes_in_group: bool = False,
        max_episodes_in_group: int = 8,
        *viz,
        **kw_viz,
    ):
        """Set up the suite and its visualizers.

        Visualizers may be passed positionally or as keyword arguments, either
        as `AbstractViz` instances or as `Builder`s (which are called to create
        the instance). Any other value is ignored.
        """
        super().__init__(max_episodes_in_group=max_episodes_in_group)
        self._setup(
            mode=mode,
            path_to_id=path_to_id,
            episode_ids=episode_ids,
            max_episodes_in_group=max_episodes_in_group,
        )
        self.force_episodes_and_max_episodes_in_group = (
            force_episodes_and_max_episodes_in_group
        )

        self.all_episode_ids = self._episodes_set()

        # Positional visualizers first, then keyword ones, as in the call
        self.viz = [
            v() if isinstance(v, Builder) else v
            for v in list(viz) + list(kw_viz.values())
            if isinstance(v, (Builder, AbstractViz))
        ]

        self.max_render_size: Optional[int] = None

        (
            self.rollout_sources,
            self.vector_task_sources,
            self.actor_critic_source,
        ) = self._setup_sources()

        self.data: Dict[str, List[Dict]] = (
            {}
        )  # dict of episode id to list of dicts with collected data
        self.last_it2epid: List[str] = []

    def _setup_sources(self):
        """Gather the data sources of all visualizers and configure them.

        Returns a tuple `(rollout_sources, vector_task_sources,
        actor_critic_source)` where the two lists are de-duplicated (keeping
        first-seen order) and the flag is True if any visualizer needs
        actor-critic data. Unless forcing is enabled, episodes requested by a
        visualizer but unknown to the suite are added as one extra group.
        """
        rollout_sources, vector_task_sources = [], []
        labels = []
        actor_critic_source = False
        new_episodes = []
        seen_new_episodes = set()
        for v in self.viz:
            labels.append(v.label)
            rollout_sources += v.rollout_sources
            vector_task_sources += v.vector_task_sources
            actor_critic_source |= v.actor_critic_source

            if (
                v.episode_ids is not None
                and not self.force_episodes_and_max_episodes_in_group
            ):
                cur_episodes = self._episodes_set(v.episode_ids)
                for ep in cur_episodes:
                    if (
                        self.all_episode_ids is not None
                        and ep not in self.all_episode_ids
                        and ep not in seen_new_episodes
                    ):
                        # Track already-added ids so an episode requested by
                        # several visualizers is not added to the group twice
                        seen_new_episodes.add(ep)
                        new_episodes.append(ep)
                        get_logger().info(
                            "Added new episode {} from {}".format(ep, v.label)
                        )

            v._setup(
                mode=self.mode,
                path_to_id=self.path_to_id,
                episode_ids=self.episode_ids,
                max_episodes_in_group=self.max_episodes_in_group,
                force=self.force_episodes_and_max_episodes_in_group,
            )

            if isinstance(v, AgentViewViz):
                self.max_render_size = v.max_render_size

        get_logger().info("Logging labels {}".format(labels))

        if len(new_episodes) > 0:
            get_logger().info("Added new episodes {}".format(new_episodes))
            self.episode_ids.append(new_episodes)  # new group with all added episodes
            self.all_episode_ids = self._episodes_set()

        # De-duplicate sources through their JSON form; dict keys are already
        # unique and keep insertion order, so the result order is deterministic
        rol_flat = {json.dumps(src, sort_keys=True): src for src in rollout_sources}
        vt_flat = {json.dumps(src, sort_keys=True): src for src in vector_task_sources}

        return (
            list(rol_flat.values()),
            list(vt_flat.values()),
            actor_critic_source,
        )

    def _episodes_set(self, episode_list=None) -> Optional[Set[str]]:
        """Flatten groups of episode ids into a set.

        Uses `self.episode_ids` when `episode_list` is `None`; returns `None`
        when there is nothing to flatten.
        """
        source = self.episode_ids if episode_list is None else episode_list
        if source is None:
            return None

        return {episode_id for group in source for episode_id in group}

    def empty(self):
        """Return True when no data has been collected since the last reset."""
        return len(self.data) == 0

    def _update(self, collected_data):
        """Merge `collected_data` into the latest stored step of each episode.

        Asserts that every episode already has stored data.
        """
        for epid in collected_data:
            assert epid in self.data
            self.data[epid][-1].update(collected_data[epid])

    def _append(self, vector_task_data):
        """Append a new step entry per episode, creating the list if needed."""
        for epid in vector_task_data:
            if epid in self.data:
                self.data[epid].append(vector_task_data[epid])
            else:
                self.data[epid] = [vector_task_data[epid]]

    def _collect_actor_critic(self, actor_critic):
        """Store `actor_probs` and `critic_value` for the tracked episodes.

        Episodes are taken from `self.last_it2epid`, where position `it` is
        used as the index along dim 1 of the probability and value tensors.
        """
        actor_critic_data = {
            epid: dict()
            for epid in self.last_it2epid
            if self.all_episode_ids is None or epid in self.all_episode_ids
        }
        if len(actor_critic_data) > 0 and actor_critic is not None:
            if self.actor_critic_source:
                # TODO this code only supports Discrete action spaces!
                probs = (
                    actor_critic.distributions.probs
                )  # step (=1) x sampler x agent (=1) x action
                values = actor_critic.values  # step x sampler x agent x 1

                for it, epid in enumerate(self.last_it2epid):
                    if epid in actor_critic_data:
                        # Select current episode (sampler axis will be reused as step axis)
                        prob = (
                            # probs.narrow(dim=0, start=it, length=1)  # works for sampler x action
                            probs.narrow(
                                dim=1, start=it, length=1
                            )  # step x sampler x agent x action -> step x 1 x agent x action
                            .squeeze(
                                0
                            )  # step x 1 x agent x action -> 1 x agent x action
                            # .squeeze(-2)  # 1 x agent x action -> 1 x action
                            .to("cpu")
                            .detach()
                            .numpy()
                        )
                        assert "actor_probs" not in actor_critic_data[epid]
                        actor_critic_data[epid]["actor_probs"] = prob

                        val = (
                            # values.narrow(dim=0, start=it, length=1)  # works for sampler x 1
                            values.narrow(
                                dim=1, start=it, length=1
                            )  # step x sampler x agent x 1 -> step x 1 x agent x 1
                            .squeeze(0)  # step x 1 x agent x 1 -> 1 x agent x 1
                            # .squeeze(-2)  # 1 x agent x 1 -> 1 x 1
                            .to("cpu")
                            .detach()
                            .numpy()
                        )
                        assert "critic_value" not in actor_critic_data[epid]
                        actor_critic_data[epid]["critic_value"] = val

        self._update(actor_critic_data)

    def _collect_rollout(self, rollout, alive):
        """Store the latest value of every rollout source for alive episodes.

        `alive` holds the positions (within `self.last_it2epid`) of the
        samplers that are still running; it must not contain duplicates.
        """
        alive_set = set(alive)
        assert len(alive_set) == len(alive)
        alive_it2epid = [
            epid for it, epid in enumerate(self.last_it2epid) if it in alive_set
        ]
        rollout_data = {
            epid: dict()
            for epid in alive_it2epid
            if self.all_episode_ids is None or epid in self.all_episode_ids
        }
        if len(rollout_data) > 0 and rollout is not None:
            for source in self.rollout_sources:
                datum_id = self._source_to_str(source, is_vector_task=False)

                storage, path = source[0], source[1:]

                # Access storage
                res = getattr(rollout, storage)
                episode_dim = rollout.dim_names.index("sampler")

                # Access sub-storage if path not empty
                if len(path) > 0:
                    if storage == "memory_first_last":
                        storage = "memory"

                    flattened_name = rollout.unflattened_to_flattened[storage][
                        tuple(path)
                    ]
                    # for path_step in path:
                    #     res = res[path_step]
                    res = res[flattened_name]
                    # Sub-storage entries come as (tensor, sampler dim) pairs
                    res, episode_dim = res

                if rollout.step > 0:
                    if rollout.step > res.shape[0]:
                        # e.g. rnn with only latest memory saved
                        rollout_step = res.shape[0] - 1
                    else:
                        rollout_step = rollout.step - 1
                else:
                    if rollout.num_steps - 1 < res.shape[0]:
                        rollout_step = rollout.num_steps - 1
                    else:
                        # e.g. rnn with only latest memory saved
                        rollout_step = res.shape[0] - 1

                # Select latest step
                res = res.narrow(
                    dim=0,
                    start=rollout_step,
                    length=1,  # step dimension
                )  # 1 x ... x sampler x ...

                # get_logger().debug("basic collect h {}".format(res[..., 0]))

                for it, epid in enumerate(alive_it2epid):
                    if epid in rollout_data:
                        # Select current episode and remove episode/sampler axis
                        datum = (
                            res.narrow(dim=episode_dim, start=it, length=1)
                            .squeeze(axis=episode_dim)
                            .to("cpu")
                            .detach()
                            .numpy()
                        )  # 1 x ... (no sampler dim)
                        # get_logger().debug("basic collect ep {} h {}".format(epid, res[..., 0]))
                        assert datum_id not in rollout_data[epid]
                        rollout_data[epid][
                            datum_id
                        ] = datum.copy()  # copy needed when running on CPU!

        self._update(rollout_data)

    def _collect_vector_task(self, vector_task):
        """Query every vector-task source and start a new step per episode.

        Returns the list of current episode ids, one per task, in the order
        reported by `vector_task.attr("task_info")`.
        """
        it2epid = [
            self._access(info, self.path_to_id[1:])
            for info in vector_task.attr("task_info")
        ]

        # get_logger().debug("basic epids {}".format(it2epid))

        default_max_size = 400

        def limit_spatial_res(data: np.ndarray, max_size=default_max_size):
            """Downscale `data` so that neither of its first two dims exceeds `max_size`."""
            if max_size is None:
                # `self.max_render_size` stays None when the suite holds no
                # AgentViewViz; fall back to the default instead of failing
                # on the comparison below.
                max_size = default_max_size
            if data.shape[0] <= max_size and data.shape[1] <= max_size:
                return data
            else:
                f = float(max_size) / max(data.shape[0], data.shape[1])
                size = (int(data.shape[1] * f), int(data.shape[0] * f))
                return cv2.resize(data, size, 0, 0, interpolation=cv2.INTER_AREA)

        vector_task_data = {
            epid: dict()
            for epid in it2epid
            if self.all_episode_ids is None or epid in self.all_episode_ids
        }
        if len(vector_task_data) > 0:
            for (
                source
            ) in self.vector_task_sources:  # these are observations for next step!
                datum_id = self._source_to_str(source, is_vector_task=True)
                method, kwargs = source
                res = getattr(vector_task, method)(**kwargs)
                if not isinstance(res, Sequence):
                    assert len(it2epid) == 1
                    res = [res]
                if method == "render":
                    res = [limit_spatial_res(r, self.max_render_size) for r in res]
                assert len(res) == len(it2epid)
                for datum, epid in zip(res, it2epid):
                    if epid in vector_task_data:
                        assert datum_id not in vector_task_data[epid]
                        vector_task_data[epid][datum_id] = datum

        self._append(vector_task_data)

        return it2epid

    # to be called by engine
    def collect(self, vector_task=None, alive=None, rollout=None, actor_critic=None):
        """Collect data from whichever inputs are provided.

        `AssertionError` and `RuntimeError` raised while collecting are logged
        and swallowed so that visualization problems do not stop the caller.
        """
        if actor_critic is not None:
            # in phase with last_it2epid
            try:
                self._collect_actor_critic(actor_critic)
            except (AssertionError, RuntimeError):
                get_logger().debug(
                    msg="Failed collect (actor_critic) for viz due to exception:",
                    exc_info=sys.exc_info(),
                )
                get_logger().error("Failed collect (actor_critic) for viz")

        if alive is not None and rollout is not None:
            # in phase with last_it2epid that stay alive
            try:
                self._collect_rollout(rollout=rollout, alive=alive)
            except (AssertionError, RuntimeError):
                get_logger().debug(
                    msg="Failed collect (rollout) for viz due to exception:",
                    exc_info=sys.exc_info(),
                )
                get_logger().error("Failed collect (rollout) for viz")

        # Always call this one last!
        if vector_task is not None:
            # in phase with identifiers of current episodes from vector_task
            try:
                self.last_it2epid = self._collect_vector_task(vector_task)
            except (AssertionError, RuntimeError):
                get_logger().debug(
                    msg="Failed collect (vector_task) for viz due to exception:",
                    exc_info=sys.exc_info(),
                )
                get_logger().error("Failed collect (vector_task) for viz")

    def read_and_reset(self) -> Dict[str, List[Dict[str, Any]]]:
        """Return all collected data and start over with an empty store."""
        res = self.data
        self.data = {}
        # get_logger().debug("Returning episodes {}".format(list(res.keys())))
        return res

    # to be called by logger
    def log(
        self,
        log_writer: SummaryWriter,
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        """Forward logging to every visualizer.

        A visualizer raising `AssertionError` or `RuntimeError` is dropped for
        this call (the failure is logged) and the remaining ones still run.
        """
        for v in self.viz:
            try:
                v.log(log_writer, task_outputs, render, num_steps)
            except (AssertionError, RuntimeError):
                get_logger().debug(
                    msg=f"Dropped {v.label} viz due to exception:",
                    exc_info=sys.exc_info(),
                )
                get_logger().error(f"Dropped {v.label} viz")
class TensorboardSummarizer:
    """Assumption: tensorboard tags/labels include a valid/test/train substr indicating the data modality"""

    def __init__(
        self,
        experiment_to_train_events_paths_map: Dict[str, Sequence[str]],
        experiment_to_test_events_paths_map: Dict[str, Sequence[str]],
        eval_min_mega_steps: Optional[Sequence[float]] = None,
        tensorboard_tags_to_labels_map: Optional[Dict[str, str]] = None,
        tensorboard_output_summary_folder: str = "tensorboard_plotter_output",
    ):
        """Read the given event files.

        Both maps go from experiment name to a sequence of event file paths.
        `eval_min_mega_steps` optionally lists the step thresholds (in millions
        of steps) at which evaluation scores are reported.
        `tensorboard_tags_to_labels_map` optionally renames tags; tags missing
        from it are dropped when reading. Raises `ImportError` when tensorflow
        is unavailable.
        """
        if not _TF_AVAILABLE:
            raise ImportError(
                "Please install tensorflow e.g. with `pip install tensorflow` to enable TensorboardSummarizer"
            )

        self.experiment_to_train_events_paths_map = experiment_to_train_events_paths_map
        self.experiment_to_test_events_paths_map = experiment_to_test_events_paths_map
        train_experiments = set(list(experiment_to_train_events_paths_map.keys()))
        test_experiments = set(list(experiment_to_test_events_paths_map.keys()))
        assert (train_experiments - test_experiments) in [
            set(),
            train_experiments,
        ], (
            f"`experiment_to_test_events_paths_map` must have identical keys (experiment names) to those"
            f" in `experiment_to_train_events_paths_map`, or be empty."
            f" Got {train_experiments} train keys and {test_experiments} test keys."
        )

        self.eval_min_mega_steps = eval_min_mega_steps
        self.tensorboard_tags_to_labels_map = tensorboard_tags_to_labels_map
        if self.tensorboard_tags_to_labels_map is not None:
            for tag, label in self.tensorboard_tags_to_labels_map.items():
                assert ("valid" in label) + ("train" in label) + (
                    "test" in label
                ) == 1, (
                    f"One (and only one) of {'train', 'valid', 'test'} must be part of the label for"
                    f" tag {tag} ({label} given)."
                )
        self.tensorboard_output_summary_folder = tensorboard_output_summary_folder

        self.train_data = self._read_tensorflow_experiment_events(
            self.experiment_to_train_events_paths_map
        )
        self.test_data = self._read_tensorflow_experiment_events(
            self.experiment_to_test_events_paths_map
        )

    def _read_tensorflow_experiment_events(
        self, experiment_to_events_paths_map, skip_map=False
    ):
        """Read all scalar summaries of every experiment.

        Returns a dict from experiment name to a dict from label to a list of
        `dict(score, time, steps)` entries in reading order. With `skip_map`
        (or without a tag map) the raw tag is used as label.
        """

        def my_summary_iterator(path):
            # Reading stops quietly at the first IOError
            try:
                for r in tf_record.tf_record_iterator(path):
                    yield event_pb2.Event.FromString(r)
            except IOError:
                get_logger().debug(f"IOError for path {path}")

        collected_data = {}
        for experiment_name, path_list in experiment_to_events_paths_map.items():
            experiment_data = defaultdict(list)
            for filename_path in path_list:
                for event in my_summary_iterator(filename_path):
                    for value in event.summary.value:
                        if self.tensorboard_tags_to_labels_map is None or skip_map:
                            label = value.tag
                        elif value.tag in self.tensorboard_tags_to_labels_map:
                            label = self.tensorboard_tags_to_labels_map[value.tag]
                        else:
                            continue
                        experiment_data[label].append(
                            dict(
                                score=value.simple_value,
                                time=event.wall_time,
                                steps=event.step,
                            )
                        )
            collected_data[experiment_name] = experiment_data

        return collected_data

    def _eval_vs_train_time_steps(self, eval_data, train_data):
        """Pair evaluation scores with training wall times.

        For every threshold (from `self.eval_min_mega_steps`, or derived from
        each eval entry's steps when that is `None`), the first eval entry and
        the first train entry at or past the threshold are located; the eval
        score and steps are reported together with the train entry's time.
        The first train entry is prepended as the point at step 0.

        Returns `(scores, times, steps)`. Returns three empty lists when
        either input is empty. Thresholds beyond the last eval entry are
        ignored, and thresholds beyond the last train entry use the last
        train entry's time.
        """
        if len(eval_data) == 0 or len(train_data) == 0:
            return [], [], []

        min_mega_steps = self.eval_min_mega_steps
        if min_mega_steps is None:
            min_mega_steps = [(item["steps"] - 1) / 1e6 for item in eval_data]

        scores, times, steps = [], [], []

        i, t, last_i = 0, 0, -1
        last_t = len(train_data) - 1
        # Iterate the thresholds directly: indexing them by `len(times)` never
        # advances when no new entry is appended (the `i == last_i` case).
        for mega_steps in min_mega_steps:
            min_steps = mega_steps * 1e6

            while i < len(eval_data) and eval_data[i]["steps"] < min_steps:
                i += 1
            if i == len(eval_data):
                # No evaluation at or past this (or any later) threshold
                break

            while t < last_t and train_data[t]["steps"] < min_steps:
                t += 1

            # step might be missing in valid! (and would duplicate future value at previous steps!)
            # solution: move forward last entry's time if no change in i (instead of new entry)
            if i == last_i:
                times[-1] = train_data[t]["time"]
            else:
                scores.append(eval_data[i]["score"])
                times.append(train_data[t]["time"])
                steps.append(eval_data[i]["steps"])

            last_i = i

        scores.insert(0, train_data[0]["score"])
        times.insert(0, train_data[0]["time"])
        steps.insert(0, 0)

        return scores, times, steps

    def _train_vs_time_steps(self, train_data):
        """Return train `(scores, times, steps)` up to the last eval threshold.

        Entries are taken in order until one reaches the last value of
        `self.eval_min_mega_steps` (all entries when that is `None`). Returns
        three empty lists for empty input.
        """
        if len(train_data) == 0:
            return [], [], []

        last_eval_step = (
            self.eval_min_mega_steps[-1] * 1e6
            if self.eval_min_mega_steps is not None
            else float("inf")
        )

        scores = [train_data[0]["score"]]
        times = [train_data[0]["time"]]
        steps = [train_data[0]["steps"]]

        t = 1
        while steps[-1] < last_eval_step and t < len(train_data):
            scores.append(train_data[t]["score"])
            times.append(train_data[t]["time"])
            steps.append(train_data[t]["steps"])
            t += 1

        return scores, times, steps

    def make_tensorboard_summary(self):
        """Write the summarized test, valid and train curves of every experiment.

        One `SummaryWriter` is opened per experiment under
        `self.tensorboard_output_summary_folder` and closed even if writing
        fails. Test labels without a matching train label are skipped with a
        printed message; valid labels without one raise `AssertionError`.
        """
        all_experiments = list(self.experiment_to_train_events_paths_map.keys())

        for experiment_name in all_experiments:
            summary_writer = SummaryWriter(
                os.path.join(self.tensorboard_output_summary_folder, experiment_name)
            )
            try:
                experiment_train_data = self.train_data[experiment_name]
                # Test data may be empty or lack this experiment altogether
                experiment_test_data = self.test_data.get(experiment_name, {})

                for test_label in sorted(experiment_test_data.keys()):
                    train_label = test_label.replace("valid", "test").replace(
                        "test", "train"
                    )
                    if train_label not in experiment_train_data:
                        print(
                            f"Missing matching 'train' label {train_label} for eval label {test_label}. Skipping"
                        )
                        continue
                    train_data = experiment_train_data[train_label]
                    test_data = experiment_test_data[test_label]
                    scores, times, steps = self._eval_vs_train_time_steps(
                        test_data, train_data
                    )
                    for score, t, step in zip(scores, times, steps):
                        summary_writer.add_scalar(
                            test_label, score, global_step=step, walltime=t
                        )

                valid_labels = sorted(
                    key for key in experiment_train_data.keys() if "valid" in key
                )
                for valid_label in valid_labels:
                    train_label = valid_label.replace("valid", "train")
                    assert (
                        train_label in experiment_train_data
                    ), f"Missing matching 'train' label {train_label} for valid label {valid_label}"
                    train_data = experiment_train_data[train_label]
                    valid_data = experiment_train_data[valid_label]
                    scores, times, steps = self._eval_vs_train_time_steps(
                        valid_data, train_data
                    )
                    for score, t, step in zip(scores, times, steps):
                        summary_writer.add_scalar(
                            valid_label, score, global_step=step, walltime=t
                        )

                train_labels = sorted(
                    key for key in experiment_train_data.keys() if "train" in key
                )
                for train_label in train_labels:
                    scores, times, steps = self._train_vs_time_steps(
                        experiment_train_data[train_label]
                    )
                    for score, t, step in zip(scores, times, steps):
                        summary_writer.add_scalar(
                            train_label, score, global_step=step, walltime=t
                        )
            finally:
                summary_writer.close()
================================================
FILE: allenact_plugins/__init__.py
================================================
# Expose the package version when the `_version` module is present.
try:
    # noinspection PyProtectedMember,PyUnresolvedReferences
    from allenact_plugins._version import __version__
except ModuleNotFoundError:
    # The `_version` module could not be found: keep the package importable
    # and report the version as unknown.
    __version__ = None
================================================
FILE: allenact_plugins/babyai_plugin/__init__.py
================================================
from allenact.utils.system import ImportChecker
# Import `babyai` under `ImportChecker`, handing it the installation
# instructions below (presumably shown when the import fails; see
# `allenact.utils.system.ImportChecker` to confirm).
with ImportChecker(
    "\n\nPlease install babyai with:\n\n"
    "pip install -e git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd#egg=babyai\n",
):
    # noinspection PyUnresolvedReferences
    import babyai
================================================
FILE: allenact_plugins/babyai_plugin/babyai_constants.py
================================================
import os
from pathlib import Path
# Absolute path of the `data/demos` directory located next to this file.
BABYAI_EXPERT_TRAJECTORIES_DIR = os.path.abspath(
    os.path.join(os.path.dirname(Path(__file__)), "data", "demos")
)
================================================
FILE: allenact_plugins/babyai_plugin/babyai_models.py
================================================
from typing import Dict, Optional, List, cast, Tuple, Any
import babyai.model
import babyai.rl
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from gym.spaces.dict import Dict as SpaceDict
from allenact.algorithms.onpolicy_sync.policy import (
ActorCriticModel,
ObservationType,
Memory,
DistributionType,
)
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput
class BabyAIACModelWrapped(babyai.model.ACModel):
def __init__(
    self,
    obs_space: Dict[str, int],
    action_space: gym.spaces.Discrete,
    image_dim=128,
    memory_dim=128,
    instr_dim=128,
    use_instr=False,
    lang_model="gru",
    use_memory=False,
    arch="cnn1",
    aux_info=None,
    include_auxiliary_head: bool = False,
):
    """Build the wrapped BabyAI actor-critic model.

    All arguments except `include_auxiliary_head` are forwarded to
    `babyai.model.ACModel`; `arch="cnn2"` is handled here by building the
    parent with `"cnn1"` and then swapping the first convolution.
    `include_auxiliary_head` adds an extra head with `action_space.n` outputs
    on top of the memory embedding.
    """
    # Remember the request for "cnn2" before the parent constructor runs,
    # since the parent is built with "cnn1" in that case.
    self.use_cnn2 = arch == "cnn2"
    super().__init__(
        obs_space=obs_space,
        action_space=action_space,
        image_dim=image_dim,
        memory_dim=memory_dim,
        instr_dim=instr_dim,
        use_instr=use_instr,
        lang_model=lang_model,
        use_memory=use_memory,
        arch="cnn1" if self.use_cnn2 else arch,
        aux_info=aux_info,
    )

    self.semantic_embedding = None
    if self.use_cnn2:
        # Embedding with 33 entries of size 8; the replacement first
        # convolution takes 24 input channels and the remaining layers of the
        # parent's `image_conv` are reused unchanged.
        self.semantic_embedding = nn.Embedding(33, embedding_dim=8)
        self.image_conv = nn.Sequential(
            nn.Conv2d(in_channels=24, out_channels=16, kernel_size=(2, 2)),
            *self.image_conv[1:]  # type:ignore
        )
        self.image_conv[0].apply(babyai.model.initialize_parameters)

    self.include_auxiliary_head = include_auxiliary_head
    if self.use_memory and self.lang_model == "gru":
        # Replace the parent's memory module with an LSTM over image features
        self.memory_rnn = nn.LSTM(self.image_dim, self.memory_dim)

    if self.include_auxiliary_head:
        # Auxiliary head: memory embedding -> 64 -> one logit per action
        self.aux = nn.Sequential(
            nn.Linear(self.memory_dim, 64),
            nn.Tanh(),
            nn.Linear(64, action_space.n),
        )
        self.aux.apply(babyai.model.initialize_parameters)

    self.train()
def forward_once(self, obs, memory, instr_embedding=None):
    """Run a single step of the model.

    Copied (with minor modifications) from
    `babyai.model.ACModel.forward(...)`.

    Returns a dict with the step's `"embedding"`, the (possibly updated)
    `"memory"` and a dict of `"extra_predictions"` (empty when the model has
    no auxiliary info heads).
    """
    uses_instr = self.use_instr

    if uses_instr and instr_embedding is None:
        instr_embedding = self._get_instr_embedding(obs.instr)

    if uses_instr and self.lang_model == "attgru":
        # outputs: B x L x D
        # memory: B x M
        instr_mask = (obs.instr != 0).float()
        # The mask tensor has the same length as obs.instr, and
        # thus can be both shorter and longer than instr_embedding.
        # It can be longer if instr_embedding is computed
        # for a subbatch of obs.instr.
        # It can be shorter if obs.instr is a subbatch of
        # the batch that instr_embeddings was computed for.
        # Here, we make sure that mask and instr_embeddings
        # have equal length along dimension 1.
        instr_mask = instr_mask[:, : instr_embedding.shape[1]]
        instr_embedding = instr_embedding[:, : instr_mask.shape[1]]

        attn_keys = self.memory2key(memory)
        attn_logits = (attn_keys[:, None, :] * instr_embedding).sum(
            2
        ) + 1000 * instr_mask
        attn_weights = F.softmax(attn_logits, dim=1)
        instr_embedding = (instr_embedding * attn_weights[:, :, None]).sum(1)

    # Swap dims 1 and 3, then dims 2 and 3, of the image batch
    x = obs.image.transpose(1, 3).transpose(2, 3)

    if self.arch.startswith("expert_filmcnn"):
        x = self.image_conv(x)
        for controller in self.controllers:
            x = controller(x, instr_embedding)
        x = F.relu(self.film_pool(x))
    else:
        x = self.image_conv(x.contiguous())

    x = x.reshape(x.shape[0], -1)

    if self.use_memory:
        # The memory tensor packs two states side by side along dim 1
        split_at = self.semi_memory_size
        rnn_state = self.memory_rnn(
            x, (memory[:, :split_at], memory[:, split_at:])
        )
        embedding = rnn_state[0]
        memory = torch.cat(rnn_state, dim=1)  # type: ignore
    else:
        embedding = x

    if uses_instr and "filmcnn" not in self.arch:
        embedding = torch.cat((embedding, instr_embedding), dim=1)

    extra_predictions = {}
    if getattr(self, "aux_info", None):
        for info in self.extra_heads:
            extra_predictions[info] = self.extra_heads[info](embedding)

    return {
        "embedding": embedding,
        "memory": memory,
        "extra_predictions": extra_predictions,
    }
def forward_loop(
    self,
    observations: ObservationType,
    recurrent_hidden_states: torch.FloatTensor,
    prev_actions: torch.Tensor,
    masks: torch.FloatTensor,
):
    """Run the model step by step over a rollout using `forward_once`.

    Reads `observations["minigrid_ego_image"]` and, when present,
    `observations["minigrid_mission"]`. `recurrent_hidden_states` must have a
    first dimension of size 1 and its second dimension is taken as the number
    of samplers. `prev_actions` is not used.

    Returns a tuple of the `ActorCriticOutput` for all steps and the memory
    after every step stacked along a new first dimension.
    """
    results = []
    images = cast(torch.FloatTensor, observations["minigrid_ego_image"]).float()
    instrs: Optional[torch.Tensor] = None
    if "minigrid_mission" in observations:
        instrs = cast(torch.Tensor, observations["minigrid_mission"])

    _, nsamplers, _ = recurrent_hidden_states.shape
    rollouts_len = images.shape[0] // nsamplers

    obs = babyai.rl.DictList()

    # Split the flattened leading dimension into (rollout step, sampler)
    images = images.view(rollouts_len, nsamplers, *images.shape[1:])
    masks = masks.view(rollouts_len, nsamplers, *masks.shape[1:])  # type:ignore

    # needs_reset = (masks != 1.0).view(nrollouts, -1).any(-1)
    if instrs is not None:
        instrs = instrs.view(rollouts_len, nsamplers, instrs.shape[-1])

    # An instruction embedding is (re)computed wherever the mask is not 1,
    # and always at the first step.
    needs_instr_reset_mask = masks != 1.0
    needs_instr_reset_mask[0] = 1
    needs_instr_reset_mask = needs_instr_reset_mask.squeeze(-1)
    instr_embeddings: Optional[torch.Tensor] = None
    if self.use_instr:
        instr_reset_multi_inds = [
            (int(a), int(b))
            for a, b in zip(*np.where(needs_instr_reset_mask.cpu().numpy()))
        ]
        time_ind_to_which_need_instr_reset: List[List] = [
            [] for _ in range(rollouts_len)
        ]
        # Position of every (step, sampler) pair within the batch of freshly
        # computed embeddings below
        reset_multi_ind_to_index = {
            mi: i for i, mi in enumerate(instr_reset_multi_inds)
        }
        for a, b in instr_reset_multi_inds:
            time_ind_to_which_need_instr_reset[a].append(b)

        unique_instr_embeddings = self._get_instr_embedding(
            instrs[needs_instr_reset_mask]
        )

        # All samplers are reset at step 0, so the first `nsamplers`
        # embeddings belong to that step.
        instr_embeddings_list = [unique_instr_embeddings[:nsamplers]]
        current_instr_embeddings_list = list(instr_embeddings_list[-1])

        for time_ind in range(1, rollouts_len):
            if len(time_ind_to_which_need_instr_reset[time_ind]) == 0:
                instr_embeddings_list.append(instr_embeddings_list[-1])
            else:
                for sampler_needing_reset_ind in time_ind_to_which_need_instr_reset[
                    time_ind
                ]:
                    current_instr_embeddings_list[sampler_needing_reset_ind] = (
                        unique_instr_embeddings[
                            reset_multi_ind_to_index[
                                (time_ind, sampler_needing_reset_ind)
                            ]
                        ]
                    )

                instr_embeddings_list.append(
                    torch.stack(current_instr_embeddings_list, dim=0)
                )

        instr_embeddings = torch.stack(instr_embeddings_list, dim=0)

    assert recurrent_hidden_states.shape[0] == 1
    memory = recurrent_hidden_states[0]
    # instr_embedding: Optional[torch.Tensor] = None
    for i in range(rollouts_len):
        obs.image = images[i]
        if "minigrid_mission" in observations:
            obs.instr = instrs[i]

        # reset = needs_reset[i].item()
        # if self.baby_ai_model.use_instr and (reset or i == 0):
        #     instr_embedding = self.baby_ai_model._get_instr_embedding(obs.instr)

        results.append(
            self.forward_once(
                obs,
                memory=memory * masks[i],
                # No embeddings exist when instructions are not used; indexing
                # `None` here would raise a TypeError.
                instr_embedding=(
                    None if instr_embeddings is None else instr_embeddings[i]
                ),
            )
        )
        memory = results[-1]["memory"]

    embedding = torch.cat([r["embedding"] for r in results], dim=0)

    extra_predictions_list = [r["extra_predictions"] for r in results]
    extra_predictions = {
        key: torch.cat([ep[key] for ep in extra_predictions_list], dim=0)
        for key in extra_predictions_list[0]
    }
    return (
        ActorCriticOutput(
            distributions=CategoricalDistr(
                logits=self.actor(embedding),
            ),
            values=self.critic(embedding),
            extras=(
                extra_predictions
                if not self.include_auxiliary_head
                else {
                    **extra_predictions,
                    "auxiliary_distributions": cast(
                        Any, CategoricalDistr(logits=self.aux(embedding))
                    ),
                }
            ),
        ),
        torch.stack([r["memory"] for r in results], dim=0),
    )
# noinspection PyMethodOverriding
def forward(
    self,
    observations: ObservationType,
    recurrent_hidden_states: torch.FloatTensor,
    prev_actions: torch.Tensor,
    masks: torch.FloatTensor,
):
    """Run the model over a (possibly multi-step) rollout.

    The inputs are first flattened by `adapt_inputs`. When
    `self.lang_model` is not `"gru"` the work is delegated to
    `forward_loop` (one step at a time). Otherwise the whole rollout is
    processed here in a batched fashion:

    1. images are (optionally) embedded and passed through `image_conv`,
    2. instruction embeddings are computed only for the `(time, sampler)`
       entries flagged in `needs_instr_reset_mask` and carried forward in
       time for the remaining entries,
    3. if `self.use_memory`, the recurrent module is run block by block,
       where a new block starts at every time index at which at least one
       sampler is flagged for reset,
    4. actor / critic (and optional auxiliary) heads are applied.

    # Parameters

    observations : Mapping read for the keys `"minigrid_ego_image"` and
        (when instructions are used) `"minigrid_mission"`.
    recurrent_hidden_states : Recurrent state; its leading dimension is
        asserted to be 1 on the `"gru"` path.
    prev_actions : Only forwarded to `adapt_inputs` / `forward_loop`.
    masks : Compared against `1.0`; entries different from `1.0` mark
        where instruction embeddings (and the hidden state) are reset.
        Presumably 0 at episode starts and 1 elsewhere - TODO confirm.

    # Returns

    Whatever `adapt_result` returns for the computed actor-critic output
    and hidden states.
    """
    (
        observations,
        recurrent_hidden_states,
        prev_actions,
        masks,
        num_steps,
        num_samplers,
        num_agents,
        num_layers,
    ) = self.adapt_inputs(
        observations, recurrent_hidden_states, prev_actions, masks
    )

    # Any language model other than "gru" is handled by the step-by-step loop.
    if self.lang_model != "gru":
        ac_output, hidden_states = self.forward_loop(
            observations=observations,
            recurrent_hidden_states=recurrent_hidden_states,
            prev_actions=prev_actions,
            masks=masks,  # type: ignore
        )

        # Only the last entry along the first dimension of the hidden states is kept.
        return self.adapt_result(
            ac_output,
            hidden_states[-1:],
            num_steps,
            num_samplers,
            num_agents,
            num_layers,
            observations,
        )

    assert recurrent_hidden_states.shape[0] == 1

    images = cast(torch.FloatTensor, observations["minigrid_ego_image"])
    if self.use_cnn2:
        images_shape = images.shape
        # Offset the 3 entries of the last dimension by 0 / 11 / 22 before the
        # embedding lookup (presumably so each channel indexes its own range of
        # `semantic_embedding` - TODO confirm).
        # noinspection PyArgumentList
        images = images + torch.LongTensor([0, 11, 22]).view(  # type:ignore
            1, 1, 1, 3
        ).to(images.device)
        images = self.semantic_embedding(images).view(  # type:ignore
            *images_shape[:3], 24
        )
    # Move the last dimension to position 1 (channels-first) and cast to float.
    images = images.permute(0, 3, 1, 2).float()  # type:ignore

    _, nsamplers, _ = recurrent_hidden_states.shape
    # Images are flattened over (time, sampler), so this recovers the time length.
    rollouts_len = images.shape[0] // nsamplers

    masks = cast(
        torch.FloatTensor, masks.view(rollouts_len, nsamplers, *masks.shape[1:])
    )
    instrs: Optional[torch.Tensor] = None
    if "minigrid_mission" in observations and self.use_instr:
        instrs = cast(torch.FloatTensor, observations["minigrid_mission"])
        instrs = instrs.view(rollouts_len, nsamplers, instrs.shape[-1])

    # True wherever the mask is not exactly 1; the first time step is always flagged
    # so that every sampler gets an instruction embedding at the start.
    needs_instr_reset_mask = masks != 1.0
    needs_instr_reset_mask[0] = 1
    needs_instr_reset_mask = needs_instr_reset_mask.squeeze(-1)
    # Time indices at which at least one sampler is flagged, followed by
    # `rollouts_len` as an end sentinel for the pairwise iteration below.
    blocking_inds: List[int] = np.where(
        needs_instr_reset_mask.view(rollouts_len, -1).any(-1).cpu().numpy()
    )[0].tolist()
    blocking_inds.append(rollouts_len)

    instr_embeddings: Optional[torch.Tensor] = None
    if self.use_instr:
        # (time, sampler) index pairs that need a fresh instruction embedding.
        instr_reset_multi_inds = list(
            (int(a), int(b))
            for a, b in zip(*np.where(needs_instr_reset_mask.cpu().numpy()))
        )
        time_ind_to_which_need_instr_reset: List[List] = [
            [] for _ in range(rollouts_len)
        ]
        # Position of each (time, sampler) pair inside `unique_instr_embeddings`.
        reset_multi_ind_to_index = {
            mi: i for i, mi in enumerate(instr_reset_multi_inds)
        }
        for a, b in instr_reset_multi_inds:
            time_ind_to_which_need_instr_reset[a].append(b)
        # Embed only the flagged instructions, in a single call.
        unique_instr_embeddings = self._get_instr_embedding(
            instrs[needs_instr_reset_mask]
        )
        # All samplers are flagged at time 0, so the first `nsamplers` rows
        # are the time-0 embeddings.
        instr_embeddings_list = [unique_instr_embeddings[:nsamplers]]
        current_instr_embeddings_list = list(instr_embeddings_list[-1])
        for time_ind in range(1, rollouts_len):
            if len(time_ind_to_which_need_instr_reset[time_ind]) == 0:
                # Nothing changed at this step: reuse the previous step's embeddings.
                instr_embeddings_list.append(instr_embeddings_list[-1])
            else:
                for sampler_needing_reset_ind in time_ind_to_which_need_instr_reset[
                    time_ind
                ]:
                    current_instr_embeddings_list[sampler_needing_reset_ind] = (
                        unique_instr_embeddings[
                            reset_multi_ind_to_index[
                                (time_ind, sampler_needing_reset_ind)
                            ]
                        ]
                    )
                instr_embeddings_list.append(
                    torch.stack(current_instr_embeddings_list, dim=0)
                )
        instr_embeddings = torch.stack(instr_embeddings_list, dim=0)

    # The following code can be used to compute the instr_embeddings in another way
    # and thus verify that the above logic is (more likely to be) correct
    # needs_instr_reset_mask = (masks != 1.0)
    # needs_instr_reset_mask[0] *= 0
    # needs_instr_reset_inds = needs_instr_reset_mask.view(nrollouts, -1).any(-1).cpu().numpy()
    #
    # # Get inds where a new task has started
    # blocking_inds: List[int] = np.where(needs_instr_reset_inds)[0].tolist()
    # blocking_inds.append(needs_instr_reset_inds.shape[0])
    # if nrollouts != 1:
    #     pdb.set_trace()
    # if blocking_inds[0] != 0:
    #     blocking_inds.insert(0, 0)
    # if self.use_instr:
    #     instr_embeddings_list = []
    #     for ind0, ind1 in zip(blocking_inds[:-1], blocking_inds[1:]):
    #         instr_embeddings_list.append(
    #             self._get_instr_embedding(instrs[ind0])
    #             .unsqueeze(0)
    #             .repeat(ind1 - ind0, 1, 1)
    #         )
    #     tmp_instr_embeddings = torch.cat(instr_embeddings_list, dim=0)
    # assert (instr_embeddings - tmp_instr_embeddings).abs().max().item() < 1e-6

    # Embed images
    # images = images.view(nrollouts, nsamplers, *images.shape[1:])
    image_embeddings = self.image_conv(images)
    if self.arch.startswith("expert_filmcnn"):
        # NOTE(review): `instr_embeddings` is None when `self.use_instr` is False,
        # so this branch appears to require instructions - confirm.
        instr_embeddings_flatter = instr_embeddings.view(
            -1, *instr_embeddings.shape[2:]
        )
        for controller in self.controllers:
            image_embeddings = controller(
                image_embeddings, instr_embeddings_flatter
            )
        image_embeddings = F.relu(self.film_pool(image_embeddings))

    image_embeddings = image_embeddings.view(rollouts_len, nsamplers, -1)

    if self.use_instr and self.lang_model == "attgru":
        raise NotImplementedError("Currently attgru is not implemented.")

    memory = None
    if self.use_memory:
        assert recurrent_hidden_states.shape[0] == 1
        # The flat hidden state is split at `semi_memory_size` into the pair
        # handed to `memory_rnn`.
        hidden = (
            recurrent_hidden_states[:, :, : self.semi_memory_size],
            recurrent_hidden_states[:, :, self.semi_memory_size :],
        )
        embeddings_list = []
        # Process the rollout in blocks; at the start of each block the hidden pair
        # is multiplied by that step's masks.
        for ind0, ind1 in zip(blocking_inds[:-1], blocking_inds[1:]):
            hidden = (hidden[0] * masks[ind0], hidden[1] * masks[ind0])
            rnn_out, hidden = self.memory_rnn(image_embeddings[ind0:ind1], hidden)
            embeddings_list.append(rnn_out)

        # embedding = hidden[0]
        embedding = torch.cat(embeddings_list, dim=0)
        # Re-join the hidden pair into one flat state along the last dimension.
        memory = torch.cat(hidden, dim=-1)
    else:
        embedding = image_embeddings

    # For non-FiLM architectures the instruction embedding is concatenated to the
    # per-step embedding.
    if self.use_instr and not "filmcnn" in self.arch:
        embedding = torch.cat((embedding, instr_embeddings), dim=-1)

    if hasattr(self, "aux_info") and self.aux_info:
        extra_predictions = {
            info: self.extra_heads[info](embedding) for info in self.extra_heads
        }
    else:
        extra_predictions = dict()

    # Flatten (time, sampler) before applying the actor / critic heads.
    embedding = embedding.view(rollouts_len * nsamplers, -1)

    ac_output = ActorCriticOutput(
        distributions=CategoricalDistr(
            logits=self.actor(embedding),
        ),
        values=self.critic(embedding),
        extras=(
            extra_predictions
            if not self.include_auxiliary_head
            else {
                **extra_predictions,
                "auxiliary_distributions": CategoricalDistr(
                    logits=self.aux(embedding)
                ),
            }
        ),
    )
    # NOTE(review): `memory` stays None when `self.use_memory` is False, while
    # `adapt_result` calls `.view` on the hidden states - confirm that path is unused.
    hidden_states = memory

    return self.adapt_result(
        ac_output,
        hidden_states,
        num_steps,
        num_samplers,
        num_agents,
        num_layers,
        observations,
    )
@staticmethod
def adapt_inputs(  # type: ignore
    observations: ObservationType,
    recurrent_hidden_states: torch.FloatTensor,
    prev_actions: torch.Tensor,
    masks: torch.FloatTensor,
):
    """Flatten the `[num_steps, num_samplers, ...]` inputs into "old-style" batches.

    Expected input layout:

    * observations: `[num_steps, num_samplers, ...]`
    * recurrent_hidden_states: `[num_layers, num_samplers, (num_agents,) num_dims]`
    * prev_actions: `[num_steps, num_samplers, ...]` (may be `None`)
    * masks: `[num_steps, num_samplers, 1]`

    Produced layout:

    * observations: `[num_steps * num_samplers, ...]` (only the
      `"minigrid_ego_image"` and `"minigrid_mission"` entries are reshaped,
      and the mapping is modified in place)
    * recurrent_hidden_states: `[num_layers, num_samplers * num_agents, num_dims]`
    * prev_actions: `[num_steps * num_samplers, -1]`
    * masks: `[num_steps * num_samplers, 1]`

    # Returns

    The tuple `(observations, recurrent_hidden_states, prev_actions, masks,
    num_steps, num_samplers, num_agents, num_layers)`; a single agent is
    always assumed (`num_agents == 1`).
    """
    num_steps, num_samplers = masks.shape[:2]
    num_layers = recurrent_hidden_states.shape[0]
    num_agents = 1  # a single agent is assumed throughout

    flat_batch = num_steps * num_samplers
    minigrid_keys = ["minigrid_ego_image", "minigrid_mission"]

    def recursively_adapt_observations(obs):
        # Walk nested mappings and merge the two leading batch dims of the
        # minigrid tensors into one.
        for key in obs:
            value = obs[key]
            if isinstance(value, Dict):
                recursively_adapt_observations(value)
                continue
            assert isinstance(value, torch.Tensor)
            if key in minigrid_keys:
                obs[key] = value.view(flat_batch, *value.shape[2:])

    recursively_adapt_observations(observations)

    flat_hidden_states = cast(
        torch.FloatTensor,
        recurrent_hidden_states.view(num_layers, num_samplers * num_agents, -1),
    )

    flat_prev_actions = prev_actions
    if flat_prev_actions is not None:
        flat_prev_actions = flat_prev_actions.view(flat_batch, -1)  # type:ignore

    flat_masks = masks.view(flat_batch, 1)  # type:ignore

    return (
        observations,
        flat_hidden_states,
        flat_prev_actions,
        flat_masks,
        num_steps,
        num_samplers,
        num_agents,
        num_layers,
    )
@staticmethod
def adapt_result(ac_output, hidden_states, num_steps, num_samplers, num_agents, num_layers, observations):  # type: ignore
    """Restore the `[num_steps, num_samplers, ...]` layout on the model outputs.

    Reshapes the action logits and values of `ac_output`, the optional
    `"auxiliary_distributions"` entry of its extras (replaced in place in
    the extras mapping), the hidden states and the `"minigrid_ego_image"` /
    `"minigrid_mission"` observation entries (also modified in place).

    # Returns

    A pair `(ActorCriticOutput, hidden_states)` where the hidden states have
    shape `[num_layers, num_samplers * num_agents, -1]`.
    """
    step_sampler_dims = (num_steps, num_samplers)

    action_logits = ac_output.distributions.logits.view(*step_sampler_dims, -1)
    distributions = CategoricalDistr(logits=action_logits)
    values = ac_output.values.view(*step_sampler_dims, num_agents)

    extras = ac_output.extras  # shapes of the other extras are left untouched
    # TODO confirm the shape of the auxiliary distribution is the same as the actor's
    if "auxiliary_distributions" in extras:
        aux_logits = extras["auxiliary_distributions"].logits
        # A single agent is assumed for the auxiliary head.
        extras["auxiliary_distributions"] = CategoricalDistr(
            logits=aux_logits.view(*step_sampler_dims, -1),
        )

    hidden_states = hidden_states.view(num_layers, num_samplers * num_agents, -1)

    minigrid_keys = ["minigrid_ego_image", "minigrid_mission"]

    def recursively_adapt_observations(obs):
        # Split the flattened batch dimension back into (steps, samplers);
        # observations are assumed to carry no agents dimension.
        for key in obs:
            value = obs[key]
            if isinstance(value, Dict):
                recursively_adapt_observations(value)
                continue
            assert isinstance(value, torch.Tensor)
            if key in minigrid_keys:
                obs[key] = value.view(
                    num_steps, num_samplers * num_agents, *value.shape[1:]
                )

    recursively_adapt_observations(observations)

    adapted_output = ActorCriticOutput(
        distributions=distributions, values=values, extras=extras
    )
    return adapted_output, hidden_states
class BabyAIRecurrentACModel(ActorCriticModel[CategoricalDistr]):
    """Actor-critic model that delegates all computation to a wrapped BabyAI model.

    The recurrent state is stored in the `Memory` object under the key
    `"rnn"` and has `2 * memory_dim` hidden units in a single layer.
    """

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        image_dim=128,
        memory_dim=128,
        instr_dim=128,
        use_instr=False,
        lang_model="gru",
        use_memory=False,
        arch="cnn1",
        aux_info=None,
        include_auxiliary_head: bool = False,
    ):
        """Validate the observation space and build the wrapped BabyAI model.

        The observation space must contain `"minigrid_ego_image"` and, when
        `use_instr` is set, `"minigrid_mission"`. All remaining arguments
        are passed through to `BabyAIACModelWrapped`.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        available_sensors = observation_space.spaces
        assert "minigrid_ego_image" in available_sensors
        assert not use_instr or "minigrid_mission" in available_sensors

        self.memory_dim = memory_dim
        self.include_auxiliary_head = include_auxiliary_head

        # Fixed input sizes handed to the wrapped model.
        wrapped_obs_space = {
            "image": 7 * 7 * 3,
            "instr": 100,
        }
        self.baby_ai_model = BabyAIACModelWrapped(
            obs_space=wrapped_obs_space,
            action_space=action_space,
            image_dim=image_dim,
            memory_dim=memory_dim,
            instr_dim=instr_dim,
            use_instr=use_instr,
            lang_model=lang_model,
            use_memory=use_memory,
            arch=arch,
            aux_info=aux_info,
            include_auxiliary_head=self.include_auxiliary_head,
        )
        self.memory_key = "rnn"

    @property
    def recurrent_hidden_state_size(self) -> int:
        """Size of the flat recurrent state: twice `memory_dim`."""
        return 2 * self.memory_dim

    @property
    def num_recurrent_layers(self):
        """Number of recurrent layers (always 1)."""
        return 1

    def _recurrent_memory_specification(self):
        """Describe the single memory tensor as (layer, sampler, hidden) of float32."""
        dims = (
            ("layer", self.num_recurrent_layers),
            ("sampler", None),
            ("hidden", self.recurrent_hidden_state_size),
        )
        return {self.memory_key: (dims, torch.float32)}

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Run the wrapped model and write its new recurrent state back into `memory`.

        # Returns

        The wrapped model's actor-critic output together with the result of
        `memory.set_tensor` for the updated recurrent state.
        """
        current_state = cast(torch.FloatTensor, memory.tensor(self.memory_key))
        out, recurrent_hidden_states = self.baby_ai_model.forward(
            observations=observations,
            recurrent_hidden_states=current_state,
            prev_actions=prev_actions,
            masks=masks,
        )
        updated_memory = memory.set_tensor(self.memory_key, recurrent_hidden_states)
        return out, updated_memory
================================================
FILE: allenact_plugins/babyai_plugin/babyai_tasks.py
================================================
import random
import signal
from typing import Tuple, Any, List, Dict, Optional, Union, Callable
import babyai
import babyai.bot
import gym
import numpy as np
from gym.utils import seeding
from gym_minigrid.minigrid import MiniGridEnv
from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor, SensorSuite
from allenact.base_abstractions.task import Task, TaskSampler
from allenact.utils.system import get_logger
class BabyAITask(Task[MiniGridEnv]):
    """A single BabyAI episode exposed through the `Task` interface.

    Besides stepping the underlying `MiniGridEnv`, the task can query the
    BabyAI bot for an expert action (see `query_expert`).
    """

    def __init__(
        self,
        env: MiniGridEnv,
        sensors: Union[SensorSuite, List[Sensor]],
        task_info: Dict[str, Any],
        expert_view_size: int = 7,
        expert_can_see_through_walls: bool = False,
        **kwargs,
    ):
        """Create the task around an already reset environment.

        # Parameters

        env : The environment to act in; its current `max_steps` becomes the
            task's step limit.
        sensors : Sensors used to build observations.
        task_info : Free-form task description passed to the parent class.
        expert_view_size : `agent_view_size` used while the expert plans.
        expert_can_see_through_walls : Wall visibility used while the expert
            plans.
        kwargs : Forwarded to the parent constructor.
        """
        super().__init__(
            env=env,
            sensors=sensors,
            task_info=task_info,
            max_steps=env.max_steps,
            **kwargs,
        )
        self._was_successful: bool = False
        self.bot: Optional[babyai.bot.Bot] = None
        self._bot_died = False
        self.expert_view_size = expert_view_size
        self.expert_can_see_through_walls = expert_can_see_through_walls
        self._last_action: Optional[int] = None

        # The env gets one more step than the task (presumably so that the task's
        # own step limit, not the env's, ends the episode - TODO confirm).
        env.max_steps = env.max_steps + 1

    @property
    def action_space(self) -> gym.spaces.Discrete:
        """The action space of the wrapped environment."""
        return self.env.action_space

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Render the wrapped environment; only `mode` is forwarded."""
        return self.env.render(mode=mode)

    def _step(self, action: int) -> RLStepResult:
        """Apply `action` to the environment and package the outcome.

        The episode counts as successful when the environment reports `done`
        together with a positive reward.
        """
        assert isinstance(action, int)

        minigrid_obs, reward, done, info = self.env.step(action=action)
        self._last_action = action

        self._was_successful = done and reward > 0

        return RLStepResult(
            observation=self.get_observations(minigrid_output_obs=minigrid_obs),
            reward=reward,
            done=self.is_done(),
            info=info,
        )

    def get_observations(
        self, *args, minigrid_output_obs: Optional[Dict[str, Any]] = None, **kwargs
    ) -> Any:
        """Collect sensor observations, handing the raw minigrid observation (if any) to the sensors."""
        return self.sensor_suite.get_observations(
            env=self.env, task=self, minigrid_output_obs=minigrid_output_obs
        )

    def reached_terminal_state(self) -> bool:
        """Whether the last step ended the episode successfully."""
        return self._was_successful

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Names of `MiniGridEnv.Actions`, ordered by their integer value."""
        return tuple(
            x
            for x, _ in sorted(
                [(str(a), a.value) for a in MiniGridEnv.Actions], key=lambda x: x[1]
            )
        )

    def close(self) -> None:
        """Nothing to release: the environment is not closed by the task."""
        pass

    def _expert_timeout_hander(self, signum, frame):
        """SIGALRM handler that aborts expert planning."""
        raise TimeoutError

    def query_expert(self, **kwargs) -> Tuple[Any, bool]:
        """Ask the BabyAI bot for the next expert action.

        Planning runs with the expert's view size and wall visibility and is
        bounded by a SIGALRM timeout (`kwargs["timeout"]` seconds; defaults to
        4 before the first step and 2 afterwards). Once planning has timed
        out, the bot is considered dead for the rest of the task.

        # Returns

        `(action, True)` on success and `(0, False)` when no expert action is
        available.
        """
        if self._bot_died:
            return 0, False

        # Remember the agent's own perception settings so they can be restored.
        see_through_walls = self.env.see_through_walls
        agent_view_size = self.env.agent_view_size

        try:
            self.env.agent_view_size = self.expert_view_size
            # Fix: the original only set `expert_can_see_through_walls` on the env
            # while saving/restoring `see_through_walls`, so the expert's wall
            # visibility was never applied to the attribute that is restored below.
            self.env.see_through_walls = self.expert_can_see_through_walls
            # Kept for backward compatibility in case something reads this attribute
            # from the env - TODO confirm whether it is still needed.
            self.env.expert_can_see_through_walls = self.expert_can_see_through_walls

            if self.bot is None:
                self.bot = babyai.bot.Bot(self.env)

            signal.signal(signal.SIGALRM, self._expert_timeout_hander)
            signal.alarm(kwargs.get("timeout", 4 if self.num_steps_taken() == 0 else 2))
            return self.bot.replan(self._last_action), True
        except TimeoutError as _:
            self._bot_died = True
            return 0, False
        finally:
            # Always cancel the pending alarm and restore the agent's settings.
            signal.alarm(0)
            self.env.see_through_walls = see_through_walls
            self.env.agent_view_size = agent_view_size

    def metrics(self) -> Dict[str, Any]:
        """Parent metrics extended with a `"success"` entry (1.0 or 0.0)."""
        metrics = {
            **super(BabyAITask, self).metrics(),
            "success": 1.0 * (self.reached_terminal_state()),
        }
        return metrics
class BabyAITaskSampler(TaskSampler):
    """Samples `BabyAITask`s by re-seeding and resetting a single environment."""

    def __init__(
        self,
        env_builder: Union[str, Callable[..., MiniGridEnv]],
        sensors: Union[SensorSuite, List[Sensor]],
        max_tasks: Optional[int] = None,
        num_unique_seeds: Optional[int] = None,
        task_seeds_list: Optional[List[int]] = None,
        deterministic_sampling: bool = False,
        extra_task_kwargs: Optional[Dict] = None,
        **kwargs,
    ):
        """Build the environment and the seed schedule.

        # Parameters

        env_builder : Either a gym environment id (passed to `gym.make`) or a
            zero-argument callable returning the environment.
        sensors : Sensors (or an existing `SensorSuite`) handed to every task.
        max_tasks : Maximum number of tasks to generate; `None` means no limit.
        num_unique_seeds : If given (and `task_seeds_list` is not), the seeds
            `0 .. num_unique_seeds - 1` are used.
        task_seeds_list : Explicit, non-empty list of environment seeds; its
            length must equal `num_unique_seeds` when both are given.
        deterministic_sampling : Cycle through the seeds in order instead of
            drawing them at random.
        extra_task_kwargs : Extra keyword arguments passed to every
            `BabyAITask` that is created.
        kwargs : Ignored.
        """
        super(BabyAITaskSampler, self).__init__()

        self.sensors = (
            SensorSuite(sensors) if not isinstance(sensors, SensorSuite) else sensors
        )
        self.max_tasks = max_tasks
        self.num_unique_seeds = num_unique_seeds
        self.deterministic_sampling = deterministic_sampling
        self.extra_task_kwargs = (
            extra_task_kwargs if extra_task_kwargs is not None else {}
        )

        self._last_env_seed: Optional[int] = None
        self._last_task: Optional[BabyAITask] = None

        assert (self.num_unique_seeds is None) or (
            0 < self.num_unique_seeds
        ), "`num_unique_seeds` must be a positive integer."

        self.task_seeds_list = task_seeds_list
        if self.task_seeds_list is not None:
            # Fail here rather than with a modulo-by-zero / empty-choice error in
            # `next_task`.
            assert (
                len(self.task_seeds_list) > 0
            ), "`task_seeds_list` must be non-empty if specified."
            if self.num_unique_seeds is not None:
                assert self.num_unique_seeds == len(
                    self.task_seeds_list
                ), "`num_unique_seeds` must equal the length of `task_seeds_list` if both specified."
            self.num_unique_seeds = len(self.task_seeds_list)
        elif self.num_unique_seeds is not None:
            self.task_seeds_list = list(range(self.num_unique_seeds))

        if (not deterministic_sampling) and self.max_tasks:
            get_logger().warning(
                "`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,"
                " this might be a mistake when running testing."
            )

        if isinstance(env_builder, str):
            self.env = gym.make(env_builder)
        else:
            self.env = env_builder()

        # Random generator used to pick seeds; re-seedable through `set_seed`.
        self.np_seeded_random_gen, _ = seeding.np_random(random.randint(0, 2**31 - 1))
        self.num_tasks_generated = 0

    @property
    def length(self) -> Union[int, float]:
        """Number of tasks still available (`inf` when `max_tasks` is `None`)."""
        return (
            float("inf")
            if self.max_tasks is None
            else self.max_tasks - self.num_tasks_generated
        )

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Number of distinct seeds, or `None` when seeds are unrestricted."""
        return None if self.num_unique_seeds is None else self.num_unique_seeds

    @property
    def last_sampled_task(self) -> Optional[Task]:
        """The task most recently returned by `next_task` (`None` before the first call)."""
        # Fix: this used to raise NotImplementedError even though the sampler
        # already keeps track of the last task.
        return self._last_task

    def next_task(self, force_advance_scene: bool = False) -> Optional[BabyAITask]:
        """Seed and reset the environment, then wrap it in a new `BabyAITask`.

        # Returns

        The new task, or `None` once `max_tasks` tasks have been generated.
        """
        if self.length <= 0:
            return None

        if self.num_unique_seeds is not None:
            if self.deterministic_sampling:
                # Cycle through the seed list in order.
                self._last_env_seed = self.task_seeds_list[
                    self.num_tasks_generated % len(self.task_seeds_list)
                ]
            else:
                self._last_env_seed = self.np_seeded_random_gen.choice(
                    self.task_seeds_list
                )
        else:
            self._last_env_seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)

        self.env.seed(self._last_env_seed)
        self.env.saved_seed = self._last_env_seed
        self.env.reset()

        self.num_tasks_generated += 1
        # Fix: `extra_task_kwargs` was stored by the constructor but never passed
        # on to the tasks.
        self._last_task = BabyAITask(
            env=self.env,
            sensors=self.sensors,
            task_info={},
            **self.extra_task_kwargs,
        )
        return self._last_task

    def close(self) -> None:
        """Close the underlying environment."""
        self.env.close()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """All tasks share one environment and sensor suite."""
        return True

    def reset(self) -> None:
        """Restart the task count and reset the environment."""
        self.num_tasks_generated = 0
        self.env.reset()

    def set_seed(self, seed: int) -> None:
        """Re-seed the generator used to pick environment seeds."""
        self.np_seeded_random_gen, _ = seeding.np_random(seed)
================================================
FILE: allenact_plugins/babyai_plugin/configs/__init__.py
================================================
================================================
FILE: allenact_plugins/babyai_plugin/data/__init__.py
================================================
================================================
FILE: allenact_plugins/babyai_plugin/extra_environment.yml
================================================
dependencies:
- networkx
- pip
- pip:
- "--editable=git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd#egg=babyai"
================================================
FILE: allenact_plugins/babyai_plugin/extra_requirements.txt
================================================
babyai @ git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd
networkx
================================================
FILE: allenact_plugins/babyai_plugin/scripts/__init__.py
================================================
================================================
FILE: allenact_plugins/babyai_plugin/scripts/download_babyai_expert_demos.py
================================================
import argparse
import os
import platform
from allenact_plugins.babyai_plugin.babyai_constants import (
BABYAI_EXPERT_TRAJECTORIES_DIR,
)
# Maps a BabyAI level name to a `(train_id, valid_id)` pair. Each id is substituted
# into the Google Drive download URL used by this script to fetch the level's
# training / validation expert demonstrations.
LEVEL_TO_TRAIN_VALID_IDS = {
    "BossLevel": (
        "1DkVVpIEVtpyo1LxOXQL_bVyjFCTO3cHD",
        "1ccEFA_n5RT4SWD0Wa_qO65z2HACJBace",
    ),
    "GoToObjMaze": (
        "1P1CuMbGDJtZit1f-8hmd-HwweXZMj77T",
        "1MVlVsIpJUZ0vjrYGXY6Ku4m4vBxtWjRZ",
    ),
    "GoTo": ("1ABR1q-TClgjSlbhVdVJjzOBpTmTtlTN1", "13DlEx5woi31MIs_dzyLxfi7dPe1g59l2"),
    "GoToLocal": (
        "1U8YWdd3viN2lxOP5BByNUZRPVDKVvDAN",
        "1Esy-J0t8eJUg6_RT8F4kkegHYDWwqmSl",
    ),
}
def get_args():
    """Build the command-line parser of this script and parse the process arguments.

    The only argument is the optional positional `dataset`, which defaults
    to `"all"`.
    """
    known_levels = ", ".join(LEVEL_TO_TRAIN_VALID_IDS.keys())
    dataset_help = "dataset name (one of {}, or all)".format(known_levels)

    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        description="download_babyai_expert_demos",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("dataset", nargs="?", default="all", help=dataset_help)

    return parser.parse_args()
if __name__ == "__main__":
    # Downloads the expert demonstrations of one (or all) BabyAI levels into
    # BABYAI_EXPERT_TRAJECTORIES_DIR by shelling out to `wget`.
    args = get_args()

    if platform.system() == "Linux":
        download_template = """wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id={}' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p')&id={}" -O {}"""
    elif platform.system() == "Darwin":
        # Same command as on Linux, but using GNU sed (`gsed`).
        download_template = """wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id={}' -O- | gsed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p')&id={}" -O {}"""
    else:
        raise NotImplementedError("{} is not supported".format(platform.system()))

    # Validate the dataset name before entering the download `try`, so that a bad
    # name is not reported as a download failure.
    if args.dataset == "all":
        id_items = LEVEL_TO_TRAIN_VALID_IDS
    else:
        assert (
            args.dataset in LEVEL_TO_TRAIN_VALID_IDS
        ), "Only {} are valid datasets".format(
            ", ".join(LEVEL_TO_TRAIN_VALID_IDS.keys())
        )
        id_items = {args.dataset: LEVEL_TO_TRAIN_VALID_IDS[args.dataset]}

    try:
        os.makedirs(BABYAI_EXPERT_TRAJECTORIES_DIR, exist_ok=True)

        for level_name, (train_id, valid_id) in id_items.items():
            targets = (
                (
                    train_id,
                    os.path.join(
                        BABYAI_EXPERT_TRAJECTORIES_DIR,
                        "BabyAI-{}-v0.pkl".format(level_name),
                    ),
                ),
                (
                    valid_id,
                    os.path.join(
                        BABYAI_EXPERT_TRAJECTORIES_DIR,
                        "BabyAI-{}-v0_valid.pkl".format(level_name),
                    ),
                ),
            )
            for file_id, target_path in targets:
                if os.path.exists(target_path):
                    print("{} already exists, skipping...".format(target_path))
                    continue

                # `os.system` never raises; a non-zero status is the only sign that
                # the download command failed.
                status = os.system(
                    download_template.format(file_id, file_id, target_path)
                )
                if status != 0:
                    # Remove a partial file so that a later run does not skip it
                    # as "already exists".
                    if os.path.exists(target_path):
                        os.remove(target_path)
                    raise RuntimeError(
                        "Download command exited with status {} for {}".format(
                            status, target_path
                        )
                    )
                print("Demos saved to {}.".format(target_path))
    except Exception as e:
        raise Exception(
            "Failed to download babyai demos. Make sure you have the appropriate command line"
            " tools installed for your platform. For MacOS you'll need to install `gsed` (the gnu version"
            " of sed) and `wget` using homebrew or some other method."
        ) from e
================================================
FILE: allenact_plugins/babyai_plugin/scripts/get_instr_length_percentiles.py
================================================
import glob
import os
import babyai
import numpy as np
from allenact_plugins.babyai_plugin.babyai_constants import (
BABYAI_EXPERT_TRAJECTORIES_DIR,
)
# Boss level
# [(50, 11.0), (90, 22.0), (99, 32.0), (99.9, 38.0), (99.99, 43.0)]
if __name__ == "__main__":
    # Prints (percentile, instruction length in words) pairs for the expert
    # demonstrations of a single level.
    # level = "BossLevel"
    level = "GoToLocal"

    demo_pattern = os.path.join(
        BABYAI_EXPERT_TRAJECTORIES_DIR, "*{}-v0.pkl".format(level)
    )
    files = glob.glob(demo_pattern)
    # Exactly one training file is expected for the level.
    assert len(files) == 1

    demos = babyai.utils.load_demos(files[0])

    percentiles = [50, 90, 99, 99.9, 99.99, 100]
    # The first entry of each demo is split on single spaces to count its words.
    instr_lengths = [len(d[0].split(" ")) for d in demos]
    length_percentiles = np.percentile(instr_lengths, percentiles)

    print(list(zip(percentiles, length_percentiles)))
================================================
FILE: allenact_plugins/babyai_plugin/scripts/truncate_expert_demos.py
================================================
import glob
import os
import babyai
from allenact_plugins.babyai_plugin.babyai_constants import (
BABYAI_EXPERT_TRAJECTORIES_DIR,
)
def make_small_demos(dir: str):
    """Write a 1000-demo `-small.pkl` copy of every training demo file in `dir`.

    Files whose *name* contains `"valid"` or `"small"` are skipped, as are
    files whose small copy already exists.

    # Parameters

    dir : Directory that is searched (non-recursively) for `*.pkl` files.
    """
    for file_path in glob.glob(os.path.join(dir, "*.pkl")):
        # Fix: filter on the file name only; the original tested the whole path,
        # so a directory name containing "valid" or "small" skipped every file.
        file_name = os.path.basename(file_path)
        if "valid" in file_name or "small" in file_name:
            continue

        # Fix: only touch the extension; `str.replace` on the whole path would
        # also rewrite any ".pkl" occurring in a directory name.
        path_root, extension = os.path.splitext(file_path)
        new_file_path = path_root + "-small" + extension
        if os.path.exists(new_file_path):
            continue

        print(
            "Saving small version of {} to {}...".format(file_name, new_file_path)
        )
        babyai.utils.save_demos(
            babyai.utils.load_demos(file_path)[:1000], new_file_path
        )
        print("Done.")
# Script entry point: create the truncated ("-small") copies for every training
# demo file found in the expert trajectories directory.
if __name__ == "__main__":
    make_small_demos(BABYAI_EXPERT_TRAJECTORIES_DIR)
================================================
FILE: allenact_plugins/clip_plugin/__init__.py
================================================
from allenact.utils.system import ImportChecker
# Import `clip` eagerly so that a missing installation is reported together with
# install instructions (presumably ImportChecker surfaces the message below when the
# import fails - TODO confirm).
# NOTE(review): the commit hash in this message differs from the one pinned in this
# plugin's extra_requirements.txt / extra_environment.yml - confirm which is intended.
with ImportChecker(
    "Cannot `import clip`. Please install clip from the openai/CLIP git repository:"
    "\n`pip install git+https://github.com/openai/CLIP.git@b46f5ac7587d2e1862f8b7b1573179d80dcdd620`"
):
    # noinspection PyUnresolvedReferences
    import clip
================================================
FILE: allenact_plugins/clip_plugin/clip_preprocessors.py
================================================
from typing import List, Optional, Any, cast, Dict, Tuple
import clip
import gym
import numpy as np
import torch
import torch.nn as nn
from clip.model import CLIP
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.misc_utils import prepare_locals_for_super
class ClipResNetEmbedder(nn.Module):
    """Frozen visual encoder of a CLIP ResNet with a configurable final pooling.

    * `pool=False`: the attention pooling layer is replaced by an identity.
    * `pool=True, pooling_type="attn"`: the model's own attention pooling is kept.
    * `pool=True, pooling_type="avg"`: the attention pooling is replaced by a
      global average pooling followed by a flatten.
    """

    def __init__(self, resnet: CLIP, pool=True, pooling_type="avg"):
        super().__init__()
        self.model = resnet
        self.pool = pool
        self.pooling_type = pooling_type

        if pool:
            if self.pooling_type == "avg":
                self.model.visual.attnpool = nn.Sequential(
                    nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten(start_dim=-3, end_dim=-1)
                )
            elif self.pooling_type != "attn":
                raise NotImplementedError("`pooling_type` must be 'avg' or 'attn'.")
            # "attn": nothing to do, the original attention pooling stays in place.
        else:
            self.model.visual.attnpool = nn.Identity()

        self.eval()

    def forward(self, x):
        """Encode `x` with the visual tower, without tracking gradients."""
        with torch.no_grad():
            features = self.model.visual(x)
        return features
class ClipResNetPreprocessor(Preprocessor):
    """Preprocess RGB or depth image using a ResNet model with CLIP model
    weights."""

    # Per-channel statistics exposed as class constants; they are not used inside
    # this class.
    CLIP_RGB_MEANS = (0.48145466, 0.4578275, 0.40821073)
    CLIP_RGB_STDS = (0.26862954, 0.26130258, 0.27577711)

    def __init__(
        self,
        rgb_input_uuid: str,
        clip_model_type: str,
        pool: bool,
        device: Optional[torch.device] = None,
        device_ids: Optional[List[torch.device]] = None,
        input_img_height_width: Tuple[int, int] = (224, 224),
        chunk_size: Optional[int] = None,
        **kwargs: Any,
    ):
        """Configure the preprocessor; the CLIP model itself is loaded lazily.

        # Parameters

        rgb_input_uuid : Key of the observation that is processed.
        clip_model_type : One of `"RN50"` or `"RN50x16"`.
        pool : Whether the output is pooled to a single feature vector; only
            allowed together with a 224x224 input.
        device : Device used for the model (CPU when `None`).
        device_ids : Defaults to all visible CUDA device indices.
        input_img_height_width : Input size; both entries must be multiples
            of 32.
        chunk_size : If set, larger batches are processed in chunks of this
            size.
        kwargs : Forwarded to the parent constructor (see note below).
        """
        assert clip_model_type in clip.available_models()
        assert pool == False or input_img_height_width == (224, 224)
        assert all(iis % 32 == 0 for iis in input_img_height_width)

        # The spatial output size is the input size divided by 32.
        output_height_width = tuple(iis // 32 for iis in input_img_height_width)
        if clip_model_type == "RN50":
            output_shape = (2048,) + output_height_width
        elif clip_model_type == "RN50x16":
            output_shape = (3072,) + output_height_width
        else:
            raise NotImplementedError(
                f"Currently `clip_model_type` must be one of 'RN50' or 'RN50x16'"
            )

        # With pooling only the channel dimension remains.
        if pool:
            output_shape = output_shape[:1]

        self.clip_model_type = clip_model_type

        self.pool = pool

        self.device = torch.device("cpu") if device is None else device
        self.device_ids = device_ids or cast(
            List[torch.device], list(range(torch.cuda.device_count()))
        )
        self._resnet: Optional[ClipResNetEmbedder] = None
        self.chunk_size = chunk_size

        low = -np.inf
        high = np.inf
        shape = output_shape

        input_uuids = [rgb_input_uuid]
        assert (
            len(input_uuids) == 1
        ), "resnet preprocessor can only consume one observation type"

        observation_space = gym.spaces.Box(low=low, high=high, shape=shape)

        # NOTE(review): `locals()` is handed to the parent constructor through
        # `prepare_locals_for_super`, so the local names above (`input_uuids`,
        # `observation_space`, ...) presumably have to match the parent's
        # parameters - confirm before renaming or adding locals.
        super().__init__(**prepare_locals_for_super(locals()))

    @property
    def resnet(self) -> ClipResNetEmbedder:
        """The embedder, created (and put in eval mode) on first access."""
        if self._resnet is None:
            self._resnet = ClipResNetEmbedder(
                clip.load(self.clip_model_type, device=self.device)[0], pool=self.pool
            ).to(self.device)
            # Zero momentum on every BatchNorm-like module (presumably to keep the
            # running statistics fixed - TODO confirm).
            for module in self._resnet.modules():
                if "BatchNorm" in type(module).__name__:
                    module.momentum = 0.0
            self._resnet.eval()
        return self._resnet

    def to(self, device: torch.device) -> "ClipResNetPreprocessor":
        """Move the embedder (loading it if needed) to `device` and return `self`."""
        self._resnet = self.resnet.to(device)
        self.device = device
        return self

    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:
        """Embed the observation stored under the configured input uuid.

        The input is permuted from channels-last to channels-first and the
        result is returned as a float tensor.
        """
        x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2)  # bhwc -> bchw
        # If the input is depth, repeat it across all 3 channels
        if x.shape[1] == 1:
            x = x.repeat(1, 3, 1, 1)

        n = x.shape[0]
        if self.chunk_size is not None and x.shape[0] > self.chunk_size:
            # Process the batch in chunks and concatenate the results.
            processed_chunks = []
            for idx in range(0, n, self.chunk_size):
                processed_chunks.append(
                    self.resnet(x[idx : min(idx + self.chunk_size, n)]).float()
                )
            x = torch.cat(processed_chunks, dim=0)
        else:
            x = self.resnet(x).float()
        return x
class ClipViTEmbedder(nn.Module):
    """Frozen visual encoder of a CLIP ViT with its last residual block removed.

    Returns either all token embeddings or, when `class_emb_only` is set,
    only the embedding at token position 0.
    """

    def __init__(self, model: CLIP, class_emb_only: bool = False):
        super().__init__()
        self.model = model

        # Drop the final residual attention block of the visual transformer.
        visual_transformer = self.model.visual.transformer
        kept_blocks = list(visual_transformer.resblocks)[:-1]
        visual_transformer.resblocks = nn.Sequential(*kept_blocks)

        self.class_emb_only = class_emb_only
        self.eval()

    def forward(self, x):
        """Compute token embeddings for the image batch `x` without tracking gradients."""
        visual = self.model.visual
        with torch.no_grad():
            patches = visual.conv1(x)  # shape = [*, width, grid, grid]
            batch_size, width = patches.shape[0], patches.shape[1]
            patches = patches.reshape(batch_size, width, -1)  # [*, width, grid ** 2]
            patches = patches.permute(0, 2, 1)  # [*, grid ** 2, width]

            # Broadcast the class embedding to one token per batch element.
            class_tokens = visual.class_embedding.to(patches.dtype) + torch.zeros(
                patches.shape[0],
                1,
                patches.shape[-1],
                dtype=patches.dtype,
                device=patches.device,
            )
            tokens = torch.cat([class_tokens, patches], dim=1)  # [*, grid ** 2 + 1, width]
            tokens = tokens + visual.positional_embedding.to(tokens.dtype)
            tokens = visual.ln_pre(tokens)

            tokens = tokens.permute(1, 0, 2)  # NLD -> LND
            tokens = visual.transformer(tokens)
            tokens = tokens.permute(1, 0, 2)  # LND -> NLD

            if not self.class_emb_only:
                return tokens
            return tokens[:, 0, :]
class ClipViTPreprocessor(Preprocessor):
    """Preprocess RGB or depth image using a ViT model with CLIP model
    weights."""

    # Per-channel statistics exposed as class constants; they are not used inside
    # this class.
    CLIP_RGB_MEANS = (0.48145466, 0.4578275, 0.40821073)
    CLIP_RGB_STDS = (0.26862954, 0.26130258, 0.27577711)

    def __init__(
        self,
        rgb_input_uuid: str,
        clip_model_type: str,
        class_emb_only: bool,
        device: Optional[torch.device] = None,
        device_ids: Optional[List[torch.device]] = None,
        **kwargs: Any,
    ):
        """Configure the preprocessor; the CLIP model itself is loaded lazily.

        # Parameters

        rgb_input_uuid : Key of the observation that is processed.
        clip_model_type : One of `"ViT-B/32"`, `"ViT-B/16"` or `"ViT-L/14"`.
        class_emb_only : If set, only the embedding at token position 0 is
            returned, so the output shape drops its token dimension.
        device : Device used for the model (CPU when `None`).
        device_ids : Defaults to all visible CUDA device indices.
        kwargs : Forwarded to the parent constructor (see note below).

        # Raises

        NotImplementedError : For any other available CLIP model type.
        """
        assert clip_model_type in clip.available_models()

        # (number of tokens, embedding width) per supported model.
        if clip_model_type == "ViT-B/32":
            output_shape = (7 * 7 + 1, 768)
        elif clip_model_type == "ViT-B/16":
            output_shape = (14 * 14 + 1, 768)
        elif clip_model_type == "ViT-L/14":
            output_shape = (16 * 16 + 1, 1024)
        else:
            # Fix: the message used to list 'ViT-B/14', which is not one of the
            # accepted model types above.
            raise NotImplementedError(
                "Currently `clip_model_type` must be one of 'ViT-B/32', 'ViT-B/16', or 'ViT-L/14'"
            )

        if class_emb_only:
            output_shape = output_shape[1:]

        self.clip_model_type = clip_model_type

        self.class_emb_only = class_emb_only

        self.device = torch.device("cpu") if device is None else device
        self.device_ids = device_ids or cast(
            List[torch.device], list(range(torch.cuda.device_count()))
        )
        self._vit: Optional[ClipViTEmbedder] = None

        low = -np.inf
        high = np.inf
        shape = output_shape

        input_uuids = [rgb_input_uuid]
        assert (
            len(input_uuids) == 1
        ), "ViT preprocessor can only consume one observation type"

        observation_space = gym.spaces.Box(low=low, high=high, shape=shape)

        # NOTE(review): `locals()` is handed to the parent constructor through
        # `prepare_locals_for_super`, so the local names above (`input_uuids`,
        # `observation_space`, ...) presumably have to match the parent's
        # parameters - confirm before renaming or adding locals.
        super().__init__(**prepare_locals_for_super(locals()))

    @property
    def vit(self) -> ClipViTEmbedder:
        """The embedder, created (and put in eval mode) on first access."""
        if self._vit is None:
            self._vit = ClipViTEmbedder(
                model=clip.load(self.clip_model_type, device=self.device)[0],
                class_emb_only=self.class_emb_only,
            ).to(self.device)
            # Zero momentum on every BatchNorm-like module, if any are present.
            for module in self._vit.modules():
                if "BatchNorm" in type(module).__name__:
                    module.momentum = 0.0
            self._vit.eval()
        return self._vit

    def to(self, device: torch.device) -> "ClipViTPreprocessor":
        """Move the embedder (loading it if needed) to `device` and return `self`."""
        self._vit = self.vit.to(device)
        self.device = device
        return self

    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:
        """Embed the observation stored under the configured input uuid.

        The input is permuted from channels-last to channels-first and the
        result is returned as a float tensor.
        """
        x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2)  # bhwc -> bchw
        # If the input is depth, repeat it across all 3 channels
        if x.shape[1] == 1:
            x = x.repeat(1, 3, 1, 1)
        x = self.vit(x).float()
        return x
================================================
FILE: allenact_plugins/clip_plugin/extra_environment.yml
================================================
channels:
- pytorch
- defaults
- conda-forge
dependencies:
- pytorch>=1.7.1
- torchvision
- pip:
- ftfy
- regex
- tqdm
- "--editable=git+https://github.com/openai/CLIP.git@e184f608c5d5e58165682f7c332c3a8b4c1545f2#egg=clip"
================================================
FILE: allenact_plugins/clip_plugin/extra_requirements.txt
================================================
torch>=1.7.1
torchvision
ftfy
regex
tqdm
clip @ git+https://github.com/openai/clip@e184f608c5d5e58165682f7c332c3a8b4c1545f2#egg=clip
================================================
FILE: allenact_plugins/gym_plugin/__init__.py
================================================
================================================
FILE: allenact_plugins/gym_plugin/extra_environment.yml
================================================
channels:
- defaults
- conda-forge
dependencies:
- gym-box2d>=0.17.0,<0.20.0
================================================
FILE: allenact_plugins/gym_plugin/extra_requirements.txt
================================================
gym[box2d]>=0.17.0,<0.20.0
================================================
FILE: allenact_plugins/gym_plugin/gym_distributions.py
================================================
import torch
from allenact.base_abstractions.distributions import Distr
class GaussianDistr(torch.distributions.Normal, Distr):
    """A `torch.distributions.Normal` that additionally offers `mode`."""

    def mode(self) -> torch.FloatTensor:
        """Return the mode, which for a normal distribution is its mean."""
        return super(GaussianDistr, self).mean
================================================
FILE: allenact_plugins/gym_plugin/gym_environment.py
================================================
from typing import Optional
import gym
import numpy as np
class GymEnvironment(gym.Wrapper):
    """`gym.Wrapper` that keeps hold of the observation produced by `reset`.

    The stored observation can be read exactly once through
    `initial_observation`; reading it clears the stored value until the next
    call to `reset`.
    """

    def __init__(self, gym_env_name: str):
        """Build the named gym environment and reset it once.

        # Parameters

        gym_env_name : Name passed to `gym.make`.
        """
        wrapped_env = gym.make(gym_env_name)
        super().__init__(wrapped_env)
        self._initial_observation: Optional[np.ndarray] = None
        self.reset()  # generate initial observation

    def reset(self) -> np.ndarray:
        """Reset the wrapped environment, remembering the observation it returns."""
        first_observation = self.env.reset()
        self._initial_observation = first_observation
        return first_observation

    @property
    def initial_observation(self) -> np.ndarray:
        """Return the observation stored by the last `reset` and clear it.

        Fails an assertion if no stored observation is available (i.e. it
        was already consumed since the last `reset`).
        """
        # Take the stored value and clear the slot in one step.
        pending, self._initial_observation = self._initial_observation, None
        assert (
            pending is not None
        ), "Attempted to read initial_observation without calling reset()"
        return pending
================================================
FILE: allenact_plugins/gym_plugin/gym_models.py
================================================
from typing import Dict, Union, Optional, Tuple, Any, Sequence, cast
import gym
import torch
import torch.nn as nn
from allenact.algorithms.onpolicy_sync.policy import (
ActorCriticModel,
DistributionType,
)
from allenact.base_abstractions.misc import ActorCriticOutput, Memory
from allenact_plugins.gym_plugin.gym_distributions import GaussianDistr
class MemorylessActorCritic(ActorCriticModel[GaussianDistr]):
    """ActorCriticModel for gym tasks with continuous control in the range [-1,
    1].

    The actor and the critic are two independent MLPs (with `Tanh`
    non-linearities) operating on a single flat observation. The actor's
    output goes through a final `Tanh`, so action means lie in [-1, 1]; the
    action standard deviation is a fixed, non-learned buffer.
    """

    def __init__(
        self,
        input_uuid: str,
        action_space: gym.spaces.Box,
        observation_space: gym.spaces.Dict,
        action_std: float = 0.5,
        mlp_hidden_dims: Sequence[int] = (64, 32),
    ):
        """Build the actor and critic networks.

        # Parameters

        input_uuid : Key of the (1-dimensional) observation fed to both networks.
        action_space : 1-dimensional action space; its length is the action dimension.
        observation_space : Observation spaces, indexed by `input_uuid`.
        action_std : Standard deviation used for every action dimension.
        mlp_hidden_dims : Widths of the hidden layers shared in shape (not in
            weights) by the actor and the critic.
        """
        super().__init__(action_space, observation_space)

        self.input_uuid = input_uuid

        assert len(observation_space[self.input_uuid].shape) == 1
        state_dim = observation_space[self.input_uuid].shape[0]
        assert len(action_space.shape) == 1
        action_dim = action_space.shape[0]

        mlp_hidden_dims = (state_dim,) + tuple(mlp_hidden_dims)

        # Width of the features fed to the output heads. Using the last
        # configured width (rather than a hard-coded 32) keeps the heads
        # consistent with any user-provided `mlp_hidden_dims`.
        head_input_dim = mlp_hidden_dims[-1]

        # action mean range -1 to 1
        self.actor = nn.Sequential(
            *self.make_mlp_hidden(nn.Tanh, *mlp_hidden_dims),
            nn.Linear(head_input_dim, action_dim),
            nn.Tanh(),
        )

        # critic
        self.critic = nn.Sequential(
            *self.make_mlp_hidden(nn.Tanh, *mlp_hidden_dims),
            nn.Linear(head_input_dim, 1),
        )

        # fixed standard deviation, stored with shape (1, 1, action_dim) and
        # excluded from the state dict (non-persistent buffer)
        self.register_buffer(
            "action_std",
            torch.tensor([action_std] * action_dim).view(1, 1, -1),
            persistent=False,
        )

    @staticmethod
    def make_mlp_hidden(nl, *dims):
        """Return a list of `nn.Linear` layers, each followed by `nl()`.

        Consecutive entries of `dims` give the input/output widths of each
        linear layer, so `len(dims) - 1` linear layers are produced.
        """
        res = []
        for in_dim, out_dim in zip(dims[:-1], dims[1:]):
            res.append(
                nn.Linear(in_dim, out_dim),
            )
            res.append(nl())
        return res

    def _recurrent_memory_specification(self):
        """This model has no recurrent memory."""
        return None

    def forward(  # type:ignore
        self,
        observations: Dict[str, Union[torch.FloatTensor, Dict[str, Any]]],
        memory: Memory,
        prev_actions: Any,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Compute the action distribution and value for `observations`.

        Only `observations[self.input_uuid]` is used; `memory`,
        `prev_actions` and `masks` are ignored.

        # Returns

        A tuple of the `ActorCriticOutput` (Gaussian action distribution,
        values, empty extras) and `None` (no memory).
        """
        means = self.actor(observations[self.input_uuid])
        values = self.critic(observations[self.input_uuid])

        return (
            ActorCriticOutput(
                cast(DistributionType, GaussianDistr(loc=means, scale=self.action_std)),
                values,
                {},
            ),
            None,  # no Memory
        )
================================================
FILE: allenact_plugins/gym_plugin/gym_sensors.py
================================================
from typing import Optional, Any
import gym
import numpy as np
from allenact.base_abstractions.sensor import Sensor, prepare_locals_for_super
from allenact.base_abstractions.task import Task, SubTaskType
from allenact_plugins.gym_plugin.gym_environment import GymEnvironment
class GymBox2DSensor(Sensor[gym.Env, Task[gym.Env]]):
    """Sensor exposing the observations of gym Box2D environments."""

    def __init__(
        self,
        gym_env_name: str = "LunarLanderContinuous-v2",
        uuid: str = "gym_box2d_sensor",
        **kwargs: Any
    ):
        """Record the environment name and derive its observation space.

        # Parameters

        gym_env_name : Name of the gym Box2D environment.
        uuid : Identifier of this sensor.
        """
        self.gym_env_name = gym_env_name

        # NOTE: every local defined here is handed to
        # `prepare_locals_for_super(locals())`, so do not introduce
        # additional local variables before the `super().__init__` call.
        observation_space = self._get_observation_space()

        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self) -> gym.Space:
        """Return the observation space matching `self.gym_env_name`.

        Raises `NotImplementedError` for unsupported environment names.
        """
        name = self.gym_env_name

        if name in ["LunarLanderContinuous-v2", "LunarLander-v2"]:
            return gym.spaces.Box(-np.inf, np.inf, shape=(8,), dtype=np.float32)

        if name in ["BipedalWalker-v2", "BipedalWalkerHardcore-v2"]:
            upper = np.full((24,), np.inf)
            return gym.spaces.Box(-upper, upper, dtype=np.float32)

        if name == "CarRacing-v0":
            frame_height = 96
            frame_width = 96
            return gym.spaces.Box(
                low=0, high=255, shape=(frame_height, frame_width, 3), dtype=np.uint8
            )

        raise NotImplementedError()

    def get_observation(
        self,
        env: GymEnvironment,
        task: Optional[SubTaskType],
        *args,
        gym_obs: Optional[np.ndarray] = None,
        **kwargs: Any
    ) -> np.ndarray:
        """Return `gym_obs` when given, else the environment's initial observation."""
        return env.initial_observation if gym_obs is None else gym_obs
class GymMuJoCoSensor(Sensor[gym.Env, Task[gym.Env]]):
    """Sensor exposing the observations of gym MuJoCo and Robotics tasks."""

    def __init__(self, gym_env_name: str, uuid: str, **kwargs: Any):
        """Record the environment name and derive its observation space.

        # Parameters

        gym_env_name : Name of the gym environment.
        uuid : Identifier of this sensor.
        """
        self.gym_env_name = gym_env_name

        # NOTE: every local defined here is handed to
        # `prepare_locals_for_super(locals())`, so do not introduce
        # additional local variables before the `super().__init__` call.
        observation_space = self._get_observation_space()

        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self) -> gym.Space:
        """Return the observation space matching `self.gym_env_name`.

        Raises `NotImplementedError` for unsupported environment names.
        """
        # observation vector length for each supported gym MuJoCo environment
        flat_observation_dims = {
            "InvertedPendulum-v2": 4,
            "Ant-v2": 111,
            "Reacher-v2": 11,
            "Hopper-v2": 11,
            "InvertedDoublePendulum-v2": 11,
            "HumanoidStandup-v2": 376,
            "Humanoid-v2": 376,
            "HalfCheetah-v2": 17,
            "Walker2d-v2": 17,
            "Swimmer-v2": 8,
        }

        name = self.gym_env_name
        if name in flat_observation_dims:
            return gym.spaces.Box(
                -np.inf, np.inf, shape=(flat_observation_dims[name],), dtype="float32"
            )

        # TODO observation space for gym Robotics
        if name == "HandManipulateBlock-v0":

            def unbounded(length: int) -> gym.spaces.Box:
                return gym.spaces.Box(-np.inf, np.inf, shape=(length,), dtype="float32")

            return gym.spaces.Dict(
                dict(
                    desired_goal=unbounded(7),
                    achieved_goal=unbounded(7),
                    observation=unbounded(61),
                )
            )

        raise NotImplementedError

    def get_observation(
        self,
        env: GymEnvironment,
        task: Optional[SubTaskType],
        *args,
        gym_obs: Optional[np.ndarray] = None,
        **kwargs: Any
    ) -> np.ndarray:
        """Return the observation as a float32 array.

        Uses `gym_obs` when given, else the environment's initial
        observation.
        """
        raw = gym_obs if gym_obs is not None else env.initial_observation
        return np.array(raw, dtype=np.float32)  # coerce to be float32
================================================
FILE: allenact_plugins/gym_plugin/gym_tasks.py
================================================
import random
from typing import Any, List, Dict, Optional, Union, Callable, Sequence, Tuple
import gym
import numpy as np
from gym.utils import seeding
from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor, SensorSuite
from allenact.base_abstractions.task import Task, TaskSampler
from allenact.utils.experiment_utils import set_seed
from allenact.utils.system import get_logger
from allenact_plugins.gym_plugin.gym_environment import GymEnvironment
from allenact_plugins.gym_plugin.gym_sensors import GymBox2DSensor, GymMuJoCoSensor
class GymTask(Task[gym.Env]):
    """Base class for tasks backed by a gym environment.

    Concrete subclasses must provide `class_action_names` and `_step`.
    """

    def __init__(
        self,
        env: GymEnvironment,
        sensors: Union[SensorSuite, List[Sensor]],
        task_info: Dict[str, Any],
        **kwargs,
    ):
        """Create the task, taking `max_steps` from `env.spec.max_episode_steps`."""
        super().__init__(
            env=env,
            sensors=sensors,
            task_info=task_info,
            max_steps=env.spec.max_episode_steps,
            **kwargs,
        )
        self._gym_done = False
        self.task_name: str = self.env.spec.id

    @property
    def action_space(self) -> gym.spaces.Space:
        """The action space of the underlying environment."""
        return self.env.action_space

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Render the environment; mode "rgb" is translated to "rgb_array"."""
        gym_mode = "rgb_array" if mode == "rgb" else mode
        return self.env.render(mode=gym_mode)

    def get_observations(
        self, *args, gym_obs: Optional[Dict[str, Any]] = None, **kwargs
    ) -> Any:
        """Query the sensor suite, forwarding `gym_obs` to every sensor."""
        return self.sensor_suite.get_observations(
            env=self.env, task=self, gym_obs=gym_obs
        )

    def reached_terminal_state(self) -> bool:
        """Whether the done flag stored in `_gym_done` is set."""
        return self._gym_done

    def close(self) -> None:
        """No-op."""
        pass

    def metrics(self) -> Dict[str, Any]:
        """Combine base metrics, environment metrics (as floats) and "success".

        "success" is `env.was_successful` when the environment has that
        attribute, otherwise whether the cumulative reward is positive.
        """
        # noinspection PyUnresolvedReferences,PyCallingNonCallable
        env_metrics = {}
        if hasattr(self.env, "metrics"):
            env_metrics = self.env.metrics()

        combined = dict(super().metrics())
        combined.update((name, float(value)) for name, value in env_metrics.items())

        if hasattr(self.env, "was_successful"):
            succeeded = self.env.was_successful
        else:
            succeeded = self.cumulative_reward > 0
        combined["success"] = int(succeeded)

        return combined
class GymContinuousTask(GymTask):
    """Continuous-control task for gym Box2D & MuJoCo environments, letting
    allenact interface with gym tasks."""

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Return an empty tuple (no action names are defined here)."""
        return ()

    def _step(self, action: Sequence[float]) -> RLStepResult:
        """Step the environment with `action` and package the outcome."""
        step_output = self.env.step(action=np.array(action))
        gym_obs, reward, self._gym_done, info = step_output

        # Observations are gathered before querying `is_done`.
        observation = self.get_observations(gym_obs=gym_obs)
        done = self.is_done()

        return RLStepResult(
            observation=observation,
            reward=reward,
            done=done,
            info=info,
        )
def default_task_selector(env_name: str) -> type:
    """Default task-type lookup used by `GymTaskSampler`.

    Returns `GymContinuousTask` for the supported Box2D and MuJoCo
    environment names and raises `NotImplementedError` otherwise.
    """
    box2d_env_names = (
        "CarRacing-v0",
        "LunarLanderContinuous-v2",
        "BipedalWalker-v2",
        "BipedalWalkerHardcore-v2",
    )
    mujoco_env_names = (
        "InvertedPendulum-v2",
        "Ant-v2",
        "InvertedDoublePendulum-v2",
        "Humanoid-v2",
        "Reacher-v2",
        "Hopper-v2",
        "HalfCheetah-v2",
        "Swimmer-v2",
        "Walker2d-v2",
    )

    if env_name not in box2d_env_names + mujoco_env_names:
        raise NotImplementedError()

    return GymContinuousTask
def sensor_selector(env_name: str) -> Sensor:
    """Default sensor lookup used by `GymTaskSampler`.

    Returns a `GymBox2DSensor` for supported Box2D environment names, a
    `GymMuJoCoSensor` for supported MuJoCo environment names, and raises
    `NotImplementedError` otherwise.
    """
    box2d_env_names = (
        "CarRacing-v0",
        "LunarLanderContinuous-v2",
        "BipedalWalker-v2",
        "BipedalWalkerHardcore-v2",
        "LunarLander-v2",
    )
    if env_name in box2d_env_names:
        return GymBox2DSensor(env_name)

    mujoco_env_names = (
        "InvertedPendulum-v2",
        "Ant-v2",
        "InvertedDoublePendulum-v2",
        "Humanoid-v2",
        "Reacher-v2",
        "Hopper-v2",
        "HalfCheetah-v2",
        "Swimmer-v2",
        "Walker2d-v2",
    )
    if env_name in mujoco_env_names:
        return GymMuJoCoSensor(gym_env_name=env_name, uuid="gym_mujoco_data")

    raise NotImplementedError()
class GymTaskSampler(TaskSampler):
    """TaskSampler for gym environments.

    A single `GymEnvironment` is created up front and re-seeded / reset for
    every sampled task. Seeds are either drawn from a fixed list (when
    `num_unique_seeds` / `task_seeds_list` is given) or sampled at random.
    """

    def __init__(
        self,
        gym_env_type: str = "LunarLanderContinuous-v2",
        sensors: Optional[Union[SensorSuite, List[Sensor]]] = None,
        max_tasks: Optional[int] = None,
        num_unique_seeds: Optional[int] = None,
        task_seeds_list: Optional[List[int]] = None,
        deterministic_sampling: bool = False,
        task_selector: Callable[[str], type] = default_task_selector,
        repeat_failed_task_for_min_steps: int = 0,
        extra_task_kwargs: Optional[Dict] = None,
        seed: Optional[int] = None,
        **kwargs,
    ):
        """Configure the sampler and create its environment.

        # Parameters

        gym_env_type : Name of the gym environment to create.
        sensors : Sensors (or a `SensorSuite`) for sampled tasks; when `None`
            a default sensor is chosen with `sensor_selector`.
        max_tasks : Maximum number of tasks to sample (`None` for no limit).
        num_unique_seeds : Number of distinct environment seeds to use.
        task_seeds_list : Explicit list of environment seeds; when only
            `num_unique_seeds` is given, `range(num_unique_seeds)` is used.
        deterministic_sampling : Cycle through the seed list in order instead
            of sampling from it; requires a seed list.
        task_selector : Maps the environment name to the task class.
        repeat_failed_task_for_min_steps : When positive (and no seed list is
            used), a task whose cumulative reward was 0 is repeated with the
            same seed until this many steps were taken with that seed.
        extra_task_kwargs : Extra keyword arguments for the task constructor.
        seed : Seed for the sampler's random generator (random if `None`).
        """
        super().__init__()

        self.gym_env_type = gym_env_type

        self.sensors: SensorSuite
        if sensors is None:
            self.sensors = SensorSuite([sensor_selector(self.gym_env_type)])
        else:
            self.sensors = (
                SensorSuite(sensors)
                if not isinstance(sensors, SensorSuite)
                else sensors
            )

        self.max_tasks = max_tasks
        self.num_unique_seeds = num_unique_seeds
        self.deterministic_sampling = deterministic_sampling
        self.repeat_failed_task_for_min_steps = repeat_failed_task_for_min_steps
        self.extra_task_kwargs = (
            extra_task_kwargs if extra_task_kwargs is not None else {}
        )

        self._last_env_seed: Optional[int] = None
        self._last_task: Optional[GymTask] = None
        self._number_of_steps_taken_with_task_seed = 0

        assert (not deterministic_sampling) or repeat_failed_task_for_min_steps <= 0, (
            "If `deterministic_sampling` is True then we require"
            " `repeat_failed_task_for_min_steps <= 0`"
        )
        assert (self.num_unique_seeds is None) or (
            0 < self.num_unique_seeds
        ), "`num_unique_seeds` must be a positive integer."

        # Reconcile `num_unique_seeds` and `task_seeds_list`: an explicit list
        # defines the count; a bare count defines the list `0..count-1`.
        self.task_seeds_list = task_seeds_list
        if self.task_seeds_list is not None:
            if self.num_unique_seeds is not None:
                assert self.num_unique_seeds == len(
                    self.task_seeds_list
                ), "`num_unique_seeds` must equal the length of `task_seeds_list` if both specified."
            self.num_unique_seeds = len(self.task_seeds_list)
        elif self.num_unique_seeds is not None:
            self.task_seeds_list = list(range(self.num_unique_seeds))

        if num_unique_seeds is not None and repeat_failed_task_for_min_steps > 0:
            raise NotImplementedError(
                "`repeat_failed_task_for_min_steps` must be <=0 if number"
                " of unique seeds is not None."
            )

        assert (not deterministic_sampling) or (
            self.num_unique_seeds is not None
        ), "Cannot use deterministic sampling when `num_unique_seeds` is `None`."

        if (not deterministic_sampling) and self.max_tasks:
            get_logger().warning(
                "`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,"
                " this might be a mistake when running testing."
            )

        if seed is not None:
            self.set_seed(seed)
        else:
            self.np_seeded_random_gen, _ = seeding.np_random(
                random.randint(0, 2**31 - 1)
            )

        self.num_tasks_generated = 0
        self.task_type = task_selector(self.gym_env_type)
        self.env: GymEnvironment = GymEnvironment(self.gym_env_type)

    @property
    def length(self) -> Union[int, float]:
        """Number of tasks that can still be sampled (`inf` without `max_tasks`)."""
        return (
            float("inf")
            if self.max_tasks is None
            else self.max_tasks - self.num_tasks_generated
        )

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Number of unique seeds, or `None` when seeds are unrestricted."""
        return None if self.num_unique_seeds is None else self.num_unique_seeds

    @property
    def last_sampled_task(self) -> Optional[Task]:
        """The task most recently returned by `next_task` (`None` if none yet)."""
        return self._last_task

    def next_task(self, force_advance_scene: bool = False) -> Optional[GymTask]:
        """Seed and reset the environment, then return a new task for it.

        Returns `None` once `max_tasks` tasks have been generated.
        `force_advance_scene` is not used.
        """
        if self.length <= 0:
            return None

        repeating = False
        if self.num_unique_seeds is not None:
            if self.deterministic_sampling:
                # Cycle through the seed list in order.
                self._last_env_seed = self.task_seeds_list[
                    self.num_tasks_generated % len(self.task_seeds_list)
                ]
            else:
                self._last_env_seed = self.np_seeded_random_gen.choice(
                    self.task_seeds_list
                )
        else:
            if self._last_task is not None:
                self._number_of_steps_taken_with_task_seed += (
                    self._last_task.num_steps_taken()
                )

            # Repeat the previous seed while the previous task earned no
            # reward and the step budget for that seed is not yet used up.
            if (
                self._last_env_seed is not None
                and self._number_of_steps_taken_with_task_seed
                < self.repeat_failed_task_for_min_steps
                and self._last_task.cumulative_reward == 0
            ):
                repeating = True
            else:
                self._number_of_steps_taken_with_task_seed = 0
                self._last_env_seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)

        task_has_same_seed_reset = hasattr(self.env, "same_seed_reset")

        if repeating and task_has_same_seed_reset:
            # noinspection PyUnresolvedReferences
            self.env.same_seed_reset()
        else:
            self.env.seed(self._last_env_seed)
            self.env.saved_seed = self._last_env_seed
            self.env.reset()

        self.num_tasks_generated += 1

        task_info = {"id": "random%d" % random.randint(0, 2**63 - 1)}

        self._last_task = self.task_type(
            **dict(env=self.env, sensors=self.sensors, task_info=task_info),
            **self.extra_task_kwargs,
        )

        return self._last_task

    def close(self) -> None:
        """Close the underlying environment."""
        self.env.close()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Always `True` for this sampler."""
        return True

    def reset(self) -> None:
        """Restart task counting and reset the environment."""
        self.num_tasks_generated = 0
        self.env.reset()

    def set_seed(self, seed: int) -> None:
        """Re-create the sampler's random generator from `seed`.

        When `seed` is not `None`, the module-level `set_seed` utility is
        also called with it.
        """
        self.np_seeded_random_gen, _ = seeding.np_random(seed)
        if seed is not None:
            set_seed(seed)
================================================
FILE: allenact_plugins/habitat_plugin/__init__.py
================================================
from allenact.utils.system import ImportChecker
# Import habitat and habitat_sim inside `ImportChecker`, passing it a message
# that points to the habitat installation instructions.
# NOTE(review): presumably `ImportChecker` surfaces this message when one of
# the imports below fails -- confirm against `allenact.utils.system`.
with ImportChecker(
    "\n\nPlease install habitat following\n\n"
    "https://allenact.org/installation/installation-framework/#installation-of-habitat\n\n"
):
    import habitat
    import habitat_sim
================================================
FILE: allenact_plugins/habitat_plugin/data/__init__.py
================================================
================================================
FILE: allenact_plugins/habitat_plugin/extra_environment.yml
================================================
channels:
- defaults
- conda-forge
- aihabitat
dependencies:
- habitat-sim=0.1.5
- numba
- pip
- pip:
- "--editable=git+https://github.com/Lucaweihs/habitat-lab.git@99124c785bd5ca51e321ea20462f71071cd43ae2#egg=habitat"
- numpy-quaternion
- pyquaternion>=0.9.9
================================================
FILE: allenact_plugins/habitat_plugin/extra_environment_headless.yml
================================================
channels:
- defaults
- conda-forge
- aihabitat
dependencies:
- habitat-sim=0.1.5
- headless
- numba
- pip
- pip:
- "--editable=git+https://github.com/Lucaweihs/habitat-lab.git@99124c785bd5ca51e321ea20462f71071cd43ae2#egg=habitat"
- numpy-quaternion
- pyquaternion>=0.9.9
================================================
FILE: allenact_plugins/habitat_plugin/extra_requirements.txt
================================================
habitat @ git+https://github.com/facebookresearch/habitat-lab.git@33654923dc733f5fcea23aea6391034c3f694a67
numpy-quaternion
pyquaternion>=0.9.9
numba
================================================
FILE: allenact_plugins/habitat_plugin/habitat_constants.py
================================================
import os
# Location of the habitat-lab checkout: taken from the `HABITAT_BASE_DIR`
# environment variable when set, otherwise `external_projects/habitat-lab`
# under the current working directory.
HABITAT_BASE = os.getenv(
    "HABITAT_BASE_DIR",
    default=os.path.join(os.getcwd(), "external_projects", "habitat-lab"),
)
# Data directory, always resolved relative to the current working directory.
HABITAT_DATA_BASE = os.path.join(
    os.getcwd(),
    "data",
)

# Fail at import time if either directory is missing.
if (not os.path.exists(HABITAT_BASE)) or (not os.path.exists(HABITAT_DATA_BASE)):
    raise ImportError(
        "In order to run properly the Habitat environment makes several assumptions about the file structure of"
        " the local system. The file structure of the current environment does not seem to respect this required"
        " file structure. Please see https://allenact.org/installation/installation-framework/#installation-of-habitat"
        " for details as to how to set up your local environment to make it possible to use the habitat plugin of"
        " AllenAct."
    )

# Sub-directories derived from the two base paths above.
HABITAT_DATASETS_DIR = os.path.join(HABITAT_DATA_BASE, "datasets")
HABITAT_SCENE_DATASETS_DIR = os.path.join(HABITAT_DATA_BASE, "scene_datasets")
HABITAT_CONFIGS_DIR = os.path.join(HABITAT_BASE, "configs")

# Commit hash of habitat-lab recorded as tested.
TESTED_HABITAT_COMMIT = "33654923dc733f5fcea23aea6391034c3f694a67"

# Action name strings.
# NOTE(review): presumably these match habitat's own action names -- confirm.
MOVE_AHEAD = "MOVE_FORWARD"
ROTATE_LEFT = "TURN_LEFT"
ROTATE_RIGHT = "TURN_RIGHT"
LOOK_DOWN = "LOOK_DOWN"
LOOK_UP = "LOOK_UP"
END = "STOP"
================================================
FILE: allenact_plugins/habitat_plugin/habitat_environment.py
================================================
"""A wrapper for interacting with the Habitat environment."""
import os
from typing import Dict, Union, List, Optional
import numpy as np
import habitat
from allenact.utils.cache_utils import DynamicDistanceCache
from allenact.utils.system import get_logger
from habitat.config import Config
from habitat.core.dataset import Dataset
from habitat.core.simulator import Observations, AgentState, ShortestPathPoint
from habitat.tasks.nav.nav import NavigationEpisode as HabitatNavigationEpisode
class HabitatEnvironment:
    """Wrapper around `habitat.Env` that caches the latest observations."""

    def __init__(self, config: Config, dataset: Dataset, verbose: bool = False) -> None:
        """Create the habitat environment for `config` and `dataset`.

        Unless `verbose` is set, the `GLOG_minloglevel` and `MAGNUM_LOG`
        environment variables are set to quieten logging.
        """
        self.env = habitat.Env(config=config, dataset=dataset)

        if not verbose:
            os.environ.update({"GLOG_minloglevel": "2", "MAGNUM_LOG": "quiet"})

        # Set the target to a random goal from the provided list for this episode
        self.goal_index = 0
        self.last_geodesic_distance = None
        self.distance_cache = DynamicDistanceCache(rounding=1)
        self._current_frame: Optional[np.ndarray] = None

    @property
    def scene_name(self) -> str:
        """Scene id of the current episode."""
        episode = self.env.current_episode
        return episode.scene_id

    @property
    def current_frame(self) -> np.ndarray:
        """Observations from the latest `reset`/`step`; asserts they exist."""
        latest = self._current_frame
        assert latest is not None
        return latest

    def step(self, action_dict: Dict[str, Union[str, int]]) -> Observations:
        """Execute `action_dict["action"]` and cache the observations."""
        observations = self.env.step(action_dict["action"])
        self._current_frame = observations
        return observations

    def get_location(self) -> Optional[np.ndarray]:
        """Position from the simulator's agent state."""
        agent_state = self.env.sim.get_agent_state()
        return agent_state.position

    def get_rotation(self) -> Optional[List[float]]:
        """Rotation from the simulator's agent state."""
        agent_state = self.env.sim.get_agent_state()
        return agent_state.rotation

    def get_shortest_path(
        self,
        source_state: AgentState,
        target_state: AgentState,
    ) -> List[ShortestPathPoint]:
        """Ask the simulator for the action-space shortest path between states."""
        targets = [target_state]
        return self.env.sim.action_space_shortest_path(source_state, targets)

    def get_current_episode(self) -> HabitatNavigationEpisode:
        """The environment's current episode."""
        return self.env.current_episode  # type: ignore

    # noinspection PyMethodMayBeStatic
    def start(self):
        """Only logs a debug message; nothing needs starting."""
        get_logger().debug("No need to start a habitat_plugin env")

    def stop(self):
        """Close the underlying habitat environment."""
        self.env.close()

    def reset(self):
        """Reset the underlying environment and cache the observations."""
        self._current_frame = self.env.reset()

    @property
    def last_action_success(self) -> bool:
        """Always `True`."""
        # For now we can not have failure of actions
        return True

    @property
    def num_episodes(self) -> int:
        """Number of episodes held by the environment's episode iterator."""
        episode_iterator = self.env.episode_iterator
        assert isinstance(episode_iterator, habitat.core.dataset.EpisodeIterator)
        return len(episode_iterator.episodes)
================================================
FILE: allenact_plugins/habitat_plugin/habitat_preprocessors.py
================================================
================================================
FILE: allenact_plugins/habitat_plugin/habitat_sensors.py
================================================
from typing import Any, Optional, Tuple, TYPE_CHECKING
import gym
import numpy as np
from pyquaternion import Quaternion
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact_plugins.habitat_plugin.habitat_environment import HabitatEnvironment
if TYPE_CHECKING:
from allenact_plugins.habitat_plugin.habitat_tasks import PointNavTask, ObjectNavTask # type: ignore
class RGBSensorHabitat(RGBSensor[HabitatEnvironment, Task[HabitatEnvironment]]):
    """RGB sensor reading the "rgb" entry of a `HabitatEnvironment`'s current frame."""

    # For backwards compatibility
    def __init__(
        self,
        use_resnet_normalization: bool = False,
        mean: Optional[np.ndarray] = np.array(
            [[[0.485, 0.456, 0.406]]], dtype=np.float32
        ),
        stdev: Optional[np.ndarray] = np.array(
            [[[0.229, 0.224, 0.225]]], dtype=np.float32
        ),
        height: Optional[int] = None,
        width: Optional[int] = None,
        uuid: str = "rgb",
        output_shape: Optional[Tuple[int, ...]] = None,
        output_channels: int = 3,
        unnormalized_infimum: float = 0.0,
        unnormalized_supremum: float = 1.0,
        scale_first: bool = True,
        **kwargs: Any
    ):
        # All arguments are forwarded to the parent constructor through
        # `locals()`, so no additional local variables may be defined here.
        super().__init__(**prepare_locals_for_super(locals()))

    def frame_from_env(
        self, env: HabitatEnvironment, task: Optional[Task[HabitatEnvironment]]
    ) -> np.ndarray:
        """Return a copy of the "rgb" entry of the environment's current frame."""
        return env.current_frame["rgb"].copy()
class DepthSensorHabitat(DepthSensor[HabitatEnvironment, Task[HabitatEnvironment]]):
    """Depth sensor reading the "depth" entry of a `HabitatEnvironment`'s current frame."""

    # For backwards compatibility
    def __init__(
        self,
        use_resnet_normalization: Optional[bool] = None,
        use_normalization: Optional[bool] = None,
        mean: Optional[np.ndarray] = np.array([[0.5]], dtype=np.float32),
        stdev: Optional[np.ndarray] = np.array([[0.25]], dtype=np.float32),
        height: Optional[int] = None,
        width: Optional[int] = None,
        uuid: str = "depth",
        output_shape: Optional[Tuple[int, ...]] = None,
        output_channels: int = 1,
        unnormalized_infimum: float = 0.0,
        unnormalized_supremum: float = 5.0,
        scale_first: bool = False,
        **kwargs: Any
    ):
        # `use_normalization` wins when given; otherwise fall back to the
        # legacy `use_resnet_normalization` flag, and to False if neither is set.
        if use_normalization is None:
            use_normalization = (
                False if use_resnet_normalization is None else use_resnet_normalization
            )

        # All locals are forwarded to the parent constructor through
        # `locals()`, so no additional local variables may be defined here.
        super().__init__(**prepare_locals_for_super(locals()))

    def frame_from_env(
        self, env: HabitatEnvironment, task: Optional[Task[HabitatEnvironment]]
    ) -> np.ndarray:
        """Return a copy of the "depth" entry of the environment's current frame."""
        depth_frame = env.current_frame["depth"]
        return depth_frame.copy()
class TargetCoordinatesSensorHabitat(Sensor[HabitatEnvironment, "PointNavTask"]):
    """Sensor returning the "pointgoal_with_gps_compass" entry of the current frame."""

    def __init__(
        self, coordinate_dims: int, uuid: str = "target_coordinates_ind", **kwargs: Any
    ):
        """Store `coordinate_dims` and build the matching observation space."""
        self.coordinate_dims = coordinate_dims

        # NOTE: all locals are forwarded through `locals()` below, so no
        # additional local variables may be defined here.
        observation_space = self._get_observation_space()

        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self):
        """Box space with `coordinate_dims` entries in [-3.15, 1000]."""
        # Distance is a non-negative real and angle is normalized to the range (-Pi, Pi] or [-Pi, Pi)
        lower_bound = np.float32(-3.15)
        upper_bound = np.float32(1000)
        return gym.spaces.Box(lower_bound, upper_bound, shape=(self.coordinate_dims,))

    def get_observation(
        self,
        env: HabitatEnvironment,
        task: Optional["PointNavTask"],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Read the goal entry from the environment's current frame."""
        return env.current_frame["pointgoal_with_gps_compass"]
class TargetObjectSensorHabitat(Sensor[HabitatEnvironment, "ObjectNavTask"]):
    """Sensor returning the first element of the current frame's "objectgoal" entry."""

    def __init__(self, num_objects: int, uuid: str = "target_object_id", **kwargs: Any):
        """Build a discrete observation space with `num_objects` values."""
        # NOTE: all locals are forwarded through `locals()` below, so no
        # additional local variables may be defined here.
        observation_space = self._get_observation_space(num_objects)

        super().__init__(**prepare_locals_for_super(locals()))

    @staticmethod
    def _get_observation_space(num_objects: int):
        """Discrete space of size `num_objects`."""
        return gym.spaces.Discrete(num_objects)

    def get_observation(
        self,
        env: HabitatEnvironment,
        task: Optional["ObjectNavTask"],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Read the object goal from the environment's current frame."""
        object_goal = env.current_frame["objectgoal"]
        return object_goal[0]
class AgentCoordinatesSensorHabitat(Sensor[HabitatEnvironment, "PointNavTask"]):
    """Sensor returning the agent's position and a rotation angle.

    The observation is a length-4 array: the three position components
    followed by the `radians` value of the agent's rotation quaternion.
    """

    def __init__(self, uuid: str = "agent_position_and_rotation", **kwargs: Any):
        """Build the observation space and initialise the sensor."""
        # NOTE: all locals are forwarded through `locals()` below, so no
        # additional local variables may be defined here.
        observation_space = self._get_observation_space()

        super().__init__(**prepare_locals_for_super(locals()))

    @staticmethod
    def _get_observation_space():
        """Box space with 4 entries in [-1000, 1000]."""
        lower_bound = np.float32(-1000)
        upper_bound = np.float32(1000)
        return gym.spaces.Box(lower_bound, upper_bound, shape=(4,))

    @staticmethod
    def get_observation(
        env: HabitatEnvironment,
        task: Optional["PointNavTask"],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Query the simulator's agent state for position and rotation."""
        simulator = env.env.sim
        x, y, z = (simulator.get_agent_state().position[axis] for axis in range(3))
        rotation = Quaternion(simulator.get_agent_state().rotation.components)
        return np.array([x, y, z, rotation.radians])
================================================
FILE: allenact_plugins/habitat_plugin/habitat_task_samplers.py
================================================
from typing import List, Optional, Union, Callable, Any, Dict, Type
import gym
import habitat
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import Builder
from allenact_plugins.habitat_plugin.habitat_environment import HabitatEnvironment
from allenact_plugins.habitat_plugin.habitat_tasks import PointNavTask, ObjectNavTask # type: ignore
from habitat.config import Config
class PointNavTaskSampler(TaskSampler):
    """Task sampler producing `PointNavTask`s from a habitat dataset.

    The habitat environment is created lazily, on the first call to
    `next_task`; until then `self.env` is `None`.
    """

    def __init__(
        self,
        env_config: Config,
        sensors: List[Sensor],
        max_steps: int,
        action_space: gym.Space,
        distance_to_goal: float,
        filter_dataset_func: Optional[
            Callable[[habitat.Dataset], habitat.Dataset]
        ] = None,
        **task_init_kwargs,
    ) -> None:
        """Store the sampler configuration (no environment is created here).

        # Parameters

        env_config : Habitat configuration used to build the dataset and environment.
        sensors : Sensors passed to every sampled task.
        max_steps : Maximum number of steps passed to every sampled task.
        action_space : Action space passed to every sampled task.
        distance_to_goal : Stored in each task's `task_info`.
        filter_dataset_func : Optional function applied to the dataset before use.
        task_init_kwargs : Extra keyword arguments for the `PointNavTask` constructor.
        """
        self.grid_size = 0.25
        self.env: Optional[HabitatEnvironment] = None
        self.max_tasks: Optional[int] = None
        self.reset_tasks: Optional[int] = None
        self.sensors = sensors
        self.max_steps = max_steps
        self._action_space = action_space
        self.env_config = env_config
        self.distance_to_goal = distance_to_goal
        self.seed: Optional[int] = None
        self.filter_dataset_func = filter_dataset_func

        self._last_sampled_task: Optional[PointNavTask] = None

        self.task_init_kwargs = task_init_kwargs

    def _create_environment(self) -> HabitatEnvironment:
        """Build the dataset and the environment, and set the task budget.

        Raises `RuntimeError` when the dataset is empty, before or after
        filtering. In "train" mode the number of tasks is unbounded,
        otherwise it equals the number of episodes.
        """
        dataset = habitat.make_dataset(
            self.env_config.DATASET.TYPE, config=self.env_config.DATASET
        )
        if len(dataset.episodes) == 0:
            raise RuntimeError("Empty input dataset.")

        if self.filter_dataset_func is not None:
            dataset = self.filter_dataset_func(dataset)
            if len(dataset.episodes) == 0:
                raise RuntimeError("Empty dataset after filtering.")

        env = HabitatEnvironment(config=self.env_config, dataset=dataset)
        self.max_tasks = None if self.env_config.MODE == "train" else env.num_episodes
        self.reset_tasks = self.max_tasks

        # Apply a seed that was requested through `set_seed` before the
        # environment existed.
        if self.seed is not None:
            env.env.seed(self.seed)

        return env

    @property
    def length(self) -> Union[int, float]:
        """
        @return: Number of total tasks remaining that can be sampled. Can be float('inf').
        """
        return float("inf") if self.max_tasks is None else self.max_tasks

    @property
    def total_unique(self) -> Union[int, float, None]:
        """Number of episodes, or `None` while the environment is not yet created."""
        if self.env is None:
            return None
        return self.env.num_episodes

    @property
    def last_sampled_task(self) -> Optional[PointNavTask]:
        """The task most recently returned by `next_task`."""
        return self._last_sampled_task

    def close(self) -> None:
        """Stop the environment if it was created."""
        if self.env is not None:
            self.env.stop()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """
        @return: True if all Tasks that can be sampled by this sampler have the
            same observation space. Otherwise False.
        """
        return True

    def next_task(self, force_advance_scene=False) -> Optional[PointNavTask]:
        """Reset the environment to its next episode and wrap it in a task.

        Returns `None` once the task budget is exhausted.
        `force_advance_scene` is not used.
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        # Lazily create the environment on first use.
        if self.env is None:
            self.env = self._create_environment()
        self.env.reset()

        ep_info = self.env.get_current_episode()
        assert len(ep_info.goals) == 1
        target = ep_info.goals[0].position

        task_info = {
            "target": target,
            "distance_to_goal": self.distance_to_goal,
            "episode_id": ep_info.episode_id,
            "scene_id": ep_info.scene_id.split("/")[-1],
            **ep_info.info,
        }

        self._last_sampled_task = PointNavTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            **self.task_init_kwargs,
        )

        if self.max_tasks is not None:
            self.max_tasks -= 1

        return self._last_sampled_task

    def reset(self):
        """Restore the task budget to its initial value."""
        self.max_tasks = self.reset_tasks

    def set_seed(self, seed: int):
        """Record `seed` and seed the habitat environment with it.

        If the environment has not been created yet, the seed is applied
        when it is created.
        """
        self.seed = seed
        if seed is not None and self.env is not None:
            self.env.env.seed(seed)
class ObjectNavTaskSampler(TaskSampler):
    """Task sampler producing object-navigation tasks from a habitat dataset.

    The habitat environment is created lazily, on the first call to
    `next_task`; until then `self.env` is `None`.
    """

    def __init__(
        self,
        env_config: Config,
        sensors: List[Sensor],
        max_steps: int,
        action_space: gym.Space,
        filter_dataset_func: Optional[
            Callable[[habitat.Dataset], habitat.Dataset]
        ] = None,
        task_kwargs: Optional[Dict[str, Any]] = None,
        objectnav_task_type: Union[
            Type[ObjectNavTask], Builder[ObjectNavTask]
        ] = ObjectNavTask,
        **kwargs,
    ) -> None:
        """Store the sampler configuration (no environment is created here).

        # Parameters

        env_config : Habitat configuration used to build the dataset and environment.
        sensors : Sensors passed to every sampled task.
        max_steps : Maximum number of steps passed to every sampled task.
        action_space : Action space passed to every sampled task.
        filter_dataset_func : Optional function applied to the dataset before use.
        task_kwargs : Extra keyword arguments for the task constructor
            (`None` means no extra arguments).
        objectnav_task_type : Class or builder called to create each task.
        kwargs : Accepted but not used.
        """
        self.grid_size = 0.25
        self.env: Optional[HabitatEnvironment] = None
        self.max_tasks: Optional[int] = None
        self.reset_tasks: Optional[int] = None
        self.sensors = sensors
        self.max_steps = max_steps
        self._action_space = action_space
        self.env_config = env_config
        self.seed: Optional[int] = None
        self.filter_dataset_func = filter_dataset_func
        self.objectnav_task_type = objectnav_task_type

        self.task_kwargs = {} if task_kwargs is None else task_kwargs
        self._last_sampled_task: Optional[ObjectNavTask] = None

    def _create_environment(self) -> HabitatEnvironment:
        """Build the dataset and the environment, and set the task budget.

        Raises `RuntimeError` when filtering leaves the dataset empty. In
        "train" mode the number of tasks is unbounded, otherwise it equals
        the number of episodes.
        """
        dataset = habitat.make_dataset(
            self.env_config.DATASET.TYPE, config=self.env_config.DATASET
        )

        if self.filter_dataset_func is not None:
            dataset = self.filter_dataset_func(dataset)
            if len(dataset.episodes) == 0:
                raise RuntimeError("Empty dataset after filtering.")

        env = HabitatEnvironment(config=self.env_config, dataset=dataset)
        self.max_tasks = (
            None if self.env_config.MODE == "train" else env.num_episodes
        )  # mp3d objectnav val -> 2184
        self.reset_tasks = self.max_tasks

        # Apply a seed that was requested through `set_seed` before the
        # environment existed.
        if self.seed is not None:
            env.env.seed(self.seed)

        return env

    @property
    def length(self) -> Union[int, float]:
        """
        @return: Number of total tasks remaining that can be sampled. Can be float('inf').
        """
        return float("inf") if self.max_tasks is None else self.max_tasks

    @property
    def total_unique(self) -> Union[int, float, None]:
        """Number of episodes, or `None` while the environment is not yet created."""
        if self.env is None:
            return None
        return self.env.num_episodes

    @property
    def last_sampled_task(self) -> Optional[ObjectNavTask]:
        """The task most recently returned by `next_task`."""
        return self._last_sampled_task

    def close(self) -> None:
        """Stop the environment if it was created."""
        if self.env is not None:
            self.env.stop()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """
        @return: True if all Tasks that can be sampled by this sampler have the
            same observation space. Otherwise False.
        """
        return True

    def next_task(self, force_advance_scene=False) -> Optional[ObjectNavTask]:
        """Reset the environment to its next episode and wrap it in a task.

        Returns `None` once the task budget is exhausted. When
        `force_advance_scene` is set and the environment already exists, the
        episode iterator is asked to switch scene before the reset.
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        if self.env is not None:
            if force_advance_scene:
                self.env.env._episode_iterator._forced_scene_switch()
                self.env.env._episode_iterator._set_shuffle_intervals()
            self.env.reset()
        else:
            # Lazily create the environment on first use.
            self.env = self._create_environment()
            self.env.reset()

        ep_info = self.env.get_current_episode()

        # All goals of an episode are required to share one object category.
        target_categories = {g.object_category for g in ep_info.goals}
        assert len(target_categories) == 1

        target_category = list(target_categories)[0]

        task_info = {
            "target_category": target_category,
            "episode_id": ep_info.episode_id,
            "scene_id": ep_info.scene_id.split("/")[-1],
            **ep_info.info,
        }

        self._last_sampled_task = self.objectnav_task_type(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            **self.task_kwargs,
        )

        if self.max_tasks is not None:
            self.max_tasks -= 1

        return self._last_sampled_task

    def reset(self):
        """Restore the task budget to its initial value."""
        self.max_tasks = self.reset_tasks

    def set_seed(self, seed: int):
        """Record `seed` and seed the habitat environment with it.

        If the environment has not been created yet, the seed is applied
        when it is created.
        """
        self.seed = seed
        if seed is not None and self.env is not None:
            self.env.env.seed(seed)
================================================
FILE: allenact_plugins/habitat_plugin/habitat_tasks.py
================================================
from abc import ABC
from typing import Tuple, List, Dict, Any, Optional, Union, Sequence, cast
import gym
import numpy as np
from habitat.sims.habitat_simulator.actions import HabitatSimActions
from habitat.sims.habitat_simulator.habitat_simulator import HabitatSim
from habitat.tasks.nav.shortest_path_follower import ShortestPathFollower
from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact.utils.system import get_logger
from allenact_plugins.habitat_plugin.habitat_constants import (
MOVE_AHEAD,
ROTATE_LEFT,
ROTATE_RIGHT,
END,
LOOK_UP,
LOOK_DOWN,
)
from allenact_plugins.habitat_plugin.habitat_environment import HabitatEnvironment
from allenact_plugins.habitat_plugin.habitat_sensors import (
AgentCoordinatesSensorHabitat,
)
class HabitatTask(Task[HabitatEnvironment], ABC):
    """Abstract base for Habitat tasks.

    Keeps track of the last action (and whether it succeeded), the list of
    actions taken, the positions visited, and an episode id that combines the
    scene name with the Habitat episode id.
    """

    def __init__(
        self,
        env: HabitatEnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        **kwargs,
    ) -> None:
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )

        # Bookkeeping about the most recent action.
        self._last_action: Optional[str] = None
        self._last_action_ind: Optional[int] = None
        self._last_action_success: Optional[bool] = None
        self._actions_taken: List[str] = []

        # The path starts with the agent's initial pose.
        self._positions = []
        start = self.get_agent_position_and_rotation()
        self._positions.append(
            dict(x=start[0], y=start[1], z=start[2], rotation=start[3])
        )

        # Drop the directory part and the last four characters of the scene
        # path, then append the episode id to obtain a globally unique id.
        episode = self.env.get_current_episode()
        scene_file = episode.scene_id.split("/")[-1]
        self._episode_id = scene_file[:-4] + "_" + episode.episode_id

    def get_agent_position_and_rotation(self):
        """Return the observation of `AgentCoordinatesSensorHabitat` for this
        task's environment."""
        return AgentCoordinatesSensorHabitat.get_observation(self.env, self)

    @property
    def last_action(self):
        """The stored last action."""
        return self._last_action

    @last_action.setter
    def last_action(self, value: str):
        self._last_action = value

    @property
    def last_action_success(self):
        """The stored success flag of the last action."""
        return self._last_action_success

    @last_action_success.setter
    def last_action_success(self, value: Optional[bool]):
        self._last_action_success = value

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Return the environment's current "rgb" or "depth" frame.

        Raises `NotImplementedError` for any other `mode`.
        """
        if mode not in ("rgb", "depth"):
            raise NotImplementedError()
        return self.env.current_frame[mode]
class PointNavTask(Task[HabitatEnvironment]):
    """Point-goal navigation task in a Habitat environment.

    The agent chooses among the actions in `_actions`. Every step is rewarded
    with a constant -0.01 plus the decrease in geodesic distance to the goal;
    once the task is done an additional 10.0 (success) or `failed_end_reward`
    (failure) is added (see `judge`).
    """

    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, END)

    def __init__(
        self,
        env: HabitatEnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        failed_end_reward: float = 0.0,
        **kwargs,
    ) -> None:
        """Initializer.

        # Parameters

        env : The Habitat environment wrapper the task acts in.
        sensors : Sensors used to build observations.
        task_info : Task description; `"distance_to_goal"` is read by
            `_is_goal_in_range` and `"target"` by `query_expert`.
        max_steps : Passed through to the base `Task`.
        failed_end_reward : Reward added by `judge` when the task is done
            without success.
        """
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )
        self._took_end_action: bool = False
        self._success: Optional[bool] = False
        self._subsampled_locations_from_which_obj_visible = None

        # Get the geodesic distance to target from the environment and make sure it is
        # a valid value
        self.last_geodesic_distance = self.current_geodesic_dist_to_target()
        self.start_distance = self.last_geodesic_distance
        assert self.last_geodesic_distance is not None

        # noinspection PyProtectedMember
        self._shortest_path_follower = ShortestPathFollower(
            cast(HabitatSim, env.env.sim), env.env._config.TASK.SUCCESS_DISTANCE, False
        )
        self._shortest_path_follower.mode = "geodesic_path"

        self._rewards: List[float] = []
        self._metrics = None
        self.failed_end_reward = failed_end_reward

    def current_geodesic_dist_to_target(self) -> Optional[float]:
        """Return the `"distance_to_goal"` metric of the Habitat environment.

        If the metric is `None`, the task measurements are updated once and
        the metric is read again.
        """
        metrics = self.env.env.get_metrics()
        if metrics["distance_to_goal"] is None:
            habitat_env = self.env.env
            habitat_env.task.measurements.update_measures(
                episode=habitat_env.current_episode, action=None, task=habitat_env.task
            )
            metrics = self.env.env.get_metrics()

        return metrics["distance_to_goal"]

    @property
    def action_space(self):
        """Discrete action space with one entry per name in `_actions`."""
        return gym.spaces.Discrete(len(self._actions))

    def reached_terminal_state(self) -> bool:
        """Whether the Habitat environment reports the episode as over."""
        return self.env.env.episode_over

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Names of the available actions, indexed by action id."""
        return cls._actions

    def close(self) -> None:
        """Stop the environment."""
        self.env.stop()

    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        """Execute the action with index `action` and return the step result."""
        assert isinstance(action, int)
        action = cast(int, action)

        action_str = self.class_action_names()[action]

        self.env.step({"action": action_str})

        if action_str == END:
            # For END, "action success" means the goal was actually in range.
            self._took_end_action = True
            self._success = self._is_goal_in_range()
            self.last_action_success = self._success
        else:
            self.last_action_success = self.env.last_action_success

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )
        return step_result

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Return the environment's current "rgb" or "depth" frame."""
        assert mode in ["rgb", "depth"], "only rgb and depth rendering is implemented"
        # Return the frame that was asked for; previously the rgb frame was
        # returned even when `mode == "depth"`.
        return self.env.current_frame[mode]

    def _is_goal_in_range(self) -> bool:
        """Whether the current geodesic distance is at most
        `task_info["distance_to_goal"]`."""
        return (
            self.current_geodesic_dist_to_target() <= self.task_info["distance_to_goal"]
        )

    def judge(self) -> float:
        """Compute, record and return the reward for the latest step."""
        reward = -0.01

        new_geodesic_distance = self.current_geodesic_dist_to_target()
        if self.last_geodesic_distance is None:
            self.last_geodesic_distance = new_geodesic_distance

        if self.last_geodesic_distance is not None:
            # An invalid new distance is replaced by the previous one, which
            # makes the distance-based part of the reward zero.
            if (
                new_geodesic_distance is None
                or new_geodesic_distance in [float("-inf"), float("inf")]
                or np.isnan(new_geodesic_distance)
            ):
                new_geodesic_distance = self.last_geodesic_distance
            delta_distance_reward = self.last_geodesic_distance - new_geodesic_distance
            reward += delta_distance_reward
            self.last_geodesic_distance = new_geodesic_distance

            if self.is_done():
                reward += 10.0 if self._success else self.failed_end_reward
        else:
            get_logger().warning("Could not get geodesic distance from habitat env.")

        self._rewards.append(float(reward))

        return float(reward)

    def metrics(self) -> Dict[str, Any]:
        """Return episode metrics once the task is done, else an empty dict.

        Calling this on a finished task also clears the recorded rewards.
        """
        if not self.is_done():
            return {}

        _metrics = self.env.env.get_metrics()
        metrics = {
            **super(PointNavTask, self).metrics(),
            "success": 1 * self._success,
            "ep_length": self.num_steps_taken(),
            "reward": np.sum(self._rewards),
            "spl": _metrics["spl"] if _metrics["spl"] is not None else 0.0,
            "dist_to_target": self.current_geodesic_dist_to_target(),
        }
        self._rewards = []
        return metrics

    def query_expert(self, **kwargs) -> Tuple[int, bool]:
        """Return `(action index, whether the expert action is valid)`.

        The expert ends the episode when the goal is in range; otherwise the
        shortest-path follower's Habitat action is translated to the matching
        index in `_actions`. Any other follower output yields `(0, False)`.
        """
        if self._is_goal_in_range():
            return self.class_action_names().index(END), True

        target = self.task_info["target"]
        habitat_action = self._shortest_path_follower.get_next_action(target)
        if habitat_action == HabitatSimActions.MOVE_FORWARD:
            return self.class_action_names().index(MOVE_AHEAD), True
        elif habitat_action == HabitatSimActions.TURN_LEFT:
            return self.class_action_names().index(ROTATE_LEFT), True
        elif habitat_action == HabitatSimActions.TURN_RIGHT:
            return self.class_action_names().index(ROTATE_RIGHT), True
        else:
            return 0, False
class ObjectNavTask(HabitatTask):
    """Object-goal navigation task in a Habitat environment.

    On top of the navigation actions the agent can look up and down,
    optionally limited by `look_constraints`. Every step is rewarded with a
    constant -0.01 plus the decrease in geodesic distance to the goal, and a
    successful END action adds 10.0 (see `judge`).
    """

    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, END, LOOK_UP, LOOK_DOWN)

    def __init__(
        self,
        env: HabitatEnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        look_constraints: Optional[Tuple[int, int]] = None,
        **kwargs,
    ) -> None:
        """Initializer.

        # Parameters

        env : The Habitat environment wrapper the task acts in.
        sensors : Sensors used to build observations.
        task_info : Task description; its `"episode_id"` entry is overwritten
            with the globally unique id computed by `HabitatTask`.
        max_steps : Passed through to the base task.
        look_constraints : Optional `(max_look_up, max_look_down)` pair
            bounding how many net look-up / look-down actions are executed.
        """
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )
        self.look_constraints = look_constraints
        # Net number of look-ups (positive) / look-downs (negative) so far.
        self._look_state = 0

        self._took_end_action: bool = False
        self._success: Optional[bool] = False
        self._subsampled_locations_from_which_obj_visible = None

        # Get the geodesic distance to target from the environment and make sure it is
        # a valid value
        self.last_geodesic_distance = self.current_geodesic_dist_to_target()
        assert not (
            self.last_geodesic_distance is None
            or self.last_geodesic_distance in [float("-inf"), float("inf")]
            or np.isnan(self.last_geodesic_distance)
        ), "Bad geodesic distance"
        self._min_distance_to_goal = self.last_geodesic_distance
        self._num_invalid_actions = 0

        # noinspection PyProtectedMember
        self._shortest_path_follower = ShortestPathFollower(
            env.env.sim, env.env._config.TASK.SUCCESS.SUCCESS_DISTANCE, False
        )
        self._shortest_path_follower.mode = "geodesic_path"

        self._rewards: List[float] = []
        self._metrics = None
        self.task_info["episode_id"] = self._episode_id

    @property
    def action_space(self):
        """Discrete action space with one entry per name in `_actions`."""
        return gym.spaces.Discrete(len(self._actions))

    def reached_terminal_state(self) -> bool:
        """Whether the Habitat environment reports the episode as over."""
        return self.env.env.episode_over

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Names of the available actions, indexed by action id."""
        return cls._actions

    def action_names(self, **kwargs) -> Tuple[str, ...]:
        """Names of the available actions, indexed by action id."""
        return self._actions

    def close(self) -> None:
        """Stop the environment."""
        self.env.stop()

    def current_geodesic_dist_to_target(self) -> Optional[float]:
        """Return the `"distance_to_goal"` metric of the Habitat environment.

        If the metric is `None`, the task measurements are updated once and
        the metric is read again.
        """
        metrics = self.env.env.get_metrics()
        if metrics["distance_to_goal"] is None:
            habitat_env = self.env.env
            habitat_env.task.measurements.update_measures(
                episode=habitat_env.current_episode, action=None, task=habitat_env.task
            )
            metrics = self.env.env.get_metrics()

        return metrics["distance_to_goal"]

    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        """Execute the action with index `action` and return the step result.

        Look actions that would exceed `look_constraints` are recorded but
        not sent to the environment.
        """
        assert isinstance(action, int)
        action = cast(int, action)

        old_pos = self.get_agent_position_and_rotation()

        action_str = self.action_names()[action]
        self._actions_taken.append(action_str)

        skip_action = False
        if self.look_constraints is not None:
            max_look_up, max_look_down = self.look_constraints

            if action_str == LOOK_UP:
                num_look_ups = self._look_state
                # assert num_look_ups <= max_look_up
                skip_action = num_look_ups >= max_look_up
                self._look_state += 1

            if action_str == LOOK_DOWN:
                num_look_downs = -self._look_state
                # assert num_look_downs <= max_look_down
                skip_action = num_look_downs >= max_look_down
                self._look_state -= 1

            # Keep the look state within the allowed range even when the
            # action itself was skipped.
            self._look_state = min(max(self._look_state, -max_look_down), max_look_up)

        if not skip_action:
            self.env.step({"action": action_str})

        if action_str == END:
            # For END, "action success" means the goal was actually in range.
            self._took_end_action = True
            self._success = self._is_goal_in_range()
            self.last_action_success = self._success
        else:
            self.last_action_success = self.env.last_action_success

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )

        # An action that leaves position and rotation unchanged is counted as
        # invalid.
        new_pos = self.get_agent_position_and_rotation()
        if np.all(old_pos == new_pos):
            self._num_invalid_actions += 1

        # Reuse the pose queried just above instead of asking the sensor again.
        self._positions.append(
            {"x": new_pos[0], "y": new_pos[1], "z": new_pos[2], "rotation": new_pos[3]}
        )

        return step_result

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Return the environment's current "rgb" or "depth" frame."""
        assert mode in ["rgb", "depth"], "only rgb and depth rendering is implemented"
        # Return the frame that was asked for; previously the rgb frame was
        # returned even when `mode == "depth"`.
        return self.env.current_frame[mode]

    def _is_goal_in_range(self) -> bool:
        """Whether the environment's current `"spl"` metric is truthy."""
        # The habitat simulator will return an SPL value of 0.0 whenever the goal is not in range
        return bool(self.env.env.get_metrics()["spl"])

    def judge(self) -> float:
        """Compute, record and return the reward for the latest step."""
        # Set default reward
        reward = -0.01

        # Get geodesic distance reward
        new_geodesic_distance = self.current_geodesic_dist_to_target()
        if (
            new_geodesic_distance is None
            or new_geodesic_distance in [float("-inf"), float("inf")]
            or np.isnan(new_geodesic_distance)
        ):
            new_geodesic_distance = self.last_geodesic_distance

        # Update the minimum only after validation: `min` with `None` raises
        # a TypeError and a NaN / -inf value would corrupt the running minimum.
        self._min_distance_to_goal = min(
            new_geodesic_distance, self._min_distance_to_goal
        )

        delta_distance_reward = self.last_geodesic_distance - new_geodesic_distance
        reward += delta_distance_reward

        # Get success reward
        if self._took_end_action:
            reward += 10.0 if self._success else 0.0

        self._rewards.append(float(reward))
        self.last_geodesic_distance = new_geodesic_distance

        return float(reward)

    def metrics(self) -> Dict[str, Any]:
        """Return episode metrics once the task is done, else an empty dict.

        `task_info` is always updated with the actions taken, the action
        names and the followed path. Calling this on a finished task also
        clears the recorded rewards.
        """
        self.task_info["taken_actions"] = self._actions_taken
        self.task_info["action_names"] = self.action_names()
        self.task_info["followed_path"] = self._positions
        if not self.is_done():
            return {}
        else:
            _metrics = self.env.env.get_metrics()
            metrics = {
                "success": self._success,
                "ep_length": self.num_steps_taken(),
                "total_reward": np.sum(self._rewards),
                "spl": _metrics["spl"] if _metrics["spl"] is not None else 0.0,
                "min_distance_to_target": self._min_distance_to_goal,
                "num_invalid_actions": self._num_invalid_actions,
                "task_info": self.task_info,
            }
            self._rewards = []
            return metrics

    def query_expert(self, **kwargs) -> Tuple[int, bool]:
        """Return `(action index, whether the expert action is valid)`.

        The expert ends the episode when the goal is in range. Otherwise the
        shortest-path follower's Habitat action is translated to the matching
        index in `_actions` (the follower's ids are Habitat simulator action
        ids, which are not indices into `_actions`). Any other follower
        output yields `(0, False)`, mirroring `PointNavTask.query_expert`.
        """
        if self._is_goal_in_range():
            return self.class_action_names().index(END), True

        target = self.task_info["target"]
        habitat_action = self._shortest_path_follower.get_next_action(target)
        if habitat_action == HabitatSimActions.MOVE_FORWARD:
            return self.class_action_names().index(MOVE_AHEAD), True
        elif habitat_action == HabitatSimActions.TURN_LEFT:
            return self.class_action_names().index(ROTATE_LEFT), True
        elif habitat_action == HabitatSimActions.TURN_RIGHT:
            return self.class_action_names().index(ROTATE_RIGHT), True
        else:
            return 0, False
================================================
FILE: allenact_plugins/habitat_plugin/habitat_utils.py
================================================
import os
from typing import List
import habitat
from allenact_plugins.habitat_plugin.habitat_constants import (
HABITAT_BASE,
HABITAT_CONFIGS_DIR,
)
from habitat import Config
def construct_env_configs(
    config: Config,
    allow_scene_repeat: bool = False,
) -> List[Config]:
    """Build one Habitat config per process, splitting scenes between them.

    Scenes of the dataset are dealt out to the processes in round-robin
    order so that every environment only has to load its own share.

    # Parameters

    config : Config providing `NUM_PROCESSES`, `SIMULATOR_GPU_IDS` and the
        dataset information needed to create individual environments. It is
        frozen by this call.
    allow_scene_repeat : If `True` and there are fewer distinct scenes than
        processes, scenes are repeated across processes. If `False` in that
        situation, a `RuntimeError` is raised.

    # Returns

    List of Configs, one for each process.
    """
    config.freeze()

    num_processes = config.NUM_PROCESSES
    scenes = habitat.make_dataset(config.DATASET.TYPE).get_scenes_to_load(
        config.DATASET
    )

    if len(scenes) > 0:
        if len(scenes) < num_processes:
            if not allow_scene_repeat:
                raise RuntimeError(
                    "reduce the number of processes as there aren't enough number of scenes."
                )
            # Tile the scene list until there is one scene per process.
            repeats = 1 + (num_processes // len(scenes))
            scenes = (scenes * repeats)[:num_processes]

        scene_splits: List[List] = [[] for _ in range(num_processes)]
        for scene_ind, scene_name in enumerate(scenes):
            scene_splits[scene_ind % len(scene_splits)].append(scene_name)

        assert sum(map(len, scene_splits)) == len(scenes)

    per_process_configs = []
    for process_ind in range(num_processes):
        task_config = config.clone()
        task_config.defrost()

        if len(scenes) > 0:
            task_config.DATASET.CONTENT_SCENES = scene_splits[process_ind]

        if len(config.SIMULATOR_GPU_IDS) == 0:
            # No GPU ids configured: use device id -1.
            device_id = -1
        else:
            device_id = config.SIMULATOR_GPU_IDS[
                process_ind % len(config.SIMULATOR_GPU_IDS)
            ]
        task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = device_id

        task_config.freeze()
        per_process_configs.append(task_config.clone())

    return per_process_configs
def construct_env_configs_mp3d(config: Config) -> List[Config]:
    r"""Create list of Habitat Configs for training on multiple processes
    To allow better performance, dataset are split into small ones for
    each individual env, grouped by scenes.

    Unlike `construct_env_configs`, the scenes are hard-coded below. With a
    single process only one scene is used. Otherwise the `small` and `med`
    scene lists are spread over the processes with `distribute`, assuming 8
    GPUs with 3 processes each; those calls index processes up to 23, so
    `config.NUM_PROCESSES` must be at least 24 in that case.

    Args:
        config: configs that contain num_processes as well as information
            necessary to create individual environments. It is frozen by
            this call.

    Returns:
        List of Configs, one for each process
    """
    config.freeze()
    num_processes = config.NUM_PROCESSES
    configs = []
    # dataset = habitat.make_dataset(config.DATASET.TYPE)
    # scenes = dataset.get_scenes_to_load(config.DATASET)

    if num_processes == 1:
        scene_splits = [["pRbA3pwrgk9"]]
    else:
        small = [
            "rPc6DW4iMge",
            "e9zR4mvMWw7",
            "uNb9QFRL6hY",
            "qoiz87JEwZ2",
            "sKLMLpTHeUy",
            "s8pcmisQ38h",
            "759xd9YjKW5",
            "XcA2TqTSSAj",
            "SN83YJsR3w2",
            "8WUmhLawc2A",
            "JeFG25nYj2p",
            "17DRP5sb8fy",
            "Uxmj2M2itWa",
            "XcA2TqTSSAj",
            "SN83YJsR3w2",
            "8WUmhLawc2A",
            "JeFG25nYj2p",
            "17DRP5sb8fy",
            "Uxmj2M2itWa",
            "D7N2EKCX4Sj",
            "b8cTxDM8gDG",
            "sT4fr6TAbpF",
            "S9hNv5qa7GM",
            "82sE5b5pLXE",
            "pRbA3pwrgk9",
            "aayBHfsNo7d",
            "cV4RVeZvu5T",
            "i5noydFURQK",
            "YmJkqBEsHnH",
            "jh4fc5c5qoQ",
            "VVfe2KiqLaN",
            "29hnd4uzFmX",
            "Pm6F8kyY3z2",
            "JF19kD82Mey",
            "GdvgFV5R1Z5",
            "HxpKQynjfin",
            "vyrNrziPKCB",
        ]
        med = [
            "V2XKFyX4ASd",
            "VFuaQ6m2Qom",
            "ZMojNkEp431",
            "5LpN3gDmAk7",
            "r47D5H71a5s",
            "ULsKaCPVFJR",
            "E9uDoFAP3SH",
            "kEZ7cmS4wCh",
            "ac26ZMwG7aT",
            "dhjEzFoUFzH",
            "mJXqzFtmKg4",
            "p5wJjkQkbXX",
            "Vvot9Ly1tCj",
            "EDJbREhghzL",
            "VzqfbhrpDEA",
            "7y3sRwLe3Va",
        ]

        scene_splits = [[] for _ in range(config.NUM_PROCESSES)]
        # Small scenes go to process slots 1 and 2 of every GPU ...
        distribute(
            small,
            scene_splits,
            num_gpus=8,
            procs_per_gpu=3,
            proc_offset=1,
            scenes_per_process=2,
        )
        # ... and medium scenes to process slot 0 of every GPU.
        distribute(
            med,
            scene_splits,
            num_gpus=8,
            procs_per_gpu=3,
            proc_offset=0,
            scenes_per_process=1,
        )

        # gpu0 = [['pRbA3pwrgk9', '82sE5b5pLXE', 'S9hNv5qa7GM'],
        #         ['Uxmj2M2itWa', '17DRP5sb8fy', 'JeFG25nYj2p'],
        #         ['5q7pvUzZiYa', '759xd9YjKW5', 's8pcmisQ38h'],
        #         ['e9zR4mvMWw7', 'rPc6DW4iMge', 'vyrNrziPKCB']]
        # gpu1 = [['sT4fr6TAbpF', 'b8cTxDM8gDG', 'D7N2EKCX4Sj'],
        #         ['8WUmhLawc2A', 'SN83YJsR3w2', 'XcA2TqTSSAj'],
        #         ['sKLMLpTHeUy', 'qoiz87JEwZ2', 'uNb9QFRL6hY'],
        #         ['V2XKFyX4ASd', 'VFuaQ6m2Qom', 'ZMojNkEp431']]
        # gpu2 = [['5LpN3gDmAk7', 'r47D5H71a5s', 'ULsKaCPVFJR', 'E9uDoFAP3SH'],
        #         ['VVfe2KiqLaN', 'jh4fc5c5qoQ', 'YmJkqBEsHnH'],  # small
        #         ['i5noydFURQK', 'cV4RVeZvu5T', 'aayBHfsNo7d']]  # small
        # gpu3 = [['kEZ7cmS4wCh', 'ac26ZMwG7aT', 'dhjEzFoUFzH'],
        #         ['mJXqzFtmKg4', 'p5wJjkQkbXX', 'Vvot9Ly1tCj']]
        # gpu4 = [['EDJbREhghzL', 'VzqfbhrpDEA', '7y3sRwLe3Va'],
        #         ['ur6pFq6Qu1A', 'PX4nDJXEHrG', 'PuKPg4mmafe']]
        # gpu5 = [['r1Q1Z4BcV1o', 'gTV8FGcVJC9', '1pXnuDYAj8r'],
        #         ['JF19kD82Mey', 'Pm6F8kyY3z2', '29hnd4uzFmX']]  # small
        # gpu6 = [['VLzqgDo317F', '1LXtFkjw3qL'],
        #         ['HxpKQynjfin', 'gZ6f7yhEvPG', 'GdvgFV5R1Z5']]  # small
        # gpu7 = [['D7G3Y4RVNrH', 'B6ByNegPMKs']]
        #
        # scene_splits = gpu0 + gpu1 + gpu2 + gpu3 + gpu4 + gpu5 + gpu6 + gpu7

    gpu_ids = config.SIMULATOR_GPU_IDS
    for i in range(num_processes):
        task_config = config.clone()
        task_config.defrost()
        task_config.DATASET.CONTENT_SCENES = scene_splits[i]

        if len(gpu_ids) == 0:
            # No GPU ids configured: use device id -1, as
            # `construct_env_configs` does, instead of failing on `i % 0`.
            task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = -1
        else:
            task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = gpu_ids[
                i % len(gpu_ids)
            ]

        task_config.freeze()

        configs.append(task_config.clone())

    return configs
def distribute(
    data: List[str],
    scene_splits: List[List],
    num_gpus=8,
    procs_per_gpu=4,
    proc_offset=0,
    scenes_per_process=1,
) -> None:
    """Append every item of `data` to one of the lists in `scene_splits`.

    Item number `idx` is assigned to GPU `idx % num_gpus` and, within that
    GPU, to slot `(idx // num_gpus) % scenes_per_process`. The list it is
    appended to has index `gpu * procs_per_gpu + slot + proc_offset`.
    `scene_splits` is modified in place; nothing is returned.
    """
    for position, scene_name in enumerate(data):
        round_number, gpu_ind = divmod(position, num_gpus)
        slot = round_number % scenes_per_process
        target_process = gpu_ind * procs_per_gpu + slot + proc_offset
        scene_splits[target_process].append(scene_name)
def get_habitat_config(path: str):
    """Load the Habitat config stored in the yaml file at `path`.

    An absolute `path` must exist. A relative `path` is looked up, in order,
    under the current working directory, `HABITAT_BASE` and
    `HABITAT_CONFIGS_DIR`; the first existing candidate is used.

    # Parameters

    path : Path of a `.yml` / `.yaml` file (asserted).

    # Returns

    The result of `habitat.get_config` for the resolved path.

    # Raises

    FileExistsError : If no existing file could be found.
    """
    has_yaml_suffix = path[-4:].lower() == ".yml" or path[-5:].lower() == ".yaml"
    assert has_yaml_suffix, f"path ({path}) must be a .yml or .yaml file."

    if os.path.isabs(path):
        if not os.path.exists(path):
            raise FileExistsError(f"Could not find config file with given path {path}.")
    else:
        search_dirs = [os.getcwd(), HABITAT_BASE, HABITAT_CONFIGS_DIR]
        candidate_paths = [os.path.join(d, path) for d in search_dirs]
        resolved = next(
            (candidate for candidate in candidate_paths if os.path.exists(candidate)),
            None,
        )
        if resolved is None:
            raise FileExistsError(
                f"Could not find config file with given relative path {path}. Tried the following possible absolute"
                f" paths {candidate_paths}."
            )
        path = resolved

    return habitat.get_config(path)
================================================
FILE: allenact_plugins/habitat_plugin/scripts/__init__.py
================================================
================================================
FILE: allenact_plugins/habitat_plugin/scripts/agent_demo.py
================================================
import os
import cv2
import habitat
from pyquaternion import Quaternion
from allenact_plugins.habitat_plugin.habitat_constants import (
HABITAT_CONFIGS_DIR,
HABITAT_DATASETS_DIR,
HABITAT_SCENE_DATASETS_DIR,
)
from allenact_plugins.habitat_plugin.habitat_utils import get_habitat_config
# Keyboard keys that `agent_demo` compares against the key code returned by
# `cv2.waitKey` to pick the next action.
FORWARD_KEY = "w"  # action 1, printed as "FORWARD"
LEFT_KEY = "a"  # action 2, printed as "LEFT"
RIGHT_KEY = "d"  # action 3, printed as "RIGHT"
FINISH = "f"  # action 0, printed as "FINISH"
def transform_rgb_bgr(image):
    """Return `image` with the first three entries of its third axis in
    reversed order (index 2, then 1, then 0), e.g. RGB -> BGR."""
    reversed_channels = [2, 1, 0]
    return image[:, :, reversed_channels]
def agent_demo():
    """Interactively drive an agent through the "Adrian" point-nav scene.

    Builds a Habitat environment from the point-nav task config, shows the
    agent's RGB view in an OpenCV window and maps key presses (`FORWARD_KEY`,
    `LEFT_KEY`, `RIGHT_KEY`, `FINISH`) to actions until the episode is over.
    The agent's position and rotation are printed after every step.
    """
    pointnav_yaml = os.path.join(HABITAT_CONFIGS_DIR, "tasks/pointnav.yaml")
    config = get_habitat_config(pointnav_yaml)
    config.defrost()
    config.DATASET.DATA_PATH = os.path.join(
        HABITAT_DATASETS_DIR, "pointnav/gibson/v1/train/train.json.gz"
    )
    config.DATASET.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR
    config.DATASET.CONTENT_SCENES = ["Adrian"]
    config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = 0
    config.freeze()

    env = habitat.Env(config=config)
    print("Environment creation successful")

    observations = env.reset()
    cv2.imshow("RGB", transform_rgb_bgr(observations["rgb"]))
    print("Agent stepping around inside environment.")

    # Key code -> (action id passed to `env.step`, message to print).
    key_bindings = {
        ord(FORWARD_KEY): (1, "action: FORWARD"),
        ord(LEFT_KEY): (2, "action: LEFT"),
        ord(RIGHT_KEY): (3, "action: RIGHT"),
        ord(FINISH): (0, "action: FINISH"),
    }

    count_steps = 0
    action = None
    while not env.episode_over:
        # Block until a key is pressed in the OpenCV window.
        binding = key_bindings.get(cv2.waitKey(0))
        if binding is None:
            print("INVALID KEY")
            continue
        action, message = binding
        print(message)

        observations = env.step(action)
        count_steps += 1

        print("Position:", env.sim.get_agent_state().position)
        print("Quaternions:", env.sim.get_agent_state().rotation)
        quat = Quaternion(env.sim.get_agent_state().rotation.components)
        print(quat.radians)
        cv2.imshow("RGB", transform_rgb_bgr(observations["rgb"]))

    print("Episode finished after {} steps.".format(count_steps))

    stopped = action == habitat.SimulatorActions.STOP
    if stopped and observations["pointgoal"][0] < 0.2:
        print("you successfully navigated to destination point")
    else:
        print("your navigation was unsuccessful")


if __name__ == "__main__":
    agent_demo()
================================================
FILE: allenact_plugins/habitat_plugin/scripts/make_map.py
================================================
import os
import habitat
import numpy as np
from tqdm import tqdm
from allenact_plugins.habitat_plugin.habitat_constants import (
HABITAT_CONFIGS_DIR,
HABITAT_DATA_BASE,
HABITAT_SCENE_DATASETS_DIR,
HABITAT_DATASETS_DIR,
)
from allenact_plugins.habitat_plugin.habitat_utils import get_habitat_config
# Spacing between neighbouring map cells in simulator coordinates (used by
# `make_map` to turn cell indices into x / z positions) — presumably meters.
map_resolution = 0.05
# The vacancy map built by `make_map` has `map_size` x `map_size` cells,
# centred on the simulator origin.
map_size = 960
def make_map(env, scene):
    """Sample the navigability of `scene` on a grid and save it with `np.save`.

    Cell `[j, i]` of the boolean map holds `env.sim.is_navigable` for the
    point with x taken from column `i`, z taken from row `j` and height 0.0,
    where the grid is centred on the origin and spaced by `map_resolution`.
    The array is saved under `HABITAT_DATA_BASE`, in
    "map_data/pointnav/v1/gibson/data/", using `scene` as the file name.
    """
    half_size = map_size // 2
    vacancy_map = np.zeros([map_size, map_size], dtype=bool)

    for col in tqdm(range(map_size)):
        x = (col - half_size) * map_resolution
        for row in range(map_size):
            z = (row - half_size) * map_resolution
            vacancy_map[row, col] = env.sim.is_navigable([x, 0.0, z])

    out_path = os.path.join(
        HABITAT_DATA_BASE, "map_data/pointnav/v1/gibson/data/" + scene
    )
    np.save(out_path, vacancy_map)
def generate_maps():
    """Create and save a vacancy map for every scene of the point-nav dataset.

    The point-nav task config is pointed at the Gibson training split, the
    scenes to load are listed from the dataset, and for each scene a fresh
    `habitat.Env` restricted to that scene is created and passed to
    `make_map`.
    """
    config = get_habitat_config(
        os.path.join(HABITAT_CONFIGS_DIR, "tasks/pointnav.yaml")
    )
    config.defrost()
    config.DATASET.DATA_PATH = os.path.join(
        HABITAT_DATASETS_DIR, "pointnav/gibson/v1/train/train.json.gz"
    )
    config.DATASET.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR
    config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = 0
    config.freeze()

    dataset = habitat.make_dataset(config.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(config.DATASET)

    for scene in scenes:
        print("Making environment for:", scene)
        # The config is frozen, so it has to be defrosted to switch scene.
        config.defrost()
        config.DATASET.CONTENT_SCENES = [scene]
        config.freeze()
        env = habitat.Env(config=config)
        try:
            make_map(env, scene)
        finally:
            # Always release the environment, even if building the map fails,
            # so a failure does not leak it.
            env.close()


if __name__ == "__main__":
    generate_maps()
================================================
FILE: allenact_plugins/ithor_plugin/__init__.py
================================================
from allenact.utils.system import ImportChecker
# Import `ai2thor` when the plugin package is imported, inside an
# `ImportChecker` that is given installation instructions as its message
# (presumably shown if the import fails — confirm in `allenact.utils.system`).
with ImportChecker(
    "Cannot `import ai2thor`, please install `ai2thor` (`pip install ai2thor`)."
):
    # noinspection PyUnresolvedReferences
    import ai2thor
================================================
FILE: allenact_plugins/ithor_plugin/extra_environment.yml
================================================
channels:
- defaults
- conda-forge
dependencies:
- ai2thor>=2.5.3
- numba
- pip
- colour
- packaging
- pip:
- numpy-quaternion
- pyquaternion>=0.9.9
- python-xlib
================================================
FILE: allenact_plugins/ithor_plugin/extra_requirements.txt
================================================
ai2thor>=2.5.3
numpy-quaternion
pyquaternion>=0.9.9
colour
numba
packaging
python-xlib
================================================
FILE: allenact_plugins/ithor_plugin/ithor_constants.py
================================================
"""Common constants used when training agents to complete tasks in iTHOR, the
interactive version of AI2-THOR."""
from collections import OrderedDict
from typing import Set, Dict
# Names of the agent actions.
MOVE_AHEAD = "MoveAhead"
ROTATE_LEFT = "RotateLeft"
ROTATE_RIGHT = "RotateRight"
LOOK_DOWN = "LookDown"
LOOK_UP = "LookUp"
END = "End"
# Default `visibility_distance` of `IThorEnvironment` (documented there as a
# distance in meters).
VISIBILITY_DISTANCE = 1.25
# Default camera field of view of `IThorEnvironment` (presumably in degrees).
FOV = 90.0
# Scene types in the order used for scene numbering: the type at position `k`
# (0-based) has `type_ind == k + 1` in `make_scene_name`.
ORDERED_SCENE_TYPES = ("kitchens", "livingrooms", "bedrooms", "bathrooms")
NUM_SCENE_TYPES = len(ORDERED_SCENE_TYPES)
def make_scene_name(type_ind, scene_num):
    """Build the scene name for scene number `scene_num` of type `type_ind`.

    For `type_ind == 1` the name is "FloorPlan<scene_num>_physics". For any
    other type the scene number is preceded by the type index, with a "0"
    inserted for scene numbers below 10 (e.g. "FloorPlan205_physics").
    """
    if type_ind == 1:
        middle = str(scene_num)
    else:
        padding = "0" if scene_num < 10 else ""
        middle = str(type_ind) + padding + str(scene_num)
    return "FloorPlan" + middle + "_physics"
# Scene type -> tuple of its 30 scene names (scene numbers 1 through 30), in
# the order given by `ORDERED_SCENE_TYPES`.
SCENES_TYPE_TO_SCENE_NAMES = OrderedDict(
    [
        (
            ORDERED_SCENE_TYPES[type_ind - 1],
            tuple(
                make_scene_name(type_ind=type_ind, scene_num=scene_num)
                for scene_num in range(1, 31)
            ),
        )
        for type_ind in range(1, NUM_SCENE_TYPES + 1)
    ]
)
# Per-type split of the 30 scenes: the first 20 are used for training, ...
SCENES_TYPE_TO_TRAIN_SCENE_NAMES = OrderedDict(
    (key, scenes[:20]) for key, scenes in SCENES_TYPE_TO_SCENE_NAMES.items()
)
# ... the next 5 for validation, ...
SCENES_TYPE_TO_VALID_SCENE_NAMES = OrderedDict(
    (key, scenes[20:25]) for key, scenes in SCENES_TYPE_TO_SCENE_NAMES.items()
)
# ... and the last 5 for testing.
SCENES_TYPE_TO_TEST_SCENE_NAMES = OrderedDict(
    (key, scenes[25:30]) for key, scenes in SCENES_TYPE_TO_SCENE_NAMES.items()
)
# Flat tuples of scene names, concatenated over all scene types.
ALL_SCENE_NAMES = sum(SCENES_TYPE_TO_SCENE_NAMES.values(), tuple())
TRAIN_SCENE_NAMES = sum(
    (scenes for scenes in SCENES_TYPE_TO_TRAIN_SCENE_NAMES.values()), tuple()
)
VALID_SCENE_NAMES = sum(
    (scenes for scenes in SCENES_TYPE_TO_VALID_SCENE_NAMES.values()), tuple()
)
TEST_SCENE_NAMES = sum(
    (scenes for scenes in SCENES_TYPE_TO_TEST_SCENE_NAMES.values()), tuple()
)
# Set versions of the flat tuples above.
TRAIN_SCENE_NAMES_SET = set(TRAIN_SCENE_NAMES)
VALID_SCENE_NAMES_SET = set(VALID_SCENE_NAMES)
TEST_SCENE_NAMES_SET = set(TEST_SCENE_NAMES)
_object_type_and_location_tsv = """
AlarmClock bedrooms
Apple kitchens
ArmChair livingrooms,bedrooms
BaseballBat bedrooms
BasketBall bedrooms
Bathtub bathrooms
BathtubBasin bathrooms
Bed bedrooms
Blinds kitchens,bedrooms
Book kitchens,livingrooms,bedrooms
Boots livingrooms,bedrooms
Bottle kitchens
Bowl kitchens,livingrooms,bedrooms
Box livingrooms,bedrooms
Bread kitchens
ButterKnife kitchens
Cabinet kitchens,livingrooms,bedrooms,bathrooms
Candle livingrooms,bathrooms
Cart bathrooms
CD bedrooms
CellPhone kitchens,livingrooms,bedrooms
Chair kitchens,livingrooms,bedrooms
Cloth bedrooms,bathrooms
CoffeeMachine kitchens
CoffeeTable livingrooms,bedrooms
CounterTop kitchens,livingrooms,bedrooms,bathrooms
CreditCard kitchens,livingrooms,bedrooms
Cup kitchens
Curtains kitchens,livingrooms,bedrooms
Desk bedrooms
DeskLamp livingrooms,bedrooms
DiningTable kitchens,livingrooms,bedrooms
DishSponge kitchens,bathrooms
Drawer kitchens,livingrooms,bedrooms,bathrooms
Dresser livingrooms,bedrooms,bathrooms
Egg kitchens
Faucet kitchens,bathrooms
FloorLamp livingrooms,bedrooms
Footstool bedrooms
Fork kitchens
Fridge kitchens
GarbageCan kitchens,livingrooms,bedrooms,bathrooms
HandTowel bathrooms
HandTowelHolder bathrooms
HousePlant kitchens,livingrooms,bedrooms,bathrooms
Kettle kitchens
KeyChain livingrooms,bedrooms
Knife kitchens
Ladle kitchens
Laptop kitchens,livingrooms,bedrooms
LaundryHamper bedrooms
LaundryHamperLid bedrooms
Lettuce kitchens
LightSwitch kitchens,livingrooms,bedrooms,bathrooms
Microwave kitchens
Mirror kitchens,livingrooms,bedrooms,bathrooms
Mug kitchens,bedrooms
Newspaper livingrooms
Ottoman livingrooms,bedrooms
Painting kitchens,livingrooms,bedrooms,bathrooms
Pan kitchens
PaperTowel kitchens,bathrooms
Pen kitchens,livingrooms,bedrooms
Pencil kitchens,livingrooms,bedrooms
PepperShaker kitchens
Pillow livingrooms,bedrooms
Plate kitchens,livingrooms
Plunger bathrooms
Poster bedrooms
Pot kitchens
Potato kitchens
RemoteControl livingrooms,bedrooms
Safe kitchens,livingrooms,bedrooms
SaltShaker kitchens
ScrubBrush bathrooms
Shelf kitchens,livingrooms,bedrooms,bathrooms
ShowerCurtain bathrooms
ShowerDoor bathrooms
ShowerGlass bathrooms
ShowerHead bathrooms
SideTable livingrooms,bedrooms
Sink kitchens,bathrooms
SinkBasin kitchens,bathrooms
SoapBar bathrooms
SoapBottle kitchens,bathrooms
Sofa livingrooms,bedrooms
Spatula kitchens
Spoon kitchens
SprayBottle bathrooms
Statue kitchens,livingrooms,bedrooms
StoveBurner kitchens
StoveKnob kitchens
TeddyBear bedrooms
Television livingrooms,bedrooms
TennisRacket bedrooms
TissueBox livingrooms,bedrooms,bathrooms
Toaster kitchens
Toilet bathrooms
ToiletPaper bathrooms
ToiletPaperHanger bathrooms
Tomato kitchens
Towel bathrooms
TowelHolder bathrooms
TVStand livingrooms
Vase kitchens,livingrooms,bedrooms
Watch livingrooms,bedrooms
WateringCan livingrooms
Window kitchens,livingrooms,bedrooms,bathrooms
WineBottle kitchens
"""
# Object type -> sorted tuple of the scene types it is listed under. Every
# non-empty line of the table is "<object type>\t<comma separated scene types>".
OBJECT_TYPE_TO_SCENE_TYPES = OrderedDict()
for ot_tab_scene_types in _object_type_and_location_tsv.split("\n"):
    if ot_tab_scene_types != "":
        ot, scene_types_csv = ot_tab_scene_types.split("\t")
        OBJECT_TYPE_TO_SCENE_TYPES[ot] = tuple(sorted(scene_types_csv.split(",")))
# Inverse mapping: scene type -> set of the object types listed for it, with
# one (initially empty) entry per scene type in `ORDERED_SCENE_TYPES`.
SCENE_TYPE_TO_OBJECT_TYPES: Dict[str, Set[str]] = OrderedDict(
    ((k, set()) for k in ORDERED_SCENE_TYPES)
)
for ot_tab_scene_types in _object_type_and_location_tsv.split("\n"):
    if ot_tab_scene_types != "":
        ot, scene_types_csv = ot_tab_scene_types.split("\t")
        for scene_type in scene_types_csv.split(","):
            SCENE_TYPE_TO_OBJECT_TYPES[scene_type].add(ot)
================================================
FILE: allenact_plugins/ithor_plugin/ithor_environment.py
================================================
"""A wrapper for engaging with the THOR environment."""
import copy
import functools
import math
import random
from typing import Tuple, Dict, List, Set, Union, Any, Optional, Mapping, cast
import ai2thor.server
import networkx as nx
import numpy as np
from ai2thor.controller import Controller
from scipy.spatial.transform import Rotation
from allenact.utils.system import get_logger
from allenact_plugins.ithor_plugin.ithor_constants import VISIBILITY_DISTANCE, FOV
from allenact_plugins.ithor_plugin.ithor_util import round_to_factor
class IThorEnvironment(object):
"""Wrapper for the ai2thor controller providing additional functionality
and bookkeeping.
See [here](https://ai2thor.allenai.org/documentation/installation) for comprehensive
documentation on AI2-THOR.
# Attributes
controller : The ai2thor controller.
"""
def __init__(
    self,
    x_display: Optional[str] = None,
    docker_enabled: bool = False,
    local_thor_build: Optional[str] = None,
    visibility_distance: float = VISIBILITY_DISTANCE,
    fov: float = FOV,
    player_screen_width: int = 300,
    player_screen_height: int = 300,
    quality: str = "Very Low",
    restrict_to_initially_reachable_points: bool = False,
    make_agents_visible: bool = True,
    object_open_speed: float = 1.0,
    simplify_physics: bool = False,
) -> None:
    """Initializer.

    Stores the given settings and then starts the controller by calling
    `self.start(None)`.

    # Parameters

    x_display : The x display into which to launch ai2thor (possibly necessary if you are running on a server
        without an attached display).
    docker_enabled : Whether or not to run thor in a docker container (useful on a server without an attached
        display so that you don't have to start an x display).
    local_thor_build : The path to a local build of ai2thor. This is probably not necessary for your use case
        and can be safely ignored.
    visibility_distance : The distance (in meters) at which objects, in the viewport of the agent,
        are considered visible by ai2thor and will have their "visible" flag be set to `True` in the metadata.
    fov : The agent's camera's field of view.
    player_screen_width : The width resolution (in pixels) of the images returned by ai2thor.
    player_screen_height : The height resolution (in pixels) of the images returned by ai2thor.
    quality : The quality at which to render. Possible quality settings can be found in
        `ai2thor._quality_settings.QUALITY_SETTINGS`.
    restrict_to_initially_reachable_points : Whether or not to restrict the agent to locations in ai2thor
        that were found to be (initially) reachable by the agent (i.e. reachable by the agent after resetting
        the scene). This can be useful if you want to ensure there are only a fixed set of locations where the
        agent can go.
    make_agents_visible : Whether or not the agent should be visible. Most noticeable when there are multiple agents
        or when quality settings are high so that the agent casts a shadow.
    object_open_speed : How quickly objects should be opened. High speeds mean faster simulation but also mean
        that opening objects have a lot of kinetic energy and can, possibly, knock other objects away.
    simplify_physics : Whether or not to simplify physics when applicable. Currently this only simplifies object
        interactions when opening drawers (when simplified, objects within a drawer do not slide around on
        their own when the drawer is opened or closed, instead they are effectively glued down).
    """
    self._start_player_screen_width = player_screen_width
    self._start_player_screen_height = player_screen_height
    self._local_thor_build = local_thor_build
    self.x_display = x_display
    # The controller does not exist until `start` has been called.
    self.controller: Optional[Controller] = None
    self._started = False
    self._quality = quality

    # These are initialised empty here and assigned elsewhere in the class.
    self._initially_reachable_points: Optional[List[Dict]] = None
    self._initially_reachable_points_set: Optional[Set[Tuple[float, float]]] = None
    self._move_mag: Optional[float] = None
    self._grid_size: Optional[float] = None
    self._visibility_distance = visibility_distance
    self._fov = fov
    self.restrict_to_initially_reachable_points = (
        restrict_to_initially_reachable_points
    )
    self.make_agents_visible = make_agents_visible
    self.object_open_speed = object_open_speed
    self._always_return_visible_range = False
    self.simplify_physics = simplify_physics

    # Start with no scene name; every setting above must be stored first.
    self.start(None)
    # `docker_enabled` is set on the controller only after it was started.
    # noinspection PyTypeHints
    self.controller.docker_enabled = docker_enabled  # type: ignore
@property
def scene_name(self) -> str:
    """Name of the scene recorded in the controller's last event metadata."""
    event_metadata = self.controller.last_event.metadata
    return event_metadata["sceneName"]
@property
def current_frame(self) -> np.ndarray:
    """The `frame` of the controller's last event (the agent's egocentric rgb image)."""
    latest_event = self.controller.last_event
    return latest_event.frame
@property
def last_event(self) -> ai2thor.server.Event:
    """The most recent event held by the underlying controller."""
    controller = self.controller
    return controller.last_event
@property
def started(self) -> bool:
    """Whether the ai2thor controller is currently marked as started."""
    is_started = self._started
    return is_started
@property
def last_action(self) -> str:
    """Name of the most recent action, read from the last event's metadata."""
    event_metadata = self.controller.last_event.metadata
    return event_metadata["lastAction"]


@last_action.setter
def last_action(self, value: str) -> None:
    """Overwrite the recorded name of the most recent action.

    This edits the last event's metadata in place, i.e. it rewrites
    history, so be careful.
    """
    event_metadata = self.controller.last_event.metadata
    event_metadata["lastAction"] = value
@property
def last_action_success(self) -> bool:
    """Success flag of the most recent action, read from the last event's metadata."""
    event_metadata = self.controller.last_event.metadata
    return event_metadata["lastActionSuccess"]


@last_action_success.setter
def last_action_success(self, value: bool) -> None:
    """Overwrite the recorded success flag of the most recent action.

    This edits the last event's metadata in place, i.e. it rewrites
    history, so be careful.
    """
    event_metadata = self.controller.last_event.metadata
    event_metadata["lastActionSuccess"] = value
@property
def last_action_return(self) -> Any:
    """Value returned by the most recent action (if applicable).

    `"GetReachablePositions"` is an example of an action that returns a
    value.
    """
    event_metadata = self.controller.last_event.metadata
    return event_metadata["actionReturn"]


@last_action_return.setter
def last_action_return(self, value: Any) -> None:
    """Overwrite the recorded return value of the most recent action.

    This edits the last event's metadata in place, i.e. it rewrites
    history, so be careful.
    """
    event_metadata = self.controller.last_event.metadata
    event_metadata["actionReturn"] = value
def start(
    self,
    scene_name: Optional[str],
    move_mag: float = 0.25,
    **kwargs,
) -> None:
    """Create the ai2thor controller and reset into the requested scene.

    A new `Controller` is built from the settings stored on this object. If
    the first rendered frame does not have the requested height/width, a
    `ChangeResolution` action is issued. Finally `reset` is called with the
    scene name and move magnitude.

    # Parameters

    scene_name : The scene to load.
    move_mag : The amount of distance the agent moves in a single `MoveAhead` step.
    kwargs : additional kwargs, passed to reset.

    # Raises

    RuntimeError : If the environment is already started.
    """
    if self._started:
        raise RuntimeError(
            "Trying to start the environment but it is already started."
        )

    screen_width = self._start_player_screen_width
    screen_height = self._start_player_screen_height

    # noinspection PyUnresolvedReferences
    self.controller = Controller(
        x_display=self.x_display,
        width=screen_width,
        height=screen_height,
        local_executable_path=self._local_thor_build,
        quality=self._quality,
        server_class=ai2thor.fifo_server.FifoServer,
    )

    # Frames are indexed (height, width, ...), hence the swapped order.
    wanted_height_width = (screen_height, screen_width)
    if wanted_height_width != self.current_frame.shape[:2]:
        resolution_action = {
            "action": "ChangeResolution",
            "x": screen_width,
            "y": screen_height,
        }
        self.controller.step(resolution_action)

    self._started = True
    self.reset(scene_name=scene_name, move_mag=move_mag, **kwargs)
def stop(self) -> None:
    """Stop the ai2thor controller.

    Any exception raised while stopping is logged as a warning instead of
    being propagated; the environment is marked as not started either way.
    """
    try:
        self.controller.stop()
    except Exception as stop_error:
        warning_text = str(stop_error)
        get_logger().warning(warning_text)
    finally:
        self._started = False
def reset(
    self,
    scene_name: Optional[str],
    move_mag: float = 0.25,
    **kwargs,
):
    """Reset ai2thor into a scene and re-initialize the agent settings.

    The controller is reset to `scene_name` (or to the scene of the last
    event when `scene_name` is `None`), an `Initialize` action is sent with
    the stored settings, the object open speed is changed if it differs
    from `1.0`, and the reachable positions right after the reset are
    stored as the initially reachable points.

    # Parameters

    scene_name : The scene to load.
    move_mag : The amount of distance the agent moves in a single `MoveAhead` step.
    kwargs : additional kwargs, passed to the controller "Initialize" action.
    """
    self._move_mag = move_mag
    self._grid_size = self._move_mag

    controller = self.controller
    if scene_name is None:
        scene_name = controller.last_event.metadata["sceneName"]
    controller.reset(scene_name)

    initialize_action = {
        "action": "Initialize",
        "gridSize": self._grid_size,
        "visibilityDistance": self._visibility_distance,
        "fieldOfView": self._fov,
        "makeAgentsVisible": self.make_agents_visible,
        "alwaysReturnVisibleRange": self._always_return_visible_range,
        **kwargs,
    }
    controller.step(initialize_action)

    open_speed = self.object_open_speed
    if open_speed != 1.0:
        controller.step({"action": "ChangeOpenSpeed", "x": open_speed})

    # Drop the caches from the previous scene before querying the new one.
    self._initially_reachable_points = None
    self._initially_reachable_points_set = None
    controller.step({"action": "GetReachablePositions"})

    event_metadata = controller.last_event.metadata
    if not event_metadata["lastActionSuccess"]:
        get_logger().warning(
            "Error when getting reachable points: {}".format(
                event_metadata["errorMessage"]
            )
        )
    self._initially_reachable_points = self.last_action_return
def teleport_agent_to(
    self,
    x: float,
    y: float,
    z: float,
    rotation: float,
    horizon: float,
    standing: Optional[bool] = None,
    force_action: bool = False,
    only_initially_reachable: Optional[bool] = None,
    verbose=True,
    ignore_y_diffs=False,
) -> None:
    """Helper function teleporting the agent to a given location.

    Nothing is returned; the outcome is reported through
    `self.last_action_success` (and, when the target is rejected as not
    initially reachable, through the `errorMessage` entry of the last
    event's metadata).

    # Parameters

    x : Target x coordinate.
    y : Target y coordinate.
    z : Target z coordinate.
    rotation : Target rotation, sent as the `y` component of the rotation.
    horizon : Target camera horizon.
    standing : Whether the agent should be standing. If `None`, the value is
        read from the last event's metadata.
    force_action : Passed to the controller as `forceAction`; when set, the
        final check of where the agent ended up is skipped.
    only_initially_reachable : If `True`, refuse targets that are not within
        `0.01` of an initially reachable point. If `None`, defaults to
        `self.restrict_to_initially_reachable_points`.
    verbose : Whether to log a warning when the agent does not end up where
        requested.
    ignore_y_diffs : If `True`, position comparisons ignore the y coordinate.
    """
    if standing is None:
        # Look for "isStanding" at the top level of the metadata first and
        # fall back to the entry under "agent".
        standing = self.last_event.metadata.get(
            "isStanding", self.last_event.metadata["agent"].get("isStanding")
        )
    # Remembered so that movement after a *failed* teleport can be detected.
    original_location = self.get_agent_location()
    target = {"x": x, "y": y, "z": z}
    if only_initially_reachable is None:
        only_initially_reachable = self.restrict_to_initially_reachable_points
    if only_initially_reachable:
        reachable_points = self.initially_reachable_points
        reachable = False
        for p in reachable_points:
            if self.position_dist(target, p, ignore_y=ignore_y_diffs) < 0.01:
                reachable = True
                break
        if not reachable:
            # Report a failed "TeleportFull" without ever calling the controller.
            self.last_action = "TeleportFull"
            self.last_event.metadata["errorMessage"] = (
                "Target position was not initially reachable."
            )
            self.last_action_success = False
            return
    self.controller.step(
        dict(
            action="TeleportFull",
            x=x,
            y=y,
            z=z,
            rotation={"x": 0.0, "y": rotation, "z": 0.0},
            horizon=horizon,
            standing=standing,
            forceAction=force_action,
        )
    )
    if not self.last_action_success:
        # The controller reported failure: only warn if the agent moved
        # or rotated anyway.
        agent_location = self.get_agent_location()
        rot_diff = (
            agent_location["rotation"] - original_location["rotation"]
        ) % 360
        new_old_dist = self.position_dist(
            original_location, agent_location, ignore_y=ignore_y_diffs
        )
        if (
            self.position_dist(
                original_location, agent_location, ignore_y=ignore_y_diffs
            )
            > 1e-2
            or min(rot_diff, 360 - rot_diff) > 1
        ):
            get_logger().warning(
                "Teleportation FAILED but agent still moved (position_dist {}, rot diff {})"
                " (\nprevious location\n{}\ncurrent_location\n{}\n)".format(
                    new_old_dist, rot_diff, original_location, agent_location
                )
            )
        return
    if force_action:
        # Forced teleports are not checked against the requested pose.
        assert self.last_action_success
        return
    # The controller reported success: verify the agent is within 1e-2 of
    # the target position and within 1 degree of the target rotation.
    agent_location = self.get_agent_location()
    rot_diff = (agent_location["rotation"] - rotation) % 360
    if (
        self.position_dist(agent_location, target, ignore_y=ignore_y_diffs) > 1e-2
        or min(rot_diff, 360 - rot_diff) > 1
    ):
        if only_initially_reachable:
            self._snap_agent_to_initially_reachable(verbose=False)
        if verbose:
            get_logger().warning(
                "Teleportation did not place agent"
                " precisely where desired in scene {}"
                " (\ndesired\n{}\nactual\n{}\n)"
                " perhaps due to grid snapping."
                " Action is considered failed but agent may have moved.".format(
                    self.scene_name,
                    {
                        "x": x,
                        "y": y,
                        "z": z,
                        "rotation": rotation,
                        "standing": standing,
                        "horizon": horizon,
                    },
                    agent_location,
                )
            )
        # An imprecise placement is reported as a failed action.
        self.last_action_success = False
    return
def random_reachable_state(self, seed: int = None) -> Dict:
    """Sample a random currently reachable position, rotation and horizon.

    If `seed` is given, the global `random` module is re-seeded with it
    before sampling. The returned dict is a shallow copy of the sampled
    position with `"rotation"` and `"horizon"` entries added.
    """
    if seed is not None:
        random.seed(seed)

    # Sampling order (position, rotation, horizon) determines the result
    # for a given seed.
    position = random.choice(self.currently_reachable_points)
    sampled_pose = {
        "rotation": random.choice([0, 90, 180, 270]),
        "horizon": random.choice([0, 30, 60, 330]),
    }

    state = copy.copy(position)
    for pose_key, pose_value in sampled_pose.items():
        state[pose_key] = pose_value
    return state
def randomize_agent_location(
    self, seed: int = None, partial_position: Optional[Dict[str, float]] = None
) -> Dict:
    """Teleports the agent to a random reachable location in the scene.

    Up to 10 random states are tried; if none of the teleports succeeds,
    the last sampled state is used again with `force_action=True`.

    # Parameters

    seed : If given, the global `random` module is seeded with it once
        before any state is sampled.
    partial_position : Entries that override the randomly sampled state
        (e.g. a fixed rotation) when teleporting.

    # Returns

    The last randomly sampled state (without the `partial_position`
    overrides applied).
    """
    if partial_position is None:
        partial_position = {}

    # Seed exactly once. Re-seeding before every attempt would make each
    # retry sample the very same state, so a failed first attempt could
    # never be followed by a different one.
    if seed is not None:
        random.seed(seed)

    max_attempts = 10
    state: Optional[Dict] = None
    for _ in range(max_attempts):
        state = self.random_reachable_state(seed=None)
        self.teleport_agent_to(**{**state, **partial_position})
        if self.last_action_success:
            break

    if not self.last_action_success:
        get_logger().warning(
            (
                "Randomize agent location in scene {}"
                " with seed {} and partial position {} failed in "
                "10 attempts. Forcing the action."
            ).format(self.scene_name, seed, partial_position)
        )
        self.teleport_agent_to(**{**state, **partial_position}, force_action=True)  # type: ignore
        assert self.last_action_success

    assert state is not None
    return state
def object_pixels_in_frame(
    self, object_id: str, hide_all: bool = True, hide_transparent: bool = False
) -> np.ndarray:
    """Return a mask for a given object in the agent's current view.

    # Parameters

    object_id : The id of the object.
    hide_all : Whether or not to hide all other objects in the scene before getting the mask.
    hide_transparent : Whether or not partially transparent objects are considered to occlude the object.
        Only consulted when `hide_all` is `False`.

    # Returns

    A numpy array of the mask (1 where the frame is magenta, 0 elsewhere).
    """
    # The object is highlighted in magenta, the frame is scanned for
    # magenta pixels, and then the highlighting is undone again.
    if hide_all:
        highlight_actions = [{"action": "EmphasizeObject", "objectId": object_id}]
        restore_actions = [{"action": "UnemphasizeAll"}]
    else:
        highlight_actions = [{"action": "MaskObject", "objectId": object_id}]
        restore_actions = [{"action": "UnmaskObject", "objectId": object_id}]
        if hide_transparent:
            highlight_actions.append({"action": "HideTranslucentObjects"})
            restore_actions.append({"action": "UnhideAllObjects"})

    for highlight_action in highlight_actions:
        self.step(highlight_action)

    magenta = np.array([[[255, 0, 255]]])
    is_magenta = np.all(self.current_frame == magenta, axis=2)
    object_pixels = 1 * is_magenta

    for restore_action in restore_actions:
        self.step(restore_action)

    return object_pixels
def object_pixels_on_grid(
    self,
    object_id: str,
    grid_shape: Tuple[int, int],
    hide_all: bool = True,
    hide_transparent: bool = False,
) -> np.ndarray:
    """Like `object_pixels_in_frame` but counts object pixels in a
    partitioning of the image.

    The frame is divided into `grid_shape[0]` row bands and `grid_shape[1]`
    column bands (any remainder pixels go to the first bands) and the
    number of object pixels in every cell is returned as a float32 array.
    """

    def band_sizes(total, num_bands):
        # Split `total` into `num_bands` integer sizes that differ by at
        # most one, with the larger sizes coming first.
        base, extra = divmod(total, num_bands)
        return [base + 1 if band < extra else base for band in range(num_bands)]

    mask = self.object_pixels_in_frame(
        object_id=object_id, hide_all=hide_all, hide_transparent=hide_transparent
    )

    frame_height, frame_width = self.current_frame.shape[:2]
    row_edges = np.cumsum([0] + band_sizes(frame_height, grid_shape[0]))
    col_edges = np.cumsum([0] + band_sizes(frame_width, grid_shape[1]))

    counts = [
        [
            np.sum(mask[row_start:row_stop, col_start:col_stop])
            for col_start, col_stop in zip(col_edges[:-1], col_edges[1:])
        ]
        for row_start, row_stop in zip(row_edges[:-1], row_edges[1:])
    ]
    return np.array(counts, dtype=np.float32)
def object_in_hand(self):
    """Object metadata for the object in the agent's hand.

    Returns `None` when the inventory is empty and raises an
    `AttributeError` when more than one inventory object is reported.
    """
    held_objects = self.last_event.metadata["inventoryObjects"]
    num_held = len(held_objects)

    if num_held > 1:
        raise AttributeError("Must be <= 1 inventory objects.")
    if num_held == 0:
        return None

    held_id = held_objects[0]["objectId"]
    return self.get_object_by_id(held_id)
@property
def initially_reachable_points(self) -> List[Dict[str, float]]:
    """List of {"x": x, "y": y, "z": z} locations in the scene that were
    reachable after initially resetting.

    A deep copy is returned, so callers may modify the result freely.
    """
    stored_points = self._initially_reachable_points
    assert stored_points is not None
    points_copy = copy.deepcopy(stored_points)
    return points_copy  # type:ignore
@property
def initially_reachable_points_set(self) -> Set[Tuple[float, float]]:
    """Set of (x,z) locations in the scene that were reachable after
    initially resetting.

    The set is built lazily from `initially_reachable_points` and cached
    until `_initially_reachable_points_set` is set back to `None`.
    """
    if self._initially_reachable_points_set is None:
        # Build the whole set before caching it: if reading the points
        # fails, nothing (in particular no empty set) is left in the cache.
        self._initially_reachable_points_set = {
            self._agent_location_to_tuple(p) for p in self.initially_reachable_points
        }
    return self._initially_reachable_points_set
@property
def currently_reachable_points(self) -> List[Dict[str, float]]:
    """List of {"x": x, "y": y, "z": z} locations in the scene that are
    currently reachable.

    Accessing this property takes a `GetReachablePositions` step in the
    environment.
    """
    reachable_query = {"action": "GetReachablePositions"}
    self.step(reachable_query)
    event_metadata = self.last_event.metadata
    return event_metadata["actionReturn"]  # type:ignore
def get_agent_location(self) -> Dict[str, Union[float, bool]]:
    """Gets agent's location.

    The result has the keys `"x"`, `"y"`, `"z"`, `"rotation"` (the y
    component of the agent's rotation), `"horizon"` (the camera horizon)
    and `"standing"`.
    """
    metadata = self.controller.last_event.metadata
    agent = metadata["agent"]
    position = agent["position"]

    # "isStanding" is looked up at the top level of the metadata first,
    # falling back to the entry under "agent".
    standing = metadata.get("isStanding", agent.get("isStanding"))

    return {
        "x": position["x"],
        "y": position["y"],
        "z": position["z"],
        "rotation": agent["rotation"]["y"],
        "horizon": agent["cameraHorizon"],
        "standing": standing,
    }
@staticmethod
def _agent_location_to_tuple(p: Dict[str, float]) -> Tuple[float, float]:
return round(p["x"], 2), round(p["z"], 2)
def _snap_agent_to_initially_reachable(self, verbose=True):
    """Move the agent back onto the set of initially reachable points.

    If the agent's rounded (x, z) location is not one of the initially
    reachable points, the agent is teleported to an initially reachable
    point, trying candidates in order of increasing L1 distance from the
    current location. The last action, its success flag, its return value
    and the error message recorded before the correction are restored
    afterwards, so the correction is not visible in that metadata.

    # Parameters

    verbose : Whether to log a warning describing the correction.
    """
    agent_location = self.get_agent_location()
    end_location_tuple = self._agent_location_to_tuple(agent_location)
    if end_location_tuple in self.initially_reachable_points_set:
        return
    agent_x = agent_location["x"]
    agent_z = agent_location["z"]
    # Candidate (x, z) tuples ordered by L1 distance from the agent.
    closest_reachable_points = list(self.initially_reachable_points_set)
    closest_reachable_points = sorted(
        closest_reachable_points,
        key=lambda xz: abs(xz[0] - agent_x) + abs(xz[1] - agent_z),
    )
    # In rare cases end_location_tuple might be not considered to be in self.initially_reachable_points_set
    # even when it is, here we check for such cases.
    if (
        math.sqrt(
            (
                (
                    np.array(closest_reachable_points[0])
                    - np.array(end_location_tuple)
                )
                ** 2
            ).sum()
        )
        < 1e-6
    ):
        return
    # Saved so the teleports below can be hidden from the event metadata.
    saved_last_action = self.last_action
    saved_last_action_success = self.last_action_success
    saved_last_action_return = self.last_action_return
    saved_error_message = self.last_event.metadata["errorMessage"]
    # Thor behaves weirdly when the agent gets off of the grid and you
    # try to teleport the agent back to the closest grid location. To
    # get around this we first teleport the agent to random location
    # and then back to where it should be.
    for point in self.initially_reachable_points:
        # Only points more than 0.1 away along x or z are used for this
        # intermediate teleport.
        if abs(agent_x - point["x"]) > 0.1 or abs(agent_z - point["z"]) > 0.1:
            self.teleport_agent_to(
                rotation=0,
                horizon=30,
                **point,
                only_initially_reachable=False,
                verbose=False,
            )
            if self.last_action_success:
                break
    # Now try the candidates from nearest to farthest, keeping the agent's
    # original y, rotation, horizon and standing state.
    for p in closest_reachable_points:
        self.teleport_agent_to(
            **{**agent_location, "x": p[0], "z": p[1]},
            only_initially_reachable=False,
            verbose=False,
        )
        if self.last_action_success:
            break
    teleport_forced = False
    if not self.last_action_success:
        # No regular teleport succeeded: force the agent onto the closest point.
        self.teleport_agent_to(
            **{
                **agent_location,
                "x": closest_reachable_points[0][0],
                "z": closest_reachable_points[0][1],
            },
            force_action=True,
            only_initially_reachable=False,
            verbose=False,
        )
        teleport_forced = True
    # Restore the metadata recorded before the corrective teleports.
    self.last_action = saved_last_action
    self.last_action_success = saved_last_action_success
    self.last_action_return = saved_last_action_return
    self.last_event.metadata["errorMessage"] = saved_error_message
    new_agent_location = self.get_agent_location()
    if verbose:
        get_logger().warning(
            (
                "In {}, at location (x,z)=({},{}) which is not in the set "
                "of initially reachable points;"
                " attempting to correct this: agent teleported to (x,z)=({},{}).\n"
                "Teleportation {} forced."
            ).format(
                self.scene_name,
                agent_x,
                agent_z,
                new_agent_location["x"],
                new_agent_location["z"],
                "was" if teleport_forced else "wasn't",
            )
        )
def step(
    self,
    action_dict: Optional[Dict[str, Union[str, int, float, Dict]]] = None,
    **kwargs: Union[str, int, float, Dict],
) -> ai2thor.server.Event:
    """Take a step in the ai2thor environment.

    The action is the merge of `action_dict` and `kwargs` (`kwargs` win on
    conflicts). The caller's `action_dict` is never modified.

    Special handling by action name:

    * names containing `"Move"` but not `"Hand"`: the stored move magnitude
      is added; when the agent is restricted to initially reachable points
      and ends up elsewhere, it is teleported back and the action is marked
      as failed.
    * names containing `"RandomizeHideSeekObjects"` or `"RotateUniverse"`:
      the initially reachable points are recomputed afterwards.

    If the action contains a false `"renderImage"` entry, the frame from
    before the step is put back on the last event.

    # Parameters

    action_dict : The action to take; must (together with `kwargs`) contain
        an `"action"` entry.
    kwargs : Additional entries merged into the action.

    # Returns

    The value returned by the underlying controller step. For the two
    actions that recompute reachable points this is the result of the
    follow-up `GetReachablePositions` step when that step is taken.
    """
    # Work on a private copy so that neither the merged kwargs nor the
    # injected "simplifyPhysics" entry leak into the caller's dictionary.
    action_dict = {**(action_dict if action_dict is not None else {}), **kwargs}

    action = cast(str, action_dict["action"])

    skip_render = "renderImage" in action_dict and not action_dict["renderImage"]
    last_frame: Optional[np.ndarray] = None
    if skip_render:
        last_frame = self.current_frame

    if self.simplify_physics:
        action_dict["simplifyPhysics"] = True

    if "Move" in action and "Hand" not in action:  # type: ignore
        action_dict = {
            **action_dict,
            "moveMagnitude": self._move_mag,
        }  # type: ignore
        start_location = self.get_agent_location()
        sr = self.controller.step(action_dict)

        if self.restrict_to_initially_reachable_points:
            end_location_tuple = self._agent_location_to_tuple(
                self.get_agent_location()
            )
            if end_location_tuple not in self.initially_reachable_points_set:
                # Undo the move and report it as a failure.
                self.teleport_agent_to(**start_location, force_action=True)  # type: ignore
                self.last_action = action
                self.last_action_success = False
                self.last_event.metadata["errorMessage"] = (
                    "Moved to location outside of initially reachable points."
                )
    elif "RandomizeHideSeekObjects" in action:
        last_position = self.get_agent_location()
        self.controller.step(action_dict)
        # Metadata of the randomization step itself; later steps replace
        # the last event, so keep a reference for the success flag.
        metadata = self.last_event.metadata
        if self.position_dist(last_position, self.get_agent_location()) > 0.001:
            self.teleport_agent_to(**last_position, force_action=True)  # type: ignore
            get_logger().warning(
                "In scene {}, after randomization of hide and seek objects, agent moved.".format(
                    self.scene_name
                )
            )

        sr = self.controller.step({"action": "GetReachablePositions"})
        self._initially_reachable_points = self.controller.last_event.metadata[
            "actionReturn"
        ]
        self._initially_reachable_points_set = None
        # Make the last event look like the randomization action.
        self.last_action = action
        self.last_action_success = metadata["lastActionSuccess"]
        self.controller.last_event.metadata["actionReturn"] = []
    elif "RotateUniverse" in action:
        sr = self.controller.step(action_dict)
        metadata = self.last_event.metadata

        if metadata["lastActionSuccess"]:
            sr = self.controller.step({"action": "GetReachablePositions"})
            self._initially_reachable_points = self.controller.last_event.metadata[
                "actionReturn"
            ]
            self._initially_reachable_points_set = None
            # Make the last event look like the rotation action.
            self.last_action = action
            self.last_action_success = metadata["lastActionSuccess"]
            self.controller.last_event.metadata["actionReturn"] = []
    else:
        sr = self.controller.step(action_dict)

    if self.restrict_to_initially_reachable_points:
        self._snap_agent_to_initially_reachable()

    if skip_render:
        assert last_frame is not None
        self.last_event.frame = last_frame

    return sr
@staticmethod
def position_dist(
p0: Mapping[str, Any],
p1: Mapping[str, Any],
ignore_y: bool = False,
l1_dist: bool = False,
) -> float:
"""Distance between two points of the form {"x": x, "y":y, "z":z"}."""
if l1_dist:
return (
abs(p0["x"] - p1["x"])
+ (0 if ignore_y else abs(p0["y"] - p1["y"]))
+ abs(p0["z"] - p1["z"])
)
else:
return math.sqrt(
(p0["x"] - p1["x"]) ** 2
+ (0 if ignore_y else (p0["y"] - p1["y"]) ** 2)
+ (p0["z"] - p1["z"]) ** 2
)
@staticmethod
def rotation_dist(a: Dict[str, float], b: Dict[str, float]):
"""Distance between rotations."""
def deg_dist(d0: float, d1: float):
dist = (d0 - d1) % 360
return min(dist, 360 - dist)
return sum(deg_dist(a[k], b[k]) for k in ["x", "y", "z"])
@staticmethod
def angle_between_rotations(a: Dict[str, float], b: Dict[str, float]):
    """Scalar measure of how much two Euler-angle rotations differ.

    Both rotations are read from their "x", "y" and "z" entries (in
    degrees) and converted with `Rotation.from_euler("xyz", ...)`. The
    relative rotation `a * b^-1` is turned into a rotation vector, every
    component is scaled by `180 / (2 * pi)`, and the absolute values of
    the components are summed.
    """
    # NOTE(review): radians are usually converted to degrees with
    # 180 / pi, and the angle of a rotation vector is usually its L2 norm;
    # here the factor is 180 / (2 * pi) and an L1 sum is used. Presumably
    # intentional, but confirm before treating the result as an angle in
    # degrees.
    return np.abs(
        (180 / (2 * math.pi))
        * (
            Rotation.from_euler("xyz", [a[k] for k in "xyz"], degrees=True)
            * Rotation.from_euler("xyz", [b[k] for k in "xyz"], degrees=True).inv()
        ).as_rotvec()
    ).sum()
def closest_object_with_properties(
    self, properties: Dict[str, Any]
) -> Optional[Dict[str, Any]]:
    """Find the object closest to the agent that has the given
    properties.

    An object matches when, for every key in `properties`, its metadata
    entry equals the given value. `None` is returned if nothing matches.
    """
    agent_position = self.controller.last_event.metadata["agent"]["position"]

    nearest = None
    nearest_dist = float("inf")
    for candidate in self.all_objects():
        if any(candidate[key] != wanted for key, wanted in properties.items()):
            continue
        candidate_dist = self.position_dist(agent_position, candidate["position"])
        if candidate_dist < nearest_dist:
            nearest, nearest_dist = candidate, candidate_dist
    return nearest
def closest_visible_object_of_type(
    self, object_type: str
) -> Optional[Dict[str, Any]]:
    """Find the object closest to the agent that is visible and has the
    given type."""
    return self.closest_object_with_properties(
        {"visible": True, "objectType": object_type}
    )
def closest_object_of_type(self, object_type: str) -> Optional[Dict[str, Any]]:
    """Find the object closest to the agent that has the given type."""
    return self.closest_object_with_properties({"objectType": object_type})
def closest_reachable_point_to_position(
    self, position: Dict[str, float]
) -> Tuple[Dict[str, float], float]:
    """Of all initially reachable positions, find the one that is closest
    to the given location.

    Distances are measured in the (x, z) plane. The search stops early as
    soon as a point closer than `1e-3` is found.

    # Returns

    A tuple of the closest point and its distance to `position`.
    """
    query_xz = np.array([position["x"], position["z"]])

    best_point = None
    best_dist = float("inf")
    for candidate in self.initially_reachable_points:
        candidate_xz = np.array([candidate["x"], candidate["z"]])
        gap = np.linalg.norm(query_xz - candidate_xz)
        if not gap < best_dist:
            continue
        best_point, best_dist = candidate, gap
        if best_dist < 1e-3:
            break

    assert best_point is not None
    return best_point, best_dist
@staticmethod
def _angle_from_to(a_from: float, a_to: float) -> float:
a_from = a_from % 360
a_to = a_to % 360
min_rot = min(a_from, a_to)
max_rot = max(a_from, a_to)
rot_across_0 = (360 - max_rot) + min_rot
rot_not_across_0 = max_rot - min_rot
rot_err = min(rot_across_0, rot_not_across_0)
if rot_across_0 == rot_err:
rot_err *= -1 if a_to > a_from else 1
else:
rot_err *= 1 if a_to > a_from else -1
return rot_err
def agent_xz_to_scene_xz(self, agent_xz: Dict[str, float]) -> Dict[str, float]:
    """Convert an (x, z) offset relative to the agent into scene coordinates.

    Only agent rotations of (approximately) 0, 90, 180 or 270 degrees are
    supported; any other rotation raises an `Exception`.
    """
    agent_pos = self.get_agent_location()
    rel_x = agent_xz["x"]
    rel_z = agent_xz["z"]
    base_x = agent_pos["x"]
    base_z = agent_pos["z"]
    heading = agent_pos["rotation"]

    # (heading, offset added to the scene x, offset added to the scene z)
    offsets_by_heading = (
        (0, rel_x, rel_z),
        (90, rel_z, -rel_x),
        (180, -rel_x, -rel_z),
        (270, -rel_z, rel_x),
    )
    for supported_heading, offset_x, offset_z in offsets_by_heading:
        if abs(heading - supported_heading) < 1e-5:
            return {"x": base_x + offset_x, "z": base_z + offset_z}

    raise Exception("Rotation must be one of 0, 90, 180, or 270.")
def scene_xz_to_agent_xz(self, scene_xz: Dict[str, float]) -> Dict[str, float]:
    """Convert a scene (x, z) location into an offset relative to the agent.

    Only agent rotations of (approximately) 0, 90, 180 or 270 degrees are
    supported; any other rotation raises an `Exception`.
    """
    agent_pos = self.get_agent_location()
    diff_x = scene_xz["x"] - agent_pos["x"]
    diff_z = scene_xz["z"] - agent_pos["z"]
    heading = agent_pos["rotation"]

    # (heading, agent-relative x, agent-relative z)
    relative_by_heading = (
        (0, diff_x, diff_z),
        (90, -diff_z, diff_x),
        (180, -diff_x, -diff_z),
        (270, diff_z, -diff_x),
    )
    for supported_heading, rel_x, rel_z in relative_by_heading:
        if abs(heading - supported_heading) < 1e-5:
            return {"x": rel_x, "z": rel_z}

    raise Exception("Rotation must be one of 0, 90, 180, or 270.")
def all_objects(self) -> List[Dict[str, Any]]:
    """Return the metadata of all objects in the controller's last event."""
    event_metadata = self.controller.last_event.metadata
    return event_metadata["objects"]
def all_objects_with_properties(
    self, properties: Dict[str, Any]
) -> List[Dict[str, Any]]:
    """Find all objects with the given properties.

    An object matches when, for every key in `properties`, its metadata
    entry equals the given value.
    """
    return [
        candidate
        for candidate in self.all_objects()
        if not any(candidate[key] != wanted for key, wanted in properties.items())
    ]
def visible_objects(self) -> List[Dict[str, Any]]:
    """Return all objects whose `"visible"` metadata entry is `True`."""
    visibility_filter = {"visible": True}
    return self.all_objects_with_properties(visibility_filter)
def get_object_by_id(self, object_id: str) -> Optional[Dict[str, Any]]:
    """Return the metadata of the first object with the given id, or `None`."""
    scene_objects = self.last_event.metadata["objects"]
    matches = (obj for obj in scene_objects if obj["objectId"] == object_id)
    return next(matches, None)
###
# Following is used for computing shortest paths between states
###
# Navigation graphs keyed by scene name. The `graph` property builds an entry
# the first time a scene is requested and reuses it afterwards.
_CACHED_GRAPHS: Dict[str, nx.DiGraph] = {}
# Actions that label edges of the graph; `update_graph_with_failed_action`
# ignores failed actions that are not in this set.
GRAPH_ACTIONS_SET = {"LookUp", "LookDown", "RotateLeft", "RotateRight", "MoveAhead"}
def reachable_points_with_rotations_and_horizons(self):
    """List every currently reachable position combined with every
    rotation in (0, 90, 180, 270) and horizon in (-30, 0, 30, 60).

    A `GetReachablePositions` step is taken on the controller; each
    returned dict is a shallow copy of a reachable position with
    `"rotation"` and `"horizon"` entries added.
    """
    self.controller.step({"action": "GetReachablePositions"})
    assert self.last_action_success

    positions = self.last_event.metadata["actionReturn"]

    def with_pose(position, rotation, horizon):
        posed = copy.copy(position)
        posed["rotation"] = rotation
        posed["horizon"] = horizon
        return posed

    return [
        with_pose(position, rotation, horizon)
        for rotation in [0, 90, 180, 270]
        for horizon in [-30, 0, 30, 60]
        for position in positions
    ]
@staticmethod
def location_for_key(key, y_value=0.0):
x, z, rot, hor = key
loc = dict(x=x, y=y_value, z=z, rotation=rot, horizon=hor)
return loc
@staticmethod
def get_key(input_dict: Dict[str, Any]) -> Tuple[float, float, int, int]:
if "x" in input_dict:
x = input_dict["x"]
z = input_dict["z"]
rot = input_dict["rotation"]
hor = input_dict["horizon"]
else:
x = input_dict["position"]["x"]
z = input_dict["position"]["z"]
rot = input_dict["rotation"]["y"]
hor = input_dict["cameraHorizon"]
return (
round(x, 2),
round(z, 2),
round_to_factor(rot, 90) % 360,
round_to_factor(hor, 30) % 360,
)
def update_graph_with_failed_action(self, failed_action: str):
    """Remove the graph edge corresponding to an action that just failed.

    Nothing happens unless a graph is already cached for the current scene
    and `failed_action` is one of `GRAPH_ACTIONS_SET`. Otherwise the first
    edge leaving the agent's current state that is labelled with
    `failed_action` (if any) is removed.
    """
    if not (
        self.scene_name in self._CACHED_GRAPHS
        and failed_action in self.GRAPH_ACTIONS_SET
    ):
        return

    source_key = self.get_key(self.last_event.metadata["agent"])
    self._check_contains_key(source_key)

    outgoing = self.graph[source_key]
    failed_target = next(
        (
            target_key
            for target_key in outgoing
            if outgoing[target_key]["action"] == failed_action
        ),
        None,
    )
    if failed_target is not None:
        self.graph.remove_edge(source_key, failed_target)
def _add_from_to_edge(
    self,
    g: nx.DiGraph,
    s: Tuple[float, float, int, int],
    t: Tuple[float, float, int, int],
):
    """Add the edge `s -> t` to `g` if a single graph action leads from `s` to `t`.

    The keys are (x, z, rotation, horizon) tuples. An edge labelled with
    the action is added when exactly one of position, rotation and horizon
    differs and that difference matches `RotateRight`/`RotateLeft`,
    `LookDown`/`LookUp`, or a `MoveAhead` of one grid step in the direction
    the source state is facing. Otherwise the graph is left untouched.
    """

    def nearly_equal(lhs, rhs):
        return abs(lhs - rhs) < 0.001

    src_x, src_z, src_rot, src_hor = s
    dst_x, dst_z, dst_rot, dst_hor = t

    planar_dist = round(math.sqrt((src_x - dst_x) ** 2 + (src_z - dst_z) ** 2), 2)
    rot_steps = (round_to_factor(dst_rot - src_rot, 90) % 360) // 90
    hor_steps = (round_to_factor(dst_hor - src_hor, 30) % 360) // 30

    # If source and target differ by more than one action, do nothing.
    num_changed = sum(
        1 for change in (planar_dist, rot_steps, hor_steps) if change != 0
    )
    if num_changed != 1:
        return

    step_size = self._grid_size
    action = None
    if rot_steps != 0:
        # One quarter turn clockwise is a right turn, three are a left turn.
        if rot_steps == 1:
            action = "RotateRight"
        elif rot_steps == 3:
            action = "RotateLeft"
    elif hor_steps != 0:
        # 11 steps of 30 degrees is -30 modulo 360, i.e. looking up.
        if hor_steps == 11:
            action = "LookUp"
        elif hor_steps == 1:
            action = "LookDown"
    elif nearly_equal(planar_dist, step_size):
        # (heading of the source, displacement along the heading axis,
        #  displacement a forward move would produce)
        forward_moves = (
            (0, dst_z - src_z, step_size),
            (90, dst_x - src_x, step_size),
            (180, dst_z - src_z, -step_size),
            (270, dst_x - src_x, -step_size),
        )
        if any(
            src_rot == heading and nearly_equal(displacement, expected)
            for heading, displacement, expected in forward_moves
        ):
            action = "MoveAhead"

    if action is not None:
        g.add_edge(s, t, action=action)
def possible_neighbor_offsets(self) -> Tuple[Tuple[float, float, int, int], ...]:
    """Offsets (dx, dz, d_rotation, d_horizon) to candidate neighbor keys.

    Every returned offset changes exactly one of the four components: x or
    z by plus/minus the grid size (rounded to 2 decimals), rotation by
    plus/minus 90, or horizon by -30, 30 or 60.

    The result is cached on the instance together with the grid size it
    was computed for, so it is recomputed when the grid size changes
    (e.g. after `reset` with a different `move_mag`). A `functools`
    cache keyed on `self` would keep returning the offsets of the old grid
    size and would also keep the instance alive.
    """
    grid_size = round(self._grid_size, 2)

    cached = getattr(self, "_neighbor_offsets_cache", None)
    if cached is not None and cached[0] == grid_size:
        return cached[1]

    offsets = []
    for rot_diff in [-90, 0, 90]:
        for horz_diff in [-30, 0, 30, 60]:
            for x_diff in [-grid_size, 0, grid_size]:
                for z_diff in [-grid_size, 0, grid_size]:
                    num_changed = (
                        (rot_diff != 0)
                        + (horz_diff != 0)
                        + (x_diff != 0)
                        + (z_diff != 0)
                    )
                    if num_changed == 1:
                        offsets.append((x_diff, z_diff, rot_diff, horz_diff))

    result = tuple(offsets)
    self._neighbor_offsets_cache = (grid_size, result)
    return result
def _add_node_to_graph(self, graph: nx.DiGraph, s: Tuple[float, float, int, int]):
    """Add the state key `s` to `graph` and connect it to existing neighbors.

    For every offset from `possible_neighbor_offsets`, the neighboring key
    is computed and, if that key is already a node of the graph, edges in
    both directions are attempted via `_add_from_to_edge`. Nothing happens
    if `s` is already in the graph.

    # Parameters

    graph : The graph to extend.
    s : The (x, z, rotation, horizon) key to add.
    """
    if s in graph:
        return

    graph.add_node(s)

    for o in self.possible_neighbor_offsets():
        # Keys produced by `get_key` hold positions rounded to 2 decimals
        # and rotation/horizon wrapped modulo 360. The neighbor key must
        # be normalized the same way, otherwise neighbors across the wrap
        # (rotation 0 <-> 270, horizon 0 <-> 330) or positions affected by
        # floating point error are never found.
        t = (
            round(s[0] + o[0], 2),
            round(s[1] + o[1], 2),
            (s[2] + o[2]) % 360,
            (s[3] + o[3]) % 360,
        )
        # Membership is tested on the graph directly; copying all nodes
        # into a set for every inserted node made graph building quadratic.
        if t != s and t in graph:
            self._add_from_to_edge(graph, s, t)
            self._add_from_to_edge(graph, t, s)
@property
def graph(self):
    """Navigation graph of the current scene.

    The graph is built on first access for a scene (one node per reachable
    position/rotation/horizon combination) and stored in `_CACHED_GRAPHS`
    under the scene name; later accesses return the stored graph.
    """
    graph_cache = self._CACHED_GRAPHS
    if self.scene_name not in graph_cache:
        scene_graph = nx.DiGraph()
        for point in self.reachable_points_with_rotations_and_horizons():
            self._add_node_to_graph(scene_graph, self.get_key(point))
        graph_cache[self.scene_name] = scene_graph
    return graph_cache[self.scene_name]


@graph.setter
def graph(self, g):
    """Replace the stored graph of the current scene with `g`."""
    graph_cache = self._CACHED_GRAPHS
    graph_cache[self.scene_name] = g
def _check_contains_key(self, key: Tuple[float, float, int, int], add_if_not=True):
    """Warn if `key` is not a node of the graph and optionally add it.

    # Parameters

    key : The (x, z, rotation, horizon) key to look for.
    add_if_not : Whether a missing key should be added to the graph.
    """
    if key in self.graph:
        return

    get_logger().warning(
        "{} was not in the graph for scene {}.".format(key, self.scene_name)
    )
    if add_if_not:
        self._add_node_to_graph(self.graph, key)
def shortest_state_path(self, source_state_key, goal_state_key):
    """Shortest sequence of state keys from source to goal, or `None`.

    Both keys are first checked against the graph (and added if missing).
    Any exception raised while searching for the path results in `None`.
    """
    for state_key in (source_state_key, goal_state_key):
        self._check_contains_key(state_key)

    # noinspection PyBroadException
    try:
        return nx.shortest_path(self.graph, source_state_key, goal_state_key)
    except Exception as _:
        return None
def action_transitioning_between_keys(self, s, t):
    """Action labelling the graph edge `s -> t`, or `None` if there is no such edge.

    Both keys are first checked against the graph (and added if missing).
    """
    for state_key in (s, t):
        self._check_contains_key(state_key)

    if not self.graph.has_edge(s, t):
        return None
    return self.graph.get_edge_data(s, t)["action"]
def shortest_path_next_state(self, source_state_key, goal_state_key):
    """Second state on the shortest path from source to goal.

    Both keys are first checked against the graph (and added if missing).

    # Raises

    RuntimeError : If source and goal are the same state.
    """
    for state_key in (source_state_key, goal_state_key):
        self._check_contains_key(state_key)

    if source_state_key == goal_state_key:
        raise RuntimeError("called next state on the same source and goal state")

    path = self.shortest_state_path(source_state_key, goal_state_key)
    next_state = path[1]
    return next_state
def shortest_path_next_action(self, source_state_key, goal_state_key):
    """Action labelling the first edge of the shortest path from source to goal.

    Both keys are first checked against the graph (and added if missing).
    """
    for state_key in (source_state_key, goal_state_key):
        self._check_contains_key(state_key)

    next_state_key = self.shortest_path_next_state(source_state_key, goal_state_key)
    first_edge = self.graph.get_edge_data(source_state_key, next_state_key)
    return first_edge["action"]
def shortest_path_length(self, source_state_key, goal_state_key):
    """Length of the shortest path from source to goal in the graph.

    Both keys are first checked against the graph (and added if missing).
    `float("inf")` is returned when networkx reports that no path exists.
    """
    for state_key in (source_state_key, goal_state_key):
        self._check_contains_key(state_key)

    try:
        path_length = nx.shortest_path_length(
            self.graph, source_state_key, goal_state_key
        )
    except nx.NetworkXNoPath as _:
        return float("inf")
    return path_length
================================================
FILE: allenact_plugins/ithor_plugin/ithor_sensors.py
================================================
import copy
from functools import reduce
from typing import Any, Dict, Optional, Union, Sequence
import ai2thor.controller
import gym
import gym.spaces
import numpy as np
import torch
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact.embodiedai.mapping.mapping_utils.map_builders import (
BinnedPointCloudMapBuilder,
SemanticMapBuilder,
ObjectHull2d,
)
from allenact.embodiedai.sensors.vision_sensors import RGBSensor
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask
from allenact_plugins.ithor_plugin.ithor_util import include_object_data
from allenact_plugins.robothor_plugin.robothor_environment import RoboThorEnvironment
from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask, ObjectNavTask
# Environment types accepted by the THOR sensors in this module: a raw
# ai2thor controller or one of the iTHOR / RoboTHOR environment wrappers.
THOR_ENV_TYPE = Union[
    ai2thor.controller.Controller, IThorEnvironment, RoboThorEnvironment
]
# Task types matching `THOR_ENV_TYPE`: a `Task` over any of the environment
# types above.
THOR_TASK_TYPE = Union[
    Task[ai2thor.controller.Controller],
    Task[IThorEnvironment],
    Task[RoboThorEnvironment],
]
class RGBSensorThor(RGBSensor[THOR_ENV_TYPE, THOR_TASK_TYPE]):
    """Sensor for RGB images in THOR.

    Returns a copy of the current RGB frame, corresponding to the agent's
    egocentric view, from either a raw ai2thor controller or an
    environment wrapper exposing `current_frame`.
    """

    def frame_from_env(
        self,
        env: THOR_ENV_TYPE,
        task: Optional[THOR_TASK_TYPE],
    ) -> np.ndarray:  # type:ignore
        """Return a copy of the environment's current frame (`task` is unused)."""
        is_raw_controller = isinstance(env, ai2thor.controller.Controller)
        # A raw controller exposes the frame on its last event, the
        # environment wrappers expose it as `current_frame`.
        frame = env.last_event.frame if is_raw_controller else env.current_frame
        return frame.copy()
class GoalObjectTypeThorSensor(Sensor):
    """Sensor reporting an integer index for the goal object type of a task.

    Without a detector map, the index is the position of the goal object
    type in the (sorted) list of object types. With a detector map, each
    object type is first mapped to a detector type and the index is the
    position of that detector type in `detector_types`.
    """

    def __init__(
        self,
        object_types: Sequence[str],
        target_to_detector_map: Optional[Dict[str, str]] = None,
        detector_types: Optional[Sequence[str]] = None,
        uuid: str = "goal_object_type_ind",
        **kwargs: Any,
    ):
        """Initializer.

        # Parameters

        object_types : The possible goal object types; must already be sorted.
        target_to_detector_map : Optional mapping from object type to detector type.
        detector_types : The detector types; required when `target_to_detector_map` is given.
        uuid : Identifier of the sensor.
        kwargs : Forwarded to the super class initializer.
        """
        self.ordered_object_types = list(object_types)
        assert self.ordered_object_types == sorted(
            self.ordered_object_types
        ), "object types input to goal object type sensor must be ordered"
        self.target_to_detector_map = target_to_detector_map
        if target_to_detector_map is None:
            # Index directly into the sorted object types.
            self.object_type_to_ind = {
                ot: i for i, ot in enumerate(self.ordered_object_types)
            }
        else:
            assert (
                detector_types is not None
            ), "Missing detector_types for map {}".format(target_to_detector_map)
            self.target_to_detector = target_to_detector_map
            self.detector_types = detector_types
            detector_index = {ot: i for i, ot in enumerate(self.detector_types)}
            # Index of the detector type each object type is mapped to.
            self.object_type_to_ind = {
                ot: detector_index[self.target_to_detector[ot]]
                for ot in self.ordered_object_types
            }
        observation_space = self._get_observation_space()
        # NOTE: the local variables of this function are handed to
        # `prepare_locals_for_super`, so the names of locals defined above
        # (e.g. `observation_space`) matter here — presumably they become
        # keyword arguments of the super initializer; verify against the helper.
        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self):
        """Discrete space with one value per object type (or per detector
        type when a detector map is used)."""
        if self.target_to_detector_map is None:
            return gym.spaces.Discrete(len(self.ordered_object_types))
        else:
            return gym.spaces.Discrete(len(self.detector_types))

    def get_observation(
        self,
        env: IThorEnvironment,
        task: Optional[ObjectNaviThorGridTask],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Return the index associated with `task.task_info["object_type"]`.

        Although `task` is annotated as optional, it is dereferenced
        unconditionally, so it must not be `None`.
        """
        return self.object_type_to_ind[task.task_info["object_type"]]
class TakeEndActionThorNavSensor(
    Sensor[
        Union[RoboThorEnvironment, IThorEnvironment],
        Union[ObjectNaviThorGridTask, ObjectNavTask, PointNavTask],
    ]
):
    """Sensor indicating whether the agent should take the `End` action now."""

    def __init__(self, nactions: int, uuid: str, **kwargs: Any) -> None:
        """Initializer.

        # Parameters

        nactions : Stored on the sensor as `self.nactions`.
        uuid : Identifier of the sensor.
        kwargs : Forwarded to the super class initializer.
        """
        self.nactions = nactions
        observation_space = self._get_observation_space()
        # NOTE: the local variables of this function are handed to
        # `prepare_locals_for_super`, so the names of locals defined above
        # matter here — presumably they become keyword arguments of the
        # super initializer; verify against the helper.
        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self) -> gym.spaces.Discrete:
        """The observation space.

        Equals `gym.spaces.Discrete(2)` where a 0 indicates that the agent
        **should not** take the `End` action and a 1 indicates that the agent
        **should** take the end action.
        """
        return gym.spaces.Discrete(2)

    def get_observation(  # type:ignore
        self,
        env: IThorEnvironment,
        task: Union[ObjectNaviThorGridTask, ObjectNavTask, PointNavTask],
        *args,
        **kwargs,
    ) -> np.ndarray:
        """Return a one-element int64 array: 1 if the task reports that the
        goal is visible / in range, else 0.

        # Raises

        NotImplementedError : If `task` is not one of the supported task types.
        """
        if isinstance(task, ObjectNaviThorGridTask):
            should_end = task.is_goal_object_visible()
        elif isinstance(task, ObjectNavTask):
            should_end = task._is_goal_in_range()
        elif isinstance(task, PointNavTask):
            should_end = task._is_goal_in_range()
        else:
            raise NotImplementedError
        # A `None` answer from the task is treated as "do not end".
        if should_end is None:
            should_end = False
        return np.array([1 * should_end], dtype=np.int64)
class RelativePositionChangeTHORSensor(
    Sensor[RoboThorEnvironment, Task[RoboThorEnvironment]]
):
    """Sensor reporting the agent's pose change since the previous observation.

    Each observation is a dict with:

    * `"last_allocentric_position"`: the `(x, z, rotation)` pose recorded at
      the previous call (rotation in degrees, taken modulo 360).
    * `"dx_dz_dr"`: the change from that pose to the current one, with the
      positional part rotated into the previous pose's frame and the
      rotational part taken modulo 360.
    """

    def __init__(self, uuid: str = "rel_position_change", **kwargs: Any):
        """Create the sensor and its observation space.

        # Parameters
        uuid : Identifier of this sensor.
        """
        # NOTE: locals are forwarded to the parent initializer via `locals()`.
        observation_space = gym.spaces.Dict(
            {
                "last_allocentric_position": gym.spaces.Box(
                    low=np.array([-np.inf, -np.inf, 0], dtype=np.float32),
                    high=np.array([np.inf, np.inf, 360], dtype=np.float32),
                    shape=(3,),
                    dtype=np.float32,
                ),
                "dx_dz_dr": gym.spaces.Box(
                    low=np.array([-np.inf, -np.inf, -360], dtype=np.float32),
                    # The positional deltas are unbounded above; the upper
                    # bound must be +inf (it was previously -inf, which made
                    # the box empty along dx and dz).
                    high=np.array([np.inf, np.inf, 360], dtype=np.float32),
                    shape=(3,),
                    dtype=np.float32,
                ),
            }
        )
        super().__init__(**prepare_locals_for_super(locals()))

        self.last_xzr: Optional[np.ndarray] = None

    @staticmethod
    def get_relative_position_change(from_xzr: np.ndarray, to_xzr: np.ndarray):
        """Express the pose change `to_xzr - from_xzr` in the frame of `from_xzr`.

        # Parameters
        from_xzr : Starting pose `(x, z, rotation_in_degrees)`.
        to_xzr : Ending pose `(x, z, rotation_in_degrees)`.

        # Returns
        Array `(dx, dz, dr)` where `(dx, dz)` is rotated by the starting
        rotation and `dr` is the rotation difference modulo 360.
        """
        dx_dz_dr = to_xzr - from_xzr

        # Transform dx, dz (in global coordinates) into the relative coordinates
        # given by rotation r0=from_xzr[-1]. This requires rotating everything so that
        # r0 is facing in the positive z direction. Since thor rotations are negative
        # the usual rotation direction this means we want to rotate by r0 degrees.
        theta = np.pi * from_xzr[-1] / 180
        cos_theta = np.cos(theta)
        sin_theta = np.sin(theta)

        dx_dz_dr = (
            np.array(
                [
                    [cos_theta, -sin_theta, 0],
                    [sin_theta, cos_theta, 0],
                    [0, 0, 1],  # Don't change dr
                ]
            )
            @ dx_dz_dr.reshape(-1, 1)
        ).reshape(-1)

        dx_dz_dr[-1] = dx_dz_dr[-1] % 360
        return dx_dz_dr

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Optional[Task[RoboThorEnvironment]],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Return the previous pose and the pose change since then.

        On the first step of a task (`task.num_steps_taken() == 0`) the
        previous pose is reset to the current pose, so the reported change
        is zero.
        """
        agent_metadata = env.controller.last_event.metadata["agent"]
        p = agent_metadata["position"]
        r = agent_metadata["rotation"]["y"]
        current_xzr = np.array([p["x"], p["z"], r % 360])

        if task.num_steps_taken() == 0:
            # Fresh episode: forget the pose remembered from the last one.
            self.last_xzr = current_xzr.copy()

        dx_dz_dr = self.get_relative_position_change(
            from_xzr=self.last_xzr, to_xzr=current_xzr
        )

        to_return = {"last_allocentric_position": self.last_xzr, "dx_dz_dr": dx_dz_dr}

        self.last_xzr = current_xzr

        return to_return
class ReachableBoundsTHORSensor(Sensor[RoboThorEnvironment, Task[RoboThorEnvironment]]):
    """Sensor returning the x/z extent of a scene's reachable positions.

    The extent is padded by `margin` on every side and cached per scene name.
    """

    def __init__(self, margin: float, uuid: str = "scene_bounds", **kwargs: Any):
        """Create the sensor.

        # Parameters
        margin : Amount added beyond the extreme reachable coordinates.
        uuid : Identifier of this sensor.
        """
        # NOTE: locals are forwarded to the parent initializer via `locals()`,
        # so no additional local names are introduced here.
        observation_space = gym.spaces.Dict(
            {
                range_key: gym.spaces.Box(
                    low=np.array([-np.inf, -np.inf], dtype=np.float32),
                    high=np.array([np.inf, np.inf], dtype=np.float32),
                    shape=(2,),
                    dtype=np.float32,
                )
                for range_key in ("x_range", "z_range")
            }
        )
        super().__init__(**prepare_locals_for_super(locals()))

        self.margin = margin
        self._bounds_cache = {}

    @staticmethod
    def get_bounds(
        controller: ai2thor.controller.Controller,
        margin: float,
    ) -> Dict[str, np.ndarray]:
        """Query the reachable positions and return their padded x/z ranges.

        # Returns
        Dict with `"x_range"` and `"z_range"`, each a `[min - margin,
        max + margin]` array.
        """
        reachable = controller.step("GetReachablePositions").metadata["actionReturn"]

        xs = [pos["x"] for pos in reachable]
        lowest_x, highest_x = min(xs), max(xs)

        zs = [pos["z"] for pos in reachable]
        lowest_z, highest_z = min(zs), max(zs)

        return {
            "x_range": np.array([lowest_x - margin, highest_x + margin]),
            "z_range": np.array([lowest_z - margin, highest_z + margin]),
        }

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Optional[Task[RoboThorEnvironment]],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Return a deep copy of the (cached) bounds of the current scene.

        `env` may be either a raw controller or a wrapper exposing
        `.controller`.
        """
        controller = (
            env if isinstance(env, ai2thor.controller.Controller) else env.controller
        )

        scene_name = controller.last_event.metadata["sceneName"]

        bounds = self._bounds_cache.get(scene_name)
        if bounds is None:
            bounds = self.get_bounds(controller=controller, margin=self.margin)
            self._bounds_cache[scene_name] = bounds

        # Copy so callers cannot corrupt the cached arrays.
        return copy.deepcopy(bounds)
class SceneBoundsTHORSensor(Sensor[RoboThorEnvironment, Task[RoboThorEnvironment]]):
    """Sensor returning the x/z extent of the scene from the `sceneBounds` metadata."""

    def __init__(self, uuid: str = "scene_bounds", **kwargs: Any):
        """Create the sensor.

        # Parameters
        uuid : Identifier of this sensor.
        """
        # NOTE: locals are forwarded to the parent initializer via `locals()`,
        # so no additional local names are introduced here.
        observation_space = gym.spaces.Dict(
            {
                range_key: gym.spaces.Box(
                    low=np.array([-np.inf, -np.inf]),
                    high=np.array([np.inf, np.inf]),
                    shape=(2,),
                    dtype=np.float32,
                )
                for range_key in ("x_range", "z_range")
            }
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Optional[Task[RoboThorEnvironment]],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Return `{"x_range": [lo, hi], "z_range": [lo, hi]}` for the scene.

        Each range is `center -/+ size / 2` along that axis, read from the
        controller's last event metadata.
        """
        bounds = env.controller.last_event.metadata["sceneBounds"]
        center, size = bounds["center"], bounds["size"]

        def axis_range(axis: str) -> np.ndarray:
            return np.array(
                [center[axis] - size[axis] / 2, center[axis] + size[axis] / 2]
            )

        return {"x_range": axis_range("x"), "z_range": axis_range("z")}
class BinnedPointCloudMapTHORSensor(
    Sensor[RoboThorEnvironment, Task[RoboThorEnvironment]]
):
    """Sensor producing binned point-cloud map observations from depth frames.

    The actual map construction is delegated to a
    `BinnedPointCloudMapBuilder`; this sensor resets the builder at the start
    of every task and feeds it the current depth frame and camera pose.
    """

    observation_space = gym.spaces.Dict

    def __init__(
        self,
        fov: Optional[float],
        vision_range_in_cm: int,
        map_size_in_cm: int,
        resolution_in_cm: int,
        map_range_sensor: Sensor,
        return_egocentric_local_context: bool = False,
        height_bins: Sequence[float] = (0.02, 2),
        ego_only: bool = True,
        exclude_agent: bool = False,
        uuid: str = "binned_pc_map",
        device: torch.device = torch.device("cpu"),
        **kwargs: Any,
    ):
        """Create the sensor, its map builder and its observation space.

        # Parameters
        fov : Field of view passed to the builder. If `None`, it is read
            from the event metadata at the first step of each task.
        vision_range_in_cm : Forwarded to the map builder.
        map_size_in_cm : Forwarded to the map builder.
        resolution_in_cm : Forwarded to the map builder.
        map_range_sensor : Sensor whose observation provides `"x_range"` and
            `"z_range"`; their lower ends are used as the map origin.
        return_egocentric_local_context : Forwarded to the builder; also
            selects which keys make up the observation space (see below).
        height_bins : Forwarded to the map builder.
        ego_only : If `False`, `"allocentric_update"` and `"map"` are also
            part of the observation space.
        exclude_agent : If `True`, depth pixels not covered by any instance
            mask are set to NaN before updating the map.
        uuid : Identifier of this sensor.
        device : Torch device assigned to the map builder.
        """
        self.fov = fov
        self.vision_range_in_cm = vision_range_in_cm
        self.map_size_in_cm = map_size_in_cm
        self.resolution_in_cm = resolution_in_cm
        self.height_bins = height_bins
        self.ego_only = ego_only
        self.return_egocentric_local_context = return_egocentric_local_context
        self.exclude_agent = exclude_agent

        self.binned_pc_map_builder = BinnedPointCloudMapBuilder(
            fov=fov,
            vision_range_in_cm=vision_range_in_cm,
            map_size_in_cm=map_size_in_cm,
            resolution_in_cm=resolution_in_cm,
            height_bins=height_bins,
            return_egocentric_local_context=return_egocentric_local_context,
        )
        # `device` is a property backed by the builder, so this assignment
        # must come after the builder has been created.
        self.device = device

        big_map_space = gym.spaces.Box(
            low=0,
            high=np.inf,
            shape=self.binned_pc_map_builder.binned_point_cloud_map.shape,
            dtype=np.float32,
        )
        local_map_space = gym.spaces.Box(
            low=0,
            high=np.inf,
            shape=(self.binned_pc_map_builder.vision_range_in_map_units,) * 2
            + self.binned_pc_map_builder.binned_point_cloud_map.shape[-1:],
            dtype=np.float32,
        )

        space_dict = {
            "egocentric_update": local_map_space,
        }
        # NOTE(review): this rebinds `space_dict`, so "egocentric_update" is
        # dropped from the observation space whenever the local context is
        # requested. Confirm whether adding the key (instead of replacing the
        # dict) was intended.
        if self.return_egocentric_local_context:
            space_dict = {
                "egocentric_local_context": copy.deepcopy(local_map_space),
            }
        if not ego_only:
            space_dict["allocentric_update"] = copy.deepcopy(big_map_space)
            space_dict["map"] = copy.deepcopy(big_map_space)

        observation_space = gym.spaces.Dict(space_dict)
        # All locals of this method are forwarded to the parent initializer.
        super().__init__(**prepare_locals_for_super(locals()))

        self.map_range_sensor = map_range_sensor

    @property
    def device(self):
        """The torch device of the underlying map builder."""
        return self.binned_pc_map_builder.device

    @device.setter
    def device(self, val: torch.device):
        self.binned_pc_map_builder.device = torch.device(val)

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Optional[Task[RoboThorEnvironment]],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Update the map with the current depth frame and return the map dict.

        Only the entries whose keys belong to this sensor's observation space
        are returned.
        """
        # `env` may be a raw controller or a wrapper exposing `.controller`.
        if isinstance(env, ai2thor.controller.Controller):
            controller = env
        else:
            controller = env.controller

        e = controller.last_event
        metadata = e.metadata

        if task.num_steps_taken() == 0:
            # First step of the task: re-anchor the map at the lower ends of
            # the x/z ranges reported by the map-range sensor.
            xz_ranges_dict = self.map_range_sensor.get_observation(env=env, task=task)
            if self.fov is None:
                self.binned_pc_map_builder.fov = e.metadata["fov"]
            self.binned_pc_map_builder.reset(
                min_xyz=np.array(
                    [
                        xz_ranges_dict["x_range"][0],
                        0,  # TODO: Should y be different per scene?
                        xz_ranges_dict["z_range"][0],
                    ]
                )
            )

        depth_frame = e.depth_frame

        if self.exclude_agent:
            # Work on a copy so the event's own depth frame is not modified.
            depth_frame = depth_frame.copy()
            assert len(e.instance_masks) > 0
            # Blank out (NaN) every pixel that no instance mask covers.
            depth_frame[~reduce(np.logical_or, e.instance_masks.values())] = np.nan

        map_dict = self.binned_pc_map_builder.update(
            depth_frame=depth_frame,
            camera_xyz=np.array(
                [metadata["cameraPosition"][k] for k in ["x", "y", "z"]]
            ),
            camera_rotation=metadata["agent"]["rotation"]["y"],
            camera_horizon=metadata["agent"]["cameraHorizon"],
        )
        return {k: map_dict[k] for k in self.observation_space.spaces.keys()}
class SemanticMapTHORSensor(Sensor[RoboThorEnvironment, Task[RoboThorEnvironment]]):
    """Sensor producing boolean semantic-map observations.

    Map construction is delegated to a `SemanticMapBuilder`. At the first
    step of every task the builder is reset with the 2D hulls of the objects
    of interest; on every step it is updated with the current depth frame
    and camera pose.
    """

    observation_space = gym.spaces.Dict

    def __init__(
        self,
        fov: float,
        vision_range_in_cm: int,
        map_size_in_cm: int,
        resolution_in_cm: int,
        ordered_object_types: Sequence[str],
        map_range_sensor: Sensor,
        ego_only: bool = True,
        uuid: str = "semantic_map",
        device: torch.device = torch.device("cpu"),
        **kwargs: Any,
    ):
        """Create the sensor, its map builder and its observation space.

        # Parameters
        fov : Forwarded to the map builder.
        vision_range_in_cm : Forwarded to the map builder; also determines
            the side length of the egocentric maps.
        map_size_in_cm : Forwarded to the map builder.
        resolution_in_cm : Forwarded to the map builder.
        ordered_object_types : Object types tracked by the map, one channel
            per type.
        map_range_sensor : Sensor whose observation provides `"x_range"` and
            `"z_range"`; their lower ends are used as the map origin.
        ego_only : If `False`, `"explored_mask"` and `"map"` are also part
            of the observation space.
        uuid : Identifier of this sensor.
        device : Torch device passed to the map builder.
        """
        # NOTE: every local of this method is forwarded to the parent
        # initializer through `locals()`, so local names are kept stable.
        self.fov = fov
        self.vision_range_in_cm = vision_range_in_cm
        self.map_size_in_cm = map_size_in_cm
        self.resolution_in_cm = resolution_in_cm
        self.ordered_object_types = ordered_object_types
        self.map_range_sensor = map_range_sensor
        self.ego_only = ego_only

        self.semantic_map_builder = SemanticMapBuilder(
            fov=fov,
            vision_range_in_cm=vision_range_in_cm,
            map_size_in_cm=map_size_in_cm,
            resolution_in_cm=resolution_in_cm,
            ordered_object_types=ordered_object_types,
            device=device,
        )

        def get_map_space(nchannels: int, size: int):
            return gym.spaces.Box(
                low=0,
                high=1,
                shape=(size, size, nchannels),
                dtype=np.bool_,
            )

        n = len(self.ordered_object_types)
        small = self.vision_range_in_cm // self.resolution_in_cm
        big = self.semantic_map_builder.ground_truth_semantic_map.shape[0]

        space_dict = {
            "egocentric_update": get_map_space(
                nchannels=n,
                size=small,
            ),
            "egocentric_mask": get_map_space(
                nchannels=1,
                size=small,
            ),
        }
        if not ego_only:
            space_dict["explored_mask"] = get_map_space(
                nchannels=1,
                size=big,
            )
            space_dict["map"] = get_map_space(
                nchannels=n,
                size=big,
            )

        observation_space = gym.spaces.Dict(space_dict)
        super().__init__(**prepare_locals_for_super(locals()))

    @property
    def device(self):
        """The torch device of the underlying map builder."""
        return self.semantic_map_builder.device

    @device.setter
    def device(self, val: torch.device):
        self.semantic_map_builder.device = torch.device(val)

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Optional[Task[RoboThorEnvironment]],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Update the semantic map and return it as boolean arrays.

        Only entries whose keys belong to this sensor's observation space are
        returned; non-boolean entries are thresholded at `0.001`.

        # Raises
        AssertionError : If the `Get2DSemanticHulls` action fails at the
            first step of a task.
        """
        with include_object_data(env.controller):
            last_event = env.controller.last_event
            metadata = last_event.metadata

            if task.num_steps_taken() == 0:
                env.controller.step(
                    "Get2DSemanticHulls", objectTypes=self.ordered_object_types
                )
                hulls_metadata = env.controller.last_event.metadata
                # Report the controller's error message on failure (the
                # success flag itself carries no diagnostic information).
                assert hulls_metadata[
                    "lastActionSuccess"
                ], f"Get2DSemanticHulls failed with error '{hulls_metadata['errorMessage']}'"

                object_id_to_hull = hulls_metadata["actionReturn"]

                xz_ranges_dict = self.map_range_sensor.get_observation(
                    env=env, task=task
                )

                self.semantic_map_builder.reset(
                    min_xyz=np.array(
                        [
                            xz_ranges_dict["x_range"][0],
                            0,  # TODO: Should y be different per scene?
                            xz_ranges_dict["z_range"][0],
                        ]
                    ),
                    object_hulls=[
                        ObjectHull2d(
                            object_id=o["objectId"],
                            object_type=o["objectType"],
                            hull_points=object_id_to_hull[o["objectId"]],
                        )
                        for o in metadata["objects"]
                        if o["objectId"] in object_id_to_hull
                    ],
                )

            # `last_event` / `metadata` deliberately refer to the event
            # captured before the hull query above.
            map_dict = self.semantic_map_builder.update(
                depth_frame=last_event.depth_frame,
                camera_xyz=np.array(
                    [metadata["cameraPosition"][k] for k in ["x", "y", "z"]]
                ),
                camera_rotation=metadata["agent"]["rotation"]["y"],
                camera_horizon=metadata["agent"]["cameraHorizon"],
            )
            return {
                k: map_dict[k] > 0.001 if map_dict[k].dtype != np.bool_ else map_dict[k]
                for k in self.observation_space.spaces.keys()
            }
================================================
FILE: allenact_plugins/ithor_plugin/ithor_task_samplers.py
================================================
import copy
import random
from typing import List, Dict, Optional, Any, Union, cast
import gym
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import set_deterministic_cudnn, set_seed
from allenact.utils.system import get_logger
from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask
class ObjectNavTaskSampler(TaskSampler):
    """Samples `ObjectNaviThorGridTask`s across a collection of iTHOR scenes.

    Scenes are visited according to `scene_period`:

    * `None`: a scene is chosen uniformly at random for every task.
    * `"manual"`: the scene only changes when `next_task` is called with
      `force_advance_scene=True`.
    * an `int`: the sampler stays in a scene for that many tasks and then
      moves to the next scene of a shuffled order.
    """

    def __init__(
        self,
        scenes: List[str],
        object_types: List[str],
        sensors: List[Sensor],
        max_steps: int,
        env_args: Dict[str, Any],
        action_space: gym.Space,
        scene_period: Optional[Union[int, str]] = None,
        max_tasks: Optional[int] = None,
        seed: Optional[int] = None,
        deterministic_cudnn: bool = False,
        **kwargs,
    ) -> None:
        """Initializer.

        # Parameters
        scenes : Names of the scenes to sample from.
        object_types : Candidate goal object types.
        sensors : Sensors given to every sampled task.
        max_steps : Maximum number of steps of every sampled task.
        env_args : Extra keyword arguments used to build the environment.
        action_space : Action space given to every sampled task.
        scene_period : Scene switching policy (see class documentation).
        max_tasks : Maximum number of tasks to sample, `None` for no limit.
        seed : Optional random seed.
        deterministic_cudnn : Whether to request deterministic cuDNN.
        """
        self.env_args = env_args
        self.scenes = scenes
        self.object_types = object_types
        self.grid_size = 0.25
        self.env: Optional[IThorEnvironment] = None
        self.sensors = sensors
        self.max_steps = max_steps
        self._action_space = action_space

        self.scene_counter: Optional[int] = None
        self.scene_order: Optional[List[str]] = None
        self.scene_id: Optional[int] = None
        self.scene_period: Optional[Union[str, int]] = (
            scene_period  # default makes a random choice
        )
        self.max_tasks: Optional[int] = None
        self.reset_tasks = max_tasks

        self._last_sampled_task: Optional[ObjectNaviThorGridTask] = None

        self.seed: Optional[int] = None
        self.set_seed(seed)

        if deterministic_cudnn:
            set_deterministic_cudnn()

        self.reset()

    def _create_environment(self) -> IThorEnvironment:
        """Build a new `IThorEnvironment` from `self.env_args`."""
        env = IThorEnvironment(
            make_agents_visible=False,
            object_open_speed=0.05,
            restrict_to_initially_reachable_points=True,
            **self.env_args,
        )
        return env

    @property
    def length(self) -> Union[int, float]:
        """Length.

        # Returns

        Number of total tasks remaining that can be sampled. Can be float('inf').
        """
        return float("inf") if self.max_tasks is None else self.max_tasks

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Number of unique tasks; unknown for this sampler, hence `None`."""
        return None

    @property
    def last_sampled_task(self) -> Optional[ObjectNaviThorGridTask]:
        """The task most recently returned by `next_task`, if any."""
        return self._last_sampled_task

    def close(self) -> None:
        """Stop the environment, if one has been created."""
        if self.env is not None:
            self.env.stop()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Check if observation spaces equal.

        # Returns

        True if all Tasks that can be sampled by this sampler have the
            same observation space. Otherwise False.
        """
        return True

    def sample_scene(self, force_advance_scene: bool):
        """Pick the scene of the next task and decrement the task budget.

        # Parameters
        force_advance_scene : Move to the next scene before applying the
            `scene_period` policy. Meant for `scene_period == "manual"`.

        # Returns
        The name of the selected scene.

        # Raises
        NotImplementedError : If `scene_period` is not `None`, `"manual"`
            or an integer.
        """
        if force_advance_scene:
            if self.scene_period != "manual":
                get_logger().warning(
                    "When sampling scene, have `force_advance_scene == True`"
                    " but `self.scene_period` is not equal to 'manual',"
                    " this may cause unexpected behavior."
                )
            self.scene_id = (1 + self.scene_id) % len(self.scenes)
            if self.scene_id == 0:
                random.shuffle(self.scene_order)

        if self.scene_period is None:
            # Random scene
            self.scene_id = random.randint(0, len(self.scenes) - 1)
        elif self.scene_period == "manual":
            pass
        elif isinstance(self.scene_period, int):
            # Checking the type first ensures an invalid (e.g. string) period
            # reaches the explicit error below rather than failing on `>=`.
            if self.scene_counter >= self.scene_period:
                if self.scene_id == len(self.scene_order) - 1:
                    # Randomize scene order for next iteration
                    random.shuffle(self.scene_order)
                    # Move to next scene
                    self.scene_id = 0
                else:
                    # Move to next scene
                    self.scene_id += 1
                # Reset scene counter
                self.scene_counter = 1
            else:
                # Stay in current scene
                self.scene_counter += 1
        else:
            raise NotImplementedError(
                "Invalid scene_period {}".format(self.scene_period)
            )

        if self.max_tasks is not None:
            self.max_tasks -= 1

        return self.scenes[int(self.scene_order[self.scene_id])]

    def next_task(
        self, force_advance_scene: bool = False
    ) -> Optional[ObjectNaviThorGridTask]:
        """Sample the next task, or return `None` once the task budget is spent.

        The environment is created lazily on first use and is only reset to
        a new scene when the sampled scene differs from the current one
        (ignoring a `"_physics"` suffix).
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        scene = self.sample_scene(force_advance_scene)

        if self.env is not None:
            if scene.replace("_physics", "") != self.env.scene_name.replace(
                "_physics", ""
            ):
                self.env.reset(scene)
        else:
            self.env = self._create_environment()
            self.env.reset(scene_name=scene)

        pose = self.env.randomize_agent_location()

        object_types_in_scene = {
            o["objectType"] for o in self.env.last_event.metadata["objects"]
        }

        task_info: Dict[str, Any] = {}
        # Goal type: first candidate (in random order) present in the scene.
        for ot in random.sample(self.object_types, len(self.object_types)):
            if ot in object_types_in_scene:
                task_info["object_type"] = ot
                break

        if len(task_info) == 0:
            # NOTE(review): no goal type could be chosen; building the task id
            # below will then raise a KeyError on 'object_type'.
            get_logger().warning(
                "Scene {} does not contain any"
                " objects of any of the types {}.".format(scene, self.object_types)
            )

        task_info["start_pose"] = copy.copy(pose)
        task_info["id"] = (
            f"{scene}__{'_'.join(list(map(str, self.env.get_key(pose))))}__{task_info['object_type']}"
        )

        self._last_sampled_task = ObjectNaviThorGridTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
        )
        return self._last_sampled_task

    def reset(self):
        """Restart scene iteration (with a fresh shuffle) and restore the task budget."""
        self.scene_counter = 0
        self.scene_order = list(range(len(self.scenes)))
        random.shuffle(self.scene_order)
        self.scene_id = 0
        self.max_tasks = self.reset_tasks

    def set_seed(self, seed: Optional[int]):
        """Record `seed` and, when it is not `None`, seed the random generators."""
        self.seed = seed
        if seed is not None:
            set_seed(seed)
================================================
FILE: allenact_plugins/ithor_plugin/ithor_tasks.py
================================================
import random
from typing import Dict, Tuple, List, Any, Optional, Union, Sequence, cast
import gym
import numpy as np
from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact.utils.system import get_logger
from allenact_plugins.ithor_plugin.ithor_constants import (
MOVE_AHEAD,
ROTATE_LEFT,
ROTATE_RIGHT,
LOOK_DOWN,
LOOK_UP,
END,
)
from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
from allenact_plugins.ithor_plugin.ithor_util import round_to_factor
class ObjectNaviThorGridTask(Task[IThorEnvironment]):
    """Defines the object navigation task in AI2-THOR.

    In object navigation an agent is randomly initialized into an AI2-THOR scene and must
    find an object of a given type (e.g. tomato, television, etc). An object is considered
    found if the agent takes an `End` action and the object is visible to the agent (see
    [here](https://ai2thor.allenai.org/documentation/concepts) for a definition of visibility
    in AI2-THOR).

    The actions available to an agent in this task are:

    1. Move ahead
        * Moves agent ahead by 0.25 meters.
    1. Rotate left / rotate right
        * Rotates the agent by 90 degrees counter-clockwise / clockwise.
    1. Look down / look up
        * Changes agent view angle by 30 degrees up or down. An agent cannot look more than 30
          degrees above horizontal or less than 60 degrees below horizontal.
    1. End
        * Ends the task and the agent receives a positive reward if the object type is visible to the agent,
        otherwise it receives a negative reward.

    # Attributes

    env : The ai2thor environment.
    sensor_suite: Collection of sensors formed from the `sensors` argument in the initializer.
    task_info : The task info. Must contain a field "object_type" that specifies, as a string,
        the goal object type.
    max_steps : The maximum number of steps an agent can take in the task before it is considered failed.
    observation_space: The observation space returned on each step from the sensors.
    """

    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, LOOK_DOWN, LOOK_UP, END)

    # Class-level cache shared by all task instances, keyed by
    # (scene name, object type); filled lazily by `query_expert`.
    _CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE: Dict[
        Tuple[str, str], List[Tuple[float, float, int, int]]
    ] = {}

    def __init__(
        self,
        env: IThorEnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        **kwargs,
    ) -> None:
        """Initializer.

        See class documentation for parameter definitions.
        """
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )
        self._took_end_action: bool = False
        self._success: Optional[bool] = False
        # Per-task subset (at most 5 entries) of the cached goal locations,
        # chosen on the first `query_expert` call that needs it.
        self._subsampled_locations_from_which_obj_visible: Optional[
            List[Tuple[float, float, int, int]]
        ] = None

        self.task_info["followed_path"] = [self.env.get_agent_location()]
        self.task_info["action_names"] = self.class_action_names()

    @property
    def action_space(self):
        """Discrete action space with one entry per action in `_actions`."""
        return gym.spaces.Discrete(len(self._actions))

    def reached_terminal_state(self) -> bool:
        """The task is terminal once the `End` action has been taken."""
        return self._took_end_action

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Names of the actions, in action-index order."""
        return cls._actions

    def close(self) -> None:
        """Stop the underlying environment."""
        self.env.stop()

    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        """Execute the action with the given index and return the step result.

        For `End` the episode is marked finished and success is whether the
        goal object is visible. Any other action is forwarded to the
        environment and the resulting agent location is appended to
        `task_info["followed_path"]`.
        """
        assert isinstance(action, int)
        action = cast(int, action)

        action_str = self.class_action_names()[action]

        if action_str == END:
            self._took_end_action = True
            self._success = self.is_goal_object_visible()
            self.last_action_success = self._success
        else:
            self.env.step({"action": action_str})
            self.last_action_success = self.env.last_action_success

            # NOTE(review): the cache is a class-level dict that is never set
            # to None in this class, so the second condition looks always true.
            if (
                not self.last_action_success
            ) and self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE is not None:
                self.env.update_graph_with_failed_action(failed_action=action_str)

            self.task_info["followed_path"].append(self.env.get_agent_location())

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )
        return step_result

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Return the environment's current frame; only `mode == "rgb"` is supported."""
        assert mode == "rgb", "only rgb rendering is implemented"
        return self.env.current_frame

    def is_goal_object_visible(self) -> bool:
        """Is the goal object currently visible?"""
        return any(
            o["objectType"] == self.task_info["object_type"]
            for o in self.env.visible_objects()
        )

    def judge(self) -> float:
        """Compute the reward after having taken a step."""
        # Constant per-step penalty.
        reward = -0.01

        # Extra penalty when the last action failed.
        if not self.last_action_success:
            reward += -0.03

        # Terminal bonus / penalty depending on success.
        if self._took_end_action:
            reward += 1.0 if self._success else -1.0

        return float(reward)

    def metrics(self) -> Dict[str, Any]:
        """Return task metrics: empty until the task is done, then the parent
        metrics plus a `"success"` entry."""
        if not self.is_done():
            return {}
        else:
            return {
                "success": self._success,
                **super(ObjectNaviThorGridTask, self).metrics(),
            }

    def query_expert(self, **kwargs) -> Tuple[int, bool]:
        """Return the expert's action index and whether that action is valid.

        If the goal object is visible the expert action is `End`. Otherwise
        the expert follows the shortest path (in the environment's graph) to
        one of up to five locations from which the goal object is
        interactable. `(0, False)` is returned when no such path exists or
        when the path computation claims the agent is already at the target.
        """
        target = self.task_info["object_type"]

        if self.is_goal_object_visible():
            return self.class_action_names().index(END), True
        else:
            key = (self.env.scene_name, target)
            if self._subsampled_locations_from_which_obj_visible is None:
                if key not in self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE:
                    # Cache miss: compute, for every object of the goal type,
                    # the locations from which it is interactable.
                    obj_ids: List[str] = []
                    obj_ids.extend(
                        o["objectId"]
                        for o in self.env.last_event.metadata["objects"]
                        if o["objectType"] == target
                    )

                    assert len(obj_ids) != 0, "No objects to get an expert path to."

                    locations_from_which_object_is_visible: List[
                        Tuple[float, float, int, int]
                    ] = []
                    y = self.env.last_event.metadata["agent"]["position"]["y"]
                    # Unique (x, z) coordinates of the graph nodes, at the
                    # agent's current height.
                    positions_to_check_interactionable_from = [
                        {"x": x, "y": y, "z": z}
                        for x, z in set((x, z) for x, z, _, _ in self.env.graph.nodes)
                    ]
                    for obj_id in set(obj_ids):
                        self.env.controller.step(
                            {
                                "action": "PositionsFromWhichItemIsInteractable",
                                "objectId": obj_id,
                                "positions": positions_to_check_interactionable_from,
                            }
                        )
                        assert (
                            self.env.last_action_success
                        ), "Could not get positions from which item was interactable."

                        returned = self.env.last_event.metadata["actionReturn"]
                        # Keep only entries with `standing == 1`, rounding the
                        # position to 2 decimals, the rotation to a multiple
                        # of 90 and the horizon to a multiple of 30.
                        locations_from_which_object_is_visible.extend(
                            (
                                round(x, 2),
                                round(z, 2),
                                round_to_factor(rot, 90) % 360,
                                round_to_factor(hor, 30) % 360,
                            )
                            for x, z, rot, hor, standing in zip(
                                returned["x"],
                                returned["z"],
                                returned["rotation"],
                                returned["horizon"],
                                returned["standing"],
                            )
                            if standing == 1
                        )

                    self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE[key] = (
                        locations_from_which_object_is_visible
                    )

                self._subsampled_locations_from_which_obj_visible = (
                    self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE[key]
                )
                # Bound the number of shortest-path queries per expert call.
                if len(self._subsampled_locations_from_which_obj_visible) > 5:
                    self._subsampled_locations_from_which_obj_visible = random.sample(
                        self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE[key], 5
                    )

            current_loc_key = self.env.get_key(self.env.last_event.metadata["agent"])
            paths = []

            for goal_key in self._subsampled_locations_from_which_obj_visible:
                path = self.env.shortest_state_path(
                    source_state_key=current_loc_key, goal_state_key=goal_key
                )
                if path is not None:
                    paths.append(path)
            if len(paths) == 0:
                return 0, False

            shortest_path_ind = int(np.argmin([len(p) for p in paths]))

            # A path of length 1 only contains the current state.
            if len(paths[shortest_path_ind]) == 1:
                get_logger().warning(
                    "Shortest path computations suggest we are at the target but episode does not think so."
                )
                return 0, False

            next_key_on_shortest_path = paths[shortest_path_ind][1]
            return (
                self.class_action_names().index(
                    self.env.action_transitioning_between_keys(
                        current_loc_key, next_key_on_shortest_path
                    )
                ),
                True,
            )
================================================
FILE: allenact_plugins/ithor_plugin/ithor_util.py
================================================
import glob
import math
import os
import platform
import traceback
import warnings
from contextlib import contextmanager
from typing import Sequence
import Xlib
import Xlib.display
import ai2thor.controller
@contextmanager
def include_object_data(controller: ai2thor.controller.Controller):
    """Context manager guaranteeing object metadata is available inside the block.

    If the controller's last event lists no objects, the object filter is
    reset on entry and set back to an empty filter on exit. Otherwise the
    controller is left untouched.
    """
    objects_were_filtered = len(controller.last_event.metadata["objects"]) == 0

    try:
        if objects_were_filtered:
            controller.step("ResetObjectFilter")
            assert controller.last_event.metadata["lastActionSuccess"]
        yield
    finally:
        # Restore the empty filter that was in effect before entering.
        if objects_were_filtered:
            controller.step("SetObjectFilter", objectIds=[])
            assert controller.last_event.metadata["lastActionSuccess"]
def vertical_to_horizontal_fov(
    vertical_fov_in_degrees: float, height: float, width: float
):
    """Convert a vertical field of view into the matching horizontal one.

    # Parameters
    vertical_fov_in_degrees : Vertical field of view, strictly between 0 and 180.
    height : Frame height.
    width : Frame width.

    # Returns
    The horizontal field of view in degrees.
    """
    assert 0 < vertical_fov_in_degrees < 180

    half_vertical_in_rads = math.radians(vertical_fov_in_degrees) * 0.5
    half_horizontal_tangent = math.tan(half_vertical_in_rads) * (width / height)
    return math.degrees(math.atan(half_horizontal_tangent)) * 2
def horizontal_to_vertical_fov(
    horizontal_fov_in_degrees: float, height: float, width: float
):
    """Convert a horizontal field of view (degrees) into the vertical one.

    This is the vertical-to-horizontal conversion with the roles of the
    frame's height and width exchanged.
    """
    swapped_height, swapped_width = width, height
    return vertical_to_horizontal_fov(
        vertical_fov_in_degrees=horizontal_fov_in_degrees,
        height=swapped_height,
        width=swapped_width,
    )
def round_to_factor(num: float, base: int) -> int:
    """Round a number to the nearest integer multiple of `base`.

    For example, rounding 90.1 with base 45 gives 90.

    # Parameters
    num : floating point number to be rounded.
    base : integer base.
    """
    nearest_multiple_count = round(num / base)
    return nearest_multiple_count * base
def get_open_x_displays(throw_error_if_empty: bool = False) -> Sequence[str]:
    """List the screens of all X-displays that can currently be opened.

    Candidate displays are discovered from the sockets in `/tmp/.X11-unix`.
    Displays that cannot be opened are skipped with a warning.

    # Parameters
    throw_error_if_empty : Raise an `IOError` when no display was found.

    # Returns
    Strings of the form `"<display>.<screen>"`.

    # Raises
    AssertionError : If not running on Linux.
    IOError : If no display was found and `throw_error_if_empty` is set.
    """
    assert platform.system() == "Linux", "Can only get X-displays for Linux systems."

    displays = []

    # Socket files are named "X<display number>"; drop the leading "X".
    open_display_strs = [
        os.path.basename(s)[1:] for s in glob.glob("/tmp/.X11-unix/X*")
    ]

    for open_display_str in sorted(open_display_strs):
        try:
            open_display_str = str(int(open_display_str))
            display = Xlib.display.Display(f":{open_display_str}")
        except Exception:
            warnings.warn(
                f"Encountered error when attempting to open display :{open_display_str},"
                f" error message:\n{traceback.format_exc()}"
            )
            continue

        try:
            displays.extend(
                [f"{open_display_str}.{i}" for i in range(display.screen_count())]
            )
        finally:
            # The connection was only opened to count screens; release it so
            # repeated calls do not accumulate open X connections.
            display.close()

    if throw_error_if_empty and len(displays) == 0:
        raise IOError(
            "Could not find any open X-displays on which to run AI2-THOR processes. "
            " Please see the AI2-THOR installation instructions at"
            " https://allenact.org/installation/installation-framework/#installation-of-ithor-ithor-plugin"
            " for information as to how to start such displays."
        )

    return displays
================================================
FILE: allenact_plugins/ithor_plugin/ithor_viz.py
================================================
import copy
import json
import math
import os
from typing import Tuple, Sequence, Union, Dict, Optional, Any, cast, Generator, List
import colour as col
import cv2
import numpy as np
from PIL import Image, ImageDraw
from ai2thor.controller import Controller
from matplotlib import pyplot as plt
from matplotlib.figure import Figure
from allenact.utils.system import get_logger
from allenact.utils.viz_utils import TrajectoryViz
# Directory (under the user's home) in which the top-down visualization
# cache is kept: ~/.allenact/ithor/top_down_viz_cache
ITHOR_VIZ_CACHED_TOPDOWN_VIEWS_DIR = os.path.join(
    os.path.expanduser("~"), ".allenact", "ithor", "top_down_viz_cache"
)
class ThorPositionTo2DFrameTranslator(object):
    """Maps world (x, z) positions to (row, col) pixels of a top-down frame.

    The frame is assumed to show the square of half-width `orth_size`
    centered at the camera's (x, z) position.
    """

    def __init__(
        self,
        frame_shape_rows_cols: Tuple[int, int],
        cam_position: Sequence[float],
        orth_size: float,
    ):
        """Store the frame shape and the world-space square covered by it.

        # Parameters
        frame_shape_rows_cols : Frame size as `(rows, cols)`.
        cam_position : Camera position `(x, y, z)`; only x and z are used.
        orth_size : Half of the side length of the covered square.
        """
        cam_x, cam_z = cam_position[0], cam_position[2]

        self.frame_shape = frame_shape_rows_cols
        self.span = 2 * orth_size
        self.lower_left = np.array((cam_x, cam_z)) - orth_size

    def __call__(self, position: Sequence[float]):
        """Translate an `(x, y, z)` or `(x, z)` position into `[row, col]`.

        Rows grow downwards in the frame, hence the z coordinate is flipped.
        """
        if len(position) == 3:
            world_x, _unused_y, world_z = position
        else:
            world_x, world_z = position

        # Position within the covered square, as fractions in x and z.
        fraction = (np.array((world_x, world_z)) - self.lower_left) / self.span

        row = round(self.frame_shape[0] * (1.0 - fraction[1]))
        col = round(self.frame_shape[1] * fraction[0])
        return np.array((row, col), dtype=int)
class ThorViz(TrajectoryViz):
def __init__(
    self,
    path_to_trajectory: Sequence[str] = ("task_info", "followed_path"),
    label: str = "thor_trajectory",
    figsize: Tuple[float, float] = (8, 8),  # width, height
    fontsize: float = 10,
    scenes: Union[Tuple[str, int, int], Sequence[Tuple[str, int, int]]] = (
        ("FloorPlan{}_physics", 1, 30),
        ("FloorPlan{}_physics", 201, 230),
        ("FloorPlan{}_physics", 301, 330),
        ("FloorPlan{}_physics", 401, 430),
    ),
    viz_rows_cols: Tuple[int, int] = (448, 448),
    single_color: bool = False,
    view_triangle_only_on_last: bool = True,
    disable_view_triangle: bool = False,
    line_opacity: float = 1.0,
    path_to_rot_degrees: Sequence[str] = ("rotation",),
    **kwargs,
):
    """Configure the trajectory visualizer.

    # Parameters
    path_to_trajectory, label, figsize, fontsize, path_to_rot_degrees :
        Forwarded (with `kwargs`) to the parent initializer.
    scenes : A single `(name_template, first_index, last_index)` triple or
        a sequence of such triples describing the scenes to visualize.
    viz_rows_cols : Size, as `(rows, cols)`, of the top-down views.
    single_color, view_triangle_only_on_last, disable_view_triangle,
    line_opacity : Rendering options, stored on the instance.
    """
    super().__init__(
        path_to_trajectory=path_to_trajectory,
        label=label,
        figsize=figsize,
        fontsize=fontsize,
        path_to_rot_degrees=path_to_rot_degrees,
        **kwargs,
    )

    # A lone triple is recognised by its first element being the template
    # string; wrap it so that `self.scenes` is always a list of triples.
    given_single_triple = isinstance(scenes[0], str)
    if given_single_triple:
        scenes = [cast(Tuple[str, int, int], scenes)]
    self.scenes = cast(List[Tuple[str, int, int]], scenes)

    # Cache directory for map data and top-down images.
    self.room_path = ITHOR_VIZ_CACHED_TOPDOWN_VIEWS_DIR
    os.makedirs(self.room_path, exist_ok=True)

    # Rendering options.
    self.viz_rows_cols = viz_rows_cols
    self.single_color = single_color
    self.view_triangle_only_on_last = view_triangle_only_on_last
    self.disable_view_triangle = disable_view_triangle
    self.line_opacity = line_opacity

    # Only needed for rendering
    self.map_data: Optional[Dict[str, Any]] = None
    self.thor_top_downs: Optional[Dict[str, np.ndarray]] = None

    self.controller: Optional[Controller] = None
def init_top_down_render(self):
    """Load (or create) the map data and top-down views, then release the controller."""
    self.map_data = self.get_translator()
    self.thor_top_downs = self.make_top_down_views()

    # No controller needed after this point
    if self.controller is None:
        return
    self.controller.stop()
    self.controller = None
@staticmethod
def iterate_scenes(
all_scenes: Sequence[Tuple[str, int, int]]
) -> Generator[str, None, None]:
for scenes in all_scenes:
for wall in range(scenes[1], scenes[2] + 1):
roomname = scenes[0].format(wall)
yield roomname
def cached_map_data_path(self, roomname: str) -> str:
    """Path of the cached map-data JSON file of `roomname`."""
    file_name = "map_data__{}.json".format(roomname)
    return os.path.join(self.room_path, file_name)
def get_translator(self) -> Dict[str, Any]:
    """Return, per room, its map data extended with a position translator.

    Map data is read from the JSON cache when present; otherwise it is
    obtained from a controller reset to the room and then written to the
    cache. Each entry additionally gets a `"pos_translator"` mapping world
    positions to pixels of the top-down view.
    """
    map_data_by_room = {}

    for roomname in ThorViz.iterate_scenes(self.scenes):
        cache_file = self.cached_map_data_path(roomname)

        if os.path.exists(cache_file):
            with open(cache_file, "r") as f:
                map_data = json.load(f)
        else:
            self.make_controller()
            self.controller.reset(roomname)
            map_data = self.get_agent_map_data()
            get_logger().info("Dumping {}".format(cache_file))
            with open(cache_file, "w") as f:
                json.dump(map_data, f, indent=4, sort_keys=True)

        # Added after the (optional) dump, so it is never written to disk.
        map_data["pos_translator"] = ThorPositionTo2DFrameTranslator(
            self.viz_rows_cols,
            self.position_to_tuple(map_data["cam_position"]),
            map_data["cam_orth_size"],
        )
        map_data_by_room[roomname] = map_data

    get_logger().debug("Using map_data {}".format(map_data_by_room))
    return map_data_by_room
def cached_image_path(self, roomname: str) -> str:
    """Path of the cached top-down image of `roomname` at the configured size."""
    file_name = "{}__r{}_c{}.png".format(roomname, *self.viz_rows_cols)
    return os.path.join(self.room_path, file_name)
def make_top_down_views(self) -> Dict[str, np.ndarray]:
    """Return the top-down image of every room, rendering missing ones first.

    Images are read from the cache directory; an image that is not cached
    yet is first dumped there using the controller.
    """
    views_by_room = {}

    for roomname in self.iterate_scenes(self.scenes):
        image_path = self.cached_image_path(roomname)

        already_cached = os.path.exists(image_path)
        if not already_cached:
            self.make_controller()
            self.dump_top_down_view(roomname, image_path)

        views_by_room[roomname] = cv2.imread(image_path)

    return views_by_room
def crop_viz_image(self, viz_image: np.ndarray) -> np.ndarray:
    """Crop `viz_image` to the window given by `self.viz_rows_cols`.

    The crop keeps rows `[0, viz_rows_cols[0])` and columns
    `[0, viz_rows_cols[1])`, and all channels.
    """
    n_rows, n_cols = self.viz_rows_cols[0], self.viz_rows_cols[1]
    # The row bounds are expressed as fractions (0 and 1) of the row count.
    row_window = slice(int(n_rows * 0), int(n_rows * 1))
    col_window = slice(0, n_cols)
    return viz_image[row_window, col_window, :]
def make_controller(self):
    """Create and configure `self.controller` if it does not exist yet.

    A new controller is switched to "Very High" quality and its resolution is
    set from `self.viz_rows_cols` (columns as `x`, rows as `y`). Does nothing
    when a controller is already present.
    """
    if self.controller is not None:
        return

    self.controller = Controller()
    self.controller.step({"action": "ChangeQuality", "quality": "Very High"})
    resolution_action = {
        "action": "ChangeResolution",
        "x": self.viz_rows_cols[1],
        "y": self.viz_rows_cols[0],
    }
    self.controller.step(resolution_action)
def get_agent_map_data(self):
    """Read the camera position and orthographic size from the map view.

    The map view is toggled before reading the last event's metadata and
    toggled again afterwards.

    # Returns

    A dictionary with keys `"cam_position"` and `"cam_orth_size"`.
    """
    self.controller.step({"action": "ToggleMapView"})
    map_data = {
        "cam_position": self.controller.last_event.metadata["cameraPosition"],
        "cam_orth_size": self.controller.last_event.metadata["cameraOrthSize"],
    }
    self.controller.step({"action": "ToggleMapView"})
    return map_data
@staticmethod
def position_to_tuple(position: Dict[str, float]) -> Tuple[float, float, float]:
    """Return the `"x"`, `"y"` and `"z"` entries of `position` as a tuple."""
    x, y, z = (position[axis] for axis in ("x", "y", "z"))
    return x, y, z
@staticmethod
def add_lines_to_map(
    ps: Sequence[Any],
    frame: np.ndarray,
    pos_translator: ThorPositionTo2DFrameTranslator,
    opacity: float,
    color: Optional[Tuple[int, ...]] = None,
) -> np.ndarray:
    """Draw the polyline through `ps` on top of `frame`.

    # Parameters

    ps : Points of the polyline; each is passed to `pos_translator`.
    frame : Image to draw on. It is converted to `uint8` and treated as RGB.
    pos_translator : Callable mapping a point to frame coordinates. Its
        output is reversed before being handed to PIL.
    opacity : Line opacity in `[0, 1]`, scaled to the 0-255 alpha range.
    color : RGB color of the line, red when `None`.

    # Returns

    `frame` itself when there are fewer than two points, otherwise a new RGB
    array with the lines composited over the input.
    """
    if len(ps) <= 1:
        return frame
    if color is None:
        color = (255, 0, 0)

    img1 = Image.fromarray(frame.astype("uint8"), "RGB").convert("RGBA")
    # PIL sizes are (width, height) while `frame.shape[:-1]` is (rows, cols);
    # reusing the base image's size keeps both layers identical in size so
    # that `alpha_composite` also works for non-square frames.
    img2 = Image.new("RGBA", img1.size)  # Use RGBA

    alpha = int(round(255 * opacity))  # Define transparency for the lines.
    fill = tuple(color) + (alpha,)
    line_width = int(frame.shape[0] / 100)

    # Translate every point once; consecutive pairs form the segments.
    points = [tuple(reversed(pos_translator(p))) for p in ps]

    draw = ImageDraw.Draw(img2)
    for start, end in zip(points, points[1:]):
        draw.line(start + end, fill=fill, width=line_width)

    img = Image.alpha_composite(img1, img2)
    return np.array(img.convert("RGB"))
@staticmethod
def add_line_to_map(
    p0: Any,
    p1: Any,
    frame: np.ndarray,
    pos_translator: ThorPositionTo2DFrameTranslator,
    opacity: float,
    color: Optional[Tuple[int, ...]] = None,
) -> np.ndarray:
    """Draw a single line from `p0` to `p1` on top of `frame`.

    # Parameters

    p0 : Start point, passed to `pos_translator`.
    p1 : End point, passed to `pos_translator`.
    frame : Image to draw on. It is converted to `uint8` and treated as RGB.
    pos_translator : Callable mapping a point to frame coordinates. Its
        output is reversed before being handed to PIL.
    opacity : Line opacity in `[0, 1]`, scaled to the 0-255 alpha range.
    color : RGB color of the line, red when `None`.

    # Returns

    `frame` itself when both points are equal, otherwise a new RGB array with
    the line composited over the input.
    """
    if p0 == p1:
        return frame
    if color is None:
        color = (255, 0, 0)

    img1 = Image.fromarray(frame.astype("uint8"), "RGB").convert("RGBA")
    # PIL sizes are (width, height) while `frame.shape[:-1]` is (rows, cols);
    # reusing the base image's size keeps both layers identical in size so
    # that `alpha_composite` also works for non-square frames.
    img2 = Image.new("RGBA", img1.size)  # Use RGBA

    alpha = int(round(255 * opacity))  # Define transparency for the line.

    draw = ImageDraw.Draw(img2)
    draw.line(
        tuple(reversed(pos_translator(p0))) + tuple(reversed(pos_translator(p1))),
        fill=tuple(color) + (alpha,),
        width=int(frame.shape[0] / 100),
    )

    img = Image.alpha_composite(img1, img2)
    return np.array(img.convert("RGB"))
@staticmethod
def add_agent_view_triangle(
    position: Any,
    rotation: float,
    frame: np.ndarray,
    pos_translator: ThorPositionTo2DFrameTranslator,
    scale: float = 1.0,
    opacity: float = 0.1,
) -> np.ndarray:
    """Overlay a translucent white triangle marking the agent's view direction.

    # Parameters

    position : Indexable agent position; entries 0 and 2 are used as the
        planar coordinates of the triangle's apex.
    rotation : Agent rotation in degrees.
    frame : Image to draw on. It is converted to `uint8` and treated as RGB.
    pos_translator : Callable mapping a planar point to frame coordinates.
        Its output is reversed before being handed to PIL.
    scale : Scale factor applied to the triangle's two far corners.
    opacity : Triangle opacity in `[0, 1]`, scaled to the 0-255 alpha range.

    # Returns

    A new RGB array with the triangle composited over the input.
    """
    # Force a float array: the rotated offsets added below are floats, and an
    # in-place add onto an integer array (integral positions) would raise.
    p0 = np.array((position[0], position[2]), dtype=float)
    p1 = copy.copy(p0)
    p2 = copy.copy(p0)

    theta = -2 * math.pi * (rotation / 360.0)
    rotation_mat = np.array(
        [[math.cos(theta), -math.sin(theta)], [math.sin(theta), math.cos(theta)]]
    )
    offset1 = scale * np.array([-1 / 2.0, 1])
    offset2 = scale * np.array([1 / 2.0, 1])

    p1 += np.matmul(rotation_mat, offset1)
    p2 += np.matmul(rotation_mat, offset2)

    img1 = Image.fromarray(frame.astype("uint8"), "RGB").convert("RGBA")
    # PIL sizes are (width, height) while `frame.shape[:-1]` is (rows, cols);
    # reusing the base image's size keeps both layers identical in size so
    # that `alpha_composite` also works for non-square frames.
    img2 = Image.new("RGBA", img1.size)  # Use RGBA

    alpha = int(round(255 * opacity))  # Define transparency for the triangle.
    points = [tuple(reversed(pos_translator(p))) for p in [p0, p1, p2]]
    draw = ImageDraw.Draw(img2)
    draw.polygon(points, fill=(255, 255, 255, alpha))

    img = Image.alpha_composite(img1, img2)
    return np.array(img.convert("RGB"))
@staticmethod
def visualize_agent_path(
    positions: Sequence[Any],
    frame: np.ndarray,
    pos_translator: ThorPositionTo2DFrameTranslator,
    single_color: bool = False,
    view_triangle_only_on_last: bool = False,
    disable_view_triangle: bool = False,
    line_opacity: float = 1.0,
    trajectory_start_end_color_str: Tuple[str, str] = ("red", "green"),
) -> np.ndarray:
    """Draw an agent trajectory (and optional view triangles) onto `frame`.

    # Parameters

    positions : Trajectory entries. Each is converted with
        `ThorViz.position_to_tuple` and must provide a `"rotation"` entry
        when view triangles are drawn.
    frame : Top-down image to draw on.
    pos_translator : Callable mapping world positions to frame coordinates.
    single_color : If `True`, the whole path is drawn in the start color;
        otherwise segments fade from the start to the end color.
    view_triangle_only_on_last : Draw a (more opaque) view triangle only at
        the last position instead of at every position.
    disable_view_triangle : Draw no view triangles at all.
    line_opacity : Opacity of the path lines in `[0, 1]`.
    trajectory_start_end_color_str : Names of the start and end colors.

    # Returns

    The annotated frame. An empty trajectory leaves the frame untouched.
    """

    def to_rgb255(color) -> Tuple[int, ...]:
        # Color channels are floats in [0, 1]; drawing expects 0-255 ints.
        return tuple(int(round(255 * channel)) for channel in color.rgb)

    start_color_str = trajectory_start_end_color_str[0]
    end_color_str = trajectory_start_end_color_str[1]

    if single_color:
        frame = ThorViz.add_lines_to_map(
            list(map(ThorViz.position_to_tuple, positions)),
            frame,
            pos_translator,
            line_opacity,
            to_rgb255(col.Color(start_color_str)),
        )
    elif len(positions) > 1:
        # One color per segment, interpolated between the two end colors.
        colors = list(
            col.Color(start_color_str).range_to(
                col.Color(end_color_str), len(positions) - 1
            )
        )
        points = [ThorViz.position_to_tuple(position) for position in positions]
        for start, end, segment_color in zip(points, points[1:], colors):
            frame = ThorViz.add_line_to_map(
                start,
                end,
                frame,
                pos_translator,
                opacity=line_opacity,
                color=to_rgb255(segment_color),
            )

    # Guard the empty trajectory: `positions[-1]` would raise IndexError.
    if disable_view_triangle or len(positions) == 0:
        triangle_positions: Sequence[Any] = []
    elif view_triangle_only_on_last:
        triangle_positions = [positions[-1]]
    else:
        triangle_positions = positions

    for position in triangle_positions:
        frame = ThorViz.add_agent_view_triangle(
            ThorViz.position_to_tuple(position),
            rotation=position["rotation"],
            frame=frame,
            pos_translator=pos_translator,
            opacity=0.05 + view_triangle_only_on_last * 0.2,
        )
    return frame
def dump_top_down_view(self, room_name: str, image_path: str):
    """Render the top-down map view of `room_name` and save it to `image_path`.

    Requires `self.controller` to exist already. The scene is reset and
    initialized with a grid size of 0.1 and invisible agents before the map
    view is toggled and the resulting frame is written with `cv2.imwrite`.
    """
    get_logger().debug("Dumping {}".format(image_path))

    controller = self.controller
    controller.reset(room_name)
    controller.step(
        {"action": "Initialize", "gridSize": 0.1, "makeAgentsVisible": False}
    )
    controller.step({"action": "ToggleMapView"})

    cv2.imwrite(image_path, controller.last_event.cv2img)
def make_fig(self, episode: Any, episode_id: str) -> Figure:
    """Build a figure showing the episode's trajectory on its room's map.

    The room name is taken to be the first two `_`-separated pieces of
    `episode_id`. Top-down renders are initialized lazily on first use.

    # Parameters

    episode : Episode data from which the trajectory is read via
        `self._access` and `self.path_to_trajectory`.
    episode_id : Identifier used for the room lookup and the figure title.

    # Returns

    The matplotlib figure containing the annotated top-down view.
    """
    trajectory: Sequence[Dict[str, Any]] = self._access(
        episode, self.path_to_trajectory
    )

    if self.thor_top_downs is None:
        self.init_top_down_render()

    roomname = "_".join(episode_id.split("_", 2)[:2])
    top_down = self.thor_top_downs[roomname]
    translator = self.map_data[roomname]["pos_translator"]

    annotated = self.visualize_agent_path(
        trajectory,
        top_down,
        translator,
        single_color=self.single_color,
        view_triangle_only_on_last=self.view_triangle_only_on_last,
        disable_view_triangle=self.disable_view_triangle,
        line_opacity=self.line_opacity,
    )

    fig, ax = plt.subplots(figsize=self.figsize)
    ax.set_title(episode_id, fontsize=self.fontsize)
    # Reverse the channel axis before display.
    ax.imshow(self.crop_viz_image(annotated)[:, :, ::-1])
    ax.axis("off")
    return fig
class ThorMultiViz(ThorViz):
    """`ThorViz` variant drawing the trajectories of several agents on one map.

    The trajectory of each agent is looked up under the path obtained by
    appending the agent's suffix to the last element of
    `path_to_trajectory_prefix`. Agents are paired, in order, with the entries
    of `trajectory_start_end_color_strs`.
    """

    def __init__(
        self,
        path_to_trajectory_prefix: Sequence[str] = ("task_info", "followed_path"),
        agent_suffixes: Sequence[str] = ("1", "2"),
        label: str = "thor_trajectories",
        trajectory_start_end_color_strs: Sequence[Tuple[str, str]] = (
            ("red", "green"),
            ("cyan", "purple"),
        ),
        **kwargs,
    ):
        """Store the per-agent lookup settings; `kwargs` go to `ThorViz`."""
        super().__init__(label=label, **kwargs)

        self.path_to_trajectory_prefix = list(path_to_trajectory_prefix)
        self.agent_suffixes = list(agent_suffixes)
        self.trajectory_start_end_color_strs = list(trajectory_start_end_color_strs)

    def make_fig(self, episode: Any, episode_id: str) -> Figure:
        """Build a figure with every agent's trajectory for this episode.

        The room name is taken to be the first two `_`-separated pieces of
        `episode_id`. Each agent's path is drawn on top of the image produced
        for the previous agents.
        """
        if self.thor_top_downs is None:
            self.init_top_down_render()

        roomname = "_".join(episode_id.split("_", 2)[:2])
        canvas = self.thor_top_downs[roomname]

        agents_and_colors = zip(
            self.agent_suffixes, self.trajectory_start_end_color_strs
        )
        for suffix, start_end_color in agents_and_colors:
            # Work on a copy so the shared prefix list is never modified.
            agent_path = self.path_to_trajectory_prefix[:]
            agent_path[-1] = agent_path[-1] + suffix

            canvas = self.visualize_agent_path(
                self._access(episode, agent_path),
                canvas,
                self.map_data[roomname]["pos_translator"],
                single_color=self.single_color,
                view_triangle_only_on_last=self.view_triangle_only_on_last,
                disable_view_triangle=self.disable_view_triangle,
                line_opacity=self.line_opacity,
                trajectory_start_end_color_str=start_end_color,
            )

        fig, ax = plt.subplots(figsize=self.figsize)
        ax.set_title(episode_id, fontsize=self.fontsize)
        # Reverse the channel axis before display.
        ax.imshow(self.crop_viz_image(canvas)[:, :, ::-1])
        ax.axis("off")
        return fig
================================================
FILE: allenact_plugins/ithor_plugin/scripts/__init__.py
================================================
================================================
FILE: allenact_plugins/ithor_plugin/scripts/make_objectnav_debug_dataset.py
================================================
import os
from allenact_plugins.robothor_plugin.scripts.make_objectnav_debug_dataset import (
create_debug_dataset_from_train_dataset,
)
if __name__ == "__main__":
    # All dataset paths are resolved relative to the current working directory.
    CURRENT_PATH = os.getcwd()
    DATASET_DIR = os.path.join(CURRENT_PATH, "datasets", "ithor-objectnav")

    SCENE = "FloorPlan1"
    TARGET = "Apple"
    EPISODES = [0, 7, 11, 12]
    BASE_OUT = os.path.join(DATASET_DIR, "debug")

    # Build the debug split from the selected episodes of the train split.
    create_debug_dataset_from_train_dataset(
        scene=SCENE,
        target_object_type=TARGET,
        episodes_subset=EPISODES,
        train_dataset_path=os.path.join(DATASET_DIR, "train"),
        base_debug_output_path=BASE_OUT,
    )
================================================
FILE: allenact_plugins/ithor_plugin/scripts/make_pointnav_debug_dataset.py
================================================
import os
from allenact_plugins.robothor_plugin.scripts.make_objectnav_debug_dataset import (
create_debug_dataset_from_train_dataset,
)
if __name__ == "__main__":
    # All dataset paths are resolved relative to the current working directory.
    CURRENT_PATH = os.getcwd()
    DATASET_DIR = os.path.join(CURRENT_PATH, "datasets", "ithor-pointnav")

    SCENE = "FloorPlan1"
    EPISODES = [0, 7, 11, 12]
    BASE_OUT = os.path.join(DATASET_DIR, "debug")

    # No target object type is passed for the point navigation dataset.
    create_debug_dataset_from_train_dataset(
        scene=SCENE,
        target_object_type=None,
        episodes_subset=EPISODES,
        train_dataset_path=os.path.join(DATASET_DIR, "train"),
        base_debug_output_path=BASE_OUT,
    )
================================================
FILE: allenact_plugins/lighthouse_plugin/__init__.py
================================================
================================================
FILE: allenact_plugins/lighthouse_plugin/configs/__init__.py
================================================
================================================
FILE: allenact_plugins/lighthouse_plugin/data/__init__.py
================================================
================================================
FILE: allenact_plugins/lighthouse_plugin/extra_environment.yml
================================================
dependencies:
- patsy>=0.5.1
- pip
- pip:
- gym-minigrid>=1.0.1
================================================
FILE: allenact_plugins/lighthouse_plugin/extra_requirements.txt
================================================
patsy>=0.5.1
gym-minigrid>=1.0.1
================================================
FILE: allenact_plugins/lighthouse_plugin/lighthouse_environment.py
================================================
import copy
import curses
import itertools
import time
from functools import lru_cache
from typing import Optional, Tuple, Any, List, Union, cast
import numpy as np
from gym.utils import seeding
from gym_minigrid import minigrid
# Integer codes stored in the cells of the world tensor.
EMPTY = 0  # Fill value of the base world tensor.
GOAL = 1  # Written at the goal position on every reset.
WRONG_CORNER = 2  # Written at every world corner of the base tensor.
WALL = 3  # Written on the first and last index along every axis.
@lru_cache(1000)
def _get_world_corners(world_dim: int, world_radius: int):
if world_radius == 0:
return ((0,) * world_dim,)
def combination_to_vec(comb) -> Tuple[int, ...]:
vec = [world_radius] * world_dim
for k in comb:
vec[k] *= -1
return tuple(vec)
return tuple(
sorted(
combination_to_vec(comb)
for i in range(world_dim + 1)
for comb in itertools.combinations(list(range(world_dim)), i)
)
)
@lru_cache(1000)
def _base_world_tensor(world_dim: int, world_radius: int):
    """Build the goal-free world tensor for the given dimension and radius.

    The tensor has `2 * world_radius + 1` cells along each of its `world_dim`
    axes. It is filled with `EMPTY`, the first and last index along every axis
    are set to `WALL`, and every world corner is then set to `WRONG_CORNER`.

    The returned array is memoized and therefore shared between calls.
    """
    side = 2 * world_radius + 1
    tensor = np.full((side,) * world_dim, fill_value=EMPTY)

    for axis in range(world_dim):
        for border_index in (0, 2 * world_radius):
            # Select the whole hyperplane at this border of the current axis.
            face: List[Union[slice, int]] = [slice(0, side)] * world_dim
            face[axis] = border_index
            tensor[tuple(face)] = WALL

    corners = _get_world_corners(world_dim=world_dim, world_radius=world_radius)
    for corner in corners:
        # Corner coordinates are centered on 0; shift them to array indices.
        tensor[tuple([loc + world_radius for loc in corner])] = WRONG_CORNER

    return tensor
class LightHouseEnvironment(object):
    """Grid world in which an agent searches the world's corners for a goal.

    The world is a hypercube with `2 * world_radius + 1` cells along each of
    its `world_dim` axes. Positions are centered on the origin, so each
    coordinate lies in `[-world_radius, world_radius]`. One corner holds the
    goal; the other corners are marked as wrong corners.

    # Attributes

    world_dim : Number of axes of the world.
    world_radius : Largest absolute value of a position coordinate.
    world_corners : Integer array with one row per world corner.
    world_tensor : Array of cell codes (`EMPTY`, `GOAL`, `WRONG_CORNER`, `WALL`).
    current_position : Current agent position.
    closest_distance_to_corners : For every corner, the smallest max-abs
        coordinate distance between the agent and that corner since the last
        reset.
    positions : Agent positions since the last reset, one per step.
    goal_position : Corner currently holding the goal.
    last_action : Last action passed to `step` (`None` right after a reset).
    """

    EMPTY = 0
    GOAL = 1
    WRONG_CORNER = 2
    WALL = 3

    SPACE_LEVELS = [EMPTY, GOAL, WRONG_CORNER, WALL]

    def __init__(self, world_dim: int, world_radius: int, **kwargs):
        """Create the world, seed its random generator and reset it.

        # Parameters

        world_dim : Number of axes of the world.
        world_radius : Largest absolute value of a position coordinate.
        kwargs : May contain `seed`; a random seed is drawn otherwise.
        """
        self.world_dim = world_dim
        self.world_radius = world_radius

        self.world_corners = np.array(
            _get_world_corners(world_dim=world_dim, world_radius=world_radius),
            dtype=int,
        )

        self.curses_screen: Optional[Any] = None

        # The base tensor is cached and shared, hence the deep copy.
        self.world_tensor: np.ndarray = copy.deepcopy(
            _base_world_tensor(world_radius=world_radius, world_dim=world_dim)
        )
        self.current_position = np.zeros(world_dim, dtype=int)
        self.closest_distance_to_corners = np.full(
            2**world_dim, fill_value=world_radius, dtype=int
        )
        self.positions: List[Tuple[int, ...]] = [tuple(self.current_position)]
        self.goal_position: Optional[np.ndarray] = None
        self.last_action: Optional[int] = None

        self.seed: Optional[int] = None
        self.np_seeded_random_gen: Optional[np.random.RandomState] = None
        self.set_seed(seed=int(kwargs.get("seed", np.random.randint(0, 2**31 - 1))))

        self.random_reset()

    def set_seed(self, seed: int):
        """Store `seed` and create the environment's own random generator."""
        # More information about why `np_seeded_random_gen` is used rather than just `np.random.seed`
        # can be found at gym/utils/seeding.py
        # There's literature indicating that having linear correlations between seeds of multiple
        # PRNG's can correlate the outputs
        self.seed = seed
        self.np_seeded_random_gen, _ = cast(
            Tuple[np.random.RandomState, Any], seeding.np_random(self.seed)
        )

    def _teardown_curses(self) -> None:
        """Restore the terminal and forget the curses screen, if there is one."""
        if self.curses_screen is None:
            return
        curses.nocbreak()
        self.curses_screen.keypad(False)
        curses.echo()
        curses.endwin()
        # Clearing the reference makes repeated teardowns harmless.
        self.curses_screen = None

    def random_reset(self, goal_position: Optional[bool] = None):
        """Reset the world and move the agent back to the origin.

        A new goal corner is sampled when `goal_position` is `None`.

        NOTE(review): a non-`None` `goal_position` is not stored anywhere; the
        previous `self.goal_position` is reused in that case. Confirm whether
        this "keep the current goal" behaviour is intended.
        """
        self.last_action = None
        self.world_tensor = copy.deepcopy(
            _base_world_tensor(world_radius=self.world_radius, world_dim=self.world_dim)
        )
        if goal_position is None:
            self.goal_position = self.world_corners[
                self.np_seeded_random_gen.randint(low=0, high=len(self.world_corners))
            ]
        self.world_tensor[
            tuple(cast(np.ndarray, self.world_radius + self.goal_position))
        ] = GOAL

        self._teardown_curses()

        self.current_position = np.zeros(self.world_dim, dtype=int)
        self.closest_distance_to_corners = np.abs(
            (self.world_corners - self.current_position.reshape(1, -1))
        ).max(1)
        self.positions = [tuple(self.current_position)]

    def step(self, action: int) -> bool:
        """Move the agent by one cell along one axis.

        Actions `0 .. world_dim - 1` move by +1 along axis `action`; actions
        `world_dim .. 2 * world_dim - 1` move by -1 along axis
        `action % world_dim`. Moves are clamped to the world's extent.

        # Returns

        `True` if the position changed, `False` if the move was blocked.
        """
        assert 0 <= action < 2 * self.world_dim
        self.last_action = action

        delta = -1 if action >= self.world_dim else 1
        ind = action % self.world_dim
        old = self.current_position[ind]
        new = min(max(delta + old, -self.world_radius), self.world_radius)
        if new == old:
            # Blocked: the trajectory still records one entry per step.
            self.positions.append(self.positions[-1])
            return False
        else:
            self.current_position[ind] = new
            self.closest_distance_to_corners = np.minimum(
                np.abs((self.world_corners - self.current_position.reshape(1, -1))).max(
                    1
                ),
                self.closest_distance_to_corners,
            )
            self.positions.append(tuple(self.current_position))
            return True

    def render(self, mode="array", **kwargs):
        """Render the world.

        # Parameters

        mode : One of
            * `"array"`: returns a copy of the world tensor with the agent's
              cell set to 9.
            * `"curses"`: prints the world (1 or 2 dimensions only) to a
              curses screen, then sleeps for `kwargs["sleep_time"]` seconds
              if given. `kwargs["extra_text"]` is printed below the world.
            * `"minigrid"`: returns the image rendered by a `minigrid.Grid`.

        # Raises

        NotImplementedError : For unknown modes and for curses rendering of
            worlds with more than 2 dimensions.
        """
        if mode == "array":
            arr = copy.deepcopy(self.world_tensor)
            arr[tuple(self.world_radius + self.current_position)] = 9
            return arr

        elif mode == "curses":
            if self.world_dim == 1:
                space_list = ["_"] * (1 + 2 * self.world_radius)

                goal_ind = self.goal_position[0] + self.world_radius
                space_list[goal_ind] = "G"
                # The opposite end of the line is the wrong corner.
                space_list[2 * self.world_radius - goal_ind] = "W"
                space_list[self.current_position[0] + self.world_radius] = "X"

                to_print = " ".join(space_list)

                if self.curses_screen is None:
                    self.curses_screen = curses.initscr()

                self.curses_screen.addstr(0, 0, to_print)
                if "extra_text" in kwargs:
                    self.curses_screen.addstr(1, 0, kwargs["extra_text"])
                self.curses_screen.refresh()
            elif self.world_dim == 2:
                space_list = [
                    ["_"] * (1 + 2 * self.world_radius)
                    for _ in range(1 + 2 * self.world_radius)
                ]

                for row_ind in range(1 + 2 * self.world_radius):
                    for col_ind in range(1 + 2 * self.world_radius):
                        if self.world_tensor[row_ind][col_ind] == self.GOAL:
                            space_list[row_ind][col_ind] = "G"

                        if self.world_tensor[row_ind][col_ind] == self.WRONG_CORNER:
                            space_list[row_ind][col_ind] = "C"

                        if self.world_tensor[row_ind][col_ind] == self.WALL:
                            space_list[row_ind][col_ind] = "W"

                        # The agent marker overrides whatever the cell holds.
                        if (
                            (row_ind, col_ind)
                            == self.world_radius + self.current_position
                        ).all():
                            space_list[row_ind][col_ind] = "X"

                if self.curses_screen is None:
                    self.curses_screen = curses.initscr()

                for i, sl in enumerate(space_list):
                    self.curses_screen.addstr(i, 0, " ".join(sl))

                self.curses_screen.addstr(len(space_list), 0, str(self.state()))
                if "extra_text" in kwargs:
                    self.curses_screen.addstr(
                        len(space_list) + 1, 0, kwargs["extra_text"]
                    )

                self.curses_screen.refresh()
            else:
                raise NotImplementedError("Cannot render worlds of > 2 dimensions.")

        elif mode == "minigrid":
            height = width = 2 * self.world_radius + 2
            grid = minigrid.Grid(width, height)

            # Generate the surrounding walls
            grid.horz_wall(0, 0)
            grid.horz_wall(0, height - 1)
            grid.vert_wall(0, 0)
            grid.vert_wall(width - 1, 0)

            # Place fake agent at the center
            agent_pos = np.array(self.positions[-1]) + 1 + self.world_radius
            # grid.set(*agent_pos, None)
            agent = minigrid.Goal()
            agent.color = "red"
            grid.set(agent_pos[0], agent_pos[1], agent)
            agent.init_pos = tuple(agent_pos)
            agent.cur_pos = tuple(agent_pos)

            goal_pos = self.goal_position + self.world_radius

            goal = minigrid.Goal()
            grid.set(goal_pos[0], goal_pos[1], goal)
            goal.init_pos = tuple(goal_pos)
            goal.cur_pos = tuple(goal_pos)

            # Highlight the cells within 5 steps of the agent along each axis.
            highlight_mask = np.zeros((height, width), dtype=bool)

            minx, maxx = max(1, agent_pos[0] - 5), min(height - 1, agent_pos[0] + 5)
            miny, maxy = max(1, agent_pos[1] - 5), min(height - 1, agent_pos[1] + 5)
            highlight_mask[minx : (maxx + 1), miny : (maxy + 1)] = True

            img = grid.render(
                minigrid.TILE_PIXELS, agent_pos, None, highlight_mask=highlight_mask
            )

            return img

        else:
            raise NotImplementedError("Unknown render mode {}.".format(mode))

        # Only reached by the curses mode; the other modes return above.
        time.sleep(0.0 if "sleep_time" not in kwargs else kwargs["sleep_time"])

    def close(self):
        """Release the curses screen, if any. Safe to call more than once."""
        self._teardown_curses()

    @staticmethod
    def optimal_ave_ep_length(world_dim: int, world_radius: int, view_radius: int):
        """Return the closed-form episode length for 1 and 2 dimensional worlds.

        # Raises

        NotImplementedError : If `world_dim` is neither 1 nor 2.
        """
        if world_dim == 1:
            max_steps_wrong_dir = max(world_radius - view_radius, 0)

            return max_steps_wrong_dir + world_radius

        elif world_dim == 2:
            tau = 2 * (world_radius - view_radius)

            average_steps_needed = 0.25 * (4 * 2 * view_radius + 10 * tau)

            return average_steps_needed
        else:
            raise NotImplementedError(
                "`optimal_average_ep_length` is only implemented"
                " for when the `world_dim` is 1 or 2 ({} given).".format(world_dim)
            )
================================================
FILE: allenact_plugins/lighthouse_plugin/lighthouse_models.py
================================================
from typing import Optional, Tuple, cast
import gym
import torch
import torch.nn as nn
from gym.spaces.dict import Dict as SpaceDict
from allenact.algorithms.onpolicy_sync.policy import (
ActorCriticModel,
Memory,
ObservationType,
)
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput, DistributionType
class LinearAdvisorActorCritic(ActorCriticModel[CategoricalDistr]):
    """Linear actor-critic with an additional auxiliary actor head.

    A single linear layer maps the one-dimensional observation stored under
    `input_uuid` to `2 * num_actions + 1` outputs: the logits of the main
    actor, the logits of the auxiliary actor, and one value estimate.
    """

    def __init__(
        self,
        input_uuid: str,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        ensure_same_init_aux_weights: bool = True,
    ):
        """Initializer.

        # Parameters

        input_uuid : Key of the observation fed to the linear layer. The
            corresponding space must be a one-dimensional `Box`.
        action_space : Discrete action space of the agent.
        observation_space : Observation space containing `input_uuid`.
        ensure_same_init_aux_weights : If `True`, the auxiliary actor's
            weights are initialized to equal those of the main actor.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        assert (
            input_uuid in observation_space.spaces
        ), "LinearAdvisorActorCritic expects `input_uuid` to be a key of the observation space."
        self.input_uuid = input_uuid

        box_space: gym.spaces.Box = observation_space[self.input_uuid]
        assert isinstance(box_space, gym.spaces.Box), (
            "LinearAdvisorActorCritic requires that the "
            "observation space corresponding to the input key is a Box space."
        )
        assert len(box_space.shape) == 1
        self.in_dim = box_space.shape[0]

        self.num_actions = action_space.n
        # Output layout: [main logits | auxiliary logits | value].
        self.linear = nn.Linear(self.in_dim, 2 * self.num_actions + 1)

        nn.init.orthogonal_(self.linear.weight)
        if ensure_same_init_aux_weights:
            # Ensure main actor / auxiliary actor start with the same weights
            self.linear.weight.data[self.num_actions : -1, :] = self.linear.weight[
                : self.num_actions, :
            ]
        nn.init.constant_(self.linear.bias, 0)

    # noinspection PyMethodMayBeStatic
    def _recurrent_memory_specification(self):
        """Return `None`: this model declares no recurrent memory."""
        return None

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Compute the action distributions and value from the observation.

        Only `observations[self.input_uuid]` is used; `memory`,
        `prev_actions` and `masks` are ignored.

        # Returns

        A tuple of the actor-critic output and `None` (no memory). The
        auxiliary actor's distribution is stored in the output's extras under
        `"auxiliary_distributions"`.
        """
        out = self.linear(cast(torch.Tensor, observations[self.input_uuid]))

        # Split the layer output according to its layout.
        main_logits = out[..., : self.num_actions]
        aux_logits = out[..., self.num_actions : -1]
        values = out[..., -1:]

        # noinspection PyArgumentList
        return (
            ActorCriticOutput(
                distributions=cast(
                    DistributionType, CategoricalDistr(logits=main_logits)
                ),  # step x sampler x ...
                values=cast(
                    torch.FloatTensor, values.view(values.shape[:2] + (-1,))
                ),  # step x sampler x flattened
                extras={"auxiliary_distributions": CategoricalDistr(logits=aux_logits)},
            ),
            None,
        )
================================================
FILE: allenact_plugins/lighthouse_plugin/lighthouse_sensors.py
================================================
import itertools
from typing import Any, Dict, Optional, Tuple, Sequence
import gym
import numpy as np
import pandas as pd
import patsy
from allenact.base_abstractions.sensor import Sensor, prepare_locals_for_super
from allenact.base_abstractions.task import Task
from allenact_plugins.lighthouse_plugin.lighthouse_environment import (
LightHouseEnvironment,
)
def get_corner_observation(
    env: LightHouseEnvironment,
    view_radius: int,
    view_corner_offsets: Optional[np.array],
):
    """Compute the corner observation of `env` for the given view radius.

    # Parameters

    env : The light house environment to observe.
    view_radius : How far the agent sees towards each corner.
    view_corner_offsets : Per-corner offsets from the agent to the cells it
        looks at; derived from `view_radius` and the corner signs if `None`.

    # Returns

    A `float32` vector with one entry per world corner followed by two extra
    entries: an "on border" value and the last action. A corner entry holds
    the corner's true cell value once the agent has been within
    `view_radius` of it, and otherwise the value of the cell currently seen
    in that corner's direction. `2 * env.world_dim` encodes "no action" /
    "not on a border".
    """
    radius = env.world_radius
    no_action = 2 * env.world_dim
    tensor_shape = env.world_tensor.shape

    if view_corner_offsets is None:
        view_corner_offsets = view_radius * (2 * (env.world_corners > 0) - 1)

    # Cells the agent currently looks at, clipped to stay inside the world.
    looked_at = np.clip(
        np.reshape(env.current_position, (1, -1)) + view_corner_offsets + radius,
        a_min=0,
        a_max=2 * radius,
    )
    looked_at_flat = np.ravel_multi_index(np.transpose(looked_at), tensor_shape)
    view_values = env.world_tensor.reshape(-1)[looked_at_flat]

    last_action = no_action if env.last_action is None else env.last_action

    # Entry `a` is True when the agent touches the border that action `a`
    # moves towards (positive directions first, then negative ones).
    on_border_bools = np.concatenate(
        (env.current_position == radius, env.current_position == -radius),
        axis=0,
    )
    if last_action == no_action or on_border_bools[last_action]:
        on_border_value = last_action
    elif on_border_bools.any():
        on_border_value = np.argwhere(on_border_bools).reshape(-1)[0]
    else:
        on_border_value = no_action

    # Corners the agent has already been close enough to are remembered.
    seen_mask = np.array(env.closest_distance_to_corners <= view_radius, dtype=int)
    corner_flat = np.ravel_multi_index(
        np.transpose(env.world_corners + radius), tensor_shape
    )
    seen_corner_values = env.world_tensor.reshape(-1)[corner_flat] * seen_mask

    corner_entries = seen_corner_values + view_values * (1 - seen_mask)
    observation = np.zeros((seen_corner_values.shape[0] + 2,), dtype=np.float32)
    return np.concatenate(
        (corner_entries, [on_border_value, last_action]),
        axis=0,
        out=observation,
    )
class CornerSensor(Sensor[LightHouseEnvironment, Any]):
    """Sensor returning the corner observation of a light house environment.

    The observation is computed by `get_corner_observation` with a fixed
    view radius.
    """

    def __init__(
        self,
        view_radius: int,
        world_dim: int,
        uuid: str = "corner_fixed_radius",
        **kwargs: Any
    ):
        """Initializer.

        # Parameters

        view_radius : View radius passed to `get_corner_observation`.
        world_dim : Number of axes of the observed world.
        uuid : Identifier of the sensor.
        kwargs : Forwarded to the superclass.
        """
        self.view_radius = view_radius
        self.world_dim = world_dim
        # Computed lazily on the first observation, once an environment is known.
        self.view_corner_offsets: Optional[np.ndarray] = None

        # NOTE: the local variables of this method (including
        # `observation_space`) are forwarded to the superclass through
        # `locals()`, so their names matter.
        observation_space = self._get_observation_space()

        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self):
        """Return a `Box` with one entry per world corner plus two extras."""
        return gym.spaces.Box(
            low=min(LightHouseEnvironment.SPACE_LEVELS),
            high=max(LightHouseEnvironment.SPACE_LEVELS),
            shape=(2**self.world_dim + 2,),
            dtype=int,
        )

    def get_observation(
        self,
        env: LightHouseEnvironment,
        task: Optional[Task],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Return the corner observation of `env`.

        `task`, `args` and `kwargs` are not used.
        """
        if self.view_corner_offsets is None:
            # Each offset points `view_radius` cells towards its corner.
            self.view_corner_offsets = self.view_radius * (
                2 * (env.world_corners > 0) - 1
            )

        return get_corner_observation(
            env=env,
            view_radius=self.view_radius,
            view_corner_offsets=self.view_corner_offsets,
        )
class FactorialDesignCornerSensor(Sensor[LightHouseEnvironment, Any]):
    """Sensor encoding the corner observation as a row of a design matrix.

    The observation of an internal `CornerSensor` is treated as a tuple of
    categorical variables. The full design matrix over all possible tuples is
    built once with `patsy` (and cached per formula); an observation is the
    matrix row belonging to the observed tuple.
    """

    # Cache shared by all instances, keyed by (variables and levels, degree).
    _DESIGN_MAT_CACHE: Dict[Tuple, Any] = {}

    def __init__(
        self,
        view_radius: int,
        world_dim: int,
        degree: int,
        uuid: str = "corner_fixed_radius_categorical",
        **kwargs: Any
    ):
        """Initializer.

        # Parameters

        view_radius : View radius of the internal `CornerSensor`.
        world_dim : Number of axes of the observed world (at most 2).
        degree : Interaction degree of the design formula; `-1` requests the
            full interaction of all variables.
        uuid : Identifier of the sensor.
        kwargs : Forwarded to the superclass.

        # Raises

        NotImplementedError : If `world_dim` is larger than 2.
        """
        self.view_radius = view_radius
        self.world_dim = world_dim
        self.degree = degree

        if self.world_dim > 2:
            raise NotImplementedError(
                "When using the `FactorialDesignCornerSensor`,"
                " `world_dim` must be <= 2 due to memory constraints."
                " In the current implementation, creating the design"
                " matrix in the `world_dim == 3` case would require"
                " instantiating a matrix of size ~ 3Mx3M (9 trillion entries)."
            )

        self.view_corner_offsets: Optional[np.ndarray] = None
        # self.world_corners_offset: Optional[List[typing.Tuple[int, ...]]] = None

        self.corner_sensor = CornerSensor(self.view_radius, self.world_dim)

        self.variables_and_levels = self._get_variables_and_levels(
            world_dim=self.world_dim
        )
        self._design_mat_formula = self._create_formula(
            variables_and_levels=self._get_variables_and_levels(
                world_dim=self.world_dim
            ),
            degree=self.degree,
        )
        self.single_row_df = pd.DataFrame(
            data=[[0] * len(self.variables_and_levels)],
            columns=[x[0] for x in self.variables_and_levels],
        )
        self._view_tuple_to_design_array: Dict[Tuple[int, ...], np.ndarray] = {}

        (
            design_matrix,
            tuple_to_ind,
        ) = self._create_full_design_matrix_and_tuple_to_ind_dict(
            variables_and_levels=tuple(self.variables_and_levels), degree=self.degree
        )

        self.design_matrix = design_matrix
        self.tuple_to_ind = tuple_to_ind

        # The observation space is derived from a design matrix row, so it
        # must be computed after `design_matrix` / `tuple_to_ind` are set.
        # NOTE: the local variables of this method (including
        # `observation_space`) are forwarded to the superclass through
        # `locals()`, so their names matter.
        observation_space = self._get_observation_space()

        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self):
        """Return a `Box` whose length equals that of a design matrix row."""
        return gym.spaces.Box(
            low=min(LightHouseEnvironment.SPACE_LEVELS),
            high=max(LightHouseEnvironment.SPACE_LEVELS),
            shape=(
                len(
                    self.view_tuple_to_design_array(
                        (0,) * len(self.variables_and_levels)
                    )
                ),
            ),
            dtype=int,
        )

    def view_tuple_to_design_array(self, view_tuple: Tuple):
        """Return the design matrix row of `view_tuple` as a `float32` array."""
        return np.array(
            self.design_matrix[self.tuple_to_ind[view_tuple], :], dtype=np.float32
        )

    @classmethod
    def output_dim(cls, world_dim: int):
        """Return the number of distinct variable-level combinations."""
        return ((3 if world_dim == 1 else 4) ** (2**world_dim)) * (
            2 * world_dim + 1
        ) ** 2

    @classmethod
    def _create_full_design_matrix_and_tuple_to_ind_dict(
        cls, variables_and_levels: Sequence[Tuple[str, Sequence[int]]], degree: int
    ):
        """Build (or fetch from the cache) the full design matrix.

        # Returns

        A tuple `(design_matrix, tuple_to_ind)` where `design_matrix` is a
        boolean array with one row per combination of variable levels and
        `tuple_to_ind` maps each combination to its row index.
        """
        # Levels are converted to tuples so that the cache key is hashable.
        variables_and_levels = tuple((x, tuple(y)) for x, y in variables_and_levels)
        key = (variables_and_levels, degree)
        if key not in cls._DESIGN_MAT_CACHE:
            all_tuples = [
                tuple(x)
                for x in itertools.product(
                    *[levels for _, levels in variables_and_levels]
                )
            ]

            tuple_to_ind = {}
            for i, t in enumerate(all_tuples):
                tuple_to_ind[t] = i

            df = pd.DataFrame(
                data=all_tuples,
                columns=[var_name for var_name, _ in variables_and_levels],
            )

            cls._DESIGN_MAT_CACHE[key] = (
                np.array(
                    1.0
                    * patsy.dmatrix(
                        cls._create_formula(
                            variables_and_levels=variables_and_levels, degree=degree
                        ),
                        data=df,
                    ),
                    dtype=bool,
                ),
                tuple_to_ind,
            )
        return cls._DESIGN_MAT_CACHE[key]

    @staticmethod
    def _get_variables_and_levels(world_dim: int):
        """Return the `(name, levels)` pairs of the observation's variables.

        There is one `s*` variable per world corner, followed by a single
        `b0` and a single `a0` variable with `2 * world_dim + 1` levels each.
        """
        return (
            [
                ("s{}".format(i), list(range(3 if world_dim == 1 else 4)))
                for i in range(2**world_dim)
            ]
            + [("b{}".format(i), list(range(2 * world_dim + 1))) for i in range(1)]
            + [("a{}".format(i), list(range(2 * world_dim + 1))) for i in range(1)]
        )

    @classmethod
    def _create_formula(
        cls, variables_and_levels: Sequence[Tuple[str, Sequence[int]]], degree: int
    ):
        """Create the formula string describing the design matrix.

        A `degree` of `-1` joins all categorical terms with `:`; otherwise the
        sum of the categorical terms is raised to the power `degree`.
        """

        def make_categorial(var_name, levels):
            return "C({}, levels={})".format(var_name, levels)

        if degree == -1:
            return ":".join(
                make_categorial(var_name, levels)
                for var_name, levels in variables_and_levels
            )
        else:
            return "({})**{}".format(
                "+".join(
                    make_categorial(var_name, levels)
                    for var_name, levels in variables_and_levels
                ),
                degree,
            )

    def get_observation(
        self,
        env: LightHouseEnvironment,
        task: Optional[Task],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Return the design matrix row of the current corner observation."""
        kwargs["as_tuple"] = True
        view_array = self.corner_sensor.get_observation(env, task, *args, **kwargs)
        return self.view_tuple_to_design_array(tuple(view_array))
================================================
FILE: allenact_plugins/lighthouse_plugin/lighthouse_tasks.py
================================================
import abc
import string
from typing import List, Dict, Any, Optional, Tuple, Union, Sequence, cast
import gym
import numpy as np
from gym.utils import seeding
from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor, SensorSuite
from allenact.base_abstractions.task import Task, TaskSampler
from allenact.utils.experiment_utils import set_seed
from allenact.utils.system import get_logger
from allenact_plugins.lighthouse_plugin.lighthouse_environment import (
LightHouseEnvironment,
)
from allenact_plugins.lighthouse_plugin.lighthouse_sensors import get_corner_observation
# Used to scale the step penalty into the reward given when an episode is
# about to run out of steps: STEP_PENALTY / (1 - DISCOUNT_FACTOR).
DISCOUNT_FACTOR = 0.99
# Base reward of every step.
STEP_PENALTY = -0.01
# Added to the step reward when the agent stands on the goal position.
FOUND_TARGET_REWARD = 1.0
class LightHouseTask(Task[LightHouseEnvironment], abc.ABC):
    """Abstract base class of embodied tasks in the light house gridworld.

    # Attributes

    env : The light house environment.
    sensor_suite: Collection of sensors formed from the `sensors` argument in the initializer.
    task_info : Dictionary of (k, v) pairs defining task goals and other task information.
    max_steps : The maximum number of steps an agent can take in the task before it is considered failed.
    observation_space: The observation space returned on each step from the sensors.
    """

    def __init__(
        self,
        env: LightHouseEnvironment,
        sensors: Union[SensorSuite, List[Sensor]],
        task_info: Dict[str, Any],
        max_steps: int,
        **kwargs,
    ) -> None:
        """Initializer.

        See class documentation for parameter definitions.
        """
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )

        # No action has been taken yet.
        self._last_action: Optional[int] = None

    @property
    def last_action(self) -> int:
        """The most recent action passed to `step`."""
        return self._last_action

    @last_action.setter
    def last_action(self, value: int):
        self._last_action = value

    def step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        """Record `action` as the last action and delegate to the base class."""
        assert isinstance(action, int)
        action = cast(int, action)

        self.last_action = action
        return super().step(action=action)

    def render(self, mode: str = "array", *args, **kwargs) -> np.ndarray:
        """Render the environment.

        # Parameters

        mode : `"array"` returns the environment's array rendering as is;
            `"rgb"`, `"rgb_array"` and `"human"` map every cell value of that
            array to an RGB color.

        # Raises

        NotImplementedError : For any other mode.
        """
        if mode == "array":
            return self.env.render(mode, **kwargs)

        if mode not in ["rgb", "rgb_array", "human"]:
            raise NotImplementedError("Render mode '{}' is not supported.".format(mode))

        # One color per possible cell value (0 through 9).
        palette = np.array(
            [
                (31, 119, 180),
                (255, 127, 14),
                (44, 160, 44),
                (214, 39, 40),
                (148, 103, 189),
                (140, 86, 75),
                (227, 119, 194),
                (127, 127, 127),
                (188, 189, 34),
                (23, 190, 207),
            ],
            dtype=np.uint8,
        )
        cell_values = self.env.render("array", **kwargs)
        return palette[cell_values]
class FindGoalLightHouseTask(LightHouseTask):
_CACHED_ACTION_NAMES: Dict[int, Tuple[str, ...]] = {}
def __init__(
    self,
    env: LightHouseEnvironment,
    sensors: Union[SensorSuite, List[Sensor]],
    task_info: Dict[str, Any],
    max_steps: int,
    **kwargs,
):
    """Initialize the base task and mark the target as not yet found."""
    super().__init__(env, sensors, task_info, max_steps, **kwargs)

    # Set to True by `_step` once the agent stands on the goal position.
    self._found_target = False
@property
def action_space(self) -> gym.spaces.Discrete:
    """Discrete space with two actions (+1 and -1) per world axis."""
    num_actions = 2 * self.env.world_dim
    return gym.spaces.Discrete(num_actions)
def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
    """Apply `action` to the environment and compute the step result.

    The reward is the step penalty, plus the found-target reward when the
    agent stands on the goal position. If the goal has not been reached and
    `num_steps_taken()` equals `max_steps - 1`, the reward is instead the
    step penalty divided by `1 - DISCOUNT_FACTOR`.
    """
    assert isinstance(action, int)
    action = cast(int, action)

    self.env.step(action)

    on_goal = np.all(self.env.current_position == self.env.goal_position)
    if on_goal:
        self._found_target = True
        reward = STEP_PENALTY + FOUND_TARGET_REWARD
    elif self.num_steps_taken() == self.max_steps - 1:
        reward = STEP_PENALTY / (1 - DISCOUNT_FACTOR)
    else:
        reward = STEP_PENALTY

    return RLStepResult(
        observation=self.get_observations(),
        reward=reward,
        done=self.is_done(),
        info=None,
    )
def reached_terminal_state(self) -> bool:
    """Return whether the agent has found the target."""
    found = self._found_target
    return found
@classmethod
def class_action_names(cls, world_dim: int = 2, **kwargs) -> Tuple[str, ...]:
    """Return the names of the `2 * world_dim` actions.

    Axes are named with lowercase letters. The first `world_dim` names are
    the `+1` moves (`"a(+1)"`, `"b(+1)"`, ...) and the last `world_dim` names
    the `-1` moves, matching the action numbering of the environment.
    Results are cached per `world_dim`.

    # Parameters

    world_dim : Number of axes of the world (between 1 and 26).
    kwargs : Ignored.
    """
    assert 1 <= world_dim <= 26, "Too many dimensions."
    if world_dim not in cls._CACHED_ACTION_NAMES:
        axis_letters = string.ascii_lowercase[:world_dim]
        # Format one name per axis; formatting the generator itself (as done
        # previously) yields the generator's repr instead of the names.
        action_names = ["{}(+1)".format(letter) for letter in axis_letters]
        action_names.extend("{}(-1)".format(letter) for letter in axis_letters)
        cls._CACHED_ACTION_NAMES[world_dim] = tuple(action_names)

    return cls._CACHED_ACTION_NAMES[world_dim]
def action_names(self) -> Tuple[str, ...]:
    """Return the action names for the dimension of this task's world."""
    world_dim = self.env.world_dim
    return self.class_action_names(world_dim=world_dim)
def close(self) -> None:
    """Do nothing; this task performs no cleanup of its own."""
    return None
def query_expert(
self,
expert_view_radius: int,
return_policy: bool = False,
deterministic: bool = False,
**kwargs,
) -> Tuple[Any, bool]:
view_tuple = get_corner_observation(
env=self.env,
view_radius=expert_view_radius,
view_corner_offsets=None,
)
goal = self.env.GOAL
wrong = self.env.WRONG_CORNER
if self.env.world_dim == 1:
left_view, right_view, hitting, last_action = view_tuple
left = 1
right = 0
expert_action: Optional[int] = None
policy: Optional[np.ndarray] = None
if left_view == goal:
expert_action = left
elif right_view == goal:
expert_action = right
elif hitting != 2 * self.env.world_dim:
expert_action = left if last_action == right else right
elif left_view == wrong:
expert_action = right
elif right_view == wrong:
expert_action = left
elif last_action == 2 * self.env.world_dim:
policy = np.array([0.5, 0.5])
else:
expert_action = last_action
if policy is None:
policy = np.array([expert_action == right, expert_action == left])
elif self.env.world_dim == 2:
tl, tr, bl, br, hitting, last_action = view_tuple
wall = self.env.WALL
d, r, u, l, none = 0, 1, 2, 3, 4
if tr == goal:
if hitting != r:
expert_action = r
else:
expert_action = u
elif br == goal:
if hitting != d:
expert_action = d
else:
expert_action = r
elif bl == goal:
if hitting != l:
expert_action = l
else:
expert_action = d
elif tl == goal:
if hitting != u:
expert_action = u
else:
expert_action = l
elif tr == wrong and not any(x == wrong for x in [br, bl, tl]):
expert_action = l
elif br == wrong and not any(x == wrong for x in [bl, tl, tr]):
expert_action = u
elif bl == wrong and not any(x == wrong for x in [tl, tr, br]):
expert_action = r
elif tl == wrong and not any(x == wrong for x in [tr, br, bl]):
expert_action = d
elif all(x == wrong for x in [tr, br]) and not any(
x == wrong for x in [bl, tl]
):
expert_action = l
elif all(x == wrong for x in [br, bl]) and not any(
x == wrong for x in [tl, tr]
):
expert_action = u
elif all(x == wrong for x in [bl, tl]) and not any(
x == wrong for x in [tr, br]
):
expert_action = r
elif all(x == wrong for x in [tl, tr]) and not any(
x == wrong for x in [br, bl]
):
expert_action = d
elif hitting != none and tr == br == bl == tl:
# Only possible if in 0 vis setting
if tr == self.env.WRONG_CORNER or last_action == hitting:
if last_action == r:
expert_action = u
elif last_action == u:
expert_action = l
elif last_action == l:
expert_action = d
elif last_action == d:
expert_action = r
else:
raise NotImplementedError()
else:
expert_action = last_action
elif last_action == r and tr == wall:
expert_action = u
elif last_action == u and tl == wall:
expert_action = l
elif last_action == l and bl == wall:
expert_action = d
elif last_action == d and br == wall:
expert_action = r
elif last_action == none:
expert_action = r
else:
expert_action = last_action
policy = np.array(
[
expert_action == d,
expert_action == r,
expert_action == u,
expert_action == l,
]
)
else:
raise NotImplementedError("Can only query expert for world dims of 1 or 2.")
if return_policy:
return policy, True
elif deterministic:
return int(np.argmax(policy)), True
else:
return (
int(np.argmax(np.random.multinomial(1, policy / (1.0 * policy.sum())))),
True,
)
class FindGoalLightHouseTaskSampler(TaskSampler):
    """Sampler producing `FindGoalLightHouseTask`s on a shared environment.

    Every sampled task reuses the single `LightHouseEnvironment` created in
    the initializer; the environment is re-seeded and randomly reset before
    each task is returned.
    """

    def __init__(
        self,
        world_dim: int,
        world_radius: int,
        sensors: Union[SensorSuite, List[Sensor]],
        max_steps: int,
        max_tasks: Optional[int] = None,
        num_unique_seeds: Optional[int] = None,
        task_seeds_list: Optional[List[int]] = None,
        deterministic_sampling: bool = False,
        seed: Optional[int] = None,
        **kwargs,
    ):
        """Initializer.

        # Parameters

        world_dim : Dimensionality of the light house world.
        world_radius : Radius of the light house world.
        sensors : Sensors (or a `SensorSuite`) given to every sampled task.
        max_steps : Maximum number of steps for every sampled task.
        max_tasks : Maximum number of tasks to sample (`None` for no limit).
        num_unique_seeds : Number of distinct environment seeds to use. If
            given without `task_seeds_list`, seeds `0, ..., num_unique_seeds - 1`
            are used.
        task_seeds_list : Explicit list of environment seeds. If
            `num_unique_seeds` is also given it must equal the list's length.
        deterministic_sampling : If `True`, cycle through the seeds in order
            rather than sampling them at random; requires a finite seed set.
        seed : Seed of the sampler's own random generator (random if `None`).
        """
        self.env = LightHouseEnvironment(world_dim=world_dim, world_radius=world_radius)

        self._last_sampled_task: Optional[FindGoalLightHouseTask] = None
        self.sensors = (
            SensorSuite(sensors) if not isinstance(sensors, SensorSuite) else sensors
        )
        self.max_steps = max_steps
        self.max_tasks = max_tasks
        self.num_tasks_generated = 0
        self.deterministic_sampling = deterministic_sampling

        self.num_unique_seeds = num_unique_seeds
        self.task_seeds_list = task_seeds_list
        assert (self.num_unique_seeds is None) or (
            0 < self.num_unique_seeds
        ), "`num_unique_seeds` must be a positive integer."

        if self.task_seeds_list is not None:
            if self.num_unique_seeds is not None:
                assert self.num_unique_seeds == len(
                    self.task_seeds_list
                ), "`num_unique_seeds` must equal the length of `task_seeds_list` if both specified."
            self.num_unique_seeds = len(self.task_seeds_list)
        elif self.num_unique_seeds is not None:
            self.task_seeds_list = list(range(self.num_unique_seeds))

        assert (not deterministic_sampling) or (
            self.num_unique_seeds is not None
        ), "Cannot use deterministic sampling when `num_unique_seeds` is `None`."

        if (not deterministic_sampling) and self.max_tasks:
            get_logger().warning(
                "`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,"
                " this might be a mistake when running testing."
            )

        self.seed: int = int(
            seed if seed is not None else np.random.randint(0, 2**31 - 1)
        )
        self.np_seeded_random_gen: Optional[np.random.RandomState] = None
        self.set_seed(self.seed)

    @property
    def world_dim(self):
        """Dimensionality of the underlying environment's world."""
        return self.env.world_dim

    @property
    def world_radius(self):
        """Radius of the underlying environment's world."""
        return self.env.world_radius

    @property
    def length(self) -> Union[int, float]:
        """Number of tasks left to sample (`inf` if `max_tasks` is `None`)."""
        return (
            float("inf")
            if self.max_tasks is None
            else self.max_tasks - self.num_tasks_generated
        )

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """`2 ** world_dim`, capped by `num_unique_seeds` when that is set."""
        n = 2**self.world_dim
        return n if self.num_unique_seeds is None else min(n, self.num_unique_seeds)

    @property
    def last_sampled_task(self) -> Optional[Task]:
        """The task most recently returned by `next_task` (`None` if none)."""
        return self._last_sampled_task

    def next_task(self, force_advance_scene: bool = False) -> Optional[Task]:
        """Sample the next task, or return `None` if no tasks remain.

        The environment seed is taken from `task_seeds_list` (in order when
        sampling deterministically, at random otherwise) or, when no finite
        seed set was configured, drawn from the sampler's random generator.
        """
        if self.length <= 0:
            return None

        if self.num_unique_seeds is not None:
            if self.deterministic_sampling:
                seed = self.task_seeds_list[
                    self.num_tasks_generated % len(self.task_seeds_list)
                ]
            else:
                seed = self.np_seeded_random_gen.choice(self.task_seeds_list)
        else:
            seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)

        self.num_tasks_generated += 1

        self.env.set_seed(seed)
        self.env.random_reset()

        # Record the task so that `last_sampled_task` reports it.
        self._last_sampled_task = FindGoalLightHouseTask(
            env=self.env, sensors=self.sensors, task_info={}, max_steps=self.max_steps
        )
        return self._last_sampled_task

    def close(self) -> None:
        """No resources to release."""
        pass

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Always `True`: every task shares the same sensors."""
        return True

    def reset(self) -> None:
        """Restart sampling: zero the task counter and re-apply the seed."""
        self.num_tasks_generated = 0
        self.set_seed(seed=self.seed)

    def set_seed(self, seed: int) -> None:
        """Seed the global generators and this sampler's own generator."""
        set_seed(seed)
        self.np_seeded_random_gen, _ = seeding.np_random(seed)
        self.seed = seed
================================================
FILE: allenact_plugins/lighthouse_plugin/lighthouse_util.py
================================================
import numpy as np
from allenact.utils.experiment_utils import EarlyStoppingCriterion, ScalarMeanTracker
class StopIfNearOptimal(EarlyStoppingCriterion):
    """Early stopping criterion based on a window of recent episode lengths.

    A fixed-size circular memory stores the average episode length reported
    for recent episodes. Once the memory has been completely filled, training
    is stopped as soon as the memory's mean drops below
    `optimal + deviation`.
    """

    def __init__(self, optimal: float, deviation: float, min_memory_size: int = 100):
        """Initializer.

        # Parameters

        optimal : The optimal (target) average episode length.
        deviation : Allowed slack above `optimal`.
        min_memory_size : Minimum number of episodes to average over.
        """
        self.optimal = optimal
        self.deviation = deviation

        self.current_pos = 0  # Next write position in `self.memory`
        self.has_filled = False  # Whether every memory slot has been written
        self.memory: np.ndarray = np.zeros(min_memory_size)

    def __call__(
        self,
        stage_steps: int,
        total_steps: int,
        training_metrics: ScalarMeanTracker,
    ) -> bool:
        """Record the latest `"ep_length"` statistics and decide whether to stop.

        # Returns

        `True` if the memory has been filled and its mean is below
        `optimal + deviation`, otherwise `False`.
        """
        sums = training_metrics.sums()
        counts = training_metrics.counts()

        k = "ep_length"
        if k in sums:
            count = counts[k]
            ep_length_ave = sums[k] / count

            n = self.memory.shape[0]
            if count >= n:
                if count > n:
                    # Increase memory size to fit all of the new values
                    self.memory = np.full(count, fill_value=ep_length_ave)
                else:
                    # We have exactly as many values as the memory size,
                    # simply set the whole memory to be equal to the new
                    # average ep length.
                    self.memory[:] = ep_length_ave
                self.current_pos = 0
                self.has_filled = True
            else:
                end = self.current_pos + count
                # Slicing clips at the end of the array, any overflow is
                # written to the start of the memory below.
                self.memory[self.current_pos : end] = ep_length_ave

                if end >= n:
                    self.has_filled = True
                    self.memory[: end % n] = ep_length_ave

                # Always advance (and wrap) the write position so that
                # successive small batches progressively fill the memory.
                self.current_pos = end % n

        if not self.has_filled:
            return False

        return bool(self.memory.mean() < self.optimal + self.deviation)
================================================
FILE: allenact_plugins/lighthouse_plugin/scripts/__init__.py
================================================
================================================
FILE: allenact_plugins/manipulathor_plugin/__init__.py
================================================
from allenact.utils.system import ImportChecker
with ImportChecker(
"Cannot `import ai2thor`, please install `ai2thor` (`pip install ai2thor`)."
):
# noinspection PyUnresolvedReferences
import ai2thor
================================================
FILE: allenact_plugins/manipulathor_plugin/arm_calculation_utils.py
================================================
"""Utility classes and functions for calculating the arm relative and absolute
position."""
from typing import Dict
import numpy as np
import torch
from scipy.spatial.transform import Rotation as R
from allenact.utils.system import get_logger
def state_dict_to_tensor(state: Dict):
    """Flatten a pose dictionary into a 1D tensor.

    The `x`, `y`, `z` entries of `state["position"]` (if present) come first,
    followed by the `x`, `y`, `z` entries of `state["rotation"]` (if present).
    """
    flat_values = []
    for field in ("position", "rotation"):
        if field in state:
            flat_values.extend(state[field][axis] for axis in ("x", "y", "z"))
    return torch.Tensor(flat_values)
def diff_position(state_goal, state_curr, absolute: bool = True):
    """Per-axis difference between the goal and current positions.

    Returns a dictionary with the keys of `state_goal["position"]` mapping to
    `goal - current` for that key, or its absolute value when `absolute` is
    `True`.
    """
    goal_position = state_goal["position"]
    current_position = state_curr["position"]

    def axis_delta(axis):
        delta = goal_position[axis] - current_position[axis]
        return abs(delta) if absolute else delta

    return {axis: axis_delta(axis) for axis in goal_position.keys()}
def coord_system_transform(position: Dict, coord_system: str):
    """Express an `x`/`y`/`z` position in the requested coordinate system.

    # Parameters

    position : Dictionary with `"x"`, `"y"` and `"z"` entries.
    coord_system : One of
        * `"xyz_signed"`: `[x, y, z]`,
        * `"xyz_unsigned"`: `[|x|, |y|, |z|]`,
        * `"polar_radian"`: `[r, elevation / (pi / 2), azimuth / pi]`,
        * `"polar_trigo"`: `[r, cos(el), sin(el), cos(az), sin(az)]`.

    # Returns

    A 1D `torch.Tensor` holding the transformed coordinates.
    """
    assert coord_system in [
        "xyz_unsigned",
        "xyz_signed",
        "polar_radian",
        "polar_trigo",
    ]

    x, y, z = position["x"], position["y"], position["z"]

    if "xyz" in coord_system:
        cartesian = torch.Tensor([x, y, z])
        return torch.abs(cartesian) if coord_system == "xyz_unsigned" else cartesian

    planar_norm = np.hypot(x, y)
    radius = np.hypot(planar_norm, z)
    elevation = np.arctan2(z, planar_norm)  # elevation angle: [-pi/2, pi/2]
    azimuth = np.arctan2(y, x)  # azimuthal angle: [-pi, pi]

    if coord_system == "polar_radian":
        # Both angles are normalized to [-1, 1].
        components = [radius, elevation / (0.5 * np.pi), azimuth / np.pi]
    else:  # polar_trigo
        components = [
            radius,
            np.cos(elevation),
            np.sin(elevation),
            np.cos(azimuth),
            np.sin(azimuth),
        ]
    return torch.Tensor(components)
def position_rotation_to_matrix(position, rotation):
    """Build a 4x4 homogeneous transform from a position and Euler rotation.

    `rotation` holds `"x"`, `"y"`, `"z"` Euler angles in degrees (interpreted
    with scipy's `"xyz"` convention) and `position` holds the translation.
    """
    axes = ("x", "y", "z")
    euler_degrees = [rotation[axis] for axis in axes]

    transform = np.zeros((4, 4))
    transform[:3, :3] = R.from_euler("xyz", euler_degrees, degrees=True).as_matrix()
    transform[:3, 3] = [position[axis] for axis in axes]
    transform[3, 3] = 1
    return transform
def inverse_rot_trans_matrix(mat):
    """Return the matrix inverse of `mat` (computed with `np.linalg.inv`)."""
    return np.linalg.inv(mat)
def matrix_to_position_rotation(matrix):
    """Split a homogeneous transform into position and Euler rotation dicts.

    Returns `{"position": {...}, "rotation": {...}}` where the rotation is
    given as `"xyz"` Euler angles in degrees extracted from the upper-left
    3x3 block and the position is the first three entries of the last column.
    """
    axes = ("x", "y", "z")
    euler_degrees = R.from_matrix(matrix[:3, :3]).as_euler("xyz", degrees=True)
    translation = matrix[:3, 3]

    pose = {"position": None, "rotation": None}
    pose["rotation"] = {axis: euler_degrees[i] for i, axis in enumerate(axes)}
    pose["position"] = {axis: translation[i] for i, axis in enumerate(axes)}
    return pose
def find_closest_inverse(deg, use_cache):
    """Inverse rotation matrix for a rotation of `deg` degrees about the y axis.

    When `use_cache` is true, the first cached matrix whose angle is within 5
    degrees of `deg` is returned. Otherwise (or on a cache miss, which is
    logged as a warning) the inverse is computed from scratch.
    """
    if use_cache:
        for cached_deg, cached_inverse in _saved_inverse_rotation_mats.items():
            if abs(cached_deg - deg) < 5:
                return cached_inverse

    # Reaching this point means no cached entry was used for this angle.
    rotation_matrix = R.from_euler("xyz", [0, deg, 0], degrees=True).as_matrix()
    computed_inverse = inverse_rot_trans_matrix(rotation_matrix)
    if use_cache:
        get_logger().warning(f"Had to calculate the matrix for {deg}")
    return computed_inverse
def calc_inverse(deg):
    """Inverse of the rotation matrix for `deg` degrees about the y axis."""
    y_rotation_matrix = R.from_euler("xyz", [0, deg, 0], degrees=True).as_matrix()
    return inverse_rot_trans_matrix(y_rotation_matrix)
# Precomputed inverse rotation matrices for rotations of 0, 45, ..., 315
# degrees about the y axis, keyed by the angle in degrees.
_saved_inverse_rotation_mats = {i: calc_inverse(i) for i in range(0, 360, 45)}
# 360 degrees shares the matrix computed for 0 degrees.
_saved_inverse_rotation_mats[360] = _saved_inverse_rotation_mats[0]
def world_coords_to_agent_coords(world_obj, agent_state, use_cache=True):
    """Express an object's world position relative to the agent.

    The agent is required to be rotated (almost) only about the y axis. The
    object's position is translated by the agent's position and rotated by
    the inverse of the agent's y rotation; the object's rotation is passed
    through the matrix round trip but is not (yet) made agent-relative.

    # Returns

    A dictionary with `"position"` and `"rotation"` entries.
    """
    agent_position = agent_state["position"]
    agent_rotation = agent_state["rotation"]
    agent_translation = [agent_position[axis] for axis in ("x", "y", "z")]

    assert abs(agent_rotation["x"]) < 0.01 and abs(agent_rotation["z"]) < 0.01
    inverse_yaw = find_closest_inverse(agent_rotation["y"], use_cache=use_cache)

    object_transform = position_rotation_to_matrix(
        world_obj["position"], world_obj["rotation"]
    )
    offset_from_agent = object_transform[:3, 3] - agent_translation
    # add rotation later
    object_transform[:3, 3] = np.matmul(inverse_yaw, offset_from_agent)

    return matrix_to_position_rotation(object_transform)
================================================
FILE: allenact_plugins/manipulathor_plugin/armpointnav_constants.py
================================================
import json
import os
from typing import Dict, Optional, Any
from constants import ABS_PATH_OF_TOP_LEVEL_DIR
# Object types -- presumably those used for training and for testing
# respectively; confirm against the task samplers.
TRAIN_OBJECTS = ["Apple", "Bread", "Tomato", "Lettuce", "Pot", "Mug"]
TEST_OBJECTS = ["Potato", "SoapBottle", "Pan", "Egg", "Spatula", "Cup"]
# Amount added to / subtracted from an arm coordinate by a single
# `MoveArm*` action (the height actions use the same step).
MOVE_ARM_CONSTANT = 0.05
MOVE_ARM_HEIGHT_CONSTANT = MOVE_ARM_CONSTANT
# Per-axis displacement up to which an object is still regarded as unmoved.
UNWANTED_MOVE_THR = 0.01
# NOTE(review): usage of DISTANCE_EPS is not visible here -- presumably a
# tolerance for distance comparisons.
DISTANCE_EPS = 1e-9
# Cap applied to distances that are NaN or too large.
DISTANCE_MAX = 10.0

# Location of the JSON file with the dataset's starting poses.
dataset_json_file = os.path.join(
    ABS_PATH_OF_TOP_LEVEL_DIR, "datasets", "apnd-dataset", "starting_pose.json"
)

# Lazily populated cache of the contents of `dataset_json_file`.
_ARM_START_POSITIONS: Optional[Dict[str, Any]] = None
def get_agent_start_positions():
    """Return the start positions stored in `dataset_json_file`.

    The JSON file is read on the first call only; the parsed result is cached
    in the module-level `_ARM_START_POSITIONS` and returned directly on later
    calls.

    # Raises

    Exception : If the dataset file cannot be opened or parsed.
    """
    global _ARM_START_POSITIONS
    # Load only when nothing has been cached yet.
    if _ARM_START_POSITIONS is None:
        try:
            with open(dataset_json_file) as f:
                _ARM_START_POSITIONS = json.load(f)
        except Exception as e:
            raise Exception(f"Dataset not found in {dataset_json_file}") from e

    return _ARM_START_POSITIONS
================================================
FILE: allenact_plugins/manipulathor_plugin/manipulathor_constants.py
================================================
"""Constant values and hyperparameters that are used by the environment."""
import ai2thor.fifo_server
# Lowest and highest arm heights; used to normalize the arm's height.
ARM_MIN_HEIGHT = 0.450998873
ARM_MAX_HEIGHT = 1.8009994

# Extra arguments merged into agent-motion and arm action dictionaries before
# they are sent to the controller.
ADDITIONAL_ARM_ARGS = {
    "disableRendering": True,
    "returnToStart": True,
    "speed": 1,
}

# Names of the agent-level (base) motion actions.
MOVE_AHEAD = "MoveAheadContinuous"
MOVE_BACK = "MoveBackContinuous"
ROTATE_LEFT = "RotateLeftContinuous"
ROTATE_RIGHT = "RotateRightContinuous"

# Names of the arm translation actions (`P` = plus, `M` = minus).
MOVE_ARM_HEIGHT_P = "MoveArmHeightP"
MOVE_ARM_HEIGHT_M = "MoveArmHeightM"
MOVE_ARM_X_P = "MoveArmXP"
MOVE_ARM_X_M = "MoveArmXM"
MOVE_ARM_Y_P = "MoveArmYP"
MOVE_ARM_Y_M = "MoveArmYM"
MOVE_ARM_Z_P = "MoveArmZP"
MOVE_ARM_Z_M = "MoveArmZM"

# Names of the wrist / elbow rotation actions (`P` = plus, `M` = minus).
ROTATE_WRIST_PITCH_P = "RotateArmWristPitchP"
ROTATE_WRIST_PITCH_M = "RotateArmWristPitchM"
ROTATE_WRIST_YAW_P = "RotateArmWristYawP"
ROTATE_WRIST_YAW_M = "RotateArmWristYawM"
ROTATE_WRIST_ROLL_P = "RotateArmWristRollP"
ROTATE_WRIST_ROLL_M = "RotateArmWristRollM"
ROTATE_ELBOW_P = "RotateArmElbowP"
ROTATE_ELBOW_M = "RotateArmElbowM"

# Names of the camera actions.
LOOK_UP = "LookUp"
LOOK_DOWN = "LookDown"

# Names of the mid-level manipulation / termination actions.
PICKUP = "PickUpMidLevel"
DROP = "DropMidLevel"
DONE = "DoneMidLevel"

# NOTE(review): presumably the default keyword arguments used to construct the
# ai2thor `Controller` -- confirm against the experiment configs.
ENV_ARGS = dict(
    gridSize=0.25,
    width=224,
    height=224,
    visibilityDistance=1.0,
    agentMode="arm",
    fieldOfView=100,
    agentControllerType="mid-level",
    server_class=ai2thor.fifo_server.FifoServer,
    useMassThreshold=True,
    massThreshold=10,
    autoSimulation=False,
    autoSyncTransforms=True,
)

# NOTE(review): presumably the object types the arm may be asked to
# manipulate -- confirm against the task samplers.
VALID_OBJECT_LIST = [
    "Knife",
    "Bread",
    "Fork",
    "Potato",
    "SoapBottle",
    "Pan",
    "Plate",
    "Tomato",
    "Egg",
    "Pot",
    "Spatula",
    "Cup",
    "Bowl",
    "SaltShaker",
    "PepperShaker",
    "Lettuce",
    "ButterKnife",
    "Apple",
    "DishSponge",
    "Spoon",
    "Mug",
]
================================================
FILE: allenact_plugins/manipulathor_plugin/manipulathor_environment.py
================================================
"""A wrapper for engaging with the ManipulaTHOR environment."""
import copy
import math
import warnings
from typing import Dict, Union, Any, Optional, cast
import ai2thor.server
import numpy as np
from ai2thor.controller import Controller
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact_plugins.ithor_plugin.ithor_constants import VISIBILITY_DISTANCE, FOV
from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
from allenact_plugins.manipulathor_plugin.armpointnav_constants import (
MOVE_ARM_HEIGHT_CONSTANT,
MOVE_ARM_CONSTANT,
UNWANTED_MOVE_THR,
DISTANCE_MAX,
)
from allenact_plugins.manipulathor_plugin.manipulathor_constants import (
ADDITIONAL_ARM_ARGS,
ARM_MIN_HEIGHT,
ARM_MAX_HEIGHT,
)
from allenact_plugins.manipulathor_plugin.manipulathor_constants import (
ROTATE_WRIST_PITCH_P,
ROTATE_WRIST_PITCH_M,
ROTATE_WRIST_YAW_P,
ROTATE_WRIST_YAW_M,
ROTATE_ELBOW_P,
ROTATE_ELBOW_M,
LOOK_UP,
LOOK_DOWN,
MOVE_AHEAD,
ROTATE_RIGHT,
ROTATE_LEFT,
PICKUP,
DONE,
)
from allenact_plugins.manipulathor_plugin.manipulathor_utils import (
reset_environment_and_additional_commands,
)
def position_distance(s1, s2, filter_nan: bool = False):
    """Euclidean distance between the `"position"` entries of two states.

    If `filter_nan` is `True`, a distance that is NaN or larger than
    `DISTANCE_MAX` is replaced by `DISTANCE_MAX`.
    """
    first, second = s1["position"], s2["position"]
    squared_norm = sum((first[axis] - second[axis]) ** 2 for axis in ("x", "y", "z"))
    dist = squared_norm**0.5

    if filter_nan and (math.isnan(dist) or dist > DISTANCE_MAX):
        dist = DISTANCE_MAX
    return dist
def rotation_distance(s1: Dict[str, Dict[str, float]], s2: Dict[str, Dict[str, float]]):
    """Sum over the x, y and z axes of the angular distance (in degrees).

    For each axis the distance is the smaller of the two ways around the
    circle between the corresponding `"rotation"` angles.
    """
    first_rotation = s1["rotation"]
    second_rotation = s2["rotation"]

    total = 0
    for axis in ["x", "y", "z"]:
        wrapped = (first_rotation[axis] - second_rotation[axis]) % 360
        total += min(wrapped, 360 - wrapped)
    return total
class ManipulaTHOREnvironment(IThorEnvironment):
    """Wrapper for the manipulathor controller providing arm functionality and
    bookkeeping.

    See [here](https://ai2thor.allenai.org/documentation/installation) for comprehensive
    documentation on AI2-THOR.

    # Attributes

    controller : The ai2thor controller.
    """

    def __init__(
        self,
        x_display: Optional[str] = None,
        docker_enabled: bool = False,
        local_thor_build: Optional[str] = None,
        visibility_distance: float = VISIBILITY_DISTANCE,
        fov: float = FOV,
        player_screen_width: int = 224,
        player_screen_height: int = 224,
        quality: str = "Very Low",
        restrict_to_initially_reachable_points: bool = False,
        make_agents_visible: bool = True,
        object_open_speed: float = 1.0,
        simplify_physics: bool = False,
        verbose: bool = False,
        env_args=None,
    ) -> None:
        """Initializer.

        # Parameters

        x_display : The x display into which to launch ai2thor (possibly necessarily if you are running on a server
            without an attached display).
        docker_enabled : Whether or not to run thor in a docker container (useful on a server without an attached
            display so that you don't have to start an x display).
        local_thor_build : The path to a local build of ai2thor. This is probably not necessary for your use case
            and can be safely ignored.
        visibility_distance : The distance (in meters) at which objects, in the viewport of the agent,
            are considered visible by ai2thor and will have their "visible" flag be set to `True` in the metadata.
        fov : The agent's camera's field of view.
        player_screen_width : The width resolution (in pixels) of the images returned by ai2thor.
        player_screen_height : The height resolution (in pixels) of the images returned by ai2thor.
        quality : The quality at which to render. Possible quality settings can be found in
            `ai2thor._quality_settings.QUALITY_SETTINGS`.
        restrict_to_initially_reachable_points : Whether or not to restrict the agent to locations in ai2thor
            that were found to be (initially) reachable by the agent (i.e. reachable by the agent after resetting
            the scene). This can be useful if you want to ensure there are only a fixed set of locations where the
            agent can go.
        make_agents_visible : Whether or not the agent should be visible. Most noticeable when there are multiple agents
            or when quality settings are high so that the agent casts a shadow.
        object_open_speed : How quickly objects should be opened. High speeds mean faster simulation but also mean
            that opening objects have a lot of kinetic energy and can, possibly, knock other objects away.
        simplify_physics : Whether or not to simplify physics when applicable. Currently this only simplifies object
            interactions when opening drawers (when simplified, objects within a drawer do not slide around on
            their own when the drawer is opened or closed, instead they are effectively glued down).
        verbose : If `True`, the controller's last event is printed after every step.
        env_args : Keyword arguments used to construct the ai2thor `Controller`.
        """
        self._verbose = verbose
        self.env_args = env_args
        # Removed from the local namespace so that they are not forwarded to
        # the parent initializer through `locals()` below (presumably the
        # parent does not accept them -- confirm against `IThorEnvironment`).
        del verbose
        del env_args
        super(ManipulaTHOREnvironment, self).__init__(
            **prepare_locals_for_super(locals())
        )

    def create_controller(self):
        """Create a new ai2thor `Controller` from `self.env_args`."""
        controller = Controller(**self.env_args)

        return controller

    def start(
        self,
        scene_name: Optional[str],
        move_mag: float = 0.25,
        **kwargs,
    ) -> None:
        """Starts the ai2thor controller if it was previously stopped.

        After starting, `reset` will be called with the scene name and move magnitude.

        # Parameters

        scene_name : The scene to load.
        move_mag : The amount of distance the agent moves in a single `MoveAhead` step.
        kwargs : additional kwargs, passed to reset.

        # Raises

        RuntimeError : If the environment has already been started.
        """
        if self._started:
            raise RuntimeError(
                "Trying to start the environment but it is already started."
            )

        self.controller = self.create_controller()

        self._started = True
        self.reset(scene_name=scene_name, move_mag=move_mag, **kwargs)

    def reset(
        self,
        scene_name: Optional[str],
        move_mag: float = 0.25,
        **kwargs,
    ):
        """Reset the controller to a scene and refresh cached bookkeeping.

        # Parameters

        scene_name : The scene to load; if `None` the scene named in the
            controller's last event is reloaded.
        move_mag : Stored as both the move magnitude and the grid size.
        kwargs : Accepted for interface compatibility; not used here.
        """
        self._move_mag = move_mag
        self._grid_size = self._move_mag

        if scene_name is None:
            scene_name = self.controller.last_event.metadata["sceneName"]
        # self.reset_init_params()#**kwargs) removing this fixes one of the crashing problem

        # to solve the crash issue
        # TODO why do we still have this crashing problem?
        try:
            reset_environment_and_additional_commands(self.controller, scene_name)
        except Exception as e:
            # If resetting fails, build a brand new controller and retry once.
            print("RESETTING THE SCENE,", scene_name, "because of", str(e))
            self.controller = ai2thor.controller.Controller(**self.env_args)
            reset_environment_and_additional_commands(self.controller, scene_name)

        if self.object_open_speed != 1.0:
            self.controller.step(
                {"action": "ChangeOpenSpeed", "x": self.object_open_speed}
            )

        self._initially_reachable_points = None
        self._initially_reachable_points_set = None
        self.controller.step({"action": "GetReachablePositions"})
        if not self.controller.last_event.metadata["lastActionSuccess"]:
            warnings.warn(
                "Error when getting reachable points: {}".format(
                    self.controller.last_event.metadata["errorMessage"]
                )
            )
        self._initially_reachable_points = self.last_action_return

        # History of the action dictionaries sent to the controller by `step`.
        self.list_of_actions_so_far = []

    def randomize_agent_location(
        self, seed: int = None, partial_position: Optional[Dict[str, float]] = None
    ) -> Dict:
        """Not supported by this environment; always raises `NotImplementedError`."""
        raise NotImplementedError

    def is_object_at_low_level_hand(self, object_id):
        """Whether `object_id` is among the objects currently held by the arm."""
        current_objects_in_hand = self.controller.last_event.metadata["arm"][
            "heldObjects"
        ]
        return object_id in current_objects_in_hand

    def object_in_hand(self):
        """Object metadata for the object in the agent's hand.

        Returns `None` if the inventory is empty and raises `AttributeError`
        if it contains more than one object.
        """
        inv_objs = self.last_event.metadata["inventoryObjects"]
        if len(inv_objs) == 0:
            return None
        elif len(inv_objs) == 1:
            return self.get_object_by_id(
                self.last_event.metadata["inventoryObjects"][0]["objectId"]
            )
        else:
            raise AttributeError("Must be <= 1 inventory objects.")

    @classmethod
    def correct_nan_inf(cls, flawed_dict, extra_tag=""):
        """Return a deep copy of `flawed_dict` with NaN / inf values set to 0.

        `extra_tag` is accepted but not used.
        """
        corrected_dict = copy.deepcopy(flawed_dict)
        for k, v in corrected_dict.items():
            if math.isnan(v) or math.isinf(v):
                corrected_dict[k] = 0
        return corrected_dict

    def get_object_by_id(self, object_id: str) -> Optional[Dict[str, Any]]:
        """Metadata of the object with the given id (`None` if not found).

        The object's `"position"` entry is replaced, in the last event's
        metadata, by a copy in which NaN / inf values are set to 0.
        """
        for o in self.last_event.metadata["objects"]:
            if o["objectId"] == object_id:
                o["position"] = self.correct_nan_inf(o["position"], "obj id")
                return o
        return None

    def get_current_arm_state(self):
        """Root-relative position of the last arm joint plus normalized height.

        Returns a dictionary copied from the last joint's
        `"rootRelativePosition"` with an additional `"h"` entry: the y
        position of the first joint rescaled so that the (agent-height
        adjusted) range `[ARM_MIN_HEIGHT, ARM_MAX_HEIGHT]` maps to `[0, 1]`.
        NaN / inf values are replaced by 0.
        """
        h_min = ARM_MIN_HEIGHT
        h_max = ARM_MAX_HEIGHT
        # NOTE(review): presumably the agent's default y position -- confirm.
        agent_base_location = 0.9009995460510254
        event = self.controller.last_event
        # Shift the height range by how far the agent is from that base height.
        offset = event.metadata["agent"]["position"]["y"] - agent_base_location
        h_max += offset
        h_min += offset
        joints = event.metadata["arm"]["joints"]
        arm = joints[-1]
        assert arm["name"] == "robot_arm_4_jnt"
        xyz_dict = copy.deepcopy(arm["rootRelativePosition"])
        height_arm = joints[0]["position"]["y"]
        xyz_dict["h"] = (height_arm - h_min) / (h_max - h_min)
        xyz_dict = self.correct_nan_inf(xyz_dict, "realtive hand")
        return xyz_dict

    def get_absolute_hand_state(self):
        """Position of the last arm joint, paired with a zero rotation.

        NaN / inf position values are replaced by 0.
        """
        event = self.controller.last_event
        joints = event.metadata["arm"]["joints"]
        arm = copy.deepcopy(joints[-1])
        assert arm["name"] == "robot_arm_4_jnt"
        xyz_dict = arm["position"]
        xyz_dict = self.correct_nan_inf(xyz_dict, "absolute hand")
        return dict(position=xyz_dict, rotation={"x": 0, "y": 0, "z": 0})

    def get_pickupable_objects(self):
        """The `"pickupableObjects"` entry of the last event's arm metadata."""
        event = self.controller.last_event
        object_list = event.metadata["arm"]["pickupableObjects"]

        return object_list

    def get_current_object_locations(self):
        """Deep-copied position, rotation and visibility of every object.

        Returns a dictionary keyed by object id.
        """
        obj_loc_dict = {}
        metadata = self.controller.last_event.metadata["objects"]
        for o in metadata:
            obj_loc_dict[o["objectId"]] = dict(
                position=o["position"],
                rotation=o["rotation"],
                visible=o["visible"],
            )
        return copy.deepcopy(obj_loc_dict)

    def close_enough(self, current_obj_pose, init_obj_pose, threshold):
        """Whether two poses differ by at most `threshold` along every axis.

        Only the positions are compared; rotations are ignored.
        """
        position_close = [
            abs(current_obj_pose["position"][k] - init_obj_pose["position"][k])
            <= threshold
            for k in ["x", "y", "z"]
        ]
        position_is_close = sum(position_close) == 3
        return position_is_close

    def get_objects_moved(
        self,
        previous_object_locations,
        current_object_locations,
        target_object_id,
        thres_dict: Optional[Dict] = None,
    ):
        """Ids of the non-target objects that moved between two snapshots.

        # Parameters

        previous_object_locations : Earlier object-location snapshot.
        current_object_locations : Later object-location snapshot.
        target_object_id : Id of the object to ignore.
        thres_dict : Optional per-object thresholds keyed by
            `"<scene id>-<object id>"`; the larger of that value and
            `UNWANTED_MOVE_THR` is used.
        """
        moved_objects = []
        scene_id = self.scene_name.split("_")[0]

        for object_id in current_object_locations.keys():
            if object_id == target_object_id:
                continue
            if object_id not in previous_object_locations:
                continue

            threshold = UNWANTED_MOVE_THR
            if thres_dict is not None:
                threshold = max(threshold, thres_dict[scene_id + "-" + object_id])

            if not self.close_enough(
                current_object_locations[object_id],
                previous_object_locations[object_id],
                threshold=threshold,
            ):
                moved_objects.append(object_id)

        return moved_objects

    def get_objects_move_distance(
        self,
        initial_object_locations,
        previous_object_locations,
        current_object_locations,
        target_object_id,
        only_visible: bool = False,
        thres_dict: Optional[Dict] = None,
    ):
        """Total change in the non-target objects' displacement from their
        initial positions between the previous and current snapshots.

        # Parameters

        initial_object_locations : Snapshot taken at the reference time.
        previous_object_locations : Earlier snapshot.
        current_object_locations : Later snapshot.
        target_object_id : Id of the object to ignore.
        only_visible : If `True`, skip objects that are not currently visible.
        thres_dict : Optional per-object thresholds keyed by
            `"<scene id>-<object id>"`, subtracted from the displacements
            (which are then floored at 0).
        """
        moved_objects_position_distance = {}
        scene_id = self.scene_name.split("_")[0]

        for object_id in current_object_locations.keys():
            if object_id == target_object_id:
                continue
            if object_id not in previous_object_locations:
                continue
            if only_visible:
                # current is visible
                if not current_object_locations[object_id]["visible"]:
                    continue

            p_initial2current = position_distance(
                current_object_locations[object_id],
                initial_object_locations[object_id],
                filter_nan=True,
            )
            p_initial2previous = position_distance(
                previous_object_locations[object_id],
                initial_object_locations[object_id],
                filter_nan=True,
            )

            threshold = 0.0
            if thres_dict is not None:
                threshold = max(threshold, thres_dict[scene_id + "-" + object_id])

            p_initial2current = max(0.0, p_initial2current - threshold)
            p_initial2previous = max(0.0, p_initial2previous - threshold)

            moved_objects_position_distance[object_id] = (
                p_initial2current - p_initial2previous
            )

        return sum(moved_objects_position_distance.values())

    def step(
        self, action_dict: Dict[str, Union[str, int, float]]
    ) -> ai2thor.server.Event:
        """Take a step in the ai2thor environment.

        The high-level action named in `action_dict["action"]` is translated
        into the corresponding controller action (with its arguments) before
        being sent to the controller. Actions that match none of the handled
        cases are forwarded unchanged.

        # Returns

        The event returned by the controller for the (translated) action.
        """
        action = cast(str, action_dict["action"])

        skip_render = "renderImage" in action_dict and not action_dict["renderImage"]
        last_frame: Optional[np.ndarray] = None
        if skip_render:
            # Remember the current frame so it can be restored after the step.
            last_frame = self.current_frame

        if self.simplify_physics:
            action_dict["simplifyPhysics"] = True
        if action in [PICKUP, DONE]:
            if action == PICKUP:
                object_id = action_dict["object_id"]
                if not self.is_object_at_low_level_hand(object_id):
                    pickupable_objects = self.get_pickupable_objects()
                    #
                    if object_id in pickupable_objects:
                        # This version of the task is actually harder # consider making it easier, are we penalizing failed pickup? yes
                        self.step(dict(action="PickupObject"))
                        # we are doing an additional pass here, label is not right and if we fail we will do it twice
                        object_inventory = self.controller.last_event.metadata["arm"][
                            "heldObjects"
                        ]
                        # Something other than the requested object was picked
                        # up, so release it again.
                        if (
                            len(object_inventory) > 0
                            and object_id not in object_inventory
                        ):
                            self.step(dict(action="ReleaseObject"))
            # Both PICKUP and DONE end with a no-op controller action.
            action_dict = {"action": "Pass"}

        elif action in [MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT]:
            copy_additions = copy.deepcopy(ADDITIONAL_ARM_ARGS)

            action_dict = {**action_dict, **copy_additions}
            if action in [MOVE_AHEAD]:
                action_dict["action"] = "MoveAgent"
                action_dict["ahead"] = 0.2

            elif action in [ROTATE_RIGHT]:
                action_dict["action"] = "RotateAgent"
                action_dict["degrees"] = 45

            elif action in [ROTATE_LEFT]:
                action_dict["action"] = "RotateAgent"
                action_dict["degrees"] = -45

        elif "MoveArm" in action:
            copy_additions = copy.deepcopy(ADDITIONAL_ARM_ARGS)
            action_dict = {**action_dict, **copy_additions}
            # Arm movements are expressed as absolute targets computed from
            # the current arm state.
            base_position = self.get_current_arm_state()
            if "MoveArmHeight" in action:
                action_dict["action"] = "MoveArmBase"

                if action == "MoveArmHeightP":
                    base_position["h"] += MOVE_ARM_HEIGHT_CONSTANT
                if action == "MoveArmHeightM":
                    base_position[
                        "h"
                    ] -= MOVE_ARM_HEIGHT_CONSTANT  # height is pretty big!
                action_dict["y"] = base_position["h"]
            else:
                action_dict["action"] = "MoveArm"
                if action == "MoveArmXP":
                    base_position["x"] += MOVE_ARM_CONSTANT
                elif action == "MoveArmXM":
                    base_position["x"] -= MOVE_ARM_CONSTANT
                elif action == "MoveArmYP":
                    base_position["y"] += MOVE_ARM_CONSTANT
                elif action == "MoveArmYM":
                    base_position["y"] -= MOVE_ARM_CONSTANT
                elif action == "MoveArmZP":
                    base_position["z"] += MOVE_ARM_CONSTANT
                elif action == "MoveArmZM":
                    base_position["z"] -= MOVE_ARM_CONSTANT
                # Only the x, y, z entries are sent (the "h" entry is dropped).
                action_dict["position"] = {
                    k: v for (k, v) in base_position.items() if k in ["x", "y", "z"]
                }

        elif "RotateArm" in action:
            copy_additions = copy.deepcopy(ADDITIONAL_ARM_ARGS)
            action_dict = {**action_dict, **copy_additions}

            if action == ROTATE_WRIST_PITCH_P:
                action_dict["action"] = "RotateWristRelative"
                action_dict["pitch"] = 15
            elif action == ROTATE_WRIST_PITCH_M:
                action_dict["action"] = "RotateWristRelative"
                action_dict["pitch"] = -15
            elif action == ROTATE_WRIST_YAW_P:
                action_dict["action"] = "RotateWristRelative"
                action_dict["yaw"] = 15
            elif action == ROTATE_WRIST_YAW_M:
                action_dict["action"] = "RotateWristRelative"
                action_dict["yaw"] = -15
            elif action == ROTATE_ELBOW_P:
                action_dict["action"] = "RotateElbowRelative"
                action_dict["degrees"] = 15
            elif action == ROTATE_ELBOW_M:
                action_dict["action"] = "RotateElbowRelative"
                action_dict["degrees"] = -15
            else:
                raise ValueError("invalid action " + str(action))

        elif action in [LOOK_UP, LOOK_DOWN]:
            copy_additions = copy.deepcopy(ADDITIONAL_ARM_ARGS)
            action_dict = {**action_dict, **copy_additions}
            if action == LOOK_UP:
                action_dict["action"] = LOOK_UP
            elif action == LOOK_DOWN:
                action_dict["action"] = LOOK_DOWN

        # there exists other actions e.g. "PlaceObjectAtPoint"

        sr = self.controller.step(action_dict)
        self.list_of_actions_so_far.append(action_dict)

        if self._verbose:
            print(self.controller.last_event)

        if self.restrict_to_initially_reachable_points:
            self._snap_agent_to_initially_reachable()

        if skip_render:
            # Rendering was skipped: keep exposing the frame from before the step.
            assert last_frame is not None
            self.last_event.frame = last_frame

        return sr
================================================
FILE: allenact_plugins/manipulathor_plugin/manipulathor_sensors.py
================================================
"""Utility classes and functions for sensory inputs used by the models."""
from typing import Any, Union, Optional
import gym
import numpy as np
from allenact.base_abstractions.sensor import Sensor
from allenact.embodiedai.sensors.vision_sensors import DepthSensor, RGBSensor
from allenact.base_abstractions.task import Task
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact_plugins.manipulathor_plugin.arm_calculation_utils import (
world_coords_to_agent_coords,
state_dict_to_tensor,
diff_position,
coord_system_transform,
)
from allenact_plugins.manipulathor_plugin.manipulathor_environment import (
ManipulaTHOREnvironment,
)
class DepthSensorThor(
    DepthSensor[
        Union[ManipulaTHOREnvironment],
        Union[Task[ManipulaTHOREnvironment]],
    ]
):
    """Sensor for depth images in THOR.

    Returns, from a running ManipulaTHOREnvironment instance, a copy of the
    depth frame of the controller's last event.
    """

    def frame_from_env(
        self, env: ManipulaTHOREnvironment, task: Optional[Task]
    ) -> np.ndarray:
        latest_event = env.controller.last_event
        depth_frame = latest_event.depth_frame
        return depth_frame.copy()
class NoVisionSensorThor(
    RGBSensor[
        Union[ManipulaTHOREnvironment],
        Union[Task[ManipulaTHOREnvironment]],
    ]
):
    """RGB-shaped sensor in THOR that carries no visual information.

    Returns, from a running ManipulaTHOREnvironment instance, an all-zero
    array with the same shape and dtype as the environment's current frame.
    """

    def frame_from_env(
        self, env: ManipulaTHOREnvironment, task: Optional[Task]
    ) -> np.ndarray:
        reference_frame = env.current_frame
        return np.zeros_like(reference_frame)
class AgentRelativeCurrentObjectStateThorSensor(Sensor):
    """Sensor reporting the task object's current state relative to the agent.

    The object's position and rotation are converted with
    `world_coords_to_agent_coords` and then packed by
    `state_dict_to_tensor`.
    """

    def __init__(self, uuid: str = "relative_current_obj_state", **kwargs: Any):
        # NOTE: `locals()` is handed to `prepare_locals_for_super`, so the set
        # of local names defined here presumably determines what reaches the
        # parent initializer -- do not add or rename locals in this method.
        observation_space = gym.spaces.Box(
            low=-100, high=100, shape=(6,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return the agent-relative position and rotation of the task object."""
        object_state = env.get_object_by_id(task.task_info["objectId"])
        agent_metadata = env.controller.last_event.metadata["agent"]
        object_in_agent_frame = world_coords_to_agent_coords(
            object_state, agent_metadata
        )
        return state_dict_to_tensor(
            {
                "position": object_in_agent_frame["position"],
                "rotation": object_in_agent_frame["rotation"],
            }
        )
class RelativeObjectToGoalSensor(Sensor):
    """Sensor reporting the offset between the task object and its goal.

    Both the object and the goal are first expressed in the agent's frame;
    their signed position difference is then converted into the configured
    coordinate system by `coord_system_transform`.
    """

    def __init__(
        self,
        uuid: str = "relative_obj_to_goal",
        coord_system: str = "xyz_unsigned",
        **kwargs: Any
    ):
        assert coord_system in (
            "xyz_unsigned",
            "xyz_signed",
            "polar_radian",
            "polar_trigo",
        )
        self.coord_system = coord_system
        # "polar_trigo" observations have 5 entries, every other system has 3.
        # NOTE: `locals()` is handed to `prepare_locals_for_super` below, so
        # the local names in this method are kept exactly as they are.
        obs_dim = 5 if coord_system == "polar_trigo" else 3
        observation_space = gym.spaces.Box(
            low=-100, high=100, shape=(obs_dim,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return the object-to-goal offset in `self.coord_system`."""
        object_state = env.get_object_by_id(task.task_info["objectId"])
        goal_state = task.task_info["target_location"]
        agent_metadata = env.controller.last_event.metadata["agent"]

        object_in_agent_frame = world_coords_to_agent_coords(
            object_state, agent_metadata
        )
        goal_in_agent_frame = world_coords_to_agent_coords(goal_state, agent_metadata)

        offset = diff_position(
            object_in_agent_frame, goal_in_agent_frame, absolute=False
        )
        return coord_system_transform(offset, self.coord_system)
class InitialObjectToGoalSensor(Sensor):
    """Sensor reporting the offset between the object's initial location and the goal.

    All inputs come from `task.task_info` (initial object location, target
    location and initial agent state); the environment is not queried.
    """

    def __init__(self, uuid: str = "initial_obj_to_goal", **kwargs: Any):
        # NOTE: `locals()` is handed to `prepare_locals_for_super`, so no
        # additional local names are introduced in this method.
        observation_space = gym.spaces.Box(
            low=-100, high=100, shape=(3,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return the initial object-to-goal position difference."""
        initial_object_state = task.task_info["initial_object_location"]
        goal_state = task.task_info["target_location"]
        initial_agent_state = task.task_info["agent_initial_state"]

        object_in_agent_frame = world_coords_to_agent_coords(
            initial_object_state, initial_agent_state
        )
        goal_in_agent_frame = world_coords_to_agent_coords(
            goal_state, initial_agent_state
        )

        offset = diff_position(object_in_agent_frame, goal_in_agent_frame)
        return state_dict_to_tensor({"position": offset})
class DistanceObjectToGoalSensor(Sensor):
    """Sensor reporting the Euclidean distance from the task object to its goal.

    The observation is a single-element value: the norm of the position
    difference between the object and the goal, both expressed in the
    agent's frame.
    """

    def __init__(self, uuid: str = "distance_obj_to_goal", **kwargs: Any):
        # The observation is reshaped with `.view(1)` in `get_observation`,
        # so the declared space must have shape (1,). It was previously
        # declared as (3,), which did not match what the sensor returns.
        # NOTE: `locals()` is handed to `prepare_locals_for_super`, so no
        # additional local names are introduced in this method.
        observation_space = gym.spaces.Box(
            low=-100, high=100, shape=(1,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return the object-to-goal distance as a one-element value."""
        goal_obj_id = task.task_info["objectId"]
        object_info = env.get_object_by_id(goal_obj_id)
        target_state = task.task_info["target_location"]
        agent_state = env.controller.last_event.metadata["agent"]

        relative_current_obj = world_coords_to_agent_coords(object_info, agent_state)
        relative_goal_state = world_coords_to_agent_coords(target_state, agent_state)
        relative_distance = diff_position(relative_current_obj, relative_goal_state)

        result = state_dict_to_tensor(dict(position=relative_distance))
        # Collapse the per-axis differences into their Euclidean norm.
        result = ((result**2).sum() ** 0.5).view(1)
        return result
class RelativeAgentArmToObjectSensor(Sensor):
    """Sensor reporting the offset between the task object and the agent's hand.

    The object and the hand are both expressed in the agent's frame; their
    signed position difference is converted into the configured coordinate
    system by `coord_system_transform`.
    """

    def __init__(
        self,
        uuid: str = "relative_agent_arm_to_obj",
        coord_system: str = "xyz_unsigned",
        **kwargs: Any
    ):
        assert coord_system in (
            "xyz_unsigned",
            "xyz_signed",
            "polar_radian",
            "polar_trigo",
        )
        self.coord_system = coord_system
        # "polar_trigo" observations have 5 entries, every other system has 3.
        # NOTE: `locals()` is handed to `prepare_locals_for_super` below, so
        # the local names in this method are kept exactly as they are.
        obs_dim = 5 if coord_system == "polar_trigo" else 3
        observation_space = gym.spaces.Box(
            low=-100, high=100, shape=(obs_dim,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return the object-to-hand offset in `self.coord_system`."""
        object_state = env.get_object_by_id(task.task_info["objectId"])
        hand_state = env.get_absolute_hand_state()
        agent_metadata = env.controller.last_event.metadata["agent"]

        object_in_agent_frame = world_coords_to_agent_coords(
            object_state, agent_metadata
        )
        hand_in_agent_frame = world_coords_to_agent_coords(hand_state, agent_metadata)

        offset = diff_position(
            object_in_agent_frame, hand_in_agent_frame, absolute=False
        )
        return coord_system_transform(offset, self.coord_system)
class InitialAgentArmToObjectSensor(Sensor):
    """Sensor reporting the offset between the initial object and hand locations.

    The initial object location and initial hand state are read from
    `task.task_info` and expressed relative to the agent.
    """

    def __init__(self, uuid: str = "initial_agent_arm_to_obj", **kwargs: Any):
        # NOTE: `locals()` is handed to `prepare_locals_for_super`, so no
        # additional local names are introduced in this method.
        observation_space = gym.spaces.Box(
            low=-100, high=100, shape=(3,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return the position difference between initial object and initial hand."""
        initial_object_state = task.task_info["initial_object_location"]
        initial_hand = task.task_info["initial_hand_state"]

        # NOTE(review): the agent's *current* pose is used as the reference
        # frame here, unlike InitialObjectToGoalSensor, which uses
        # task_info["agent_initial_state"] -- confirm this is intended.
        object_in_agent_frame = world_coords_to_agent_coords(
            initial_object_state, env.controller.last_event.metadata["agent"]
        )
        hand_in_agent_frame = world_coords_to_agent_coords(
            initial_hand, env.controller.last_event.metadata["agent"]
        )

        offset = diff_position(object_in_agent_frame, hand_in_agent_frame)
        return state_dict_to_tensor({"position": offset})
class DistanceAgentArmToObjectSensor(Sensor):
    """Sensor reporting the Euclidean distance from the agent's hand to the task object.

    The observation is a single-element value: the norm of the position
    difference between the object and the hand, both expressed in the
    agent's frame.
    """

    def __init__(self, uuid: str = "distance_agent_arm_to_obj", **kwargs: Any):
        # The observation is reshaped with `.view(1)` in `get_observation`,
        # so the declared space must have shape (1,). It was previously
        # declared as (3,), which did not match what the sensor returns.
        # NOTE: `locals()` is handed to `prepare_locals_for_super`, so no
        # additional local names are introduced in this method.
        observation_space = gym.spaces.Box(
            low=-100, high=100, shape=(1,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return the hand-to-object distance as a one-element value."""
        goal_obj_id = task.task_info["objectId"]
        object_info = env.get_object_by_id(goal_obj_id)
        hand_state = env.get_absolute_hand_state()

        relative_goal_obj = world_coords_to_agent_coords(
            object_info, env.controller.last_event.metadata["agent"]
        )
        relative_hand_state = world_coords_to_agent_coords(
            hand_state, env.controller.last_event.metadata["agent"]
        )
        relative_distance = diff_position(relative_goal_obj, relative_hand_state)

        result = state_dict_to_tensor(dict(position=relative_distance))
        # Collapse the per-axis differences into their Euclidean norm.
        result = ((result**2).sum() ** 0.5).view(1)
        return result
class PickedUpObjSensor(Sensor):
    """Sensor exposing the task's `object_picked_up` flag."""

    def __init__(self, uuid: str = "pickedup_object", **kwargs: Any):
        # NOTE: `locals()` is handed to `prepare_locals_for_super`, so no
        # additional local names are introduced in this method.
        observation_space = gym.spaces.Box(
            low=0, high=1, shape=(1,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return `task.object_picked_up` unchanged (no conversion is applied)."""
        picked_up = task.object_picked_up
        return picked_up
================================================
FILE: allenact_plugins/manipulathor_plugin/manipulathor_task_samplers.py
================================================
"""Task Samplers for the task of ArmPointNav."""
import json
import random
from typing import List, Dict, Optional, Any, Union
import gym
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import set_deterministic_cudnn, set_seed
from allenact_plugins.manipulathor_plugin.manipulathor_environment import (
ManipulaTHOREnvironment,
)
from allenact_plugins.manipulathor_plugin.manipulathor_tasks import (
AbstractPickUpDropOffTask,
ArmPointNavTask,
RotateArmPointNavTask,
CamRotateArmPointNavTask,
EasyArmPointNavTask,
)
from allenact_plugins.manipulathor_plugin.manipulathor_utils import (
transport_wrapper,
initialize_arm,
)
from allenact_plugins.manipulathor_plugin.manipulathor_viz import (
ImageVisualizer,
LoggerVisualizer,
)
class AbstractMidLevelArmTaskSampler(TaskSampler):
    """Common state and helpers for ManipulaTHOR arm task samplers.

    Stores the sampling configuration, lazily-created environment handle
    and scene bookkeeping. Concrete samplers are expected to provide the
    actual task sampling logic.
    """

    _TASK_TYPE = Task

    def __init__(
        self,
        scenes: List[str],
        sensors: List[Sensor],
        max_steps: int,
        env_args: Dict[str, Any],
        action_space: gym.Space,
        rewards_config: Dict,
        objects: List[str],
        scene_period: Optional[Union[int, str]] = None,
        max_tasks: Optional[int] = None,
        num_task_per_scene: Optional[int] = None,
        seed: Optional[int] = None,
        deterministic_cudnn: bool = False,
        fixed_tasks: Optional[List[Dict[str, Any]]] = None,
        visualizers: Optional[List[LoggerVisualizer]] = None,
        *args,
        **kwargs
    ) -> None:
        """Store the configuration, seed the RNGs and reset the sampler.

        `kwargs` must contain the keys "sampler_mode" and "cap_training";
        a `KeyError` is raised otherwise. `fixed_tasks` is accepted but not
        used by this initializer.
        """
        # Plain configuration.
        self.rewards_config = rewards_config
        self.env_args = env_args
        self.scenes = scenes
        self.grid_size = 0.25
        self.sensors = sensors
        self.max_steps = max_steps
        self._action_space = action_space
        self.objects = objects
        self.num_task_per_scene = num_task_per_scene

        # The environment is only created on demand by subclasses.
        self.env: Optional[ManipulaTHOREnvironment] = None

        # Scene bookkeeping, given concrete values by `reset()`.
        self.scene_counter: Optional[int] = None
        self.scene_order: Optional[List[str]] = None
        self.scene_id: Optional[int] = None
        self.scene_period: Optional[Union[str, int]] = (
            scene_period  # default makes a random choice
        )

        # `max_tasks` is restored from `reset_tasks` on every `reset()`.
        self.max_tasks: Optional[int] = None
        self.reset_tasks = max_tasks

        self._last_sampled_task: Optional[Task] = None

        # Seeding has to precede `reset()`, which shuffles the scene order.
        self.seed: Optional[int] = None
        self.set_seed(seed)
        if deterministic_cudnn:
            set_deterministic_cudnn()

        self.reset()

        self.visualizers = [] if visualizers is None else visualizers
        self.sampler_mode = kwargs["sampler_mode"]
        self.cap_training = kwargs["cap_training"]

    def _create_environment(self, **kwargs) -> ManipulaTHOREnvironment:
        """Build a new `ManipulaTHOREnvironment` from `self.env_args`."""
        return ManipulaTHOREnvironment(
            make_agents_visible=False,
            object_open_speed=0.05,
            env_args=self.env_args,
        )

    @property
    def last_sampled_task(self) -> Optional[Task]:
        """The task most recently stored in `_last_sampled_task`, if any."""
        return self._last_sampled_task

    def close(self) -> None:
        """Stop the environment if one has been created."""
        if self.env is None:
            return
        self.env.stop()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Check if observation spaces equal.

        # Returns

        True if all Tasks that can be sampled by this sampler have the
            same observation space. Otherwise False.
        """
        return True

    def reset(self):
        """Reshuffle the scene order and restore counters and `max_tasks`."""
        self.scene_counter = 0
        scene_indices = list(range(len(self.scenes)))
        random.shuffle(scene_indices)
        self.scene_order = scene_indices
        self.scene_id = 0
        self.sampler_index = 0
        self.max_tasks = self.reset_tasks

    def set_seed(self, seed: int):
        """Record `seed` and, unless it is None, pass it to `set_seed`."""
        self.seed = seed
        if seed is None:
            return
        set_seed(seed)
class SimpleArmPointNavGeneralSampler(AbstractMidLevelArmTaskSampler):
    """Task sampler that draws source/target object placements from the APND dataset.

    In "train" mode a random pair of data points sharing an object id is
    drawn each time. In any other mode the tasks are read, in order, from
    the pre-generated deterministic task files.
    """

    _TASK_TYPE = AbstractPickUpDropOffTask

    def __init__(self, **kwargs) -> None:
        """Load the valid object positions (and deterministic tasks if not training)."""
        super().__init__(**kwargs)

        self.all_possible_points = []
        for scene in self.scenes:
            for object_type in self.objects:
                valid_position_adr = "datasets/apnd-dataset/valid_object_positions/valid_{}_positions_in_{}.json".format(
                    object_type, scene
                )
                try:
                    with open(valid_position_adr) as f:
                        data_points = json.load(f)
                except Exception:
                    # Best effort: a missing/unreadable file is reported and skipped.
                    print("Failed to load", valid_position_adr)
                    continue
                self.all_possible_points.extend(
                    [point for point in data_points[scene] if point["visibility"]]
                )

        self.countertop_object_to_data_id = self.calc_possible_trajectories(
            self.all_possible_points
        )

        # Scenes for which at least one group offers two or more data points.
        scenes_with_pairs = {
            self.all_possible_points[ids[0]]["scene_name"]
            for ids in self.countertop_object_to_data_id.values()
            if len(ids) > 1
        }
        if len(scenes_with_pairs) < len(self.scenes):
            print("Not all scenes appear")

        print(
            "Len dataset",
            len(self.all_possible_points),
            "total_remained",
            sum(len(ids) for ids in self.countertop_object_to_data_id.values()),
        )

        if self.sampler_mode == "train":
            return

        # Be aware that this totally overrides some stuff
        self.deterministic_data_list = []
        for scene in self.scenes:
            for object_type in self.objects:
                valid_position_adr = "datasets/apnd-dataset/deterministic_tasks/tasks_{}_positions_in_{}.json".format(
                    object_type, scene
                )
                try:
                    with open(valid_position_adr) as f:
                        data_points = json.load(f)
                except Exception:
                    print("Failed to load", valid_position_adr)
                    continue
                scene_tasks = [
                    {"scene": scene, "index": position, "datapoint": point}
                    for position, point in enumerate(data_points[scene])
                ]
                if self.num_task_per_scene is not None:
                    # select a small number of data points for fast evaluation
                    scene_tasks = scene_tasks[
                        : min(self.num_task_per_scene, len(scene_tasks))
                    ]
                self.deterministic_data_list += scene_tasks

        if self.sampler_mode == "test":
            random.shuffle(self.deterministic_data_list)
        self.max_tasks = self.reset_tasks = len(self.deterministic_data_list)

    def next_task(
        self, force_advance_scene: bool = False
    ) -> Optional[AbstractPickUpDropOffTask]:
        """Set up the environment for the next source/target pair and return its task.

        Returns `None` when `max_tasks` is exhausted or, outside of training,
        when no deterministic tasks remain. `force_advance_scene` is unused.
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None
        if self.sampler_mode != "train" and self.length <= 0:
            return None

        source_data_point, target_data_point = self.get_source_target_indices()
        assert source_data_point["object_id"] == target_data_point["object_id"]
        assert source_data_point["scene_name"] == target_data_point["scene_name"]

        if self.env is None:
            self.env = self._create_environment()
        self.env.reset(
            scene_name=source_data_point["scene_name"],
            agentMode="arm",
            agentControllerType="mid-level",
        )
        initialize_arm(self.env.controller)

        task_info = {
            "objectId": source_data_point["object_id"],
            "countertop_id": source_data_point["countertop_id"],
            "source_location": source_data_point,
            "target_location": {
                "position": target_data_point["object_location"],
                "rotation": {"x": 0, "y": 0, "z": 0},
            },
        }

        # Put the object at its source location, then the agent at its pose.
        transport_wrapper(
            self.env,
            source_data_point["object_id"],
            source_data_point["object_location"],
        )
        agent_pose = source_data_point["agent_pose"]
        teleport_action = dict(
            action="TeleportFull",
            standing=True,
            x=agent_pose["position"]["x"],
            y=agent_pose["position"]["y"],
            z=agent_pose["position"]["z"],
            rotation=dict(
                x=agent_pose["rotation"]["x"],
                y=agent_pose["rotation"]["y"],
                z=agent_pose["rotation"]["z"],
            ),
            horizon=agent_pose["cameraHorizon"],
        )
        self.env.step(teleport_action)

        # Image visualizers additionally need the raw source/target points.
        if any(issubclass(type(viz), ImageVisualizer) for viz in self.visualizers):
            task_info["visualization_source"] = source_data_point
            task_info["visualization_target"] = target_data_point

        self._last_sampled_task = self._TASK_TYPE(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            visualizers=self.visualizers,
            reward_configs=self.rewards_config,
        )
        return self._last_sampled_task

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Number of distinct tasks; `None` while training."""
        if self.sampler_mode == "train":
            return None
        return min(self.max_tasks, len(self.deterministic_data_list))

    @property
    def length(self) -> Union[int, float]:
        """Length.

        # Returns

        Number of total tasks remaining that can be sampled. Can be float('inf').
        """
        if self.sampler_mode != "train":
            return self.total_unique - self.sampler_index
        if self.max_tasks is None:
            return float("inf")
        return self.max_tasks

    def get_source_target_indices(self):
        """Return the (source, target) data points for the next task.

        Training draws two distinct points from a randomly chosen group
        with at least two entries; otherwise the next deterministic entry
        is returned and `sampler_index` is advanced.
        """
        if self.sampler_mode != "train":
            result = self.deterministic_data_list[self.sampler_index]["datapoint"]
            self.sampler_index += 1
            return result

        valid_countertops = [
            key
            for key, ids in self.countertop_object_to_data_id.items()
            if len(ids) > 1
        ]
        countertop_id = random.choice(valid_countertops)
        first, second = random.sample(
            self.countertop_object_to_data_id[countertop_id], 2
        )
        return (self.all_possible_points[first], self.all_possible_points[second])

    def calc_possible_trajectories(self, all_possible_points):
        """Group the indices of `all_possible_points` by each point's "object_id"."""
        object_to_data_id = {}
        for index, point in enumerate(all_possible_points):
            object_to_data_id.setdefault(point["object_id"], []).append(index)
        return object_to_data_id
class ArmPointNavTaskSampler(SimpleArmPointNavGeneralSampler):
    """Task sampler for ArmPointNav with an explicitly chosen initial agent pose.

    Compared with the parent sampler, the agent starts from a pose taken
    from a file of valid agent locations (random while training, indexed
    otherwise), and the task info carries the initial agent, object and
    hand states.
    """

    _TASK_TYPE = ArmPointNavTask

    def __init__(self, **kwargs) -> None:
        """Load the per-scene agent locations matching the sampler mode."""
        super().__init__(**kwargs)

        if self.sampler_mode == "test":
            possible_initial_locations = (
                "datasets/apnd-dataset/deterministic_valid_agent_initial_locations.json"
            )
        else:
            possible_initial_locations = (
                "datasets/apnd-dataset/valid_agent_initial_locations.json"
            )
        with open(possible_initial_locations) as f:
            self.possible_agent_reachable_poses = json.load(f)

    @staticmethod
    def _agent_pose_from_location(location):
        """Build the agent pose dictionary for one entry of the locations file."""
        return {
            "name": "agent",
            "position": {
                "x": location["x"],
                "y": location["y"],
                "z": location["z"],
            },
            "rotation": {
                "x": -0.0,
                "y": location["rotation"],
                "z": 0.0,
            },
            "cameraHorizon": location["horizon"],
            "isStanding": True,
        }

    def next_task(
        self, force_advance_scene: bool = False
    ) -> Optional[AbstractPickUpDropOffTask]:
        """Set up the environment for the next source/target pair and return its task.

        Returns `None` when `max_tasks` is exhausted or, outside of training,
        when no deterministic tasks remain. `force_advance_scene` is unused.
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None
        if self.sampler_mode != "train" and self.length <= 0:
            return None

        source_data_point, target_data_point = self.get_source_target_indices()
        assert source_data_point["object_id"] == target_data_point["object_id"]
        assert source_data_point["scene_name"] == target_data_point["scene_name"]

        if self.env is None:
            self.env = self._create_environment()
        self.env.reset(
            scene_name=source_data_point["scene_name"],
            agentMode="arm",
            agentControllerType="mid-level",
        )
        initialize_arm(self.env.controller)

        target_location = {
            "position": target_data_point["object_location"],
            "rotation": {"x": 0, "y": 0, "z": 0},
            "countertop_id": target_data_point["countertop_id"],
        }

        transport_wrapper(
            self.env,
            source_data_point["object_id"],
            source_data_point["object_location"],
        )

        # Unlike the parent sampler, the agent starts from the pose stored
        # under "initial_agent_pose" by `get_source_target_indices`.
        agent_pose = source_data_point["initial_agent_pose"]
        teleport_action = dict(
            action="TeleportFull",
            standing=True,
            x=agent_pose["position"]["x"],
            y=agent_pose["position"]["y"],
            z=agent_pose["position"]["z"],
            rotation=dict(
                x=agent_pose["rotation"]["x"],
                y=agent_pose["rotation"]["y"],
                z=agent_pose["rotation"]["z"],
            ),
            horizon=agent_pose["cameraHorizon"],
        )
        self.env.step(teleport_action)

        wants_goal_visualization = any(
            issubclass(type(viz), ImageVisualizer) for viz in self.visualizers
        )

        # Snapshot the state right after placing the object and the agent.
        initial_object_info = self.env.get_object_by_id(source_data_point["object_id"])
        initial_agent_location = self.env.controller.last_event.metadata["agent"]
        initial_hand_state = self.env.get_absolute_hand_state()

        task_info = {
            "objectId": source_data_point["object_id"],
            "source_location": source_data_point,  # used in analysis
            "target_location": target_location,  # used in analysis
            "agent_initial_state": initial_agent_location,  # not used
            "initial_object_location": initial_object_info,  # not used
            "initial_hand_state": initial_hand_state,
        }
        if wants_goal_visualization:
            task_info["visualization_source"] = source_data_point
            task_info["visualization_target"] = target_data_point

        self._last_sampled_task = self._TASK_TYPE(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            visualizers=self.visualizers,
            reward_configs=self.rewards_config,
        )
        return self._last_sampled_task

    def get_source_target_indices(self):
        """Return the (source, target) data points with an initial agent pose attached.

        The pose is written into the source data point under the key
        "initial_agent_pose" (the data point is modified in place).
        """
        if self.sampler_mode == "train":
            valid_countertops = [
                key
                for key, ids in self.countertop_object_to_data_id.items()
                if len(ids) > 1
            ]
            countertop_id = random.choice(valid_countertops)
            first, second = random.sample(
                self.countertop_object_to_data_id[countertop_id], 2
            )
            result = (
                self.all_possible_points[first],
                self.all_possible_points[second],
            )
            # Training: any valid agent location of the scene may be used.
            chosen_location = random.choice(
                self.possible_agent_reachable_poses[result[0]["scene_name"]]
            )
            result[0]["initial_agent_pose"] = self._agent_pose_from_location(
                chosen_location
            )
            return result

        # agent init location needs to be fixed, therefore we load a fixed
        # valid agent init that is previously randomized
        entry = self.deterministic_data_list[self.sampler_index]
        result = entry["datapoint"]
        chosen_location = self.possible_agent_reachable_poses[entry["scene"]][
            entry["index"]
        ]
        result[0]["initial_agent_pose"] = self._agent_pose_from_location(
            chosen_location
        )
        self.sampler_index += 1
        return result
# ArmPointNavTaskSampler variant: only `_TASK_TYPE` differs, so the tasks
# built in `next_task` are RotateArmPointNavTask instances.
class RotateArmPointNavTaskSampler(ArmPointNavTaskSampler):
_TASK_TYPE = RotateArmPointNavTask
# ArmPointNavTaskSampler variant: only `_TASK_TYPE` differs, so the tasks
# built in `next_task` are CamRotateArmPointNavTask instances.
class CamRotateArmPointNavTaskSampler(ArmPointNavTaskSampler):
_TASK_TYPE = CamRotateArmPointNavTask
# ArmPointNavTaskSampler variant: only `_TASK_TYPE` differs, so the tasks
# built in `next_task` are EasyArmPointNavTask instances.
class EasyArmPointNavTaskSampler(ArmPointNavTaskSampler):
_TASK_TYPE = EasyArmPointNavTask
def get_all_tuples_from_list(list):
    """Return every unordered pair of elements of `list` as two-item lists.

    Pairs are ordered by the position of their first element, then by the
    position of their second element; within a pair the element appearing
    earlier in `list` comes first.
    """
    # The parameter name shadows the builtin but is kept for compatibility
    # with callers that pass it by keyword.
    count = len(list)
    return [
        [list[left], list[right]]
        for left in range(count - 1)
        for right in range(left + 1, count)
    ]
================================================
FILE: allenact_plugins/manipulathor_plugin/manipulathor_tasks.py
================================================
"""Task Definions for the task of ArmPointNav."""
import copy
from typing import Dict, Tuple, List, Any, Optional
import gym
import numpy as np
from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact_plugins.manipulathor_plugin.armpointnav_constants import (
MOVE_ARM_CONSTANT,
DISTANCE_EPS,
)
from allenact_plugins.manipulathor_plugin.manipulathor_constants import (
MOVE_ARM_HEIGHT_P,
MOVE_ARM_HEIGHT_M,
MOVE_ARM_X_P,
MOVE_ARM_X_M,
MOVE_ARM_Y_P,
MOVE_ARM_Y_M,
MOVE_ARM_Z_P,
MOVE_ARM_Z_M,
ROTATE_WRIST_PITCH_P,
ROTATE_WRIST_PITCH_M,
ROTATE_WRIST_YAW_P,
ROTATE_WRIST_YAW_M,
ROTATE_ELBOW_P,
ROTATE_ELBOW_M,
LOOK_UP,
LOOK_DOWN,
MOVE_AHEAD,
ROTATE_RIGHT,
ROTATE_LEFT,
PICKUP,
DONE,
)
from allenact_plugins.manipulathor_plugin.manipulathor_environment import (
ManipulaTHOREnvironment,
position_distance,
)
from allenact_plugins.manipulathor_plugin.manipulathor_viz import LoggerVisualizer
class AbstractPickUpDropOffTask(Task[ManipulaTHOREnvironment]):
    """Base class for tasks that pick an object up and drop it off at a goal.

    Provides the bookkeeping shared by concrete tasks: the per-step action
    history, pickup status, visualizer hooks and the end-of-episode
    metrics. Subclasses must implement `_step` and `judge`.
    """

    _actions = (
        MOVE_ARM_HEIGHT_P,
        MOVE_ARM_HEIGHT_M,
        MOVE_ARM_X_P,
        MOVE_ARM_X_M,
        MOVE_ARM_Y_P,
        MOVE_ARM_Y_M,
        MOVE_ARM_Z_P,
        MOVE_ARM_Z_M,
        MOVE_AHEAD,
        ROTATE_RIGHT,
        ROTATE_LEFT,
    )

    # New commit of AI2THOR has some issue that the objects will vibrate a bit
    # without any external force. To eliminate the vibration effect, we have to
    # introduce _vibration_dist_dict when checking the disturbance, from an external csv file.
    # By default it is None, i.e. we assume there is no vibration.
    _vibration_dist_dict: Optional[Dict] = None

    def __init__(
        self,
        env: ManipulaTHOREnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        visualizers: Optional[List[LoggerVisualizer]] = None,
        **kwargs
    ) -> None:
        """Initializer.

        # Parameters
        env : The environment the task acts in.
        sensors : Sensors used to build observations.
        task_info : Task description; must contain at least "objectId" and
            "target_location" for the distance helpers below.
        max_steps : Maximum number of steps, forwarded to the parent class.
        visualizers : Optional visualizers notified during the episode.
        kwargs : Forwarded to the parent class; must contain
            "reward_configs" (a `KeyError` is raised otherwise).
        """
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )
        # Episode-state flags are set before the visualizers are started,
        # because the visualizers are handed this task instance.
        self._took_end_action: bool = False
        self._success: Optional[bool] = False
        self._subsampled_locations_from_which_obj_visible: Optional[
            List[Tuple[float, float, int, int]]
        ] = None

        self.visualizers = visualizers if visualizers is not None else []
        self.start_visualize()

        # (action name, success as float) for every step taken so far.
        self.action_sequence_and_success = []

        # Distances recorded on the previous reward computation; they stay
        # None until a subclass's `judge` stores a first value.
        self.last_obj_to_goal_distance = None
        self.last_arm_to_obj_distance = None

        self.object_picked_up = False
        self.got_reward_for_pickup = False
        self.reward_configs = kwargs["reward_configs"]

        # Reference locations for the disturbance metric in `metrics`.
        self.initial_object_locations = self.env.get_current_object_locations()

    @property
    def action_space(self):
        """Discrete action space with one entry per name in `_actions`."""
        return gym.spaces.Discrete(len(self._actions))

    def reached_terminal_state(self) -> bool:
        """Return True once the end action has been taken."""
        return self._took_end_action

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Return the tuple of action names of this task class."""
        return cls._actions

    def close(self) -> None:
        """Stop the underlying environment."""
        self.env.stop()

    def obj_state_aproximity(self, s1, s2):
        """Return True if two states are within `2 * MOVE_ARM_CONSTANT` on every axis.

        Only the "position" entries are compared; rotation is ignored.
        """
        position1 = s1["position"]
        position2 = s2["position"]
        eps = MOVE_ARM_CONSTANT * 2
        return all(
            abs(position1[axis] - position2[axis]) < eps for axis in ("x", "y", "z")
        )

    def start_visualize(self):
        """Flush any leftover visualizer state, then log the initial frame."""
        for visualizer in self.visualizers:
            if not visualizer.is_empty():
                # A previous episode was not finished properly; close it out.
                print("OH NO VISUALIZER WAS NOT EMPTY")
                visualizer.finish_episode(self.env, self, self.task_info)
                visualizer.finish_episode_metrics(self, self.task_info, None)
            visualizer.log(self.env)

    def visualize(self, action_str):
        """Log the current environment state and `action_str` to every visualizer."""
        for visualizer in self.visualizers:
            visualizer.log(self.env, action_str)

    def finish_visualizer(self):
        """Tell every visualizer that the episode has finished."""
        for visualizer in self.visualizers:
            visualizer.finish_episode(self.env, self, self.task_info)

    def finish_visualizer_metrics(self, metric_results):
        """Hand the episode's metrics to every visualizer."""
        for visualizer in self.visualizers:
            visualizer.finish_episode_metrics(self, self.task_info, metric_results)

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Return the environment's current frame; only mode "rgb" is supported."""
        assert mode == "rgb", "only rgb rendering is implemented"
        return self.env.current_frame

    def calc_action_stat_metrics(self) -> Dict[str, Any]:
        """Summarise the action history as frequencies and success rates.

        # Returns

        A dictionary with, for every action name, "action_stat/<name>" (the
        fraction of steps using that action) and "action_success/<name>"
        (its success rate), plus "action_success/total" (the overall
        success rate). All values are 0.0 when no action has been recorded.
        """
        action_stat = {"action_stat/" + action_str: 0.0 for action_str in self._actions}
        action_success_stat = {
            "action_success/" + action_str: 0.0 for action_str in self._actions
        }
        action_success_stat["action_success/total"] = 0.0

        # Clamp to 1 so that an empty history (e.g. `metrics` called again
        # after it cleared the history) yields zeros instead of raising
        # ZeroDivisionError. Non-empty histories are unaffected.
        seq_len = max(len(self.action_sequence_and_success), 1)

        for action_name, action_success in self.action_sequence_and_success:
            action_stat["action_stat/" + action_name] += 1.0
            action_success_stat["action_success/" + action_name] += action_success
            action_success_stat["action_success/total"] += action_success

        action_success_stat["action_success/total"] /= seq_len

        for action_name in self._actions:
            # Divide by the action's own count first, while it is still a count.
            action_success_stat["action_success/" + action_name] /= max(
                action_stat["action_stat/" + action_name], 1.0
            )
            action_stat["action_stat/" + action_name] /= seq_len

        return {**action_stat, **action_success_stat}

    def metrics(self) -> Dict[str, Any]:
        """Return the episode metrics and, if the task is done, finish visualization.

        When the task is done this also notifies the visualizers and clears
        the recorded action history.
        """
        result = super().metrics()
        if not self.is_done():
            return result

        result = {**result, **self.calc_action_stat_metrics()}

        # 1. goal object metrics
        final_obj_distance_from_goal = self.obj_distance_from_goal()
        result["average/final_obj_distance_from_goal"] = final_obj_distance_from_goal
        result["average/final_arm_distance_from_obj"] = self.arm_distance_from_obj()
        result["average/final_obj_pickup"] = 1 if self.object_picked_up else 0

        # DISTANCE_EPS is added before this value is used as a divisor below.
        original_distance = self.get_original_object_distance() + DISTANCE_EPS
        result["average/original_distance"] = original_distance

        # this ratio can be more than 1
        if self.object_picked_up:
            result["average/ratio_distance_left"] = (
                final_obj_distance_from_goal / original_distance
            )
            result["average/eplen_pickup"] = self.eplen_pickup

        # 2. disturbance with other objects
        current_object_locations = self.env.get_current_object_locations()
        objects_moved = self.env.get_objects_moved(
            self.initial_object_locations,
            current_object_locations,
            self.task_info["objectId"],
            self._vibration_dist_dict,
        )
        result["disturbance/objects_moved_num"] = len(objects_moved)

        # 3. conditioned on success
        if self._success:
            result["average/eplen_success"] = result["ep_length"]
            result["average/success_wo_disturb"] = len(objects_moved) == 0
        else:
            result["average/success_wo_disturb"] = 0.0

        result["success"] = self._success

        self.finish_visualizer_metrics(result)
        self.finish_visualizer()
        self.action_sequence_and_success = []

        return result

    def _step(self, action: int) -> RLStepResult:
        """Take one step; must be implemented by subclasses."""
        # NotImplementedError is a subclass of Exception, so existing
        # `except Exception` handlers keep working.
        raise NotImplementedError("Not implemented")

    def arm_distance_from_obj(self):
        """Return the distance between the task object and the agent's hand."""
        goal_obj_id = self.task_info["objectId"]
        object_info = self.env.get_object_by_id(goal_obj_id)
        hand_state = self.env.get_absolute_hand_state()
        return position_distance(object_info, hand_state)

    def obj_distance_from_goal(self):
        """Return the distance between the task object and its target location."""
        goal_obj_id = self.task_info["objectId"]
        object_info = self.env.get_object_by_id(goal_obj_id)
        goal_state = self.task_info["target_location"]
        return position_distance(object_info, goal_state)

    def get_original_object_distance(self):
        """Return the distance between the object's source and current locations.

        NOTE(review): despite the name, this compares the source location
        with the object's *current* location, not with the target location
        -- confirm this is the intended "original distance".
        """
        goal_obj_id = self.task_info["objectId"]
        s_init = dict(position=self.task_info["source_location"]["object_location"])
        current_location = self.env.get_object_by_id(goal_obj_id)
        return position_distance(s_init, current_location)

    def judge(self) -> float:
        """Compute the reward after having taken a step."""
        raise NotImplementedError("Not implemented")
class ArmPointNavTask(AbstractPickUpDropOffTask):
_actions = (
MOVE_ARM_HEIGHT_P,
MOVE_ARM_HEIGHT_M,
MOVE_ARM_X_P,
MOVE_ARM_X_M,
MOVE_ARM_Y_P,
MOVE_ARM_Y_M,
MOVE_ARM_Z_P,
MOVE_ARM_Z_M,
MOVE_AHEAD,
ROTATE_RIGHT,
ROTATE_LEFT,
PICKUP,
DONE,
)
def __init__(
    self,
    env: ManipulaTHOREnvironment,
    sensors: List[Sensor],
    task_info: Dict[str, Any],
    max_steps: int,
    visualizers: Optional[List[LoggerVisualizer]] = None,
    **kwargs
) -> None:
    """Initialize the parent task, then the disturbance bookkeeping.

    All arguments are forwarded unchanged to the parent initializer.
    """
    super().__init__(
        env=env,
        sensors=sensors,
        task_info=task_info,
        max_steps=max_steps,
        visualizers=visualizers,
        **kwargs
    )

    # Object locations of the previous step, starting from an independent
    # copy of the initial locations recorded by the parent initializer.
    self.previous_object_locations = copy.deepcopy(self.initial_object_locations)

    # Running totals of how far objects have been disturbed.
    # NOTE: visible distance can be negative, no determinitic relation with
    # all distance
    self.cumulated_disturb_distance_all = 0.0
    self.cumulated_disturb_distance_visible = 0.0

    self.current_penalized_distance = 0.0  # used in Sensor for auxiliary task
def metrics(self) -> Dict[str, Any]:
    """Return the parent's metrics plus the cumulated disturbance distances.

    The two disturbance entries are only added once the task is done.
    """
    result = super(ArmPointNavTask, self).metrics()
    if not self.is_done():
        return result

    # add disturbance distance metrics
    result.update(
        {
            "disturbance/objects_moved_distance": (
                self.cumulated_disturb_distance_all
            ),
            "disturbance/objects_moved_distance_vis": (
                self.cumulated_disturb_distance_visible
            ),
        }
    )
    return result
def visualize(self, **kwargs):
    """Log the environment to every visualizer, forwarding `kwargs` to `log`."""
    for viz in self.visualizers:
        viz.log(self.env, **kwargs)
def _step(self, action: int) -> RLStepResult:
    """Execute the action with index `action` and return the step result.

    Besides stepping the environment this records the action and its
    success, detects the first pickup of the task object, and evaluates
    success when the DONE action is taken.
    """
    action_str = self.class_action_names()[action]
    self._last_action_str = action_str

    action_dict = {"action": action_str}
    object_id = self.task_info["objectId"]
    if action_str == PICKUP:
        # The pickup action additionally names the object to grab.
        action_dict["object_id"] = object_id

    self.env.step(action_dict)
    self.last_action_success = self.env.last_action_success

    self.action_sequence_and_success.append(
        (self._last_action_str, float(self.last_action_success))
    )

    # First time the object is found in the hand: remember it so the
    # reward computation can take the pickup into account.
    if not self.object_picked_up and self.env.is_object_at_low_level_hand(
        object_id
    ):
        self.object_picked_up = True
        # plus one because this step has not been counted yet
        self.eplen_pickup = self._num_steps_taken + 1

    if action_str == DONE:
        self._took_end_action = True
        object_state = self.env.get_object_by_id(object_id)
        goal_state = self.task_info["target_location"]
        # Success requires a pickup and the object being close to the goal.
        goal_achieved = self.object_picked_up and self.obj_state_aproximity(
            object_state, goal_state
        )
        self.last_action_success = goal_achieved
        self._success = goal_achieved

    return RLStepResult(
        observation=self.get_observations(),
        reward=self.judge(),
        done=self.is_done(),
        info={"last_action_success": self.last_action_success},
    )
def judge(self) -> float:
    """Compute the reward after having taken a step.

    The reward sums: the step penalty, a failed-action penalty, the
    terminal reward (if the end action was taken), a one-time pickup bonus,
    the decrease in arm-to-object and object-to-goal distances, and a
    disturbance penalty proportional to how far objects moved this step.
    Also updates the cached distances / object locations and notifies the
    visualizers.
    """
    cfg = self.reward_configs
    reward = cfg["step_penalty"]

    action_failed = not self.last_action_success
    pickup_missed = self._last_action_str == PICKUP and not self.object_picked_up
    if action_failed or pickup_missed:
        reward += cfg["failed_action_penalty"]

    if self._took_end_action:
        terminal_key = (
            "goal_success_reward" if self._success else "failed_stop_reward"
        )
        reward += cfg[terminal_key]

    # Pickup bonus is granted a single time per episode.
    if self.object_picked_up and not self.got_reward_for_pickup:
        reward += cfg["pickup_success_reward"]
        self.got_reward_for_pickup = True

    # Shaping term: how much closer the arm got to the object.
    arm_distance = self.arm_distance_from_obj()
    if self.last_arm_to_obj_distance is not None:
        reward += self.last_arm_to_obj_distance - arm_distance
    self.last_arm_to_obj_distance = arm_distance

    # Shaping term: how much closer the object got to the goal.
    goal_distance = self.obj_distance_from_goal()
    if self.last_obj_to_goal_distance is not None:
        reward += self.last_obj_to_goal_distance - goal_distance
    self.last_obj_to_goal_distance = goal_distance

    # Disturbance cost: measured by the summed moving distance of objects.
    # Note that a collided object may keep moving for a while without an
    # external force due to inertia; mass could also be considered.
    current_object_locations = self.env.get_current_object_locations()
    shared_args = dict(
        initial_object_locations=self.initial_object_locations,
        previous_object_locations=self.previous_object_locations,
        current_object_locations=current_object_locations,
        target_object_id=self.task_info["objectId"],
        thres_dict=self._vibration_dist_dict,
    )
    moved_visible = self.env.get_objects_move_distance(
        only_visible=True, **shared_args
    )
    moved_all = self.env.get_objects_move_distance(
        only_visible=False, **shared_args
    )
    self.cumulated_disturb_distance_all += moved_all
    self.cumulated_disturb_distance_visible += moved_visible

    penalized_distance = moved_visible if cfg["disturb_visible"] else moved_all
    reward += cfg["disturb_penalty"] * penalized_distance
    self.current_penalized_distance = penalized_distance
    self.previous_object_locations = current_object_locations

    self.visualize(
        action_str=self._last_action_str,
        disturbance_str=str(round(penalized_distance, 4)),
    )
    return float(reward)
class RotateArmPointNavTask(ArmPointNavTask):
    """ArmPointNav variant whose action space adds wrist and elbow rotation."""

    _actions = (
        # arm translation
        MOVE_ARM_HEIGHT_P, MOVE_ARM_HEIGHT_M,
        MOVE_ARM_X_P, MOVE_ARM_X_M,
        MOVE_ARM_Y_P, MOVE_ARM_Y_M,
        MOVE_ARM_Z_P, MOVE_ARM_Z_M,
        # wrist / elbow rotation
        ROTATE_WRIST_PITCH_P, ROTATE_WRIST_PITCH_M,
        ROTATE_WRIST_YAW_P, ROTATE_WRIST_YAW_M,
        ROTATE_ELBOW_P, ROTATE_ELBOW_M,
        # agent base movement
        MOVE_AHEAD, ROTATE_RIGHT, ROTATE_LEFT,
        # interaction / termination
        PICKUP, DONE,
    )
class CamRotateArmPointNavTask(ArmPointNavTask):
    """ArmPointNav variant adding arm rotation and camera look up/down actions."""

    _actions = (
        # arm translation
        MOVE_ARM_HEIGHT_P, MOVE_ARM_HEIGHT_M,
        MOVE_ARM_X_P, MOVE_ARM_X_M,
        MOVE_ARM_Y_P, MOVE_ARM_Y_M,
        MOVE_ARM_Z_P, MOVE_ARM_Z_M,
        # wrist / elbow rotation
        ROTATE_WRIST_PITCH_P, ROTATE_WRIST_PITCH_M,
        ROTATE_WRIST_YAW_P, ROTATE_WRIST_YAW_M,
        ROTATE_ELBOW_P, ROTATE_ELBOW_M,
        # camera
        LOOK_UP, LOOK_DOWN,
        # agent base movement
        MOVE_AHEAD, ROTATE_RIGHT, ROTATE_LEFT,
        # interaction / termination
        PICKUP, DONE,
    )
class EasyArmPointNavTask(ArmPointNavTask):
    """ArmPointNav variant without explicit pickup / done actions.

    The object is picked up automatically as soon as the environment reports
    it as pickupable, and the episode ends automatically once the held
    object is close enough to the target location.
    """

    _actions = (
        MOVE_ARM_HEIGHT_P,
        MOVE_ARM_HEIGHT_M,
        MOVE_ARM_X_P,
        MOVE_ARM_X_M,
        MOVE_ARM_Y_P,
        MOVE_ARM_Y_M,
        MOVE_ARM_Z_P,
        MOVE_ARM_Z_M,
        MOVE_AHEAD,
        ROTATE_RIGHT,
        ROTATE_LEFT,
        # PICKUP and DONE are intentionally absent: both happen implicitly
        # inside `_step`.
    )

    def _step(self, action: int) -> RLStepResult:
        """Execute ``action`` and handle automatic pickup and termination.

        Args:
            action: index into ``self.class_action_names()``.

        Returns:
            An ``RLStepResult`` holding the new observation, the reward from
            ``judge()``, the done flag and the last-action-success info.
        """
        action_str = self.class_action_names()[action]

        self._last_action_str = action_str
        action_dict = {"action": action_str}
        object_id = self.task_info["objectId"]
        if action_str == PICKUP:
            action_dict = {**action_dict, "object_id": object_id}
        self.env.step(action_dict)
        self.last_action_success = self.env.last_action_success

        last_action_name = self._last_action_str
        last_action_success = float(self.last_action_success)
        self.action_sequence_and_success.append((last_action_name, last_action_success))
        # NOTE: there used to be a `self.visualize(last_action_name)` call
        # here. The inherited `visualize` accepts keyword arguments only, so
        # that positional call raised a TypeError on every step. It is not
        # replaced by a keyword call because `judge()` (invoked below)
        # already logs this step, including the disturbance value.

        # If the object has not been picked up yet, try to pick it up and
        # update the parameters that integrate the pickup into the reward.
        if not self.object_picked_up:
            if (
                object_id
                in self.env.controller.last_event.metadata["arm"]["pickupableObjects"]
            ):
                self.env.step(dict(action="PickupObject"))
                # We are doing an additional pass here; the label is not
                # right and if we fail we will do it twice.
                object_inventory = self.env.controller.last_event.metadata["arm"][
                    "heldObjects"
                ]
                # Something other than the target was grabbed: let go of it.
                if len(object_inventory) > 0 and object_id not in object_inventory:
                    self.env.step(dict(action="ReleaseObject"))

            if self.env.is_object_at_low_level_hand(object_id):
                self.object_picked_up = True
                self.eplen_pickup = (
                    self._num_steps_taken + 1
                )  # plus one because this step has not been counted yet

        if self.object_picked_up:
            object_state = self.env.get_object_by_id(object_id)
            goal_state = self.task_info["target_location"]
            goal_achieved = self.object_picked_up and self.obj_state_aproximity(
                object_state, goal_state
            )
            # Reaching the goal while holding the object ends the episode.
            if goal_achieved:
                self._took_end_action = True
                self.last_action_success = goal_achieved
                self._success = goal_achieved

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )
        return step_result

    # `judge` is inherited unchanged; it seems fine for this variant.
================================================
FILE: allenact_plugins/manipulathor_plugin/manipulathor_utils.py
================================================
import ai2thor
from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
from allenact_plugins.manipulathor_plugin.armpointnav_constants import (
get_agent_start_positions,
)
from allenact_plugins.manipulathor_plugin.manipulathor_constants import (
ADDITIONAL_ARM_ARGS,
)
def make_all_objects_unbreakable(controller):
    """Mark every breakable object type in the current scene as unbreakable.

    Reads the object metadata of ``controller.last_event`` and issues one
    ``MakeObjectsOfTypeUnbreakable`` step per distinct breakable type.
    """
    breakable_types = {
        obj["objectType"]
        for obj in controller.last_event.metadata["objects"]
        if obj["breakable"] is True
    }
    for object_type in breakable_types:
        controller.step(action="MakeObjectsOfTypeUnbreakable", objectType=object_type)
def reset_environment_and_additional_commands(controller, scene_name):
    """Reset ``controller`` to ``scene_name`` and apply the standard setup.

    After the reset, objects are made moveable, the static/kinematic mass
    threshold action is applied, and all objects are made unbreakable.
    """
    controller.reset(scene_name)
    for setup_action in (
        "MakeAllObjectsMoveable",
        "MakeObjectsStaticKinematicMassThreshold",
    ):
        controller.step(action=setup_action)
    make_all_objects_unbreakable(controller)
def transport_wrapper(controller, target_object, target_location):
    """Place ``target_object`` at ``target_location`` and let physics settle.

    Works both with an ``IThorEnvironment`` wrapper (whose ``step`` takes an
    action dict) and with a raw ``ai2thor`` ``Controller`` (whose ``step``
    takes keyword arguments).

    Args:
        controller: an ``IThorEnvironment`` or ``ai2thor.controller.Controller``
            instance (subclasses of either are accepted).
        target_object: id of the object to move.
        target_location: position passed to ``PlaceObjectAtPoint``.

    Returns:
        The event returned by the ``PlaceObjectAtPoint`` step (not the one
        from the subsequent physics advance).

    Raises:
        NotImplementedError: if ``controller`` is of an unsupported type.
    """
    transport_detail = dict(
        action="PlaceObjectAtPoint",
        objectId=target_object,
        position=target_location,
        forceKinematic=True,
    )
    advance_detail = dict(action="AdvancePhysicsStep", simSeconds=1.0)

    # isinstance (rather than an exact type comparison) so that subclasses
    # of the raw controller are handled as well.
    if isinstance(controller, IThorEnvironment):
        event = controller.step(transport_detail)
        controller.step(advance_detail)
    elif isinstance(controller, ai2thor.controller.Controller):
        event = controller.step(**transport_detail)
        controller.step(**advance_detail)
    else:
        raise NotImplementedError(
            "Unsupported controller type: {}".format(type(controller).__name__)
        )
    return event
def initialize_arm(controller):
    """Teleport the agent to the scene's start pose and raise the arm.

    The arm is started from high up. Three actions are issued in order:
    ``TeleportFull`` to the scene's start pose, ``MoveArm`` and
    ``MoveArmBase``.

    Returns:
        A 3-tuple with the events of those three steps, in that order.
    """
    scene_name = controller.last_event.metadata["sceneName"]
    start_pose = get_agent_start_positions()[scene_name]

    teleport_action = {
        "action": "TeleportFull",
        "standing": True,
        "x": start_pose["x"],
        "y": start_pose["y"],
        "z": start_pose["z"],
        "rotation": {"x": 0, "y": start_pose["rotation"], "z": 0},
        "horizon": start_pose["horizon"],
    }
    teleport_event = controller.step(teleport_action)

    arm_event = controller.step(
        dict(action="MoveArm", position=dict(x=0.0, y=0, z=0.35), **ADDITIONAL_ARM_ARGS)
    )
    base_event = controller.step(
        dict(action="MoveArmBase", y=0.8, **ADDITIONAL_ARM_ARGS)
    )
    return teleport_event, arm_event, base_event
================================================
FILE: allenact_plugins/manipulathor_plugin/manipulathor_viz.py
================================================
"""Utility functions and classes for visualization and logging."""
import os
from datetime import datetime
import cv2
import imageio
import matplotlib
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
from allenact_plugins.manipulathor_plugin.manipulathor_utils import initialize_arm
from allenact_plugins.manipulathor_plugin.manipulathor_utils import (
reset_environment_and_additional_commands,
transport_wrapper,
)
class LoggerVisualizer:
    """Base class for per-episode loggers / visualizers.

    Creates the output directory ``<exp_name>/<log_dir>`` on construction and
    holds the queues that subclasses fill in ``log`` and flush in
    ``finish_episode`` / ``finish_episode_metrics``.
    """

    def __init__(self, exp_name="", log_dir=""):
        """Set up the log directory and empty queues.

        Args:
            exp_name: experiment directory; defaults to ``"NoNameExp"``.
            log_dir: sub-directory name; defaults to the class name.
        """
        if log_dir == "":
            log_dir = self.__class__.__name__
        if exp_name == "":
            exp_name = "NoNameExp"
        self.exp_name = exp_name
        self.log_dir = os.path.join(exp_name, log_dir)
        os.makedirs(self.log_dir, exist_ok=True)
        self.log_queue = []
        self.action_queue = []
        self.logger_index = 0

    def log(self, environment, action_str="", disturbance_str=""):
        """Record one step; must be implemented by subclasses.

        Raises:
            NotImplementedError: always, in this base class.
        """
        # NotImplementedError (an Exception subclass) is the conventional
        # signal for an abstract method; existing `except Exception`
        # handlers keep working.
        raise NotImplementedError("Not Implemented")

    def is_empty(self):
        """Return True when no frames/entries are queued in ``log_queue``."""
        return len(self.log_queue) == 0

    def finish_episode_metrics(self, episode_info, task_info, metric_results):
        """Hook called with the episode metrics; no-op by default."""
        pass

    def finish_episode(self, environment, episode_info, task_info):
        """Hook called when an episode ends; no-op by default."""
        pass
class TestMetricLogger(LoggerVisualizer):
    """Logger that accumulates test metrics and writes one record per episode.

    Each finished episode appends a line (the ``str`` of a dict with the
    action sequence, the disturbance sequence and the metrics) to
    ``test_metric.txt`` inside the log directory.
    """

    def __init__(self, exp_name="", log_dir="", **kwargs):
        """Create the log directory and open the metrics file for writing.

        Extra keyword arguments are accepted and ignored.
        """
        super().__init__(exp_name=exp_name, log_dir=log_dir)
        self.total_metric_dict = {}
        log_file_name = os.path.join(self.log_dir, "test_metric.txt")
        # Kept open for the lifetime of the logger; flushed after each episode.
        self.metric_log_file = open(log_file_name, "w")
        self.disturbance_distance_queue = []

    def average_dict(self):
        """Return the mean of every tracked metric over the episodes so far."""
        return {k: sum(v) / len(v) for k, v in self.total_metric_dict.items()}

    def finish_episode_metrics(self, episode_info, task_info, metric_results=None):
        """Aggregate ``metric_results`` and write the episode record.

        When ``metric_results`` is ``None`` the queued actions/disturbances
        are dropped and nothing is written.
        """
        if metric_results is None:
            print("had to reset")
            self.action_queue = []
            self.disturbance_distance_queue = []
            return

        for k, value in metric_results.items():
            if "metric" in k or k in ["ep_length", "reward", "success"]:
                self.total_metric_dict.setdefault(k, []).append(value)
        print(
            "total",
            # Guarded lookup: the metrics may not contain a "success" entry,
            # in which case a plain index would raise KeyError.
            len(self.total_metric_dict.get("success", [])),
            "average test metric",
            self.average_dict(),
        )

        # save the task info and all the action queue and results
        log_dict = {
            "logger_number": self.logger_index,
            "action_sequence": self.action_queue,
            "disturbance_sequence": self.disturbance_distance_queue,
            "task_info_metrics": metric_results,
        }
        self.logger_index += 1
        self.metric_log_file.write(str(log_dict))
        self.metric_log_file.write("\n")
        self.metric_log_file.flush()
        print("Logging to", self.metric_log_file.name)

        self.action_queue = []
        self.disturbance_distance_queue = []

    def log(self, environment, action_str="", disturbance_str=""):
        """Queue the action and disturbance strings of the current step."""
        # We can add agent arm and state location if needed
        self.action_queue.append(action_str)
        self.disturbance_distance_queue.append(disturbance_str)
class BringObjImageVisualizer(LoggerVisualizer):
    """Visualizer that saves an episode GIF plus start/goal snapshots."""

    def finish_episode(self, environment, episode_info, task_info):
        """Write the queued frames as a GIF and save start/goal images.

        The GIF name encodes a timestamp, the logger index, the source and
        goal object types and whether pickup / the episode succeeded. The
        environment is reset before the start/goal snapshots are taken.
        The queues are cleared afterwards.
        """
        now = datetime.now()
        time_to_write = now.strftime("%Y_%m_%d_%H_%M_%S_%f")
        time_to_write += "log_ind_{}".format(self.logger_index)
        self.logger_index += 1
        print("Loggigng", time_to_write, "len", len(self.log_queue))

        source_object_id = task_info["source_object_id"]
        goal_object_id = task_info["goal_object_id"]
        pickup_success = episode_info.object_picked_up
        episode_success = episode_info._success

        # To also dump the individual frames, write every image of
        # `self.log_queue` here with cv2.imwrite (channels reordered to BGR).

        episode_success_offset = "succ" if episode_success else "fail"
        pickup_success_offset = "succ" if pickup_success else "fail"

        gif_name = (
            time_to_write
            + "_from_"
            + source_object_id.split("|")[0]
            + "_to_"
            + goal_object_id.split("|")[0]
            + "_pickup_"
            + pickup_success_offset
            + "_episode_"
            + episode_success_offset
            + ".gif"
        )
        # Insert a singleton "column" axis after the time axis, as expected
        # by save_image_list_to_gif.
        concat_all_images = np.expand_dims(np.stack(self.log_queue, axis=0), axis=1)
        save_image_list_to_gif(concat_all_images, gif_name, self.log_dir)

        this_controller = environment.controller
        scene = this_controller.last_event.metadata["sceneName"]
        reset_environment_and_additional_commands(this_controller, scene)

        img_adr = os.path.join(self.log_dir, time_to_write)
        self.log_start_goal(
            environment,
            task_info["visualization_source"],
            tag="start",
            img_adr=img_adr,
        )
        self.log_start_goal(
            environment,
            task_info["visualization_target"],
            tag="goal",
            img_adr=img_adr,
        )

        self.log_queue = []
        self.action_queue = []

    def log(self, environment, action_str="", disturbance_str=""):
        """Queue the current frame and action string.

        ``disturbance_str`` is accepted (and ignored) so that this logger can
        be driven through the same keyword interface as the other loggers in
        this module.
        """
        image_tensor = environment.current_frame
        self.action_queue.append(action_str)
        self.log_queue.append(image_tensor)

    def log_start_goal(self, env, task_info, tag, img_adr):
        """Re-create a start/goal configuration and save its image and mask.

        Args:
            env: environment whose ``controller`` is used.
            task_info: dict with ``object_location``, ``object_id`` and
                ``agent_pose`` entries.
            tag: label embedded in the file names (e.g. "start" / "goal").
            img_adr: path prefix of the written files.
        """
        object_location = task_info["object_location"]
        object_id = task_info["object_id"]
        agent_state = task_info["agent_pose"]
        this_controller = env.controller
        # We should not reset here

        # Starting the arm from high up (as a cheat); this block is very
        # important, never remove it.
        event1, event2, event3 = initialize_arm(this_controller)
        if not (
            event1.metadata["lastActionSuccess"]
            and event2.metadata["lastActionSuccess"]
            and event3.metadata["lastActionSuccess"]
        ):
            print("ERROR: ARM MOVEMENT FAILED in logging! SHOULD NEVER HAPPEN")

        event = transport_wrapper(this_controller, object_id, object_location)
        if not event.metadata["lastActionSuccess"]:
            print("ERROR: oh no could not transport in logging")

        event = this_controller.step(
            dict(
                action="TeleportFull",
                standing=True,
                x=agent_state["position"]["x"],
                y=agent_state["position"]["y"],
                z=agent_state["position"]["z"],
                rotation=dict(
                    x=agent_state["rotation"]["x"],
                    y=agent_state["rotation"]["y"],
                    z=agent_state["rotation"]["z"],
                ),
                horizon=agent_state["cameraHorizon"],
            )
        )
        if not event.metadata["lastActionSuccess"]:
            print("ERROR: oh no could not teleport in logging")

        object_type = object_id.split("|")[0]
        image_tensor = this_controller.last_event.frame
        image_dir = img_adr + "_obj_" + object_type + "_pickup_" + tag + ".png"
        # Reverse the channel order before handing the frame to cv2.
        cv2.imwrite(image_dir, image_tensor[:, :, [2, 1, 0]])

        # Saving the mask: all zeros when the object has no instance mask.
        all_visible_masks = this_controller.last_event.instance_masks
        if object_id in all_visible_masks:
            mask_frame = all_visible_masks[object_id]
        else:
            mask_frame = np.zeros(env.controller.last_event.frame[:, :, 0].shape)
        mask_dir = img_adr + "_obj_" + object_type + "_pickup_" + tag + "_mask.png"
        cv2.imwrite(mask_dir, mask_frame.astype(float) * 255.0)
class ImageVisualizer(LoggerVisualizer):
    """Visualizer that saves an annotated episode GIF and start/goal images.

    Optionally adds a top-down view column to the GIF and a colorized depth
    map next to the start/goal snapshots.
    """

    def __init__(
        self,
        exp_name="",
        log_dir="",
        add_top_down_view: bool = False,
        add_depth_map: bool = False,
    ):
        """Set up the log directory, option flags and frame queues."""
        super().__init__(exp_name=exp_name, log_dir=log_dir)
        self.add_top_down_view = add_top_down_view
        self.add_depth_map = add_depth_map
        if self.add_top_down_view:
            self.top_down_queue = []
        self.disturbance_distance_queue = []

    def finish_episode(self, environment, episode_info, task_info):
        """Write the queued frames as a GIF and save start/goal images.

        Frames are annotated with the queued disturbance strings. The GIF
        name encodes the logger index, pickup/episode success, the scene,
        the object type and the source/target countertops. All queues are
        cleared afterwards.
        """
        time_to_write = "log_ind_{:03d}".format(self.logger_index)
        self.logger_index += 1
        print("Logging", time_to_write, "len", len(self.log_queue))

        object_id = task_info["objectId"]
        scene_name = task_info["source_location"]["scene_name"]
        source_countertop = task_info["source_location"]["countertop_id"]
        target_countertop = task_info["target_location"]["countertop_id"]

        pickup_success = episode_info.object_picked_up
        episode_success = episode_info._success

        # To also dump the individual frames, write every image of
        # `self.log_queue` here with cv2.imwrite (channels reordered to BGR).

        episode_success_offset = "succ" if episode_success else "fail"
        pickup_success_offset = "succ" if pickup_success else "fail"

        gif_name = (
            time_to_write
            + "_pickup_"
            + pickup_success_offset
            + "_episode_"
            + episode_success_offset
            + "_"
            + scene_name.split("_")[0]
            + "_obj_"
            + object_id.split("|")[0]
            + "_from_"
            + source_countertop.split("|")[0]
            + "_to_"
            + target_countertop.split("|")[0]
            + ".gif"
        )

        self.log_queue = put_annotation_on_image(
            self.log_queue, self.disturbance_distance_queue
        )

        # Insert a singleton "column" axis after the time axis.
        concat_all_images = np.expand_dims(np.stack(self.log_queue, axis=0), axis=1)
        if self.add_top_down_view:
            topdown_all_images = np.expand_dims(
                np.stack(self.top_down_queue, axis=0), axis=1
            )  # (T, 1, H, W, 3)
            concat_all_images = np.concatenate(
                [concat_all_images, topdown_all_images], axis=1
            )  # (T, 2, H, W, 3)

        save_image_list_to_gif(concat_all_images, gif_name, self.log_dir)

        img_adr = os.path.join(self.log_dir, time_to_write)
        self.log_start_goal(
            environment,
            task_info["visualization_source"],
            tag="start",
            img_adr=img_adr,
        )
        self.log_start_goal(
            environment,
            task_info["visualization_target"],
            tag="goal",
            img_adr=img_adr,
        )

        self.log_queue = []
        self.action_queue = []
        self.disturbance_distance_queue = []
        if self.add_top_down_view:
            self.top_down_queue = []

    def log(self, environment, action_str="", disturbance_str=""):
        """Queue the current frame, action and disturbance strings.

        With ``add_top_down_view`` enabled, a top-down frame is also queued.
        """
        self.action_queue.append(action_str)
        self.disturbance_distance_queue.append(disturbance_str)

        image_tensor = environment.current_frame
        self.log_queue.append(image_tensor)

        if self.add_top_down_view:
            # Reference: https://github.com/allenai/ai2thor/pull/814
            event = environment.controller.step(action="GetMapViewCameraProperties")
            event = environment.controller.step(
                action="AddThirdPartyCamera", **event.metadata["actionReturn"]
            )
            self.top_down_queue.append(event.third_party_camera_frames[0])

    def log_start_goal(self, env, task_info, tag, img_adr):
        """Re-create a start/goal configuration and save its image(s).

        Args:
            env: environment whose ``controller`` is used; it is reset to
                its current scene first.
            task_info: dict with ``object_location``, ``object_id`` and
                ``agent_pose`` entries.
            tag: label embedded in the file names (e.g. "start" / "goal").
            img_adr: path prefix of the written files.
        """
        object_location = task_info["object_location"]
        object_id = task_info["object_id"]
        agent_state = task_info["agent_pose"]
        this_controller = env.controller
        scene = this_controller.last_event.metadata["sceneName"]
        reset_environment_and_additional_commands(this_controller, scene)

        # Starting the arm from high up (as a cheat); this block is very
        # important, never remove it.
        event1, event2, event3 = initialize_arm(this_controller)
        if not (
            event1.metadata["lastActionSuccess"]
            and event2.metadata["lastActionSuccess"]
            and event3.metadata["lastActionSuccess"]
        ):
            print("ERROR: ARM MOVEMENT FAILED in logging! SHOULD NEVER HAPPEN")

        event = transport_wrapper(this_controller, object_id, object_location)
        if not event.metadata["lastActionSuccess"]:
            print("ERROR: oh no could not transport in logging")

        event = this_controller.step(
            dict(
                action="TeleportFull",
                standing=True,
                x=agent_state["position"]["x"],
                y=agent_state["position"]["y"],
                z=agent_state["position"]["z"],
                rotation=dict(
                    x=agent_state["rotation"]["x"],
                    y=agent_state["rotation"]["y"],
                    z=agent_state["rotation"]["z"],
                ),
                horizon=agent_state["cameraHorizon"],
            )
        )
        if not event.metadata["lastActionSuccess"]:
            print("ERROR: oh no could not teleport in logging")

        image_tensor = this_controller.last_event.frame
        image_dir = img_adr + "_" + tag + ".png"
        # Reverse the channel order before handing the frame to cv2.
        cv2.imwrite(image_dir, image_tensor[:, :, [2, 1, 0]])

        if self.add_depth_map:
            depth = this_controller.last_event.depth_frame.copy()  # (H, W)
            depth[depth > 5.0] = 5.0  # clip far values before normalizing
            norm = matplotlib.colors.Normalize(vmin=depth.min(), vmax=depth.max())
            # `plt.get_cmap` already returns the colormap object; wrapping it
            # in `matplotlib.cm.get_cmap` was redundant and fails on
            # Matplotlib versions where that function no longer exists.
            rgb = plt.get_cmap("viridis")(norm(depth))[:, :, :3]  # [0,1]
            rgb = (rgb * 255).astype(np.uint8)

            depth_dir = img_adr + "_" + tag + "_depth.png"
            cv2.imwrite(depth_dir, rgb[:, :, [2, 1, 0]])
def save_image_list_to_gif(image_list, gif_name, gif_dir):
    """Tile the image columns side by side and save the sequence as a GIF.

    Args:
        image_list: 5-dimensional array unpacked as
            ``(seq_len, cols, height, width, channels)``; the ``cols`` image
            streams are laid out horizontally in every frame.
        gif_name: file name of the GIF.
        gif_dir: output directory; created if it does not exist.
    """
    gif_adr = os.path.join(gif_dir, gif_name)

    seq_len, cols, height, width, channels = image_list.shape

    pallet = np.zeros(
        (seq_len, height, width * cols, channels)
    )  # to support multiple animations in one gif

    for col_ind in range(cols):
        pallet[:, :, col_ind * width : (col_ind + 1) * width, :] = image_list[
            :, col_ind
        ]

    # exist_ok avoids the check-then-create race when several workers log
    # into the same directory.
    os.makedirs(gif_dir, exist_ok=True)
    imageio.mimsave(gif_adr, pallet.astype(np.uint8), format="GIF", duration=1 / 5)
    print("Saved result in ", gif_adr)
def put_annotation_on_image(images, annotations):
    """Draw each annotation string onto its image and return the new images.

    Images and annotations are paired with ``zip``, so extra entries of the
    longer input are ignored. The text is drawn in black at pixel (10, 10).

    Returns:
        A list of numpy arrays, one per annotated image.
    """

    def _annotate(frame, text):
        # PIL is imported lazily, only when there is something to draw.
        from PIL import Image, ImageDraw

        canvas = Image.fromarray(frame)
        ImageDraw.Draw(canvas).text((10, 10), text, (0, 0, 0))
        return np.array(canvas)

    return [_annotate(frame, text) for frame, text in zip(images, annotations)]
================================================
FILE: allenact_plugins/minigrid_plugin/__init__.py
================================================
from allenact.utils.system import ImportChecker
with ImportChecker(
"\n\nPlease install babyai with:\n\n"
"pip install -e git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd#egg=babyai\n",
):
import babyai
================================================
FILE: allenact_plugins/minigrid_plugin/configs/__init__.py
================================================
================================================
FILE: allenact_plugins/minigrid_plugin/configs/minigrid_nomemory.py
================================================
"""Experiment Config for MiniGrid tutorial."""
import gym
import torch.nn as nn
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.minigrid_plugin.minigrid_models import MiniGridSimpleConv
from allenact_plugins.minigrid_plugin.minigrid_tasks import MiniGridTask
from projects.tutorials.minigrid_tutorial import MiniGridTutorialExperimentConfig
class MiniGridNoMemoryExperimentConfig(MiniGridTutorialExperimentConfig):
    """MiniGrid tutorial experiment using the memory-less ``MiniGridSimpleConv``."""

    @classmethod
    def tag(cls) -> str:
        """Return the experiment tag."""
        return "MiniGridNoMemory"

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the model; vocabulary sizes come from the first sensor."""
        n_actions = len(MiniGridTask.class_action_names())
        obs_spaces = SensorSuite(cls.SENSORS).observation_spaces
        ego_sensor = cls.SENSORS[0]
        return MiniGridSimpleConv(
            action_space=gym.spaces.Discrete(n_actions),
            observation_space=obs_spaces,
            num_objects=ego_sensor.num_objects,
            num_colors=ego_sensor.num_colors,
            num_states=ego_sensor.num_states,
        )
================================================
FILE: allenact_plugins/minigrid_plugin/data/__init__.py
================================================
================================================
FILE: allenact_plugins/minigrid_plugin/extra_environment.yml
================================================
dependencies:
- patsy>=0.5.1
- pip
- pip:
- gym-minigrid>=1.0.1
- pickle5
================================================
FILE: allenact_plugins/minigrid_plugin/extra_requirements.txt
================================================
patsy>=0.5.1
gym-minigrid>=1.0.1
pickle5
================================================
FILE: allenact_plugins/minigrid_plugin/minigrid_environments.py
================================================
import copy
from typing import Optional, Set
import numpy as np
from gym import register
from gym_minigrid.envs import CrossingEnv
from gym_minigrid.minigrid import Lava, Wall
class FastCrossing(CrossingEnv):
    """Similar to `CrossingEnv`, but to support faster task sampling as per
    `repeat_failed_task_for_min_steps` flag in MiniGridTaskSampler."""

    def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, seed=None):
        """Declare the cached start state, then build the parent environment."""
        # These attributes are assigned before the parent constructor runs;
        # `reset` fills in the initial pose.
        self.init_agent_pos: Optional[np.ndarray] = None
        self.init_agent_dir: Optional[int] = None
        self.step_count: Optional[int] = None
        super(FastCrossing, self).__init__(
            size=size,
            num_crossings=num_crossings,
            obstacle_type=obstacle_type,
            seed=seed,
        )

    def same_seed_reset(self):
        """Put the agent back at the cached start pose without regenerating
        the grid, and return the first observation."""
        assert self.init_agent_pos is not None

        # Current position and direction of the agent
        self.agent_pos = self.init_agent_pos
        self.agent_dir = self.init_agent_dir

        # Check that the agent doesn't overlap with an object
        start_cell = self.grid.get(*self.agent_pos)
        assert start_cell is None or start_cell.can_overlap()

        assert self.carrying is None

        # Step count since episode start
        self.step_count = 0

        # Return first observation
        obs = self.gen_obs()
        return obs

    def reset(self, partial_reset: bool = False):
        """Fully reset the environment and cache the agent's start pose.

        Args:
            partial_reset: accepted but currently unused.

        Returns:
            Whatever the parent ``reset`` returns (the first observation).
            Previously this value was dropped and ``None`` was returned.
        """
        obs = super(FastCrossing, self).reset()
        self.init_agent_pos = copy.deepcopy(self.agent_pos)
        self.init_agent_dir = self.agent_dir
        return obs
class AskForHelpSimpleCrossing(CrossingEnv):
    """Corresponds to WC FAULTY SWITCH environment.

    The image observation is zeroed out unless it has been revealed with the
    `toggle` action. Optional exploration rewards and death penalties can be
    added on top of the parent environment's reward.
    """

    def __init__(
        self,
        size=9,
        num_crossings=1,
        obstacle_type=Wall,
        seed=None,
        exploration_reward: Optional[float] = None,
        death_penalty: Optional[float] = None,
        toggle_is_permenant: bool = False,
    ):
        """Store the reward options, then build the parent environment.

        Args:
            exploration_reward: if not None, added to the reward the first
                time each cell is visited.
            death_penalty: if not None, added to the reward when the episode
                ends unsuccessfully with steps remaining.
            toggle_is_permenant: if True, the image stays revealed after the
                first `toggle`; otherwise only the toggle step reveals it.
        """
        # All attributes are assigned before the parent constructor runs.
        self.init_agent_pos: Optional[np.ndarray] = None
        self.init_agent_dir: Optional[int] = None
        self.should_reveal_image: bool = False
        self.exploration_reward = exploration_reward
        self.death_penalty = death_penalty

        self.explored_points: Set = set()
        self._was_successful = False
        self.toggle_is_permanent = toggle_is_permenant

        self.step_count: Optional[int] = None

        super(AskForHelpSimpleCrossing, self).__init__(
            size=size,
            num_crossings=num_crossings,
            obstacle_type=obstacle_type,
            seed=seed,
        )

    @property
    def was_successful(self) -> bool:
        """Whether a positive reward has been received this episode."""
        return self._was_successful

    def gen_obs(self):
        """Return the parent observation, with the image zeroed unless revealed."""
        obs = super(AskForHelpSimpleCrossing, self).gen_obs()
        if not self.should_reveal_image:
            obs["image"] *= 0
        return obs

    def metrics(self):
        """Return the explored cell count and the smallest L1 distance from
        any explored cell to ``(width - 2, height - 2)``."""
        return {
            "explored_count": len(self.explored_points),
            "final_distance": float(
                min(
                    abs(x - (self.width - 2)) + abs(y - (self.height - 2))
                    for x, y in self.explored_points
                )
            ),
        }

    def step(self, action: int):
        """Reveal the observation only if the `toggle` action is executed."""
        if action == self.actions.toggle:
            self.should_reveal_image = True
        else:
            # Stay revealed only when toggling is permanent.
            self.should_reveal_image = (
                self.should_reveal_image and self.toggle_is_permanent
            )

        minigrid_obs, reward, done, info = super(AskForHelpSimpleCrossing, self).step(
            action=action
        )

        assert not self._was_successful, "Called step after done."
        self._was_successful = self._was_successful or (reward > 0)

        # Episode ended early and without success: apply the death penalty.
        if (
            done
            and self.steps_remaining != 0
            and (not self._was_successful)
            and self.death_penalty is not None
        ):
            reward += self.death_penalty

        t = tuple(self.agent_pos)
        if self.exploration_reward is not None:
            if t not in self.explored_points:
                reward += self.exploration_reward
        self.explored_points.add(t)

        return minigrid_obs, reward, done, info

    def same_seed_reset(self):
        """Put the agent back at the cached start pose without regenerating
        the grid, clear the per-episode state and return the first
        observation."""
        assert self.init_agent_pos is not None
        self._was_successful = False

        # Current position and direction of the agent
        self.agent_pos = self.init_agent_pos
        self.agent_dir = self.init_agent_dir

        self.explored_points.clear()
        self.explored_points.add(tuple(self.agent_pos))
        self.should_reveal_image = False

        # Check that the agent doesn't overlap with an object
        start_cell = self.grid.get(*self.agent_pos)
        assert start_cell is None or start_cell.can_overlap()

        assert self.carrying is None

        # Step count since episode start
        self.step_count = 0

        # Return first observation
        obs = self.gen_obs()
        return obs

    def reset(self, partial_reset: bool = False):
        """Fully reset the environment and the per-episode bookkeeping.

        Args:
            partial_reset: accepted but currently unused.

        Returns:
            Whatever the parent ``reset`` returns (the first observation).
            Previously this value was dropped and ``None`` was returned.
        """
        # Clear the episode flags before the parent reset so that an
        # observation it produces via `gen_obs` is not left revealed by the
        # previous episode (assumes the parent reset builds its observation
        # through `gen_obs` -- TODO confirm).
        self._was_successful = False
        self.should_reveal_image = False

        obs = super(AskForHelpSimpleCrossing, self).reset()

        self.explored_points.clear()
        self.explored_points.add(tuple(self.agent_pos))
        self.init_agent_pos = copy.deepcopy(self.agent_pos)
        self.init_agent_dir = self.agent_dir
        return obs
class LavaCrossingS25N10(CrossingEnv):
    """`CrossingEnv` preset with ``size=25`` and ``num_crossings=10``."""

    def __init__(self):
        super().__init__(size=25, num_crossings=10)
class LavaCrossingS15N7(CrossingEnv):
    """`CrossingEnv` preset with ``size=15`` and ``num_crossings=7``."""

    def __init__(self):
        super().__init__(size=15, num_crossings=7)
class LavaCrossingS11N7(CrossingEnv):
    """`CrossingEnv` preset with ``size=11`` and ``num_crossings=7``.

    NOTE(review): this class used to be built with ``size=9,
    num_crossings=4``, which contradicts its name, its registered id
    ("MiniGrid-LavaCrossingS11N7-v0") and the sibling ``S<size>N<crossings>``
    presets. The parameters now follow the name; confirm that no stored
    results depend on the old 9x9 / 4-crossing configuration.
    """

    def __init__(self):
        super(LavaCrossingS11N7, self).__init__(size=11, num_crossings=7)
# Register the lava-crossing presets defined above with gym.
for _env_id, _entry_point in (
    (
        "MiniGrid-LavaCrossingS25N10-v0",
        "allenact_plugins.minigrid_plugin.minigrid_environments:LavaCrossingS25N10",
    ),
    (
        "MiniGrid-LavaCrossingS15N7-v0",
        "allenact_plugins.minigrid_plugin.minigrid_environments:LavaCrossingS15N7",
    ),
    (
        "MiniGrid-LavaCrossingS11N7-v0",
        "allenact_plugins.minigrid_plugin.minigrid_environments:LavaCrossingS11N7",
    ),
):
    register(id=_env_id, entry_point=_entry_point)
# Do not leave the loop variables behind as module attributes.
del _env_id, _entry_point
================================================
FILE: allenact_plugins/minigrid_plugin/minigrid_models.py
================================================
import abc
from typing import Callable, Dict, Optional, Tuple, cast
import gym
import numpy as np
import torch
from gym.spaces.dict import Dict as SpaceDict
import torch.nn as nn
from allenact.algorithms.onpolicy_sync.policy import (
ActorCriticModel,
Memory,
DistributionType,
ActorCriticOutput,
ObservationType,
)
from allenact.base_abstractions.distributions import Distr, CategoricalDistr
from allenact.embodiedai.models.basic_models import LinearActorCritic, RNNActorCritic
from allenact.utils.misc_utils import prepare_locals_for_super
class MiniGridSimpleConvBase(ActorCriticModel[Distr], abc.ABC):
    """Shared encoder for MiniGrid egocentric-image actor-critic models.

    Each channel of the ``minigrid_ego_image`` observation (object, color,
    state) is embedded separately; the concatenated embeddings are flattened
    and handed to ``self.actor_critic``, which subclasses must create.
    """

    actor_critic: ActorCriticModel

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        num_objects: int,
        num_colors: int,
        num_states: int,
        object_embedding_dim: int = 8,
        **kwargs,
    ):
        """Create the per-channel embeddings.

        Args:
            action_space: discrete action space of the task.
            observation_space: must contain a square ``minigrid_ego_image``
                entry of shape (view, view, channels) with view >= 3.
            num_objects: object vocabulary size; 0 disables that channel.
            num_colors: color vocabulary size; 0 disables that channel.
            num_states: state vocabulary size; 0 disables that channel.
            object_embedding_dim: embedding size used for every channel.
            **kwargs: ignored.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        self.num_objects = num_objects
        self.object_embedding_dim = object_embedding_dim

        vis_input_shape = observation_space["minigrid_ego_image"].shape
        agent_view_x, agent_view_y, view_channels = vis_input_shape
        assert agent_view_x == agent_view_y
        self.agent_view = agent_view_x
        self.view_channels = view_channels

        assert (np.array(vis_input_shape[:2]) >= 3).all(), (
            "MiniGridSimpleConvRNN requires "
            "that the input size be at least 3x3."
        )

        # Channels are assigned consecutive indices in the order
        # object, color, state, skipping disabled ones.
        self.num_channels = 0

        if self.num_objects > 0:
            # Object embedding
            self.object_embedding = nn.Embedding(
                num_embeddings=num_objects, embedding_dim=self.object_embedding_dim
            )
            self.object_channel = self.num_channels
            self.num_channels += 1

        self.num_colors = num_colors
        if self.num_colors > 0:
            # Same dimensionality used for colors and states
            self.color_embedding = nn.Embedding(
                num_embeddings=num_colors, embedding_dim=self.object_embedding_dim
            )
            self.color_channel = self.num_channels
            self.num_channels += 1

        self.num_states = num_states
        if self.num_states > 0:
            self.state_embedding = nn.Embedding(
                num_embeddings=num_states, embedding_dim=self.object_embedding_dim
            )
            self.state_channel = self.num_channels
            self.num_channels += 1

        assert self.num_channels == self.view_channels > 0

        self.ac_key = "enc"
        self.observations_for_ac: Dict[str, Optional[torch.Tensor]] = {
            self.ac_key: None
        }

        self.num_agents = 1

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Embed the egocentric image and run the wrapped actor-critic.

        Returns:
            The output and memory returned by ``self.actor_critic``.
        """
        minigrid_ego_image = cast(torch.Tensor, observations["minigrid_ego_image"])
        # A 6-dimensional image carries an explicit agent dimension. This
        # used to compare the shape object itself with 6, which is never
        # equal, so the agent branch below could not be reached.
        use_agent = len(minigrid_ego_image.shape) == 6
        nrow, ncol, nchannels = minigrid_ego_image.shape[-3:]
        nsteps, nsamplers, nagents = masks.shape[:3]

        assert nrow == ncol == self.agent_view
        assert nchannels == self.view_channels == self.num_channels

        embed_list = []
        if self.num_objects > 0:
            ego_object_embeds = self.object_embedding(
                minigrid_ego_image[..., self.object_channel].long()
            )
            embed_list.append(ego_object_embeds)
        if self.num_colors > 0:
            ego_color_embeds = self.color_embedding(
                minigrid_ego_image[..., self.color_channel].long()
            )
            embed_list.append(ego_color_embeds)
        if self.num_states > 0:
            ego_state_embeds = self.state_embedding(
                minigrid_ego_image[..., self.state_channel].long()
            )
            embed_list.append(ego_state_embeds)
        ego_embeds = torch.cat(embed_list, dim=-1)

        # Flatten everything after the leading batch dimensions.
        if use_agent:
            self.observations_for_ac[self.ac_key] = ego_embeds.view(
                nsteps, nsamplers, nagents, -1
            )
        else:
            self.observations_for_ac[self.ac_key] = ego_embeds.view(
                nsteps, nsamplers * nagents, -1
            )

        # noinspection PyCallingNonCallable
        out, mem_return = self.actor_critic(
            observations=self.observations_for_ac,
            memory=memory,
            prev_actions=prev_actions,
            masks=masks,
        )

        # Do not keep a reference to this step's tensors between calls.
        self.observations_for_ac[self.ac_key] = None

        return out, mem_return
class MiniGridSimpleConvRNN(MiniGridSimpleConvBase):
    """MiniGrid encoder followed by a recurrent actor-critic."""

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        num_objects: int,
        num_colors: int,
        num_states: int,
        object_embedding_dim: int = 8,
        hidden_size=512,
        num_layers=1,
        rnn_type="GRU",
        head_type: Callable[
            ..., ActorCriticModel[CategoricalDistr]
        ] = LinearActorCritic,
        **kwargs,
    ):
        """Build the shared encoder and the ``RNNActorCritic`` on top of it."""
        # Must remain the first statement: it forwards this frame's locals,
        # so no other local may be defined before it.
        super().__init__(**prepare_locals_for_super(locals()))

        self._hidden_size = hidden_size

        view_rows, view_cols, n_channels = observation_space[
            "minigrid_ego_image"
        ].shape
        # One embedding vector per cell and per channel, flattened.
        flat_encoding_size = (
            self.object_embedding_dim * view_rows * view_cols * n_channels
        )
        encoding_space = SpaceDict(
            {
                self.ac_key: gym.spaces.Box(
                    low=np.float32(-1.0),
                    high=np.float32(1.0),
                    shape=(flat_encoding_size,),
                )
            }
        )
        self.actor_critic = RNNActorCritic(
            input_uuid=self.ac_key,
            action_space=action_space,
            observation_space=encoding_space,
            hidden_size=hidden_size,
            num_layers=num_layers,
            rnn_type=rnn_type,
            head_type=head_type,
        )
        self.memory_key = "rnn"

        self.train()

    @property
    def num_recurrent_layers(self):
        """Number of recurrent layers of the wrapped actor-critic."""
        return self.actor_critic.num_recurrent_layers

    @property
    def recurrent_hidden_state_size(self):
        """Hidden size passed at construction."""
        return self._hidden_size

    def _recurrent_memory_specification(self):
        """Describe the recurrent memory: (layer, sampler, hidden) float32."""
        memory_dims = (
            ("layer", self.num_recurrent_layers),
            ("sampler", None),
            ("hidden", self.recurrent_hidden_state_size),
        )
        return {self.memory_key: (memory_dims, torch.float32)}
class MiniGridSimpleConv(MiniGridSimpleConvBase):
    """Memory-less MiniGrid model built on top of `MiniGridSimpleConvBase`.

    A `LinearActorCritic` is created as `self.actor_critic`; the model reports
    zero recurrent layers and has no recurrent memory specification.
    """

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        num_objects: int,
        num_colors: int,
        num_states: int,
        object_embedding_dim: int = 8,
        **kwargs,
    ):
        # NOTE: this must remain the first statement -- `locals()` is forwarded
        # to the base class, so no extra local may exist at this point.
        super().__init__(**prepare_locals_for_super(locals()))

        view_x, view_y, n_channels = observation_space["minigrid_ego_image"].shape

        # Size of the flattened embedding handed to the actor-critic head.
        flat_embedding_size = (
            self.object_embedding_dim * view_x * view_y * n_channels
        )
        embedding_space = gym.spaces.Box(
            low=np.float32(-1.0),
            high=np.float32(1.0),
            shape=(flat_embedding_size,),
        )

        self.actor_critic = LinearActorCritic(
            self.ac_key,
            action_space=action_space,
            observation_space=SpaceDict({self.ac_key: embedding_space}),
        )
        self.memory_key = None

        self.train()

    @property
    def num_recurrent_layers(self):
        """Always 0: this model has no recurrent layers."""
        return 0

    @property
    def recurrent_hidden_state_size(self):
        """Always 0: this model has no recurrent state."""
        return 0

    # noinspection PyMethodMayBeStatic
    def _recurrent_memory_specification(self):
        """Return `None`, i.e. no memory is required."""
        return None
================================================
FILE: allenact_plugins/minigrid_plugin/minigrid_offpolicy.py
================================================
import math
import queue
import random
from collections import defaultdict
from typing import Dict, Tuple, Any, cast, List, Union, Optional
import babyai
import blosc
import numpy as np
import pickle5 as pickle
import torch
from gym_minigrid.minigrid import MiniGridEnv
from allenact.algorithms.offpolicy_sync.losses.abstract_offpolicy_loss import Memory
from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.algorithms.onpolicy_sync.storage import (
ExperienceStorage,
StreamingStorageMixin,
)
from allenact.base_abstractions.misc import GenericAbstractLoss, LossOutput, ModelType
from allenact.utils.misc_utils import partition_limits
from allenact.utils.system import get_logger
from allenact_plugins.minigrid_plugin.minigrid_sensors import MiniGridMissionSensor
# Module-level cache of loaded demonstration datasets, keyed by dataset path,
# so that a given path is only loaded from disk once per process.
_DATASET_CACHE: Dict[str, Any] = {}
class MiniGridOffPolicyExpertCELoss(GenericAbstractLoss):
    """Cross-entropy loss between the policy's actions and expert actions.

    The batch is expected to contain the keys ``"minigrid_ego_image"``,
    ``"masks"`` and ``"expert_action"``.
    """

    def __init__(self, total_episodes_in_epoch: Optional[int] = None):
        super().__init__()
        self.total_episodes_in_epoch = total_episodes_in_epoch

    def loss(  # type: ignore
        self,
        *,  # No positional arguments
        model: ModelType,
        batch: ObservationType,
        batch_memory: Memory,
        stream_memory: Memory,
    ) -> LossOutput:
        """Compute the negative mean log-probability of the expert actions.

        # Parameters

        model : The actor-critic model being trained.
        batch : Observations for the batch; its first two dimensions are taken
            to be (rollout length, number of rollouts).
        batch_memory : Passed through unchanged to the returned `LossOutput`.
        stream_memory : Recurrent memory carried across consecutive batches.
            If empty, it is zero-initialized from the model's recurrent memory
            specification (when the model has one).

        # Returns

        A `LossOutput` holding the loss value, an info dict with key
        ``"expert_ce"``, both memories, and the batch size.
        """
        ego_image = cast(torch.Tensor, batch["minigrid_ego_image"])
        rollout_len, nrollouts = ego_image.shape[:2]

        # Initialize Memory if empty
        if len(stream_memory) == 0:
            spec = model.recurrent_memory_specification
            # Memory-less models may report no specification at all; in that
            # case there is nothing to initialize.
            if spec is not None:
                for key, (dims_template, dtype) in spec.items():
                    # get sampler_dim and all_dims from dims_template (and nrollouts)
                    dim_names = [d[0] for d in dims_template]
                    sampler_dim = dim_names.index("sampler")

                    all_dims = [d[1] for d in dims_template]
                    all_dims[sampler_dim] = nrollouts

                    stream_memory.check_append(
                        key=key,
                        tensor=torch.zeros(
                            *all_dims,
                            dtype=dtype,
                            device=ego_image.device,
                        ),
                        sampler_dim=sampler_dim,
                    )

        # Forward data (through the actor and critic)
        ac_out, stream_memory = model.forward(
            observations=batch,
            memory=stream_memory,
            prev_actions=None,  # type:ignore
            masks=cast(torch.FloatTensor, batch["masks"]),
        )

        # Compute the loss from the actor's output and expert action
        expert_ce_loss = -ac_out.distributions.log_prob(batch["expert_action"]).mean()

        info = {"expert_ce": expert_ce_loss.item()}

        return LossOutput(
            value=expert_ce_loss,
            info=info,
            per_epoch_info={},
            batch_memory=batch_memory,
            stream_memory=stream_memory,
            bsize=rollout_len * nrollouts,
        )
def transform_demos(demos):
    """Expand compressed demonstrations into per-step tuples.

    A modified version of `babyai.utils.demos.transform_demos` where we use
    pickle 5 instead of standard pickle.

    Each input demo is indexed as (mission, compressed images, directions,
    actions). Each output demo is a list of `(obs, action, done)` tuples where
    `obs` has keys ``"image"``, ``"direction"`` and ``"mission"`` and `done` is
    true only for the final step.
    """
    # NOTE(review): this unpickles data read from the demo file; only use it
    # with trusted datasets.
    transformed = []
    for demo in demos:
        mission, packed_images, directions, actions = (
            demo[0],
            demo[1],
            demo[2],
            demo[3],
        )

        # First decompress the pickle and then unpickle it
        images = pickle.loads(
            blosc.blosc_extension.decompress(packed_images, False)
        )
        num_obs = images.shape[0]
        assert (
            len(directions) == len(actions) == num_obs
        ), "error transforming demos"

        last_step = num_obs - 1
        transformed.append(
            [
                (
                    {
                        "image": images[step],
                        "direction": directions[step],
                        "mission": mission,
                    },
                    actions[step],
                    step == last_step,
                )
                for step in range(num_obs)
            ]
        )
    return transformed
class MiniGridExpertTrajectoryStorage(ExperienceStorage, StreamingStorageMixin):
    """Streaming storage serving fixed-length rollouts of expert demonstrations.

    Demonstrations are loaded lazily (on first access of `data`) from
    `data_path`, optionally truncated to roughly
    `restrict_max_steps_in_dataset` steps, and partitioned across workers.
    Each sampler owns a queue of demonstration steps from which rollouts of
    length `rollout_len` are drawn.
    """

    def __init__(
        self,
        data_path: str,
        num_samplers: int,
        rollout_len: int,
        instr_len: Optional[int],
        restrict_max_steps_in_dataset: Optional[int] = None,
        device: torch.device = torch.device("cpu"),
    ):
        super(MiniGridExpertTrajectoryStorage, self).__init__()
        self.data_path = data_path
        self._data: Optional[
            List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]
        ] = None
        self.restrict_max_steps_in_dataset = restrict_max_steps_in_dataset

        self.original_num_samplers = num_samplers
        self.num_samplers = num_samplers

        self.rollout_len = rollout_len
        self.instr_len = instr_len

        self.current_worker = 0
        self.num_workers = 1

        # The mission sensor is only created when an instruction length is given.
        self.minigrid_mission_sensor: Optional[MiniGridMissionSensor] = None
        if instr_len is not None:
            self.minigrid_mission_sensor = MiniGridMissionSensor(instr_len)

        self.rollout_queues = []
        self._remaining_inds = []
        self.sampler_to_num_steps_in_queue = []
        self._total_experiences = 0

        self.device = device

    @property
    def data(self) -> List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]:
        """This worker's slice of the dataset, loading it on first access.

        Loading also (re)creates and pre-fills one rollout queue per sampler.
        """
        if self._data is None:
            if self.data_path not in _DATASET_CACHE:
                get_logger().info(
                    f"Loading minigrid dataset from {self.data_path} for first time..."
                )
                _DATASET_CACHE[self.data_path] = babyai.utils.load_demos(self.data_path)
                assert (
                    _DATASET_CACHE[self.data_path] is not None
                    and len(_DATASET_CACHE[self.data_path]) != 0
                )
                get_logger().info(
                    "Loading minigrid dataset complete, it contains {} trajectories".format(
                        len(_DATASET_CACHE[self.data_path])
                    )
                )
            self._data = _DATASET_CACHE[self.data_path]

            if self.restrict_max_steps_in_dataset is not None:
                # Keep whole trajectories until the step budget is reached.
                restricted_data = []
                cur_len = 0
                for d in self._data:
                    if cur_len >= self.restrict_max_steps_in_dataset:
                        break
                    restricted_data.append(d)
                    cur_len += len(d[2])
                self._data = restricted_data

            parts = partition_limits(len(self._data), self.num_workers)
            self._data = self._data[
                parts[self.current_worker] : parts[self.current_worker + 1]
            ]

            self.rollout_queues = [queue.Queue() for _ in range(self.num_samplers)]
            self.sampler_to_num_steps_in_queue = [0 for _ in range(self.num_samplers)]
            for it, q in enumerate(self.rollout_queues):
                self._fill_rollout_queue(q, it)

        return self._data

    def set_partition(self, index: int, num_parts: int):
        """Assign this storage to partition `index` out of `num_parts`.

        The loaded data and rollout queues are discarded so they are rebuilt
        on the next access of `data`.
        """
        partition_changed = (
            index != self.current_worker or num_parts != self.num_workers
        )

        self.current_worker = index
        self.num_workers = num_parts

        self.num_samplers = int(math.ceil(self.original_num_samplers / num_parts))

        self._data = None

        if partition_changed:
            # Pending indices refer to the previous data slice and may be out
            # of range (or point at different trajectories) in the new one.
            self._remaining_inds = []

        for q in self.rollout_queues:
            try:
                while True:
                    q.get_nowait()
            except queue.Empty:
                pass
        self.rollout_queues = []

    def initialize(self, *, observations: ObservationType, **kwargs):
        """Reset the stream and make sure the dataset is loaded and non-empty."""
        self.reset_stream()
        # Load outside of the `assert`: assertions are stripped under
        # `python -O`, which would otherwise skip the loading side effect.
        data = self.data
        assert len(data) != 0

    def add(
        self,
        observations: ObservationType,
        memory: Optional[Memory],
        actions: torch.Tensor,
        action_log_probs: torch.Tensor,
        value_preds: torch.Tensor,
        rewards: torch.Tensor,
        masks: torch.Tensor,
    ):
        """No-op: nothing passed in here is stored."""
        pass

    def to(self, device: torch.device):
        """Set the device on which subsequently created batches are placed."""
        self.device = device

    @property
    def total_experiences(self) -> int:
        """Total number of steps returned so far by `next_batch`."""
        return self._total_experiences

    def reset_stream(self):
        """Drop loaded data and queues, keeping the current partition."""
        self.set_partition(index=self.current_worker, num_parts=self.num_workers)

    def empty(self) -> bool:
        """Always `False`."""
        return False

    def _get_next_ind(self):
        """Pop the next trajectory index, reshuffling all indices when exhausted."""
        if len(self._remaining_inds) == 0:
            self._remaining_inds = list(range(len(self.data)))
            random.shuffle(self._remaining_inds)
        return self._remaining_inds.pop()

    def _fill_rollout_queue(self, q: queue.Queue, sampler: int):
        """Add whole trajectories to the (empty) queue `q` until it holds at
        least `rollout_len` steps. Each queued item is
        `(obs, action, done, is_first_obs)`."""
        assert q.empty()

        while self.sampler_to_num_steps_in_queue[sampler] < self.rollout_len:
            next_ind = self._get_next_ind()

            for i, step in enumerate(transform_demos([self.data[next_ind]])[0]):
                q.put((*step, i == 0))
                self.sampler_to_num_steps_in_queue[sampler] += 1

        return True

    def get_data_for_rollout_ind(self, sampler_ind: int) -> Dict[str, torch.Tensor]:
        """Build one rollout of `rollout_len` steps for sampler `sampler_ind`.

        # Returns

        A dict with keys ``"masks"`` (steps x 1, 0.0 at the first step of a
        trajectory), ``"minigrid_ego_image"``, ``"expert_action"`` (steps) and,
        when a mission sensor exists, ``"minigrid_mission"``.
        """
        if self._data is None:
            # Accessing `data` (re)builds the rollout queues indexed below.
            _ = self.data

        masks: List[bool] = []
        minigrid_ego_image = []
        minigrid_mission = []
        expert_actions = []
        q = self.rollout_queues[sampler_ind]
        while len(masks) != self.rollout_len:
            if q.empty():
                assert self.sampler_to_num_steps_in_queue[sampler_ind] == 0
                self._fill_rollout_queue(q, sampler_ind)

            obs, expert_action, _, is_first_obs = cast(
                Tuple[
                    Dict[str, Union[np.ndarray, int, str]],
                    MiniGridEnv.Actions,
                    bool,
                    bool,
                ],
                q.get_nowait(),
            )

            self.sampler_to_num_steps_in_queue[sampler_ind] -= 1

            masks.append(not is_first_obs)
            minigrid_ego_image.append(obs["image"])
            if self.minigrid_mission_sensor is not None:
                # noinspection PyTypeChecker
                minigrid_mission.append(
                    self.minigrid_mission_sensor.get_observation(
                        env=None, task=None, minigrid_output_obs=obs
                    )
                )
            expert_actions.append([expert_action])

        to_return = {
            "masks": torch.tensor(masks, device=self.device, dtype=torch.float32).view(
                self.rollout_len, 1  # steps x mask
            ),
            "minigrid_ego_image": torch.stack(
                [torch.tensor(img, device=self.device) for img in minigrid_ego_image],
                dim=0,
            ),  # steps x height x width x channels
            "expert_action": torch.tensor(
                expert_actions, device=self.device, dtype=torch.int64
            ).view(
                self.rollout_len  # steps
            ),
        }
        if self.minigrid_mission_sensor is not None:
            to_return["minigrid_mission"] = torch.stack(
                [torch.tensor(m, device=self.device) for m in minigrid_mission], dim=0
            )  # steps x mission_dims
        return to_return

    def next_batch(self) -> Dict[str, torch.Tensor]:
        """Return one rollout per sampler, stacked along a new dimension 1."""
        all_data = defaultdict(list)
        for rollout_ind in range(self.num_samplers):
            data_for_ind = self.get_data_for_rollout_ind(sampler_ind=rollout_ind)
            for key in data_for_ind:
                all_data[key].append(data_for_ind[key])

        self._total_experiences += self.num_samplers * self.rollout_len
        return {
            key: torch.stack(
                all_data[key],
                dim=1,
            )  # new sampler dim
            for key in all_data
        }
================================================
FILE: allenact_plugins/minigrid_plugin/minigrid_sensors.py
================================================
from typing import Optional, Any, cast
import gym
import gym_minigrid.minigrid
import numpy as np
import torch
from babyai.utils.format import InstructionsPreprocessor
from gym_minigrid.minigrid import MiniGridEnv
from allenact.base_abstractions.sensor import Sensor, prepare_locals_for_super
from allenact.base_abstractions.task import Task, SubTaskType
# fmt: off
# Fixed list of vocabulary tokens; `MiniGridMissionSensor` registers these, in
# this order, with its instruction preprocessor and caps the vocabulary size
# at the length of this list.
ALL_VOCAB_TOKENS = [
    "a", "after", "and", "ball", "behind", "blue", "box",
    "door", "front", "go", "green", "grey", "in", "key",
    "left", "next", "of", "on", "open", "pick", "purple",
    "put", "red", "right", "the", "then", "to", "up", "yellow",
    "you", "your",
]
# fmt: on
class EgocentricMiniGridSensor(Sensor[MiniGridEnv, Task[MiniGridEnv]]):
    """Sensor returning the agent's egocentric MiniGrid image.

    The observation is the first `view_channels` channels of the
    `agent_view_size` x `agent_view_size` image produced by the environment.
    """

    def __init__(
        self,
        agent_view_size: int,
        view_channels: int = 1,
        uuid: str = "minigrid_ego_image",
        **kwargs: Any
    ):
        self.agent_view_size = agent_view_size
        self.view_channels = view_channels
        # Number of distinct ids = largest (absolute) index + 1.
        self.num_objects = (
            cast(
                int, max(map(abs, gym_minigrid.minigrid.OBJECT_TO_IDX.values()))  # type: ignore
            )
            + 1
        )
        self.num_colors = (
            cast(int, max(map(abs, gym_minigrid.minigrid.COLOR_TO_IDX.values())))  # type: ignore
            + 1
        )
        self.num_states = (
            cast(int, max(map(abs, gym_minigrid.minigrid.STATE_TO_IDX.values())))  # type: ignore
            + 1
        )

        observation_space = self._get_observation_space()

        # NOTE: `locals()` is forwarded to the base class; do not introduce
        # additional local variables above this call.
        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self) -> gym.Space:
        """Integer box of shape (view size, view size, channels)."""
        return gym.spaces.Box(
            low=0,
            high=max(self.num_objects, self.num_colors, self.num_states) - 1,
            shape=(self.agent_view_size, self.agent_view_size, self.view_channels),
            dtype=int,
        )

    def get_observation(
        self,
        env: MiniGridEnv,
        task: Optional[SubTaskType],
        *args,
        minigrid_output_obs: Optional[dict] = None,
        **kwargs: Any
    ) -> Any:
        """Return the egocentric image restricted to `view_channels` channels.

        # Parameters

        env : The environment; only queried when `minigrid_output_obs` cannot
            be reused.
        task : Unused.
        minigrid_output_obs : Optional observation dict (with an ``"image"``
            entry) already produced by the environment. It is reused when its
            spatial size matches `agent_view_size`; otherwise a fresh
            observation is generated from `env`.

        # Returns

        A `uint8` array of shape (view size, view size, `view_channels`).
        """
        expected_hw = (self.agent_view_size, self.agent_view_size)
        # The image is (height, width, channels), so only its first two
        # dimensions can be compared against the expected view size.
        if (
            minigrid_output_obs is not None
            and tuple(minigrid_output_obs["image"].shape[:2]) == expected_hw
        ):
            img = minigrid_output_obs["image"][:, :, : self.view_channels]
        else:
            env.agent_view_size = self.agent_view_size
            img = env.gen_obs()["image"][:, :, : self.view_channels]

        assert img.dtype == np.uint8
        return img
class MiniGridMissionSensor(Sensor[MiniGridEnv, Task[MiniGridEnv]]):
    """Sensor encoding the mission string as a fixed-length token-id array."""

    def __init__(self, instr_len: int, uuid: str = "minigrid_mission", **kwargs: Any):
        # NOTE: every local defined in this method is forwarded to the base
        # class through `locals()`, so the statements are kept as they are.
        self.instr_preprocessor = InstructionsPreprocessor(
            model_name="TMP_SENSOR", load_vocab_from=None
        )

        # We initialize the vocabulary with a fixed collection of tokens
        # and then ensure that the size cannot exceed this number. This
        # guarantees that sensors on all processes will produce the same
        # values.
        for token in ALL_VOCAB_TOKENS:
            _ = self.instr_preprocessor.vocab[token]
        self.instr_preprocessor.vocab.max_size = len(ALL_VOCAB_TOKENS)

        self.instr_len = instr_len

        observation_space = self._get_observation_space()

        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self) -> gym.Space:
        """Integer box of shape (`instr_len`,) bounded by the vocabulary size."""
        vocab_size = self.instr_preprocessor.vocab.max_size
        return gym.spaces.Box(
            low=0,
            high=vocab_size,
            shape=(self.instr_len,),
            dtype=int,
        )

    def get_observation(
        self,
        env: MiniGridEnv,
        task: Optional[SubTaskType],
        *args,
        minigrid_output_obs: Optional[np.ndarray] = None,
        **kwargs: Any
    ) -> Any:
        """Return the preprocessed mission as exactly `instr_len` integers.

        The observation is generated from `env` when `minigrid_output_obs` is
        not given. Longer instructions are truncated, shorter ones are
        right-padded with zeros.
        """
        raw_obs = env.gen_obs() if minigrid_output_obs is None else minigrid_output_obs

        tokens = self.instr_preprocessor([raw_obs]).view(-1)

        num_tokens: int = tokens.shape[0]
        missing = self.instr_len - num_tokens
        if missing < 0:
            tokens = tokens[: self.instr_len]
        elif missing > 0:
            tokens = torch.nn.functional.pad(
                input=tokens,
                pad=[0, missing],
                value=0,
            )

        return tokens.long().numpy()
================================================
FILE: allenact_plugins/minigrid_plugin/minigrid_tasks.py
================================================
import random
from typing import Tuple, Any, List, Dict, Optional, Union, Callable, Sequence, cast
import gym
import networkx as nx
import numpy as np
from gym.utils import seeding
from gym_minigrid.envs import CrossingEnv
from gym_minigrid.minigrid import (
DIR_TO_VEC,
IDX_TO_OBJECT,
MiniGridEnv,
OBJECT_TO_IDX,
)
from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor, SensorSuite
from allenact.base_abstractions.task import Task, TaskSampler
from allenact.utils.system import get_logger
from allenact_plugins.minigrid_plugin.minigrid_environments import (
AskForHelpSimpleCrossing,
)
class MiniGridTask(Task[CrossingEnv]):
    """Task around a MiniGrid Env, allows interfacing allenact with
    MiniGrid tasks. (currently focussed towards LavaCrossing)

    Besides stepping the environment, the task can act as an expert: it builds
    a directed graph over (x, y, rotation) states and follows the shortest
    path to a goal cell.
    """

    _ACTION_NAMES: Tuple[str, ...] = ("left", "right", "forward")
    _ACTION_IND_TO_MINIGRID_IND = tuple(
        MiniGridEnv.Actions.__members__[name].value for name in _ACTION_NAMES
    )
    # Graphs shared between tasks, keyed by `task_cache_uid`.
    _CACHED_GRAPHS: Dict[str, nx.DiGraph] = {}
    _NEIGHBOR_OFFSETS = tuple(
        [
            (-1, 0, 0),
            (0, -1, 0),
            (0, 0, -1),
            (1, 0, 0),
            (0, 1, 0),
            (0, 0, 1),
        ]
    )

    # Maps a unit (dx, dy) translation to the agent direction index facing it.
    _XY_DIFF_TO_AGENT_DIR = {
        tuple(vec): dir_ind for dir_ind, vec in enumerate(DIR_TO_VEC)
    }

    def __init__(
        self,
        env: Union[CrossingEnv],
        sensors: Union[SensorSuite, List[Sensor]],
        task_info: Dict[str, Any],
        max_steps: int,
        task_cache_uid: Optional[str] = None,
        corrupt_expert_within_actions_of_goal: Optional[int] = None,
        **kwargs,
    ):
        """Initializer.

        # Parameters

        env : The MiniGrid environment the task runs in.
        sensors : Sensors used to produce observations.
        task_info : Task metadata forwarded to the base class.
        max_steps : Step limit forwarded to the base class.
        task_cache_uid : If given, the expert graph is shared through
            `_CACHED_GRAPHS` under this key.
        corrupt_expert_within_actions_of_goal : If given, the expert returns
            random actions once the agent has been within this many actions
            of the goal.
        """
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )
        self._graph: Optional[nx.DiGraph] = None
        self._minigrid_done = False
        self._task_cache_uid = task_cache_uid
        self.corrupt_expert_within_actions_of_goal = (
            corrupt_expert_within_actions_of_goal
        )
        self.closest_agent_has_been_to_goal: Optional[float] = None

    @property
    def action_space(self) -> gym.spaces.Discrete:
        """Discrete space with one entry per action name."""
        return gym.spaces.Discrete(len(self._ACTION_NAMES))

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Render the underlying environment in the given mode."""
        return self.env.render(mode=mode)

    def _step(self, action: int) -> RLStepResult:
        """Translate `action` to a MiniGrid action and step the environment."""
        assert isinstance(action, int)
        action = cast(int, action)

        minigrid_obs, reward, self._minigrid_done, info = self.env.step(
            action=self._ACTION_IND_TO_MINIGRID_IND[action]
        )

        # self.env.render()

        return RLStepResult(
            observation=self.get_observations(minigrid_output_obs=minigrid_obs),
            reward=reward,
            done=self.is_done(),
            info=info,
        )

    def get_observations(
        self, *args, minigrid_output_obs: Optional[Dict[str, Any]] = None, **kwargs
    ) -> Any:
        """Query the sensor suite, passing along an existing env observation."""
        return self.sensor_suite.get_observations(
            env=self.env, task=self, minigrid_output_obs=minigrid_output_obs
        )

    def reached_terminal_state(self) -> bool:
        """Whether the last environment step reported `done`."""
        return self._minigrid_done

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Names of the available actions."""
        return cls._ACTION_NAMES

    def close(self) -> None:
        """No-op."""
        pass

    def metrics(self) -> Dict[str, Any]:
        """Base metrics, plus environment metrics (if any) and ``"success"``."""
        # noinspection PyUnresolvedReferences,PyCallingNonCallable
        env_metrics = self.env.metrics() if hasattr(self.env, "metrics") else {}
        return {
            **super(MiniGridTask, self).metrics(),
            **{k: float(v) for k, v in env_metrics.items()},
            "success": int(
                self.env.was_successful
                if hasattr(self.env, "was_successful")
                else self.cumulative_reward > 0
            ),
        }

    @property
    def graph_created(self):
        """Whether a graph has already been generated or assigned."""
        return self._graph is not None

    @property
    def graph(self):
        """The expert graph, generated (or fetched from the cache) on demand."""
        if self._graph is None:
            if self._task_cache_uid is not None:
                if self._task_cache_uid not in self._CACHED_GRAPHS:
                    self._CACHED_GRAPHS[self._task_cache_uid] = self.generate_graph()
                self._graph = self._CACHED_GRAPHS[self._task_cache_uid]
            else:
                self._graph = self.generate_graph()
        return self._graph

    @graph.setter
    def graph(self, graph: nx.DiGraph):
        self._graph = graph

    @classmethod
    def possible_neighbor_offsets(cls) -> Tuple[Tuple[int, int, int], ...]:
        """Offsets to neighboring states.

        Tuples of format:
        (X translation, Y translation, rotation by 90 degrees)

        A constant is returned (all offsets with exactly one non-zero entry
        of magnitude one); this function can be changed if anything more
        complex needs to be done.
        """
        return cls._NEIGHBOR_OFFSETS

    @classmethod
    def _add_from_to_edge(
        cls,
        g: nx.DiGraph,
        s: Tuple[int, int, int],
        t: Tuple[int, int, int],
    ):
        """Adds nodes and corresponding edges to existing nodes.
        This approach avoids adding the same edge multiple times.
        Pre-requisite knowledge about MiniGrid:
        DIR_TO_VEC = [
            # Pointing right (positive X)
            np.array((1, 0)),
            # Down (positive Y)
            np.array((0, 1)),
            # Pointing left (negative X)
            np.array((-1, 0)),
            # Up (negative Y)
            np.array((0, -1)),
        ]
        or
        AGENT_DIR_TO_STR = {
            0: '>',
            1: 'V',
            2: '<',
            3: '^'
        }
        This also implies turning right (clockwise) means:
            agent_dir += 1
        """

        s_x, s_y, s_rot = s
        t_x, t_y, t_rot = t

        x_diff = t_x - s_x
        y_diff = t_y - s_y
        angle_diff = (t_rot - s_rot) % 4

        # If source and target differ by more than one action, continue
        if (x_diff != 0) + (y_diff != 0) + (angle_diff != 0) != 1 or angle_diff == 2:
            return

        action = None
        if angle_diff == 1:
            action = "right"
        elif angle_diff == 3:
            action = "left"
        elif cls._XY_DIFF_TO_AGENT_DIR.get((x_diff, y_diff)) == s_rot:
            # if translation is the same direction as source
            # orientation, then it's a valid forward action
            action = "forward"
        else:
            # This is when the source and target aren't one action
            # apart, despite having dx=1 or dy=1 (translations that are not a
            # unit step are not in the lookup table and also end up here).
            pass

        if action is not None:
            g.add_edge(s, t, action=action)

    def _add_node_to_graph(
        self,
        graph: nx.DiGraph,
        s: Tuple[int, int, int],
        valid_node_types: Tuple[str, ...],
        attr_dict: Optional[Dict[Any, Any]] = None,
        include_rotation_free_leaves: bool = False,
    ):
        """Add state `s` to `graph` and connect it to already-present neighbors.

        # Parameters

        graph : The graph being built.
        s : The (x, y, rotation) state to add; nothing happens if it is
            already in the graph.
        valid_node_types : Node types that may be connected by edges.
        attr_dict : Node attributes; edges are only added when its ``"type"``
            is one of `valid_node_types`. A warning is logged if it is `None`.
        include_rotation_free_leaves : If true, also add an (x, y, None) leaf
            with an ``"NA"`` edge from `s`.
        """
        if s in graph:
            return
        if attr_dict is None:
            get_logger().warning("adding a node with neighbor checks and no attributes")
            # Without attributes the node is still added but, having no type,
            # it cannot be connected to any neighbor.
            attr_dict = {}
        graph.add_node(s, **attr_dict)

        if include_rotation_free_leaves:
            rot_free_leaf = (*s[:-1], None)
            if rot_free_leaf not in graph:
                graph.add_node(rot_free_leaf)
            graph.add_edge(s, rot_free_leaf, action="NA")

        if attr_dict.get("type") in valid_node_types:
            for o in self.possible_neighbor_offsets():
                t = (s[0] + o[0], s[1] + o[1], (s[2] + o[2]) % 4)
                if t in graph and graph.nodes[t]["type"] in valid_node_types:
                    self._add_from_to_edge(graph, s, t)
                    self._add_from_to_edge(graph, t, s)

    def generate_graph(
        self,
    ) -> nx.DiGraph:
        """The generated graph is based on the fully observable grid (as the
        expert sees it all).

        Every (x, y, rotation) state becomes a node; states on goal cells get
        an additional edge to a single ``"unified_goal"`` node.
        """

        image = self.env.grid.encode()
        width, height, _ = image.shape
        graph = nx.DiGraph()

        # In fully observable grid, there shouldn't be any "unseen"
        # Currently dealing with "empty", "wall", "goal", "lava"

        valid_object_ids = np.sort(
            [OBJECT_TO_IDX[o] for o in ["empty", "wall", "lava", "goal"]]
        )

        # Every cell must hold one of the supported object types.
        assert np.isin(image[:, :, 0], valid_object_ids).all()

        # Grid to nodes
        for x in range(width):
            for y in range(height):
                obj_idx, color, state = image[x, y]
                obj_type = IDX_TO_OBJECT[obj_idx]
                for rotation in range(4):
                    self._add_node_to_graph(
                        graph,
                        (x, y, rotation),
                        attr_dict={
                            "type": obj_type,
                            "color": color,
                            "state": state,
                        },
                        valid_node_types=("empty", "goal"),
                    )
                    if obj_type == "goal":
                        if not graph.has_node("unified_goal"):
                            graph.add_node("unified_goal")
                        graph.add_edge((x, y, rotation), "unified_goal")

        return graph

    def query_expert(self, **kwargs) -> Tuple[int, bool]:
        """Return the expert's action index for the agent's current state.

        # Returns

        A tuple `(action index, success)`. `(-1, False)` is returned when the
        episode is over, when no path to a goal exists, or when the agent is
        already on a goal cell. When expert corruption is enabled and the
        agent has been close enough to the goal, a random action is returned
        with `success` set to `True`.
        """
        if self._minigrid_done:
            get_logger().warning("Episode is completed, but expert is still queried.")
            return -1, False

        agent_x, agent_y = self.env.agent_pos
        agent_rot = self.env.agent_dir
        source_state_key = (agent_x, agent_y, agent_rot)
        assert source_state_key in self.graph

        try:
            path = nx.shortest_path(self.graph, source_state_key, "unified_goal")
        except (nx.NetworkXNoPath, nx.NodeNotFound):
            # No goal is reachable (or the grid has no goal at all).
            return -1, False

        dist_to_goal = len(path) - 1
        if self.closest_agent_has_been_to_goal is None:
            self.closest_agent_has_been_to_goal = dist_to_goal
        else:
            self.closest_agent_has_been_to_goal = min(
                dist_to_goal, self.closest_agent_has_been_to_goal
            )

        if (
            self.corrupt_expert_within_actions_of_goal is not None
            and self.corrupt_expert_within_actions_of_goal
            >= self.closest_agent_has_been_to_goal
        ):
            return (
                int(self.env.np_random.randint(0, len(self.class_action_names()))),
                True,
            )

        if len(path) == 2:
            # Since "unified_goal" is 1 step away from actual goals
            # if a path like [actual_goal, unified_goal] exists, then
            # you are already at a goal.
            get_logger().warning(
                "Shortest path computations suggest we are at"
                " the target but episode does not think so."
            )
            return -1, False

        next_key_on_shortest_path = path[1]
        return (
            self.class_action_names().index(
                self.graph.get_edge_data(source_state_key, next_key_on_shortest_path)[
                    "action"
                ]
            ),
            True,
        )
class AskForHelpSimpleCrossingTask(MiniGridTask):
    """`MiniGridTask` with an additional ``"toggle"`` action.

    The task records, for every step, whether the toggle action was taken and
    reports the fraction of such steps as the ``"toggle_percent"`` metric.
    """

    _ACTION_NAMES = ("left", "right", "forward", "toggle")
    _ACTION_IND_TO_MINIGRID_IND = tuple(
        MiniGridEnv.Actions.__members__[name].value for name in _ACTION_NAMES
    )
    _CACHED_GRAPHS: Dict[str, nx.DiGraph] = {}

    def __init__(
        self,
        env: AskForHelpSimpleCrossing,
        sensors: Union[SensorSuite, List[Sensor]],
        task_info: Dict[str, Any],
        max_steps: int,
        **kwargs,
    ):
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )

        # One entry per step taken: True iff that step was a toggle.
        self.did_toggle: List[bool] = []

    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        """Record whether `action` is a toggle, then step as the parent does."""
        assert isinstance(action, int)
        action = cast(int, action)

        toggled = self._ACTION_NAMES[action] == "toggle"
        self.did_toggle.append(toggled)

        return super(AskForHelpSimpleCrossingTask, self)._step(action=action)

    def metrics(self) -> Dict[str, Any]:
        """Parent metrics plus the fraction of steps that were toggles."""
        parent_metrics = super(AskForHelpSimpleCrossingTask, self).metrics()
        # Guard against division by zero when no step has been taken.
        num_steps = max(len(self.did_toggle), 1)
        return {
            **parent_metrics,
            "toggle_percent": float(sum(self.did_toggle) / num_steps),
        }
class MiniGridTaskSampler(TaskSampler):
    """Samples `MiniGridTask`s by re-seeding and resetting a single environment.

    Seeds are either drawn from a fixed list (`task_seeds_list` /
    `num_unique_seeds`) or generated randomly; in the latter case a failed
    task's seed may be repeated for a minimum number of steps.
    """

    def __init__(
        self,
        env_class: Callable[..., Union[MiniGridEnv]],
        sensors: Union[SensorSuite, List[Sensor]],
        env_info: Optional[Dict[str, Any]] = None,
        max_tasks: Optional[int] = None,
        num_unique_seeds: Optional[int] = None,
        task_seeds_list: Optional[List[int]] = None,
        deterministic_sampling: bool = False,
        cache_graphs: Optional[bool] = False,
        task_class: Callable[..., MiniGridTask] = MiniGridTask,
        repeat_failed_task_for_min_steps: int = 0,
        extra_task_kwargs: Optional[Dict] = None,
        **kwargs,
    ):
        """Initializer.

        # Parameters

        env_class : Callable creating the environment.
        sensors : Sensors (or a `SensorSuite`) given to every task.
        env_info : Keyword arguments for `env_class`; `None` means none.
        max_tasks : Maximum number of tasks to sample (`None` for unbounded).
        num_unique_seeds : Number of distinct seeds to sample tasks from.
        task_seeds_list : Explicit list of seeds to sample tasks from.
        deterministic_sampling : Cycle through the seeds in order rather than
            sampling them randomly.
        cache_graphs : Share expert graphs between tasks with the same seed.
        task_class : Callable creating the task.
        repeat_failed_task_for_min_steps : When positive, a task seed that
            yielded zero cumulative reward is repeated until this many steps
            were taken with it.
        extra_task_kwargs : Additional keyword arguments for `task_class`.
        """
        super(MiniGridTaskSampler, self).__init__()
        self.sensors = (
            SensorSuite(sensors) if not isinstance(sensors, SensorSuite) else sensors
        )
        self.max_tasks = max_tasks
        self.num_unique_seeds = num_unique_seeds
        self.cache_graphs = cache_graphs
        self.deterministic_sampling = deterministic_sampling
        self.repeat_failed_task_for_min_steps = repeat_failed_task_for_min_steps
        self.extra_task_kwargs = (
            extra_task_kwargs if extra_task_kwargs is not None else {}
        )

        self._last_env_seed: Optional[int] = None
        self._last_task: Optional[MiniGridTask] = None
        self._number_of_steps_taken_with_task_seed = 0

        assert (not deterministic_sampling) or repeat_failed_task_for_min_steps <= 0, (
            "If `deterministic_sampling` is True then we require"
            " `repeat_failed_task_for_min_steps <= 0`"
        )
        assert (not self.cache_graphs) or self.num_unique_seeds is not None, (
            "When caching graphs you must specify"
            " a number of unique tasks to sample from."
        )
        assert (self.num_unique_seeds is None) or (
            0 < self.num_unique_seeds
        ), "`num_unique_seeds` must be a positive integer."

        self.task_seeds_list = task_seeds_list
        if self.task_seeds_list is not None:
            if self.num_unique_seeds is not None:
                assert self.num_unique_seeds == len(
                    self.task_seeds_list
                ), "`num_unique_seeds` must equal the length of `task_seeds_list` if both specified."
            self.num_unique_seeds = len(self.task_seeds_list)
        elif self.num_unique_seeds is not None:
            self.task_seeds_list = list(range(self.num_unique_seeds))
        if num_unique_seeds is not None and repeat_failed_task_for_min_steps > 0:
            raise NotImplementedError(
                "`repeat_failed_task_for_min_steps` must be <=0 if number"
                " of unique seeds is not None."
            )

        assert (
            not self.cache_graphs
        ) or self.num_unique_seeds <= 1000, "Too many tasks (graphs) to cache"
        assert (not deterministic_sampling) or (
            self.num_unique_seeds is not None
        ), "Cannot use deterministic sampling when `num_unique_seeds` is `None`."

        if (not deterministic_sampling) and self.max_tasks:
            get_logger().warning(
                "`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,"
                " this might be a mistake when running testing."
            )

        # `env_info` defaults to `None`, which cannot be `**`-unpacked.
        self.env = env_class(**(env_info if env_info is not None else {}))
        self.task_class = task_class

        self.np_seeded_random_gen, _ = seeding.np_random(random.randint(0, 2**31 - 1))

        self.num_tasks_generated = 0

    @property
    def length(self) -> Union[int, float]:
        """Number of tasks left to sample (`inf` when `max_tasks` is `None`)."""
        return (
            float("inf")
            if self.max_tasks is None
            else self.max_tasks - self.num_tasks_generated
        )

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Number of unique seeds, or `None` when unbounded."""
        return None if self.num_unique_seeds is None else self.num_unique_seeds

    @property
    def last_sampled_task(self) -> Optional[Task]:
        """Not implemented for this sampler."""
        raise NotImplementedError

    def next_task(self, force_advance_scene: bool = False) -> Optional[MiniGridTask]:
        """Seed and reset the environment and wrap it in a new task.

        Returns `None` once `max_tasks` tasks were generated. The
        `force_advance_scene` argument is unused.
        """
        if self.length <= 0:
            return None

        task_cache_uid = None
        repeating = False
        if self.num_unique_seeds is not None:
            if self.deterministic_sampling:
                self._last_env_seed = self.task_seeds_list[
                    self.num_tasks_generated % len(self.task_seeds_list)
                ]
            else:
                self._last_env_seed = self.np_seeded_random_gen.choice(
                    self.task_seeds_list
                )
        else:
            if self._last_task is not None:
                self._number_of_steps_taken_with_task_seed += (
                    self._last_task.num_steps_taken()
                )

            # Repeat the previous seed if it was failed and has not yet been
            # used for the minimum number of steps.
            if (
                self._last_env_seed is not None
                and self._number_of_steps_taken_with_task_seed
                < self.repeat_failed_task_for_min_steps
                and self._last_task.cumulative_reward == 0
            ):
                repeating = True
            else:
                self._number_of_steps_taken_with_task_seed = 0
                self._last_env_seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)

        task_has_same_seed_reset = hasattr(self.env, "same_seed_reset")

        if self.cache_graphs:
            task_cache_uid = str(self._last_env_seed)

        if repeating and task_has_same_seed_reset:
            # noinspection PyUnresolvedReferences
            self.env.same_seed_reset()
        else:
            self.env.seed(self._last_env_seed)
            self.env.saved_seed = self._last_env_seed
            self.env.reset()

        self.num_tasks_generated += 1
        task = self.task_class(
            **dict(
                env=self.env,
                sensors=self.sensors,
                task_info={},
                max_steps=self.env.max_steps,
                task_cache_uid=task_cache_uid,
            ),
            **self.extra_task_kwargs,
        )

        # A repeated task can reuse the graph already built for its seed.
        if repeating and self._last_task.graph_created:
            task.graph = self._last_task.graph

        self._last_task = task
        return task

    def close(self) -> None:
        """Close the underlying environment."""
        self.env.close()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Always `True`."""
        return True

    def reset(self) -> None:
        """Reset the generated-task counter and the environment."""
        self.num_tasks_generated = 0
        self.env.reset()

    def set_seed(self, seed: int) -> None:
        """Re-create the sampler's random generator from `seed`."""
        self.np_seeded_random_gen, _ = seeding.np_random(seed)
================================================
FILE: allenact_plugins/minigrid_plugin/scripts/__init__.py
================================================
================================================
FILE: allenact_plugins/navigation_plugin/__init__.py
================================================
================================================
FILE: allenact_plugins/navigation_plugin/objectnav/__init__.py
================================================
================================================
FILE: allenact_plugins/navigation_plugin/objectnav/models.py
================================================
"""Baseline models for use in the object navigation task.
Object navigation is currently available as a Task in AI2-THOR and
Facebook's Habitat.
"""
from typing import Optional, List, Dict, cast, Tuple, Sequence
import gym
import torch
import torch.nn as nn
from gym.spaces import Dict as SpaceDict
from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.embodiedai.models import resnet as resnet
from allenact.embodiedai.models.basic_models import SimpleCNN
from allenact.embodiedai.models.visual_nav_models import (
VisualNavActorCritic,
FusionType,
)
class CatObservations(nn.Module):
    """Module concatenating selected observations along a fixed dimension.

    # Attributes

    ordered_uuids : Non-empty sequence of observation keys, in concatenation
        order.
    dim : The dimension along which the observations are concatenated.
    """

    def __init__(self, ordered_uuids: Sequence[str], dim: int):
        super().__init__()

        assert len(ordered_uuids) != 0

        self.ordered_uuids = ordered_uuids
        self.dim = dim

    def forward(self, observations: ObservationType):
        """Return the concatenation of the observations named by
        `ordered_uuids`; a single observation is returned as is."""
        selected = [observations[uuid] for uuid in self.ordered_uuids]
        if len(selected) == 1:
            # Nothing to concatenate, hand back the tensor itself.
            return selected[0]
        return torch.cat(selected, dim=self.dim)
class ObjectNavActorCritic(VisualNavActorCritic):
    """Recurrent actor-critic baseline for object navigation.

    Observations are encoded by concatenating the output of a visual encoder
    (skipped when the model is blind) with a learned embedding of the goal
    object type.

    # Attributes

    action_space : The space of actions available to the agent. Currently only discrete
        actions are allowed (so this space will always be of type `gym.spaces.Discrete`).
    observation_space : The observation space expected by the agent. This observation space
        should include (optionally) 'rgb' images and 'depth' images and is required to
        have a component corresponding to the goal `goal_sensor_uuid`.
    goal_sensor_uuid : The uuid of the sensor of the goal object. See `GoalObjectTypeThorSensor`
        as an example of such a sensor.
    hidden_size : The hidden size of the GRU RNN.
    object_type_embedding_dim: The dimensionality of the embedding corresponding to the goal
        object type.
    """

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        goal_sensor_uuid: str,
        # RNN
        hidden_size=512,
        num_rnn_layers=1,
        rnn_type="GRU",
        add_prev_actions=False,
        add_prev_action_null_token=False,
        action_embed_size=6,
        # Aux loss
        multiple_beliefs=False,
        beliefs_fusion: Optional[FusionType] = None,
        auxiliary_uuids: Optional[Sequence[str]] = None,
        # below are custom params
        rgb_uuid: Optional[str] = None,
        depth_uuid: Optional[str] = None,
        object_type_embedding_dim=8,
        trainable_masked_hidden_state: bool = False,
        # perception backbone params,
        backbone="gnresnet18",
        resnet_baseplanes=32,
    ):
        """Initializer.

        See class documentation for parameter definitions. `backbone` must be
        one of "simple_cnn", "gnresnet18", "identity" or "projection";
        anything else raises `NotImplementedError`.
        """
        super().__init__(
            action_space=action_space,
            observation_space=observation_space,
            hidden_size=hidden_size,
            multiple_beliefs=multiple_beliefs,
            beliefs_fusion=beliefs_fusion,
            auxiliary_uuids=auxiliary_uuids,
        )

        self.rgb_uuid = rgb_uuid
        self.depth_uuid = depth_uuid
        self.goal_sensor_uuid = goal_sensor_uuid

        goal_space = self.observation_space.spaces[self.goal_sensor_uuid]
        self._n_object_types = goal_space.n
        self.object_type_embedding_size = object_type_embedding_dim

        self.backbone = backbone
        if backbone in ("simple_cnn", "gnresnet18"):
            # Both learned image encoders share these constructor arguments.
            shared_kwargs = dict(
                observation_space=observation_space,
                output_size=hidden_size,
                rgb_uuid=rgb_uuid,
                depth_uuid=depth_uuid,
            )
            if backbone == "simple_cnn":
                self.visual_encoder = SimpleCNN(**shared_kwargs)
            else:  # resnet family
                self.visual_encoder = resnet.GroupNormResNetEncoder(
                    **shared_kwargs,
                    baseplanes=resnet_baseplanes,
                    ngroups=resnet_baseplanes // 2,
                    make_backbone=getattr(resnet, backbone),
                )
            self.visual_encoder_output_size = hidden_size
            # The encoder and this model must agree on whether vision is used.
            assert self.is_blind == self.visual_encoder.is_blind
        elif backbone in ("identity", "projection"):
            # Observations are consumed as-is: concatenate them and, for
            # "projection", map the result to `hidden_size` with a linear layer.
            present_uuids = [
                uuid for uuid in (self.rgb_uuid, self.depth_uuid) if uuid is not None
            ]
            concat = CatObservations(
                ordered_uuids=present_uuids,
                dim=-1,
            )
            concat_size = sum(
                observation_space[uuid].shape[-1] for uuid in present_uuids
            )
            if backbone == "projection":
                self.visual_encoder = nn.Sequential(
                    concat, nn.Linear(concat_size, hidden_size), nn.ReLU(True)
                )
                self.visual_encoder_output_size = hidden_size
            else:
                self.visual_encoder = concat
                self.visual_encoder_output_size = concat_size
        else:
            raise NotImplementedError

        embed_size = self.goal_visual_encoder_output_dims
        self.create_state_encoders(
            obs_embed_size=embed_size,
            num_rnn_layers=num_rnn_layers,
            rnn_type=rnn_type,
            add_prev_actions=add_prev_actions,
            add_prev_action_null_token=add_prev_action_null_token,
            prev_action_embed_size=action_embed_size,
            trainable_masked_hidden_state=trainable_masked_hidden_state,
        )

        self.create_actorcritic_head()

        self.create_aux_models(
            obs_embed_size=self.goal_visual_encoder_output_dims,
            action_embed_size=action_embed_size,
        )

        self.object_type_embedding = nn.Embedding(
            num_embeddings=self._n_object_types,
            embedding_dim=object_type_embedding_dim,
        )

        self.train()

    @property
    def is_blind(self) -> bool:
        """True if the model is blind (e.g. neither 'depth' or 'rgb' is an
        input observation type)."""
        return self.rgb_uuid is None and self.depth_uuid is None

    @property
    def goal_visual_encoder_output_dims(self):
        """Size of the last dimension produced by `forward_encoder`."""
        goal_dims = self.object_type_embedding_size
        if not self.is_blind:
            return goal_dims + self.visual_encoder_output_size
        return goal_dims

    def get_object_type_encoding(
        self, observations: Dict[str, torch.Tensor]
    ) -> torch.Tensor:
        """Get the object type encoding from input batched observations."""
        goal_ids = observations[self.goal_sensor_uuid].to(torch.int64)
        # noinspection PyTypeChecker
        return self.object_type_embedding(goal_ids)  # type:ignore

    def forward_encoder(self, observations: ObservationType) -> torch.Tensor:
        """Embed observations as `[visual features (if any), goal embedding]`
        concatenated along the last dimension."""
        goal_embed = self.get_object_type_encoding(
            cast(Dict[str, torch.Tensor], observations)
        )
        pieces = [goal_embed]

        if not self.is_blind:
            visual_embed = self.visual_encoder(observations)
            pieces = [visual_embed] + pieces

        return torch.cat(pieces, dim=-1)
class ResnetTensorNavActorCritic(VisualNavActorCritic):
    """Actor-critic model that consumes preprocessed ResNet feature tensors.

    Depending on how many of the RGB/depth preprocessor uuids are given, the
    observations are encoded by a `ResnetTensorGoalEncoder` (zero or one uuid)
    or a `ResnetDualTensorGoalEncoder` (both uuids).
    """

    def __init__(
        # base params
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        goal_sensor_uuid: str,
        hidden_size=512,
        num_rnn_layers=1,
        rnn_type="GRU",
        add_prev_actions=False,
        add_prev_action_null_token=False,
        action_embed_size=6,
        multiple_beliefs=False,
        beliefs_fusion: Optional[FusionType] = None,
        auxiliary_uuids: Optional[List[str]] = None,
        # custom params
        rgb_resnet_preprocessor_uuid: Optional[str] = None,
        depth_resnet_preprocessor_uuid: Optional[str] = None,
        goal_dims: int = 32,
        resnet_compressor_hidden_out_dims: Tuple[int, int] = (128, 32),
        combiner_hidden_out_dims: Tuple[int, int] = (128, 32),
        **kwargs,
    ):
        """Build the goal/visual encoder, state encoders, heads and aux models.

        Any extra `kwargs` are forwarded to the parent class initializer.
        """
        super().__init__(
            action_space=action_space,
            observation_space=observation_space,
            hidden_size=hidden_size,
            multiple_beliefs=multiple_beliefs,
            beliefs_fusion=beliefs_fusion,
            auxiliary_uuids=auxiliary_uuids,
            **kwargs,
        )

        has_both_modalities = (
            rgb_resnet_preprocessor_uuid is not None
            and depth_resnet_preprocessor_uuid is not None
        )
        if has_both_modalities:
            self.goal_visual_encoder = ResnetDualTensorGoalEncoder(  # type:ignore
                self.observation_space,
                goal_sensor_uuid,
                rgb_resnet_preprocessor_uuid,
                depth_resnet_preprocessor_uuid,
                goal_dims,
                resnet_compressor_hidden_out_dims,
                combiner_hidden_out_dims,
            )
        else:
            # Use whichever single uuid was supplied (may be `None` if neither was).
            if rgb_resnet_preprocessor_uuid is not None:
                resnet_preprocessor_uuid = rgb_resnet_preprocessor_uuid
            else:
                resnet_preprocessor_uuid = depth_resnet_preprocessor_uuid
            self.goal_visual_encoder = ResnetTensorGoalEncoder(
                self.observation_space,
                goal_sensor_uuid,
                resnet_preprocessor_uuid,
                goal_dims,
                resnet_compressor_hidden_out_dims,
                combiner_hidden_out_dims,
            )

        self.create_state_encoders(
            obs_embed_size=self.goal_visual_encoder.output_dims,
            num_rnn_layers=num_rnn_layers,
            rnn_type=rnn_type,
            add_prev_actions=add_prev_actions,
            add_prev_action_null_token=add_prev_action_null_token,
            prev_action_embed_size=action_embed_size,
        )

        self.create_actorcritic_head()

        self.create_aux_models(
            obs_embed_size=self.goal_visual_encoder.output_dims,
            action_embed_size=action_embed_size,
        )

        self.train()

    @property
    def is_blind(self) -> bool:
        """True if the model is blind (e.g. neither 'depth' or 'rgb' is an
        input observation type)."""
        encoder = self.goal_visual_encoder
        return encoder.is_blind

    def forward_encoder(self, observations: ObservationType) -> torch.FloatTensor:
        """Encode observations with the goal/visual encoder."""
        encoder = self.goal_visual_encoder
        return encoder(observations)
class ResnetTensorGoalEncoder(nn.Module):
    """Combine one ResNet feature tensor with a goal embedding.

    The ResNet tensor is compressed with two 1x1 convolutions, the goal
    embedding is broadcast over the spatial dimensions, both are concatenated
    along the channel dimension and mixed by two more 1x1 convolutions. The
    result is flattened per batch element.

    The encoder is "blind" when `resnet_preprocessor_uuid` is not a key of the
    observation space; in that case only the goal embedding is produced and no
    convolutional layers are created.
    """

    def __init__(
        self,
        observation_spaces: SpaceDict,
        goal_sensor_uuid: str,
        resnet_preprocessor_uuid: str,
        goal_embed_dims: int = 32,
        resnet_compressor_hidden_out_dims: Tuple[int, int] = (128, 32),
        combiner_hidden_out_dims: Tuple[int, int] = (128, 32),
    ) -> None:
        """Create the goal embedding and (unless blind) the conv stacks.

        Raises `NotImplementedError` if the goal space is neither
        `gym.spaces.Discrete` nor `gym.spaces.Box`.
        """
        super().__init__()
        self.goal_uuid = goal_sensor_uuid
        self.resnet_uuid = resnet_preprocessor_uuid
        self.goal_embed_dims = goal_embed_dims
        self.resnet_hid_out_dims = resnet_compressor_hidden_out_dims
        self.combine_hid_out_dims = combiner_hidden_out_dims

        self.goal_space = observation_spaces.spaces[self.goal_uuid]
        if isinstance(self.goal_space, gym.spaces.Discrete):
            # Discrete goals (e.g. object type ids) get a lookup embedding.
            self.embed_goal = nn.Embedding(
                num_embeddings=self.goal_space.n,
                embedding_dim=self.goal_embed_dims,
            )
        elif isinstance(self.goal_space, gym.spaces.Box):
            # Continuous goals are projected linearly from their last dimension.
            self.embed_goal = nn.Linear(self.goal_space.shape[-1], self.goal_embed_dims)
        else:
            raise NotImplementedError

        self.blind = self.resnet_uuid not in observation_spaces.spaces
        if not self.blind:
            self.resnet_tensor_shape = observation_spaces.spaces[self.resnet_uuid].shape
            self.resnet_compressor = nn.Sequential(
                nn.Conv2d(self.resnet_tensor_shape[0], self.resnet_hid_out_dims[0], 1),
                nn.ReLU(),
                nn.Conv2d(*self.resnet_hid_out_dims[0:2], 1),
                nn.ReLU(),
            )
            self.target_obs_combiner = nn.Sequential(
                nn.Conv2d(
                    self.resnet_hid_out_dims[1] + self.goal_embed_dims,
                    self.combine_hid_out_dims[0],
                    1,
                ),
                nn.ReLU(),
                nn.Conv2d(*self.combine_hid_out_dims[0:2], 1),
            )

    @property
    def is_blind(self):
        """True when no ResNet tensor is available in the observation space."""
        return self.blind

    @property
    def output_dims(self):
        """Size of the last dimension of the tensor returned by `forward`."""
        if self.blind:
            return self.goal_embed_dims
        else:
            return (
                self.combine_hid_out_dims[-1]
                * self.resnet_tensor_shape[1]
                * self.resnet_tensor_shape[2]
            )

    def get_object_type_encoding(
        self, observations: Dict[str, torch.FloatTensor]
    ) -> torch.FloatTensor:
        """Get the object type encoding from input batched observations."""
        return cast(
            torch.FloatTensor,
            self.embed_goal(observations[self.goal_uuid].to(torch.int64)),
        )

    def compress_resnet(self, observations):
        """Apply the 1x1-conv compressor to the ResNet tensor."""
        return self.resnet_compressor(observations[self.resnet_uuid])

    def distribute_target(self, observations):
        """Embed the goal and broadcast it over the ResNet spatial grid."""
        target_emb = self.embed_goal(observations[self.goal_uuid])
        return target_emb.view(-1, self.goal_embed_dims, 1, 1).expand(
            -1, -1, self.resnet_tensor_shape[-2], self.resnet_tensor_shape[-1]
        )

    def adapt_input(self, observations):
        """Flatten the leading batch dimensions of the ResNet and goal tensors.

        A 6-dimensional ResNet tensor is treated as having a separate agent
        dimension (`nstep, nsampler, nagent`); otherwise the leading two
        dimensions are taken as `nstep, nsampler`. The input mapping is copied
        shallowly so the caller's dictionary is left untouched.

        Returns the adapted observations together with the bookkeeping values
        needed by `adapt_output`.
        """
        observations = {**observations}
        resnet = observations[self.resnet_uuid]
        goal = observations[self.goal_uuid]

        use_agent = False
        nagent = 1

        if len(resnet.shape) == 6:
            use_agent = True
            nstep, nsampler, nagent = resnet.shape[:3]
        else:
            nstep, nsampler = resnet.shape[:2]

        observations[self.resnet_uuid] = resnet.view(-1, *resnet.shape[-3:])
        observations[self.goal_uuid] = goal.view(-1, goal.shape[-1])

        return observations, use_agent, nstep, nsampler, nagent

    @staticmethod
    def adapt_output(x, use_agent, nstep, nsampler, nagent):
        """Restore the leading batch dimensions removed by `adapt_input`."""
        if use_agent:
            return x.view(nstep, nsampler, nagent, -1)
        return x.view(nstep, nsampler * nagent, -1)

    def forward(self, observations):
        """Encode observations into a `[..., output_dims]` tensor.

        A blind encoder returns the goal embedding directly. This check must
        happen before `adapt_input`, which reads the ResNet tensor and would
        otherwise raise `KeyError` when that observation is absent.
        """
        if self.blind:
            return self.embed_goal(observations[self.goal_uuid])

        observations, use_agent, nstep, nsampler, nagent = self.adapt_input(
            observations
        )

        embs = [
            self.compress_resnet(observations),
            self.distribute_target(observations),
        ]
        x = self.target_obs_combiner(
            torch.cat(
                embs,
                dim=1,
            )
        )
        x = x.reshape(x.size(0), -1)  # flatten

        return self.adapt_output(x, use_agent, nstep, nsampler, nagent)
class ResnetDualTensorGoalEncoder(nn.Module):
    """Combine RGB and depth ResNet feature tensors with a goal embedding.

    Each modality has its own 1x1-conv compressor and its own combiner that
    mixes the compressed features with the spatially broadcast goal embedding.
    The two combined tensors are concatenated along the channel dimension and
    flattened per batch element.

    The encoder is "blind" when either ResNet uuid is missing from the
    observation space; then only the goal embedding is produced and no
    convolutional layers are created.
    """

    def __init__(
        self,
        observation_spaces: SpaceDict,
        goal_sensor_uuid: str,
        rgb_resnet_preprocessor_uuid: str,
        depth_resnet_preprocessor_uuid: str,
        goal_embed_dims: int = 32,
        resnet_compressor_hidden_out_dims: Tuple[int, int] = (128, 32),
        combiner_hidden_out_dims: Tuple[int, int] = (128, 32),
    ) -> None:
        """Create the goal embedding and (unless blind) the conv stacks.

        Raises `NotImplementedError` if the goal space is neither
        `gym.spaces.Discrete` nor `gym.spaces.Box`.
        """
        super().__init__()
        self.goal_uuid = goal_sensor_uuid
        self.rgb_resnet_uuid = rgb_resnet_preprocessor_uuid
        self.depth_resnet_uuid = depth_resnet_preprocessor_uuid
        self.goal_embed_dims = goal_embed_dims
        self.resnet_hid_out_dims = resnet_compressor_hidden_out_dims
        self.combine_hid_out_dims = combiner_hidden_out_dims

        self.goal_space = observation_spaces.spaces[self.goal_uuid]
        if isinstance(self.goal_space, gym.spaces.Discrete):
            self.embed_goal = nn.Embedding(
                num_embeddings=self.goal_space.n,
                embedding_dim=self.goal_embed_dims,
            )
        elif isinstance(self.goal_space, gym.spaces.Box):
            self.embed_goal = nn.Linear(self.goal_space.shape[-1], self.goal_embed_dims)
        else:
            raise NotImplementedError

        self.blind = (
            self.rgb_resnet_uuid not in observation_spaces.spaces
            or self.depth_resnet_uuid not in observation_spaces.spaces
        )
        if not self.blind:
            # The RGB tensor's shape is used for both modalities.
            self.resnet_tensor_shape = observation_spaces.spaces[
                self.rgb_resnet_uuid
            ].shape
            self.rgb_resnet_compressor = nn.Sequential(
                nn.Conv2d(self.resnet_tensor_shape[0], self.resnet_hid_out_dims[0], 1),
                nn.ReLU(),
                nn.Conv2d(*self.resnet_hid_out_dims[0:2], 1),
                nn.ReLU(),
            )
            self.depth_resnet_compressor = nn.Sequential(
                nn.Conv2d(self.resnet_tensor_shape[0], self.resnet_hid_out_dims[0], 1),
                nn.ReLU(),
                nn.Conv2d(*self.resnet_hid_out_dims[0:2], 1),
                nn.ReLU(),
            )
            self.rgb_target_obs_combiner = nn.Sequential(
                nn.Conv2d(
                    self.resnet_hid_out_dims[1] + self.goal_embed_dims,
                    self.combine_hid_out_dims[0],
                    1,
                ),
                nn.ReLU(),
                nn.Conv2d(*self.combine_hid_out_dims[0:2], 1),
            )
            self.depth_target_obs_combiner = nn.Sequential(
                nn.Conv2d(
                    self.resnet_hid_out_dims[1] + self.goal_embed_dims,
                    self.combine_hid_out_dims[0],
                    1,
                ),
                nn.ReLU(),
                nn.Conv2d(*self.combine_hid_out_dims[0:2], 1),
            )

    @property
    def is_blind(self):
        """True when at least one ResNet tensor is missing from the space."""
        return self.blind

    @property
    def output_dims(self):
        """Size of the last dimension of the tensor returned by `forward`."""
        if self.blind:
            return self.goal_embed_dims
        else:
            return (
                2
                * self.combine_hid_out_dims[-1]
                * self.resnet_tensor_shape[1]
                * self.resnet_tensor_shape[2]
            )

    def get_object_type_encoding(
        self, observations: Dict[str, torch.FloatTensor]
    ) -> torch.FloatTensor:
        """Get the object type encoding from input batched observations."""
        return cast(
            torch.FloatTensor,
            self.embed_goal(observations[self.goal_uuid].to(torch.int64)),
        )

    def compress_rgb_resnet(self, observations):
        """Apply the RGB 1x1-conv compressor to the RGB ResNet tensor."""
        return self.rgb_resnet_compressor(observations[self.rgb_resnet_uuid])

    def compress_depth_resnet(self, observations):
        """Apply the depth 1x1-conv compressor to the depth ResNet tensor."""
        return self.depth_resnet_compressor(observations[self.depth_resnet_uuid])

    def distribute_target(self, observations):
        """Embed the goal and broadcast it over the ResNet spatial grid."""
        target_emb = self.embed_goal(observations[self.goal_uuid])
        return target_emb.view(-1, self.goal_embed_dims, 1, 1).expand(
            -1, -1, self.resnet_tensor_shape[-2], self.resnet_tensor_shape[-1]
        )

    def adapt_input(self, observations):
        """Flatten the leading batch dimensions of the RGB, depth and goal tensors.

        A 6-dimensional RGB tensor is treated as having a separate agent
        dimension (`nstep, nsampler, nagent`); otherwise the leading two
        dimensions are taken as `nstep, nsampler`.

        Returns the adapted observations together with the bookkeeping values
        needed by `adapt_output`.
        """
        # Shallow-copy so the reshaped tensors below do not overwrite the
        # entries of the caller's observation dictionary.
        observations = {**observations}
        rgb = observations[self.rgb_resnet_uuid]
        depth = observations[self.depth_resnet_uuid]

        use_agent = False
        nagent = 1

        if len(rgb.shape) == 6:
            use_agent = True
            nstep, nsampler, nagent = rgb.shape[:3]
        else:
            nstep, nsampler = rgb.shape[:2]

        observations[self.rgb_resnet_uuid] = rgb.view(-1, *rgb.shape[-3:])
        observations[self.depth_resnet_uuid] = depth.view(-1, *depth.shape[-3:])
        observations[self.goal_uuid] = observations[self.goal_uuid].view(-1, 1)

        return observations, use_agent, nstep, nsampler, nagent

    @staticmethod
    def adapt_output(x, use_agent, nstep, nsampler, nagent):
        """Restore the leading batch dimensions removed by `adapt_input`."""
        if use_agent:
            return x.view(nstep, nsampler, nagent, -1)
        return x.view(nstep, nsampler * nagent, -1)

    def forward(self, observations):
        """Encode observations into a `[..., output_dims]` tensor.

        A blind encoder returns the goal embedding directly. This check must
        happen before `adapt_input`, which reads both ResNet tensors and would
        otherwise raise `KeyError` when one of them is absent.
        """
        if self.blind:
            return self.embed_goal(observations[self.goal_uuid])

        observations, use_agent, nstep, nsampler, nagent = self.adapt_input(
            observations
        )

        rgb_embs = [
            self.compress_rgb_resnet(observations),
            self.distribute_target(observations),
        ]
        rgb_x = self.rgb_target_obs_combiner(
            torch.cat(
                rgb_embs,
                dim=1,
            )
        )
        depth_embs = [
            self.compress_depth_resnet(observations),
            self.distribute_target(observations),
        ]
        depth_x = self.depth_target_obs_combiner(
            torch.cat(
                depth_embs,
                dim=1,
            )
        )
        x = torch.cat([rgb_x, depth_x], dim=1)
        x = x.reshape(x.shape[0], -1)  # flatten

        return self.adapt_output(x, use_agent, nstep, nsampler, nagent)
================================================
FILE: allenact_plugins/navigation_plugin/pointnav/__init__.py
================================================
================================================
FILE: allenact_plugins/navigation_plugin/pointnav/models.py
================================================
"""Baseline models for use in the point navigation task.
Point navigation is currently available as a Task in AI2-THOR and
Facebook's Habitat.
"""
from typing import Optional, List, Union, Sequence
import gym
import torch
import torch.nn as nn
from gym.spaces import Dict as SpaceDict
from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.embodiedai.models import resnet as resnet
from allenact.embodiedai.models.basic_models import SimpleCNN
from allenact.embodiedai.models.visual_nav_models import (
VisualNavActorCritic,
FusionType,
)
class PointNavActorCritic(VisualNavActorCritic):
    """Actor-critic baseline for point navigation using raw images.

    Observations are encoded by concatenating the output of a visual encoder
    (skipped when the model is blind) with the goal coordinates, which are
    either used directly or passed through a learned linear embedding.
    """

    def __init__(
        # base params
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        goal_sensor_uuid: str,
        hidden_size=512,
        num_rnn_layers=1,
        rnn_type="GRU",
        add_prev_actions=False,
        add_prev_action_null_token=False,
        action_embed_size=4,
        multiple_beliefs=False,
        beliefs_fusion: Optional[FusionType] = None,
        auxiliary_uuids: Optional[Sequence[str]] = None,
        # custom params
        rgb_uuid: Optional[str] = None,
        depth_uuid: Optional[str] = None,
        embed_coordinates=False,
        coordinate_embedding_dim=8,
        coordinate_dims=2,
        # perception backbone params,
        backbone="gnresnet18",
        resnet_baseplanes=32,
    ):
        """Build the visual encoder, optional fusion/embedding layers, state
        encoders, actor-critic head and auxiliary models."""
        super().__init__(
            action_space=action_space,
            observation_space=observation_space,
            hidden_size=hidden_size,
            multiple_beliefs=multiple_beliefs,
            beliefs_fusion=beliefs_fusion,
            auxiliary_uuids=auxiliary_uuids,
        )

        self.goal_sensor_uuid = goal_sensor_uuid
        self.embed_coordinates = embed_coordinates
        # Raw coordinates contribute `coordinate_dims` features, embedded
        # coordinates contribute `coordinate_embedding_dim`.
        self.coordinate_embedding_size = (
            coordinate_embedding_dim if self.embed_coordinates else coordinate_dims
        )

        # With both modalities present, a linear layer maps `2 * hidden_size`
        # visual features to `hidden_size`.
        self.sensor_fusion = rgb_uuid is not None and depth_uuid is not None
        if self.sensor_fusion:
            self.sensor_fuser = nn.Linear(hidden_size * 2, hidden_size)

        self.backbone = backbone
        encoder_kwargs = dict(
            observation_space=observation_space,
            output_size=hidden_size,
            rgb_uuid=rgb_uuid,
            depth_uuid=depth_uuid,
        )
        if backbone == "simple_cnn":
            self.visual_encoder = SimpleCNN(**encoder_kwargs)
        else:  # resnet family
            self.visual_encoder = resnet.GroupNormResNetEncoder(
                **encoder_kwargs,
                baseplanes=resnet_baseplanes,
                ngroups=resnet_baseplanes // 2,
                make_backbone=getattr(resnet, backbone),
            )

        if self.embed_coordinates:
            self.coordinate_embedding = nn.Linear(
                coordinate_dims, coordinate_embedding_dim
            )

        self.create_state_encoders(
            obs_embed_size=self.goal_visual_encoder_output_dims,
            num_rnn_layers=num_rnn_layers,
            rnn_type=rnn_type,
            add_prev_actions=add_prev_actions,
            add_prev_action_null_token=add_prev_action_null_token,
            prev_action_embed_size=action_embed_size,
        )

        self.create_actorcritic_head()

        self.create_aux_models(
            obs_embed_size=self.goal_visual_encoder_output_dims,
            action_embed_size=action_embed_size,
        )

        self.train()

    @property
    def is_blind(self):
        """Whether the visual encoder reports that no image input is used."""
        encoder = self.visual_encoder
        return encoder.is_blind

    @property
    def goal_visual_encoder_output_dims(self):
        """Size of the last dimension produced by `forward_encoder`."""
        goal_dims = self.coordinate_embedding_size
        if not self.is_blind:
            return goal_dims + self.recurrent_hidden_state_size
        return goal_dims

    def get_target_coordinates_encoding(self, observations):
        """Return the goal coordinates as float32, embedded if configured."""
        coordinates = observations[self.goal_sensor_uuid].to(torch.float32)
        if not self.embed_coordinates:
            return coordinates
        return self.coordinate_embedding(coordinates)

    def forward_encoder(self, observations: ObservationType) -> torch.FloatTensor:
        """Embed observations as `[visual features (if any), goal encoding]`
        concatenated along the last dimension."""
        goal_encoding = self.get_target_coordinates_encoding(observations)
        pieces: List[torch.Tensor] = [goal_encoding]

        if not self.is_blind:
            visual_embed = self.visual_encoder(observations)
            if self.sensor_fusion:
                visual_embed = self.sensor_fuser(visual_embed)
            pieces = [visual_embed] + pieces

        return torch.cat(pieces, dim=-1)
================================================
FILE: allenact_plugins/robothor_plugin/__init__.py
================================================
from allenact.utils.system import ImportChecker
# Import `ai2thor` inside `ImportChecker`; presumably the message below is
# surfaced when the import fails — confirm against `allenact.utils.system`.
with ImportChecker(
    "Cannot `import ai2thor`, please install `ai2thor` (`pip install ai2thor`)."
):
    # noinspection PyUnresolvedReferences
    import ai2thor
================================================
FILE: allenact_plugins/robothor_plugin/configs/__init__.py
================================================
================================================
FILE: allenact_plugins/robothor_plugin/extra_environment.yml
================================================
channels:
- defaults
- conda-forge
dependencies:
- ai2thor>=2.5.3
- numba
- pip
- colour
- packaging
- pip:
- numpy-quaternion
- pyquaternion>=0.9.9
- python-xlib
================================================
FILE: allenact_plugins/robothor_plugin/extra_requirements.txt
================================================
ai2thor>=2.5.3
numpy-quaternion
pyquaternion>=0.9.9
colour
numba
packaging
python-xlib
================================================
FILE: allenact_plugins/robothor_plugin/robothor_constants.py
================================================
# Action-name strings for the RoboTHOR plugin. Presumably these match the
# action names understood by the AI2-THOR controller and the plugin's task
# definitions — verify against the callers that import them.
MOVE_AHEAD = "MoveAhead"
ROTATE_LEFT = "RotateLeft"
ROTATE_RIGHT = "RotateRight"
LOOK_DOWN = "LookDown"
LOOK_UP = "LookUp"
END = "End"
PASS = "Pass"
================================================
FILE: allenact_plugins/robothor_plugin/robothor_distributions.py
================================================
from typing import Tuple
import torch
from allenact.base_abstractions.distributions import CategoricalDistr, Distr
class TupleCategoricalDistr(Distr):
    """Categorical distribution whose actions are exchanged as tuples.

    A single `CategoricalDistr` is wrapped; its last batch dimension is split
    into one tensor per entry when sampling, and stacked back together when
    computing log-probabilities.
    """

    def __init__(self, probs=None, logits=None, validate_args=None):
        self.dists = CategoricalDistr(
            probs=probs, logits=logits, validate_args=validate_args
        )

    def log_prob(self, actions: Tuple[torch.LongTensor, ...]) -> torch.FloatTensor:
        """Log-probability of a tuple of per-entry action tensors."""
        # flattened output [steps, samplers, num_agents]
        return self.dists.log_prob(torch.stack(actions, dim=-1))

    def entropy(self) -> torch.FloatTensor:
        """Entropy of the wrapped categorical distribution."""
        # flattened output [steps, samplers, num_agents]
        return self.dists.entropy()

    def sample(self, sample_shape=torch.Size()) -> Tuple[torch.LongTensor, ...]:
        """Draw a sample and return it as one tensor per last-dim entry."""
        # Split along the last dim and drop the resulting singleton. Using
        # `squeeze(-1)` (rather than reshaping to the first two dims) also
        # works when a non-empty `sample_shape` adds leading dimensions.
        res = self.dists.sample(sample_shape).split(1, dim=-1)
        return tuple(r.squeeze(-1) for r in res)

    def mode(self) -> Tuple[torch.LongTensor, ...]:
        """Return the mode as one tensor per last-dim entry."""
        # split and remove trailing singleton dim
        res = self.dists.mode().split(1, dim=-1)
        return tuple(r.squeeze(-1) for r in res)
================================================
FILE: allenact_plugins/robothor_plugin/robothor_environment.py
================================================
import copy
import math
import random
import warnings
from typing import Any, Optional, Dict, List, Union, Tuple, Collection
import ai2thor.server
import numpy as np
from ai2thor.controller import Controller
from ai2thor.fifo_server import FifoServer
from ai2thor.util import metrics
from allenact.utils.cache_utils import DynamicDistanceCache
from allenact.utils.experiment_utils import recursive_update
from allenact.utils.system import get_logger
class RoboThorEnvironment:
"""Wrapper for the robo2thor controller providing additional functionality
and bookkeeping.
See [here](https://ai2thor.allenai.org/robothor/documentation) for comprehensive
documentation on RoboTHOR.
# Attributes
controller : The AI2-THOR controller.
config : The AI2-THOR controller configuration
"""
def __init__(self, all_metadata_available: bool = True, **kwargs):
    """Start an AI2-THOR controller with RoboTHOR defaults.

    # Parameters

    all_metadata_available : When `True`, the reachable positions of the
        initial scene are cached and a distance cache is created.
    kwargs : Controller configuration overrides, merged into the defaults
        below with `recursive_update`.
    """
    self.config = {
        "rotateStepDegrees": 30.0,
        "visibilityDistance": 1.0,
        "gridSize": 0.25,
        "continuousMode": True,
        "snapToGrid": False,
        "agentMode": "locobot",
        "width": 640,
        "height": 480,
        "agentCount": 1,
        "server_class": FifoServer,
    }

    if "agentCount" in kwargs:
        assert kwargs["agentCount"] > 0

    agent_mode = kwargs.setdefault("agentMode", "locobot")
    if agent_mode not in ["bot", "locobot"]:
        warnings.warn(
            f"The RoboTHOR environment has not been tested using"
            f" an agent of mode '{agent_mode}'."
        )

    recursive_update(self.config, kwargs)
    self.controller = Controller(
        **self.config,
    )

    self.all_metadata_available = all_metadata_available

    self.scene_to_reachable_positions: Optional[Dict[str, Any]] = None
    self.distance_cache: Optional[DynamicDistanceCache] = None

    if self.all_metadata_available:
        initial_scene = self.scene_name
        reachable = copy.deepcopy(self.currently_reachable_points)
        self.scene_to_reachable_positions = {initial_scene: reachable}
        assert len(self.scene_to_reachable_positions[self.scene_name]) > 10

        self.distance_cache = DynamicDistanceCache(rounding=1)

    self.agent_count = self.config["agentCount"]

    # Used for backwards compatability with the teleport action
    self._extra_teleport_kwargs: Dict[str, Any] = {}
def initialize_grid_dimensions(
    self, reachable_points: Collection[Dict[str, float]]
) -> Tuple[int, int, int, int]:
    """Computes bounding box for reachable points quantized with the
    current gridSize.

    Returns `(xmin, xmax, zmin, zmax)` in grid cells. Asserts that no two
    reachable points fall into the same grid cell.
    """
    grid_size = self.config["gridSize"]
    cells = {
        (round(point["x"] / grid_size), round(point["z"] / grid_size))
        for point in reachable_points
    }

    assert len(reachable_points) == len(cells)

    xs = [cell[0] for cell in cells]
    zs = [cell[1] for cell in cells]

    return min(xs), max(xs), min(zs), max(zs)
def set_object_filter(self, object_ids: List[str]):
    """Issue the `SetObjectFilter` action for `object_ids` without rendering
    an image."""
    filter_args = {"objectIds": object_ids, "renderImage": False}
    self.controller.step("SetObjectFilter", **filter_args)
def reset_object_filter(self):
    """Issue the `ResetObjectFilter` action without rendering an image."""
    controller = self.controller
    controller.step("ResetObjectFilter", renderImage=False)
def path_from_point_to_object_type(
    self, point: Dict[str, float], object_type: str, allowed_error: float
) -> Optional[List[Dict[str, float]]]:
    """Query the controller for a shortest path from `point` to an object of
    type `object_type`.

    Returns the path's corner points, or `None` (after logging a debug
    message) when the `GetShortestPath` action does not succeed.
    """
    query = dict(
        action="GetShortestPath",
        objectType=object_type,
        position=point,
        allowedError=allowed_error,
    )
    event = self.controller.step(**query)

    if not event.metadata["lastActionSuccess"]:
        get_logger().debug(
            "Failed to find path for {} in {}. Start point {}, agent state {}.".format(
                object_type,
                self.controller.last_event.metadata["sceneName"],
                point,
                self.agent_state(),
            )
        )
        return None

    return event.metadata["actionReturn"]["corners"]
def distance_from_point_to_object_type(
    self, point: Dict[str, float], object_type: str, allowed_error: float
) -> float:
    """Minimal geodesic distance from a point to an object of the given
    type.

    It might return -1.0 for unreachable targets.
    """
    path = self.path_from_point_to_object_type(point, object_type, allowed_error)
    if not path:
        return -1.0

    # Because `allowed_error != 0` means that the path returned above might not start
    # at `point`, we explicitly add any offset there is.
    first_corner = path[0]
    start_offset = math.sqrt(
        (point["x"] - first_corner["x"]) ** 2
        + (point["z"] - first_corner["z"]) ** 2
    )
    return metrics.path_distance(path) + start_offset
def distance_to_object_type(self, object_type: str, agent_id: int = 0) -> float:
    """Minimal geodesic distance to object of given type from agent's
    current location.

    The lookup goes through `self.distance_cache.find_distance`, which is
    handed a fallback that retries the path query with a doubling error
    tolerance (starting at 0.05, while below 2.5).

    It might return -1.0 for unreachable targets.
    """
    assert 0 <= agent_id < self.agent_count
    assert (
        self.all_metadata_available
    ), "`distance_to_object_type` cannot be called when `self.all_metadata_available` is `False`."

    def retry_dist(position: Dict[str, float], object_type: str):
        tolerance = 0.05
        retry_note = ""
        dist = -1.0
        while tolerance < 2.5:
            dist = self.distance_from_point_to_object_type(
                position, object_type, tolerance
            )
            if not dist < 0:
                break
            retry_note = (
                f"In scene {self.scene_name}, could not find a path from {position} to {object_type} with"
                f" {tolerance} error tolerance. Increasing this tolerance to"
                f" {2 * tolerance} any trying again."
            )
            tolerance *= 2

        if dist < 0:
            get_logger().debug(
                f"In scene {self.scene_name}, could not find a path from {position} to {object_type}"
                f" with {tolerance} error tolerance. Returning a distance of -1."
            )
        elif retry_note != "":
            # Only report retries when at least one attempt failed.
            get_logger().debug(retry_note)
        return dist

    agent_event = self.controller.last_event.events[agent_id]
    return self.distance_cache.find_distance(
        self.scene_name,
        agent_event.metadata["agent"]["position"],
        object_type,
        retry_dist,
    )
def path_from_point_to_point(
    self, position: Dict[str, float], target: Dict[str, float], allowedError: float
) -> Optional[List[Dict[str, float]]]:
    """Query the controller for a shortest path from `position` to `target`.

    Returns the path's corner points. A `ValueError` is propagated; any
    other exception is logged at debug level and `None` is returned.
    """
    try:
        event = self.controller.step(
            action="GetShortestPathToPoint",
            position=position,
            target=target,
            allowedError=allowedError,
        )
        return event.metadata["actionReturn"]["corners"]
    except ValueError:
        raise
    except Exception:
        get_logger().debug(
            "Failed to find path for {} in {}. Start point {}, agent state {}.".format(
                target,
                self.controller.last_event.metadata["sceneName"],
                position,
                self.agent_state(),
            )
        )
        return None
def distance_from_point_to_point(
    self, position: Dict[str, float], target: Dict[str, float], allowed_error: float
) -> float:
    """Geodesic distance from `position` to `target`, or -1.0 when no path
    is found."""
    path = self.path_from_point_to_point(position, target, allowed_error)
    if not path:
        return -1.0

    # Because `allowed_error != 0` means that the path returned above might not start
    # or end exactly at the position/target points, we explicitly add any offset there is.
    first_corner, last_corner = path[0], path[-1]
    start_offset = math.sqrt(
        (position["x"] - first_corner["x"]) ** 2
        + (position["z"] - first_corner["z"]) ** 2
    )
    end_offset = math.sqrt(
        (target["x"] - last_corner["x"]) ** 2
        + (target["z"] - last_corner["z"]) ** 2
    )
    return metrics.path_distance(path) + start_offset + end_offset
def distance_to_point(self, target: Dict[str, float], agent_id: int = 0) -> float:
    """Minimal geodesic distance to end point from agent's current
    location.

    The lookup goes through `self.distance_cache.find_distance`, which is
    handed a fallback that retries the path query with a doubling error
    tolerance (starting at 0.05, while below 2.5).

    It might return -1.0 for unreachable targets.
    """
    assert 0 <= agent_id < self.agent_count
    # The message names this method (it previously named
    # `distance_to_object_type`, which pointed at the wrong call site).
    assert (
        self.all_metadata_available
    ), "`distance_to_point` cannot be called when `self.all_metadata_available` is `False`."

    def retry_dist(position: Dict[str, float], target: Dict[str, float]):
        allowed_error = 0.05
        debug_log = ""
        d = -1.0
        while allowed_error < 2.5:
            d = self.distance_from_point_to_point(position, target, allowed_error)
            if d < 0:
                debug_log = (
                    f"In scene {self.scene_name}, could not find a path from {position} to {target} with"
                    f" {allowed_error} error tolerance. Increasing this tolerance to"
                    f" {2 * allowed_error} any trying again."
                )
                allowed_error *= 2
            else:
                break
        if d < 0:
            get_logger().debug(
                f"In scene {self.scene_name}, could not find a path from {position} to {target}"
                f" with {allowed_error} error tolerance. Returning a distance of -1."
            )
        elif debug_log != "":
            # Only report retries when at least one attempt failed.
            get_logger().debug(debug_log)
        return d

    return self.distance_cache.find_distance(
        self.scene_name,
        self.controller.last_event.events[agent_id].metadata["agent"]["position"],
        target,
        retry_dist,
    )
def agent_state(self, agent_id: int = 0) -> Dict:
    """Return agent position, rotation and horizon.

    Position components are returned as top-level float entries, the
    rotation as a nested dict of floats, and the horizon rounded to one
    decimal place.
    """
    assert 0 <= agent_id < self.agent_count
    agent_meta = self.last_event.events[agent_id].metadata["agent"]

    state = {axis: float(value) for axis, value in agent_meta["position"].items()}
    state["rotation"] = {
        axis: float(value) for axis, value in agent_meta["rotation"].items()
    }
    state["horizon"] = round(float(agent_meta["cameraHorizon"]), 1)
    return state
def teleport(
    self,
    pose: Dict[str, float],
    rotation: Dict[str, float],
    horizon: float = 0.0,
    agent_id: int = 0,
):
    """Teleport an agent with `TeleportFull` and report whether it succeeded.

    If the controller raises `ValueError` and no extra teleport kwargs have
    been recorded yet, `standing=True` is added to them and the call is
    retried once; otherwise the error is re-raised.
    """
    assert 0 <= agent_id < self.agent_count
    try:
        event = self.controller.step(
            action="TeleportFull",
            x=pose["x"],
            y=pose["y"],
            z=pose["z"],
            rotation=rotation,
            horizon=horizon,
            agentId=agent_id,
            **self._extra_teleport_kwargs,
        )
    except ValueError as err:
        if len(self._extra_teleport_kwargs) != 0:
            # The compatibility kwargs were already in use: nothing left to try.
            raise err
        self._extra_teleport_kwargs["standing"] = True
        return self.teleport(
            pose=pose, rotation=rotation, horizon=horizon, agent_id=agent_id
        )
    return event.metadata["lastActionSuccess"]
def reset(
    self, scene_name: str = None, filtered_objects: Optional[List[str]] = None
) -> None:
    """Resets scene to a known initial state.

    The controller is only reset when `scene_name` is given and differs from
    the current scene; in that case the scene's reachable positions are
    cached the first time it is seen (if metadata is available). Afterwards
    the object filter is set to `filtered_objects`, or cleared when that is
    empty or `None`.
    """
    switching_scene = scene_name is not None and scene_name != self.scene_name
    if switching_scene:
        self.controller.reset(scene_name)
        assert self.last_action_success, "Could not reset to new scene"

        needs_caching = (
            self.all_metadata_available
            and scene_name not in self.scene_to_reachable_positions
        )
        if needs_caching:
            reachable = copy.deepcopy(self.currently_reachable_points)
            self.scene_to_reachable_positions[scene_name] = reachable
            assert len(self.scene_to_reachable_positions[scene_name]) > 10

    if filtered_objects:
        self.set_object_filter(filtered_objects)
    else:
        self.reset_object_filter()
def random_reachable_state(
    self, seed: Optional[int] = None
) -> Dict[str, Union[Dict[str, float], float]]:
    """Returns a random reachable location in the scene.

    When `seed` is given, the global `random` module is re-seeded with it
    before sampling. The position is drawn from the cached reachable
    positions of the current scene, the rotation from multiples of
    `rotateStepDegrees`, and the horizon is always 0.0.
    """
    assert (
        self.all_metadata_available
    ), "`random_reachable_state` cannot be called when `self.all_metadata_available` is `False`."

    if seed is not None:
        random.seed(seed)

    candidates = self.scene_to_reachable_positions[self.scene_name]
    assert len(candidates) > 10
    position = copy.deepcopy(
        random.choice(self.scene_to_reachable_positions[self.scene_name])
    )

    allowed_rotations = np.arange(0.0, 360.0, self.config["rotateStepDegrees"])
    yaw = random.choice(allowed_rotations)
    horizon = 0.0

    state = {axis: float(value) for axis, value in position.items()}
    state["rotation"] = {"x": 0.0, "y": float(yaw), "z": 0.0}
    state["horizon"] = float(horizon)
    return state
def randomize_agent_location(
    self,
    seed: int = None,
    partial_position: Optional[Dict[str, float]] = None,
    agent_id: int = 0,
) -> Dict[str, Union[Dict[str, float], float]]:
    """Teleports the agent to a random reachable location in the scene.

    Up to 10 random states are tried, each overlaid with `partial_position`.
    If the last attempt still failed, a warning is logged and the teleport
    is repeated with `force_action=True`. Returns the resulting agent state.
    """
    assert 0 <= agent_id < self.agent_count

    overrides = {} if partial_position is None else partial_position
    state: Optional[Dict] = None

    for _ in range(10):
        state = {**self.random_reachable_state(seed=seed), **overrides}
        self.controller.step("TeleportFull", **state, agentId=agent_id)
        if self.last_action_success:
            break

    if not self.last_action_success:
        get_logger().warning(
            (
                "Randomize agent location in scene {} and current random state {}"
                " with seed {} and partial position {} failed in "
                "10 attempts. Forcing the action."
            ).format(self.scene_name, state, seed, overrides)
        )
        self.controller.step("TeleportFull", **state, force_action=True, agentId=agent_id)  # type: ignore
        assert self.last_action_success, "Force action failed with {}".format(state)

    return self.agent_state(agent_id=agent_id)
def known_good_locations_list(self):
    """Return the stored reachable positions of the current scene.

    Requires `self.all_metadata_available` to be truthy; the positions are
    looked up in `self.scene_to_reachable_positions` by `self.scene_name`.
    """
    metadata_ready = self.all_metadata_available
    assert (
        metadata_ready
    ), "`known_good_locations_list` cannot be called when `self.all_metadata_available` is `False`."
    current_scene = self.scene_name
    return self.scene_to_reachable_positions[current_scene]
@property
def currently_reachable_points(self) -> List[Dict[str, float]]:
    """List of {"x": x, "y": y, "z": z} locations in the scene that are
    currently reachable.

    Issues a `GetReachablePositions` action on the controller and returns
    `self.last_action_return`; asserts that the action succeeded.
    """
    self.controller.step(action="GetReachablePositions")
    succeeded = self.last_action_success
    assert (
        succeeded
    ), f"Could not get reachable positions for reason {self.last_event.metadata['errorMessage']}."
    reachable = self.last_action_return
    return reachable
@property
def scene_name(self) -> str:
    """Current ai2thor scene, with any "_physics" substring removed."""
    raw_name = self.controller.last_event.metadata["sceneName"]
    return raw_name.replace("_physics", "")
@property
def current_frame(self) -> np.ndarray:
    """Returns rgb image corresponding to the agent's egocentric view
    (the `frame` of the controller's last event)."""
    latest_event = self.controller.last_event
    return latest_event.frame
@property
def current_depth(self) -> np.ndarray:
    """Returns depth image corresponding to the agent's egocentric view
    (the `depth_frame` of the controller's last event)."""
    latest_event = self.controller.last_event
    return latest_event.depth_frame
@property
def current_frames(self) -> List[np.ndarray]:
    """Returns rgb images corresponding to the agents' egocentric views,
    one per agent index in `range(self.agent_count)`."""
    frames = []
    for agent_index in range(self.agent_count):
        agent_event = self.controller.last_event.events[agent_index]
        frames.append(agent_event.frame)
    return frames
@property
def current_depths(self) -> List[np.ndarray]:
    """Returns depth images corresponding to the agents' egocentric
    views, one per agent index in `range(self.agent_count)`."""
    depth_frames = []
    for agent_index in range(self.agent_count):
        agent_event = self.controller.last_event.events[agent_index]
        depth_frames.append(agent_event.depth_frame)
    return depth_frames
@property
def last_event(self) -> ai2thor.server.Event:
    """Last event returned by the controller."""
    controller = self.controller
    return controller.last_event
@property
def last_action(self) -> str:
    """Last action, as a string, taken by the agent (the `lastAction`
    entry of the last event's metadata)."""
    metadata = self.controller.last_event.metadata
    return metadata["lastAction"]
@property
def last_action_success(self) -> bool:
    """Was the last action taken by the agent a success? (the
    `lastActionSuccess` entry of the last event's metadata)."""
    metadata = self.controller.last_event.metadata
    return metadata["lastActionSuccess"]
@property
def last_action_return(self) -> Any:
    """Get the value returned by the last action (if applicable).

    Reads the `actionReturn` entry of the last event's metadata. For an
    example of an action that returns a value, see
    `"GetReachablePositions"`.
    """
    metadata = self.controller.last_event.metadata
    return metadata["actionReturn"]
def step(
    self,
    action_dict: Optional[Dict[str, Union[str, int, float, Dict]]] = None,
    **kwargs: Union[str, int, float, Dict],
) -> ai2thor.server.Event:
    """Take a step in the ai2thor environment.

    # Parameters

    action_dict : Optional mapping of controller step arguments. It is not
        modified by this call.
    kwargs : Additional step arguments; these take precedence over entries
        of `action_dict` with the same key.

    # Returns

    Whatever `self.controller.step` returns for the merged arguments.
    """
    # Merge into a fresh dict so the caller's `action_dict` is never mutated
    # (callers may reuse one action dictionary across several steps).
    merged_action = {**(action_dict if action_dict is not None else {}), **kwargs}
    return self.controller.step(**merged_action)
def stop(self):
    """Stops the ai2thor controller.

    Any exception raised while stopping is logged as a warning instead of
    being propagated.
    """
    try:
        self.controller.stop()
    except Exception as err:
        message = str(err)
        get_logger().warning(message)
def all_objects(self) -> List[Dict[str, Any]]:
    """Return all object metadata (the `objects` entry of the last event's
    metadata)."""
    metadata = self.controller.last_event.metadata
    return metadata["objects"]
def all_objects_with_properties(
    self, properties: Dict[str, Any]
) -> List[Dict[str, Any]]:
    """Find all objects with the given properties.

    An object from `self.all_objects()` is kept when, for every
    `(key, value)` pair in `properties`, `obj[key] != value` is false.
    """
    return [
        candidate
        for candidate in self.all_objects()
        if not any(
            candidate[key] != wanted for key, wanted in properties.items()
        )
    ]
def visible_objects(self) -> List[Dict[str, Any]]:
    """Return all visible objects, i.e. those whose `visible` property is
    `True`."""
    visibility_filter = {"visible": True}
    return self.all_objects_with_properties(visibility_filter)
================================================
FILE: allenact_plugins/robothor_plugin/robothor_models.py
================================================
from typing import Tuple, Optional
import gym
import torch
from gym.spaces import Dict as SpaceDict
from allenact.algorithms.onpolicy_sync.policy import (
ActorCriticModel,
LinearActorCriticHead,
DistributionType,
Memory,
ObservationType,
)
from allenact.base_abstractions.misc import ActorCriticOutput
from allenact.embodiedai.models.basic_models import RNNStateEncoder, SimpleCNN
from allenact_plugins.robothor_plugin.robothor_distributions import (
TupleCategoricalDistr,
)
class TupleLinearActorCriticHead(LinearActorCriticHead):
    """Actor-critic head that wraps its logits in a `TupleCategoricalDistr`."""

    def forward(self, x):
        """Split the `actor_and_critic` output into a distribution and values.

        All but the last entry of the final dimension are used as logits;
        the last entry is the value.
        """
        actor_critic_out = self.actor_and_critic(x)

        action_logits = actor_critic_out[..., :-1]
        state_values = actor_critic_out[..., -1:]

        # Keep the two leading dimensions and flatten everything after them.
        leading_dims = state_values.shape[:2]
        flat_values = state_values.view(*leading_dims, -1)

        # noinspection PyArgumentList
        distribution = TupleCategoricalDistr(
            logits=action_logits
        )  # [steps, samplers, ...]
        return distribution, flat_values  # values: [steps, samplers, flattened]
class NavToPartnerActorCriticSimpleConvRNN(ActorCriticModel[TupleCategoricalDistr]):
    """Actor-critic model built from a `SimpleCNN` visual encoder, an
    `RNNStateEncoder` and a `TupleLinearActorCriticHead`.

    The action space is a `gym.spaces.Tuple`; its length is used as the
    number of agents and the size of its first entry as the number of
    actions of the head.
    """

    action_space: gym.spaces.Tuple

    def __init__(
        self,
        action_space: gym.spaces.Tuple,
        observation_space: SpaceDict,
        rgb_uuid: Optional[str] = "rgb",
        hidden_size=512,
        num_rnn_layers=1,
        rnn_type="GRU",
    ):
        """Build the encoder, recurrent state encoder and actor-critic head
        and put the model in training mode."""
        super().__init__(action_space=action_space, observation_space=observation_space)

        self._hidden_size = hidden_size
        self.rgb_uuid = rgb_uuid

        # RGB-only visual encoder (no depth input is configured).
        self.visual_encoder = SimpleCNN(
            observation_space=observation_space,
            output_size=hidden_size,
            rgb_uuid=self.rgb_uuid,
            depth_uuid=None,
        )

        # A blind model feeds nothing into the recurrent encoder.
        if self.is_blind:
            rnn_input_size = 0
        else:
            rnn_input_size = self.recurrent_hidden_state_size
        self.state_encoder = RNNStateEncoder(
            rnn_input_size,
            self._hidden_size,
            num_layers=num_rnn_layers,
            rnn_type=rnn_type,
        )

        num_actions = action_space[0].n
        self.actor_critic = TupleLinearActorCriticHead(self._hidden_size, num_actions)

        self.train()

    @property
    def output_size(self):
        """Size of the hidden representation."""
        return self._hidden_size

    @property
    def is_blind(self):
        """Whether the visual encoder reports itself as blind."""
        return self.visual_encoder.is_blind

    @property
    def num_recurrent_layers(self):
        """Number of recurrent layers of the state encoder."""
        return self.state_encoder.num_recurrent_layers

    @property
    def recurrent_hidden_state_size(self):
        """Size of the recurrent hidden state (equal to the hidden size)."""
        return self._hidden_size

    @property
    def num_agents(self):
        """Number of agents, i.e. the length of the tuple action space."""
        return len(self.action_space)

    def _recurrent_memory_specification(self):
        """Describe the single `rnn` memory tensor: named dimensions
        (layer, sampler, agent, hidden) and dtype `torch.float32`."""
        rnn_dims = (
            ("layer", self.num_recurrent_layers),
            ("sampler", None),
            ("agent", self.num_agents),
            ("hidden", self.recurrent_hidden_state_size),
        )
        return dict(rnn=(rnn_dims, torch.float32))

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Encode observations, update the recurrent state and produce the
        action distributions and values.

        Raises `NotImplementedError` when the model is blind.
        """
        if self.is_blind:
            # TODO manage blindness for all agents simultaneously or separate?
            raise NotImplementedError()

        perception_embed = self.visual_encoder(observations)

        # TODO alternative where all agents consume all observations
        encoded, new_hidden_states = self.state_encoder(
            perception_embed, memory.tensor("rnn"), masks
        )

        distributions, values = self.actor_critic(encoded)

        output = ActorCriticOutput(
            distributions=distributions,
            values=values,
            extras={},
        )
        return output, memory.set_tensor("rnn", new_hidden_states)
================================================
FILE: allenact_plugins/robothor_plugin/robothor_preprocessors.py
================================================
from collections import OrderedDict
from typing import Dict, Any, Optional, List, cast
import gym
import numpy as np
import torch
from gym.spaces.dict import Dict as SpaceDict
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.cacheless_frcnn import fasterrcnn_resnet50_fpn
from allenact.utils.misc_utils import prepare_locals_for_super
class BatchedFasterRCNN(torch.nn.Module):
    """Runs a Faster R-CNN detector on a batch of images and bins the
    detections into a `res` x `res` spatial grid with at most `maxdets`
    detections per cell."""

    # fmt: off
    COCO_INSTANCE_CATEGORY_NAMES = [
        '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
        'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
        'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
        'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
        'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
        'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
        'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
        'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
        'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
        'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]
    # fmt: on

    def __init__(self, thres=0.12, maxdets=3, res=7):
        """Create the pretrained detector (in eval mode) and store the score
        threshold, the per-cell detection cap and the grid resolution."""
        super().__init__()
        self.model = fasterrcnn_resnet50_fpn(pretrained=True)
        self.eval()

        self.min_score = thres
        self.maxdets = maxdets
        self.res = res

    def detector_tensor(self, boxes, classes, scores, aspect_ratio=1.0):
        """Bin one image's detections into grid tensors.

        Each detection is assigned to the grid cell containing its box
        center. Per cell, the entries are sorted in descending tuple order
        (score first) and the first `maxdets` are kept.

        Returns a `(1, maxdets, res, res)` int64 class tensor (0 where empty)
        and a `(1, maxdets * 5, res, res)` box tensor (-1 where empty) holding
        score, width, height, x and y for each kept detection.
        """
        grid, cap = self.res, self.maxdets

        # Interior cell boundaries at 1/res, 2/res, ..., (res - 1)/res.
        edges = np.arange(1, grid) / grid

        class_grid = torch.zeros(grid, grid, cap, dtype=torch.int64)  # 0 is background
        # regular range is [0, 1] (vert) or [0, aspect_ratio] (horiz)
        box_grid = torch.full((grid, grid, cap, 5), -1.0)

        cells = [[[] for _ in range(grid)] for _ in range(grid)]  # grid of arrays

        for det in range(classes.shape[0]):
            left = boxes[det, 0].item()
            top = boxes[det, 1].item()
            right = boxes[det, 2].item()
            bottom = boxes[det, 3].item()

            center_x = (left + right) / 2
            center_y = (top + bottom) / 2
            col = np.digitize(center_x, bins=aspect_ratio * edges).item()
            row = np.digitize(center_y, bins=edges).item()

            entry = (
                scores[det][classes[det]].item(),  # prob
                (boxes[det, 2] - boxes[det, 0]).item() / aspect_ratio,  # width
                (boxes[det, 3] - boxes[det, 1]).item(),  # height
                left / aspect_ratio,  # x
                top,  # y
                classes[det].item(),  # class
            )
            cells[row][col].append(entry)

        for row, cell_row in enumerate(cells):
            for col, cell in enumerate(cell_row):
                best = sorted(cell, reverse=True)[:cap]
                for slot, entry in enumerate(best):
                    class_grid[row, col, slot] = entry[-1]
                    # prob, size, top left
                    box_grid[row, col, slot, :] = torch.tensor(list(entry[:-1]))

        class_grid = class_grid.permute(2, 0, 1).unsqueeze(0).contiguous()
        box_grid = box_grid.view(grid, grid, -1)
        box_grid = box_grid.permute(2, 0, 1).unsqueeze(0).contiguous()

        return class_grid, box_grid

    def forward(self, imbatch):
        """Detect objects in every image of `imbatch` and return the
        concatenated per-image class and box grids on `imbatch.device`.

        Boxes are divided by the image height (`imbatch.shape[-2]`), so rows
        span [0, 1] and columns span [0, aspect_ratio].
        """
        with torch.no_grad():
            imglist = [
                chunk.squeeze(0) for chunk in imbatch.split(split_size=1, dim=0)
            ]

            preds = self.model(imglist)

            height, width = imbatch.shape[-2], imbatch.shape[-1]

            class_grids = []
            box_grids = []
            for pred in preds:
                keep = pred["scores"] > self.min_score  # already after nms

                kept_boxes = pred["boxes"][keep] / height
                kept_labels = pred["labels"][keep]
                kept_scores = pred["scores"][keep]

                # hack: fill in a full prob score (all classes, 0 score if
                # undetected) for each box, for backwards compatibility
                class_ids = torch.arange(91, device=kept_scores.device).unsqueeze(0)
                dense_scores = torch.where(
                    class_ids == kept_labels.unsqueeze(1),
                    kept_scores.unsqueeze(1),
                    torch.zeros(kept_scores.shape[0], 91, device=kept_scores.device),
                )

                image_classes, image_boxes = self.detector_tensor(
                    kept_boxes,
                    kept_labels,
                    dense_scores,
                    aspect_ratio=width / height,
                )
                class_grids.append(image_classes)
                box_grids.append(image_boxes)

            classes = torch.cat(class_grids, dim=0).to(imbatch.device)
            boxes = torch.cat(box_grids, dim=0).to(imbatch.device)

        return classes, boxes
class FasterRCNNPreProcessorRoboThor(Preprocessor):
    """Preprocess RGB images using a Faster R-CNN detector.

    Wraps a `BatchedFasterRCNN` and exposes its two outputs under the keys
    `frcnn_classes` and `frcnn_boxes`.
    """

    COCO_INSTANCE_CATEGORY_NAMES = BatchedFasterRCNN.COCO_INSTANCE_CATEGORY_NAMES

    def __init__(
        self,
        input_uuids: List[str],
        output_uuid: str,
        input_height: int,
        input_width: int,
        max_dets: int,
        detector_spatial_res: int,
        detector_thres: float,
        device: Optional[torch.device] = None,
        device_ids: Optional[List[torch.device]] = None,
        **kwargs: Any,
    ):
        """Create the detector and the observation space.

        Exactly one input uuid is accepted. `device` defaults to the CPU and
        `device_ids` to the indices of all visible CUDA devices.

        NOTE: the local variables of this method are forwarded to the
        superclass through `prepare_locals_for_super(locals())`, so adding,
        renaming or removing locals here changes what the superclass receives.
        """
        self.input_height = input_height
        self.input_width = input_width
        self.max_dets = max_dets
        self.detector_spatial_res = detector_spatial_res
        self.detector_thres = detector_thres

        self.device = torch.device("cpu") if device is None else device
        self.device_ids = device_ids or cast(
            List[torch.device], list(range(torch.cuda.device_count()))
        )

        self.frcnn: BatchedFasterRCNN = BatchedFasterRCNN(
            thres=self.detector_thres,
            maxdets=self.max_dets,
            res=self.detector_spatial_res,
        )

        spaces: OrderedDict[str, gym.Space] = OrderedDict()

        # Class grid: one integer class id per (detection slot, row, column).
        shape = (self.max_dets, self.detector_spatial_res, self.detector_spatial_res)
        spaces["frcnn_classes"] = gym.spaces.Box(
            low=0,  # 0 is bg
            high=len(self.COCO_INSTANCE_CATEGORY_NAMES) - 1,
            shape=shape,
            dtype=np.int64,
        )

        # Box grid: five values per detection slot for each grid cell.
        shape = (
            self.max_dets * 5,
            self.detector_spatial_res,
            self.detector_spatial_res,
        )
        spaces["frcnn_boxes"] = gym.spaces.Box(low=-np.inf, high=np.inf, shape=shape)

        assert (
            len(input_uuids) == 1
        ), "fasterrcnn preprocessor can only consume one observation type"

        observation_space = SpaceDict(spaces=spaces)

        super().__init__(**prepare_locals_for_super(locals()))

    def to(self, device: torch.device) -> "FasterRCNNPreProcessorRoboThor":
        """Move the detector to `device`, remember the device and return
        `self`."""
        self.frcnn = self.frcnn.to(device)
        self.device = device
        return self

    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:
        """Run the detector on the single input observation.

        The input is moved to `self.device` and permuted from channels-last to
        channels-first before detection. Returns a dict with the keys
        `frcnn_classes` and `frcnn_boxes`.
        """
        frames_tensor = (
            obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2)
        )  # bhwc -> bchw (unnormalized)
        classes, boxes = self.frcnn(frames_tensor)

        return {"frcnn_classes": classes, "frcnn_boxes": boxes}
================================================
FILE: allenact_plugins/robothor_plugin/robothor_sensors.py
================================================
from typing import Any, Tuple, Optional
import ai2thor.controller
import gym
import numpy as np
import quaternion # noqa # pylint: disable=unused-import
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact.utils.system import get_logger
from allenact_plugins.ithor_plugin.ithor_sensors import (
RGBSensorThor,
THOR_ENV_TYPE,
THOR_TASK_TYPE,
)
from allenact_plugins.robothor_plugin.robothor_environment import RoboThorEnvironment
from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask
class RGBSensorRoboThor(RGBSensorThor):
    """Sensor for RGB images in RoboTHOR.

    Deprecated alias of `RGBSensorThor`: constructing it logs a deprecation
    warning and otherwise defers entirely to the parent class.
    """

    def __init__(self, *args: Any, **kwargs: Any):
        """Log the deprecation warning, then initialize the parent sensor."""
        deprecation_notice = (
            "`RGBSensorRoboThor` is deprecated, use `RGBSensorThor` instead."
        )
        get_logger().warning(deprecation_notice)

        super().__init__(*args, **kwargs)
class RGBSensorMultiRoboThor(RGBSensor[RoboThorEnvironment, Task[RoboThorEnvironment]]):
    """Sensor for RGB images of several agents in RoboTHOR.

    Collects, from a running RoboThorEnvironment instance, the current RGB
    frame of each agent and stacks them along a new leading axis.
    """

    def __init__(self, agent_count: int = 2, **kwargs):
        """Initialize the parent sensor with `kwargs` and store the number of
        agents."""
        # TODO take all named args from superclass and pass with super().__init__(**prepare_locals_for_super(locals()))
        super().__init__(**kwargs)
        self.agent_count = agent_count
        self.agent_id = 0

    def frame_from_env(
        self, env: RoboThorEnvironment, task: Optional[Task[RoboThorEnvironment]]
    ) -> np.ndarray:
        """Return a copy of the frame of the agent selected by
        `self.agent_id`."""
        agent_frames = env.current_frames
        return agent_frames[self.agent_id].copy()

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Task[RoboThorEnvironment],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Return the parent sensor's observation for every agent, stacked
        along axis 0.

        `self.agent_id` is used to select the agent while iterating and is
        left at the last agent index afterwards.
        """
        per_agent = []
        for agent_index in range(self.agent_count):
            # `frame_from_env` reads `self.agent_id` to pick the frame.
            self.agent_id = agent_index
            per_agent.append(super().get_observation(env, task, *args, **kwargs))
        return np.stack(per_agent, axis=0)  # agents x width x height x channels
class GPSCompassSensorRoboThor(Sensor[RoboThorEnvironment, PointNavTask]):
    """Sensor returning the goal position relative to the agent as polar
    coordinates `[rho, phi]`."""

    def __init__(self, uuid: str = "target_coordinates_ind", **kwargs: Any):
        """Build the observation space and initialize the parent sensor.

        NOTE: the locals of this method are forwarded to the superclass via
        `prepare_locals_for_super(locals())`.
        """
        observation_space = self._get_observation_space()

        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self):
        """Return a float32 `Box` of shape `(2,)` spanning the float32 range."""
        float32_limits = np.finfo(np.float32)
        return gym.spaces.Box(
            low=float32_limits.min,
            high=float32_limits.max,
            shape=(2,),
            dtype=np.float32,
        )

    @staticmethod
    def _compute_pointgoal(
        source_position: np.ndarray,
        source_rotation: np.quaternion,
        goal_position: np.ndarray,
    ):
        """Express the goal offset in the agent frame and convert it to polar
        coordinates, returned as a float32 array `[rho, phi]`."""
        cls = GPSCompassSensorRoboThor

        offset_world = goal_position - source_position
        offset_agent = cls.quaternion_rotate_vector(
            source_rotation.inverse(), offset_world
        )

        rho, phi = cls.cartesian_to_polar(offset_agent[2], -offset_agent[0])
        return np.array([rho, phi], dtype=np.float32)

    @staticmethod
    def quaternion_from_y_angle(angle: float) -> np.quaternion:
        r"""Creates a quaternion from rotation angle around y axis"""
        coeffs = np.array(
            [0.0, np.sin(np.pi * angle / 360.0), 0.0, np.cos(np.pi * angle / 360.0)]
        )
        return GPSCompassSensorRoboThor.quaternion_from_coeff(coeffs)

    @staticmethod
    def quaternion_from_coeff(coeffs: np.ndarray) -> np.quaternion:
        r"""Creates a quaternions from coeffs in [x, y, z, w] format"""
        result = np.quaternion(0, 0, 0, 0)
        result.real = coeffs[3]
        result.imag = coeffs[0:3]
        return result

    @staticmethod
    def cartesian_to_polar(x, y):
        """Return the radius and angle (`arctan2(y, x)`) of the point
        `(x, y)`."""
        radius = np.sqrt(x**2 + y**2)
        angle = np.arctan2(y, x)
        return radius, angle

    @staticmethod
    def quaternion_rotate_vector(quat: np.quaternion, v: np.array) -> np.array:
        r"""Rotates a vector by a quaternion
        Args:
            quat: The quaternion to rotate by
            v: The vector to rotate
        Returns:
            np.array: The rotated vector
        """
        # Embed the vector as a pure quaternion and conjugate it by `quat`.
        pure = np.quaternion(0, 0, 0, 0)
        pure.imag = v
        rotated = quat * pure * quat.inverse()
        return rotated.imag

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Optional[PointNavTask],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Compute the point goal from the agent's current state and the
        `target` entry of `task.task_info`."""
        axes = ["x", "y", "z"]

        agent_state = env.agent_state()
        agent_position = np.array([agent_state[axis] for axis in axes])
        rotation_world_agent = self.quaternion_from_y_angle(
            agent_state["rotation"]["y"]
        )

        target = task.task_info["target"]
        goal_position = np.array([target[axis] for axis in axes])

        return self._compute_pointgoal(
            agent_position, rotation_world_agent, goal_position
        )
class DepthSensorThor(
    DepthSensor[
        THOR_ENV_TYPE,
        THOR_TASK_TYPE,
    ],
):
    """Depth sensor reading the `depth_frame` of the last event of either a
    raw ai2thor controller or an environment wrapping one."""

    def __init__(
        self,
        use_resnet_normalization: Optional[bool] = None,
        use_normalization: Optional[bool] = None,
        mean: Optional[np.ndarray] = np.array([[0.5]], dtype=np.float32),
        stdev: Optional[np.ndarray] = np.array([[0.25]], dtype=np.float32),
        height: Optional[int] = None,
        width: Optional[int] = None,
        uuid: str = "depth",
        output_shape: Optional[Tuple[int, ...]] = None,
        output_channels: int = 1,
        unnormalized_infimum: float = 0.0,
        unnormalized_supremum: float = 5.0,
        scale_first: bool = False,
        **kwargs: Any
    ):
        """Resolve the normalization flag and initialize the parent sensor.

        NOTE: the locals of this method are forwarded to the superclass via
        `prepare_locals_for_super(locals())`, so no extra locals are
        introduced here.
        """
        # Give priority to use_normalization, but use_resnet_normalization for backward compat. if not set
        if use_normalization is None:
            use_normalization = (
                use_resnet_normalization
                if use_resnet_normalization is not None
                else False
            )

        super().__init__(**prepare_locals_for_super(locals()))

    def frame_from_env(
        self, env: THOR_ENV_TYPE, task: Optional[THOR_TASK_TYPE]
    ) -> np.ndarray:
        """Return the depth frame of the last event.

        `env` may be an `ai2thor.controller.Controller` itself or an object
        exposing one as `env.controller`.
        """
        if isinstance(env, ai2thor.controller.Controller):
            return env.last_event.depth_frame

        return env.controller.last_event.depth_frame
class DepthSensorRoboThor(DepthSensorThor):
    """Deprecated alias of `DepthSensorThor`, kept for backwards
    compatibility."""

    def __init__(self, *args: Any, **kwargs: Any):
        """Log the deprecation warning, then initialize the parent sensor."""
        deprecation_notice = (
            "`DepthSensorRoboThor` is deprecated, use `DepthSensorThor` instead."
        )
        get_logger().warning(deprecation_notice)

        super().__init__(*args, **kwargs)
================================================
FILE: allenact_plugins/robothor_plugin/robothor_task_samplers.py
================================================
import copy
import gzip
import json
import random
from typing import List, Optional, Union, Dict, Any, cast, Tuple
import gym
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.cache_utils import str_to_pos_for_cache
from allenact.utils.experiment_utils import set_seed, set_deterministic_cudnn
from allenact.utils.system import get_logger
from allenact_plugins.robothor_plugin.robothor_environment import RoboThorEnvironment
from allenact_plugins.robothor_plugin.robothor_tasks import (
ObjectNavTask,
PointNavTask,
NavToPartnerTask,
)
class ObjectNavTaskSampler(TaskSampler):
def __init__(
    self,
    scenes: Union[List[str], str],
    object_types: List[str],
    sensors: List[Sensor],
    max_steps: int,
    env_args: Dict[str, Any],
    action_space: gym.Space,
    rewards_config: Dict,
    scene_period: Optional[Union[int, str]] = None,
    max_tasks: Optional[int] = None,
    seed: Optional[int] = None,
    deterministic_cudnn: bool = False,
    allow_flipping: bool = False,
    dataset_first: int = -1,
    dataset_last: int = -1,
    **kwargs,
) -> None:
    """Set up the sampler in one of two modes.

    If either `dataset_first` or `dataset_last` is non-negative, `scenes`
    must be the name of a json file of episodes and tasks are read from the
    index range `[dataset_first, dataset_last]`. Otherwise `scenes` must be a
    list of scene names and scenes are chosen according to `scene_period`.
    Finally the seed is set and the sampler is reset.
    """
    # Settings shared by both modes.
    self.scenes = scenes
    self.object_types = object_types
    self.sensors = sensors
    self.max_steps = max_steps
    self.env_args = env_args
    self._action_space = action_space
    self.rewards_config = rewards_config
    self.allow_flipping = allow_flipping

    self.env: Optional[RoboThorEnvironment] = None

    self.scenes_is_dataset = (dataset_first >= 0) or (dataset_last >= 0)

    if self.scenes_is_dataset:
        assert isinstance(
            self.scenes, str
        ), "When using a dataset, scenes ({}) must be a json file name string".format(
            self.scenes
        )
        with open(self.scenes, "r") as f:
            self.dataset_episodes = json.load(f)

        # Negative bounds select the full range of episodes.
        self.dataset_first = max(dataset_first, 0)
        if dataset_last >= 0:
            self.dataset_last = dataset_last
        else:
            self.dataset_last = len(self.dataset_episodes) - 1
        assert (
            0 <= self.dataset_first <= self.dataset_last
        ), "dataset_last {} must be >= dataset_first {} >= 0".format(
            dataset_last, dataset_first
        )
        self.reset_tasks = self.dataset_last - self.dataset_first + 1
    else:
        assert isinstance(
            self.scenes, List
        ), "When not using a dataset, scenes ({}) must be a list".format(
            self.scenes
        )
        self.scene_counter: Optional[int] = None
        self.scene_order: Optional[List[str]] = None
        self.scene_id: Optional[int] = None
        # default makes a random choice
        self.scene_period: Optional[Union[str, int]] = scene_period
        self.max_tasks: Optional[int] = None
        self.reset_tasks = max_tasks

    self._last_sampled_task: Optional[ObjectNavTask] = None

    self.seed: Optional[int] = None
    self.set_seed(seed)

    if deterministic_cudnn:
        set_deterministic_cudnn()

    self.reset()
def _create_environment(self) -> RoboThorEnvironment:
    """Instantiate a `RoboThorEnvironment` from `self.env_args`."""
    return RoboThorEnvironment(**self.env_args)
@property
def length(self) -> Union[int, float]:
    """Length.

    # Returns

    Number of total tasks remaining that can be sampled. Can be float('inf').
    """
    if self.max_tasks is None:
        return float("inf")
    return self.max_tasks
@property
def total_unique(self) -> Optional[Union[int, float]]:
    """The value of `self.reset_tasks`, i.e. the task budget restored on
    every `reset`."""
    unique_tasks = self.reset_tasks
    return unique_tasks
@property
def last_sampled_task(self) -> Optional[ObjectNavTask]:
    """The task most recently returned by `next_task` (`None` before the
    first one)."""
    most_recent = self._last_sampled_task
    return most_recent
def close(self) -> None:
    """Stop the environment if one has been created."""
    if self.env is None:
        return
    self.env.stop()
@property
def all_observation_spaces_equal(self) -> bool:
    """Check if observation spaces equal.

    # Returns

    Always `True`: every task created by this sampler is given the same
    list of sensors.
    """
    return True
def sample_scene(self, force_advance_scene: bool):
    """Choose the scene for the next task and decrement the task budget.

    # Parameters

    force_advance_scene : If `True`, first move to the next entry of the
        scene order (reshuffling the order when wrapping around). Intended
        for `scene_period == "manual"`; a warning is logged otherwise.

    # Returns

    The name of the selected scene.

    # Raises

    NotImplementedError : If `self.scene_period` is neither `None`,
        `"manual"`, nor an integer.
    """
    if force_advance_scene:
        if self.scene_period != "manual":
            get_logger().warning(
                "When sampling scene, have `force_advance_scene == True` "
                "but `self.scene_period` is not equal to 'manual', "
                "this may cause unexpected behavior."
            )
        self.scene_id = (1 + self.scene_id) % len(self.scenes)
        if self.scene_id == 0:
            random.shuffle(self.scene_order)

    if self.scene_period is None:
        # Random scene
        self.scene_id = random.randint(0, len(self.scenes) - 1)
    elif self.scene_period == "manual":
        pass
    elif not isinstance(self.scene_period, int):
        # Validate before comparing: `int >= str` would raise a `TypeError`
        # and hide the intended error for invalid settings.
        raise NotImplementedError(
            "Invalid scene_period {}".format(self.scene_period)
        )
    elif self.scene_counter >= self.scene_period:
        if self.scene_id == len(self.scene_order) - 1:
            # Randomize scene order for next iteration
            random.shuffle(self.scene_order)
            # Move to next scene
            self.scene_id = 0
        else:
            # Move to next scene
            self.scene_id += 1
        # Reset scene counter
        self.scene_counter = 1
    else:
        # Stay in current scene
        self.scene_counter += 1

    if self.max_tasks is not None:
        self.max_tasks -= 1

    return self.scenes[int(self.scene_order[self.scene_id])]
# def sample_episode(self, scene):
# self.scene_counters[scene] = (self.scene_counters[scene] + 1) % len(self.scene_to_episodes[scene])
# if self.scene_counters[scene] == 0:
# random.shuffle(self.scene_to_episodes[scene])
# return self.scene_to_episodes[scene][self.scene_counters[scene]]
def next_task(self, force_advance_scene: bool = False) -> Optional[ObjectNavTask]:
    """Create the next `ObjectNavTask`, or return `None` when the task
    budget is exhausted.

    Without a dataset, a scene is sampled, the agent is placed at a random
    reachable location and the target object type is the first type of a
    random permutation of `self.object_types` that is present in the scene.
    With a dataset, the next episode is read from `self.dataset_episodes`
    and the agent is teleported to its initial pose.

    # Parameters

    force_advance_scene : Forwarded to `sample_scene` (non-dataset mode
        only).
    """
    if self.max_tasks is not None and self.max_tasks <= 0:
        return None

    if not self.scenes_is_dataset:
        scene = self.sample_scene(force_advance_scene)

        if self.env is not None:
            # Only reload when the scene actually changes.
            if scene.replace("_physics", "") != self.env.scene_name.replace(
                "_physics", ""
            ):
                self.env.reset(scene)
        else:
            self.env = self._create_environment()
            self.env.reset(scene_name=scene)

        pose = self.env.randomize_agent_location()

        object_types_in_scene = {
            o["objectType"] for o in self.env.last_event.metadata["objects"]
        }

        task_info = {"scene": scene}
        for ot in random.sample(self.object_types, len(self.object_types)):
            if ot in object_types_in_scene:
                task_info["object_type"] = ot
                break

        # `task_info` always contains "scene", so checking for emptiness can
        # never detect a missing target; test for the target key instead.
        if "object_type" not in task_info:
            get_logger().warning(
                "Scene {} does not contain any"
                " objects of any of the types {}.".format(scene, self.object_types)
            )

        task_info["initial_position"] = {k: pose[k] for k in ["x", "y", "z"]}
        task_info["initial_orientation"] = cast(Dict[str, float], pose["rotation"])[
            "y"
        ]
    else:
        assert self.max_tasks is not None
        # Episodes are consumed from the end of the range towards its start.
        next_task_id = self.dataset_first + self.max_tasks - 1
        assert (
            self.dataset_first <= next_task_id <= self.dataset_last
        ), "wrong task_id {} for min {} max {}".format(
            next_task_id, self.dataset_first, self.dataset_last
        )
        task_info = copy.deepcopy(self.dataset_episodes[next_task_id])

        scene = task_info["scene"]
        if self.env is not None:
            if scene.replace("_physics", "") != self.env.scene_name.replace(
                "_physics", ""
            ):
                self.env.reset(scene_name=scene)
        else:
            self.env = self._create_environment()
            self.env.reset(scene_name=scene)

        self.env.step(
            {
                "action": "TeleportFull",
                **{k: float(v) for k, v in task_info["initial_position"].items()},
                "rotation": {
                    "x": 0.0,
                    "y": float(task_info["initial_orientation"]),
                    "z": 0.0,
                },
                "horizon": 0.0,
                "standing": True,
            }
        )
        assert self.env.last_action_success, "Failed to reset agent for {}".format(
            task_info
        )

        self.max_tasks -= 1

    task_info["mirrored"] = bool(self.allow_flipping and random.random() > 0.5)

    self._last_sampled_task = ObjectNavTask(
        env=self.env,
        sensors=self.sensors,
        task_info=task_info,
        max_steps=self.max_steps,
        action_space=self._action_space,
        reward_configs=self.rewards_config,
    )
    return self._last_sampled_task
def reset(self):
    """Restore the task budget and, when not using a dataset, restart the
    scene bookkeeping with a freshly shuffled scene order."""
    if not self.scenes_is_dataset:
        shuffled_order = list(range(len(self.scenes)))
        random.shuffle(shuffled_order)

        self.scene_order = shuffled_order
        self.scene_counter = 0
        self.scene_id = 0

    self.max_tasks = self.reset_tasks
def set_seed(self, seed: int):
    """Remember `seed` and, unless it is `None`, pass it to the module-level
    `set_seed` helper."""
    self.seed = seed
    if seed is None:
        return
    set_seed(seed)
class ObjectNavDatasetTaskSampler(TaskSampler):
def __init__(
    self,
    scenes: List[str],
    scene_directory: str,
    sensors: List[Sensor],
    max_steps: int,
    env_args: Dict[str, Any],
    action_space: gym.Space,
    rewards_config: Dict,
    seed: Optional[int] = None,
    deterministic_cudnn: bool = False,
    loop_dataset: bool = True,
    allow_flipping=False,
    env_class=RoboThorEnvironment,
    randomize_materials_in_training: bool = False,
    **kwargs,
) -> None:
    """Load the episodes of every scene and set up the sampling state.

    Episodes are read from `<scene_directory>/episodes`. If `object_types`
    is passed as a keyword argument, only episodes with one of those object
    types are kept; scenes left without episodes are dropped. With
    `loop_dataset` the number of tasks is unbounded, otherwise it equals the
    number of remaining episodes.
    """
    self.rewards_config = rewards_config
    self.env_args = env_args
    self.env_class = env_class
    self.sensors = sensors
    self.max_steps = max_steps
    self._action_space = action_space
    self.allow_flipping = allow_flipping
    self.randomize_materials_in_training = randomize_materials_in_training

    episode_directory = scene_directory + "/episodes"
    episodes_by_scene = {
        scene: ObjectNavDatasetTaskSampler.load_dataset(scene, episode_directory)
        for scene in scenes
    }

    # Only keep episodes containing desired objects
    if "object_types" in kwargs:
        wanted_types = kwargs["object_types"]
        episodes_by_scene = {
            scene: [ep for ep in scene_episodes if ep["object_type"] in wanted_types]
            for scene, scene_episodes in episodes_by_scene.items()
        }

    # Drop scenes that have no episodes.
    self.episodes = {
        scene: scene_episodes
        for scene, scene_episodes in episodes_by_scene.items()
        if scene_episodes
    }
    self.scenes = [scene for scene in scenes if scene in self.episodes]

    self.object_types = [
        ep["object_type"]
        for scene_episodes in self.episodes.values()
        for ep in scene_episodes
    ]

    self.env: Optional[RoboThorEnvironment] = None

    self.scene_counter: Optional[int] = None
    self.scene_order: Optional[List[str]] = None
    self.scene_id: Optional[int] = None

    # get the total number of tasks assigned to this process
    if loop_dataset:
        self.max_tasks = None
    else:
        self.max_tasks = sum(
            len(scene_episodes) for scene_episodes in self.episodes.values()
        )
    self.reset_tasks = self.max_tasks

    self.scene_index = 0
    self.episode_index = 0

    self._last_sampled_task: Optional[ObjectNavTask] = None

    self.seed: Optional[int] = None
    self.set_seed(seed)

    if deterministic_cudnn:
        set_deterministic_cudnn()

    self.reset()
def _create_environment(self) -> RoboThorEnvironment:
    """Instantiate `self.env_class` with `self.env_args`."""
    return self.env_class(**self.env_args)
@staticmethod
def load_dataset(scene: str, base_directory: str) -> List[Dict]:
    """Load and shuffle the episodes of one scene.

    # Parameters

    scene : Scene name; the file read is `<base_directory>/<scene>.json.gz`.
    base_directory : Directory containing the episode files, with or without
        a trailing slash. An empty string refers to the current directory.

    # Returns

    The decoded json content of the file, shuffled in place with `random`.
    """
    import os

    # `os.path.join` copes with both a trailing separator and an empty
    # directory (indexing `base_directory[-1]` fails on an empty string).
    filename = os.path.join(base_directory, scene) + ".json.gz"

    # The context manager closes the file even if reading or decoding fails.
    with gzip.open(filename, "rt", encoding="utf-8") as fin:
        data = json.load(fin)

    random.shuffle(data)
    return data
@staticmethod
def load_distance_cache_from_file(scene: str, base_directory: str) -> Dict:
    """Load the json content of `<base_directory>/<scene>.json.gz`.

    # Parameters

    scene : Scene name used as the file stem.
    base_directory : Directory containing the file, with or without a
        trailing slash. An empty string refers to the current directory.

    # Returns

    The decoded json content of the file.
    """
    import os

    # `os.path.join` copes with both a trailing separator and an empty
    # directory (indexing `base_directory[-1]` fails on an empty string).
    filename = os.path.join(base_directory, scene) + ".json.gz"

    # The context manager closes the file even if reading or decoding fails.
    with gzip.open(filename, "rt", encoding="utf-8") as fin:
        return json.load(fin)
@property
def __len__(self) -> Union[int, float]:
    """Length.

    # Returns

    Number of total tasks remaining that can be sampled. Can be float('inf').

    NOTE(review): this dunder is declared as a property, so the class
    attribute `__len__` is not a regular method; the same value is also
    exposed by the `length` property. Confirm whether `len(sampler)` is
    expected to work before changing this.
    """
    return float("inf") if self.max_tasks is None else self.max_tasks
@property
def total_unique(self) -> Optional[Union[int, float]]:
    """The value of `self.reset_tasks` (`None` when the dataset is
    looped)."""
    unique_tasks = self.reset_tasks
    return unique_tasks
@property
def last_sampled_task(self) -> Optional[ObjectNavTask]:
    """The value of `self._last_sampled_task` (initialized to `None`)."""
    most_recent = self._last_sampled_task
    return most_recent
def close(self) -> None:
    """Stop the environment if one has been created."""
    if self.env is None:
        return
    self.env.stop()
@property
def all_observation_spaces_equal(self) -> bool:
    """Check if observation spaces equal.

    # Returns

    Always `True` for this sampler, i.e. all tasks it can sample are
    declared to share one observation space.
    """
    return True
@property
def length(self) -> Union[int, float]:
    """Length.

    # Returns

    Number of total tasks remaining that can be sampled. Can be float('inf').
    """
    if self.max_tasks is None:
        return float("inf")
    return self.max_tasks
def next_task(self, force_advance_scene: bool = False) -> Optional[ObjectNavTask]:
    """Sample the next dataset episode and wrap it in an `ObjectNavTask`.

    Episodes of the current scene are consumed in order. Once they are
    exhausted the sampler moves (cyclically) to the next scene, shuffles
    that scene's episodes and starts from its first episode.

    # Parameters

    force_advance_scene : Not used by this sampler.

    # Returns

    The newly created task (also stored in `self._last_sampled_task`), or
    `None` when `self.max_tasks` is not `None` and has reached zero. If the
    agent cannot be teleported to the episode's start pose, the episode is
    consumed (it still counts against `self.max_tasks`) and the method
    tries again with the following episode.
    """
    if self.max_tasks is not None and self.max_tasks <= 0:
        return None

    if self.episode_index >= len(self.episodes[self.scenes[self.scene_index]]):
        self.scene_index = (self.scene_index + 1) % len(self.scenes)
        # shuffle the new list of episodes to train on
        random.shuffle(self.episodes[self.scenes[self.scene_index]])
        self.episode_index = 0

    scene = self.scenes[self.scene_index]
    episode = self.episodes[scene][self.episode_index]

    # The environment is created lazily on the first sampled task.
    if self.env is None:
        self.env = self._create_environment()

    # Scene names are compared without the "_physics" suffix; a full reset
    # is only issued when the scene actually changes.
    if scene.replace("_physics", "") != self.env.scene_name.replace("_physics", ""):
        self.env.reset(scene_name=scene)
    else:
        self.env.reset_object_filter()

    # Restrict the object filter to objects of the episode's target type.
    self.env.set_object_filter(
        object_ids=[
            o["objectId"]
            for o in self.env.last_event.metadata["objects"]
            if o["objectType"] == episode["object_type"]
        ]
    )

    # only randomize materials in train scenes
    were_materials_randomized = False
    if self.randomize_materials_in_training:
        if (
            "Train" in scene
            or int(scene.replace("FloorPlan", "").replace("_physics", "")) % 100
            < 21
        ):
            were_materials_randomized = True
            self.env.controller.step(action="RandomizeMaterials")

    task_info = {
        "scene": scene,
        "object_type": episode["object_type"],
        "materials_randomized": were_materials_randomized,
        "initial_position": episode["initial_position"],
        "initial_orientation": episode["initial_orientation"],
        "initial_horizon": episode.get("initial_horizon", 0),
        "distance_to_target": episode.get("shortest_path_length"),
        "path_to_target": episode.get("shortest_path"),
        "id": episode["id"],
    }

    # The random draw only happens when flipping is enabled.
    task_info["mirrored"] = bool(self.allow_flipping and random.random() > 0.5)

    self.episode_index += 1
    if self.max_tasks is not None:
        self.max_tasks -= 1

    if not self.env.teleport(
        pose=episode["initial_position"],
        rotation=episode["initial_orientation"],
        horizon=episode.get("initial_horizon", 0),
    ):
        # Could not place the agent: skip this episode and try the next one.
        return self.next_task()

    self._last_sampled_task = ObjectNavTask(
        env=self.env,
        sensors=self.sensors,
        task_info=task_info,
        max_steps=self.max_steps,
        action_space=self._action_space,
        reward_configs=self.rewards_config,
    )
    return self._last_sampled_task
def reset(self):
    """Rewind to the first episode of the first scene and restore the task budget."""
    self.scene_index = self.episode_index = 0
    self.max_tasks = self.reset_tasks
def set_seed(self, seed: int):
    """Record `seed` on the sampler and, unless it is `None`, pass it to the module-level `set_seed`."""
    self.seed = seed
    if seed is None:
        return
    set_seed(seed)
class PointNavTaskSampler(TaskSampler):
    """Task sampler producing `PointNavTask`s with randomly drawn start and target.

    For each task a scene is selected (see `sample_scene`), the agent
    location is randomized and a target is drawn from the environment's
    list of known good locations.
    """

    def __init__(
        self,
        scenes: List[str],
        sensors: List[Sensor],
        max_steps: int,
        env_args: Dict[str, Any],
        action_space: gym.Space,
        rewards_config: Dict,
        scene_period: Optional[Union[int, str]] = None,
        max_tasks: Optional[int] = None,
        seed: Optional[int] = None,
        deterministic_cudnn: bool = False,
        **kwargs,
    ) -> None:
        """Store the configuration and reset the sampler.

        # Parameters

        scenes : Names of the scenes tasks are sampled from.
        sensors : Sensors handed to every created task.
        max_steps : Step limit handed to every created task.
        env_args : Keyword arguments used to build the environment.
        action_space : Action space handed to every created task.
        rewards_config : Reward configuration handed to every created task.
        scene_period : `None` picks a random scene for every task,
            `"manual"` only changes scene when `force_advance_scene` is
            set, and an integer is the number of consecutive tasks sampled
            before moving to the next scene.
        max_tasks : Number of tasks that may be sampled (`None` for no limit).
        seed : Seed forwarded to `set_seed`.
        deterministic_cudnn : If true, `set_deterministic_cudnn()` is called.
        """
        self.rewards_config = rewards_config
        self.env_args = env_args
        self.scenes = scenes

        self.env: Optional[RoboThorEnvironment] = None
        self.sensors = sensors
        self.max_steps = max_steps
        self._action_space = action_space

        self.scene_counter: Optional[int] = None
        # Shuffled indices into `self.scenes` (filled in by `reset`).
        self.scene_order: Optional[List[int]] = None
        self.scene_id: Optional[int] = None
        self.scene_period: Optional[Union[str, int]] = (
            scene_period  # default makes a random choice
        )
        self.max_tasks: Optional[int] = None
        self.reset_tasks = max_tasks

        self._last_sampled_task: Optional[PointNavTask] = None

        self.seed: Optional[int] = None
        self.set_seed(seed)

        if deterministic_cudnn:
            set_deterministic_cudnn()

        self.reset()

    def _create_environment(self) -> RoboThorEnvironment:
        """Build a `RoboThorEnvironment` from `self.env_args`."""
        env = RoboThorEnvironment(**self.env_args)
        return env

    @property
    def length(self) -> Union[int, float]:
        """Length.

        # Returns

        Number of total tasks remaining that can be sampled.
        Can be float('inf').
        """
        return float("inf") if self.max_tasks is None else self.max_tasks

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Return `self.reset_tasks`, the budget restored by `reset`."""
        return self.reset_tasks

    @property
    def last_sampled_task(self) -> Optional[PointNavTask]:
        """Return the most recently sampled task (`None` before the first one)."""
        return self._last_sampled_task

    def close(self) -> None:
        """Stop the environment, if one has been created."""
        if self.env is not None:
            self.env.stop()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Check if observation spaces equal.

        # Returns

        True if all Tasks that can be sampled by this sampler
        have the same observation space. Otherwise False.
        """
        return True

    def sample_scene(self, force_advance_scene: bool):
        """Select the scene for the next task and consume one unit of `max_tasks`.

        # Parameters

        force_advance_scene : If true, step to the next entry of the scene
            order first (reshuffling the order when wrapping around). A
            warning is logged when `self.scene_period` is not `"manual"`.

        # Returns

        The name of the selected scene.

        # Raises

        NotImplementedError : If `self.scene_period` is neither `None`,
            `"manual"` nor an integer.
        """
        if force_advance_scene:
            if self.scene_period != "manual":
                get_logger().warning(
                    "When sampling scene, have `force_advance_scene == True`"
                    " but `self.scene_period` is not equal to 'manual',"
                    " this may cause unexpected behavior."
                )
            self.scene_id = (1 + self.scene_id) % len(self.scenes)
            if self.scene_id == 0:
                random.shuffle(self.scene_order)

        if self.scene_period is None:
            # Random scene
            self.scene_id = random.randint(0, len(self.scenes) - 1)
        elif self.scene_period == "manual":
            pass
        elif isinstance(self.scene_period, int):
            # The type is validated before comparing so that an invalid
            # period reaches the explicit error below instead of failing
            # inside the comparison.
            if self.scene_counter >= self.scene_period:
                if self.scene_id == len(self.scene_order) - 1:
                    # Randomize scene order for next iteration
                    random.shuffle(self.scene_order)
                    # Move to next scene
                    self.scene_id = 0
                else:
                    # Move to next scene
                    self.scene_id += 1
                # Reset scene counter
                self.scene_counter = 1
            else:
                # Stay in current scene
                self.scene_counter += 1
        else:
            raise NotImplementedError(
                "Invalid scene_period {}".format(self.scene_period)
            )

        if self.max_tasks is not None:
            self.max_tasks -= 1

        return self.scenes[int(self.scene_order[self.scene_id])]

    def next_task(self, force_advance_scene: bool = False) -> Optional[PointNavTask]:
        """Sample a new `PointNavTask`.

        # Parameters

        force_advance_scene : Forwarded to `sample_scene`.

        # Returns

        The new task (also stored as the last sampled task), or `None` when
        `self.max_tasks` is not `None` and has reached zero.
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        scene = self.sample_scene(force_advance_scene)

        if self.env is not None:
            # Scene names are compared without the "_physics" suffix; the
            # environment is only reset when the scene actually changes.
            if scene.replace("_physics", "") != self.env.scene_name.replace(
                "_physics", ""
            ):
                self.env.reset(scene_name=scene)
        else:
            self.env = self._create_environment()
            self.env.reset(scene_name=scene)

        locs = self.env.known_good_locations_list()

        # All candidate locations are expected to share the same height.
        ys = [loc["y"] for loc in locs]
        miny = min(ys)
        maxy = max(ys)
        assert maxy - miny < 1e-6, "miny {} maxy {} for scene {}".format(
            miny, maxy, scene
        )

        # Up to ten attempts at drawing a start/target pair whose reported
        # distance is strictly positive.
        too_close_to_target = True
        target: Optional[Dict[str, float]] = None
        for _ in range(10):
            self.env.randomize_agent_location()
            target = copy.copy(random.choice(locs))
            too_close_to_target = self.env.distance_to_point(target) <= 0
            if not too_close_to_target:
                break

        pose = self.env.agent_state()

        task_info = {
            "scene": scene,
            "initial_position": {k: pose[k] for k in ["x", "y", "z"]},
            "initial_orientation": pose["rotation"]["y"],
            "target": target,
            "actions": [],
        }

        if too_close_to_target:
            get_logger().warning("No path for sampled episode {}".format(task_info))

        self._last_sampled_task = PointNavTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            reward_configs=self.rewards_config,
        )
        return self._last_sampled_task

    def reset(self):
        """Reshuffle the scene order and restore the counters and task budget."""
        self.scene_counter = 0
        self.scene_order = list(range(len(self.scenes)))
        random.shuffle(self.scene_order)
        self.scene_id = 0
        self.max_tasks = self.reset_tasks

    def set_seed(self, seed: Optional[int]):
        """Record `seed` and, unless it is `None`, pass it to the module-level `set_seed`."""
        self.seed = seed
        if seed is not None:
            set_seed(seed)
class PointNavDatasetTaskSampler(TaskSampler):
    """Task sampler producing `PointNavTask`s from pre-computed dataset episodes.

    Episodes are loaded per scene from `<scene_directory>/episodes` and
    consumed scene by scene, in order.
    """

    def __init__(
        self,
        scenes: List[str],
        scene_directory: str,
        sensors: List[Sensor],
        max_steps: int,
        env_args: Dict[str, Any],
        action_space: gym.Space,
        rewards_config: Dict,
        seed: Optional[int] = None,
        deterministic_cudnn: bool = False,
        loop_dataset: bool = True,
        shuffle_dataset: bool = True,
        allow_flipping=False,
        env_class=RoboThorEnvironment,
        **kwargs,
    ) -> None:
        """Load the episodes of every scene and reset the sampler.

        # Parameters

        scenes : Names of the scenes whose episodes are loaded.
        scene_directory : Directory whose `episodes` sub-directory holds the
            per-scene episode files.
        sensors : Sensors handed to every created task.
        max_steps : Step limit handed to every created task.
        env_args : Keyword arguments used to build the environment.
        action_space : Action space handed to every created task.
        rewards_config : Reward configuration handed to every created task.
        seed : Seed forwarded to `set_seed`.
        deterministic_cudnn : If true, `set_deterministic_cudnn()` is called.
        loop_dataset : If true the sampler never runs out of tasks;
            otherwise the budget is the total number of loaded episodes.
        shuffle_dataset : If true, a scene's episodes are shuffled whenever
            the sampler moves on to that scene.
        allow_flipping : If true, each task is marked as mirrored with
            probability one half.
        env_class : Callable used to build the environment.
        """
        self.rewards_config = rewards_config
        self.env_args = env_args
        self.scenes = scenes
        self.shuffle_dataset: bool = shuffle_dataset
        self.episodes = {
            scene: ObjectNavDatasetTaskSampler.load_dataset(
                scene, scene_directory + "/episodes"
            )
            for scene in scenes
        }
        self.env_class = env_class
        self.env: Optional[RoboThorEnvironment] = None
        self.sensors = sensors
        self.max_steps = max_steps
        self._action_space = action_space
        self.allow_flipping = allow_flipping
        self.scene_counter: Optional[int] = None
        self.scene_order: Optional[List[int]] = None
        self.scene_id: Optional[int] = None
        # get the total number of tasks assigned to this process
        if loop_dataset:
            self.max_tasks = None
        else:
            self.max_tasks = sum(len(self.episodes[scene]) for scene in self.episodes)
        self.reset_tasks = self.max_tasks
        self.scene_index = 0
        self.episode_index = 0

        self._last_sampled_task: Optional[PointNavTask] = None

        self.seed: Optional[int] = None
        self.set_seed(seed)

        if deterministic_cudnn:
            set_deterministic_cudnn()

        self.reset()

    def _create_environment(self) -> RoboThorEnvironment:
        """Build an environment by calling `self.env_class` with `self.env_args`."""
        env = self.env_class(**self.env_args)
        return env

    @property
    def __len__(self) -> Union[int, float]:
        """Length.

        # Returns

        Number of total tasks remaining that can be sampled. Can be float('inf').
        """
        # NOTE(review): because this is a property, attribute access yields
        # a number rather than a callable, so the built-in `len()` cannot
        # use it. The `length` property returns the same value.
        return float("inf") if self.max_tasks is None else self.max_tasks

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Return `self.reset_tasks`, the budget restored by `reset`."""
        return self.reset_tasks

    @property
    def last_sampled_task(self) -> Optional[PointNavTask]:
        """Return the most recently sampled task (`None` before the first one)."""
        return self._last_sampled_task

    def close(self) -> None:
        """Stop the environment, if one has been created."""
        if self.env is not None:
            self.env.stop()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Check if observation spaces equal.

        # Returns

        True if all Tasks that can be sampled by this sampler have the
        same observation space. Otherwise False.
        """
        return True

    @staticmethod
    def _to_pos(s):
        """Normalize an episode entry to a position-like value.

        Dicts and tuples are returned unchanged, a bare number is treated
        as the "y" component (with "x" and "z" set to 0), and anything else
        is handed to `str_to_pos_for_cache`.
        """
        if isinstance(s, (dict, tuple)):
            return s
        # JSON decodes `90` as an int and `90.0` as a float; accept both.
        if isinstance(s, (int, float)):
            return {"x": 0, "y": s, "z": 0}
        return str_to_pos_for_cache(s)

    def next_task(self, force_advance_scene: bool = False) -> Optional[PointNavTask]:
        """Sample the next dataset episode and wrap it in a `PointNavTask`.

        # Parameters

        force_advance_scene : Not used by this sampler.

        # Returns

        The new task (also stored as the last sampled task), or `None` when
        `self.max_tasks` is not `None` and has reached zero. If the agent
        cannot be teleported to the episode's start pose, the episode is
        consumed and the method tries again with the following episode.
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        if self.episode_index >= len(self.episodes[self.scenes[self.scene_index]]):
            self.scene_index = (self.scene_index + 1) % len(self.scenes)
            # shuffle the new list of episodes to train on
            if self.shuffle_dataset:
                random.shuffle(self.episodes[self.scenes[self.scene_index]])
            self.episode_index = 0

        scene = self.scenes[self.scene_index]
        episode = self.episodes[scene][self.episode_index]

        if self.env is not None:
            # Scene names are compared without the "_physics" suffix; the
            # environment is only reset when the scene actually changes.
            if scene.replace("_physics", "") != self.env.scene_name.replace(
                "_physics", ""
            ):
                self.env.reset(scene_name=scene, filtered_objects=[])
        else:
            self.env = self._create_environment()
            self.env.reset(scene_name=scene, filtered_objects=[])

        # The normalized values are written back into the episode, so the
        # conversion happens at most once per episode.
        for k in ["initial_position", "initial_orientation", "target_position"]:
            episode[k] = self._to_pos(episode[k])

        task_info = {
            "scene": scene,
            "initial_position": episode["initial_position"],
            "initial_orientation": episode["initial_orientation"],
            "target": episode["target_position"],
            "shortest_path": episode["shortest_path"],
            "distance_to_target": episode["shortest_path_length"],
            "id": episode["id"],
        }

        # The random draw only happens when flipping is enabled.
        task_info["mirrored"] = bool(self.allow_flipping and random.random() > 0.5)

        self.episode_index += 1
        if self.max_tasks is not None:
            self.max_tasks -= 1

        if not self.env.teleport(
            pose=episode["initial_position"], rotation=episode["initial_orientation"]
        ):
            # Could not place the agent: skip this episode and try the next one.
            return self.next_task()

        self._last_sampled_task = PointNavTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            reward_configs=self.rewards_config,
        )

        return self._last_sampled_task

    def reset(self):
        """Rewind to the first episode of the first scene and restore the task budget."""
        self.episode_index = 0
        self.scene_index = 0
        self.max_tasks = self.reset_tasks

    def set_seed(self, seed: Optional[int]):
        """Record `seed` and, unless it is `None`, pass it to the module-level `set_seed`."""
        self.seed = seed
        if seed is not None:
            set_seed(seed)

    @property
    def length(self) -> Union[int, float]:
        """Length.

        # Returns

        Number of total tasks remaining that can be sampled.
        Can be float('inf').
        """
        return float("inf") if self.max_tasks is None else self.max_tasks
class NavToPartnerTaskSampler(TaskSampler):
    """Task sampler producing two-agent `NavToPartnerTask`s.

    For each task a scene is selected (see `sample_scene`) and both agents
    are placed at randomized locations.
    """

    def __init__(
        self,
        scenes: List[str],
        sensors: List[Sensor],
        max_steps: int,
        env_args: Dict[str, Any],
        action_space: gym.Space,
        rewards_config: Dict,
        scene_period: Optional[Union[int, str]] = None,
        max_tasks: Optional[int] = None,
        seed: Optional[int] = None,
        deterministic_cudnn: bool = False,
        **kwargs,
    ) -> None:
        """Store the configuration and reset the sampler.

        # Parameters

        scenes : Names of the scenes tasks are sampled from.
        sensors : Sensors handed to every created task.
        max_steps : Step limit handed to every created task.
        env_args : Keyword arguments used to build the environment; its
            `"agentCount"` entry must be 2.
        action_space : Action space handed to every created task.
        rewards_config : Reward configuration handed to every created task;
            its `"max_success_distance"` entry is also read while sampling.
        scene_period : `None` picks a random scene for every task,
            `"manual"` only changes scene when `force_advance_scene` is
            set, and an integer is the number of consecutive tasks sampled
            before moving to the next scene.
        max_tasks : Number of tasks that may be sampled (`None` for no limit).
        seed : Seed forwarded to `set_seed`.
        deterministic_cudnn : If true, `set_deterministic_cudnn()` is called.
        """
        self.rewards_config = rewards_config
        self.env_args = env_args
        self.scenes = scenes

        self.env: Optional[RoboThorEnvironment] = None
        self.sensors = sensors
        self.max_steps = max_steps
        self._action_space = action_space

        self.scene_counter: Optional[int] = None
        # Shuffled indices into `self.scenes` (filled in by `reset`).
        self.scene_order: Optional[List[int]] = None
        self.scene_id: Optional[int] = None
        self.scene_period: Optional[Union[str, int]] = (
            scene_period  # default makes a random choice
        )
        self.max_tasks: Optional[int] = None
        self.reset_tasks = max_tasks

        self._last_sampled_task: Optional[NavToPartnerTask] = None

        self.seed: Optional[int] = None
        self.set_seed(seed)

        if deterministic_cudnn:
            set_deterministic_cudnn()

        self.reset()

    def _create_environment(self) -> RoboThorEnvironment:
        """Build a two-agent `RoboThorEnvironment` from `self.env_args`."""
        assert (
            self.env_args["agentCount"] == 2
        ), "NavToPartner is only defined for 2 agents!"
        env = RoboThorEnvironment(**self.env_args)
        return env

    @property
    def length(self) -> Union[int, float]:
        """Length.

        # Returns

        Number of total tasks remaining that can be sampled.
        Can be float('inf').
        """
        return float("inf") if self.max_tasks is None else self.max_tasks

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Return `self.reset_tasks`, the budget restored by `reset`."""
        return self.reset_tasks

    @property
    def last_sampled_task(self) -> Optional[NavToPartnerTask]:
        """Return the most recently sampled task (`None` before the first one)."""
        return self._last_sampled_task

    def close(self) -> None:
        """Stop the environment, if one has been created."""
        if self.env is not None:
            self.env.stop()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Check if observation spaces equal.

        # Returns

        True if all Tasks that can be sampled by this sampler
        have the same observation space. Otherwise False.
        """
        return True

    def sample_scene(self, force_advance_scene: bool):
        """Select the scene for the next task and consume one unit of `max_tasks`.

        # Parameters

        force_advance_scene : If true, step to the next entry of the scene
            order first (reshuffling the order when wrapping around). A
            warning is logged when `self.scene_period` is not `"manual"`.

        # Returns

        The name of the selected scene.

        # Raises

        NotImplementedError : If `self.scene_period` is neither `None`,
            `"manual"` nor an integer.
        """
        if force_advance_scene:
            if self.scene_period != "manual":
                get_logger().warning(
                    "When sampling scene, have `force_advance_scene == True`"
                    " but `self.scene_period` is not equal to 'manual',"
                    " this may cause unexpected behavior."
                )
            self.scene_id = (1 + self.scene_id) % len(self.scenes)
            if self.scene_id == 0:
                random.shuffle(self.scene_order)

        if self.scene_period is None:
            # Random scene
            self.scene_id = random.randint(0, len(self.scenes) - 1)
        elif self.scene_period == "manual":
            pass
        elif isinstance(self.scene_period, int):
            # The type is validated before comparing so that an invalid
            # period reaches the explicit error below instead of failing
            # inside the comparison.
            if self.scene_counter >= self.scene_period:
                if self.scene_id == len(self.scene_order) - 1:
                    # Randomize scene order for next iteration
                    random.shuffle(self.scene_order)
                    # Move to next scene
                    self.scene_id = 0
                else:
                    # Move to next scene
                    self.scene_id += 1
                # Reset scene counter
                self.scene_counter = 1
            else:
                # Stay in current scene
                self.scene_counter += 1
        else:
            raise NotImplementedError(
                "Invalid scene_period {}".format(self.scene_period)
            )

        if self.max_tasks is not None:
            self.max_tasks -= 1

        return self.scenes[int(self.scene_order[self.scene_id])]

    def next_task(
        self, force_advance_scene: bool = False
    ) -> Optional[NavToPartnerTask]:
        """Sample a new `NavToPartnerTask`.

        # Parameters

        force_advance_scene : Forwarded to `sample_scene`.

        # Returns

        The new task (also stored as the last sampled task), or `None` when
        `self.max_tasks` is not `None` and has reached zero.
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        scene = self.sample_scene(force_advance_scene)

        if self.env is not None:
            # Scene names are compared without the "_physics" suffix; the
            # environment is only reset when the scene actually changes.
            if scene.replace("_physics", "") != self.env.scene_name.replace(
                "_physics", ""
            ):
                self.env.reset(scene_name=scene)
        else:
            self.env = self._create_environment()
            self.env.reset(scene_name=scene)

        # Up to ten attempts at placing the agents further apart than 1.25
        # times the success distance.
        too_close_to_target = True
        for _ in range(10):
            self.env.randomize_agent_location(agent_id=0)
            self.env.randomize_agent_location(agent_id=1)

            pose1 = self.env.agent_state(0)
            pose2 = self.env.agent_state(1)
            dist = self.env.distance_cache.find_distance(
                self.env.scene_name,
                {k: pose1[k] for k in ["x", "y", "z"]},
                {k: pose2[k] for k in ["x", "y", "z"]},
                self.env.distance_from_point_to_point,
            )

            too_close_to_target = (
                dist <= 1.25 * self.rewards_config["max_success_distance"]
            )
            if not too_close_to_target:
                break

        task_info = {
            "scene": scene,
            "initial_position1": {k: pose1[k] for k in ["x", "y", "z"]},
            "initial_position2": {k: pose2[k] for k in ["x", "y", "z"]},
            "initial_orientation1": pose1["rotation"]["y"],
            "initial_orientation2": pose2["rotation"]["y"],
            # Scene name followed by a random integer.
            "id": "_".join([scene] + ["%d" % random.randint(0, 2**63 - 1)]),
        }

        if too_close_to_target:
            get_logger().warning("Bad sampled episode {}".format(task_info))

        self._last_sampled_task = NavToPartnerTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            reward_configs=self.rewards_config,
        )
        return self._last_sampled_task

    def reset(self):
        """Reshuffle the scene order and restore the counters and task budget."""
        self.scene_counter = 0
        self.scene_order = list(range(len(self.scenes)))
        random.shuffle(self.scene_order)
        self.scene_id = 0
        self.max_tasks = self.reset_tasks

    def set_seed(self, seed: Optional[int]):
        """Record `seed` and, unless it is `None`, pass it to the module-level `set_seed`."""
        self.seed = seed
        if seed is not None:
            set_seed(seed)
================================================
FILE: allenact_plugins/robothor_plugin/robothor_tasks.py
================================================
import math
from typing import Tuple, List, Dict, Any, Optional, Union, Sequence, cast
import gym
import numpy as np
from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact.utils.system import get_logger
from allenact.utils.tensor_utils import tile_images
from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
from allenact_plugins.robothor_plugin.robothor_constants import (
MOVE_AHEAD,
ROTATE_LEFT,
ROTATE_RIGHT,
END,
LOOK_UP,
LOOK_DOWN,
)
from allenact_plugins.robothor_plugin.robothor_environment import RoboThorEnvironment
def spl_metric(
    success: bool, optimal_distance: float, travelled_distance: float
) -> Optional[float]:
    """Compute the SPL value of a single episode.

    # Parameters

    success : Whether the episode was successful.
    optimal_distance : Length of the optimal path; negative values are
        treated as "unknown".
    travelled_distance : Length of the path actually followed.

    # Returns

    `0.0` for an unsuccessful episode, `None` when the optimal distance is
    negative, and otherwise the optimal distance divided by the larger of
    the travelled and optimal distances. When the optimal distance is
    exactly zero the result is `1.0` if nothing was travelled, else `0.0`.
    """
    if not success:
        return 0.0

    if optimal_distance < 0:
        return None

    if optimal_distance == 0:
        # Avoid dividing by zero: only a zero-length trajectory is optimal.
        return 1.0 if travelled_distance == 0 else 0.0

    effective_travelled = max(travelled_distance, optimal_distance)
    return optimal_distance / effective_travelled
class PointNavTask(Task[RoboThorEnvironment]):
    """Navigation task in which the agent must reach `task_info["target"]` and then issue `END`."""

    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, END)

    def __init__(
        self,
        env: RoboThorEnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        reward_configs: Dict[str, Any],
        **kwargs,
    ) -> None:
        """Initialize the bookkeeping used for rewards and metrics.

        # Parameters

        env : Environment the task runs in.
        sensors : Sensors used to produce observations.
        task_info : Task description; must contain the key `"target"`.
        max_steps : Maximum number of steps for the task.
        reward_configs : Reward settings read by `shaping` and `judge`.
        """
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )
        self.reward_configs = reward_configs
        self._took_end_action: bool = False
        self._success: Optional[bool] = False

        # The distance at task start doubles as the optimal path length.
        initial_distance = self.env.distance_to_point(self.task_info["target"])
        self.last_geodesic_distance = initial_distance
        self.optimal_distance = initial_distance

        self._rewards: List[float] = []
        self._distance_to_goal: List[float] = []
        self._metrics = None
        # the initial coordinate will be directly taken from the optimal path
        self.path: List[Any] = []
        self.travelled_distance = 0.0

        self.task_info["followed_path"] = [self.env.agent_state()]
        self.task_info["action_names"] = self.action_names()

    @property
    def action_space(self):
        """Discrete space with one entry per action name."""
        n_actions = len(self._actions)
        return gym.spaces.Discrete(n_actions)

    def reached_terminal_state(self) -> bool:
        """Return whether the `END` action has been taken."""
        return self._took_end_action

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Return the tuple of action names of this task class."""
        return cls._actions

    def close(self) -> None:
        """Stop the underlying environment."""
        self.env.stop()

    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        """Execute one action and return the resulting step result.

        `END` terminates the task and records whether the goal is in range.
        Any other action is sent to the environment, after which the
        followed path and the travelled distance are updated.
        """
        assert isinstance(action, int)
        action = cast(int, action)

        chosen = self.class_action_names()[action]

        if chosen != END:
            self.env.step({"action": chosen})
            self.last_action_success = self.env.last_action_success
            agent_pose = self.env.agent_state()
            self.path.append({axis: agent_pose[axis] for axis in ["x", "y", "z"]})
            self.task_info["followed_path"].append(agent_pose)
        else:
            self._took_end_action = True
            self._success = self._is_goal_in_range()
            self.last_action_success = self._success

        if len(self.path) > 1:
            self.travelled_distance += IThorEnvironment.position_dist(
                p0=self.path[-1], p1=self.path[-2], ignore_y=True
            )

        # Observation, reward and done flag are computed in this order.
        observation = self.get_observations()
        reward = self.judge()
        done = self.is_done()
        return RLStepResult(
            observation=observation,
            reward=reward,
            done=done,
            info={"last_action_success": self.last_action_success, "action": action},
        )

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Return the environment's current frame (`"rgb"`) or depth image (`"depth"`)."""
        assert mode in ["rgb", "depth"], "only rgb and depth rendering is implemented"
        if mode == "rgb":
            return self.env.current_frame
        elif mode == "depth":
            return self.env.current_depth

    def _is_goal_in_range(self) -> Optional[bool]:
        """Classify the current distance to the target.

        # Returns

        `True` if the distance is in `(-0.5, 0.2]`, `False` if it is above
        `0.2`, and `None` (after a debug log entry) otherwise.
        """
        goal = self.task_info["target"]
        distance = self.dist_to_target()

        if distance > 0.2:
            return False
        if -0.5 < distance <= 0.2:
            return True

        get_logger().debug(
            "No path for {} from {} to {}".format(
                self.env.scene_name, self.env.agent_state(), goal
            )
        )
        return None

    def shaping(self) -> float:
        """Return the weighted decrease in distance to the target since the last call.

        Returns `0.0` without querying the environment when the shaping
        weight is `0.0`. A reported distance of exactly `-1.0` is replaced
        by the previously stored distance.
        """
        progress = 0.0
        if self.reward_configs["shaping_weight"] == 0.0:
            return progress

        current = self.dist_to_target()
        if current == -1.0:
            current = self.last_geodesic_distance

        previous = self.last_geodesic_distance
        if previous > -0.5 and current > -0.5:  # (robothor limits)
            progress += previous - current

        self.last_geodesic_distance = current
        return progress * self.reward_configs["shaping_weight"]

    def judge(self) -> float:
        """Judge the last event."""
        total = self.reward_configs["step_penalty"]
        total += self.shaping()

        if self._took_end_action:
            # No terminal reward is added when success is undetermined (None).
            if self._success is not None:
                if self._success:
                    total += self.reward_configs["goal_success_reward"]
                else:
                    total += self.reward_configs["failed_stop_reward"]
        elif self.num_steps_taken() + 1 >= self.max_steps:
            total += self.reward_configs.get("reached_max_steps_reward", 0.0)

        total = float(total)
        self._rewards.append(total)
        return total

    def dist_to_target(self):
        """Return the environment's distance from the agent to the target."""
        goal = self.task_info["target"]
        return self.env.distance_to_point(goal)

    def metrics(self) -> Dict[str, Any]:
        """Return the episode metrics.

        # Returns

        An empty dict if the task is not done or success is undetermined
        (`None`); otherwise the parent metrics extended with `"success"`,
        `"total_reward"`, `"dist_to_target"` and `"spl"`. The stored
        rewards are cleared once the task is done.
        """
        if not self.is_done():
            return {}

        reward_sum = float(np.sum(self._rewards))
        self._rewards = []

        if self._success is None:
            return {}

        final_distance = self.dist_to_target()
        spl = spl_metric(
            success=self._success,
            optimal_distance=self.optimal_distance,
            travelled_distance=self.travelled_distance,
        )

        result = {
            **super().metrics(),
            "success": self._success,  # False also if no path to target
            "total_reward": reward_sum,
            "dist_to_target": final_distance,
            "spl": 0 if spl is None else spl,
        }
        return result
class ObjectNavTask(Task[RoboThorEnvironment]):
    """Navigation task in which the agent must find an object of a given type.

    The task succeeds when `END` is issued while an object of type
    `task_info["object_type"]` is among the environment's visible objects.
    When `task_info["mirrored"]` is true, left/right rotations are swapped
    and visual observations and rendered frames are flipped horizontally.
    """

    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, END, LOOK_UP, LOOK_DOWN)

    def __init__(
        self,
        env: RoboThorEnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        reward_configs: Dict[str, Any],
        **kwargs,
    ) -> None:
        """Initialize the bookkeeping used for rewards and metrics.

        # Parameters

        env : Environment the task runs in.
        sensors : Sensors used to produce observations.
        task_info : Task description; must contain `"mirrored"` and
            `"object_type"`.
        max_steps : Maximum number of steps for the task.
        reward_configs : Reward settings read by `shaping` and `judge`.
        """
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )
        self.reward_configs = reward_configs
        self._took_end_action: bool = False
        self._success: Optional[bool] = False
        self.mirror = task_info["mirrored"]

        self._all_metadata_available = env.all_metadata_available

        self._rewards: List[float] = []
        self._distance_to_goal: List[float] = []
        self._metrics = None
        self.path: List = (
            []
        )  # the initial coordinate will be directly taken from the optimal path
        self.travelled_distance = 0.0

        self.task_info["followed_path"] = [self.env.agent_state()]
        self.task_info["taken_actions"] = []
        self.task_info["action_names"] = self.class_action_names()

        # Distance bookkeeping is only set up when the environment reports
        # that all metadata is available.
        if self._all_metadata_available:
            self.last_geodesic_distance = self.env.distance_to_object_type(
                self.task_info["object_type"]
            )
            self.optimal_distance = self.last_geodesic_distance
            self.closest_geo_distance = self.last_geodesic_distance

        self.last_expert_action: Optional[int] = None
        self.last_action_success = False

    @property
    def action_space(self):
        """Discrete space with one entry per action name."""
        return gym.spaces.Discrete(len(self._actions))

    def reached_terminal_state(self) -> bool:
        """Return whether the `END` action has been taken."""
        return self._took_end_action

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Return the tuple of action names of this task class."""
        return cls._actions

    def close(self) -> None:
        """Stop the underlying environment."""
        self.env.stop()

    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        """Execute one action and return the resulting step result.

        For mirrored tasks the two rotation actions are swapped before
        execution. `END` terminates the task and records whether the goal
        is in range. Any other action is sent to the environment, after
        which the followed path and travelled distance are updated.
        """
        assert isinstance(action, int)
        action = cast(int, action)

        action_str = self.class_action_names()[action]

        if self.mirror:
            if action_str == ROTATE_RIGHT:
                action_str = ROTATE_LEFT
            elif action_str == ROTATE_LEFT:
                action_str = ROTATE_RIGHT

        # The name recorded is the one actually executed (after mirroring).
        self.task_info["taken_actions"].append(action_str)

        if action_str == END:
            self._took_end_action = True
            self._success = self._is_goal_in_range()
            self.last_action_success = self._success
        else:
            self.env.step({"action": action_str})
            self.last_action_success = self.env.last_action_success
            pose = self.env.agent_state()
            self.path.append({k: pose[k] for k in ["x", "y", "z"]})
            self.task_info["followed_path"].append(pose)
        if len(self.path) > 1:
            self.travelled_distance += IThorEnvironment.position_dist(
                p0=self.path[-1], p1=self.path[-2], ignore_y=True
            )
        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success, "action": action},
        )
        return step_result

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Return a copy of the current frame, flipped horizontally for mirrored tasks.

        # Parameters

        mode : Either `"rgb"` or `"depth"`.

        # Raises

        NotImplementedError : For any other mode (when assertions are disabled).
        """
        assert mode in ["rgb", "depth"], "only rgb and depth rendering is implemented"
        if mode == "rgb":
            frame = self.env.current_frame.copy()
        elif mode == "depth":
            frame = self.env.current_depth.copy()
        else:
            raise NotImplementedError(f"Mode '{mode}' is not supported.")

        if self.mirror:
            # The Ellipsis lets the flip work for frames with or without a
            # trailing channel axis, matching `get_observations`.
            frame = frame[:, ::-1, ...].copy()  # horizontal flip
        return frame

    def _is_goal_in_range(self) -> bool:
        """Return whether an object of the target type is currently visible."""
        return any(
            o["objectType"] == self.task_info["object_type"]
            for o in self.env.visible_objects()
        )

    def shaping(self) -> float:
        """Return the weighted shaping reward for the last step.

        Returns `0.0` without querying the environment when the shaping
        weight is `0.0`. Otherwise the reward is based on the change in
        distance to the target object type and its magnitude is clipped to
        the horizontal distance moved in the last step.
        """
        rew = 0.0

        if self.reward_configs["shaping_weight"] == 0.0:
            return rew

        geodesic_distance = self.env.distance_to_object_type(
            self.task_info["object_type"]
        )

        # Ensuring the reward magnitude is not greater than the total distance moved
        max_reward_mag = 0.0
        if len(self.path) >= 2:
            p0, p1 = self.path[-2:]
            max_reward_mag = math.sqrt(
                (p0["x"] - p1["x"]) ** 2 + (p0["z"] - p1["z"]) ** 2
            )

        if self.reward_configs.get("positive_only_reward", False):
            # Only reward getting closer than the closest distance so far.
            if geodesic_distance > 0.5:
                rew = max(self.closest_geo_distance - geodesic_distance, 0)
        else:
            if (
                self.last_geodesic_distance > -0.5 and geodesic_distance > -0.5
            ):  # (robothor limits)
                rew += self.last_geodesic_distance - geodesic_distance

        self.last_geodesic_distance = geodesic_distance
        self.closest_geo_distance = min(self.closest_geo_distance, geodesic_distance)

        return (
            max(
                min(rew, max_reward_mag),
                -max_reward_mag,
            )
            * self.reward_configs["shaping_weight"]
        )

    def judge(self) -> float:
        """Judge the last event."""
        reward = self.reward_configs["step_penalty"]

        reward += self.shaping()

        if self._took_end_action:
            if self._success:
                reward += self.reward_configs["goal_success_reward"]
            else:
                reward += self.reward_configs["failed_stop_reward"]
        elif self.num_steps_taken() + 1 >= self.max_steps:
            reward += self.reward_configs.get("reached_max_steps_reward", 0.0)

        self._rewards.append(float(reward))
        return float(reward)

    def get_observations(self, **kwargs) -> Any:
        """Return the sensor observations, flipping visual ones for mirrored tasks.

        An observation is flipped horizontally when its name contains
        `"rgb"` or `"depth"` and its value is a numpy array with two or
        three axes.
        """
        obs = self.sensor_suite.get_observations(env=self.env, task=self)
        if self.mirror:
            for o in obs:
                if ("rgb" in o or "depth" in o) and isinstance(obs[o], np.ndarray):
                    if (
                        len(obs[o].shape) == 3
                    ):  # heuristic to determine this is a visual sensor
                        obs[o] = obs[o][:, ::-1, :].copy()  # horizontal flip
                    elif len(obs[o].shape) == 2:  # perhaps only two axes for depth?
                        obs[o] = obs[o][:, ::-1].copy()  # horizontal flip
        return obs

    def metrics(self) -> Dict[str, Any]:
        """Return the episode metrics.

        # Returns

        An empty dict if the task is not done. Otherwise the parent
        metrics, extended with `"success"`, `"total_reward"`,
        `"dist_to_target"` and `"spl"` when all metadata is available.
        """
        if not self.is_done():
            return {}

        metrics = super(ObjectNavTask, self).metrics()
        if self._all_metadata_available:
            dist2tget = self.env.distance_to_object_type(self.task_info["object_type"])

            spl = spl_metric(
                success=self._success,
                optimal_distance=self.optimal_distance,
                travelled_distance=self.travelled_distance,
            )

            metrics = {
                **metrics,
                "success": self._success,
                "total_reward": np.sum(self._rewards),
                "dist_to_target": dist2tget,
                "spl": 0 if spl is None else spl,
            }
        return metrics

    def query_expert(self, end_action_only: bool = False, **kwargs) -> Tuple[int, bool]:
        """Return the expert's action index and whether it is valid.

        # Parameters

        end_action_only : If true, only report the `END` action (when the
            goal is in range) and otherwise return `(0, False)`.

        # Returns

        `(index, True)` when an expert action is available, else `(0, False)`.

        # Raises

        RuntimeError : If the environment rejects the
            `ObjectNavExpertAction` action with a `ValueError`.
        """
        if self._is_goal_in_range():
            return self.class_action_names().index(END), True

        if end_action_only:
            return 0, False
        else:
            try:
                self.env.step(
                    {
                        "action": "ObjectNavExpertAction",
                        "objectType": self.task_info["object_type"],
                    }
                )
            except ValueError as err:
                # Keep the original error attached as the explicit cause.
                raise RuntimeError(
                    "Attempting to use the action `ObjectNavExpertAction` which is not supported by your version of"
                    " AI2-THOR. The action `ObjectNavExpertAction` is experimental. In order"
                    " to enable this action, please install the (in development) version of AI2-THOR. Through pip"
                    " this can be done with the command"
                    " `pip install -e git+https://github.com/allenai/ai2thor.git@7d914cec13aae62298f5a6a816adb8ac6946c61f#egg=ai2thor`."
                ) from err
            if self.env.last_action_success:
                expert_action: Optional[str] = self.env.last_event.metadata[
                    "actionReturn"
                ]
                if isinstance(expert_action, str):
                    # Report the action in the mirrored frame the agent sees.
                    if self.mirror:
                        if expert_action == "RotateLeft":
                            expert_action = "RotateRight"
                        elif expert_action == "RotateRight":
                            expert_action = "RotateLeft"

                    return self.class_action_names().index(expert_action), True
                else:
                    # This should have been caught by self._is_goal_in_range()...
                    return 0, False
            else:
                return 0, False
class NavToPartnerTask(Task[RoboThorEnvironment]):
    """Two-agent task that ends once the agents are within the success distance of each other."""

    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT)

    def __init__(
        self,
        env: RoboThorEnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        reward_configs: Dict[str, Any],
        **kwargs,
    ) -> None:
        """Record the initial poses and distance between the two agents.

        # Parameters

        env : Environment the task runs in; must report two agents.
        sensors : Sensors used to produce observations.
        task_info : Task description; the followed paths and action names
            are added to it.
        max_steps : Maximum number of steps for the task.
        reward_configs : Reward settings read by `judge` and
            `reached_terminal_state`.
        """
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )
        self.reward_configs = reward_configs

        assert self.env.agent_count == 2, "NavToPartnerTask only defined for 2 agents!"

        first_pose = self.env.agent_state(0)
        second_pose = self.env.agent_state(1)
        self.last_geodesic_distance = self._distance_between(first_pose, second_pose)

        self.task_info["followed_path1"] = [first_pose]
        self.task_info["followed_path2"] = [second_pose]
        self.task_info["action_names"] = self.class_action_names()

    def _distance_between(self, pose1, pose2):
        """Look up the distance between the positions of two agent poses via the distance cache."""
        axes = ["x", "y", "z"]
        return self.env.distance_cache.find_distance(
            self.env.scene_name,
            {k: pose1[k] for k in axes},
            {k: pose2[k] for k in axes},
            self.env.distance_from_point_to_point,
        )

    @property
    def action_space(self):
        """Tuple of two discrete spaces, one per agent."""
        n_actions = len(self._actions)
        return gym.spaces.Tuple(
            [
                gym.spaces.Discrete(n_actions),
                gym.spaces.Discrete(n_actions),
            ]
        )

    def reached_terminal_state(self) -> bool:
        """Return whether the last recorded distance is within the success distance."""
        threshold = self.reward_configs["max_success_distance"]
        return self.last_geodesic_distance <= threshold

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Return the tuple of action names of this task class."""
        return cls._actions

    def close(self) -> None:
        """Stop the underlying environment."""
        self.env.stop()

    def _step(self, action: Tuple[int, int]) -> RLStepResult:
        """Execute one action per agent and return the resulting step result.

        Agent 0 acts first, then agent 1; afterwards both followed paths
        and the distance between the agents are updated.
        """
        assert isinstance(action, tuple)
        names = self.class_action_names()
        first_action = names[action[0]]
        second_action = names[action[1]]

        self.env.step({"action": first_action, "agentId": 0})
        self.last_action_success1 = self.env.last_action_success
        self.env.step({"action": second_action, "agentId": 1})
        self.last_action_success2 = self.env.last_action_success

        first_pose = self.env.agent_state(0)
        self.task_info["followed_path1"].append(first_pose)
        second_pose = self.env.agent_state(1)
        self.task_info["followed_path2"].append(second_pose)

        self.last_geodesic_distance = self._distance_between(first_pose, second_pose)

        # Observation, reward and done flag are computed in this order.
        observation = self.get_observations()
        reward = self.judge()
        done = self.is_done()
        return RLStepResult(
            observation=observation,
            reward=reward,
            done=done,
            info={
                "last_action_success": [
                    self.last_action_success1,
                    self.last_action_success2,
                ],
                "action": action,
            },
        )

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Return the agents' current frames (`"rgb"`) or depth images (`"depth"`) tiled into one image."""
        assert mode in ["rgb", "depth"], "only rgb and depth rendering is implemented"
        if mode == "rgb":
            return tile_images(self.env.current_frames)
        elif mode == "depth":
            return tile_images(self.env.current_depths)

    def judge(self) -> float:
        """Judge the last event."""
        total = self.reward_configs["step_penalty"]
        if self.reached_terminal_state():
            total += self.reward_configs["success_reward"]
        # reward shared by both agents (no shaping)
        return total

    def metrics(self) -> Dict[str, Any]:
        """Return the parent metrics plus `"success"`, or an empty dict if the task is not done."""
        if not self.is_done():
            return {}

        result = {
            **super().metrics(),
            "success": self.reached_terminal_state(),
        }
        return result
================================================
FILE: allenact_plugins/robothor_plugin/robothor_viz.py
================================================
import copy
import json
import math
import os
from typing import Tuple, Sequence, Union, Dict, Optional, Any, cast, Generator, List
import cv2
import numpy as np
from PIL import Image, ImageDraw
from ai2thor.controller import Controller
from matplotlib import pyplot as plt
from matplotlib.figure import Figure
import colour as col
from allenact.utils.system import get_logger
from allenact.utils.viz_utils import TrajectoryViz
ROBOTHOR_VIZ_CACHED_TOPDOWN_VIEWS_DIR = os.path.join(
os.path.expanduser("~"), ".allenact", "robothor", "top_down_viz_cache"
)
class ThorPositionTo2DFrameTranslator(object):
    """Callable mapping THOR world positions to (row, col) pixel coordinates
    of a top-down orthographic frame."""

    def __init__(
        self,
        frame_shape_rows_cols: Tuple[int, int],
        cam_position: Sequence[float],
        orth_size: float,
    ):
        """
        # Parameters

        frame_shape_rows_cols : Frame size as `(rows, cols)`.
        cam_position : Camera position; entries 0 and 2 are used as the
            world x and z coordinates of the view centre.
        orth_size : Half of the world-space extent covered by the frame.
        """
        centre_xz = np.array((cam_position[0], cam_position[2]))
        self.frame_shape = frame_shape_rows_cols
        self.lower_left = centre_xz - orth_size
        self.span = 2 * orth_size

    def __call__(self, position: Sequence[float]):
        """Return `np.array([row, col], dtype=int)` for an `(x, y, z)` or
        `(x, z)` world position."""
        if len(position) == 3:
            x, z = position[0], position[2]
        else:
            x, z = position

        # Normalised coordinates relative to the lower-left corner.
        normalised = (np.array((x, z)) - self.lower_left) / self.span
        n_rows, n_cols = self.frame_shape[0], self.frame_shape[1]

        # Rows grow downwards while z grows upwards, hence the `1.0 - ...`.
        row = round(n_rows * (1.0 - normalised[1]))
        col = round(n_cols * normalised[0])
        return np.array((row, col), dtype=int)
class ThorViz(TrajectoryViz):
    """Trajectory visualizer drawing agent paths on top-down views of THOR
    scenes.

    Top-down images and camera metadata are cached on disk under
    `ROBOTHOR_VIZ_CACHED_TOPDOWN_VIEWS_DIR`. An AI2-THOR `Controller` is
    only created when a cache entry is missing, and it is stopped again once
    all views are available.
    """

    def __init__(
        self,
        path_to_trajectory: Sequence[str] = ("task_info", "followed_path"),
        label: str = "thor_trajectory",
        figsize: Tuple[float, float] = (8, 4),  # width, height
        fontsize: float = 10,
        scenes: Union[
            Tuple[str, int, int, int, int], Sequence[Tuple[str, int, int, int, int]]
        ] = ("FloorPlan_Val{}_{}", 1, 3, 1, 5),
        viz_rows_cols: Tuple[int, int] = (448, 448),
        single_color: bool = False,
        view_triangle_only_on_last: bool = True,
        disable_view_triangle: bool = False,
        line_opacity: float = 1.0,
        **kwargs
    ):
        """
        # Parameters

        path_to_trajectory : Keys used to reach the trajectory in an episode.
        label : Label forwarded to `TrajectoryViz`.
        figsize : Matplotlib figure size as `(width, height)`.
        fontsize : Font size of the figure title.
        scenes : One tuple, or a sequence of tuples, of the form
            `(name_format, wall_min, wall_max, furniture_min, furniture_max)`
            with inclusive ranges (see `iterate_scenes`).
        viz_rows_cols : Size `(rows, cols)` of the rendered top-down view.
        single_color : Draw the whole path with the start colour only.
        view_triangle_only_on_last : Draw the view triangle only for the
            last position instead of every position.
        disable_view_triangle : Do not draw any view triangle.
        line_opacity : Opacity in `[0, 1]` of the path lines.
        """
        super().__init__(
            path_to_trajectory=path_to_trajectory,
            label=label,
            figsize=figsize,
            fontsize=fontsize,
            **kwargs
        )

        if isinstance(scenes[0], str):
            scenes = [
                cast(Tuple[str, int, int, int, int], scenes)
            ]  # make it list of tuples
        self.scenes = cast(List[Tuple[str, int, int, int, int]], scenes)

        self.room_path = ROBOTHOR_VIZ_CACHED_TOPDOWN_VIEWS_DIR
        os.makedirs(self.room_path, exist_ok=True)

        self.viz_rows_cols = viz_rows_cols
        self.single_color = single_color
        self.view_triangle_only_on_last = view_triangle_only_on_last
        self.disable_view_triangle = disable_view_triangle
        self.line_opacity = line_opacity

        # Only needed for rendering
        self.map_data: Optional[Dict[str, Any]] = None
        self.thor_top_downs: Optional[Dict[str, np.ndarray]] = None

        self.controller: Optional[Controller] = None

    def init_top_down_render(self):
        """Load (or generate) the map data and top-down views, then release
        the controller if one had to be created."""
        self.map_data = self.get_translator()
        self.thor_top_downs = self.make_top_down_views()

        # No controller needed after this point
        if self.controller is not None:
            self.controller.stop()
            self.controller = None

    @staticmethod
    def iterate_scenes(
        all_scenes: Sequence[Tuple[str, int, int, int, int]]
    ) -> Generator[str, None, None]:
        """Yield every room name described by `all_scenes`.

        Each entry is `(name_format, wall_min, wall_max, furniture_min,
        furniture_max)`; both ranges are inclusive.
        """
        for scenes in all_scenes:
            for wall in range(scenes[1], scenes[2] + 1):
                for furniture in range(scenes[3], scenes[4] + 1):
                    yield scenes[0].format(wall, furniture)

    def cached_map_data_path(self, roomname: str) -> str:
        """Path of the cached camera metadata JSON for `roomname`."""
        return os.path.join(self.room_path, "map_data__{}.json".format(roomname))

    def get_translator(self) -> Dict[str, Any]:
        """Return the camera metadata plus a `"pos_translator"` entry.

        NOTE: only the first scene produced by `iterate_scenes` is queried;
        its map data is then used for every scene.
        """
        roomname = list(ThorViz.iterate_scenes(self.scenes))[0]
        json_file = self.cached_map_data_path(roomname)
        if not os.path.exists(json_file):
            # Cache miss: query the simulator and persist the result.
            self.make_controller()
            self.controller.reset(roomname)
            map_data = self.get_agent_map_data()
            get_logger().info("Dumping {}".format(json_file))
            with open(json_file, "w") as f:
                json.dump(map_data, f, indent=4, sort_keys=True)
        else:
            with open(json_file, "r") as f:
                map_data = json.load(f)

        pos_translator = ThorPositionTo2DFrameTranslator(
            self.viz_rows_cols,
            self.position_to_tuple(map_data["cam_position"]),
            map_data["cam_orth_size"],
        )
        map_data["pos_translator"] = pos_translator

        get_logger().debug("Using map_data {}".format(map_data))
        return map_data

    def cached_image_path(self, roomname: str) -> str:
        """Path of the cached top-down image for `roomname` at the current
        `viz_rows_cols` resolution."""
        return os.path.join(
            self.room_path, "{}__r{}_c{}.png".format(roomname, *self.viz_rows_cols)
        )

    def make_top_down_views(self) -> Dict[str, np.ndarray]:
        """Return a dict from room name to its top-down image (as read by
        `cv2.imread`), rendering and caching any missing image."""
        top_downs = {}
        for roomname in self.iterate_scenes(self.scenes):
            fname = self.cached_image_path(roomname)
            if not os.path.exists(fname):
                self.make_controller()
                self.dump_top_down_view(roomname, fname)
            top_downs[roomname] = cv2.imread(fname)

        return top_downs

    def crop_viz_image(self, viz_image: np.ndarray) -> np.ndarray:
        """Crop `viz_image` to the rows between 30% and 80% of the frame
        height, keeping the full width."""
        # Top-down view of room spans vertically near the center of the frame in RoboTHOR:
        y_min = int(self.viz_rows_cols[0] * 0.3)
        y_max = int(self.viz_rows_cols[0] * 0.8)
        # But it covers approximately the entire width:
        x_min = 0
        x_max = self.viz_rows_cols[1]
        return viz_image[y_min:y_max, x_min:x_max, :]

    def make_controller(self):
        """Lazily start a `Controller` configured for the visualization
        resolution. Does nothing if a controller already exists."""
        if self.controller is None:
            self.controller = Controller()

            self.controller.step({"action": "ChangeQuality", "quality": "Very High"})
            self.controller.step(
                {
                    "action": "ChangeResolution",
                    "x": self.viz_rows_cols[1],
                    "y": self.viz_rows_cols[0],
                }
            )

    def get_agent_map_data(self):
        """Read the map-view camera position and orthographic size from the
        controller, toggling the map view on and then off again."""
        self.controller.step({"action": "ToggleMapView"})
        cam_position = self.controller.last_event.metadata["cameraPosition"]
        cam_orth_size = self.controller.last_event.metadata["cameraOrthSize"]
        to_return = {
            "cam_position": cam_position,
            "cam_orth_size": cam_orth_size,
        }
        self.controller.step({"action": "ToggleMapView"})
        return to_return

    @staticmethod
    def position_to_tuple(position: Dict[str, float]) -> Tuple[float, float, float]:
        """Convert a dict with `"x"`, `"y"`, `"z"` keys to an `(x, y, z)` tuple."""
        return position["x"], position["y"], position["z"]

    @staticmethod
    def _rgba_base_and_overlay(frame: np.ndarray):
        """Return `frame` as an RGBA image together with a same-sized blank
        RGBA overlay to draw on.

        The overlay size is taken from the base image: PIL sizes are
        `(width, height)` whereas `frame.shape[:-1]` is `(rows, cols)`, so
        using the array shape breaks compositing for non-square frames.
        """
        base = Image.fromarray(frame.astype("uint8"), "RGB").convert("RGBA")
        overlay = Image.new("RGBA", base.size)  # Use RGBA
        return base, overlay

    @staticmethod
    def _to_rgb255(color: Any) -> Tuple[int, ...]:
        """Convert a `colour.Color` to a tuple of 0-255 integer channels."""
        return tuple(int(round(255 * channel)) for channel in color.rgb)

    @staticmethod
    def add_lines_to_map(
        ps: Sequence[Any],
        frame: np.ndarray,
        pos_translator: ThorPositionTo2DFrameTranslator,
        opacity: float,
        color: Optional[Tuple[int, ...]] = None,
    ) -> np.ndarray:
        """Draw the polyline through the world positions `ps` onto `frame`.

        # Parameters

        ps : World positions accepted by `pos_translator`.
        frame : Image array of shape `(rows, cols, 3)`.
        pos_translator : Maps world positions to `(row, col)` pixels.
        opacity : Line opacity in `[0, 1]`.
        color : RGB line colour, `(255, 0, 0)` when `None`.

        # Returns

        A new RGB array, or `frame` itself when fewer than two positions
        are given.
        """
        if len(ps) <= 1:
            return frame
        if color is None:
            color = (255, 0, 0)

        img1, img2 = ThorViz._rgba_base_and_overlay(frame)

        opacity = int(round(255 * opacity))  # Define transparency for the lines.

        draw = ImageDraw.Draw(img2)
        for i in range(len(ps) - 1):
            # The translator yields (row, col); PIL expects (x, y) == (col, row).
            draw.line(
                tuple(reversed(pos_translator(ps[i])))
                + tuple(reversed(pos_translator(ps[i + 1]))),
                fill=color + (opacity,),
                width=int(frame.shape[0] / 100),
            )

        img = Image.alpha_composite(img1, img2)
        return np.array(img.convert("RGB"))

    @staticmethod
    def add_line_to_map(
        p0: Any,
        p1: Any,
        frame: np.ndarray,
        pos_translator: ThorPositionTo2DFrameTranslator,
        opacity: float,
        color: Optional[Tuple[int, ...]] = None,
    ) -> np.ndarray:
        """Draw a single line segment from `p0` to `p1` onto `frame`.

        Returns `frame` unchanged when both end points are equal; see
        `add_lines_to_map` for the meaning of the remaining parameters.
        """
        if p0 == p1:
            return frame
        if color is None:
            color = (255, 0, 0)

        img1, img2 = ThorViz._rgba_base_and_overlay(frame)

        opacity = int(round(255 * opacity))  # Define transparency for the line.

        draw = ImageDraw.Draw(img2)
        draw.line(
            tuple(reversed(pos_translator(p0))) + tuple(reversed(pos_translator(p1))),
            fill=color + (opacity,),
            width=int(frame.shape[0] / 100),
        )

        img = Image.alpha_composite(img1, img2)
        return np.array(img.convert("RGB"))

    @staticmethod
    def add_agent_view_triangle(
        position: Any,
        rotation: Dict[str, float],
        frame: np.ndarray,
        pos_translator: ThorPositionTo2DFrameTranslator,
        scale: float = 1.0,
        opacity: float = 0.1,
    ) -> np.ndarray:
        """Overlay a white, semi-transparent triangle indicating the
        direction the agent faces.

        # Parameters

        position : `(x, y, z)` world position of the agent.
        rotation : Agent rotation; only the `"y"` entry (degrees) is used.
        frame : Image array of shape `(rows, cols, 3)`.
        pos_translator : Maps world positions to `(row, col)` pixels.
        scale : World-space size of the triangle.
        opacity : Fill opacity in `[0, 1]`.
        """
        # Force a float array so integer-valued positions can be offset.
        p0 = np.array((position[0], position[2]), dtype=float)

        theta = -2 * math.pi * (rotation["y"] / 360.0)
        rotation_mat = np.array(
            [[math.cos(theta), -math.sin(theta)], [math.sin(theta), math.cos(theta)]]
        )
        offset1 = scale * np.array([-1 / 2.0, 1])
        offset2 = scale * np.array([1 / 2.0, 1])

        p1 = p0 + np.matmul(rotation_mat, offset1)
        p2 = p0 + np.matmul(rotation_mat, offset2)

        img1, img2 = ThorViz._rgba_base_and_overlay(frame)

        opacity = int(round(255 * opacity))  # Define transparency for the triangle.
        points = [tuple(reversed(pos_translator(p))) for p in [p0, p1, p2]]
        draw = ImageDraw.Draw(img2)
        draw.polygon(points, fill=(255, 255, 255, opacity))

        img = Image.alpha_composite(img1, img2)
        return np.array(img.convert("RGB"))

    @staticmethod
    def visualize_agent_path(
        positions: Sequence[Any],
        frame: np.ndarray,
        pos_translator: ThorPositionTo2DFrameTranslator,
        single_color: bool = False,
        view_triangle_only_on_last: bool = False,
        disable_view_triangle: bool = False,
        line_opacity: float = 1.0,
        trajectory_start_end_color_str: Tuple[str, str] = ("red", "green"),
    ) -> np.ndarray:
        """Draw a trajectory (and optional view triangles) onto `frame`.

        # Parameters

        positions : Agent states; each needs `"x"`, `"y"`, `"z"` and, when
            triangles are drawn, a `"rotation"` entry. May be empty.
        frame : Image array of shape `(rows, cols, 3)`.
        pos_translator : Maps world positions to `(row, col)` pixels.
        single_color : Use only the start colour for the whole path;
            otherwise segments fade from the start to the end colour.
        view_triangle_only_on_last : Draw a triangle for the last position
            only.
        disable_view_triangle : Draw no triangles at all.
        line_opacity : Opacity in `[0, 1]` of the path lines.
        trajectory_start_end_color_str : Names of the start and end colours.
        """
        start_name, end_name = (
            trajectory_start_end_color_str[0],
            trajectory_start_end_color_str[1],
        )

        if single_color:
            frame = ThorViz.add_lines_to_map(
                [ThorViz.position_to_tuple(p) for p in positions],
                frame,
                pos_translator,
                line_opacity,
                ThorViz._to_rgb255(col.Color(start_name)),
            )
        elif len(positions) > 1:
            # One colour per segment.
            colors = list(
                col.Color(start_name).range_to(
                    col.Color(end_name), len(positions) - 1
                )
            )
            for i in range(len(positions) - 1):
                frame = ThorViz.add_line_to_map(
                    ThorViz.position_to_tuple(positions[i]),
                    ThorViz.position_to_tuple(positions[i + 1]),
                    frame,
                    pos_translator,
                    opacity=line_opacity,
                    color=ThorViz._to_rgb255(colors[i]),
                )

        if disable_view_triangle:
            triangle_positions: Sequence[Any] = []
        elif view_triangle_only_on_last:
            # Slicing (instead of indexing) tolerates an empty trajectory.
            triangle_positions = list(positions[-1:])
        else:
            triangle_positions = positions

        for position in triangle_positions:
            frame = ThorViz.add_agent_view_triangle(
                ThorViz.position_to_tuple(position),
                rotation=position["rotation"],
                frame=frame,
                pos_translator=pos_translator,
                # A lone triangle is drawn more opaque than a sequence of them.
                opacity=0.05 + view_triangle_only_on_last * 0.2,
            )

        return frame

    def dump_top_down_view(self, room_name: str, image_path: str):
        """Render the map view of `room_name` and write it to `image_path`.

        Requires `make_controller` to have been called beforehand.
        """
        get_logger().debug("Dumping {}".format(image_path))

        self.controller.reset(room_name)
        self.controller.step(
            {"action": "Initialize", "gridSize": 0.1, "makeAgentsVisible": False}
        )
        self.controller.step({"action": "ToggleMapView"})
        top_down_view = self.controller.last_event.cv2img

        cv2.imwrite(image_path, top_down_view)

    def make_fig(self, episode: Any, episode_id: str) -> Figure:
        """Create a figure showing the episode's trajectory on its scene.

        The room name is taken to be the first three `_`-separated fields
        of `episode_id`.
        """
        trajectory: Sequence[Dict[str, Any]] = self._access(
            episode, self.path_to_trajectory
        )

        if self.thor_top_downs is None:
            self.init_top_down_render()

        roomname = "_".join(episode_id.split("_")[:3])

        im = self.visualize_agent_path(
            trajectory,
            self.thor_top_downs[roomname],
            self.map_data["pos_translator"],
            single_color=self.single_color,
            view_triangle_only_on_last=self.view_triangle_only_on_last,
            disable_view_triangle=self.disable_view_triangle,
            line_opacity=self.line_opacity,
        )

        fig, ax = plt.subplots(figsize=self.figsize)
        ax.set_title(episode_id, fontsize=self.fontsize)
        # Reverse the channel axis (the image is loaded with cv2).
        ax.imshow(self.crop_viz_image(im)[:, :, ::-1])
        ax.axis("off")

        return fig
class ThorMultiViz(ThorViz):
    """`ThorViz` variant that overlays the trajectories of several agents on
    one top-down view, one colour gradient per agent."""

    def __init__(
        self,
        path_to_trajectory_prefix: Sequence[str] = ("task_info", "followed_path"),
        agent_suffixes: Sequence[str] = ("1", "2"),
        label: str = "thor_trajectories",
        trajectory_start_end_color_strs: Sequence[Tuple[str, str]] = (
            ("red", "green"),
            ("cyan", "purple"),
        ),
        **kwargs
    ):
        """
        # Parameters

        path_to_trajectory_prefix : Keys leading to a trajectory; the last
            key is completed with each agent suffix.
        agent_suffixes : Suffix appended to the last key, one per agent.
        label : Label forwarded to `ThorViz`.
        trajectory_start_end_color_strs : `(start, end)` colour names, paired
            with `agent_suffixes` by position.
        """
        super().__init__(label=label, **kwargs)

        self.path_to_trajectory_prefix = list(path_to_trajectory_prefix)
        self.agent_suffixes = list(agent_suffixes)
        self.trajectory_start_end_color_strs = list(trajectory_start_end_color_strs)

    def make_fig(self, episode: Any, episode_id: str) -> Figure:
        """Create a figure with every agent's trajectory drawn on the scene
        identified by the first three `_`-separated fields of `episode_id`."""
        if self.thor_top_downs is None:
            self.init_top_down_render()

        roomname = "_".join(episode_id.split("_")[:3])
        canvas = self.thor_top_downs[roomname]
        translator = self.map_data["pos_translator"]

        agent_colors = zip(self.agent_suffixes, self.trajectory_start_end_color_strs)
        for suffix, color_pair in agent_colors:
            # Same keys as the prefix, with the suffix appended to the last one.
            prefix = self.path_to_trajectory_prefix
            agent_path = [*prefix[:-1], prefix[-1] + suffix]
            canvas = self.visualize_agent_path(
                self._access(episode, agent_path),
                canvas,
                translator,
                single_color=self.single_color,
                view_triangle_only_on_last=self.view_triangle_only_on_last,
                disable_view_triangle=self.disable_view_triangle,
                line_opacity=self.line_opacity,
                trajectory_start_end_color_str=color_pair,
            )

        fig, ax = plt.subplots(figsize=self.figsize)
        ax.set_title(episode_id, fontsize=self.fontsize)
        ax.imshow(self.crop_viz_image(canvas)[:, :, ::-1])
        ax.axis("off")

        return fig
================================================
FILE: allenact_plugins/robothor_plugin/scripts/__init__.py
================================================
================================================
FILE: allenact_plugins/robothor_plugin/scripts/make_objectnav_debug_dataset.py
================================================
import gzip
import json
import os
from typing import Sequence, Optional
from allenact_plugins.robothor_plugin.robothor_task_samplers import (
ObjectNavDatasetTaskSampler,
)
def create_debug_dataset_from_train_dataset(
    scene: str,
    target_object_type: Optional[str],
    episodes_subset: Sequence[int],
    train_dataset_path: str,
    base_debug_output_path: str,
):
    """Write a small debug dataset containing selected training episodes.

    # Parameters

    scene : Scene whose `<scene>.json.gz` episode file is read and written.
    target_object_type : Object type that is part of the episode ids
        (`<scene>_<type>_<index>`); `None` for ids of the form
        `<scene>_<index>`.
    episodes_subset : Indices of the episodes to keep.
    train_dataset_path : Directory containing the `episodes` folder of the
        downloaded training dataset.
    base_debug_output_path : Directory under which `episodes/<scene>.json.gz`
        is created.

    # Raises

    AssertionError : If the training file is missing or the selected ids do
        not match the episodes found.
    """
    downloaded_episodes = os.path.join(
        train_dataset_path, "episodes", scene + ".json.gz"
    )

    assert os.path.exists(downloaded_episodes), (
        "'{}' doesn't seem to exist or is empty. Make sure you've downloaded the appropriate"
        " training dataset with"
        " datasets/download_navigation_datasets.sh".format(downloaded_episodes)
    )

    # episodes
    episodes = ObjectNavDatasetTaskSampler.load_dataset(
        scene=scene, base_directory=os.path.join(train_dataset_path, "episodes")
    )

    if target_object_type is not None:
        ids = {
            "{}_{}_{}".format(scene, target_object_type, epit)
            for epit in episodes_subset
        }
    else:
        ids = {"{}_{}".format(scene, epit) for epit in episodes_subset}
    debug_episodes = [ep for ep in episodes if ep["id"] in ids]
    assert len(ids) == len(debug_episodes), (
        f"Number of input ids ({len(ids)}) does not equal"
        f" number of output debug tasks ({len(debug_episodes)})"
    )

    # sort by the numeric episode index at the end of each id
    debug_episodes = sorted(
        debug_episodes, key=lambda ep: int(ep["id"].split("_")[-1])
    )
    assert len(debug_episodes) == len(episodes_subset)

    episodes_dir = os.path.join(base_debug_output_path, "episodes")
    os.makedirs(episodes_dir, exist_ok=True)
    episodes_file = os.path.join(episodes_dir, scene + ".json.gz")

    json_bytes = json.dumps(debug_episodes).encode("utf-8")
    with gzip.GzipFile(episodes_file, "w") as fout:
        fout.write(json_bytes)
    assert os.path.exists(episodes_file)
if __name__ == "__main__":
    # Paths are resolved relative to the directory the script is run from.
    DATASET_ROOT = os.path.join(os.getcwd(), "datasets", "robothor-objectnav")

    create_debug_dataset_from_train_dataset(
        scene="FloorPlan_Train1_1",
        target_object_type="Television",
        episodes_subset=[0, 7, 11, 12],
        train_dataset_path=os.path.join(DATASET_ROOT, "train"),
        base_debug_output_path=os.path.join(DATASET_ROOT, "debug"),
    )
================================================
FILE: allenact_plugins/robothor_plugin/scripts/make_pointnav_debug_dataset.py
================================================
import os
from allenact_plugins.robothor_plugin.scripts.make_objectnav_debug_dataset import (
create_debug_dataset_from_train_dataset,
)
if __name__ == "__main__":
    # Paths are resolved relative to the directory the script is run from.
    DATASET_ROOT = os.path.join(os.getcwd(), "datasets", "robothor-pointnav")

    # PointNav episode ids carry no object type, hence `target_object_type=None`.
    create_debug_dataset_from_train_dataset(
        scene="FloorPlan_Train1_1",
        target_object_type=None,
        episodes_subset=[3, 4, 5, 6],
        train_dataset_path=os.path.join(DATASET_ROOT, "train"),
        base_debug_output_path=os.path.join(DATASET_ROOT, "debug"),
    )
================================================
FILE: allenact_plugins/setup.py
================================================
import glob
import os
from pathlib import Path
from setuptools import find_packages, setup
def parse_req_file(fname, initial=None):
    """Reads requires.txt file generated by setuptools and outputs a
    new/updated dict of extras as keys and corresponding lists of dependencies
    as values.

    The input file's contents are similar to a `ConfigParser` file, e.g.

        pkg_1
        pkg_2
        pkg_3

        [extras1]
        pkg_4
        pkg_5

        [extras2]
        pkg_6
        pkg_7

    Dependencies listed before the first `[section]` header are ignored.

    # Parameters

    fname : Path of the requires.txt file.
    initial : Optional dict that is updated in place and returned.

    # Returns

    Dict mapping each extras name to its list of dependency strings.
    """
    reqs = {} if initial is None else initial
    current_extra = None
    with open(fname, "r") as f:
        for raw_line in f:
            # `strip()` drops the line terminator if there is one; slicing
            # off the last character would corrupt a final line that does
            # not end with a newline.
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith("["):
                # Add new key for current extras (if missing in dict)
                current_extra = line[1:-1].strip()
                reqs.setdefault(current_extra, [])
            elif current_extra is not None:
                # Only keep dependencies from extras
                reqs[current_extra].append(line)
    return reqs
def get_version(fname):
    """Reads PKG-INFO file generated by setuptools and extracts the Version
    number.

    # Parameters

    fname : Path of the PKG-INFO file.

    # Returns

    The text following the first `Version:` field, stripped of whitespace.

    # Raises

    ValueError : If there is no `Version:` line or its value is empty.
    """
    res = "UNK"
    with open(fname, "r") as f:
        for line in f:
            if line.startswith("Version:"):
                # `strip()` also removes the line terminator, so the value
                # stays intact when the file does not end with a newline.
                res = line.replace("Version:", "").strip()
                break
    if res in ["UNK", ""]:
        raise ValueError(f"Missing Version number in {fname}")
    return res
def run_setup():
    """Configure and run `setuptools.setup` for the `allenact_plugins` package.

    Two modes are distinguished by the presence of
    `allenact_plugins.egg-info/dependency_links.txt` next to this file:

    * present: install from an sdist; version and extras are read back from
      the egg-info metadata.
    * absent: build mode; extras are collected from each plugin's
      `extra_requirements.txt` and the version from the `.VERSION` file one
      directory up (the working directory is changed to that directory).
    """
    base_dir = os.path.abspath(os.path.dirname(Path(__file__)))
    egg_info_marker = os.path.join(
        base_dir, "allenact_plugins.egg-info/dependency_links.txt"
    )

    if os.path.exists(egg_info_marker):
        # Install mode from sdist
        version = get_version(
            os.path.join(base_dir, "allenact_plugins.egg-info/PKG-INFO")
        )
        extras = parse_req_file(
            os.path.join(base_dir, "allenact_plugins.egg-info/requires.txt")
        )
    else:
        # Build mode for sdist

        # Extra dependencies required for various plugins
        extras = {}
        for plugin_dir in glob.glob(os.path.join(base_dir, "*_plugin")):
            reqs_file = os.path.join(plugin_dir, "extra_requirements.txt")
            if not os.path.exists(reqs_file):
                continue

            plugin = os.path.basename(plugin_dir).replace("_plugin", "")
            kept = []
            with open(reqs_file, "r") as f:
                for raw_dep in f.readlines():
                    dep = raw_dep.strip()
                    # Filter out blanks, comments and non-PyPI dependencies
                    if dep == "" or dep.startswith("#"):
                        continue
                    if "@ git+https://github.com/" in dep:
                        continue
                    kept.append(dep)
            extras[plugin] = kept

        extras["all"] = [dep for deps in extras.values() for dep in deps]

        os.chdir(os.path.join(base_dir, ".."))

        with open(".VERSION", "r") as f:
            version = f.readline().strip()

    setup(
        name="allenact_plugins",
        version=version,
        description="Plugins for the AllenAct framework",
        long_description=(
            "A collection of plugins/extensions for use within the AllenAct framework."
        ),
        classifiers=[
            "Intended Audience :: Science/Research",
            "Development Status :: 3 - Alpha",
            "License :: OSI Approved :: MIT License",
            "Topic :: Scientific/Engineering :: Artificial Intelligence",
            "Programming Language :: Python",
            "Programming Language :: Python :: 3.6",
            "Programming Language :: Python :: 3.7",
            "Programming Language :: Python :: 3.8",
            "Programming Language :: Python :: 3.9",
            "Programming Language :: Python :: 3.10",
        ],
        keywords=["reinforcement learning", "embodied-AI", "AI", "RL", "SLAM"],
        url="https://github.com/allenai/allenact",
        author="Allen Institute for Artificial Intelligence",
        author_email="lucaw@allenai.org",
        license="MIT",
        packages=find_packages(include=["allenact_plugins", "allenact_plugins.*"]),
        install_requires=[f"allenact=={version}"],
        setup_requires=["pytest-runner"],
        tests_require=["pytest", "pytest-cov"],
        extras_require=extras,
    )
# Run the setuptools configuration only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    run_setup()
================================================
FILE: conda/environment-10.1.yml
================================================
channels:
- defaults
- pytorch
dependencies:
- cudatoolkit=10.1
- pytorch>=1.6.0,!=1.8.0
- torchvision>=0.7.0,<0.10.0
================================================
FILE: conda/environment-10.2.yml
================================================
channels:
- defaults
- pytorch
dependencies:
- cudatoolkit=10.2
- pytorch>=1.6.0,!=1.8.0
- torchvision>=0.7.0,<0.10.0
================================================
FILE: conda/environment-11.1.yml
================================================
channels:
- defaults
- pytorch
- nvidia
dependencies:
- cudatoolkit=11.1
- pytorch>=1.6.0,!=1.8.0
- torchvision>=0.7.0
================================================
FILE: conda/environment-9.2.yml
================================================
channels:
- defaults
- pytorch
dependencies:
- cudatoolkit=9.2
- pytorch>=1.6.0,!=1.8.0
- torchvision>=0.7.0,<0.10.0
================================================
FILE: conda/environment-base.yml
================================================
channels:
- defaults
- pytorch
- conda-forge
dependencies:
- python=3.8
- certifi
- chardet=4.0.0
- cloudpickle=1.6.0
- cycler=0.10.0
- decorator=4.4.2
- filelock=3.0.12
- future=0.18.2
- gym>=0.17.0,<0.20.0
- idna>=2.10
- imageio>=2.9.0
- imageio-ffmpeg>=0.4.3
- kiwisolver=1.3.1
- matplotlib>=3.3.1
- networkx
- numpy>=1.19.1
- opencv
- conda-forge::pillow>=8.2.0,<9.0.0
- pip
- proglog>=0.1.9
- protobuf>=3.14.0
- pyglet>=1.5.0
- pyparsing>=2.4.7
- python-dateutil>=2.8.1
- pytorch::pytorch>=1.6.0,!=1.8.0
- pytorch::torchvision>=0.7.0
- requests>=2.25.1
- setproctitle
- six>=1.15.0
- tensorboardx>=2.1
- tqdm
- urllib3>=1.26.2
- attrs
- pip:
- moviepy>=1.0.3
- scipy>=1.5.4
- compress-pickle>=1.2.0
================================================
FILE: conda/environment-cpu.yml
================================================
channels:
- defaults
- pytorch
dependencies:
- cpuonly
- pytorch>=1.6.0,!=1.8.0
- torchvision>=0.7.0,<0.10.0
================================================
FILE: conda/environment-dev.yml
================================================
channels:
- defaults
- conda-forge
dependencies:
- black>=24.2.0
- docformatter>=1.3.1
- gitpython
- markdown>=3.3
- mkdocs>=1.1.2
- mkdocs-material>=5.5.3
- mkdocs-material-extensions>=1.0
- mypy
- pre-commit
- pytest>=6.1.1
- ruamel.yaml
- pip:
- pydoc-markdown>=3.4.0
================================================
FILE: constants.py
================================================
import os
from pathlib import Path
# Absolute path of the directory containing this file.
ABS_PATH_OF_TOP_LEVEL_DIR = os.path.abspath(os.path.dirname(Path(__file__)))
# Absolute path of the `docs` directory located directly inside that directory.
ABS_PATH_OF_DOCS_DIR = os.path.join(ABS_PATH_OF_TOP_LEVEL_DIR, "docs")
================================================
FILE: datasets/.gitignore
================================================
*
!.gitignore
!*.sh
!.habitat_datasets_download_info.json
!.habitat_downloader_helper.py
!habitat/configs/debug_habitat_pointnav.yaml
================================================
FILE: datasets/.habitat_datasets_download_info.json
================================================
{
"pointnav-gibson-v1": {
"link": "https://dl.fbaipublicfiles.com/habitat/data/datasets/pointnav/gibson/v1/pointnav_gibson_v1.zip",
"rel_path": "data/datasets/pointnav/gibson/v1/",
"config_url": "configs/datasets/imagenav/gibson.yaml"
},
"pointnav-gibson-v2": {
"link": "https://dl.fbaipublicfiles.com/habitat/data/datasets/pointnav/gibson/v2/pointnav_gibson_v2.zip",
"rel_path": "data/datasets/pointnav/gibson/v2/",
"config_url": "configs/datasets/pointnav/gibson.yaml"
},
"pointnav-mp3d-v1": {
"link": "https://dl.fbaipublicfiles.com/habitat/data/datasets/pointnav/mp3d/v1/pointnav_mp3d_v1.zip",
"rel_path": "data/datasets/pointnav/mp3d/v1/",
"config_url": "configs/datasets/imagenav/mp3d.yaml"
},
"objectnav-mp3d-v1": {
"link": "https://dl.fbaipublicfiles.com/habitat/data/datasets/objectnav/m3d/v1/objectnav_mp3d_v1.zip",
"rel_path": "data/datasets/objectnav/mp3d/v1/",
"config_url": "configs/datasets/objectnav/mp3d.yaml"
},
"eqa-mp3d-v1": {
"link": "https://dl.fbaipublicfiles.com/habitat/data/datasets/eqa/mp3d/v1/eqa_mp3d_v1.zip",
"rel_path": "data/datasets/eqa/mp3d/v1/",
"config_url": "configs/datasets/eqa/mp3d.yaml"
},
"vln-r2r-mp3d-v1": {
"link": "https://dl.fbaipublicfiles.com/habitat/data/datasets/vln/mp3d/r2r/v1/vln_r2r_mp3d_v1.zip",
"rel_path": "data/datasets/vln/mp3d/r2r/v1",
"config_url": "configs/datasets/vln/mp3d_r2r.yaml"
}
}
================================================
FILE: datasets/.habitat_downloader_helper.py
================================================
import json
import os
import re
import shutil
import sys
from pathlib import Path
from urllib.request import urlopen
from allenact.utils.misc_utils import all_equal
DATASET_DIR = os.path.abspath(os.path.dirname(Path(__file__)))
def get_habitat_download_info(allow_create: bool = False):
    """Get a dictionary giving a specification of where habitat data lives
    online.

    # Parameters

    allow_create: Whether or not we should try to regenerate the json file that represents
        the above dictionary. This is potentially unsafe so please only set this to `True`
        if you're sure it will download what you want.

    # Returns

    The contents of `.habitat_datasets_download_info.json` (located in
    `DATASET_DIR`): a dict from dataset id to a dict with the keys
    `"link"`, `"rel_path"` and `"config_url"`.

    # Raises

    RuntimeError : If regeneration is attempted but no task table is found
        in the downloaded README.
    """
    json_save_path = os.path.join(DATASET_DIR, ".habitat_datasets_download_info.json")

    if allow_create and not os.path.exists(json_save_path):
        url = "https://raw.githubusercontent.com/facebookresearch/habitat-lab/master/README.md"
        output = urlopen(url).read().decode("utf-8")

        lines = [l.strip() for l in output.split("\n")]

        # Collect the rows of the first markdown table whose header mentions
        # both "Task" and "Link"; stop at the first line after that table.
        task_table_started = False
        table_lines = []
        for l in lines:
            if l.count("|") > 3 and l[0] == l[-1] == "|":
                if task_table_started:
                    table_lines.append(l)
                elif "Task" in l and "Link" in l:
                    task_table_started = True
                    table_lines.append(l)
            elif task_table_started:
                break

        # Raw string: `\[`, `\]`, `\(` are regex escapes, not string escapes.
        url_pat = re.compile(r"\[.*\]\((.*)\)")

        def get_url(in_str: str):
            """Return the target of a markdown link, or `in_str` unchanged
            when it does not start with a link."""
            match = url_pat.match(in_str)
            if match:
                return match.group(1)
            else:
                return in_str

        header = None
        rows = []
        for i, l in enumerate(table_lines):
            l = l.strip("|")
            entries = [get_url(e.strip().replace("`", "")) for e in l.split("|")]

            if i == 0:
                header = [e.lower().replace(" ", "_") for e in entries]
            elif not all_equal(entries):
                # Rows whose cells are all equal are skipped.
                rows.append(entries)

        if header is None:
            raise RuntimeError(f"Could not find the task table in {url}")

        # `list.index` raises ValueError if a column is missing.
        link_ind = header.index("link")
        extract_ind = header.index("extract_path")
        config_ind = header.index("config_to_use")

        data_info = {}
        for row in rows:
            # E.g. ".../pointnav_gibson_v1.zip" -> "pointnav-gibson-v1"
            dataset_id = (
                row[link_ind].split("/")[-1].replace(".zip", "").replace("_", "-")
            )
            data_info[dataset_id] = {
                "link": row[link_ind],
                "rel_path": row[extract_ind],
                "config_url": row[config_ind],
            }

        with open(json_save_path, "w") as f:
            json.dump(data_info, f)

    with open(json_save_path, "r") as f:
        return json.load(f)
if __name__ == "__main__":
    # Downloads one habitat dataset (given by its key as the single command
    # line argument) into `<DATASET_DIR>/habitat`. All relative paths below
    # are resolved inside that directory.
    habitat_dir = os.path.join(DATASET_DIR, "habitat")
    os.makedirs(habitat_dir, exist_ok=True)
    os.chdir(habitat_dir)

    download_info = get_habitat_download_info(allow_create=False)

    if len(sys.argv) != 2 or sys.argv[1] not in download_info:
        print(
            "Incorrect input, expects a single input where this input is one of "
            f" {['test-scenes', *sorted(download_info.keys())]}."
        )
        # `sys.exit` rather than `quit`: the latter is a helper added by the
        # `site` module for interactive sessions.
        sys.exit(1)

    task_key = sys.argv[1]
    task_dl_info = download_info[task_key]

    output_archive_name = "__TO_OVERWRITE__.zip"
    deletable_dir_name = "__TO_DELETE__"

    # Download the archive.
    cmd = f"wget {task_dl_info['link']} -O {output_archive_name}"
    if os.system(cmd):
        print(f"ERROR: `{cmd}` failed.")
        sys.exit(1)

    # Extract it into a temporary directory.
    cmd = f"unzip {output_archive_name} -d {deletable_dir_name}"
    if os.system(cmd):
        print(f"ERROR: `{cmd}` failed.")
        sys.exit(1)

    # The json stores paths relative to a `data/` folder that is not used here.
    download_to_path = task_dl_info["rel_path"].replace("data/", "")
    if download_to_path.endswith("/"):
        download_to_path = download_to_path[:-1]

    os.makedirs(download_to_path, exist_ok=True)

    # Merge the extracted files into their final location.
    cmd = f"rsync -avz {deletable_dir_name}/ {download_to_path}/"
    if os.system(cmd):
        print(f"ERROR: `{cmd}` failed.")
        sys.exit(1)

    # Clean up the temporary archive and extraction directory.
    os.remove(output_archive_name)
    shutil.rmtree(deletable_dir_name)
================================================
FILE: datasets/download_habitat_datasets.sh
================================================
#!/bin/bash
# Move to the directory containing this file
cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" || exit
mkdir -p habitat
mkdir -p habitat/scene_datasets
mkdir -p habitat/datasets
mkdir -p habitat/configs
cd habitat || exit
output_archive_name=__TO_OVERWRITE__.zip
deletable_dir_name=__TO_DELETE__
# Download the habitat test scenes archive, unpack it into a temporary
# directory, copy its `data/datasets` and `data/scene_datasets` folders into
# the current directory, then remove the archive and temporary directory.
# Exits the script with status 1 if the download or extraction fails.
install_test_scenes_and_data() {
  local test_scenes_url=http://dl.fbaipublicfiles.com/habitat/habitat-test-scenes.zip

  if ! wget "$test_scenes_url" -O "$output_archive_name"; then
    echo "Could not download test scenes from $test_scenes_url"
    exit 1
  fi

  if ! unzip "$output_archive_name" -d "$deletable_dir_name"; then
    echo "Could not unzip $output_archive_name to $deletable_dir_name"
    exit 1
  fi

  # Only clean up when both copies succeeded.
  rsync -avz "$deletable_dir_name/data/datasets" . && \
  rsync -avz "$deletable_dir_name/data/scene_datasets" . && \
  rm "$output_archive_name" && \
  rm -r "$deletable_dir_name"
}
# Delegate the download of the dataset named by the first argument to the
# Python helper located one directory above the current one.
install_scene_data() {
  python3 ../.habitat_downloader_helper.py "${1}"
}
# "test-scenes" is handled directly in this script; any other key is passed
# (quoted, so it always arrives as exactly one argument) to the Python helper.
if [ "$1" = "test-scenes" ]
then
  install_test_scenes_and_data
else
  install_scene_data "$1"
fi
================================================
FILE: datasets/download_navigation_datasets.sh
================================================
#!/bin/bash
# Move to the directory containing this file
cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" || exit
# Download and unpack one navigation dataset into a new directory.
#   $1: dataset name; also the name of the directory that is created
#   $2: suffix appended to the dataset name to form the remote archive name
# Exits the script with status 1 if the directory cannot be created, the
# download fails, or the archive cannot be extracted.
install_dataset() {
  dataset_name="$1"
  download_suffix="$2"

  if ! mkdir "$dataset_name" ; then
    echo "Could not create directory $(pwd)/$dataset_name Does it already exist? If so, delete it."
    exit 1
  fi

  url_archive_name="$dataset_name$download_suffix.tar.gz"
  output_archive_name=__TO_OVERWRITE__.tar.gz
  archive_url="https://prior-datasets.s3.us-east-2.amazonaws.com/embodied-ai/navigation/$url_archive_name"

  if ! wget "$archive_url" -O "$output_archive_name" ; then
    echo "Could not download $archive_url"
    # wget -O creates the output file even when the download fails.
    rm -f "$output_archive_name"
    exit 1
  fi

  if ! tar -xf "$output_archive_name" -C "$dataset_name" --strip-components=1 ; then
    echo "Could not extract $output_archive_name to $dataset_name"
    exit 1
  fi
  rm "$output_archive_name"

  echo "saved folder: $dataset_name"
}
# Download, Unzip, and Remove zip
# Download, unpack and remove the archive of the requested dataset, then
# generate its debug dataset from the repository root (one directory up).
case "$1" in
  robothor-pointnav)
    echo "Downloading RoboTHOR PointNav Dataset ..."
    install_dataset "$1" "-v0"
    cd ..
    echo "Generating RoboTHOR PointNav Debug Dataset ..."
    PYTHONPATH=. python ./allenact_plugins/robothor_plugin/scripts/make_pointnav_debug_dataset.py
    ;;

  robothor-objectnav)
    echo "Downloading RoboTHOR ObjectNav Dataset ..."
    install_dataset "$1" "-challenge-2021"
    cd ..
    echo "Generating RoboTHOR ObjectNav Debug Dataset ..."
    PYTHONPATH=. python ./allenact_plugins/robothor_plugin/scripts/make_objectnav_debug_dataset.py
    ;;

  ithor-pointnav)
    echo "Downloading iTHOR PointNav Dataset ..."
    install_dataset "$1" "-v0"
    cd ..
    echo "Generating iTHOR PointNav Debug Dataset ..."
    PYTHONPATH=. python ./allenact_plugins/ithor_plugin/scripts/make_pointnav_debug_dataset.py
    ;;

  ithor-objectnav)
    echo "Downloading iTHOR ObjectNav Dataset ..."
    install_dataset "$1" "-v0"
    cd ..
    echo "Generating iTHOR ObjectNav Debug Dataset ..."
    PYTHONPATH=. python ./allenact_plugins/ithor_plugin/scripts/make_objectnav_debug_dataset.py
    ;;

  all-thor)
    # Each dataset is installed by a fresh invocation of this script.
    bash download_navigation_datasets.sh "robothor-pointnav"
    bash download_navigation_datasets.sh "robothor-objectnav"
    bash download_navigation_datasets.sh "ithor-pointnav"
    bash download_navigation_datasets.sh "ithor-objectnav"
    ;;

  *)
    # printf interprets the leading \n; bash's echo prints it literally.
    printf '\nFailed: Usage download_navigation_datasets.sh robothor-pointnav | robothor-objectnav | ithor-pointnav | ithor-objectnav | all-thor\n'
    exit 1
    ;;
esac
================================================
FILE: dev_requirements.txt
================================================
black==24.2.0
ruamel.yaml
gitpython
markdown==3.3
mypy
pytest
docformatter==1.3.1
docstr-coverage==1.2.0
mkdocs==1.1.2
mkdocs-material==5.5.3
pre-commit
pydoc-markdown==3.4.0
compress-pickle==1.2.0
================================================
FILE: docs/.gitignore
================================================
api/
================================================
FILE: docs/CNAME
================================================
allenact.org
================================================
FILE: docs/FAQ.md
================================================
# FAQ
## How do I file a bug regarding the code or documentation?
Please file bugs by submitting an [issue](https://github.com/allenai/allenact/issues). We also welcome contributions from the community, including new features and bugfixes on existing functionality. Please refer to our [contribution guidelines](CONTRIBUTING.md).
## How do I generate documentation?
Documentation is generated using [mkdocs](https://www.mkdocs.org/) and
[pydoc-markdown](https://pypi.org/project/pydoc-markdown/).
### Building documentation locally
The `mkdocs` command used to build our documentation relies on all documentation existing
as subdirectories of the `docs` folder. To ensure that all relevant markdown files are placed into
this directory, you should always run
```bash
bash scripts/build_docs.sh
```
from the top-level project directory before running any of the `mkdocs` commands below.
If you have made no changes to the documentation and only wish to build documentation on
your local machine, run the following from within the `allenact` root directory. Note: This will generate HTML documentation within the `site` folder.
```bash
mkdocs build
```
### Serving documentation locally
If you have made no changes to the documentation and only wish to serve documentation on your local
machine (with live reloading of modified documentation), run the following from within the `allenact` root directory.
```bash
mkdocs serve
```
Then navigate to [http://127.0.0.1:8000/](http://127.0.0.1:8000/)
### Modifying and serving documentation locally
If you have made changes to the documentation, you will need to run a documentation builder script
before you serve it on your local machine.
```bash
bash scripts/build_docs.sh
mkdocs serve
```
Then navigate to [http://127.0.0.1:8000/](http://127.0.0.1:8000/)
Alternatively, the `site` directory (once built) can be served as a static webpage on your local machine
without installing any dependencies by running `python -m http.server 8000` from within the `site` directory.
================================================
FILE: docs/css/extra.css
================================================
/* Allow word-breaks in headers */
h1 {
word-wrap: break-word;
}
/* Don't have the edit button as it's broken for us */
.md-content__button {
display: none;
}
================================================
FILE: docs/getting_started/abstractions.md
================================================
# Primary abstractions
Our package relies on a collection of fundamental abstractions to define how, and in what task, an agent should be
trained and evaluated. A subset of these abstractions are described in plain language below. Each of the sections below
ends with a link to the (formal) documentation of the abstraction as well as a link to an example implementation of the
abstraction (if relevant). The following provides a high-level illustration of how these abstractions interact.

## Experiment configuration
In `allenact`, experiments are defined by implementing the abstract `ExperimentConfig` class. The methods
of this implementation are then called during training/inference to properly set up the desired experiment. For example,
the `ExperimentConfig.create_model` method will be called at the beginning of training to create the model
to be trained.
See either the ["designing your first minigrid experiment"](/tutorials/minigrid-tutorial) or the
["designing an experiment for point navigation"](/tutorials/training-a-pointnav-model)
tutorials to get an in-depth description of how these experiment configurations are defined in practice.
See also the [abstract `ExperimentConfig` class](/api/allenact/base_abstractions/experiment_config#experimentconfig)
and an [example implementation](/api/allenact_plugins/ithor_plugin/ithor_environment/#ithorenvironment).
## Task sampler
A task sampler is responsible for generating a sequence of tasks for agents to solve. The sequence of tasks can be
randomly generated (e.g. in training) or extracted from an ordered pool (e.g. in validation or testing).
See the [abstract `TaskSampler` class](/api/allenact/base_abstractions/task/#tasksampler)
and an [example implementation](/api/allenact_plugins/ithor_plugin/ithor_task_samplers/#objectnavtasksampler).
## Task
Tasks define the scope of the interaction between agents and an environment (including the action types agents are
allowed to execute), as well as metrics to evaluate the agents' performance. For example, we might define a task
`ObjectNaviThorGridTask` in which agents receive observations obtained from the environment (e.g. RGB images) or directly from
the task (e.g. a target object class) and are allowed to execute actions such as `MoveAhead`, `RotateRight`,
`RotateLeft`, and `End` whenever agents determine they have reached their target. The metrics might include a
success indicator or some quantitative metric on the optimality of the followed path.
See the [abstract `Task` class](/api/allenact/base_abstractions/task/#task)
and an [example implementation](/api/allenact_plugins/robothor_plugin/robothor_tasks/#objectnavtask).
## Sensor
Sensors provide observations extracted from an environment (e.g. RGB or depth images) or directly from a task (e.g. the
end point in point navigation or target object class in semantic navigation) that can be directly consumed by
agents.
See the [abstract `Sensor` class](/api/allenact/base_abstractions/sensor/#sensor)
and an [example implementation](/api/allenact_plugins/ithor_plugin/ithor_sensors/#rgbsensorthor).
## Actor critic model
The actor-critic agent is responsible for computing batched action probabilities and state values given the
observations provided by sensors, internal state representations, previous actions, and potentially
other inputs.
See the [abstract `ActorCriticModel` class](/api/allenact/algorithms/onpolicy_sync/policy/#ActorCriticModel)
and an
[example implementation](/api/projects/objectnav_baselines/models/object_nav_models#ObjectNavBaselineActorCritic).
## Training pipeline
The training pipeline, defined in the
[`ExperimentConfig`'s `training_pipeline` method](/api/allenact/base_abstractions/experiment_config/#training_pipeline),
contains one or more training stages where different
[losses can be combined or sequentially applied](/howtos/defining-a-new-training-pipeline).
## Losses
Actor-critic losses compute a combination of action loss and value loss out of collected experience that can be used to
train actor-critic models with back-propagation, e.g. PPO or A2C.
See the
[`AbstractActorCriticLoss` class](/api/allenact/algorithms/onpolicy_sync/losses/abstract_loss#abstractactorcriticloss)
and an [example implementation](/api/allenact/algorithms/onpolicy_sync/losses/ppo/#ppo).
Off-policy losses implement generic training iterations in which a batch of data is run through a model (that can be a
subgraph of an [`ActorCriticModel`](#actor-critic-model)) and a loss is
computed on the model's output.
See the
[`AbstractOffPolicyLoss` class](/api/allenact/algorithms/offpolicy_sync/losses/abstract_offpolicy_loss#abstractoffpolicyloss)
and an [example implementation](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy/#MiniGridOffPolicyExpertCELoss).
================================================
FILE: docs/getting_started/running-your-first-experiment.md
================================================
# Running your first experiment
Assuming you have [installed the full library](../installation/installation-allenact.md#full-library), you can run your
first experiment by calling
```bash
PYTHONPATH=. python allenact/main.py minigrid_tutorial -b projects/tutorials -m 8 -o experiment_output/minigrid -s 12345
```
from the `allenact` root directory.
* With `-b projects/tutorials` we tell `allenact` that `minigrid_tutorial` experiment config file
will be found in the `projects/tutorials` directory.
* With `-m 8` we limit the number of subprocesses to 8 (each subprocess will run 16 of the 128 training task samplers).
* With `-o experiment_output/minigrid` we set the output folder into which results and logs will be saved.
* With `-s 12345` we set the random seed.
If everything was installed correctly, a simple model will be trained (and validated) in the MiniGrid environment and
a new folder `experiment_output/minigrid` will be created containing:
* a `checkpoints/MiniGridTutorial/LOCAL_TIME_STR/` subfolder with model weight checkpoints,
* a `used_configs/MiniGridTutorial/LOCAL_TIME_STR/` subfolder with all used configuration files,
* and a tensorboard log file under `tb/MiniGridTutorial/LOCAL_TIME_STR/`.
Here `LOCAL_TIME_STR` is a string that records the time when the experiment was started (e.g. the string
`"2020-08-21_18-19-47"` corresponds to an experiment started on August 21st 2020, 47 seconds past 6:19pm).
If we have Tensorboard installed, we can track training progress with
```bash
tensorboard --logdir experiment_output/minigrid/tb
```
which will default to the URL [http://localhost:6006/](http://localhost:6006/).
After 150,000 steps, the script will terminate and several checkpoints will be saved in the output folder.
The training curves should look similar to:

If everything went well, the `valid` success rate should converge to 1 and the mean episode length to a value below 4.
(For perfectly uniform sampling and complete observation, the expectation for the optimal policy is 3.75 steps.) In the
not-so-unlikely event of the run failing to converge to a near-optimal policy, we can just try to re-run (for example
with a different random seed). The validation curves should look similar to:

A detailed tutorial describing how the `minigrid_tutorial` experiment configuration was created can be found
[here](../tutorials/minigrid-tutorial.md).
To run your own custom experiment simply define a new experiment configuration in a file
`projects/YOUR_PROJECT_NAME/experiments/my_custom_experiment.py` after which you may run it with
`PYTHONPATH=. python allenact/main.py my_custom_experiment -b projects/YOUR_PROJECT_NAME/experiments`.
================================================
FILE: docs/getting_started/structure.md
================================================
# Structure of the codebase
The codebase consists of the following directories: `allenact`, `datasets`, `docs`, `overrides`, `allenact_plugins`,
`pretrained_model_ckpts`, `projects`, `scripts`, and `tests`. Below, we explain the overall structure and how
different components of the codebase are organized.
## [`allenact` directory](https://github.com/allenai/allenact/tree/master/allenact)
Contains runtime algorithms for on-policy and off-policy training and inference, base abstractions used throughout
the code base and basic models to be used as building blocks in future models.
* `allenact.algorithms` includes on-policy and off-policy training and inference algorithms and abstractions for losses,
policies, rollout storage, etc.
* `allenact.base_abstractions` includes the base `ExperimentConfig`, distributions, base `Sensor`, `TaskSampler`, `Task`,
etc.
* `allenact.embodiedai` includes basic CNN and RNN state encoders, as well as basic `ActorCriticModel` implementations
for embodied AI tasks.
## [`datasets` directory](https://github.com/allenai/allenact/tree/master/datasets)
A directory made to store task-specific datasets. For example, the script `datasets/download_navigation_datasets.sh` can
be used to automatically download task dataset files for Point Navigation within the RoboTHOR environment
and it will place these files into a new `datasets/robothor-pointnav` directory.
## [`docs` directory](https://github.com/allenai/allenact/tree/master/docs)
Contains documentation for the framework, including guides for installation and first experiments, how-to's for
the definition and usage of different abstractions, tutorials and per-project documentation.
## [`overrides` directory](https://github.com/allenai/allenact/tree/master/overrides)
Files within this directory are used to customize the look and structure of the documentation generated when running `mkdocs`.
See our [FAQ](../FAQ.md) for information on how to generate this documentation for yourself.
## [`allenact_plugins` directory](https://github.com/allenai/allenact/tree/master/allenact_plugins)
Contains implementations of `ActorCriticModel`s and `Task`s in different environments. Each plugin folder is
named as `{environment}_plugin` and contains three subfolders:
1. `configs` to host useful configuration for the environment or tasks.
1. `data` to store data to be consumed by the environment or tasks.
1. `scripts` to setup the plugin or gather and process data.
## [`pretrained_model_ckpts` directory](https://github.com/allenai/allenact/tree/master/pretrained_model_ckpts)
Directory into which pretrained model checkpoints will be saved. See also the
`pretrained_model_ckpts/download_navigation_model_ckpts.sh` which can be used to download such checkpoints.
## [`projects` directory](https://github.com/allenai/allenact/tree/master/projects)
Contains project-specific code like experiment configurations and scripts to process results, generate visualizations
or prepare data.
## [`scripts` directory](https://github.com/allenai/allenact/tree/master/scripts)
Includes framework-wide scripts to build the documentation, format code, run_tests and start an xserver. The latter can
be used for OpenGL-based environments having super-user privileges in Linux, assuming NVIDIA drivers and `xserver-xorg`
are installed.
## [`tests` directory](https://github.com/allenai/allenact/tree/master/tests)
Includes unit tests for `allenact`.
## [`allenact.utils` directory](https://github.com/allenai/allenact/tree/master/allenact/utils)
It includes different types of utilities, mainly divided into:
* `allenact.utils.experiment_utils`, including the `TrainingPipeline`, `PipelineStage` and other utilities to configure an
experiment.
* `allenact.utils.model_utils`, including generic CNN creation, forward-pass helpers and other utilities.
* `allenact.utils.tensor_utils`, including functions to batch observations, convert tensors into video, scale image tensors, etc.
* `allenact.utils.viz_utils`, including a `VizSuite` class that can be instantiated with different visualization plugins during
inference.
* `allenact.utils.system`, including logging and networking helpers.
Other utils files, including `allenact.utils.misc_utils`, contain a number of helper functions for different purposes.
================================================
FILE: docs/howtos/changing-rewards-and-losses.md
================================================
# Changing rewards and losses
In order to train actor-critic agents, we need to specify
* `rewards` at the task level, and
* `losses` at the training pipeline level.
## Rewards
We will use the [object navigation task in `iTHOR`](/api/allenact_plugins/ithor_plugin/ithor_tasks/#objectnavtask) as a
running example. We can see how the `ObjectNaviThorGridTask._step(self, action: int) -> RLStepResult` method computes
the reward for the latest action by invoking a function like:
```python
def judge(self) -> float:
reward = -0.01
if not self.last_action_success:
reward += -0.03
if self._took_end_action:
reward += 1.0 if self._success else -1.0
return float(reward)
```
Any reward shaping can be easily added by e.g. modifying the definition of an existing class:
```python
class NavigationWithShaping(allenact_plugins.ithor_plugin.ithor_tasks.ObjectNaviThorGridTask):
def judge(self) -> float:
reward = super().judge()
if self.previous_state is not None:
reward += float(my_reward_shaping_function(
self.previous_state,
self.current_state,
))
self.previous_state = self.current_state
return reward
```
## Losses
We support [A2C](/api/allenact/algorithms/onpolicy_sync/losses/a2cacktr#a2c),
[PPO](/api/allenact/algorithms/onpolicy_sync/losses/ppo#ppo), and
[imitation](/api/allenact/algorithms/onpolicy_sync/losses/imitation#imitation) losses amongst others. We can easily
include [DAgger](https://www.cs.cmu.edu/~sross1/publications/Ross-AIStats11-NoRegret.pdf) or variations thereof by
assuming the availability of an expert providing optimal actions to agents and combining imitation and PPO losses in
different ways through multiple stages:
```python
class MyExperimentConfig(allenact.base_abstractions.experiment_config.ExperimentConfig):
...
@classmethod
def training_pipeline(cls, **kwargs):
dagger_steps = int(3e4)
ppo_steps = int(3e4)
ppo_steps2 = int(1e6)
...
return allenact.utils.experiment_utils.TrainingPipeline(
named_losses={
"imitation_loss": allenact.algorithms.onpolicy_sync.losses.imitation.Imitation(),
"ppo_loss": allenact.algorithms.onpolicy_sync.losses.ppo.PPO(
**allenact.algorithms.onpolicy_sync.losses.ppo.PPOConfig,
),
},
...
pipeline_stages=[
allenact.utils.experiment_utils.PipelineStage(
loss_names=["imitation_loss", "ppo_loss"],
teacher_forcing=allenact.utils.experiment_utils.LinearDecay(
startp=1.0, endp=0.0, steps=dagger_steps,
),
max_stage_steps=dagger_steps,
),
allenact.utils.experiment_utils.PipelineStage(
loss_names=["ppo_loss", "imitation_loss"],
max_stage_steps=ppo_steps
),
allenact.utils.experiment_utils.PipelineStage(
loss_names=["ppo_loss"],
max_stage_steps=ppo_steps2,
),
],
)
```
================================================
FILE: docs/howtos/defining-a-new-model.md
================================================
# Defining a new model
All actor-critic models must implement the interface described by the
[ActorCriticModel class](/api/allenact/algorithms/onpolicy_sync/policy/#actorcriticmodel). This interface includes two methods that need to be
implemented:
* `recurrent_memory_specification`, returning a description of the model's recurrent memory; and
* `forward`, returning an [ActorCriticOutput](/api/allenact/base_abstractions/misc/#actorcriticoutput) given the current observation,
hidden state and previous actions.
For convenience, we provide a [recurrent network module](/api/allenact/embodiedai/models/basic_models/#rnnstateencoder) and
[a simple CNN module](/api/allenact/embodiedai/models/basic_models/#simplecnn) from the Habitat baseline navigation
models, that will be used in this example.
### Actor-critic model interface
As an example, let's build an object navigation agent.
```python
class ObjectNavBaselineActorCritic(ActorCriticModel[CategoricalDistr]):
"""Baseline recurrent actor critic model for object-navigation.
# Attributes
action_space : The space of actions available to the agent. Currently only discrete
actions are allowed (so this space will always be of type `gym.spaces.Discrete`).
observation_space : The observation space expected by the agent. This observation space
should include (optionally) 'rgb' images and 'depth' images and is required to
have a component corresponding to the goal `goal_sensor_uuid`.
goal_sensor_uuid : The uuid of the sensor of the goal object. See `GoalObjectTypeThorSensor`
as an example of such a sensor.
hidden_size : The hidden size of the GRU RNN.
object_type_embedding_dim: The dimensionality of the embedding corresponding to the goal
object type.
"""
def __init__(
self,
action_space: gym.spaces.Discrete,
observation_space: SpaceDict,
goal_sensor_uuid: str,
rgb_uuid: Optional[str],
depth_uuid: Optional[str],
hidden_size=512,
object_type_embedding_dim=8,
trainable_masked_hidden_state: bool = False,
num_rnn_layers=1,
rnn_type="GRU",
):
"""Initializer.
See class documentation for parameter definitions.
"""
super().__init__(action_space=action_space, observation_space=observation_space)
self.goal_sensor_uuid = goal_sensor_uuid
self._n_object_types = self.observation_space.spaces[self.goal_sensor_uuid].n
self._hidden_size = hidden_size
self.object_type_embedding_size = object_type_embedding_dim
self.visual_encoder = SimpleCNN(
observation_space=self.observation_space,
output_size=self._hidden_size,
rgb_uuid=rgb_uuid,
depth_uuid=depth_uuid,
)
self.state_encoder = RNNStateEncoder(
(0 if self.is_blind else self._hidden_size) + object_type_embedding_dim,
self._hidden_size,
trainable_masked_hidden_state=trainable_masked_hidden_state,
num_layers=num_rnn_layers,
rnn_type=rnn_type,
)
self.actor = LinearActorHead(self._hidden_size, action_space.n)
self.critic = LinearCriticHead(self._hidden_size)
self.object_type_embedding = nn.Embedding(
num_embeddings=self._n_object_types,
embedding_dim=object_type_embedding_dim,
)
self.train()
@property
def recurrent_hidden_state_size(self) -> int:
"""The recurrent hidden state size of the model."""
return self._hidden_size
@property
def is_blind(self) -> bool:
"""True if the model is blind (e.g. neither 'depth' or 'rgb' is an
input observation type)."""
return self.visual_encoder.is_blind
@property
def num_recurrent_layers(self) -> int:
"""Number of recurrent hidden layers."""
return self.state_encoder.num_recurrent_layers
def _recurrent_memory_specification(self):
return dict(
rnn=(
(
("layer", self.num_recurrent_layers),
("sampler", None),
("hidden", self.recurrent_hidden_state_size),
),
torch.float32,
)
)
def get_object_type_encoding(
self, observations: Dict[str, torch.FloatTensor]
) -> torch.FloatTensor:
"""Get the object type encoding from input batched observations."""
# noinspection PyTypeChecker
return self.object_type_embedding( # type:ignore
observations[self.goal_sensor_uuid].to(torch.int64)
)
def forward( # type:ignore
self,
observations: ObservationType,
memory: Memory,
prev_actions: torch.Tensor,
masks: torch.FloatTensor,
) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
"""Processes input batched observations to produce new actor and critic
values. Processes input batched observations (along with prior hidden
states, previous actions, and masks denoting which recurrent hidden
states should be masked) and returns an `ActorCriticOutput` object
containing the model's policy (distribution over actions) and
evaluation of the current state (value).
# Parameters
observations : Batched input observations.
memory : `Memory` containing the hidden states from initial timepoints.
prev_actions : Tensor of previous actions taken.
masks : Masks applied to hidden states. See `RNNStateEncoder`.
# Returns
Tuple of the `ActorCriticOutput` and recurrent hidden state.
"""
target_encoding = self.get_object_type_encoding(
cast(Dict[str, torch.FloatTensor], observations)
)
x = [target_encoding]
if not self.is_blind:
perception_embed = self.visual_encoder(observations)
x = [perception_embed] + x
x_cat = torch.cat(x, dim=-1) # type: ignore
x_out, rnn_hidden_states = self.state_encoder(
x_cat, memory.tensor("rnn"), masks
)
return (
ActorCriticOutput(
distributions=self.actor(x_out), values=self.critic(x_out), extras={}
),
memory.set_tensor("rnn", rnn_hidden_states),
)
```
================================================
FILE: docs/howtos/defining-a-new-task.md
================================================
# Defining a new task
In order to use new tasks in our experiments, we need to define two classes:
* A [Task](/api/allenact/base_abstractions/task#task), including, among others, a `step` implementation providing a
[RLStepResult](/api/allenact/base_abstractions/misc#rlstepresult), a `metrics` method providing quantitative performance measurements
for agents and, optionally, a `query_expert` method that can be used e.g. with an
[imitation loss](/api/allenact/algorithms/onpolicy_sync/losses/imitation#imitation) during training.
* A [TaskSampler](/api/allenact/base_abstractions/task#tasksampler), that allows instantiating new Tasks for the agents to solve during
training, validation and testing.
## Task
Let's define a semantic navigation task, where agents have to navigate from a starting point in an environment to an
object of a specific class using a minimal amount of steps and deciding when the goal has been reached.
We need to define the methods `action_space`, `render`, `_step`, `reached_terminal_state`, `class_action_names`, `close`,
`metrics`, and `query_expert` from the base `Task` definition.
### Initialization, action space and termination
Let's start with the definition of the action space and task initialization:
```python
...
from allenact_plugins.ithor_plugin.ithor_constants import (
MOVE_AHEAD,
ROTATE_LEFT,
ROTATE_RIGHT,
LOOK_DOWN,
LOOK_UP,
END,
)
...
class ObjectNaviThorGridTask(Task[IThorEnvironment]):
_actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, LOOK_DOWN, LOOK_UP, END)
def __init__(
self,
env: IThorEnvironment,
sensors: List[Sensor],
task_info: Dict[str, Any],
max_steps: int,
**kwargs
) -> None:
super().__init__(
env=env,
sensors=sensors,
task_info=task_info,
max_steps=max_steps, **kwargs
)
self._took_end_action: bool = False
self._success: Optional[bool] = False
@property
def action_space(self):
return gym.spaces.Discrete(len(self._actions))
@classmethod
def class_action_names(cls) -> Tuple[str, ...]:
return cls._actions
def reached_terminal_state(self) -> bool:
return self._took_end_action
def close(self) -> None:
self.env.stop()
...
```
### Step method
Next, we define the main method `_step` that will be called every time the agent produces a new action:
```python
class ObjectNaviThorGridTask(Task[IThorEnvironment]):
...
def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
assert isinstance(action, int)
action = cast(int, action)
action_str = self.class_action_names()[action]
if action_str == END:
self._took_end_action = True
self._success = self.is_goal_object_visible()
self.last_action_success = self._success
else:
self.env.step({"action": action_str})
self.last_action_success = self.env.last_action_success
step_result = RLStepResult(
observation=self.get_observations(),
reward=self.judge(),
done=self.is_done(),
info={"last_action_success": self.last_action_success},
)
return step_result
...
def is_goal_object_visible(self) -> bool:
return any(
o["objectType"] == self.task_info["object_type"]
for o in self.env.visible_objects()
)
def judge(self) -> float:
reward = -0.01
if not self.last_action_success:
reward += -0.03
if self._took_end_action:
reward += 1.0 if self._success else -1.0
return float(reward)
```
### Metrics, rendering and expert actions
Finally, we define methods to render and evaluate the current task, and optionally generate expert actions to be used
e.g. for DAgger training.
```python
def render(self, mode: str = "rgb", *args, **kwargs) -> numpy.ndarray:
assert mode == "rgb", "only rgb rendering is implemented"
return self.env.current_frame
def metrics(self) -> Dict[str, Any]:
if not self.is_done():
return {}
else:
return {"success": self._success, "ep_length": self.num_steps_taken()}
def query_expert(self, **kwargs) -> Tuple[int, bool]:
return my_objnav_expert_implementation(self)
```
## TaskSampler
We also need to define the corresponding TaskSampler, which must contain implementations for methods `length`,
`total_unique`, `last_sampled_task`, `next_task`, `close`, `reset`, and `set_seed`. Currently,
an additional method `all_observation_spaces_equal` is used to ensure compatibility with the current
[RolloutBlockStorage](/api/allenact/algorithms/onpolicy_sync/storage#rolloutblockstorage).
Let's define a task sampler able to provide an infinite number of object navigation tasks for AI2-THOR.
### Initialization and termination
```python
class ObjectNavTaskSampler(TaskSampler):
def __init__(
    self,
    scenes: List[str],
    object_types: List[str],
    sensors: List[Sensor],
    max_steps: int,
    env_args: Dict[str, Any],
    action_space: gym.Space,
    seed: Optional[int] = None,
    deterministic_cudnn: bool = False,
    *args,
    **kwargs
) -> None:
    """Store the sampler configuration and reset the sampler state.

    # Parameters

    scenes : Names of the scenes from which tasks can be sampled.
    object_types : Candidate target object types; one of them is chosen
        for every sampled task.
    sensors : Sensors handed to every sampled task.
    max_steps : Maximum number of steps handed to every sampled task.
    env_args : Keyword arguments forwarded to `IThorEnvironment` when the
        environment is created.
    action_space : Action space handed to every sampled task.
    seed : Value passed to `set_seed`.
    deterministic_cudnn : Accepted but not used in this example.
    """
    self.env_args = env_args
    self.scenes = scenes
    self.object_types = object_types
    self.grid_size = 0.25
    # The environment is created lazily, the first time a task is sampled.
    self.env: Optional[IThorEnvironment] = None
    self.sensors = sensors
    self.max_steps = max_steps
    # NOTE: the attribute name is misspelled (sic); it is kept as-is because
    # `next_task` reads it under exactly this name.
    self._action_sapce = action_space
    self.scene_id: Optional[int] = None
    self._last_sampled_task: Optional[ObjectNaviThorGridTask] = None
    set_seed(seed)
    self.reset()
def close(self) -> None:
if self.env is not None:
self.env.stop()
def reset(self):
self.scene_id = 0
def _create_environment(self) -> IThorEnvironment:
env = IThorEnvironment(
make_agents_visible=False,
object_open_speed=0.05,
restrict_to_initially_reachable_points=True,
**self.env_args,
)
return env
```
### Task sampling
Finally, we need to define methods to determine the number of available tasks (possibly infinite) and sample tasks:
```python
@property
def length(self) -> Union[int, float]:
return float("inf")
@property
def total_unique(self) -> Optional[Union[int, float]]:
return None
@property
def last_sampled_task(self) -> Optional[ObjectNaviThorGridTask]:
return self._last_sampled_task
@property
def all_observation_spaces_equal(self) -> bool:
return True
def next_task(self) -> Optional[ObjectNaviThorGridTask]:
    """Sample a new object navigation task.

    A scene is drawn uniformly at random from `self.scenes`, the environment
    is created on first use (or reset when the scene changed), the agent
    location is randomized and a target object type is drawn from
    `self.object_types`.

    # Returns

    The newly created task, which is also stored as the last sampled task.
    """
    self.scene_id = random.randint(0, len(self.scenes) - 1)
    # Keep a local name as well: every use below refers to the same scene.
    scene = self.scenes[self.scene_id]
    self.scene = scene

    if self.env is not None:
        # Only reset the environment when the scene actually changes.
        if scene != self.env.scene_name:
            self.env.reset(scene)
    else:
        self.env = self._create_environment()
        self.env.reset(scene_name=scene)

    self.env.randomize_agent_location()

    # `random.choice` returns a single object type. A one-element list (as
    # produced by `random.sample(..., 1)`) would never compare equal to an
    # object's `objectType` string when the task checks for success.
    task_info = {"object_type": random.choice(self.object_types)}

    self._last_sampled_task = ObjectNaviThorGridTask(
        env=self.env,
        sensors=self.sensors,
        task_info=task_info,
        max_steps=self.max_steps,
        action_space=self._action_sapce,
    )
    return self._last_sampled_task
```
================================================
FILE: docs/howtos/defining-a-new-training-pipeline.md
================================================
# Defining a new training pipeline
Defining a new training pipeline, or even new learning algorithms, is straightforward with the modular design in
`AllenAct`.
A convenience [Builder](/api/allenact/utils/experiment_utils#builder) object allows us to defer the instantiation
of objects of the class passed as their first argument while allowing passing additional keyword arguments to their
initializers.
## On-policy
We can implement a training pipeline which trains with a single stage using PPO:
```python
class ObjectNavThorPPOExperimentConfig(ExperimentConfig):
...
@classmethod
def training_pipeline(cls, **kwargs):
ppo_steps = int(1e6)
lr = 2.5e-4
num_mini_batch = 2 if not torch.cuda.is_available() else 6
update_repeats = 4
num_steps = 128
metric_accumulate_interval = cls.MAX_STEPS * 10 # Log every 10 max length tasks
save_interval = 10000
gamma = 0.99
use_gae = True
gae_lambda = 1.0
max_grad_norm = 0.5
return TrainingPipeline(
save_interval=save_interval,
metric_accumulate_interval=metric_accumulate_interval,
optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
num_mini_batch=num_mini_batch,
update_repeats=update_repeats,
max_grad_norm=max_grad_norm,
num_steps=num_steps,
named_losses={
"ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),
},
gamma=gamma,
use_gae=use_gae,
gae_lambda=gae_lambda,
advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
pipeline_stages=[
PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps,),
],
lr_scheduler_builder=Builder(
LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
),
)
...
```
Alternatively, we could use a more complex pipeline that includes dataset aggregation
([DAgger](https://www.cs.cmu.edu/~sross1/publications/Ross-AIStats11-NoRegret.pdf)). This requires the existence of an
expert (implemented in the task definition) that provides optimal actions to agents. We have implemented such a
pipeline by extending the above configuration as follows:
```python
class ObjectNavThorDaggerThenPPOExperimentConfig(ExperimentConfig):
...
SENSORS = [
...
ExpertActionSensor(nactions=6), # Notice that we have added
# an expert action sensor.
]
...
@classmethod
def training_pipeline(cls, **kwargs):
dagger_steps = int(1e4) # Much smaller number of steps as we're using imitation learning
ppo_steps = int(1e6)
lr = 2.5e-4
num_mini_batch = 1 if not torch.cuda.is_available() else 6
update_repeats = 4
num_steps = 128
metric_accumulate_interval = cls.MAX_STEPS * 10 # Log every 10 max length tasks
save_interval = 10000
gamma = 0.99
use_gae = True
gae_lambda = 1.0
max_grad_norm = 0.5
return TrainingPipeline(
save_interval=save_interval,
metric_accumulate_interval=metric_accumulate_interval,
optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
num_mini_batch=num_mini_batch,
update_repeats=update_repeats,
max_grad_norm=max_grad_norm,
num_steps=num_steps,
named_losses={
"ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),
"imitation_loss": Imitation(), # We add an imitation loss.
},
gamma=gamma,
use_gae=use_gae,
gae_lambda=gae_lambda,
advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
pipeline_stages=[ # The pipeline now has two stages, in the first
# we use DAgger (imitation loss + teacher forcing).
# In the second stage we no longer use teacher
# forcing and add in the ppo loss.
PipelineStage(
loss_names=["imitation_loss"],
teacher_forcing=LinearDecay(
startp=1.0, endp=0.0, steps=dagger_steps,
),
max_stage_steps=dagger_steps,
),
PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps,),
],
lr_scheduler_builder=Builder(
LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
),
)
```
## Off-policy
We can also define off-policy stages where an external dataset is used, in this case, for Behavior Cloning:
```python
class BCOffPolicyBabyAIGoToLocalExperimentConfig(ExperimentConfig):
...
@classmethod
def training_pipeline(cls, **kwargs):
total_train_steps = int(1e7)
num_steps=128
return TrainingPipeline(
save_interval=10000, # Save every 10000 steps (approximately)
metric_accumulate_interval=1,
optimizer_builder=Builder(optim.Adam, dict(lr=2.5e-4)),
num_mini_batch=0, # no on-policy training
update_repeats=0, # no on-policy training
num_steps=num_steps // 4, # rollouts from environment tasks
named_losses={
"offpolicy_expert_ce_loss": MiniGridOffPolicyExpertCELoss(
total_episodes_in_epoch=int(1e6) # dataset contains 1M episodes
),
},
gamma=0.99,
use_gae=True,
gae_lambda=1.0,
max_grad_norm=0.5,
advance_scene_rollout_period=None,
pipeline_stages=[
PipelineStage(
loss_names=[], # no on-policy losses
max_stage_steps=total_train_steps,
# We only train from off-policy data:
offpolicy_component=OffPolicyPipelineComponent(
data_iterator_builder=lambda **kwargs: create_minigrid_offpolicy_data_iterator(
path=DATASET_PATH, # external dataset
nrollouts=128, # per trainer batch size
rollout_len=num_steps, # For truncated-BPTT
instr_len=5,
**kwargs,
),
loss_names=["offpolicy_expert_ce_loss"], # off-policy losses
updates=16, # 16 batches per rollout
),
),
],
)
```
Note that, in this example, `128 / 4 = 32` steps will be sampled from tasks in a MiniGrid environment (which can be
useful to track the agent's performance), while a subgraph of the model (in this case the entire Actor) is
trained from batches of 128-step truncated episodes sampled from an offline dataset stored under `DATASET_PATH`.
================================================
FILE: docs/howtos/defining-an-experiment.md
================================================
# Defining an experiment
Let's look at an example experiment configuration for an object navigation example with an actor-critic agent observing
RGB images from the environment and target object classes from the task. This is a simplified example where the
agent is confined to a single `iTHOR` scene (`FloorPlan1`) and needs to find a single object (a tomato). To see how one
might run a "full"/"hard" version of navigation within AI2-THOR, see our tutorials
[PointNav in RoboTHOR](../tutorials/training-a-pointnav-model.md) and
[Swapping in a new environment](../tutorials/transfering-to-a-different-environment-framework.md).
The interface to be implemented by the experiment specification is defined in
[allenact.base_abstractions.experiment_config](/api/allenact/base_abstractions/experiment_config#experimentconfig). If you'd
like to skip ahead and see the finished configuration, [see here](https://github.com/allenai/allenact/blob/master/projects/tutorials/object_nav_ithor_ppo_one_object.py).
We begin by making the following imports:
```python
from math import ceil
from typing import Dict, Any, List, Optional
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor, GoalObjectTypeThorSensor
from allenact_plugins.ithor_plugin.ithor_task_samplers import ObjectNavTaskSampler
from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask
from projects.objectnav_baselines.models.object_nav_models import (
ObjectNavBaselineActorCritic,
)
from allenact.utils.experiment_utils import Builder, PipelineStage, TrainingPipeline, LinearDecay
```
The first method to implement is `tag`, which provides a string identifying the experiment:
```python
class ObjectNavThorPPOExperimentConfig(ExperimentConfig):
...
@classmethod
def tag(cls):
return "ObjectNavThorPPO"
...
```
## Model creation
Next, `create_model` will be used to instantiate a
[baseline object navigation actor-critic model](/api/projects/objectnav_baselines/models/object_nav_models#ObjectNavBaselineActorCritic):
```python
class ObjectNavThorExperimentConfig(ExperimentConfig):
...
# A simple setting, train/valid/test are all the same single scene
# and we're looking for a single object
OBJECT_TYPES = ["Tomato"]
TRAIN_SCENES = ["FloorPlan1_physics"]
VALID_SCENES = ["FloorPlan1_physics"]
TEST_SCENES = ["FloorPlan1_physics"]
# Setting up sensors and basic environment details
SCREEN_SIZE = 224
SENSORS = [
RGBSensorThor(
height=SCREEN_SIZE, width=SCREEN_SIZE, use_resnet_normalization=True,
),
GoalObjectTypeThorSensor(object_types=OBJECT_TYPES),
]
...
@classmethod
def create_model(cls, **kwargs) -> nn.Module:
return ObjectNavBaselineActorCritic(
action_space=gym.spaces.Discrete(len(ObjectNaviThorGridTask.class_action_names())),
observation_space=SensorSuite(cls.SENSORS).observation_spaces,
rgb_uuid=cls.SENSORS[0].uuid,
depth_uuid=None,
goal_sensor_uuid="goal_object_type_ind",
hidden_size=512,
object_type_embedding_dim=8,
)
...
```
## Training pipeline
We now implement a training pipeline which trains with a single stage using PPO.
In the below we use [Builder](/api/allenact/utils/experiment_utils#builder) objects, which allow us to defer the instantiation
of objects of the class passed as their first argument while allowing passing additional keyword arguments to their
initializers. This is necessary when instantiating things like PyTorch optimizers, which take as input the list of
parameters associated with our agent's model (something we can't know until the `create_model` function has been called).
```python
class ObjectNavThorPPOExperimentConfig(ExperimentConfig):
...
@classmethod
def training_pipeline(cls, **kwargs):
ppo_steps = int(1e6)
lr = 2.5e-4
num_mini_batch = 2 if not torch.cuda.is_available() else 6
update_repeats = 4
num_steps = 128
metric_accumulate_interval = cls.MAX_STEPS * 10 # Log every 10 max length tasks
save_interval = 10000
gamma = 0.99
use_gae = True
gae_lambda = 1.0
max_grad_norm = 0.5
return TrainingPipeline(
save_interval=save_interval,
metric_accumulate_interval=metric_accumulate_interval,
optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
num_mini_batch=num_mini_batch,
update_repeats=update_repeats,
max_grad_norm=max_grad_norm,
num_steps=num_steps,
named_losses={
"ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),
},
gamma=gamma,
use_gae=use_gae,
gae_lambda=gae_lambda,
advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
pipeline_stages=[
PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps,),
],
lr_scheduler_builder=Builder(
LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
),
)
...
```
Alternatively, we could use a more sophisticated pipeline that begins training with dataset aggregation
([DAgger](https://www.cs.cmu.edu/~sross1/publications/Ross-AIStats11-NoRegret.pdf)) before moving to training
with PPO. This requires the existence of an
expert (implemented in the task definition) that provides optimal actions to agents. We have implemented such a
pipeline by extending the above configuration as follows:
```python
class ObjectNavThorDaggerThenPPOExperimentConfig(ObjectNavThorPPOExperimentConfig):
...
SENSORS = [
RGBSensorThor(
height=SCREEN_SIZE, width=SCREEN_SIZE, use_resnet_normalization=True,
),
GoalObjectTypeThorSensor(object_types=OBJECT_TYPES),
ExpertActionSensor(nactions=6), # Notice that we have added an expert action sensor.
]
...
@classmethod
def training_pipeline(cls, **kwargs):
dagger_steps = int(1e4) # Much smaller number of steps as we're using imitation learning
ppo_steps = int(1e6)
lr = 2.5e-4
num_mini_batch = 1 if not torch.cuda.is_available() else 6
update_repeats = 4
num_steps = 128
metric_accumulate_interval = cls.MAX_STEPS * 10 # Log every 10 max length tasks
save_interval = 10000
gamma = 0.99
use_gae = True
gae_lambda = 1.0
max_grad_norm = 0.5
return TrainingPipeline(
save_interval=save_interval,
metric_accumulate_interval=metric_accumulate_interval,
optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
num_mini_batch=num_mini_batch,
update_repeats=update_repeats,
max_grad_norm=max_grad_norm,
num_steps=num_steps,
named_losses={
"ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),
"imitation_loss": Imitation(), # We add an imitation loss.
},
gamma=gamma,
use_gae=use_gae,
gae_lambda=gae_lambda,
advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
pipeline_stages=[ # The pipeline now has two stages, in the first
# we use DAgger (imitation loss + teacher forcing).
# In the second stage we no longer use teacher
# forcing and add in the ppo loss.
PipelineStage(
loss_names=["imitation_loss"],
teacher_forcing=LinearDecay(
startp=1.0, endp=0.0, steps=dagger_steps,
),
max_stage_steps=dagger_steps,
),
PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps,),
],
lr_scheduler_builder=Builder(
LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
),
)
```
A version of our experiment config file for which we have implemented this two-stage training
can be found [here](https://github.com/allenai/allenact/blob/master/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object.py).
This two-stage configuration `ObjectNavThorDaggerThenPPOExperimentConfig` is actually implemented _as a subclass of `ObjectNavThorPPOExperimentConfig`_.
This is a common pattern used in AllenAct and lets one skip a great deal of boilerplate when defining a new
experiment as a slight modification of an old one. Of course one must then be careful: changes to the superclass
configuration will propagate to all subclassed configurations.
## Machine configuration
In `machine_params` we define machine configuration parameters that will be used for training, validation and test:
```python
class ObjectNavThorPPOExperimentConfig(allenact.base_abstractions.experiment_config.ExperimentConfig):
...
@classmethod
def machine_params(cls, mode="train", **kwargs):
num_gpus = torch.cuda.device_count()
has_gpu = num_gpus != 0
if mode == "train":
nprocesses = 20 if has_gpu else 4
gpu_ids = [0] if has_gpu else []
elif mode == "valid":
nprocesses = 1
gpu_ids = [1 % num_gpus] if has_gpu else []
elif mode == "test":
nprocesses = 1
gpu_ids = [0] if has_gpu else []
else:
raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")
return {"nprocesses": nprocesses, "gpu_ids": gpu_ids}
...
```
In the above we use the availability of cuda (`torch.cuda.device_count() != 0`) to determine whether
we should use parameters appropriate for local machines or for a server. We might optionally add a list of
`sampler_devices` to assign devices (likely those not used for running our agent) to task sampling workers.
## Task sampling
The above has defined the model we'd like to use, the types of losses we wish to use during training,
and the machine specific parameters that should be used during training. Critically we have not yet
defined which task we wish to train our agent to complete. This is done by implementing the
`ExperimentConfig.make_sampler_fn` function:
```python
class ObjectNavThorPPOExperimentConfig(ExperimentConfig):
...
@classmethod
def make_sampler_fn(cls, **kwargs) -> TaskSampler:
return ObjectNavTaskSampler(**kwargs)
...
```
Now, before training starts, our trainer will know to generate a collection of task
samplers using `make_sampler_fn` for training (and possibly validation or testing).
The `kwargs` parameters used in the above function call can be different for each
training process; we implement such differences using the
`ExperimentConfig.train_task_sampler_args` function:
```python
class ObjectNavThorPPOExperimentConfig(ExperimentConfig):
...
def train_task_sampler_args(
self,
process_ind: int,
total_processes: int,
devices: Optional[List[int]] = None,
seeds: Optional[List[int]] = None,
deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
res = self._get_sampler_args_for_scene_split(
self.TRAIN_SCENES,
process_ind,
total_processes,
seeds=seeds,
deterministic_cudnn=deterministic_cudnn,
)
res["scene_period"] = "manual"
res["env_args"] = {}
res["env_args"].update(self.ENV_ARGS)
res["env_args"]["x_display"] = (
("0.%d" % devices[process_ind % len(devices)])
if devices is not None and len(devices) > 0
else None
)
return res
...
```
Now training process `i` out of `n` total processes will be instantiated with the parameters
`ObjectNavThorPPOExperimentConfig.train_task_sampler_args(i, n, ...)`. Similar functions
(`valid_task_sampler_args` and `test_task_sampler_args`) exist for generating validation
and test parameters. Note also that with this function we can assign devices to run
our environment for each worker. See the documentation of `ExperimentConfig` for more information.
## Running the experiment
We are now in a position to run the experiment (with seed 12345) using the command:
```bash
python main.py object_nav_ithor_ppo_one_object -b projects/tutorials -s 12345
```
================================================
FILE: docs/howtos/running-a-multi-agent-experiment.md
================================================
# To-do
================================================
FILE: docs/howtos/visualizing-results.md
================================================
# To-do
================================================
FILE: docs/installation/download-datasets.md
================================================
# Downloading datasets
**Note:** These instructions assume you have
[installed the full library](../installation/installation-allenact.md#full-library) and, generally, [installed
specific plugin requirements](../installation/installation-allenact.md#plugins-extra-requirements).
The below provides instructions on how to download datasets necessary for defining the train, validation, and
test sets used within the ObjectNav/PointNav tasks in the `iTHOR` and `RoboTHOR` environments.
## Point Navigation (PointNav)
### RoboTHOR
To get the PointNav dataset for `RoboTHOR` run the following command:
```bash
bash datasets/download_navigation_datasets.sh robothor-pointnav
```
This will download the dataset into `datasets/robothor-pointnav`.
### iTHOR
To get the PointNav dataset for `iTHOR` run the following command:
```bash
bash datasets/download_navigation_datasets.sh ithor-pointnav
```
This will download the dataset into `datasets/ithor-pointnav`.
## Object Navigation (ObjectNav)
### RoboTHOR
To get the ObjectNav dataset for `RoboTHOR` run the following command:
```bash
bash datasets/download_navigation_datasets.sh robothor-objectnav
```
This will download the dataset into `datasets/robothor-objectnav`.
### iTHOR
To get the ObjectNav dataset for `iTHOR` run the following command:
```bash
bash datasets/download_navigation_datasets.sh ithor-objectnav
```
This will download the dataset into `datasets/ithor-objectnav`.
================================================
FILE: docs/installation/installation-allenact.md
================================================
# Installation of AllenAct
**Note 1:** This library has been tested *only* with Python 3.6 and 3.7. The following assumes you have a working
version of *Python 3.6/3.7* installed locally.
**Note 2:** If you are installing `allenact` intending to use a GPU for training/inference and your
current machine uses an older version of CUDA you may need to manually install the version of
PyTorch that supports your CUDA version. In such a case, after installing the below requirements, you
should follow the directions for installing PyTorch with older
versions of CUDA available on the [PyTorch homepage](https://pytorch.org/).
In order to install `allenact` and/or its requirements we recommend creating a new
[python virtual environment](https://docs.python.org/3/tutorial/venv.html) and installing all
of the below requirements into this virtual environment.
Alternatively, we also document how to [install a conda environment](#installing-a-conda-environment)
with all the requirements, which is especially useful if you plan to train models in [Habitat](https://aihabitat.org/).
## Different ways to use `allenact`
There are three main installation paths depending on how you wish to use `allenact`.
1. You want to use the `allenact` abstractions and training engine for your own task/environment and don't really
care about using any of our plugins that offer additional support (in the form of models, sensors, task samplers, etc.)
for select tasks/environments like AI2-THOR, Habitat, and MiniGrid.
- If this sounds like you, install the [standalone framework](#standalone-framework).
1. You want to use `allenact` as above but would also like to use some of our additional plugins.
- If this sounds like you, install the [framework and plugins](#framework-and-plugins).
1. You want full access to everything in `allenact` (including all plugins and all of our projects and baselines)
and want to have the option to edit the internal implementation of `allenact` to suit your desire.
- If this sounds like you, install the [full library](#full-library).
## Standalone framework
You can install `allenact` easily using pip:
```bash
pip install allenact
```
If you'd like to install the latest development version of `allenact` (possibly unstable) directly from GitHub see the
next section.
### Bleeding edge pip install
To install the latest `allenact` framework, you can use
```bash
pip install -e "git+https://github.com/allenai/allenact.git@main#egg=allenact&subdirectory=allenact"
```
and, similarly, you can also use
```bash
pip install -e "git+https://github.com/allenai/allenact.git@main#egg=allenact_plugins[all]&subdirectory=allenact_plugins"
```
to install all plugins.
Depending on your machine configuration, you may need to use `pip3` instead of `pip` in the commands
above.
## Framework and plugins
To install `allenact` and all available plugins, run
```bash
pip install allenact allenact_plugins[all]
```
which will install `allenact` and `allenact_plugins` packages along with the requirements for _all_
of the plugins (when possible). If you only want to install the requirements for some subset of plugins, you can
specify these plugins with the `allenact_plugins[plugin1,plugin2]` notation. For instance, to install requirements
for the `ithor_plugin` and the `minigrid_plugin`, simply run:
```bash
pip install allenact allenact_plugins[ithor,minigrid]
```
A list of all available plugins can be found [here](https://github.com/allenai/allenact/tree/master/allenact_plugins).
## Full library
Clone the `allenact` repository to your local machine and move into the top-level directory
```bash
git clone git@github.com:allenai/allenact.git
cd allenact
```
Below we describe two alternative ways to install all dependencies via `pip` or `conda`.
### Installing requirements with `pip`
All requirements for `allenact` (not including plugin requirements) may be installed by running the following command:
```bash
pip install -r requirements.txt; pip install -r dev_requirements.txt
```
To install plugin requirements, see below.
#### Plugins extra requirements
To install the specific requirements of each plugin, we need to additionally call
```bash
pip install -r allenact_plugins/<PLUGIN_NAME>_plugin/extra_requirements.txt
```
from the top-level directory.
### Installing a `conda` environment
_If you are unfamiliar with Conda, please familiarize yourself with their [introductory documentation](https://docs.conda.io/projects/conda/en/latest/).
If you have not already, you will need to first [install Conda (i.e. Anaconda or Miniconda)](https://docs.conda.io/projects/conda/en/latest/user-guide/install/)
on your machine. We suggest installing [Miniconda](https://docs.conda.io/projects/conda/en/latest/glossary.html#miniconda-glossary)
as it's relatively lightweight._
The `conda` folder contains YAML files specifying [Conda environments](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-from-an-environment-yml-file)
compatible with AllenAct. These environment files include:
* `environment-base.yml` - A base environment file to be used on all machines (it includes
[PyTorch](https://pytorch.org/) with the latest `cudatoolkit`).
* `environment-dev.yml` - Additional dev dependencies.
* `environment-<CUDA_VERSION>.yml` - Additional dependencies, where `<CUDA_VERSION>` is the CUDA version used on your
machine (if you are using Linux, you might find this version by running `/usr/local/cuda/bin/nvcc --version`).
* `environment-cpu.yml` - Additional dependencies to be used on machines where GPU support is not needed (everything
will be run on the CPU).
For the moment let's assume you're using `environment-base.yml` above. To install a conda environment with name `allenact`
using this file you can simply run the following (*this will take a few minutes*):
```bash
conda env create --file ./conda/environment-base.yml --name allenact
```
The above is very simple but has the side effect of creating a new `src` directory where it will
place some of AllenAct's dependencies. To get around this, instead of running the above you can instead
run the commands:
```bash
export MY_ENV_NAME=allenact
export CONDA_BASE="$(dirname $(dirname "${CONDA_EXE}"))"
export PIP_SRC="${CONDA_BASE}/envs/${MY_ENV_NAME}/pipsrc"
conda env create --file ./conda/environment-base.yml --name $MY_ENV_NAME
```
These additional commands tell conda to place these dependencies under the `${CONDA_BASE}/envs/${MY_ENV_NAME}/pipsrc` directory rather
than under `src`; this is more in line with where we'd expect dependencies to be placed when running `pip install ...`.
If needed, you can use one of the `environment-<CUDA_VERSION>.yml` environment files to install the proper version of
the `cudatoolkit` by running:
```bash
conda env update --file ./conda/environment-<CUDA_VERSION>.yml --name allenact
```
or the CPU-only version:
```bash
conda env update --file ./conda/environment-cpu.yml --name allenact
```
#### Using the `conda` environment
Now that you've installed the conda environment as above, you can activate it by running:
```bash
conda activate allenact
```
after which you can run everything as you would normally.
#### Installing supported environments with `conda`
Each supported plugin contains a YAML environment file that can be applied upon the existing `allenact` environment. To
install the specific requirements of each plugin, we need to additionally call
```bash
conda env update --file allenact_plugins/<PLUGIN_NAME>_plugin/extra_environment.yml --name $MY_ENV_NAME
```
from the top-level directory.
**Habitat:** Note that, for habitat, we provide two environment types, depending on whether your machine is connected to a
display. More details can be found [here](../installation/installation-framework.md#installation-of-habitat).
================================================
FILE: docs/installation/installation-framework.md
================================================
# Installation of supported environments
In general, each supported environment can be installed by just following the instructions to
[install the full library and specific requirements of every plugin](../installation/installation-allenact.md#full-library)
either [via pip](../installation/installation-allenact.md#installing-requirements-with-pip) or
[via Conda](../installation/installation-allenact.md#installing-a-conda-environment).
Below we provide additional installation instructions for a number of environments that we support and
provide some guidance for problems commonly experienced when using these environments.
## Installation of iTHOR (`ithor` plugin)
The first time you run an experiment with `iTHOR` (or any script that uses `ai2thor`)
the library will download all of the assets it requires to render the scenes automatically.
However, the datasets must be manually downloaded as described [here](../installation/download-datasets.md).
**Trying to use `iTHOR` on a machine without an attached display?**
**Note:** These instructions assume you have
[installed the full library](../installation/installation-allenact.md#full-library).
If you wish to run `iTHOR` on a machine without an attached display (for instance, a remote server such as an AWS
machine) you will also need to run a script that launches `xserver` processes on your GPUs. This can be done
with the following command:
```bash
sudo python scripts/startx.py &
```
Notice that you need to run the command with `sudo` (i.e. administrator privileges). If you do not have `sudo`
access (for example if you are running this on a shared university machine) you
can ask your administrator to run it for you. You only need to run it once (as
long as you do not turn off your machine).
## Installation of RoboTHOR (`robothor` plugin)
`RoboTHOR` is installed in the same way as `iTHOR`. For more information see the above section on installing `iTHOR`.
## Installation of Habitat
Installing habitat requires
1. Installing the `habitat-lab` and `habitat-sim` packages.
- This may be done by either following the [directions provided by Habitat themselves](https://github.com/facebookresearch/habitat-lab#installation)
or by using our `conda` installation instructions below.
1. Downloading the scene assets (i.e. the Gibson or Matterport scene files) relevant to whichever task you're interested in.
- Unfortunately we cannot legally distribute these files to you directly. Instead you will need to download these
yourself. See [here](https://github.com/facebookresearch/habitat-lab#Gibson) for how you can download
the Gibson files and [here](https://github.com/facebookresearch/habitat-lab#matterport3d) for directions on
how to download the Matterport files.
1. Downloading the dataset files for the task you're interested in (e.g. PointNav, ObjectNav, etc).
- See [here](https://github.com/facebookresearch/habitat-lab#task-datasets) for links to these dataset files.
### Using `conda`
Habitat has recently released the option to install their simulator using `conda` which avoids having
to manually build dependencies or use Docker. This does not guarantee that the installation process
is completely painless (it is difficult to avoid all possible build issues) but we've found it
to be a nice alternative to using Docker. To use this installation option please first
install an AllenAct `conda` environment using the instructions available [here](../installation/installation-allenact.md#installing-a-conda-environment).
After installing this environment, you can then install `habitat-sim` and `habitat-lab` as follows.
If you are on a machine with an attached display, run:
```bash
export MY_ENV_NAME=allenact
export CONDA_BASE="$(dirname $(dirname "${CONDA_EXE}"))"
export PIP_SRC="${CONDA_BASE}/envs/${MY_ENV_NAME}/pipsrc"
conda env update --file allenact_plugins/habitat_plugin/extra_environment.yml --name $MY_ENV_NAME
```
If you are on a machine without an attached display (e.g. a server), replace the last command by:
```bash
conda env update --file allenact_plugins/habitat_plugin/extra_environment_headless.yml --name $MY_ENV_NAME
```
After these steps, feel free to proceed to download the required scene assets and task-specific dataset files as
described above.
================================================
FILE: docs/javascripts/extra.js
================================================
// The below can be used to open all nav links in the documentation, code found at
// https://github.com/squidfunk/mkdocs-material/issues/767#issuecomment-384558269
// from the user Akkadius.
/*
document.addEventListener("DOMContentLoaded", function() {
load_navpane();
});
function load_navpane() {
var width = window.innerWidth;
if (width <= 1200) {
return;
}
var nav = document.getElementsByClassName("md-nav");
for (var i = 0; i < nav.length; i++) {
if (typeof nav.item(i).style === "undefined") {
continue;
}
if (nav.item(i).getAttribute("data-md-level") && nav.item(i).getAttribute("data-md-component")) {
nav.item(i).style.display = 'block';
nav.item(i).style.overflow = 'visible';
}
}
var nav = document.getElementsByClassName("md-nav__toggle");
for(var i = 0; i < nav.length; i++) {
nav.item(i).checked = true;
}
}
*/
================================================
FILE: docs/notebooks/firstbook.md
================================================
# To-do
================================================
FILE: docs/projects/advisor_2020/README.md
================================================
# Experiments for Advisor
## TODO:
1. Add details taken from https://unnat.github.io/advisor/.
2. Cite the arxiv paper.
3. Give a list of things you can run with bash commands.
4. Ideally be able to recreate a large set of experiments.
================================================
FILE: docs/projects/babyai_baselines/README.md
================================================
# Baseline experiments for the BabyAI environment
We perform a collection of baseline experiments within the BabyAI environment
on the GoToLocal task, see the `projects/babyai_baselines/experiments/go_to_local` directory.
For instance, to train a model using PPO, run
```bash
python main.py go_to_local.ppo --experiment_base projects/babyai_baselines/experiments
```
Note that these experiments will be quite slow when not using a GPU as the BabyAI model architecture is surprisingly
large. Specifying a GPU (if available) can be done from the command line using hooks we created using
[gin-config](https://github.com/google/gin-config). E.g. to train using the 0th GPU device, add
```bash
--gp "machine_params.gpu_id = 0"
```
to the above command.
================================================
FILE: docs/projects/gym_baselines/README.md
================================================
# Baseline models Gym (for MuJoCo environments)
This project contains the code for training baseline models for the tasks under the [MuJoCo](https://gym.openai.com/envs/#mujoco) group of Gym environments, including ["Ant-v2"](https://gym.openai.com/envs/Ant-v2/), ["HalfCheetah-v2"](https://gym.openai.com/envs/HalfCheetah-v2/), ["Hopper-v2"](https://gym.openai.com/envs/Hopper-v2/), ["Humanoid-v2"](https://gym.openai.com/envs/Humanoid-v2/), ["InvertedDoublePendulum-v2"](https://gym.openai.com/envs/InvertedDoublePendulum-v2/), ["InvertedPendulum-v2"](https://gym.openai.com/envs/InvertedPendulum-v2/), ["Reacher-v2"](https://gym.openai.com/envs/Reacher-v2/), ["Swimmer-v2"](https://gym.openai.com/envs/Swimmer-v2/), and ["Walker2d-v2"](https://gym.openai.com/envs/Walker2d-v2/).
Provided are experiment configs for training a lightweight implementation with separate MLPs for actors and critic, [MemorylessActorCritic](https://allenact.org/api/allenact_plugins/gym_plugin/gym_models/#memorylessactorcritic), with a [Gaussian distribution](https://allenact.org/api/allenact_plugins/gym_plugin/gym_distributions/#gaussiandistr) to sample actions for all continuous-control environments under the `MuJoCo` group of `Gym` environments.
The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)
Reinforcement Learning Algorithm.
To train an experiment run the following command from the `allenact` root directory:
```bash
python main.py <PATH_TO_EXPERIMENT_CONFIG> -o <PATH_TO_OUTPUT>
```
Where `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights
and logs to be stored and `<PATH_TO_EXPERIMENT_CONFIG>` is the path to the python file containing
the experiment configuration. An example usage of this command would be:
```bash
python main.py projects/gym_baselines/experiments/mujoco/gym_mujoco_ant_ddppo.py -o /YOUR/DESIRED/MUJOCO/OUTPUT/SAVE/PATH/gym_mujoco_ant_ddppo
```
This trains a lightweight implementation with separate MLPs for actors and critic with a Gaussian distribution to sample actions in the "Ant-v2" environment, and stores the model weights and logs
to `/YOUR/DESIRED/MUJOCO/OUTPUT/SAVE/PATH/gym_mujoco_ant_ddppo`.
## Results
In our experiments, the rewards we obtained on the MuJoCo environments after training with PPO are similar to those reported by OpenAI Gym Baselines (1M steps). The Humanoid environment is instead compared against the original PPO paper, which trained for 50M steps using PPO. Due to time constraints, we have only tested our baselines across two seeds so far.
| Environment | Gym Baseline Reward | Ours Reward |
| ----------- | ------------------- | ----------- |
|[Ant-v2](https://gym.openai.com/envs/Ant-v2/)| 1083.2 |1098.6(reached 4719 in 25M steps) |
| [HalfCheetah-v2](https://gym.openai.com/envs/HalfCheetah-v2/) | 1795.43 | 1741(reached 4019 in 18M steps) |
|[Hopper-v2](https://gym.openai.com/envs/Hopper-v2/)|2316.16|2266|
|[Humanoid-v2](https://gym.openai.com/envs/Humanoid-v2/)|4000+|4500+(reached 6500 in 70M steps)|
| [InvertedPendulum-v2](https://gym.openai.com/envs/InvertedPendulum-v2/) | 809.43 | 1000 |
|[Reacher-v2](https://gym.openai.com/envs/Reacher-v2/)|-6.71|-7.045|
|[Swimmer-v2](https://gym.openai.com/envs/Swimmer-v2/)|111.19|124.7|
|[Walker2d](https://gym.openai.com/envs/Walker2d-v2/)|3424.95|2723 in 10M steps|
================================================
FILE: docs/projects/objectnav_baselines/README.md
================================================
# Baseline models ObjectNav (for RoboTHOR/iTHOR)
This project contains the code for training baseline models for the ObjectNav task. In ObjectNav, the agent
spawns at a location in an environment and is tasked to explore the environment until it finds an object of a
certain type (such as TV or Basketball). Once the agent is confident that it has the object within sight
it executes the `END` action which terminates the episode. If the agent is within a set
distance to the target (in our case 1.0 meters) and the target is visible within its observation frame
the agent succeeded, otherwise it failed.
Provided are experiment configs for training a simple convolutional model with
a GRU using `RGB`, `Depth` or `RGB-D` (i.e. `RGB+Depth`) as inputs in
[RoboTHOR](https://ai2thor.allenai.org/robothor/) and [iTHOR](https://ai2thor.allenai.org/ithor/).
The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)
Reinforcement Learning Algorithm. For the RoboTHOR environment we also have an experiment
(`objectnav_robothor_rgb_resnetgru_dagger.py`) showing how a model can be trained using DAgger,
a form of imitation learning.
To train an experiment run the following command from the `allenact` root directory:
```bash
python main.py <PATH_TO_EXPERIMENT_CONFIG> -o <PATH_TO_OUTPUT> -c
```
Where `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights
and logs to be stored and `<PATH_TO_EXPERIMENT_CONFIG>` is the path to the python file containing
the experiment configuration. An example usage of this command would be:
```bash
python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet_ddppo.py -o storage/objectnav-robothor-rgb
```
This trains a simple convolutional neural network with a GRU using RGB input
passed through a pretrained ResNet-18 visual encoder on the
ObjectNav task in the RoboTHOR environment and stores the model weights and logs
to `storage/objectnav-robothor-rgb`.
## RoboTHOR ObjectNav 2021 Challenge
The experiment configs found under the `projects/objectnav_baselines/experiments/robothor` directory are designed
to conform to the requirements of the [RoboTHOR ObjectNav 2021 Challenge](https://ai2thor.allenai.org/robothor/cvpr-2021-challenge).
### Training a baseline
To train a baseline ResNet->GRU model taking RGB-D inputs, run the following command
```bash
python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnet_ddppo.py -o storage/objectnav-robothor-rgbd
```
By default, when using a machine with a GPU, the above experiment will attempt to train using 60 parallel processes
across all available GPUs. See the `TRAIN_GPU_IDS` constant in `experiments/objectnav_thor_base.py` and
the `NUM_PROCESSES` constant in `experiments/robothor/objectnav_robothor_base.py` if you'd like to change which
GPUs are used or how many processes are run respectively.
### Downloading our pretrained model checkpoint
We provide a pretrained model obtained by allowing the above command to run for all 300M training steps and then selecting
the model checkpoint with best validation-set performance (for us occurring at ~170M training steps). You can download
this model checkpoint by running
```bash
bash pretrained_model_ckpts/download_navigation_model_ckpts.sh robothor-objectnav-challenge-2021
```
from the top-level directory. This will download the pretrained model weights and save them at the path
```bash
pretrained_model_ckpts/robothor-objectnav-challenge-2021/Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO/2021-02-09_22-35-15/exp_Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO_0.2.0a_300M__stage_00__steps_000170207237.pt
```
### Running inference on the pretrained model
You can run inference on the above pretrained model (on the test dataset) by running
```bash
export SAVED_MODEL_PATH=pretrained_model_ckpts/robothor-objectnav-challenge-2021/Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO/2021-02-09_22-35-15/exp_Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO_0.2.0a_300M__stage_00__steps_000170207237.pt
python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnetgru_ddppo.py -c $SAVED_MODEL_PATH --eval
```
To discourage "cheating", the test dataset has been scrubbed of the information needed to actually compute the success rate / SPL
of your model and so running the above will only save the trajectories your models take. To evaluate these
trajectories you will have to submit them to our leaderboard, see [here for more details](https://github.com/allenai/robothor-challenge/).
If you'd like to get a sense of if your model is doing well before submitting to the leaderboard, you can obtain the
success rate / SPL of it on our validation dataset. To do this, you can simply comment-out the line
```python
TEST_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-objectnav/test")
```
within the `projects/objectnav_baselines/experiments/robothor/objectnav_robothor_base.py` file and rerun the above
`python main.py ...` command (when the test dataset is not given, the code defaults to using the validation set).
================================================
FILE: docs/projects/pointnav_baselines/README.md
================================================
# Baseline models for the Point Navigation task in the Habitat, RoboTHOR and iTHOR environments
This project contains the code for training baseline models on the PointNav task. In this setting the agent
spawns at a location in an environment and is tasked to move to another location. The agent is given a "compass"
that tells it the distance and bearing to the target position at every frame. Once the agent is confident that
it has reached the end it executes the `END` action which terminates the episode. If the agent is within a set
distance to the target (in our case 0.2 meters) the agent succeeded, else it failed.
Provided are experiment configs for training a simple convolutional model with
a GRU using `RGB`, `Depth` or `RGBD` as inputs in [Habitat](https://github.com/facebookresearch/habitat-sim),
[RoboTHOR](https://ai2thor.allenai.org/robothor/) and [iTHOR](https://ai2thor.allenai.org/ithor/).
The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)
Reinforcement Learning Algorithm.
To train an experiment run the following command from the `allenact` root directory:
```bash
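python main.py -o <PATH_TO_OUTPUT> -c -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> <EXPERIMENT_NAME>
```
Where `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights
and logs to be stored, `<BASE_DIRECTORY_OF_YOUR_EXPERIMENT>` is the directory where our
experiment file is located and `<EXPERIMENT_NAME>` is the name of the python module containing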
the experiment. An example usage of this command would be:
```bash
python main.py -o storage/pointnav-robothor-depth -b projects/pointnav_baselines/experiments/robothor/ pointnav_robothor_depth_simpleconvgru_ddppo
```
This trains a simple convolutional neural network with a GRU using Depth input on the
PointNav task in the RoboTHOR environment and stores the model weights and logs
to `storage/pointnav-robothor-depth`.
================================================
FILE: docs/projects/two_body_problem_2019/README.md
================================================
# Experiments for the Two Body Problem paper
## TODO:
1. Add details taken from https://prior.allenai.org/projects/two-body-problem
2. Cite the CVPR paper.
3. Give a list of things you can run with bash commands.
4. At least a subset of the experiments.
================================================
FILE: docs/tutorials/distributed-objectnav-tutorial.md
================================================
# Tutorial: Distributed training across multiple nodes.
**Note** The commands provided in this tutorial include a configuration script to
[clone the full library](../installation/installation-allenact.md#full-library). Setting up headless THOR might
require superuser privileges. We also assume [NCCL](https://developer.nvidia.com/nccl) is available for communication
across computation nodes and all nodes have a running `ssh` server.
The experimental tools and commands for distributed training introduced below assume a Linux OS (tested on Ubuntu
18.04).
In this tutorial, we:
1. Introduce the available API for training across multiple nodes, as well as experimental scripts for distributed
configuration, training start and termination, and remote command execution.
1. Introduce the headless mode for [AI2-THOR](https://ai2thor.allenai.org/) in `AllenAct`. Note that, in contrast with
previous tutorials using AI2-THOR, this time we don't require an xserver (in Linux) to be active.
1. Show a training example for RoboTHOR ObjectNav on a cluster, with each node having sufficient GPUs and GPU memory to
host 60 experience samplers collecting rollout data.
Thanks to the massive parallelization of experience collection and model training enabled by
[DD-PPO](https://arxiv.org/abs/1911.00357), we can greatly speed up training by scaling across multiple nodes:

## The task: ObjectNav
In ObjectNav, the goal for the agent is to navigate to an object (possibly unseen during training) of a known given
class and signal task completion when it determines it has reached the goal.
## Implementation
For this tutorial, we'll use the readily available `objectnav_baselines` project, which includes configurations for
a wide variety of object navigation experiments for both iTHOR and RoboTHOR. Since those configuration files are
defined for a single-node setup, we will mainly focus on the changes required in the `machine_params` and
`training_pipeline` methods.
Note that, in order to use the headless version of AI2-THOR, we currently need to install a specific THOR commit,
different from the default one in `robothor_plugin`. Note that this command is included in the configuration script
below, so **we don't need to run this**:
```bash
pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+91139c909576f3bf95a187c5b02c6fd455d06b48
```
The experiment config starts as follows:
```python
import math
from typing import Optional, Sequence
import torch
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.utils.experiment_utils import (
Builder,
LinearDecay,
MultiLinearDecay,
TrainingPipeline,
PipelineStage,
)
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_rgb_resnet18gru_ddppo import (
ObjectNavRoboThorRGBPPOExperimentConfig as BaseConfig,
)
class DistributedObjectNavRoboThorRGBPPOExperimentConfig(BaseConfig):
def tag(self) -> str:
return "DistributedObjectNavRoboThorRGBPPO"
```
We override ObjectNavRoboThorBaseConfig's THOR_COMMIT_ID to match the installed headless one:
```python
THOR_COMMIT_ID = "91139c909576f3bf95a187c5b02c6fd455d06b48"
```
Also indicate that we're using headless THOR (for `task_sampler_args` methods):
```python
THOR_IS_HEADLESS = True
```
**Temporary hack** Disable the `commit_id` argument passed to the THOR `Controller`'s `init` method:
```python
def env_args(self):
res = super().env_args()
res.pop("commit_id", None)
return res
```
And, of course, define the number of nodes. This will be used by `machine_params` and `training_pipeline` below.
We override the existing `ExperimentConfig`'s `init` method to include control on the number of nodes:
```python
def __init__(
self,
distributed_nodes: int = 1,
num_train_processes: Optional[int] = None,
train_gpu_ids: Optional[Sequence[int]] = None,
val_gpu_ids: Optional[Sequence[int]] = None,
test_gpu_ids: Optional[Sequence[int]] = None,
):
super().__init__(
num_train_processes=num_train_processes,
train_gpu_ids=train_gpu_ids,
val_gpu_ids=val_gpu_ids,
test_gpu_ids=test_gpu_ids,
)
self.distributed_nodes = distributed_nodes
```
### Machine parameters
**Note:** We assume that all nodes are identical (same number and model of GPUs and drivers).
The `machine_params` method will be invoked by `runner.py` with different arguments, e.g. to determine the
configuration for validation or training.
When working in distributed settings, `AllenAct` needs to know the total number of trainers across all nodes as well
as the local number of trainers. This is accomplished through the introduction of a `machine_id` keyword argument,
which will be used to define the training parameters as follows:
```python
def machine_params(self, mode="train", **kwargs):
params = super().machine_params(mode, **kwargs)
if mode == "train":
params.devices = params.devices * self.distributed_nodes
params.nprocesses = params.nprocesses * self.distributed_nodes
params.sampler_devices = params.sampler_devices * self.distributed_nodes
if "machine_id" in kwargs:
machine_id = kwargs["machine_id"]
assert (
0 <= machine_id < self.distributed_nodes
), f"machine_id {machine_id} out of range [0, {self.distributed_nodes - 1}]"
local_worker_ids = list(
range(
len(self.train_gpu_ids) * machine_id,
len(self.train_gpu_ids) * (machine_id + 1),
)
)
params.set_local_worker_ids(local_worker_ids)
# Confirm we're setting up train params nicely:
print(
f"devices {params.devices}"
f"\nnprocesses {params.nprocesses}"
f"\nsampler_devices {params.sampler_devices}"
f"\nlocal_worker_ids {params.local_worker_ids}"
)
elif mode == "valid":
# Use all GPUs at their maximum capacity for training
# (you may run validation in a separate machine)
params.nprocesses = (0,)
return params
```
In summary, we need to specify which indices in `devices`, `nprocesses` and `sampler_devices` correspond to the
local `machine_id` node (whenever a `machine_id` is given as a keyword argument), otherwise we specify the global
configuration.
### Training pipeline
In preliminary ObjectNav experiments, we observe that small batches are useful during the initial training steps in
terms of sample efficiency, whereas large batches are preferred during the rest of training.
In order to scale to the larger amount of collected data in multi-node settings, we will proceed with a two-stage
pipeline:
1. In the first stage, we'll enforce a number of updates per amount of collected data similar to the
configuration with a single node by enforcing more batches per rollout (for about 30 million steps).
1. In the second stage we'll switch to a configuration with larger learning rate and batch size to be
used up to the grand total of 300 million experience steps.
We first define a helper method to generate a learning rate curve with decay for each stage:
```python
@staticmethod
def lr_scheduler(small_batch_steps, transition_steps, ppo_steps, lr_scaling):
safe_small_batch_steps = int(small_batch_steps * 1.02)
large_batch_and_lr_steps = ppo_steps - safe_small_batch_steps - transition_steps
# Learning rate after small batch steps (assuming decay to 0)
break1 = 1.0 - safe_small_batch_steps / ppo_steps
# Initial learning rate for large batch (after transition from initial to large learning rate)
break2 = lr_scaling * (
1.0 - (safe_small_batch_steps + transition_steps) / ppo_steps
)
return MultiLinearDecay(
[
# Base learning rate phase for small batch (with linear decay towards 0)
LinearDecay(steps=safe_small_batch_steps, startp=1.0, endp=break1,),
# Allow the optimizer to adapt its statistics to the changes with a larger learning rate
LinearDecay(steps=transition_steps, startp=break1, endp=break2,),
# Scaled learning rate phase for large batch (with linear decay towards 0)
LinearDecay(steps=large_batch_and_lr_steps, startp=break2, endp=0,),
]
)
```
The training pipeline looks like:
```python
def training_pipeline(self, **kwargs):
# These params are identical to the baseline configuration for 60 samplers (1 machine)
ppo_steps = int(300e6)
lr = 3e-4
num_mini_batch = 1
update_repeats = 4
num_steps = 128
save_interval = 5000000
log_interval = 10000 if torch.cuda.is_available() else 1
gamma = 0.99
use_gae = True
gae_lambda = 0.95
max_grad_norm = 0.5
# We add 30 million steps for small batch learning
small_batch_steps = int(30e6)
# And a short transition phase towards large learning rate
# (see comment in the `lr_scheduler` helper method
transition_steps = int(2 / 3 * self.distributed_nodes * 1e6)
# Find exact number of samplers per GPU
assert (
self.num_train_processes % len(self.train_gpu_ids) == 0
), "Expected uniform number of samplers per GPU"
samplers_per_gpu = self.num_train_processes // len(self.train_gpu_ids)
# Multiply num_mini_batch by the largest divisor of
# samplers_per_gpu to keep all batches of same size:
num_mini_batch_multiplier = [
i
for i in reversed(
range(1, min(samplers_per_gpu // 2, self.distributed_nodes) + 1)
)
if samplers_per_gpu % i == 0
][0]
# Multiply update_repeats so that the product of this factor and
# num_mini_batch_multiplier is >= self.distributed_nodes:
update_repeats_multiplier = int(
math.ceil(self.distributed_nodes / num_mini_batch_multiplier)
)
return TrainingPipeline(
save_interval=save_interval,
metric_accumulate_interval=log_interval,
optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
num_mini_batch=num_mini_batch,
update_repeats=update_repeats,
max_grad_norm=max_grad_norm,
num_steps=num_steps,
named_losses={"ppo_loss": PPO(**PPOConfig, show_ratios=False)},
gamma=gamma,
use_gae=use_gae,
gae_lambda=gae_lambda,
advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
pipeline_stages=[
# We increase the number of batches for the first stage to reach an
# equivalent number of updates per collected rollout data as in the
# 1 node/60 samplers setting
PipelineStage(
loss_names=["ppo_loss"],
max_stage_steps=small_batch_steps,
num_mini_batch=num_mini_batch * num_mini_batch_multiplier,
update_repeats=update_repeats * update_repeats_multiplier,
),
# The we proceed with the base configuration (leading to larger
# batches due to the increased number of samplers)
PipelineStage(
loss_names=["ppo_loss"],
max_stage_steps=ppo_steps - small_batch_steps,
),
],
# We use the MultiLinearDecay curve defined by the helper function,
# setting the learning rate scaling as the square root of the number
# of nodes. Linear scaling might also works, but we leave that
# check to the reader.
lr_scheduler_builder=Builder(
LambdaLR,
{
"lr_lambda": self.lr_scheduler(
small_batch_steps=small_batch_steps,
transition_steps=transition_steps,
ppo_steps=ppo_steps,
lr_scaling=math.sqrt(self.distributed_nodes),
)
},
),
)
```
## Multi-node configuration
**Note:** In the following, we'll assume you don't have an available setup for distributed execution, such as
[slurm](https://slurm.schedmd.com/documentation.html). If you do have access to a better alternative to setup and run
distributed processes, we encourage you to use that. The experimental distributed tools included here are intended for
a rather basic usage pattern that might not suit your needs.
If we haven't set up AllenAct with the headless version of AI2-THOR in our nodes, we can define a configuration script
similar to:
```bash
#!/bin/bash
# Prepare a virtualenv for allenact
sudo apt-get install -y python3-venv
python3 -mvenv ~/allenact_venv
source ~/allenact_venv/bin/activate
pip install -U pip wheel
# Install AllenAct
cd ~
git clone https://github.com/allenai/allenact.git
cd allenact
# Install AllenaAct + RoboTHOR plugin dependencies
pip install -r requirements.txt
pip install -r allenact_plugins/robothor_plugin/extra_requirements.txt
# Download + setup datasets
bash datasets/download_navigation_datasets.sh robothor-objectnav
# Install headless AI2-THOR and required libvulkan1
sudo apt-get install -y libvulkan1
pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+91139c909576f3bf95a187c5b02c6fd455d06b48
# Download AI2-THOR binaries
python -c "from ai2thor.controller import Controller; c=Controller(); c.stop()"
echo DONE
```
and save it as `headless_robothor_config.sh`. Note that some of the configuration steps in the script assume you have
superuser privileges.
Then, we can just copy this file to the first node in our cluster and run it with:
```bash
source <PATH/TO/headless_robothor_config.sh>
```
If everything went well, we should be able to
```bash
cd ~/allenact && source ~/allenact_venv/bin/activate
```
Note that we might need to install `libvulkan1` in each node (even if the AllenAct setup is shared across nodes) if it
is not already available.
### Local filesystems
If our cluster does not use a shared filesystem, we'll need to propagate the setup to the rest of nodes. Assuming
we can just `ssh` with the current user to all nodes, we can propagate our config with
```bash
scripts/dconfig.py --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \
--config_script <PATH/TO/headless_robothor_config.sh>
```
and we can check the state of the installation with the `scripts/dcommand.py` tool:
```bash
scripts/dcommand.py --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \
--command 'tail -n 5 ~/log_allenact_distributed_config'
```
If everything went fine, all requirements are ready to start running our experiment.
## Run your experiment
**Note:** In this section, we again assume you don't have an available setup for distributed execution, such as
[slurm](https://slurm.schedmd.com/documentation.html). If you do have access to a better alternative to setup/run
distributed processes, we encourage you to use that. The experimental distributed tools included here are intended for
a rather basic usage pattern that might not suit your needs.
Our experimental extension to AllenAct's `main.py` script allows using practically identical commands to the ones
used in a single-node setup to start our experiments. From the root `allenact` directory, we can simply invoke
```bash
scripts/dmain.py projects/tutorials/distributed_objectnav_tutorial.py \
--config_kwargs '{"distributed_nodes":3}' \
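--runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \
--env_activate_path ~/allenact_venv/bin/activate \
--allenact_path ~/allenact \
--distributed_ip_and_port <FIRST_IP_ADDRESS_IN_RUNS_ON_LIST>:<FREE_PORT_NUMBER_FOR_THIS_IP_ADDRESS>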
```
This script will do several things for you, including synchronization of the changes in the `allenact` directory
to all machines, enabling virtual environments in each node, sharing the same random seed for all `main.py` instances,
assigning `--machine_id` parameters required for multi-node training, and redirecting the process output to a log file
under the output results folder.
Note that by changing the value associated with the `distributed_nodes` key in the `config_kwargs` map and the `runs_on`
list of IPs, we can easily scale our training to e.g. 1, 3, or 8 nodes as shown in the chart above. Note that for this
call to work unmodified, you should have sufficient GPUs/GPU memory to host 60 samplers per node.
## Track and stop your experiment
You might have noticed that, when your experiment started with the above command, a file was created under
`~/.allenact`. This file includes IP addresses and screen session IDs for all nodes. It can be used
by the already introduced `scripts/dcommand.py` script, if we omit the `--runs_on` argument, to call a command on each
node via ssh; but most importantly it is used by the `scripts/dkill.py` script to terminate all screen sessions hosting
our training processes.
### Experiment tracking
A simple way to check all machines are training, assuming you have `nvidia-smi` installed in all nodes, is to just call
```bash
scripts/dcommand.py
```
from the root `allenact` directory. If everything is working well, the GPU usage stats from `nvidia-smi` should reflect
ongoing activity. You can also add different commands to be executed by each node. It is of course also possible to run
tensorboard on any of the nodes, if that's your preference.
### Experiment termination
Just call
```bash
scripts/dkill.py
```
After killing all involved screen sessions, you will be asked about whether you also want to delete the "killfile"
stored under the `~/.allenact` directory (which might be your preferred option once all processes are terminated).
We hope this tutorial will help you start quickly testing new ideas! Even if we've only explored moderate settings of
up to 480 experience samplers, you might want to consider some additional changes (like the
[choice for the optimizer](https://arxiv.org/abs/2103.07013)) if you plan to run at larger scale.
================================================
FILE: docs/tutorials/gym-mujoco-tutorial.md
================================================
# Tutorial: OpenAI gym MuJoCo environment.
**Note** The provided commands to execute in this tutorial assume you have
[installed the full library](../installation/installation-allenact.md#full-library) and the requirements for the
`gym_plugin`. The latter can be installed by
```bash
pip install -r allenact_plugins/gym_plugin/extra_requirements.txt
```
The environments for this tutorial use the [MuJoCo](http://www.mujoco.org/) (**Mu**lti-**Jo**int dynamics in **Co**ntact)
physics simulator, which must also be installed properly by following the instructions
[here](https://github.com/openai/mujoco-py).
## The task
For this tutorial, we'll focus on one of the continuous-control environments under the `mujoco` group of `gym`
environments: [Ant-v2](https://gym.openai.com/envs/Ant-v2/). In this task, the goal
is to make a four-legged creature, "ant", walk forward as fast as possible. A random agent of "Ant-v2" is shown below.
.
To achieve the goal, we need to provide continuous control for the agent moving forward with four legs with the
`x` velocity as high as possible for at most 1000 episode steps. The agent fails (and the episode is done) if the `z` position
is out of the range [0.2, 1.0]. The action space has dimension 8 and the observation space has dimension 111; the
observations map to different body parts, including 3D position `(x,y,z)`, orientation (quaternion `x`,`y`,`z`,`w`)
of the torso, and the joint angles, 3D velocity `(x,y,z)`, 3D angular velocity `(x,y,z)`, and joint velocities.
The rewards for the agent "ant" are composed of the forward rewards, healthy rewards, control cost, and contact cost.
## Implementation
For this tutorial, we'll use the readily available `gym_plugin`, which includes a
[wrapper for `gym` environments](../api/allenact_plugins/gym_plugin/gym_environment.md#gymenvironment), a
[task sampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler) and
[task definition](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymcontinuousbox2dtask), a
[sensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to wrap the observations provided by the `gym`
environment, and a simple [model](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic).
The experiment config, similar to the one used for the
[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md), is defined as follows:
```python
from typing import Dict, Optional, List, Any, cast
import gym
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses.ppo import PPO
from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor
from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler
from allenact.utils.experiment_utils import (
TrainingPipeline,
Builder,
PipelineStage,
LinearDecay,
)
from allenact.utils.viz_utils import VizSuite, AgentViewViz
class HandManipulateTutorialExperimentConfig(ExperimentConfig):
@classmethod
def tag(cls) -> str:
return "GymMuJoCoTutorial"
```
### Sensors and Model
As mentioned above, we'll use a [GymMuJoCoSensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymmujocosensor) to provide
full observations from the state of the `gym` environment to our model.
```python
SENSORS = [
GymMuJoCoSensor("Ant-v2", uuid="gym_mujoco_data"),
]
```
We define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic,
[MemorylessActorCritic](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). Since
this is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]`
instead of `ActorCriticModel[CategoricalDistr]`, since we'll use a
[Gaussian distribution](../api/allenact_plugins/gym_plugin/gym_distributions.md#gaussiandistr) to sample actions.
```python
@classmethod
def create_model(cls, **kwargs) -> nn.Module:
"""We define our `ActorCriticModel` agent using a lightweight
implementation with separate MLPs for actors and critic,
MemorylessActorCritic.
Since this is a model for continuous control, note that the
superclass of our model is `ActorCriticModel[GaussianDistr]`
instead of `ActorCriticModel[CategoricalDistr]`, since we'll use
a Gaussian distribution to sample actions.
"""
return MemorylessActorCritic(
input_uuid="gym_mujoco_data",
action_space=gym.spaces.Box(
-3.0, 3.0, (8,), "float32"
), # 8 actors, each in the range [-3.0, 3.0]
observation_space=SensorSuite(cls.SENSORS).observation_spaces,
action_std=0.5,
)
```
### Task samplers
We use an available `TaskSampler` implementation for `gym` environments that allows us to sample
[GymTasks](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtask):
[GymTaskSampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler). Even though it is possible to let the task
sampler instantiate the proper sensor for the chosen task name (by passing `None`), we use the sensors we created
above, which contain a custom identifier for the actual observation space (`gym_mujoco_data`) also used by the model.
```python
@classmethod
def make_sampler_fn(cls, **kwargs) -> TaskSampler:
return GymTaskSampler(gym_env_type="Ant-v2", **kwargs)
```
For convenience, we will use a `_get_sampler_args` method to generate the task sampler arguments for all three
modes, `train, valid, test`:
```python
def train_task_sampler_args(
self,
process_ind: int,
total_processes: int,
devices: Optional[List[int]] = None,
seeds: Optional[List[int]] = None,
deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
return self._get_sampler_args(
process_ind=process_ind, mode="train", seeds=seeds
)
def valid_task_sampler_args(
self,
process_ind: int,
total_processes: int,
devices: Optional[List[int]] = None,
seeds: Optional[List[int]] = None,
deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
return self._get_sampler_args(
process_ind=process_ind, mode="valid", seeds=seeds
)
def test_task_sampler_args(
self,
process_ind: int,
total_processes: int,
devices: Optional[List[int]] = None,
seeds: Optional[List[int]] = None,
deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
return self._get_sampler_args(process_ind=process_ind, mode="test", seeds=seeds)
```
Similarly to what we do in the MiniGrid navigation tutorial, during training the task sampler samples random tasks
forever while, during testing (or validation), we sample a fixed number of tasks.
```python
def _get_sampler_args(
self, process_ind: int, mode: str, seeds: List[int]
) -> Dict[str, Any]:
"""Generate initialization arguments for train, valid, and test
TaskSamplers.
# Parameters
process_ind : index of the current task sampler
mode: one of `train`, `valid`, or `test`
"""
if mode == "train":
max_tasks = None # infinite training tasks
task_seeds_list = None # no predefined random seeds for training
deterministic_sampling = False # randomly sample tasks in training
else:
max_tasks = 4
# one seed for each task to sample:
# - ensures different seeds for each sampler, and
# - ensures a deterministic set of sampled tasks.
task_seeds_list = list(
range(process_ind * max_tasks, (process_ind + 1) * max_tasks)
)
deterministic_sampling = (
True # deterministically sample task in validation/testing
)
return dict(
gym_env_types=["Ant-v2"],
sensors=self.SENSORS, # sensors used to return observations to the agent
max_tasks=max_tasks, # see above
task_seeds_list=task_seeds_list, # see above
deterministic_sampling=deterministic_sampling, # see above
seed=seeds[process_ind],
)
```
Note that we just sample 4 tasks for validation and testing in this case, which suffice to illustrate the model's
success.
### Machine parameters
In this tutorial, we just train the model on the CPU. We allocate a larger number of samplers for training (8) than
for validation or testing (just 1), and we default to CPU usage by returning an empty list of `devices`. We also
include a video visualizer (`AgentViewViz`) in test mode.
```python
@classmethod
def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
visualizer = None
if mode == "test":
visualizer = VizSuite(
mode=mode,
video_viz=AgentViewViz(
label="episode_vid",
max_clip_length=400,
vector_task_source=("render", {"mode": "rgb_array"}),
fps=30,
),
)
return {
"nprocesses": 8 if mode == "train" else 1, # rollout
"devices": [],
"visualizer": visualizer,
}
```
### Training pipeline
The last definition is the training pipeline. In this case, we use a PPO stage with linearly decaying learning rate
and 10 update repeats per rollout, each split into 4 mini-batches. The test reward should exceed 4,000
within 20M steps. In order to make the "ant" run at an obviously fast speed, we train the agent using PPO
for 3e7 steps.
```python
@classmethod
def training_pipeline(cls, **kwargs) -> TrainingPipeline:
lr = 3e-4
ppo_steps = int(3e7)
clip_param = 0.2
value_loss_coef = 0.5
entropy_coef = 0.0
num_mini_batch = 4 # optimal 64
update_repeats = 10
max_grad_norm = 0.5
num_steps = 2048
gamma = 0.99
use_gae = True
gae_lambda = 0.95
advance_scene_rollout_period = None
save_interval = 200000
metric_accumulate_interval = 50000
return TrainingPipeline(
named_losses=dict(
ppo_loss=PPO(
clip_param=clip_param,
value_loss_coef=value_loss_coef,
entropy_coef=entropy_coef,
),
), # type:ignore
pipeline_stages=[
PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps),
],
optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),
num_mini_batch=num_mini_batch,
update_repeats=update_repeats,
max_grad_norm=max_grad_norm,
num_steps=num_steps,
gamma=gamma,
use_gae=use_gae,
gae_lambda=gae_lambda,
advance_scene_rollout_period=advance_scene_rollout_period,
save_interval=save_interval,
metric_accumulate_interval=metric_accumulate_interval,
lr_scheduler_builder=Builder(
LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps, startp=1, endp=0)},
),
)
```
## Training and validation
We have a complete implementation of this experiment's configuration class in `projects/tutorials/gym_mujoco_tutorial.py`.
To start training from scratch, we just need to invoke
```bash
PYTHONPATH=. python allenact/main.py gym_mujoco_tutorial -b projects/tutorials -m 8 -o /PATH/TO/gym_mujoco_output -s 0 -e
```
from the `allenact` root directory. Note that we include `-e` to enforce deterministic evaluation. Please refer to the
[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md) if in doubt of the meaning of the rest of parameters.
If we have Tensorboard installed, we can track progress with
```bash
tensorboard --logdir /PATH/TO/gym_mujoco_output
```
which will default to the URL [http://localhost:6006/](http://localhost:6006/).
After 30,000,000 steps, the script will terminate. If everything went well, the `valid` success rate should be 1
and the mean reward should rise above 4,000 within 20,000,000 steps, while the average episode length should stay at
or a little below 1,000.
## Testing
The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the
subfolders in the path to the checkpoints, saved under the output folder.
In order to evaluate (i.e. test) a collection of checkpoints, we need to pass the `--eval` flag and specify the
directory containing the checkpoints with the `--checkpoint CHECKPOINT_DIR` option:
```bash
PYTHONPATH=. python allenact/main.py gym_mujoco_tutorial \
-b projects/tutorials \
-m 1 \
-o /PATH/TO/gym_mujoco_output \
-s 0 \
-e \
--eval \
--checkpoint /PATH/TO/gym_mujoco_output/checkpoints/GymMuJoCoTutorial/YOUR_START_DATE
```
If everything went well, the `test` success rate should converge to 1
and the mean reward should rise above 4,000 within 20,000,000 steps, while the average episode length should stay at
or a little below 1,000. The `gif` results can be seen in the image tab of Tensorboard while testing.
The output should be something like this:
.
And the `gif` results can be seen in the image tab of Tensorboard while testing.

If the test command fails with `pyglet.canvas.xlib.NoSuchDisplayException: Cannot connect to "None"`, e.g. when running
remotely, try prepending `DISPLAY=:0.0` to the command above, assuming you have an xserver running with such display
available:
```bash
DISPLAY=:0.0 PYTHONPATH=. python allenact/main.py gym_mujoco_tutorial \
-b projects/tutorials \
-m 1 \
-o /PATH/TO/gym_mujoco_output \
-s 0 \
-e \
--eval \
--checkpoint /PATH/TO/gym_mujoco_output/checkpoints/GymMuJoCoTutorial/YOUR_START_DATE
```
================================================
FILE: docs/tutorials/gym-tutorial.md
================================================
# Tutorial: OpenAI gym for continuous control.
**Note** The provided commands to execute in this tutorial assume you have
[installed the full library](../installation/installation-allenact.md#full-library) and the requirements for the
`gym_plugin`. The latter can be installed by
```bash
pip install -r allenact_plugins/gym_plugin/extra_requirements.txt
```
In this tutorial, we:
1. Introduce the `gym_plugin`, which enables some of the tasks in [OpenAI's gym](https://gym.openai.com/) for training
and inference within AllenAct.
1. Show an example of continuous control with an arbitrary action space covering 2 policies for one of the `gym` tasks.
## The task
For this tutorial, we'll focus on one of the continuous-control environments under the `Box2D` group of `gym`
environments: [LunarLanderContinuous-v2](https://gym.openai.com/envs/LunarLanderContinuous-v2/). In this task, the goal
is to smoothly land a lunar module in a landing pad, as shown below.
.
To achieve this goal, we need to provide continuous control for a main engine and a directional one (2 real values). The
task is considered solved when the reward reaches at least 200 points. The controls for main and directional engines
are both in the range [-1.0, 1.0] and the observation space is composed of 8 scalars indicating `x` and `y` positions,
`x` and `y` velocities, lander angle and angular velocity, and left and right ground contact. Note that these 8 scalars
provide a full observation of the state.
## Implementation
For this tutorial, we'll use the readily available `gym_plugin`, which includes a
[wrapper for `gym` environments](../api/allenact_plugins/gym_plugin/gym_environment.md#gymenvironment), a
[task sampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler) and
[task definition](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymcontinuousbox2dtask), a
[sensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to wrap the observations provided by the `gym`
environment, and a simple [model](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic).
The experiment config, similar to the one used for the
[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md), is defined as follows:
```python
from typing import Dict, Optional, List, Any, cast
import gym
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses.ppo import PPO
from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymBox2DSensor
from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler
from allenact.utils.experiment_utils import (
TrainingPipeline,
Builder,
PipelineStage,
LinearDecay,
)
from allenact.utils.viz_utils import VizSuite, AgentViewViz
class GymTutorialExperimentConfig(ExperimentConfig):
@classmethod
def tag(cls) -> str:
return "GymTutorial"
```
### Sensors and Model
As mentioned above, we'll use a [GymBox2DSensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to provide
full observations from the state of the `gym` environment to our model.
```python
SENSORS = [
GymBox2DSensor("LunarLanderContinuous-v2", uuid="gym_box_data"),
]
```
We define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic,
[MemorylessActorCritic](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). Since
this is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]`
instead of `ActorCriticModel[CategoricalDistr]`, since we'll use a
[Gaussian distribution](../api/allenact_plugins/gym_plugin/gym_distributions.md#gaussiandistr) to sample actions.
```python
@classmethod
def create_model(cls, **kwargs) -> nn.Module:
return MemorylessActorCritic(
input_uuid="gym_box_data",
action_space=gym.spaces.Box(
-1.0, 1.0, (2,)
), # 2 actors, each in the range [-1.0, 1.0]
observation_space=SensorSuite(cls.SENSORS).observation_spaces,
action_std=0.5,
)
```
### Task samplers
We use an available `TaskSampler` implementation for `gym` environments that allows us to sample
[GymTasks](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtask):
[GymTaskSampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler). Even though it is possible to let the task
sampler instantiate the proper sensor for the chosen task name (by passing `None`), we use the sensors we created
above, which contain a custom identifier for the actual observation space (`gym_box_data`) also used by the model.
```python
@classmethod
def make_sampler_fn(cls, **kwargs) -> TaskSampler:
return GymTaskSampler(**kwargs)
```
For convenience, we will use a `_get_sampler_args` method to generate the task sampler arguments for all three
modes, `train, valid, test`:
```python
def train_task_sampler_args(
self,
process_ind: int,
total_processes: int,
devices: Optional[List[int]] = None,
seeds: Optional[List[int]] = None,
deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
return self._get_sampler_args(
process_ind=process_ind, mode="train", seeds=seeds
)
def valid_task_sampler_args(
self,
process_ind: int,
total_processes: int,
devices: Optional[List[int]] = None,
seeds: Optional[List[int]] = None,
deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
return self._get_sampler_args(
process_ind=process_ind, mode="valid", seeds=seeds
)
def test_task_sampler_args(
self,
process_ind: int,
total_processes: int,
devices: Optional[List[int]] = None,
seeds: Optional[List[int]] = None,
deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
return self._get_sampler_args(process_ind=process_ind, mode="test", seeds=seeds)
```
Similarly to what we do in the MiniGrid navigation tutorial, during training the task sampler samples random tasks
forever while, during testing (or validation), we sample a fixed number of tasks.
```python
def _get_sampler_args(
self, process_ind: int, mode: str, seeds: List[int]
) -> Dict[str, Any]:
"""Generate initialization arguments for train, valid, and test
TaskSamplers.
# Parameters
process_ind : index of the current task sampler
mode: one of `train`, `valid`, or `test`
"""
if mode == "train":
max_tasks = None # infinite training tasks
task_seeds_list = None # no predefined random seeds for training
deterministic_sampling = False # randomly sample tasks in training
else:
max_tasks = 3
# one seed for each task to sample:
# - ensures different seeds for each sampler, and
# - ensures a deterministic set of sampled tasks.
task_seeds_list = list(
range(process_ind * max_tasks, (process_ind + 1) * max_tasks)
)
deterministic_sampling = (
True # deterministically sample task in validation/testing
)
return dict(
gym_env_types=["LunarLanderContinuous-v2"],
sensors=self.SENSORS, # sensors used to return observations to the agent
max_tasks=max_tasks, # see above
task_seeds_list=task_seeds_list, # see above
deterministic_sampling=deterministic_sampling, # see above
seed=seeds[process_ind],
)
```
Note that we just sample 3 tasks for validation and testing in this case, which suffice to illustrate the model's
success.
### Machine parameters
Given the simplicity of the task and model, we can just train the model on the CPU. During training, success should
reach 100% in less than 10 minutes, whereas solving the task (evaluation reward > 200) might take about 20 minutes
(on a laptop CPU).
We allocate a larger number of samplers for training (8) than for validation or testing (just 1), and we default to
CPU usage by returning an empty list of `devices`. We also include a video visualizer (`AgentViewViz`) in test mode.
```python
@classmethod
def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
visualizer = None
if mode == "test":
visualizer = VizSuite(
mode=mode,
video_viz=AgentViewViz(
label="episode_vid",
max_clip_length=400,
vector_task_source=("render", {"mode": "rgb_array"}),
fps=30,
),
)
return {
"nprocesses": 8 if mode == "train" else 1,
"devices": [],
"visualizer": visualizer,
}
```
### Training pipeline
The last definition is the training pipeline. In this case, we use a PPO stage with linearly decaying learning rate
and 80 single-batch update repeats per rollout:
```python
@classmethod
def training_pipeline(cls, **kwargs) -> TrainingPipeline:
ppo_steps = int(1.2e6)
return TrainingPipeline(
named_losses=dict(
ppo_loss=PPO(clip_param=0.2, value_loss_coef=0.5, entropy_coef=0.0,),
), # type:ignore
pipeline_stages=[
PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps),
],
optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-3)),
num_mini_batch=1,
update_repeats=80,
max_grad_norm=100,
num_steps=2000,
gamma=0.99,
use_gae=False,
gae_lambda=0.95,
advance_scene_rollout_period=None,
save_interval=200000,
metric_accumulate_interval=50000,
lr_scheduler_builder=Builder(
LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}, # type:ignore
),
)
```
## Training and validation
We have a complete implementation of this experiment's configuration class in `projects/tutorials/gym_tutorial.py`.
To start training from scratch, we just need to invoke
```bash
PYTHONPATH=. python allenact/main.py gym_tutorial -b projects/tutorials -m 8 -o /PATH/TO/gym_output -s 54321 -e
```
from the `allenact` root directory. Note that we include `-e` to enforce deterministic evaluation. Please refer to the
[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md) if in doubt of the meaning of the rest of parameters.
If we have Tensorboard installed, we can track progress with
```bash
tensorboard --logdir /PATH/TO/gym_output
```
which will default to the URL [http://localhost:6006/](http://localhost:6006/).
After 1,200,000 steps, the script will terminate. If everything went well, the `valid` success rate should quickly
converge to 1 and the mean reward to above 250, while the average episode length should stay below or near 300.
## Testing
The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the
subfolders in the path to the checkpoints, saved under the output folder.
In order to evaluate (i.e. test) a collection of checkpoints, we need to pass the `--eval` flag and specify the
directory containing the checkpoints with the `--checkpoint CHECKPOINT_DIR` option:
```bash
PYTHONPATH=. python allenact/main.py gym_tutorial \
-b projects/tutorials \
-m 1 \
-o /PATH/TO/gym_output \
-s 54321 \
-e \
--eval \
--checkpoint /PATH/TO/gym_output/checkpoints/GymTutorial/YOUR_START_DATE \
--approx_ckpt_step_interval 800000 # Skip some checkpoints
```
The option `--approx_ckpt_step_interval 800000` tells AllenAct that we only want to evaluate checkpoints
which were saved every ~800000 steps; this lets us avoid evaluating every saved checkpoint. If everything went well,
the `test` success rate should converge to 1, the episode length below or near 300 steps, and the mean reward to above
250. The images tab in tensorboard will contain videos for the sampled test episodes.
.
If the test command fails with `pyglet.canvas.xlib.NoSuchDisplayException: Cannot connect to "None"`, e.g. when running
remotely, try prepending `DISPLAY=:0.0` to the command above, assuming you have an xserver running with such display
available:
```bash
DISPLAY=:0.0 PYTHONPATH=. python allenact/main.py gym_tutorial \
-b projects/tutorials \
-m 1 \
-o /PATH/TO/gym_output \
-s 54321 \
-e \
--eval \
--checkpoint /PATH/TO/gym_output/checkpoints/GymTutorial/YOUR_START_DATE \
--approx_ckpt_step_interval 800000
```
================================================
FILE: docs/tutorials/index.md
================================================
# AllenAct Tutorials
**Note** The provided commands to execute these tutorials assume you have
[installed the full library](../installation/installation-allenact.md#full-library)
and the specific requirements for each used plugin.
We provide several tutorials to help ramp up researchers to the field of Embodied-AI as well as to the AllenAct framework.
## [Navigation in MiniGrid](../tutorials/minigrid-tutorial.md)

We train an agent to complete the `MiniGrid-Empty-Random-5x5-v0` task within the [MiniGrid](https://github.com/maximecb/gym-minigrid) environment.
This tutorial presents:
* Writing an experiment configuration file with a simple training pipeline from scratch.
* Using one of the supported environments with minimal user effort.
* Training, validation and testing your experiment from the command line.
[Follow the tutorial here.](../tutorials/minigrid-tutorial.md)
## [PointNav in RoboTHOR](../tutorials/training-a-pointnav-model.md)

We train an agent on the Point Navigation task within the RoboTHOR Embodied-AI environment.
This tutorial presents:
* The basics of the Point Navigation task, a common task in Embodied AI
* Using an external dataset
* Writing an experiment configuration file with a simple training pipeline from scratch.
* Use one of the supported environments with minimal user effort.
* Train, validate and test your experiment from the command line.
* Testing a pre-trained model
[Follow the tutorial here.](../tutorials/training-a-pointnav-model.md)
## [Swapping in a new environment](../tutorials/transfering-to-a-different-environment-framework.md)

This tutorial demonstrates how easy it is to modify the experiment config created in the RoboTHOR PointNav tutorial to work with the iTHOR and Habitat environments.
[Follow the tutorial here.](../tutorials/transfering-to-a-different-environment-framework.md)
## [Using a pretrained model](../tutorials/running-inference-on-a-pretrained-model.md)

This tutorial shows how to run inference on one or more checkpoints of a pretrained model and generate
visualizations of different types.
[Follow the tutorial here.](../tutorials/running-inference-on-a-pretrained-model.md)
## [Off-policy training](../tutorials/offpolicy-tutorial.md)
This tutorial shows how to train an Actor using an off-policy dataset with expert actions.
[Follow the tutorial here.](../tutorials/offpolicy-tutorial.md)
## [OpenAI gym for continuous control](../tutorials/gym-tutorial.md)

We train an agent to complete the `LunarLanderContinuous-v2` task from
[OpenAI gym](https://gym.openai.com/envs/LunarLanderContinuous-v2).
This tutorial presents:
* A `gym` plugin for `AllenAct`.
* A continuous control example with multiple actors using PPO.
[Follow the tutorial here.](../tutorials/gym-tutorial.md)
## [Multi-node training for RoboTHOR ObjectNav](../tutorials/distributed-objectnav-tutorial.md)

We train an agent to navigate to an object in a fraction of the time
required for training in one node by distributing training across multiple
nodes.
This tutorial presents:
1. The AllenAct API for training across multiple nodes, as well as
experimental scripts for distributed configuration, training start
and termination, and remote command execution.
2. The introduction of the headless mode for [AI2-THOR](https://ai2thor.allenai.org/) in `AllenAct`.
[Follow the tutorial here.](../tutorials/distributed-objectnav-tutorial.md)
================================================
FILE: docs/tutorials/minigrid-tutorial.md
================================================
# Tutorial: Navigation in MiniGrid.
In this tutorial, we will train an agent to complete the `MiniGrid-Empty-Random-5x5-v0` task within the
[MiniGrid](https://github.com/maximecb/gym-minigrid) environment. We will demonstrate how to:
* Write an experiment configuration file with a simple training pipeline from scratch.
* Use one of the supported environments with minimal user effort.
* Train, validate and test your experiment from the command line.
This tutorial assumes the [installation instructions](../installation/installation-allenact.md) have already been
followed and that, to some extent, this framework's [abstractions](../getting_started/abstractions.md) are known.
The `extra_requirements` for `minigrid_plugin` and `babyai_plugin` can be installed with:
```bash
pip install -r allenact_plugins/minigrid_plugin/extra_requirements.txt; pip install -r allenact_plugins/babyai_plugin/extra_requirements.txt
```
## The task
A `MiniGrid-Empty-Random-5x5-v0` task consists of a grid of dimensions 5x5 where an agent spawned at a random
location and orientation has to navigate to the visitable bottom right corner cell of the grid by sequences of three
possible actions (rotate left/right and move forward). A visualization of the environment with expert steps in a random
`MiniGrid-Empty-Random-5x5-v0` task looks like

The observation for the agent is a subset of the entire grid, simulating a simplified limited field of view, as
depicted by the highlighted rectangle (observed subset of the grid) around the agent (red arrow). Gray cells correspond
to walls.
## Experiment configuration file
Our complete experiment consists of:
* Training a basic actor-critic agent with memory to solve randomly sampled navigation tasks.
* Validation on a fixed set of tasks (running in parallel with training).
* A second stage where we test saved checkpoints with a larger fixed set of tasks.
The entire configuration for the experiment, including training, validation, and testing, is encapsulated in a single
class implementing the `ExperimentConfig` abstraction. For this tutorial, we will follow the config under
`projects/tutorials/minigrid_tutorial.py`.
The `ExperimentConfig` abstraction is used by the
[OnPolicyTrainer](../api/allenact/algorithms/onpolicy_sync/engine.md#onpolicytrainer) class (for training) and the
[OnPolicyInference](../api/allenact/algorithms/onpolicy_sync/engine.md#onpolicyinference) class (for validation and testing)
invoked through the entry script `main.py` that calls an orchestrating
[OnPolicyRunner](../api/allenact/algorithms/onpolicy_sync/runner.md#onpolicyrunner) class. It includes:
* A `tag` method to identify the experiment.
* A `create_model` method to instantiate actor-critic models.
* A `make_sampler_fn` method to instantiate task samplers.
* Three `{train,valid,test}_task_sampler_args` methods describing initialization parameters for task samplers used in
training, validation, and testing; including assignment of workers to devices for simulation.
* A `machine_params` method with configuration parameters that will be used for training, validation, and testing.
* A `training_pipeline` method describing a possibly multi-staged training pipeline with different types of losses,
an optimizer, and other parameters like learning rates, batch sizes, etc.
### Preliminaries
We first import everything we'll need to define our experiment.
```python
from typing import Dict, Optional, List, Any, cast
import gym
from gym_minigrid.envs import EmptyRandomEnv5x5
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact.utils.experiment_utils import (
TrainingPipeline,
Builder,
PipelineStage,
LinearDecay,
)
from allenact_plugins.minigrid_plugin.minigrid_models import MiniGridSimpleConvRNN
from allenact_plugins.minigrid_plugin.minigrid_sensors import EgocentricMiniGridSensor
from allenact_plugins.minigrid_plugin.minigrid_tasks import (
MiniGridTaskSampler,
MiniGridTask,
)
```
We now create the `MiniGridTutorialExperimentConfig` class which we will use to define our experiment.
For pedagogical reasons, we will add methods to this class one at a time below with a description of what
these methods do.
```python
class MiniGridTutorialExperimentConfig(ExperimentConfig):
```
An experiment is identified by a `tag`.
```python
@classmethod
def tag(cls) -> str:
return "MiniGridTutorial"
```
### Sensors and Model
A readily available Sensor type for MiniGrid,
[EgocentricMiniGridSensor](../api/allenact_plugins/minigrid_plugin/minigrid_sensors.md#egocentricminigridsensor),
allows us to extract observations in a format consumable by an `ActorCriticModel` agent:
```python
SENSORS = [
EgocentricMiniGridSensor(agent_view_size=5, view_channels=3),
]
```
The three `view_channels` include objects, colors and states corresponding to a partial observation of the environment
as an image tensor, equivalent to that from `ImgObsWrapper` in
[MiniGrid](https://github.com/maximecb/gym-minigrid#wrappers). The
relatively large `agent_view_size` means the view will only be clipped by the environment walls in the forward and
lateral directions with respect to the agent's orientation.
We define our `ActorCriticModel` agent using a lightweight implementation with recurrent memory for MiniGrid
environments, [MiniGridSimpleConvRNN](../api/allenact_plugins/minigrid_plugin/minigrid_models.md#minigridsimpleconvrnn):
```python
@classmethod
def create_model(cls, **kwargs) -> nn.Module:
return MiniGridSimpleConvRNN(
action_space=gym.spaces.Discrete(len(MiniGridTask.class_action_names())),
observation_space=SensorSuite(cls.SENSORS).observation_spaces,
num_objects=cls.SENSORS[0].num_objects,
num_colors=cls.SENSORS[0].num_colors,
num_states=cls.SENSORS[0].num_states,
)
```
### Task samplers
We use an available TaskSampler implementation for MiniGrid environments that allows us to sample both random and
deterministic `MiniGridTasks`,
[MiniGridTaskSampler](../api/allenact_plugins/minigrid_plugin/minigrid_tasks.md#minigridtasksampler):
```python
@classmethod
def make_sampler_fn(cls, **kwargs) -> TaskSampler:
return MiniGridTaskSampler(**kwargs)
```
This task sampler will, during training (or validation/testing), randomly initialize new tasks for the agent to complete.
While it is not quite as important for this task type (as we test our agent in the same setting it is trained on) there
are a lot of good reasons we would like to sample tasks differently during training than during validation or testing.
One good reason, that is applicable in this tutorial, is that, during training, we would like to be able to sample tasks
forever while, during testing, we would like to sample a fixed number of tasks (as otherwise we would never finish
testing!). In `allenact` this is made possible by defining different arguments for the task sampler:
```python
def train_task_sampler_args(
self,
process_ind: int,
total_processes: int,
devices: Optional[List[int]] = None,
seeds: Optional[List[int]] = None,
deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
return self._get_sampler_args(process_ind=process_ind, mode="train")
def valid_task_sampler_args(
self,
process_ind: int,
total_processes: int,
devices: Optional[List[int]] = None,
seeds: Optional[List[int]] = None,
deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
return self._get_sampler_args(process_ind=process_ind, mode="valid")
def test_task_sampler_args(
self,
process_ind: int,
total_processes: int,
devices: Optional[List[int]] = None,
seeds: Optional[List[int]] = None,
deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
return self._get_sampler_args(process_ind=process_ind, mode="test")
```
where, for convenience, we have defined a `_get_sampler_args` method:
```python
def _get_sampler_args(self, process_ind: int, mode: str) -> Dict[str, Any]:
"""Generate initialization arguments for train, valid, and test
TaskSamplers.
# Parameters
process_ind : index of the current task sampler
mode: one of `train`, `valid`, or `test`
"""
if mode == "train":
max_tasks = None # infinite training tasks
task_seeds_list = None # no predefined random seeds for training
deterministic_sampling = False # randomly sample tasks in training
else:
max_tasks = 20 + 20 * (mode == "test") # 20 tasks for valid, 40 for test
# one seed for each task to sample:
# - ensures different seeds for each sampler, and
# - ensures a deterministic set of sampled tasks.
task_seeds_list = list(
range(process_ind * max_tasks, (process_ind + 1) * max_tasks)
)
deterministic_sampling = (
True # deterministically sample task in validation/testing
)
return dict(
max_tasks=max_tasks, # see above
env_class=self.make_env, # builder for third-party environment (defined below)
sensors=self.SENSORS, # sensors used to return observations to the agent
env_info=dict(), # parameters for environment builder (none for now)
task_seeds_list=task_seeds_list, # see above
deterministic_sampling=deterministic_sampling, # see above
)
@staticmethod
def make_env(*args, **kwargs):
return EmptyRandomEnv5x5()
```
Note that the `env_class` argument to the Task Sampler is the one determining which task type we are going to train the
model for (in this case, `MiniGrid-Empty-Random-5x5-v0` from
[gym-minigrid](https://github.com/maximecb/gym-minigrid#empty-environment)).
The sparse reward is
[given by the environment](https://github.com/maximecb/gym-minigrid/blob/6e22a44dc67414b647063692258a4f95ce789161/gym_minigrid/minigrid.py#L819),
and the maximum task length is 100. For training, we opt for a default random sampling, whereas for validation and
test we define fixed sets of randomly sampled tasks without needing to explicitly define a dataset.
In this toy example, the maximum number of different tasks is 32. For validation we sample 320 tasks using 16 samplers,
or 640 for testing, so we can be fairly sure that all possible tasks are visited at least once during evaluation.
### Machine parameters
Given the simplicity of the task and model, we can quickly train the model on the CPU:
```python
@classmethod
def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
return {
"nprocesses": 128 if mode == "train" else 16,
"devices": [],
}
```
We allocate a larger number of samplers for training (128) than for validation or testing (16), and we default to CPU
usage by returning an empty list of `devices`.
### Training pipeline
The last definition required before starting to train is a training pipeline. In this case, we just use a single PPO
stage with linearly decaying learning rate:
```python
@classmethod
def training_pipeline(cls, **kwargs) -> TrainingPipeline:
ppo_steps = int(150000)
return TrainingPipeline(
named_losses=dict(ppo_loss=PPO(**PPOConfig)), # type:ignore
pipeline_stages=[
PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps)
],
optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4)),
num_mini_batch=4,
update_repeats=3,
max_grad_norm=0.5,
num_steps=16,
gamma=0.99,
use_gae=True,
gae_lambda=0.95,
advance_scene_rollout_period=None,
save_interval=10000,
metric_accumulate_interval=1,
lr_scheduler_builder=Builder(
LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)} # type:ignore
),
)
```
You can see that we use a `Builder` class to postpone the construction of some of the elements, like the optimizer,
for which the model weights need to be known.
## Training and validation
We have a complete implementation of this experiment's configuration class in `projects/tutorials/minigrid_tutorial.py`.
To start training from scratch, we just need to invoke
```bash
PYTHONPATH=. python allenact/main.py minigrid_tutorial -b projects/tutorials -m 8 -o /PATH/TO/minigrid_output -s 12345
```
from the `allenact` root directory.
* With `-b projects/tutorials` we tell `allenact` that the `minigrid_tutorial` experiment config file
will be found in the `projects/tutorials` directory.
* With `-m 8` we limit the number of subprocesses to 8 (each subprocess will run 16 of the 128 training task samplers).
* With `-o /PATH/TO/minigrid_output` we set the output folder into which results and logs will be saved.
* With `-s 12345` we set the random seed.
If we have Tensorboard installed, we can track progress with
```bash
tensorboard --logdir /PATH/TO/minigrid_output
```
which will default to the URL [http://localhost:6006/](http://localhost:6006/).
After 150,000 steps, the script will terminate and several checkpoints will be saved in the output folder.
The training curves should look similar to:

If everything went well, the `valid` success rate should converge to 1 and the mean episode length to a value below 4.
(For perfectly uniform sampling and complete observation, the expectation for the optimal policy is 3.75 steps.) In the
not-so-unlikely event of the run failing to converge to a near-optimal policy, we can just try to re-run (for example
with a different random seed). The validation curves should look similar to:

## Testing
The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the
subfolders in the path to the checkpoints, saved under the output folder.
In order to evaluate (i.e. test) a particular checkpoint, we need to pass the `--eval` flag and specify the checkpoint with the
`--checkpoint CHECKPOINT_PATH` option:
```bash
PYTHONPATH=. python allenact/main.py minigrid_tutorial \
-b projects/tutorials \
-m 1 \
-o /PATH/TO/minigrid_output \
-s 12345 \
--eval \
--checkpoint /PATH/TO/minigrid_output/checkpoints/MiniGridTutorial/YOUR_START_DATE/exp_MiniGridTutorial__stage_00__steps_000000151552.pt
```
Again, if everything went well, the `test` success rate should converge to 1 and the mean episode length to a value
below 4. Detailed results are saved under a `metrics` subfolder in the output folder.
The test curves should look similar to:

================================================
FILE: docs/tutorials/offpolicy-tutorial.md
================================================
# Tutorial: Off-policy training.
**Note** The provided commands to execute in this tutorial assume you have
[installed the full library](../installation/installation-allenact.md#full-library) and the `extra_requirements`
for the `babyai_plugin` and `minigrid_plugin`. The latter can be installed with:
```bash
pip install -r allenact_plugins/babyai_plugin/extra_requirements.txt; pip install -r allenact_plugins/minigrid_plugin/extra_requirements.txt
```
In this tutorial we'll learn how to train an agent from an external dataset by imitating expert actions via
Behavior Cloning. We'll use a [BabyAI agent](/api/allenact_plugins/babyai_plugin/babyai_models#BabyAIRecurrentACModel) to solve
`GoToLocal` tasks on [MiniGrid](https://github.com/maximecb/gym-minigrid); see the
`projects/babyai_baselines/experiments/go_to_local` directory for more details.
This tutorial assumes `AllenAct`'s [abstractions](../getting_started/abstractions.md) are known.
## The task
In a `GoToLocal` task, the agent immersed in a grid world has to navigate to a specific object in the presence of
multiple distractors, requiring the agent to understand `go to` instructions like "go to the red ball". For further
details, please consult the [original paper](https://arxiv.org/abs/1810.08272).
## Getting the dataset
We will use a large dataset (**more than 4 GB**) including expert demonstrations for `GoToLocal` tasks. To download
the data we'll run
```bash
PYTHONPATH=. python allenact_plugins/babyai_plugin/scripts/download_babyai_expert_demos.py GoToLocal
```
from the project's root directory, which will download `BabyAI-GoToLocal-v0.pkl` and `BabyAI-GoToLocal-v0_valid.pkl` to
the `allenact_plugins/babyai_plugin/data/demos` directory.
We will also generate small versions of the datasets, which will be useful if running on CPU, by calling
```bash
PYTHONPATH=. python allenact_plugins/babyai_plugin/scripts/truncate_expert_demos.py
```
from the project's root directory, which will generate `BabyAI-GoToLocal-v0-small.pkl` under the same
`allenact_plugins/babyai_plugin/data/demos` directory.
## Data storage
In order to train with an off-policy dataset, we need to define an `ExperienceStorage`. In AllenAct, an
`ExperienceStorage` object has two primary functions:
1. It stores/manages relevant data (e.g. similarly to the `Dataset` class in PyTorch).
2. It loads stored data into batches that will be used for loss computation (e.g. similarly to the `Dataloader`
class in PyTorch).
Unlike a PyTorch `Dataset` however, an `ExperienceStorage` object can build its dataset **at runtime** by processing
rollouts from the agent. This flexibility allows for us to, for example, implement the experience replay data structure
used in deep Q-learning. For this tutorial we won't need this additional functionality as our off-policy dataset
is a fixed collection of expert trajectories.
An example of an `ExperienceStorage` for BabyAI expert demos might look as follows:
```python
class MiniGridExpertTrajectoryStorage(ExperienceStorage, StreamingStorageMixin):
def __init__(
self,
data_path: str,
num_samplers: int,
rollout_len: int,
instr_len: Optional[int],
restrict_max_steps_in_dataset: Optional[int] = None,
device: torch.device = torch.device("cpu"),
):
...
def data(self) -> List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]:
...
def set_partition(self, index: int, num_parts: int):
...
def initialize(self, *, observations: ObservationType, **kwargs):
...
def add(
self,
observations: ObservationType,
memory: Optional[Memory],
actions: torch.Tensor,
action_log_probs: torch.Tensor,
value_preds: torch.Tensor,
rewards: torch.Tensor,
masks: torch.Tensor,
):
...
def to(self, device: torch.device):
...
def total_experiences(self) -> int:
...
def reset_stream(self):
...
def empty(self) -> bool:
...
def _get_next_ind(self):
...
def _fill_rollout_queue(self, q: queue.Queue, sampler: int):
...
def get_data_for_rollout_ind(self, sampler_ind: int) -> Dict[str, np.ndarray]:
...
def next_batch(self) -> Dict[str, torch.Tensor]:
...
```
A complete example can be found in
[MiniGridExpertTrajectoryStorage](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy#MiniGridExpertTrajectoryStorage).
## Loss function
Off-policy losses must implement the
[`GenericAbstractLoss`](/api/allenact/base_abstractions/misc/#genericabstractloss)
interface. In this case, we minimize the cross-entropy between the actor's policy and the expert action:
```python
class MiniGridOffPolicyExpertCELoss(GenericAbstractLoss):
def __init__(self, total_episodes_in_epoch: Optional[int] = None):
super().__init__()
self.total_episodes_in_epoch = total_episodes_in_epoch
def loss( # type: ignore
self,
*, # No positional arguments
model: ModelType,
batch: ObservationType,
batch_memory: Memory,
stream_memory: Memory,
) -> LossOutput:
rollout_len, nrollouts = cast(torch.Tensor, batch["minigrid_ego_image"]).shape[
:2
]
# Initialize Memory if empty
if len(stream_memory) == 0:
spec = model.recurrent_memory_specification
for key in spec:
dims_template, dtype = spec[key]
# get sampler_dim and all_dims from dims_template (and nrollouts)
dim_names = [d[0] for d in dims_template]
sampler_dim = dim_names.index("sampler")
all_dims = [d[1] for d in dims_template]
all_dims[sampler_dim] = nrollouts
stream_memory.check_append(
key=key,
tensor=torch.zeros(
*all_dims,
dtype=dtype,
device=cast(torch.Tensor, batch["minigrid_ego_image"]).device,
),
sampler_dim=sampler_dim,
)
# Forward data (through the actor and critic)
ac_out, stream_memory = model.forward(
observations=batch,
memory=stream_memory,
prev_actions=None, # type:ignore
masks=cast(torch.FloatTensor, batch["masks"]),
)
# Compute the loss from the actor's output and expert action
expert_ce_loss = -ac_out.distributions.log_prob(batch["expert_action"]).mean()
info = {"expert_ce": expert_ce_loss.item()}
return LossOutput(
value=expert_ce_loss,
info=info,
per_epoch_info={},
batch_memory=batch_memory,
stream_memory=stream_memory,
bsize=rollout_len * nrollouts,
)
```
A complete example can be found in
[MiniGridOffPolicyExpertCELoss](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy#MiniGridOffPolicyExpertCELoss).
Note that in this case we train the entire actor, but it would also be possible to forward data through a different
subgraph of the ActorCriticModel.
## Experiment configuration
For the experiment configuration, we'll build on top of an existing
[base BabyAI GoToLocal Experiment Config](/api/projects/babyai_baselines/experiments/go_to_local/base/#basebabyaigotolocalexperimentconfig).
The complete `ExperimentConfig` file for off-policy training is
[here](/api/projects/tutorials/minigrid_offpolicy_tutorial/#bcoffpolicybabyaigotolocalexperimentconfig), but let's
focus on the most relevant aspect to enable this type of training:
providing an [OffPolicyPipelineComponent](/api/allenact/utils/experiment_utils/#offpolicypipelinecomponent) object as input to a
`PipelineStage` when instantiating the `TrainingPipeline` in the `training_pipeline` method.
```python
class BCOffPolicyBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):
"""BC Off-policy imitation."""
DATASET: Optional[List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]] = None
GPU_ID = 0 if torch.cuda.is_available() else None
@classmethod
def tag(cls):
return "BabyAIGoToLocalBCOffPolicy"
@classmethod
def METRIC_ACCUMULATE_INTERVAL(cls):
# See BaseBabyAIGoToLocalExperimentConfig for how this is used.
return 1
@classmethod
def training_pipeline(cls, **kwargs):
total_train_steps = cls.TOTAL_IL_TRAIN_STEPS
ppo_info = cls.rl_loss_default("ppo", steps=-1)
num_mini_batch = ppo_info["num_mini_batch"]
update_repeats = ppo_info["update_repeats"]
# fmt: off
return cls._training_pipeline(
named_losses={
"offpolicy_expert_ce_loss": MiniGridOffPolicyExpertCELoss(
total_episodes_in_epoch=int(1e6)
),
},
named_storages={
"onpolicy": RolloutBlockStorage(),
"minigrid_offpolicy_expert": MiniGridExpertTrajectoryStorage(
data_path=os.path.join(
BABYAI_EXPERT_TRAJECTORIES_DIR,
"BabyAI-GoToLocal-v0{}.pkl".format(
"" if torch.cuda.is_available() else "-small"
),
),
num_samplers=cls.NUM_TRAIN_SAMPLERS,
rollout_len=cls.ROLLOUT_STEPS,
instr_len=cls.INSTR_LEN,
),
},
pipeline_stages=[
# Single stage, only with off-policy training
PipelineStage(
loss_names=["offpolicy_expert_ce_loss"], # no on-policy losses
max_stage_steps=total_train_steps, # keep sampling episodes in the stage
stage_components=[
StageComponent(
uuid="offpolicy",
storage_uuid="minigrid_offpolicy_expert",
loss_names=["offpolicy_expert_ce_loss"],
training_settings=TrainingSettings(
update_repeats=num_mini_batch * update_repeats,
num_mini_batch=1,
)
)
],
),
],
# As we don't have any on-policy losses, we set the next
# two values to zero to ensure we don't attempt to
# compute gradients for on-policy rollouts:
num_mini_batch=0,
update_repeats=0,
total_train_steps=total_train_steps,
)
# fmt: on
```
You'll have noted that it is possible to combine on-policy and off-policy training in the same stage, even though here
we apply pure off-policy training.
## Training
We recommend using a machine with a CUDA-capable GPU for this experiment. In order to start training, we just need to
invoke
```bash
PYTHONPATH=. python allenact/main.py -b projects/tutorials minigrid_offpolicy_tutorial -m 8 -o /PATH/TO/minigrid_offpolicy_output
```
Note that with the `-m 8` option we limit to 8 the number of on-policy task sampling processes used between off-policy
updates.
If everything goes well, the training success should quickly reach values around 0.7-0.8 on GPU and converge to values
close to 1 if given sufficient time to train.
If running tensorboard, you'll notice a separate group of scalars named `train-offpolicy-losses` and
`train-offpolicy-misc` with losses, approximate "experiences per second" (i.e. the number of off-policy experiences/steps
being used to update the model per second), and other tracked values in addition to the standard `train-onpolicy-*`
used for on-policy training. In the `train-metrics` and `train-misc` sections you'll find the metrics
quantifying the performance of the agent throughout training and some other plots showing training details.
*Note that the x-axis for these plots is different than for the `train-offpolicy-*` sections*. This
is because these plots use the number of rollout steps as the x-axis (i.e. steps that the trained agent
takes interactively) while the `train-offpolicy-*` plots use the number of offpolicy "experiences" that have
been shown to the agent.
A view of the training progress about 5 hours after starting on a CUDA-capable GPU should look similar to the below
(note that training reached >99% success after about 50 minutes).

================================================
FILE: docs/tutorials/running-inference-on-a-pretrained-model.md
================================================
# Tutorial: Inference with a pre-trained model.
In this tutorial we will run inference on a pre-trained model for the PointNav task
in the RoboTHOR environment. In this task the agent is tasked with going to a specific location
within a realistic 3D environment.
For information on how to train a PointNav Model see [this tutorial](training-a-pointnav-model.md).
We will need to [install the full AllenAct library](../installation/installation-allenact.md#full-library),
the `robothor_plugin` requirements via
```bash
pip install -r allenact_plugins/robothor_plugin/extra_requirements.txt
```
and [download the
RoboTHOR Pointnav dataset](../installation/download-datasets.md) before we get started.
For this tutorial we will download the weights of a model trained on the debug dataset.
This can be done with a handy script in the `pretrained_model_ckpts` directory:
```bash
bash pretrained_model_ckpts/download_navigation_model_ckpts.sh robothor-pointnav-rgb-resnet
```
This will download the weights for an RGB model that has been
trained on the PointNav task in RoboTHOR to `pretrained_model_ckpts/robothor-pointnav-rgb-resnet`.
Next we need to run the inference, using the PointNav experiment config from the
[tutorial on making a PointNav experiment](training-a-pointnav-model.md).
We can do this with the following command:
```bash
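PYTHONPATH=. python allenact/main.py EXPERIMENT_NAME -o PATH_TO_OUTPUT -b EXPERIMENT_BASE_DIR -c PATH_TO_CHECKPOINT --eval
```
Where `EXPERIMENT_NAME` is the name of the experiment config file (without the `.py` extension),
`PATH_TO_OUTPUT` is the location where the results of the test will be dumped, `PATH_TO_CHECKPOINT` is the
location of the downloaded model weights, and `EXPERIMENT_BASE_DIR` is a path to the directory where
our experiment definition is stored.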
For our current setup the following command would work:
```bash
PYTHONPATH=. python allenact/main.py \
training_a_pointnav_model \
-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \
-b projects/tutorials \
-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30/exp_PointNavRobothorRGBPPO__stage_00__steps_000039031200.pt \
--eval
```
For testing on all saved checkpoints we pass a directory to `--checkpoint` rather than just a single file:
```bash
PYTHONPATH=. python allenact/main.py \
training_a_pointnav_model \
-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \
-b projects/tutorials \
-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30 \
--eval
```
## Visualization
We also show examples of visualizations that can be extracted from the `"valid"` and `"test"` modes. Currently,
visualization is still undergoing design changes and does not support multi-agent tasks, but the available functionality
is sufficient for pointnav in RoboThor.
Following up on the example above, we can make a specialized pointnav `ExperimentConfig` where we instantiate
the base visualization class, `VizSuite`, defined in
[`allenact.utils.viz_utils`](https://github.com/allenai/allenact/tree/master/allenact/utils/viz_utils.py), when in `test` mode.
Each visualization type can be thought of as a plugin to the base `VizSuite`. For example, all `episode_ids` passed to
`VizSuite` will be processed with each of the instantiated visualization types (possibly with the exception of the
`AgentViewViz`). In the example below we show how to instantiate different visualization types from 4 different data
sources.
The data sources available to `VizSuite` are:
* Task output (e.g. 2D trajectories)
* Vector task (e.g. egocentric views)
* Rollout storage (e.g. recurrent memory, taken action logprobs...)
* `ActorCriticOutput` (e.g. action probabilities)
The visualization types included below are:
* `TrajectoryViz`: Generic 2D trajectory view.
* `AgentViewViz`: RGB egocentric view.
* `ActorViz`: Action probabilities from `ActorCriticOutput[CategoricalDistr]`.
* `TensorViz1D`: Evolution of a point from RolloutStorage over time.
* `TensorViz2D`: Evolution of a vector from RolloutStorage over time.
* `ThorViz`: Specialized 2D trajectory view
[for RoboThor](https://github.com/allenai/allenact/tree/master/allenact_plugins/robothor_plugin/robothor_viz.py).
Note that we need to explicitly set the `episode_ids` that we wish to visualize. For `AgentViewViz` we have the option
of using a different (typically shorter) list of episodes or enforcing the ones used for the rest of the visualizations.
```python
class PointNavRoboThorRGBPPOVizExperimentConfig(PointNavRoboThorRGBPPOExperimentConfig):
"""ExperimentConfig used to demonstrate how to set up visualization code.
# Attributes
viz_ep_ids : Scene names that will be visualized.
viz_video_ids : Scene names that will have videos visualizations associated with them.
"""
viz_ep_ids = [
"FloorPlan_Train1_1_3",
"FloorPlan_Train1_1_4",
"FloorPlan_Train1_1_5",
"FloorPlan_Train1_1_6",
]
viz_video_ids = [["FloorPlan_Train1_1_3"], ["FloorPlan_Train1_1_4"]]
viz: Optional[VizSuite] = None
def get_viz(self, mode):
if self.viz is not None:
return self.viz
self.viz = VizSuite(
episode_ids=self.viz_ep_ids,
mode=mode,
# Basic 2D trajectory visualizer (task output source):
base_trajectory=TrajectoryViz(
path_to_target_location=("task_info", "target",),
),
# Egocentric view visualizer (vector task source):
egeocentric=AgentViewViz(
max_video_length=100, episode_ids=self.viz_video_ids
),
# Default action probability visualizer (actor critic output source):
action_probs=ActorViz(figsize=(3.25, 10), fontsize=18),
# Default taken action logprob visualizer (rollout storage source):
taken_action_logprobs=TensorViz1D(),
# Same episode mask visualizer (rollout storage source):
episode_mask=TensorViz1D(rollout_source=("masks",)),
# Default recurrent memory visualizer (rollout storage source):
rnn_memory=TensorViz2D(rollout_source=("memory", "single_belief")),
# Specialized 2D trajectory visualizer (task output source):
thor_trajectory=ThorViz(
figsize=(16, 8),
viz_rows_cols=(448, 448),
scenes=("FloorPlan_Train{}_{}", 1, 1, 1, 1),
),
)
return self.viz
def machine_params(self, mode="train", **kwargs):
res = super().machine_params(mode, **kwargs)
if mode == "test":
res.set_visualizer(self.get_viz(mode))
return res
```
Running test on the same downloaded models, but using the visualization-enabled `ExperimentConfig` with
```bash
PYTHONPATH=. python allenact/main.py \
running_inference_tutorial \
-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \
-b projects/tutorials \
-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30/exp_PointNavRobothorRGBPPO__stage_00__steps_000039031200.pt \
--eval
```
generates different types of visualization and logs them in tensorboard. If everything is properly set up and
tensorboard includes the `robothor-pointnav-rgb-resnet` folder, under the `IMAGES` tab, we should see something similar
to

================================================
FILE: docs/tutorials/training-a-pointnav-model.md
================================================
# Tutorial: PointNav in RoboTHOR.

## Introduction
One of the most obvious tasks that an embodied agent should master is navigating the world it inhabits.
Before we can teach a robot to cook or clean it first needs to be able to move around. The simplest
way to formulate "moving around" into a task is by making your agent find a beacon somewhere in the environment.
This beacon transmits its location, such that at any time, the agent can get the direction and Euclidean distance
to the beacon. This particular task is often called Point Navigation, or **PointNav** for short.
#### PointNav
At first glance, this task seems trivial. If the agent is given the direction and distance of the target at
all times, can it not simply follow this signal directly? The answer is no, because agents are often trained
on this task in environments that emulate real-world buildings which are not wide-open spaces, but rather
contain many smaller rooms. Because of this, the agent has to learn to navigate human spaces and use doors
and hallways to efficiently navigate from one side of the building to the other. This task becomes particularly
difficult when the agent is tested in an environment that it is not trained in. If the agent does not know
how the floor plan of an environment looks, it has to learn to predict the design of man-made structures,
to efficiently navigate across them, much like how people instinctively know how to move around a building
they have never seen before based on their experience navigating similar buildings.
#### What is an environment anyways?
Environments are worlds in which embodied agents exist. If our embodied agent is simply a neural network that is being
trained in a simulator, then that simulator is its environment. Similarly, if our agent is a
physical robot then its environment is the real world. The agent interacts with the environment by taking one
of several available actions (such as "move forward", or "turn left"). After each action, the environment
produces a new frame that the agent can analyze to determine its next step. For many tasks, including PointNav,
the agent also has a special "stop" action which indicates that the agent thinks it has reached the target.
After this action is called the agent will be reset to a new location, regardless of whether it reached the
target. The hope is that after enough training the agent will learn to correctly assess that it has successfully
navigated to the target.

There are many simulators designed for the training
of embodied agents. In this tutorial, we will be using a simulator called [RoboTHOR](https://ai2thor.allenai.org/robothor/),
which is designed specifically to train models that can easily be transferred to a real robot, by providing a
photo-realistic virtual environment and a real-world replica of the environment that researchers can have access to.
RoboTHOR contains 60 different virtual scenes with different floor plans and furniture and 15 validation scenes.
It is also important to mention that **AllenAct**
has a class abstraction called Environment. This is not the actual simulator game engine or robotics controller,
but rather a shallow wrapper that provides a uniform interface to the actual environment.
#### Learning algorithm
Finally, let us briefly touch on the algorithm that we will use to train our embodied agent to navigate. While
*AllenAct* offers us great flexibility to train models using complex pipelines, we will be using a simple
pure reinforcement learning approach for this tutorial. More specifically, we will be using DD-PPO,
a decentralized and distributed variant of the ubiquitous PPO algorithm. For those unfamiliar with Reinforcement
Learning we highly recommend [this tutorial](http://karpathy.github.io/2016/05/31/rl/) by Andrej Karpathy, and [this
book](http://www.incompleteideas.net/book/the-book-2nd.html) by Sutton and Barto. Essentially what we are doing
is letting our agent explore the environment on its own, rewarding it for taking actions that bring it closer
to its goal and penalizing it for actions that take it away from its goal. We then optimize the agent's model
to maximize this reward.
## Requirements
To train the model on the PointNav task, we need to [install the RoboTHOR environment](../installation/installation-framework.md)
and [download the RoboTHOR PointNav dataset](../installation/download-datasets.md).
The dataset contains a list of episodes with thousands of randomly generated starting positions and target locations for each of the scenes
as well as a precomputed cache of distances, containing the shortest path from each point in a scene, to every other point in that scene.
This is used to reward the agent for moving closer to the target in terms of geodesic distance - the actual path distance (as opposed to a
straight line distance).
## Config File Setup
Now comes the most important part of the tutorial: we are going to write an experiment config file.
If this is your first experience with experiment config files in AllenAct, we suggest that you
first see our how-to on [defining an experiment](../howtos/defining-an-experiment.md) which will
walk you through creating a simplified experiment config file.
Unlike a library that can be imported into python, **AllenAct** is structured as a framework with a runner script called
`main.py` which will run the experiment specified in a config file. This design forces us to keep meticulous records of
exactly which settings were used to produce a particular result,
which can be very useful given how expensive RL models are to train.
The `projects/` directory is home to different projects using `AllenAct`. Currently it is populated with baselines
of popular tasks and tutorials.
We already have all the code for this tutorial stored in `projects/tutorials/training_a_pointnav_model.py`. We will
be using this file to run our experiments, but you can create a new directory in `projects/` and start writing your
experiment there.
We start off by importing everything we will need:
```python
import glob
import os
from math import ceil
from typing import Dict, Any, List, Optional, Sequence
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from torchvision import models
from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor
from allenact.utils.experiment_utils import (
Builder,
PipelineStage,
TrainingPipeline,
LinearDecay,
evenly_distribute_count_into_bins,
)
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from allenact_plugins.robothor_plugin.robothor_task_samplers import (
PointNavDatasetTaskSampler,
)
from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask
from projects.pointnav_baselines.models.point_nav_models import (
ResnetTensorPointNavActorCritic,
)
```
Next we define a new experiment config class:
```python
class PointNavRoboThorRGBPPOExperimentConfig(ExperimentConfig):
"""A Point Navigation experiment configuration in RoboThor."""
```
We then define the task parameters. For PointNav, these include the maximum number of steps our agent
can take before being reset (this prevents the agent from wandering on forever), and a configuration
for the reward function that we will be using.
```python
# Task Parameters
MAX_STEPS = 500
REWARD_CONFIG = {
"step_penalty": -0.01,
"goal_success_reward": 10.0,
"failed_stop_reward": 0.0,
"shaping_weight": 1.0,
}
```
In this case, we set the maximum number of steps to 500.
We give the agent a reward of -0.01 for each action that it takes (this is to encourage it to reach the goal
in as few actions as possible), and a reward of 10.0 if the agent manages to successfully reach its destination.
If the agent selects the `stop` action without reaching the target we do not punish it (although this is
sometimes useful for preventing the agent from stopping prematurely). Finally, our agent gets rewarded if it moves
closer to the target and gets punished if it moves further away. `shaping_weight` controls how strong this signal should
be and is here set to 1.0. These parameters work well for training an agent on PointNav, but feel free to play around
with them.
Next, we set the parameters of the simulator itself. Here we select a resolution at which the engine will render
every frame (640 by 480) and a resolution at which the image will be fed into the neural network (here it is set
to a 224 by 224 box).
```python
# Simulator Parameters
CAMERA_WIDTH = 640
CAMERA_HEIGHT = 480
SCREEN_SIZE = 224
```
Next, we set the hardware parameters for the training engine. `NUM_PROCESSES` sets the total number of parallel
processes that will be used to train the model. In general, more processes result in faster training,
but since each process is a unique instance of the environment in which we are training they can take up a
lot of memory. Depending on the size of the model, the environment, and the hardware we are using, we may
need to adjust this number, but for a setup with 8 GTX Titans, 60 processes work fine. 60 also happens to
be the number of training scenes in RoboTHOR, which allows each process to load only a single scene into
memory, saving time and space.
`TRAINING_GPUS` takes the ids of the GPUs on which
the model should be trained. Similarly, `VALIDATION_GPUS` and `TESTING_GPUS` hold the ids of the GPUs on which
the validation and testing will occur. During training, a validation process is constantly running and evaluating
the current model, to show the progress on the validation set, so reserving a GPU for validation can be a good idea.
If our hardware setup does not include a GPU, these fields can be set to empty lists, as the codebase will default
to running everything on the CPU with only 1 process.
```python
ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
NUM_PROCESSES = 20
TRAINING_GPUS: Sequence[int] = [0]
VALIDATION_GPUS: Sequence[int] = [0]
TESTING_GPUS: Sequence[int] = [0]
```
Since we are using a dataset to train our model, we need to define the path to where we have stored it. If we
downloaded the dataset as instructed above, we can define the paths as follows:
```python
TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/debug")
VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/debug")
```
Next, we define the sensors. `RGBSensorThor` is the environment's implementation of an RGB sensor. It takes the
raw image output by the simulator and resizes it to the input dimensions for our neural network that we
specified above. It also performs normalization if we want. `GPSCompassSensorRoboThor` is a sensor that tracks
the point our agent needs to move to. It tells us the direction and distance to our goal at every time step.
```python
SENSORS = [
RGBSensorThor(
height=SCREEN_SIZE,
width=SCREEN_SIZE,
use_resnet_normalization=True,
uuid="rgb_lowres",
),
GPSCompassSensorRoboThor(),
]
```
For the sake of this example, we are also going to be using a preprocessor with our model. In *AllenAct*
the preprocessor abstraction is designed with large models with frozen weights in mind. These models often
hail from the ResNet family and transform the raw pixels that our agent observes in the environment, into a
complex embedding, which then gets stored and used as input to our trainable model instead of the original image.
Most other preprocessing work is done in the sensor classes (as we just saw with the RGB
sensor scaling and normalizing our input), but for the sake of efficiency, all neural network preprocessing should
use this abstraction.
```python
PREPROCESSORS = [
Builder(
ResNetPreprocessor,
{
"input_height": SCREEN_SIZE,
"input_width": SCREEN_SIZE,
"output_width": 7,
"output_height": 7,
"output_dims": 512,
"pool": False,
"torchvision_resnet_model": models.resnet18,
"input_uuids": ["rgb_lowres"],
"output_uuid": "rgb_resnet",
},
),
]
```
Next, we must define all of the observation inputs that our model will use. These are just
the hardcoded ids of the sensors we are using in the experiment.
```python
OBSERVATIONS = [
"rgb_resnet",
"target_coordinates_ind",
]
```
Finally, we must define the settings of our simulator. We set the camera dimensions to the values
we defined earlier. We set rotateStepDegrees to 30 degrees, which means that every time the agent takes a
turn action, they will rotate by 30 degrees. We set grid size to 0.25 which means that every time the
agent moves forward, it will do so by 0.25 meters.
```python
ENV_ARGS = dict(
width=CAMERA_WIDTH,
height=CAMERA_HEIGHT,
rotateStepDegrees=30.0,
visibilityDistance=1.0,
gridSize=0.25,
)
```
Now we move on to the methods that we must define to finish implementing an experiment config. Firstly we
have a simple method that just returns the name of the experiment.
```python
@classmethod
def tag(cls):
    """Return the name that identifies this experiment."""
    return "PointNavRobothorRGBPPO"
```
Next, we define the training pipeline. In this function, we specify exactly which algorithm or algorithms
we will use to train our model. In this simple example, we are using the PPO loss with a learning rate of 3e-4.
We specify 250 million steps of training and a rollout length of 30 with the `ppo_steps` and `num_steps` parameters
respectively. All the other standard PPO parameters are also present in this function. `metric_accumulate_interval`
sets the frequency at which data is accumulated from all the processes and logged while `save_interval` sets how
often we save the model weights and run validation on them.
```python
@classmethod
def training_pipeline(cls, **kwargs):
    """Build the `TrainingPipeline` used to train this experiment.

    The pipeline has a single stage that optimizes the PPO loss for
    `ppo_steps` steps with Adam, and wraps `LinearDecay(steps=ppo_steps)`
    in a `LambdaLR` learning-rate scheduler. `kwargs` is accepted but not
    used.
    """
    ppo_steps = int(250000000)  # length of the single PPO pipeline stage
    lr = 3e-4
    num_mini_batch = 1
    update_repeats = 3
    num_steps = 30  # rollout length
    save_interval = 5000000  # how often model weights are saved
    log_interval = 1000  # passed on as `metric_accumulate_interval`
    gamma = 0.99
    use_gae = True
    gae_lambda = 0.95
    max_grad_norm = 0.5
    return TrainingPipeline(
        save_interval=save_interval,
        metric_accumulate_interval=log_interval,
        optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
        num_mini_batch=num_mini_batch,
        update_repeats=update_repeats,
        max_grad_norm=max_grad_norm,
        num_steps=num_steps,
        named_losses={"ppo_loss": PPO(**PPOConfig)},
        gamma=gamma,
        use_gae=use_gae,
        gae_lambda=gae_lambda,
        advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=[
            PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps)
        ],
        lr_scheduler_builder=Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
        ),
    )
```
The `machine_params` method returns the hardware parameters of each
process, based on the list of devices we defined above.
```python
def machine_params(self, mode="train", **kwargs):
    """Return the `MachineParams` (process counts and devices) for `mode`.

    Args:
        mode: One of "train", "valid", or "test".
        **kwargs: Accepted but not used.

    Raises:
        NotImplementedError: If `mode` is not one of the values above.
    """
    sampler_devices: List[int] = []
    if mode == "train":
        workers_per_device = 1
        # Without CUDA no GPU ids are used and training runs 8 processes;
        # otherwise NUM_PROCESSES is distributed into one bin per entry of
        # `gpu_ids` by `evenly_distribute_count_into_bins`.
        gpu_ids = (
            []
            if not torch.cuda.is_available()
            else list(self.TRAINING_GPUS) * workers_per_device
        )
        nprocesses = (
            8
            if not torch.cuda.is_available()
            else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))
        )
        sampler_devices = list(self.TRAINING_GPUS)
    elif mode == "valid":
        nprocesses = 1
        gpu_ids = [] if not torch.cuda.is_available() else self.VALIDATION_GPUS
    elif mode == "test":
        nprocesses = 1
        gpu_ids = [] if not torch.cuda.is_available() else self.TESTING_GPUS
    else:
        raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")
    # The preprocessor graph is built for training, or whenever at least one
    # process is requested; otherwise it is None.
    sensor_preprocessor_graph = (
        SensorPreprocessorGraph(
            source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
            preprocessors=self.PREPROCESSORS,
        )
        if mode == "train"
        or (
            (isinstance(nprocesses, int) and nprocesses > 0)
            or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
        )
        else None
    )
    return MachineParams(
        nprocesses=nprocesses,
        devices=gpu_ids,
        sampler_devices=sampler_devices
        if mode == "train"
        else gpu_ids,  # ignored with > 1 gpu_ids
        sensor_preprocessor_graph=sensor_preprocessor_graph,
    )
```
Now we define the actual model that we will be using. **AllenAct** offers first-class support for PyTorch,
so any PyTorch model that implements the provided `ActorCriticModel` class will work here. Here we borrow a model from the `pointnav_baselines` project (which
unsurprisingly contains several PointNav baselines). It is a small convolutional network that expects the output of a ResNet as its rgb input followed by a single-layered GRU. The model accepts as input the number of different
actions our agent can perform in the environment through the `action_space` parameter, which we get from the task definition. We also define the shape of the inputs we are going to be passing to the model with `observation_space`.
We specify the names of our sensors with `goal_sensor_uuid` and `rgb_resnet_preprocessor_uuid`. Finally, we define
the size of our RNN with `hidden_size` and the size of the embedding of our goal sensor data (the direction and
distance to the target) with `goal_dims`.
```python
@classmethod
def create_model(cls, **kwargs) -> nn.Module:
    """Instantiate the actor-critic model trained in this experiment.

    The action space has one discrete action per entry of
    `PointNavTask.class_action_names()`, and the observation space is taken
    from the `sensor_preprocessor_graph` passed in `kwargs` (required).
    """
    return ResnetTensorPointNavActorCritic(
        action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),
        observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
        # These uuids match the entries listed in OBSERVATIONS.
        goal_sensor_uuid="target_coordinates_ind",
        rgb_resnet_preprocessor_uuid="rgb_resnet",
        hidden_size=512,
        goal_dims=32,
    )
```
We also need to define the task sampler that we will be using. This is a piece of code that generates instances
of tasks for our agent to perform (essentially starting locations and targets for PointNav). Since we are getting
our tasks from a dataset, the task sampler is a very simple piece of code that just reads the specified file and sets
the agent to the next starting location whenever the agent exceeds the maximum number of steps or selects the
`stop` action.
```python
@classmethod
def make_sampler_fn(cls, **kwargs) -> TaskSampler:
    """Create a `PointNavDatasetTaskSampler`, forwarding all of `kwargs` to it."""
    return PointNavDatasetTaskSampler(**kwargs)
```
You might notice that we did not specify the task sampler's arguments, but are rather passing them in. The
reason for this is that each process will have its own task sampler, and we need to specify exactly which scenes
each process should work with. If we have several GPUs and many scenes, this process of distributing the work can be rather complicated, so we define a few helper functions to do just this.
```python
@staticmethod
def _partition_inds(n: int, num_parts: int):
    """Return `num_parts + 1` rounded, evenly spaced boundaries from 0 to `n`.

    Consecutive pairs of the returned int32 array delimit the index range
    assigned to each part.
    """
    boundaries = np.linspace(0, n, num_parts + 1, endpoint=True)
    return np.round(boundaries).astype(np.int32)
def _get_sampler_args_for_scene_split(
    self,
    scenes_dir: str,
    process_ind: int,
    total_processes: int,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Build the task sampler arguments for one process's share of the scenes.

    Scene names are read from the `*.json.gz` files in `scenes_dir` and
    partitioned across `total_processes`; the slice belonging to
    `process_ind` is returned together with the task settings.

    Args:
        scenes_dir: Directory containing one `*.json.gz` file per scene.
        process_ind: Index of the process the arguments are built for.
        total_processes: Number of processes the scenes are split across.
        seeds: Optional per-process seeds, indexed by `process_ind`.
        deterministic_cudnn: Forwarded unchanged in the returned dict.

    Returns:
        A dict of keyword arguments for the task sampler.

    Raises:
        RuntimeError: If `scenes_dir` contains no `*.json.gz` files.
    """
    path = os.path.join(scenes_dir, "*.json.gz")
    # glob returns matches in arbitrary order. Every process calls this
    # method independently and slices the list by index, so the list is
    # sorted to guarantee that all processes partition the same sequence.
    # `os.path.basename` handles the platform's path separator.
    scenes = [
        os.path.basename(scene).split(".")[0] for scene in sorted(glob.glob(path))
    ]
    if not scenes:
        raise RuntimeError(
            (
                "Could find no scene dataset information in directory {}."
                " Are you sure you've downloaded them? "
                " If not, see https://allenact.org/installation/download-datasets/ information"
                " on how this can be done."
            ).format(scenes_dir)
        )
    if total_processes > len(scenes):  # oversample some scenes -> bias
        if total_processes % len(scenes) != 0:
            print(
                "Warning: oversampling some of the scenes to feed all processes."
                " You can avoid this by setting a number of workers divisible by the number of scenes"
            )
        # Repeat the scene list until it covers every process, then trim it
        # to a multiple of `total_processes`.
        scenes = scenes * int(ceil(total_processes / len(scenes)))
        scenes = scenes[: total_processes * (len(scenes) // total_processes)]
    else:
        if len(scenes) % total_processes != 0:
            print(
                "Warning: oversampling some of the scenes to feed all processes."
                " You can avoid this by setting a number of workers divisor of the number of scenes"
            )
    inds = self._partition_inds(len(scenes), total_processes)
    return {
        "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
        "max_steps": self.MAX_STEPS,
        "sensors": self.SENSORS,
        "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
        "seed": seeds[process_ind] if seeds is not None else None,
        "deterministic_cudnn": deterministic_cudnn,
        "rewards_config": self.REWARD_CONFIG,
    }
```
The very last things we need to define are the sampler arguments themselves. We define them separately for a train,
validation, and test sampler, but in this case, they are almost the same. The arguments need to include the location
of the dataset and distance cache as well as the environment arguments for our simulator, both of which we defined above
and are just referencing here. The only consequential differences between these task samplers are the path to the dataset
we are using (train or validation) and whether we want to loop over the dataset or not (we want this for training since
we want to train for several epochs, but we do not need this for validation and testing). Since the test scenes of
RoboTHOR are private we are also testing on our validation set.
```python
def train_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Return the task sampler arguments for training process `process_ind`.

    Starts from the scene split computed over the `episodes` directory of
    `TRAIN_DATASET_DIR`, then adds the dataset location, dataset looping,
    the simulator arguments, and `allow_flipping`.
    """
    res = self._get_sampler_args_for_scene_split(
        os.path.join(self.TRAIN_DATASET_DIR, "episodes"),
        process_ind,
        total_processes,
        seeds=seeds,
        deterministic_cudnn=deterministic_cudnn,
    )
    res["scene_directory"] = self.TRAIN_DATASET_DIR
    res["loop_dataset"] = True
    # Copy ENV_ARGS into a fresh dict so that adding `x_display` below does
    # not modify the class-level dict.
    res["env_args"] = {}
    res["env_args"].update(self.ENV_ARGS)
    # "0.<device>" string, cycling through `devices` by process index; None
    # when no devices are given.
    res["env_args"]["x_display"] = (
        ("0.%d" % devices[process_ind % len(devices)])
        if devices is not None and len(devices) > 0
        else None
    )
    res["allow_flipping"] = True
    return res
def valid_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Return the task sampler arguments for validation process `process_ind`.

    Starts from the scene split computed over the `episodes` directory of
    `VAL_DATASET_DIR`, then adds the dataset location and the simulator
    arguments. The dataset is not looped.
    """
    res = self._get_sampler_args_for_scene_split(
        os.path.join(self.VAL_DATASET_DIR, "episodes"),
        process_ind,
        total_processes,
        seeds=seeds,
        deterministic_cudnn=deterministic_cudnn,
    )
    res["scene_directory"] = self.VAL_DATASET_DIR
    res["loop_dataset"] = False
    # Copy ENV_ARGS into a fresh dict so that adding `x_display` below does
    # not modify the class-level dict.
    res["env_args"] = {}
    res["env_args"].update(self.ENV_ARGS)
    # "0.<device>" string, cycling through `devices` by process index; None
    # when no devices are given.
    res["env_args"]["x_display"] = (
        ("0.%d" % devices[process_ind % len(devices)])
        if devices is not None and len(devices) > 0
        else None
    )
    return res
def test_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Return the task sampler arguments for test process `process_ind`.

    Testing reuses the validation dataset (`VAL_DATASET_DIR`) and does not
    loop over it. Unlike the train and validation variants, `devices` is not
    used here and no `x_display` entry is added to the simulator arguments.
    """
    res = self._get_sampler_args_for_scene_split(
        os.path.join(self.VAL_DATASET_DIR, "episodes"),
        process_ind,
        total_processes,
        seeds=seeds,
        deterministic_cudnn=deterministic_cudnn,
    )
    res["scene_directory"] = self.VAL_DATASET_DIR
    res["loop_dataset"] = False
    # Copy ENV_ARGS into a fresh dict rather than sharing the class-level one.
    res["env_args"] = {}
    res["env_args"].update(self.ENV_ARGS)
    return res
```
This is it! If we copy all of the code into a file we should be able to run our experiment!
## Training Model On Debug Dataset
We can test if our installation worked properly by training our model on a small dataset of 4 episodes. This
should take about 20 minutes on a computer with an NVIDIA GPU.
We can now train a model by running:
```bash
PYTHONPATH=. python allenact/main.py -o <PATH_TO_OUTPUT> -c -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> <EXPERIMENT_NAME>
```
If using the same configuration as we have set up, the following command should work:
```bash
PYTHONPATH=. python allenact/main.py training_a_pointnav_model -o storage/robothor-pointnav-rgb-resnet-resnet -b projects/tutorials
```
If we start up a tensorboard server during training and specify that `output_dir=storage` the output should look
something like this:

## Training Model On Full Dataset
We can also train the model on the full dataset by changing back our dataset path and running the same command as above.
But be aware, training this takes nearly 2 days on a machine with 8 GPUs.
## Testing Model
To test the performance of a model please refer to [this tutorial](running-inference-on-a-pretrained-model.md).
## Conclusion
In this tutorial, we learned how to create a new PointNav experiment using **AllenAct**. There are many simple
and obvious ways to modify the experiment from here - changing the model, the learning algorithm and the environment
each requires very few lines of code changed in the above file, allowing us to explore our embodied AI research ideas
across different frameworks with ease.
================================================
FILE: docs/tutorials/training-pipelines.md
================================================
# Tutorial: IL to RL with a training pipeline
================================================
FILE: docs/tutorials/transfering-to-a-different-environment-framework.md
================================================
# Tutorial: Swapping in a new environment
**Note** The provided paths in this tutorial assume you have
[installed the full library](../installation/installation-allenact.md#full-library).
## Introduction
This tutorial was designed as a continuation of the `Robothor PointNav Tutorial` and explains
how to modify the experiment config created in that tutorial to work with the iTHOR and
Habitat environments.
Cross-platform support is one of the key design goals of `allenact`. This is achieved through
a total decoupling of the environment code from the engine, model and algorithm code, so that
swapping in a new environment is as plug and play as possible. Crucially we will be able to
run a model on different environments without touching the model code at all, which will allow
us to train neural networks in one environment and test them in another.
## RoboTHOR to iTHOR

Since both the `RoboTHOR` and the `iTHOR` environment stem from the same family and are developed
by the same organization, switching between the two is incredibly easy. We only have to change
the path parameter to point to an iTHOR dataset rather than the RoboTHOR one.
```python
# Dataset Parameters
TRAIN_DATASET_DIR = "datasets/ithor-pointnav/train"
VAL_DATASET_DIR = "datasets/ithor-pointnav/val"
```
We also have to download the `iTHOR-PointNav` dataset, following [these instructions](../installation/download-datasets.md).
We might also want to modify the `tag` method to accurately reflect our config but this will not change
the behavior at all and is merely a bookkeeping convenience.
```python
@classmethod
def tag(cls):
    """Return the experiment name, updated to reflect the iTHOR environment."""
    return "PointNaviThorRGBPPO"
```
## RoboTHOR to Habitat

To train experiments using the Habitat framework we need to install it following [these instructions](../installation/installation-framework.md).
Since the RoboTHOR and Habitat simulators are sufficiently different and have different parameters to configure,
this transformation takes a bit more effort, but we only need to modify the environment config and TaskSampler (we
have to change the former because the Habitat simulator accepts a different format of configuration and the latter
because the Habitat dataset is formatted differently and thus needs to be parsed differently).
As part of our environment modification, we need to switch from using RoboTHOR sensors to using Habitat sensors.
The sensor implementations we provide offer a uniform interface across all the environments, so we simply have
to swap out our sensor classes:
```python
SENSORS = [
DepthSensorHabitat(
height=SCREEN_SIZE,
width=SCREEN_SIZE,
use_normalization=True,
),
TargetCoordinatesSensorHabitat(coordinate_dims=2),
]
```
Next we need to define the simulator config:
```python
# Load the base Habitat config and unfreeze it so that it can be edited.
CONFIG = get_habitat_config("configs/gibson.yaml")
CONFIG.defrost()
# Hardware settings.
# NOTE(review): the RoboTHOR tutorial config names its GPU list
# `TRAINING_GPUS` -- confirm that `TRAIN_GPUS` is defined in this config.
CONFIG.NUM_PROCESSES = NUM_PROCESSES
CONFIG.SIMULATOR_GPU_IDS = TRAIN_GPUS
# Dataset location and scene selection.
CONFIG.DATASET.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR
CONFIG.DATASET.POINTNAVV1.CONTENT_SCENES = ["*"]
CONFIG.DATASET.DATA_PATH = TRAIN_SCENES
# Simulator: agent sensors, camera resolution, and movement step sizes.
CONFIG.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR"]
CONFIG.SIMULATOR.RGB_SENSOR.WIDTH = CAMERA_WIDTH
CONFIG.SIMULATOR.RGB_SENSOR.HEIGHT = CAMERA_HEIGHT
CONFIG.SIMULATOR.TURN_ANGLE = 30
CONFIG.SIMULATOR.FORWARD_STEP_SIZE = 0.25
CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = MAX_STEPS
# Task: type, goal sensor, and evaluation measurements.
CONFIG.TASK.TYPE = "Nav-v0"
CONFIG.TASK.SUCCESS_DISTANCE = 0.2
CONFIG.TASK.SENSORS = ["POINTGOAL_WITH_GPS_COMPASS_SENSOR"]
CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.GOAL_FORMAT = "POLAR"
CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.DIMENSIONALITY = 2
CONFIG.TASK.GOAL_SENSOR_UUID = "pointgoal_with_gps_compass"
CONFIG.TASK.MEASUREMENTS = ["DISTANCE_TO_GOAL", "SUCCESS", "SPL"]
CONFIG.TASK.SPL.TYPE = "SPL"
CONFIG.TASK.SPL.SUCCESS_DISTANCE = 0.2
CONFIG.TASK.SUCCESS.SUCCESS_DISTANCE = 0.2
CONFIG.MODE = "train"
```
This `CONFIG` object holds very similar values to the ones `ENV_ARGS` held in the RoboTHOR example. We
decided to leave this way of passing in configurations exposed to the user to offer maximum customization
of the underlying environment.
Finally we need to replace the task sampler and its argument generating functions:
```python
# Define Task Sampler
from allenact_plugins.habitat_plugin.habitat_task_samplers import PointNavTaskSampler
@classmethod
def make_sampler_fn(cls, **kwargs) -> TaskSampler:
    """Create a Habitat `PointNavTaskSampler`, forwarding all of `kwargs` to it."""
    return PointNavTaskSampler(**kwargs)
def train_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Return the task sampler arguments for training process `process_ind`.

    The environment config is looked up in `TRAIN_CONFIGS_PER_PROCESS` by
    process index. `total_processes`, `devices`, `seeds`, and
    `deterministic_cudnn` are accepted but not used.
    """
    config = self.TRAIN_CONFIGS_PER_PROCESS[process_ind]
    return {
        "env_config": config,
        "max_steps": self.MAX_STEPS,
        "sensors": self.SENSORS,
        "action_space": gym.spaces.Discrete(len(PointNavTask.action_names())),
        "distance_to_goal": self.DISTANCE_TO_GOAL,
    }
def valid_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Return the task sampler arguments for validation.

    The environment config is a clone of `CONFIG` pointed at
    `VALID_SCENES_PATH` with its mode set to "validate". None of the
    method's parameters are used, so every process gets the same arguments.
    """
    # Edit a clone so that the shared CONFIG is left untouched.
    config = self.CONFIG.clone()
    config.defrost()
    config.DATASET.DATA_PATH = self.VALID_SCENES_PATH
    config.MODE = "validate"
    config.freeze()
    return {
        "env_config": config,
        "max_steps": self.MAX_STEPS,
        "sensors": self.SENSORS,
        "action_space": gym.spaces.Discrete(len(PointNavTask.action_names())),
        "distance_to_goal": self.DISTANCE_TO_GOAL,
    }
def test_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Return the task sampler arguments for test process `process_ind`.

    The environment config is looked up in `TEST_CONFIGS` by process index.
    `total_processes`, `devices`, `seeds`, and `deterministic_cudnn` are
    accepted but not used.
    """
    config = self.TEST_CONFIGS[process_ind]
    return {
        "env_config": config,
        "max_steps": self.MAX_STEPS,
        "sensors": self.SENSORS,
        "action_space": gym.spaces.Discrete(len(PointNavTask.action_names())),
        "distance_to_goal": self.DISTANCE_TO_GOAL,
    }
```
As we can see, this code looks very similar as well; we simply need to pass slightly different parameters.
## Conclusion
In this tutorial, we learned how to modify our experiment configurations to work with different environments. By
providing a high level of modularity and out-of-the-box support for both `Habitat` and `THOR`, two of the most popular embodied frameworks out there, **AllenAct** hopes to give researchers the ability to validate their results across many platforms and help guide them towards genuine progress. The source code for this tutorial can be found in `/projects/framework_transfer_tutorial`.
================================================
FILE: main.py
================================================
#!/usr/bin/env python3
"""Entry point to training/validating/testing for a user given experiment
name."""
import allenact.main

# This script holds no logic of its own: when executed directly it delegates
# to `allenact.main.main()`.
if __name__ == "__main__":
    allenact.main.main()
================================================
FILE: mkdocs.yml
================================================
site_name: AllenAct
site_description: An open source framework for research in Embodied-AI from AI2
site_url: https://allenact.org
theme:
name: material
custom_dir: overrides
palette:
primary: blue
accent: grey
logo: img/AI2_Avatar_White.png
favicon: img/AllenAct_A.svg
highlightjs: true
hljs_languages:
- python
- typescript
- json
extra_css:
- css/extra.css
google_analytics: [UA-120916510-8, allenact.org]
repo_name: allenai/allenact
repo_url: https://github.com/allenai/allenact
docs_dir: docs
nav:
- Overview: index.md
- Installation:
- Install AllenAct: installation/installation-allenact.md
- Install environments: installation/installation-framework.md
- Download datasets: installation/download-datasets.md
- Getting started:
- Run your first experiment: getting_started/running-your-first-experiment.md
- Primary abstractions: getting_started/abstractions.md
- Structure of the codebase: getting_started/structure.md
- Tutorials:
- AllenAct Tutorials: tutorials/index.md
- Navigation in Minigrid: tutorials/minigrid-tutorial.md
- PointNav in RoboTHOR: tutorials/training-a-pointnav-model.md
- Swapping environments: tutorials/transfering-to-a-different-environment-framework.md
- Using a pre-trained model: tutorials/running-inference-on-a-pretrained-model.md
- Off-policy training: tutorials/offpolicy-tutorial.md
- OpenAI gym for continuous control: tutorials/gym-tutorial.md
- Multi-node ObjectNav training: tutorials/distributed-objectnav-tutorial.md
- OpenAI gym for MuJoCo tasks: tutorials/gym-mujoco-tutorial.md
# - IL to RL with pipelines: tutorials/training-pipelines.md
- HowTos:
- Define an experiment: howtos/defining-an-experiment.md
- Change rewards and losses: howtos/changing-rewards-and-losses.md
- Define a new model: howtos/defining-a-new-model.md
- Define a new task: howtos/defining-a-new-task.md
- Define a new training pipeline: howtos/defining-a-new-training-pipeline.md
# - Visualize results: howtos/visualizing-results.md
# - Run a multi-agent experiment: howtos/running-a-multi-agent-experiment.md
- Projects:
- BabyAI baselines: projects/babyai_baselines/README.md
- PointNav baselines: projects/pointnav_baselines/README.md
- ObjectNav baselines: projects/objectnav_baselines/README.md
# - Advisor code: projects/advisor_2020/README.md
# - Two Body Problem code: projects/two_body_problem_2019/README.md
- FAQ: FAQ.md
- Contributing: CONTRIBUTING.md
- Licence: LICENSE.md
- API:
- allenact:
- _constants: api/allenact/_constants.md
- embodiedai:
- mapping:
- mapping_utils:
- map_builders: api/allenact/embodiedai/mapping/mapping_utils/map_builders.md
- point_cloud_utils: api/allenact/embodiedai/mapping/mapping_utils/point_cloud_utils.md
- mapping_losses: api/allenact/embodiedai/mapping/mapping_losses.md
- mapping_models:
- active_neural_slam: api/allenact/embodiedai/mapping/mapping_models/active_neural_slam.md
- preprocessors:
- resnet: api/allenact/embodiedai/preprocessors/resnet.md
- sensors:
- vision_sensors: api/allenact/embodiedai/sensors/vision_sensors.md
- models:
- aux_models: api/allenact/embodiedai/models/aux_models.md
- basic_models: api/allenact/embodiedai/models/basic_models.md
- resnet: api/allenact/embodiedai/models/resnet.md
- fusion_models: api/allenact/embodiedai/models/fusion_models.md
- visual_nav_models: api/allenact/embodiedai/models/visual_nav_models.md
- storage:
- vdr_storage: api/allenact/embodiedai/storage/vdr_storage.md
- aux_losses:
- losses: api/allenact/embodiedai/aux_losses/losses.md
- base_abstractions:
- experiment_config: api/allenact/base_abstractions/experiment_config.md
- misc: api/allenact/base_abstractions/misc.md
- task: api/allenact/base_abstractions/task.md
- sensor: api/allenact/base_abstractions/sensor.md
- preprocessor: api/allenact/base_abstractions/preprocessor.md
- distributions: api/allenact/base_abstractions/distributions.md
- algorithms:
- onpolicy_sync:
- losses:
- grouped_action_imitation: api/allenact/algorithms/onpolicy_sync/losses/grouped_action_imitation.md
- imitation: api/allenact/algorithms/onpolicy_sync/losses/imitation.md
- abstract_loss: api/allenact/algorithms/onpolicy_sync/losses/abstract_loss.md
- ppo: api/allenact/algorithms/onpolicy_sync/losses/ppo.md
- a2cacktr: api/allenact/algorithms/onpolicy_sync/losses/a2cacktr.md
- misc: api/allenact/algorithms/onpolicy_sync/misc.md
- runner: api/allenact/algorithms/onpolicy_sync/runner.md
- policy: api/allenact/algorithms/onpolicy_sync/policy.md
- engine: api/allenact/algorithms/onpolicy_sync/engine.md
- vector_sampled_tasks: api/allenact/algorithms/onpolicy_sync/vector_sampled_tasks.md
- storage: api/allenact/algorithms/onpolicy_sync/storage.md
- offpolicy_sync:
- losses:
- abstract_offpolicy_loss: api/allenact/algorithms/offpolicy_sync/losses/abstract_offpolicy_loss.md
- utils:
- model_utils: api/allenact/utils/model_utils.md
- experiment_utils: api/allenact/utils/experiment_utils.md
- spaces_utils: api/allenact/utils/spaces_utils.md
- system: api/allenact/utils/system.md
- cacheless_frcnn: api/allenact/utils/cacheless_frcnn.md
- misc_utils: api/allenact/utils/misc_utils.md
- multi_agent_viz_utils: api/allenact/utils/multi_agent_viz_utils.md
- viz_utils: api/allenact/utils/viz_utils.md
- tensor_utils: api/allenact/utils/tensor_utils.md
- cache_utils: api/allenact/utils/cache_utils.md
- allenact_plugins:
- habitat_plugin:
- habitat_constants: api/allenact_plugins/habitat_plugin/habitat_constants.md
- habitat_tasks: api/allenact_plugins/habitat_plugin/habitat_tasks.md
- habitat_sensors: api/allenact_plugins/habitat_plugin/habitat_sensors.md
- habitat_environment: api/allenact_plugins/habitat_plugin/habitat_environment.md
- habitat_preprocessors: api/allenact_plugins/habitat_plugin/habitat_preprocessors.md
- habitat_task_samplers: api/allenact_plugins/habitat_plugin/habitat_task_samplers.md
- scripts:
- agent_demo: api/allenact_plugins/habitat_plugin/scripts/agent_demo.md
- make_map: api/allenact_plugins/habitat_plugin/scripts/make_map.md
- habitat_utils: api/allenact_plugins/habitat_plugin/habitat_utils.md
- lighthouse_plugin:
- lighthouse_models: api/allenact_plugins/lighthouse_plugin/lighthouse_models.md
- lighthouse_environment: api/allenact_plugins/lighthouse_plugin/lighthouse_environment.md
- lighthouse_tasks: api/allenact_plugins/lighthouse_plugin/lighthouse_tasks.md
- lighthouse_sensors: api/allenact_plugins/lighthouse_plugin/lighthouse_sensors.md
- lighthouse_util: api/allenact_plugins/lighthouse_plugin/lighthouse_util.md
- babyai_plugin:
- babyai_constants: api/allenact_plugins/babyai_plugin/babyai_constants.md
- babyai_models: api/allenact_plugins/babyai_plugin/babyai_models.md
- scripts:
- truncate_expert_demos: api/allenact_plugins/babyai_plugin/scripts/truncate_expert_demos.md
- get_instr_length_percentiles: api/allenact_plugins/babyai_plugin/scripts/get_instr_length_percentiles.md
- download_babyai_expert_demos: api/allenact_plugins/babyai_plugin/scripts/download_babyai_expert_demos.md
- babyai_tasks: api/allenact_plugins/babyai_plugin/babyai_tasks.md
- ithor_plugin:
- ithor_tasks: api/allenact_plugins/ithor_plugin/ithor_tasks.md
- ithor_environment: api/allenact_plugins/ithor_plugin/ithor_environment.md
- ithor_constants: api/allenact_plugins/ithor_plugin/ithor_constants.md
- ithor_util: api/allenact_plugins/ithor_plugin/ithor_util.md
- ithor_sensors: api/allenact_plugins/ithor_plugin/ithor_sensors.md
- scripts:
- make_objectnav_debug_dataset: api/allenact_plugins/ithor_plugin/scripts/make_objectnav_debug_dataset.md
- make_pointnav_debug_dataset: api/allenact_plugins/ithor_plugin/scripts/make_pointnav_debug_dataset.md
- ithor_viz: api/allenact_plugins/ithor_plugin/ithor_viz.md
- ithor_task_samplers: api/allenact_plugins/ithor_plugin/ithor_task_samplers.md
- robothor_plugin:
- robothor_preprocessors: api/allenact_plugins/robothor_plugin/robothor_preprocessors.md
- robothor_task_samplers: api/allenact_plugins/robothor_plugin/robothor_task_samplers.md
- robothor_environment: api/allenact_plugins/robothor_plugin/robothor_environment.md
- robothor_constants: api/allenact_plugins/robothor_plugin/robothor_constants.md
- robothor_distributions: api/allenact_plugins/robothor_plugin/robothor_distributions.md
- robothor_models: api/allenact_plugins/robothor_plugin/robothor_models.md
- robothor_tasks: api/allenact_plugins/robothor_plugin/robothor_tasks.md
- scripts:
- make_objectnav_debug_dataset: api/allenact_plugins/robothor_plugin/scripts/make_objectnav_debug_dataset.md
- make_pointnav_debug_dataset: api/allenact_plugins/robothor_plugin/scripts/make_pointnav_debug_dataset.md
- robothor_sensors: api/allenact_plugins/robothor_plugin/robothor_sensors.md
- robothor_viz: api/allenact_plugins/robothor_plugin/robothor_viz.md
- minigrid_plugin:
- minigrid_tasks: api/allenact_plugins/minigrid_plugin/minigrid_tasks.md
- minigrid_environments: api/allenact_plugins/minigrid_plugin/minigrid_environments.md
- minigrid_offpolicy: api/allenact_plugins/minigrid_plugin/minigrid_offpolicy.md
- minigrid_sensors: api/allenact_plugins/minigrid_plugin/minigrid_sensors.md
- configs:
- minigrid_nomemory: api/allenact_plugins/minigrid_plugin/configs/minigrid_nomemory.md
- minigrid_models: api/allenact_plugins/minigrid_plugin/minigrid_models.md
- manipulathor_plugin:
- manipulathor_viz: api/allenact_plugins/manipulathor_plugin/manipulathor_viz.md
- manipulathor_tasks: api/allenact_plugins/manipulathor_plugin/manipulathor_tasks.md
- manipulathor_task_samplers: api/allenact_plugins/manipulathor_plugin/manipulathor_task_samplers.md
- manipulathor_constants: api/allenact_plugins/manipulathor_plugin/manipulathor_constants.md
- armpointnav_constants: api/allenact_plugins/manipulathor_plugin/armpointnav_constants.md
- manipulathor_sensors: api/allenact_plugins/manipulathor_plugin/manipulathor_sensors.md
- arm_calculation_utils: api/allenact_plugins/manipulathor_plugin/arm_calculation_utils.md
- manipulathor_utils: api/allenact_plugins/manipulathor_plugin/manipulathor_utils.md
- manipulathor_environment: api/allenact_plugins/manipulathor_plugin/manipulathor_environment.md
- gym_plugin:
- gym_environment: api/allenact_plugins/gym_plugin/gym_environment.md
- gym_sensors: api/allenact_plugins/gym_plugin/gym_sensors.md
- gym_distributions: api/allenact_plugins/gym_plugin/gym_distributions.md
- gym_models: api/allenact_plugins/gym_plugin/gym_models.md
- gym_tasks: api/allenact_plugins/gym_plugin/gym_tasks.md
- constants: api/constants.md
- projects:
- gym_baselines:
- experiments:
- gym_base: api/projects/gym_baselines/experiments/gym_base.md
- gym_humanoid_base: api/projects/gym_baselines/experiments/gym_humanoid_base.md
- gym_mujoco_base: api/projects/gym_baselines/experiments/gym_mujoco_base.md
- gym_humanoid_ddppo: api/projects/gym_baselines/experiments/gym_humanoid_ddppo.md
- mujoco:
- gym_mujoco_swimmer_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_swimmer_ddppo.md
- gym_mujoco_reacher_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_reacher_ddppo.md
- gym_mujoco_walker2d_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_walker2d_ddppo.md
- gym_mujoco_halfcheetah_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_halfcheetah_ddppo.md
- gym_mujoco_humanoid_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_humanoid_ddppo.md
- gym_mujoco_inverteddoublependulum_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_inverteddoublependulum_ddppo.md
- gym_mujoco_ant_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_ant_ddppo.md
- gym_mujoco_hopper_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_hopper_ddppo.md
- gym_mujoco_invertedpendulum_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_invertedpendulum_ddppo.md
- gym_mujoco_ddppo: api/projects/gym_baselines/experiments/gym_mujoco_ddppo.md
- models:
- gym_models: api/projects/gym_baselines/models/gym_models.md
- objectnav_baselines:
- experiments:
- robothor:
- objectnav_robothor_base: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_base.md
- objectnav_robothor_rgb_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnetgru_ddppo.md
- objectnav_robothor_rgb_resnetgru_dagger: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnetgru_dagger.md
- objectnav_robothor_rgbd_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnetgru_ddppo.md
- objectnav_robothor_rgb_resnetgru_ddppo_and_gbc: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnetgru_ddppo_and_gbc.md
- objectnav_robothor_rgb_unfrozenresnet_gru_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_unfrozenresnet_gru_ddppo.md
- objectnav_robothor_rgb_unfrozenresnet_gru_vdr_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_unfrozenresnet_gru_vdr_ddppo.md
- objectnav_robothor_depth_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_depth_resnetgru_ddppo.md
- objectnav_mixin_unfrozenresnet_gru: api/projects/objectnav_baselines/experiments/objectnav_mixin_unfrozenresnet_gru.md
- ithor:
- objectnav_ithor_rgbd_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgbd_resnetgru_ddppo.md
- objectnav_ithor_depth_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_depth_resnetgru_ddppo.md
- objectnav_ithor_rgb_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgb_resnetgru_ddppo.md
- objectnav_ithor_base: api/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_base.md
- objectnav_thor_mixin_ddppo_and_gbc: api/projects/objectnav_baselines/experiments/objectnav_thor_mixin_ddppo_and_gbc.md
- objectnav_thor_base: api/projects/objectnav_baselines/experiments/objectnav_thor_base.md
- objectnav_mixin_resnetgru: api/projects/objectnav_baselines/experiments/objectnav_mixin_resnetgru.md
- objectnav_base: api/projects/objectnav_baselines/experiments/objectnav_base.md
- objectnav_mixin_ddppo: api/projects/objectnav_baselines/experiments/objectnav_mixin_ddppo.md
- objectnav_mixin_dagger: api/projects/objectnav_baselines/experiments/objectnav_mixin_dagger.md
- models:
- object_nav_models: api/projects/objectnav_baselines/models/object_nav_models.md
- babyai_baselines:
- experiments:
- go_to_local:
- bc: api/projects/babyai_baselines/experiments/go_to_local/bc.md
- distributed_bc_offpolicy: api/projects/babyai_baselines/experiments/go_to_local/distributed_bc_offpolicy.md
- dagger: api/projects/babyai_baselines/experiments/go_to_local/dagger.md
- bc_teacher_forcing: api/projects/babyai_baselines/experiments/go_to_local/bc_teacher_forcing.md
- distributed_bc_teacher_forcing: api/projects/babyai_baselines/experiments/go_to_local/distributed_bc_teacher_forcing.md
- ppo: api/projects/babyai_baselines/experiments/go_to_local/ppo.md
- a2c: api/projects/babyai_baselines/experiments/go_to_local/a2c.md
- base: api/projects/babyai_baselines/experiments/go_to_local/base.md
- go_to_obj:
- bc: api/projects/babyai_baselines/experiments/go_to_obj/bc.md
- dagger: api/projects/babyai_baselines/experiments/go_to_obj/dagger.md
- bc_teacher_forcing: api/projects/babyai_baselines/experiments/go_to_obj/bc_teacher_forcing.md
- ppo: api/projects/babyai_baselines/experiments/go_to_obj/ppo.md
- a2c: api/projects/babyai_baselines/experiments/go_to_obj/a2c.md
- base: api/projects/babyai_baselines/experiments/go_to_obj/base.md
- base: api/projects/babyai_baselines/experiments/base.md
- pointnav_baselines:
- experiments:
- robothor:
- pointnav_robothor_rgbd_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgbd_simpleconvgru_ddppo.md
- pointnav_robothor_depth_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_depth_simpleconvgru_ddppo.md
- pointnav_robothor_rgb_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo.md
- pointnav_robothor_rgb_simpleconvgru_ddppo_and_gbc: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo_and_gbc.md
- pointnav_robothor_base: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_base.md
- habitat:
- pointnav_habitat_rgb_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgb_simpleconvgru_ddppo.md
- pointnav_habitat_rgbd_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgbd_simpleconvgru_ddppo.md
- pointnav_habitat_base: api/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_base.md
- debug_pointnav_habitat_rgbd_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/debug_pointnav_habitat_rgbd_simpleconvgru_ddppo.md
- debug_pointnav_habitat_rgb_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/debug_pointnav_habitat_rgb_simpleconvgru_ddppo.md
- debug_pointnav_habitat_rgb_simpleconvgru_bc: api/projects/pointnav_baselines/experiments/habitat/debug_pointnav_habitat_rgb_simpleconvgru_bc.md
- pointnav_habitat_depth_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_depth_simpleconvgru_ddppo.md
- debug_pointnav_habitat_base: api/projects/pointnav_baselines/experiments/habitat/debug_pointnav_habitat_base.md
- pointnav_base: api/projects/pointnav_baselines/experiments/pointnav_base.md
- pointnav_habitat_mixin_ddppo: api/projects/pointnav_baselines/experiments/pointnav_habitat_mixin_ddppo.md
- ithor:
- pointnav_ithor_rgbd_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgbd_simpleconvgru_ddppo.md
- pointnav_ithor_depth_simpleconvgru_ddppo_and_gbc: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_depth_simpleconvgru_ddppo_and_gbc.md
- pointnav_ithor_rgb_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgb_simpleconvgru_ddppo.md
- pointnav_ithor_base: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_base.md
- pointnav_ithor_depth_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_depth_simpleconvgru_ddppo.md
- pointnav_mixin_simpleconvgru: api/projects/pointnav_baselines/experiments/pointnav_mixin_simpleconvgru.md
- pointnav_thor_mixin_ddppo: api/projects/pointnav_baselines/experiments/pointnav_thor_mixin_ddppo.md
- pointnav_thor_mixin_ddppo_and_gbc: api/projects/pointnav_baselines/experiments/pointnav_thor_mixin_ddppo_and_gbc.md
- pointnav_thor_base: api/projects/pointnav_baselines/experiments/pointnav_thor_base.md
- models:
- point_nav_models: api/projects/pointnav_baselines/models/point_nav_models.md
- tutorials:
- pointnav_habitat_rgb_ddppo: api/projects/tutorials/pointnav_habitat_rgb_ddppo.md
- object_nav_ithor_dagger_then_ppo_one_object: api/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object.md
- minigrid_offpolicy_tutorial: api/projects/tutorials/minigrid_offpolicy_tutorial.md
- navtopartner_robothor_rgb_ppo: api/projects/tutorials/navtopartner_robothor_rgb_ppo.md
- pointnav_ithor_rgb_ddppo: api/projects/tutorials/pointnav_ithor_rgb_ddppo.md
- object_nav_ithor_dagger_then_ppo_one_object_viz: api/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object_viz.md
- object_nav_ithor_ppo_one_object: api/projects/tutorials/object_nav_ithor_ppo_one_object.md
- minigrid_tutorial_conds: api/projects/tutorials/minigrid_tutorial_conds.md
- manipulathor_baselines:
- armpointnav_baselines:
- experiments:
- armpointnav_thor_base: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_thor_base.md
- armpointnav_base: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_base.md
- armpointnav_mixin_ddppo: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_mixin_ddppo.md
- ithor:
- armpointnav_no_vision: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_no_vision.md
- armpointnav_ithor_base: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_ithor_base.md
- armpointnav_depth: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_depth.md
- armpointnav_rgb: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_rgb.md
- armpointnav_rgbdepth: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_rgbdepth.md
- armpointnav_disjoint_depth: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_disjoint_depth.md
- armpointnav_mixin_simplegru: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_mixin_simplegru.md
- models:
- arm_pointnav_models: api/projects/manipulathor_baselines/armpointnav_baselines/models/arm_pointnav_models.md
- base_models: api/projects/manipulathor_baselines/armpointnav_baselines/models/base_models.md
- disjoint_arm_pointnav_models: api/projects/manipulathor_baselines/armpointnav_baselines/models/disjoint_arm_pointnav_models.md
- manipulathor_net_utils: api/projects/manipulathor_baselines/armpointnav_baselines/models/manipulathor_net_utils.md
- tests:
- mapping:
- test_ai2thor_mapping: api/tests/mapping/test_ai2thor_mapping.md
- multiprocessing:
- test_frozen_attribs: api/tests/multiprocessing/test_frozen_attribs.md
- utils:
- test_spaces: api/tests/utils/test_spaces.md
- vision:
- test_pillow_rescaling: api/tests/vision/test_pillow_rescaling.md
- sync_algs_cpu:
- test_to_to_obj_trains: api/tests/sync_algs_cpu/test_to_to_obj_trains.md
- manipulathor_plugin:
- test_utils: api/tests/manipulathor_plugin/test_utils.md
- hierarchical_policies:
- test_minigrid_conditional: api/tests/hierarchical_policies/test_minigrid_conditional.md
markdown_extensions:
- toc:
permalink: '#'
- markdown.extensions.codehilite:
guess_lang: true
- meta
- admonition
- codehilite
# extra_javascript:
# - javascripts/extra.js
#plugins:
# - search
# - mkpdfs
================================================
FILE: mypy.ini
================================================
[mypy]
python_version = 3.7
follow_imports = skip
ignore_missing_imports = True
strict_optional = False
[mypy-demo.*]
ignore_errors = True
================================================
FILE: overrides/main.html
================================================
{% extends "base.html" %}
{# Theme override: fills the `extrahead` block of the parent "base.html" template. #}
{% block extrahead %}
{# Start from the configured site name. #}
{% set title = config.site_name %}
{# Prefer an explicit title from the page metadata; otherwise fall back to the page title (tags stripped) on non-homepage pages. #}
{% if page and page.meta and page.meta.title %}
{% set title = title ~ " - " ~ page.meta.title %}
{% elif page and page.title and not page.is_homepage %}
{% set title = title ~ " - " ~ page.title | striptags %}
{% endif %}
{# NOTE(review): `title` is computed but not emitted anywhere in this block as shown; confirm whether output tags were intended here. #}
{% endblock %}
================================================
FILE: pretrained_model_ckpts/.gitignore
================================================
*
!.gitignore
!*.sh
================================================
FILE: pretrained_model_ckpts/download_navigation_model_ckpts.sh
================================================
#!/bin/bash
# Download and unpack a pretrained navigation model checkpoint next to this script.
#
# Usage: download_navigation_model_ckpts.sh <model-name>
#   <model-name> is one of:
#     robothor-pointnav-rgb-resnet
#     robothor-objectnav-challenge-2021
#
# Exits with a non-zero status if the model name is not recognised or if the
# download or extraction fails.

# Move to the directory containing this file
cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" || exit

# Download, Unzip, and Remove zip
if [ "$1" = "robothor-pointnav-rgb-resnet" ]
then
  echo "Downloading pretrained RoboTHOR PointNav model..."
  # Stop immediately on a failed download instead of reporting success below.
  wget https://prior-model-weights.s3.us-east-2.amazonaws.com/embodied-ai/navigation/robothor-pointnav-rgb-resnet.tar.gz || exit 1
  # Only delete the archive once it has been extracted successfully.
  tar -xf robothor-pointnav-rgb-resnet.tar.gz || exit 1
  rm robothor-pointnav-rgb-resnet.tar.gz
  echo "saved folder: robothor-pointnav-rgb-resnet"
elif [ "$1" = "robothor-objectnav-challenge-2021" ]
then
  echo "Downloading pretrained RoboTHOR ObjectNav model..."
  # Stop immediately on a failed download instead of reporting success below.
  wget https://prior-model-weights.s3.us-east-2.amazonaws.com/embodied-ai/navigation/robothor-objectnav-challenge-2021.tar.gz || exit 1
  # Only delete the archive once it has been extracted successfully.
  tar -xf robothor-objectnav-challenge-2021.tar.gz || exit 1
  rm robothor-objectnav-challenge-2021.tar.gz
  echo "saved folder: robothor-objectnav-challenge-2021"
else
  # List every accepted model name so the message matches the branches above.
  echo "Failed: Usage download_navigation_model_ckpts.sh [robothor-pointnav-rgb-resnet | robothor-objectnav-challenge-2021]"
  exit 1
fi
================================================
FILE: projects/__init__.py
================================================
================================================
FILE: projects/babyai_baselines/README.md
================================================
# Baseline experiments for the BabyAI environment
We perform a collection of baseline experiments within the BabyAI environment
on the GoToLocal task; see the `projects/babyai_baselines/experiments/go_to_local` directory.
For instance, to train a model using PPO, run
```bash
python main.py go_to_local.ppo --experiment_base projects/babyai_baselines/experiments
```
Note that these experiments will be quite slow when not using a GPU as the BabyAI model architecture is surprisingly
large. Specifying a GPU (if available) can be done from the command line using hooks we created using
[gin-config](https://github.com/google/gin-config). E.g. to train using the 0th GPU device, add
```bash
--gp "machine_params.gpu_id = 0"
```
to the above command.
================================================
FILE: projects/babyai_baselines/__init__.py
================================================
================================================
FILE: projects/babyai_baselines/experiments/__init__.py
================================================
================================================
FILE: projects/babyai_baselines/experiments/base.py
================================================
from abc import ABC
from typing import Dict, Any, List, Optional, Union, Sequence, cast
import gym
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses import PPO, A2C
from allenact.algorithms.onpolicy_sync.losses.a2cacktr import A2CConfig
from allenact.algorithms.onpolicy_sync.losses.imitation import Imitation
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.misc import Loss
from allenact.base_abstractions.sensor import SensorSuite, Sensor, ExpertActionSensor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import (
Builder,
LinearDecay,
PipelineStage,
TrainingPipeline,
)
from allenact_plugins.babyai_plugin.babyai_models import BabyAIRecurrentACModel
from allenact_plugins.babyai_plugin.babyai_tasks import BabyAITask, BabyAITaskSampler
from allenact_plugins.minigrid_plugin.minigrid_sensors import (
EgocentricMiniGridSensor,
MiniGridMissionSensor,
)
class BaseBabyAIExperimentConfig(ExperimentConfig, ABC):
    """Base experimental config.

    Collects the settings and helper methods shared by the BabyAI baseline
    experiments: sensor construction, default loss settings, the training
    pipeline, machine parameters, model creation, and task sampler
    arguments. Attributes left as `None` here are expected to be filled in
    by subclasses before the methods that read them are called.
    """

    # Passed to the task sampler as its "env_builder" argument.
    LEVEL: Optional[str] = None
    # Total number of RL training steps; read by concrete experiment configs.
    TOTAL_RL_TRAIN_STEPS: Optional[int] = None
    # `agent_view_size` given to the egocentric MiniGrid sensor.
    AGENT_VIEW_SIZE: int = 7
    # Passed to the training pipeline as `num_steps`.
    ROLLOUT_STEPS: Optional[int] = None
    # Default number of training processes; also scales the metric interval.
    NUM_TRAIN_SAMPLERS: Optional[int] = None
    # Total number of test tasks, divided among the test processes.
    NUM_TEST_TASKS: Optional[int] = None
    # `instr_len` given to the mission sensor.
    INSTR_LEN: Optional[int] = None
    # Whether the mission sensor is included and the model uses instructions.
    USE_INSTR: Optional[bool] = None
    # Device used when `machine_params` is called with `gpu_id="default"`.
    GPU_ID: Optional[int] = None
    # Whether an expert action sensor is appended to the sensors.
    USE_EXPERT = False
    # Passed to the training pipeline as `should_log`.
    SHOULD_LOG = True
    # Number of mini batches used by the "ppo" and "imitation" defaults.
    PPO_NUM_MINI_BATCH = 2
    # `arch` given to the model.
    ARCH: Optional[str] = None
    # Number of checkpoints the save interval is derived from.
    NUM_CKPTS_TO_SAVE = 50
    # Constant offset added to every test task seed.
    TEST_SEED_OFFSET = 0
    # Learning rate used when `_training_pipeline` receives `lr=None`.
    DEFAULT_LR = 1e-3

    @classmethod
    def METRIC_ACCUMULATE_INTERVAL(cls):
        """Return the metric accumulation interval (`NUM_TRAIN_SAMPLERS * 1000`)."""
        return cls.NUM_TRAIN_SAMPLERS * 1000

    @classmethod
    def get_sensors(cls) -> Sequence[Sensor]:
        """Build the list of sensors for this experiment.

        The egocentric MiniGrid sensor is always present. The mission sensor
        is added when `USE_INSTR` is truthy and the expert action sensor when
        `USE_EXPERT` is truthy.

        Raises:
            AssertionError: if `USE_INSTR` has not been set by a subclass.
        """
        assert cls.USE_INSTR is not None

        sensors: List[Sensor] = [
            EgocentricMiniGridSensor(
                agent_view_size=cls.AGENT_VIEW_SIZE, view_channels=3
            ),
        ]
        if cls.USE_INSTR:
            sensors.append(
                MiniGridMissionSensor(instr_len=cls.INSTR_LEN)  # type:ignore
            )
        if cls.USE_EXPERT:
            sensors.append(
                ExpertActionSensor(  # type: ignore
                    nactions=len(BabyAITask.class_action_names())
                )
            )
        return sensors

    @classmethod
    def rl_loss_default(cls, alg: str, steps: Optional[int] = None):
        """Return default loss settings for the algorithm named by `alg`.

        Args:
            alg: One of "ppo", "a2c", or "imitation".
            steps: Number of steps over which the PPO clip parameter decays;
                required for "ppo" and unused for the other algorithms.

        Returns:
            A dict with the keys "loss", "num_mini_batch" and
            "update_repeats".

        Raises:
            AssertionError: if `alg` is "ppo" and `steps` is None.
            NotImplementedError: if `alg` is not a supported name.
        """
        if alg == "ppo":
            assert steps is not None
            return {
                "loss": Builder(
                    PPO,
                    kwargs={"clip_decay": LinearDecay(steps)},
                    default=PPOConfig,
                ),
                "num_mini_batch": cls.PPO_NUM_MINI_BATCH,
                "update_repeats": 4,
            }
        elif alg == "a2c":
            return {
                "loss": A2C(**A2CConfig),
                "num_mini_batch": 1,
                "update_repeats": 1,
            }
        elif alg == "imitation":
            return {
                "loss": Imitation(),
                "num_mini_batch": cls.PPO_NUM_MINI_BATCH,
                "update_repeats": 4,
            }
        else:
            raise NotImplementedError(
                f"Unknown alg {alg!r}; expected 'ppo', 'a2c', or 'imitation'."
            )

    @classmethod
    def _training_pipeline(
        cls,
        named_losses: Dict[str, Union[Loss, Builder]],
        pipeline_stages: List[PipelineStage],
        num_mini_batch: int,
        update_repeats: int,
        total_train_steps: int,
        lr: Optional[float] = None,
    ):
        """Create the `TrainingPipeline` shared by the BabyAI experiments.

        Args:
            named_losses: Mapping from loss name to loss (or loss builder).
            pipeline_stages: Stages making up the pipeline.
            num_mini_batch: Number of mini batches per update.
            update_repeats: Number of update repeats per rollout.
            total_train_steps: Total number of training steps; determines
                both the checkpoint interval and the horizon of the linear
                learning-rate decay.
            lr: Learning rate for Adam; `DEFAULT_LR` is used when None.

        Returns:
            The configured `TrainingPipeline`.
        """
        lr = cls.DEFAULT_LR if lr is None else lr
        num_steps = cls.ROLLOUT_STEPS
        # Interval is NUM_TRAIN_SAMPLERS * 1000 (see METRIC_ACCUMULATE_INTERVAL).
        metric_accumulate_interval = cls.METRIC_ACCUMULATE_INTERVAL()
        # Use the caller-supplied step count (previously ignored in favour of
        # TOTAL_RL_TRAIN_STEPS) so checkpoints are spread over the run that is
        # actually being configured.
        save_interval = int(total_train_steps / cls.NUM_CKPTS_TO_SAVE)
        gamma = 0.99
        # GAE is disabled only when a loss named "reinforce_loss" is present.
        use_gae = "reinforce_loss" not in named_losses
        gae_lambda = 0.99
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses=named_losses,
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=None,
            should_log=cls.SHOULD_LOG,
            pipeline_stages=pipeline_stages,
            # Decay the learning rate over the same horizon as the training run.
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=total_train_steps)}  # type: ignore
            ),
        )

    @classmethod
    def machine_params(
        cls, mode="train", gpu_id="default", n_train_processes="default", **kwargs
    ):
        """Return the `MachineParams` (process count and devices) for `mode`.

        Args:
            mode: "train", "valid", or "test".
            gpu_id: Device to use, or "default" to use `GPU_ID` (no devices
                when `GPU_ID` is None).
            n_train_processes: Number of training processes, or "default" to
                use `NUM_TRAIN_SAMPLERS`. Only read when `mode` is "train".
            **kwargs: Accepted and ignored.

        Raises:
            NotImplementedError: if `mode` is not one of the three names.
        """
        if mode == "train":
            if n_train_processes == "default":
                nprocesses = cls.NUM_TRAIN_SAMPLERS
            else:
                nprocesses = n_train_processes
        elif mode == "valid":
            # No validation processes are used by these experiments.
            nprocesses = 0
        elif mode == "test":
            # Never start more processes than there are test tasks.
            nprocesses = min(
                100 if torch.cuda.is_available() else 8, cls.NUM_TEST_TASKS
            )
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        if gpu_id == "default":
            devices = [] if cls.GPU_ID is None else [cls.GPU_ID]
        else:
            devices = [gpu_id]

        return MachineParams(nprocesses=nprocesses, devices=devices)

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Instantiate the recurrent actor-critic model for this experiment."""
        sensors = cls.get_sensors()
        return BabyAIRecurrentACModel(
            action_space=gym.spaces.Discrete(len(BabyAITask.class_action_names())),
            observation_space=SensorSuite(sensors).observation_spaces,
            use_instr=cls.USE_INSTR,
            use_memory=True,
            arch=cls.ARCH,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `BabyAITaskSampler` from the given keyword arguments."""
        return BabyAITaskSampler(**kwargs)

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for training process `process_ind`.

        The "seed" entry is `seeds[process_ind]` when `seeds` is given and
        None otherwise.
        """
        return {
            "env_builder": self.LEVEL,
            "sensors": self.get_sensors(),
            "seed": seeds[process_ind] if seeds is not None else None,
        }

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Always raise: this config defines no validation sampler arguments.

        Raises:
            RuntimeError: unconditionally.
        """
        raise RuntimeError(
            "Validation task sampler arguments are not defined for this"
            " experiment config."
        )

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for test process `process_ind`.

        `NUM_TEST_TASKS` is divided among the processes, with the first
        `NUM_TEST_TASKS % total_processes` processes taking one extra task.
        Each task receives its own deterministic seed, and sensors whose
        type name contains "Expert" are dropped from the training sensors.

        Raises:
            AssertionError: if any generated seed lies outside [0, 2**32 - 1].
        """
        base_tasks, leftover = divmod(self.NUM_TEST_TASKS, total_processes)
        # The first `leftover` processes each take one of the remaining tasks.
        max_tasks = base_tasks + (1 if process_ind < leftover else 0)

        # Striding by `total_processes` keeps seeds disjoint across processes.
        task_seeds_list = [
            2**31 - 1 + self.TEST_SEED_OFFSET + process_ind + total_processes * i
            for i in range(max_tasks)
        ]

        assert len(task_seeds_list) == 0 or (
            min(task_seeds_list) >= 0 and max(task_seeds_list) <= 2**32 - 1
        )

        train_sampler_args = self.train_task_sampler_args(
            process_ind=process_ind,
            total_processes=total_processes,
            devices=devices,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        return {
            **train_sampler_args,
            "task_seeds_list": task_seeds_list,
            "max_tasks": max_tasks,
            "deterministic_sampling": True,
            # Matching on the type name keeps the original filtering behaviour.
            "sensors": [
                s for s in train_sampler_args["sensors"] if "Expert" not in str(type(s))
            ],
        }
================================================
FILE: projects/babyai_baselines/experiments/go_to_local/__init__.py
================================================
================================================
FILE: projects/babyai_baselines/experiments/go_to_local/a2c.py
================================================
import torch
from allenact.utils.experiment_utils import PipelineStage
from projects.babyai_baselines.experiments.go_to_local.base import (
BaseBabyAIGoToLocalExperimentConfig,
)
class A2CBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):
    """GoToLocal experiment trained with the A2C loss alone."""

    # Use many more samplers when CUDA is available; otherwise keep the
    # base config's value.
    NUM_TRAIN_SAMPLERS: int = (
        BaseBabyAIGoToLocalExperimentConfig.NUM_TRAIN_SAMPLERS
        if not torch.cuda.is_available()
        else 128 * 6
    )
    ROLLOUT_STEPS: int = 16
    USE_LR_DECAY = False
    DEFAULT_LR = 1e-4

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "BabyAIGoToLocalA2C"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single-stage pipeline that optimizes only the A2C loss."""
        rl_steps = cls.TOTAL_RL_TRAIN_STEPS
        loss_info = cls.rl_loss_default("a2c", steps=rl_steps)

        a2c_stage = PipelineStage(
            loss_names=["a2c_loss"],
            max_stage_steps=rl_steps,
        )
        return cls._training_pipeline(
            named_losses={"a2c_loss": loss_info["loss"]},
            pipeline_stages=[a2c_stage],
            num_mini_batch=loss_info["num_mini_batch"],
            update_repeats=loss_info["update_repeats"],
            total_train_steps=rl_steps,
        )
================================================
FILE: projects/babyai_baselines/experiments/go_to_local/base.py
================================================
from abc import ABC
from typing import Dict, List, Optional, Union, Any, cast
import gym
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.storage import ExperienceStorage
from allenact.base_abstractions.misc import Loss
from allenact.base_abstractions.sensor import SensorSuite
from allenact.utils.experiment_utils import (
Builder,
LinearDecay,
PipelineStage,
TrainingPipeline,
)
from allenact_plugins.babyai_plugin.babyai_models import BabyAIRecurrentACModel
from allenact_plugins.babyai_plugin.babyai_tasks import BabyAITask
from projects.babyai_baselines.experiments.base import BaseBabyAIExperimentConfig
class BaseBabyAIGoToLocalExperimentConfig(BaseBabyAIExperimentConfig, ABC):
    """Shared settings for experiments on the BabyAI GoToLocal level.

    Concrete experiment configs subclass this and call
    `_training_pipeline` with their own losses and pipeline stages.
    """

    LEVEL: Optional[str] = "BabyAI-GoToLocal-v0"
    TOTAL_RL_TRAIN_STEPS = int(15e6)
    TOTAL_IL_TRAIN_STEPS = int(7.5e6)
    ROLLOUT_STEPS: int = 128
    NUM_TRAIN_SAMPLERS: int = 128 if torch.cuda.is_available() else 4
    PPO_NUM_MINI_BATCH = 4
    NUM_CKPTS_TO_SAVE = 20
    NUM_TEST_TASKS: int = 1000
    USE_LR_DECAY: bool = True

    # ARCH = "cnn1"
    # ARCH = "cnn2"
    ARCH = "expert_filmcnn"
    USE_INSTR = True
    INSTR_LEN: int = 5

    INCLUDE_AUXILIARY_HEAD = False

    @classmethod
    def METRIC_ACCUMULATE_INTERVAL(cls):
        """Return the metric accumulation interval (64 per train sampler)."""
        return cls.NUM_TRAIN_SAMPLERS * 64

    @classmethod
    def _training_pipeline(  # type:ignore
        cls,
        named_losses: Dict[str, Union[Loss, Builder]],
        pipeline_stages: List[PipelineStage],
        num_mini_batch: int,
        update_repeats: int,
        total_train_steps: int,
        lr: Optional[float] = None,
        named_storages: Optional[Dict[str, Union[ExperienceStorage, Builder]]] = None,
    ):
        """Assemble a `TrainingPipeline` from the class-level settings.

        # Parameters

        named_losses : losses (or builders of losses) keyed by name.
        pipeline_stages : the stages the pipeline runs through.
        num_mini_batch : forwarded to `TrainingPipeline`.
        update_repeats : forwarded to `TrainingPipeline`.
        total_train_steps : used for the checkpoint interval and, when
            `USE_LR_DECAY` is set, for the learning rate decay schedule.
        lr : Adam learning rate; `cls.DEFAULT_LR` is used when `None`.
        named_storages : optional storages forwarded to `TrainingPipeline`.

        # Returns

        The configured `TrainingPipeline`.
        """
        # Honour an explicitly supplied learning rate; previously the
        # argument was always overwritten by the class default.
        if lr is None:
            lr = cls.DEFAULT_LR
        num_steps = cls.ROLLOUT_STEPS
        metric_accumulate_interval = cls.METRIC_ACCUMULATE_INTERVAL()
        # Spread NUM_CKPTS_TO_SAVE checkpoints evenly over training.
        save_interval = int(total_train_steps / cls.NUM_CKPTS_TO_SAVE)
        gamma = 0.99
        # GAE is disabled only when a loss named "reinforce_loss" is present.
        use_gae = "reinforce_loss" not in named_losses
        gae_lambda = 0.99
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses=named_losses,
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=None,
            should_log=cls.SHOULD_LOG,
            pipeline_stages=pipeline_stages,
            named_storages=named_storages,
            lr_scheduler_builder=(
                Builder(
                    LambdaLR, {"lr_lambda": LinearDecay(steps=total_train_steps)}  # type: ignore
                )
                if cls.USE_LR_DECAY
                else None
            ),
        )

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Instantiate the recurrent actor-critic model for this level."""
        sensors = cls.get_sensors()
        return BabyAIRecurrentACModel(
            action_space=gym.spaces.Discrete(len(BabyAITask.class_action_names())),
            observation_space=SensorSuite(sensors).observation_spaces,
            use_instr=cls.USE_INSTR,
            use_memory=True,
            arch=cls.ARCH,
            instr_dim=256,
            lang_model="attgru",
            memory_dim=2048,
            include_auxiliary_head=cls.INCLUDE_AUXILIARY_HEAD,
        )

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Always raise: these tasks have no validation processes."""
        raise RuntimeError("No validation processes for these tasks")
================================================
FILE: projects/babyai_baselines/experiments/go_to_local/bc.py
================================================
from allenact.utils.experiment_utils import PipelineStage
from projects.babyai_baselines.experiments.go_to_local.base import (
BaseBabyAIGoToLocalExperimentConfig,
)
class PPOBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):
    """Behavior cloning on GoToLocal.

    Only an imitation loss is trained here; the PPO defaults are looked
    up solely to choose the mini-batch and update-repeat counts.
    """

    USE_EXPERT = True

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "BabyAIGoToLocalBC"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single-stage pipeline that optimizes the imitation loss."""
        il_steps = cls.TOTAL_IL_TRAIN_STEPS
        ppo_defaults = cls.rl_loss_default("ppo", steps=-1)
        il_defaults = cls.rl_loss_default("imitation")
        candidates = (ppo_defaults, il_defaults)

        imitation_stage = PipelineStage(
            loss_names=["imitation_loss"],
            max_stage_steps=il_steps,
        )
        return cls._training_pipeline(
            named_losses={"imitation_loss": il_defaults["loss"]},
            pipeline_stages=[imitation_stage],
            # Take the smaller of the PPO and imitation defaults.
            num_mini_batch=min(c["num_mini_batch"] for c in candidates),
            update_repeats=min(c["update_repeats"] for c in candidates),
            total_train_steps=il_steps,
        )
================================================
FILE: projects/babyai_baselines/experiments/go_to_local/bc_teacher_forcing.py
================================================
import torch
from allenact.utils.experiment_utils import PipelineStage, LinearDecay
from projects.babyai_baselines.experiments.go_to_local.base import (
BaseBabyAIGoToLocalExperimentConfig,
)
class BCTeacherForcingBabyAIGoToLocalExperimentConfig(
    BaseBabyAIGoToLocalExperimentConfig
):
    """Behavior cloning on GoToLocal with teacher forcing held at 1.0."""

    USE_EXPERT = True

    # First GPU when CUDA is available, otherwise no GPU.
    GPU_ID = None if not torch.cuda.is_available() else 0

    @classmethod
    def METRIC_ACCUMULATE_INTERVAL(cls):
        """Return the metric accumulation interval (always 1 here)."""
        return 1

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "BabyAIGoToLocalBCTeacherForcing"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single imitation stage with a constant teacher-forcing schedule."""
        il_steps = cls.TOTAL_IL_TRAIN_STEPS
        ppo_defaults = cls.rl_loss_default("ppo", steps=-1)
        il_defaults = cls.rl_loss_default("imitation")
        candidates = (ppo_defaults, il_defaults)

        # Start and end values are both 1.0, so the schedule is constant.
        always_teacher = LinearDecay(
            startp=1.0,
            endp=1.0,
            steps=il_steps,
        )
        imitation_stage = PipelineStage(
            loss_names=["imitation_loss"],
            teacher_forcing=always_teacher,
            max_stage_steps=il_steps,
        )
        return cls._training_pipeline(
            named_losses={"imitation_loss": il_defaults["loss"]},
            pipeline_stages=[imitation_stage],
            num_mini_batch=min(c["num_mini_batch"] for c in candidates),
            update_repeats=min(c["update_repeats"] for c in candidates),
            total_train_steps=il_steps,
        )
================================================
FILE: projects/babyai_baselines/experiments/go_to_local/dagger.py
================================================
from allenact.utils.experiment_utils import PipelineStage, LinearDecay
from projects.babyai_baselines.experiments.go_to_local.base import (
BaseBabyAIGoToLocalExperimentConfig,
)
class DaggerBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):
    """Imitation learning on GoToLocal, trained with Dagger.

    Teacher forcing is scheduled with `LinearDecay` from 1.0 to 0.0 over
    half of the imitation training steps.
    """

    USE_EXPERT = True

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "BabyAIGoToLocalDagger"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single imitation stage with a decaying teacher-forcing schedule."""
        il_steps = cls.TOTAL_IL_TRAIN_STEPS
        il_defaults = cls.rl_loss_default("imitation")

        forcing_schedule = LinearDecay(
            startp=1.0,
            endp=0.0,
            steps=il_steps // 2,
        )
        dagger_stage = PipelineStage(
            loss_names=["imitation_loss"],
            teacher_forcing=forcing_schedule,
            max_stage_steps=il_steps,
        )
        return cls._training_pipeline(
            named_losses={"imitation_loss": il_defaults["loss"]},
            pipeline_stages=[dagger_stage],
            num_mini_batch=il_defaults["num_mini_batch"],
            update_repeats=il_defaults["update_repeats"],
            total_train_steps=il_steps,
        )
================================================
FILE: projects/babyai_baselines/experiments/go_to_local/distributed_bc_offpolicy.py
================================================
import os
from typing import Optional
from typing import Sequence
import torch
from allenact.algorithms.onpolicy_sync.storage import RolloutBlockStorage
from allenact.utils.experiment_utils import (
PipelineStage,
StageComponent,
TrainingSettings,
)
from allenact_plugins.babyai_plugin.babyai_constants import (
BABYAI_EXPERT_TRAJECTORIES_DIR,
)
from allenact_plugins.minigrid_plugin.minigrid_offpolicy import (
MiniGridOffPolicyExpertCELoss,
MiniGridExpertTrajectoryStorage,
)
from projects.tutorials.minigrid_offpolicy_tutorial import (
BCOffPolicyBabyAIGoToLocalExperimentConfig,
)
class DistributedBCOffPolicyBabyAIGoToLocalExperimentConfig(
    BCOffPolicyBabyAIGoToLocalExperimentConfig
):
    """Off-policy behavior cloning on GoToLocal, spread over up to two GPUs."""

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "DistributedBabyAIGoToLocalBCOffPolicy"

    @classmethod
    def machine_params(
        cls, mode="train", gpu_id="default", n_train_processes="default", **kwargs
    ):
        """Take the parent's machine params and split processes across GPUs.

        The split happens only when there is at least one process and CUDA
        is available; otherwise the parent's result is returned unchanged.
        """
        params = super().machine_params(mode, gpu_id, n_train_processes, **kwargs)
        if not (params["nprocesses"] > 0 and torch.cuda.is_available()):
            return params

        gpu_count = min(torch.cuda.device_count(), 2)
        per_gpu = params["nprocesses"] // gpu_count
        params["nprocesses"] = [per_gpu] * gpu_count
        params["gpu_ids"] = list(range(gpu_count))
        return params

    @classmethod
    def expert_ce_loss_kwargs_generator(
        cls, worker_id: int, rollouts_per_worker: Sequence[int], seed: Optional[int]
    ):
        """Return the worker count and this worker's id; `seed` is unused."""
        return {
            "num_workers": len(rollouts_per_worker),
            "current_worker": worker_id,
        }

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single stage that trains the off-policy expert CE loss."""
        il_steps = cls.TOTAL_IL_TRAIN_STEPS
        ppo_defaults = cls.rl_loss_default("ppo", steps=-1)
        mini_batches = ppo_defaults["num_mini_batch"]
        repeats = ppo_defaults["update_repeats"]

        expert_loss = MiniGridOffPolicyExpertCELoss(total_episodes_in_epoch=int(1e6))

        onpolicy_storage = RolloutBlockStorage()
        # The "-small" trajectory file is used when CUDA is unavailable.
        trajectories_file = "BabyAI-GoToLocal-v0{}.pkl".format(
            "" if torch.cuda.is_available() else "-small"
        )
        expert_storage = MiniGridExpertTrajectoryStorage(
            data_path=os.path.join(BABYAI_EXPERT_TRAJECTORIES_DIR, trajectories_file),
            num_samplers=cls.NUM_TRAIN_SAMPLERS,
            rollout_len=cls.ROLLOUT_STEPS,
            instr_len=cls.INSTR_LEN,
        )

        offpolicy_component = StageComponent(
            uuid="offpolicy",
            storage_uuid="minigrid_offpolicy_expert",
            loss_names=["offpolicy_expert_ce_loss"],
            training_settings=TrainingSettings(
                update_repeats=mini_batches * repeats,
                num_mini_batch=1,
            ),
        )
        offpolicy_stage = PipelineStage(
            loss_names=["offpolicy_expert_ce_loss"],
            max_stage_steps=il_steps,
            stage_components=[offpolicy_component],
        )

        return cls._training_pipeline(
            named_losses={"offpolicy_expert_ce_loss": expert_loss},
            named_storages={
                "onpolicy": onpolicy_storage,
                "minigrid_offpolicy_expert": expert_storage,
            },
            pipeline_stages=[offpolicy_stage],
            num_mini_batch=0,
            update_repeats=0,
            total_train_steps=il_steps,
        )
================================================
FILE: projects/babyai_baselines/experiments/go_to_local/distributed_bc_teacher_forcing.py
================================================
import torch
from .bc_teacher_forcing import BCTeacherForcingBabyAIGoToLocalExperimentConfig
class DistributedBCTeacherForcingBabyAIGoToLocalExperimentConfig(
    BCTeacherForcingBabyAIGoToLocalExperimentConfig
):
    """Teacher-forced behavior cloning on GoToLocal, spread over up to two GPUs."""

    USE_EXPERT = True

    # First GPU when CUDA is available, otherwise no GPU.
    GPU_ID = None if not torch.cuda.is_available() else 0

    @classmethod
    def METRIC_ACCUMULATE_INTERVAL(cls):
        """Return the metric accumulation interval (always 1 here)."""
        return 1

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "BabyAIGoToLocalBCTeacherForcingDistributed"

    @classmethod
    def machine_params(
        cls, mode="train", gpu_id="default", n_train_processes="default", **kwargs
    ):
        """Take the parent's machine params and split processes across GPUs.

        The split happens only when there is at least one process and CUDA
        is available; otherwise the parent's result is returned unchanged.
        """
        params = super().machine_params(mode, gpu_id, n_train_processes, **kwargs)
        if not (params["nprocesses"] > 0 and torch.cuda.is_available()):
            return params

        gpu_count = min(torch.cuda.device_count(), 2)
        per_gpu = params["nprocesses"] // gpu_count
        params["nprocesses"] = [per_gpu] * gpu_count
        params["gpu_ids"] = list(range(gpu_count))
        return params
================================================
FILE: projects/babyai_baselines/experiments/go_to_local/ppo.py
================================================
import torch
from allenact.utils.experiment_utils import PipelineStage
from projects.babyai_baselines.experiments.go_to_local.base import (
BaseBabyAIGoToLocalExperimentConfig,
)
class PPOBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):
    """GoToLocal experiment trained with the PPO loss alone."""

    # Use many more samplers when CUDA is available; otherwise keep the
    # base config's value.
    NUM_TRAIN_SAMPLERS: int = (
        BaseBabyAIGoToLocalExperimentConfig.NUM_TRAIN_SAMPLERS
        if not torch.cuda.is_available()
        else 128 * 12
    )
    ROLLOUT_STEPS: int = 32
    USE_LR_DECAY = False
    DEFAULT_LR = 1e-4

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "BabyAIGoToLocalPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single-stage pipeline that optimizes only the PPO loss."""
        rl_steps = cls.TOTAL_RL_TRAIN_STEPS
        loss_info = cls.rl_loss_default("ppo", steps=rl_steps)

        ppo_stage = PipelineStage(
            loss_names=["ppo_loss"],
            max_stage_steps=rl_steps,
        )
        return cls._training_pipeline(
            named_losses={"ppo_loss": loss_info["loss"]},
            pipeline_stages=[ppo_stage],
            num_mini_batch=loss_info["num_mini_batch"],
            update_repeats=loss_info["update_repeats"],
            total_train_steps=rl_steps,
        )
================================================
FILE: projects/babyai_baselines/experiments/go_to_obj/__init__.py
================================================
================================================
FILE: projects/babyai_baselines/experiments/go_to_obj/a2c.py
================================================
from allenact.utils.experiment_utils import PipelineStage
from projects.babyai_baselines.experiments.go_to_obj.base import (
BaseBabyAIGoToObjExperimentConfig,
)
class A2CBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):
    """GoToObj experiment trained with the A2C loss alone."""

    TOTAL_RL_TRAIN_STEPS = int(1e5)

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "BabyAIGoToObjA2C"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single-stage pipeline that optimizes only the A2C loss."""
        rl_steps = cls.TOTAL_RL_TRAIN_STEPS
        loss_info = cls.rl_loss_default("a2c", steps=rl_steps)

        a2c_stage = PipelineStage(
            loss_names=["a2c_loss"],
            max_stage_steps=rl_steps,
        )
        return cls._training_pipeline(
            named_losses={"a2c_loss": loss_info["loss"]},
            pipeline_stages=[a2c_stage],
            num_mini_batch=loss_info["num_mini_batch"],
            update_repeats=loss_info["update_repeats"],
            total_train_steps=rl_steps,
        )
================================================
FILE: projects/babyai_baselines/experiments/go_to_obj/base.py
================================================
from abc import ABC
from typing import Dict, List, Optional, Union, cast
import gym
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.base_abstractions.misc import Loss
from allenact.base_abstractions.sensor import SensorSuite
from allenact.utils.experiment_utils import (
Builder,
LinearDecay,
PipelineStage,
TrainingPipeline,
)
from allenact_plugins.babyai_plugin.babyai_models import BabyAIRecurrentACModel
from allenact_plugins.babyai_plugin.babyai_tasks import BabyAITask
from projects.babyai_baselines.experiments.base import BaseBabyAIExperimentConfig
class BaseBabyAIGoToObjExperimentConfig(BaseBabyAIExperimentConfig, ABC):
    """Shared settings for experiments on the BabyAI GoToObj level.

    Concrete experiment configs subclass this and call
    `_training_pipeline` with their own losses and pipeline stages.
    """

    LEVEL: Optional[str] = "BabyAI-GoToObj-v0"
    TOTAL_RL_TRAIN_STEPS = int(5e4)
    TOTAL_IL_TRAIN_STEPS = int(2e4)
    ROLLOUT_STEPS: int = 32
    NUM_TRAIN_SAMPLERS: int = 16
    PPO_NUM_MINI_BATCH = 2
    NUM_TEST_TASKS: int = 50
    USE_LR_DECAY: bool = False

    DEFAULT_LR = 1e-3

    ARCH = "cnn1"
    # ARCH = "cnn2"
    # ARCH = "expert_filmcnn"

    USE_INSTR = False
    INSTR_LEN: int = -1

    @classmethod
    def METRIC_ACCUMULATE_INTERVAL(cls):
        """Return the metric accumulation interval (128 per train sampler)."""
        return cls.NUM_TRAIN_SAMPLERS * 128

    @classmethod
    def _training_pipeline(  # type:ignore
        cls,
        named_losses: Dict[str, Union[Loss, Builder]],
        pipeline_stages: List[PipelineStage],
        num_mini_batch: int,
        update_repeats: int,
        total_train_steps: int,
        lr: Optional[float] = None,
        **kwargs,
    ):
        """Assemble a `TrainingPipeline` from the class-level settings.

        # Parameters

        named_losses : losses (or builders of losses) keyed by name.
        pipeline_stages : the stages the pipeline runs through.
        num_mini_batch : forwarded to `TrainingPipeline`.
        update_repeats : forwarded to `TrainingPipeline`.
        total_train_steps : used for the learning rate decay schedule when
            `USE_LR_DECAY` is set.
        lr : Adam learning rate; `cls.DEFAULT_LR` is used when `None`.
        kwargs : extra keyword arguments forwarded to `TrainingPipeline`.

        # Returns

        The configured `TrainingPipeline`.
        """
        # Honour an explicitly supplied learning rate; previously the
        # argument was always overwritten by the class default.
        if lr is None:
            lr = cls.DEFAULT_LR
        num_steps = cls.ROLLOUT_STEPS
        metric_accumulate_interval = cls.METRIC_ACCUMULATE_INTERVAL()
        # A very large fixed save interval is used for this level.
        save_interval = 2**31
        gamma = 0.99
        # GAE is disabled only when a loss named "reinforce_loss" is present.
        use_gae = "reinforce_loss" not in named_losses
        gae_lambda = 0.99
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses=named_losses,
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=None,
            should_log=cls.SHOULD_LOG,
            pipeline_stages=pipeline_stages,
            lr_scheduler_builder=(
                Builder(
                    LambdaLR, {"lr_lambda": LinearDecay(steps=total_train_steps)}  # type: ignore
                )
                if cls.USE_LR_DECAY
                else None
            ),
            **kwargs,
        )

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Instantiate the recurrent actor-critic model for this level."""
        sensors = cls.get_sensors()
        return BabyAIRecurrentACModel(
            action_space=gym.spaces.Discrete(len(BabyAITask.class_action_names())),
            observation_space=SensorSuite(sensors).observation_spaces,
            use_instr=cls.USE_INSTR,
            use_memory=True,
            arch=cls.ARCH,
            instr_dim=8,
            lang_model="gru",
            memory_dim=128,
        )
================================================
FILE: projects/babyai_baselines/experiments/go_to_obj/bc.py
================================================
from allenact.utils.experiment_utils import PipelineStage
from projects.babyai_baselines.experiments.go_to_obj.base import (
BaseBabyAIGoToObjExperimentConfig,
)
class PPOBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):
    """Behavior cloning on GoToObj.

    Only an imitation loss is trained here; the PPO defaults are looked
    up solely to choose the mini-batch and update-repeat counts.
    """

    USE_EXPERT = True

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "BabyAIGoToObjBC"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single-stage pipeline that optimizes the imitation loss."""
        il_steps = cls.TOTAL_IL_TRAIN_STEPS
        ppo_defaults = cls.rl_loss_default("ppo", steps=-1)
        il_defaults = cls.rl_loss_default("imitation")
        candidates = (ppo_defaults, il_defaults)

        imitation_stage = PipelineStage(
            loss_names=["imitation_loss"],
            max_stage_steps=il_steps,
        )
        return cls._training_pipeline(
            named_losses={"imitation_loss": il_defaults["loss"]},
            pipeline_stages=[imitation_stage],
            # Take the smaller of the PPO and imitation defaults.
            num_mini_batch=min(c["num_mini_batch"] for c in candidates),
            update_repeats=min(c["update_repeats"] for c in candidates),
            total_train_steps=il_steps,
        )
================================================
FILE: projects/babyai_baselines/experiments/go_to_obj/bc_teacher_forcing.py
================================================
from allenact.utils.experiment_utils import PipelineStage, LinearDecay
from projects.babyai_baselines.experiments.go_to_obj.base import (
BaseBabyAIGoToObjExperimentConfig,
)
class PPOBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):
    """Behavior cloning on GoToObj with teacher forcing held at 1.0.

    Only an imitation loss is trained here; the PPO defaults are looked
    up solely to choose the mini-batch and update-repeat counts.
    """

    USE_EXPERT = True

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "BabyAIGoToObjBCTeacherForcing"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single imitation stage with a constant teacher-forcing schedule."""
        il_steps = cls.TOTAL_IL_TRAIN_STEPS
        ppo_defaults = cls.rl_loss_default("ppo", steps=-1)
        il_defaults = cls.rl_loss_default("imitation")
        candidates = (ppo_defaults, il_defaults)

        # Start and end values are both 1.0, so the schedule is constant.
        always_teacher = LinearDecay(
            startp=1.0,
            endp=1.0,
            steps=il_steps,
        )
        imitation_stage = PipelineStage(
            loss_names=["imitation_loss"],
            teacher_forcing=always_teacher,
            max_stage_steps=il_steps,
        )
        return cls._training_pipeline(
            named_losses={"imitation_loss": il_defaults["loss"]},
            pipeline_stages=[imitation_stage],
            num_mini_batch=min(c["num_mini_batch"] for c in candidates),
            update_repeats=min(c["update_repeats"] for c in candidates),
            total_train_steps=il_steps,
        )
================================================
FILE: projects/babyai_baselines/experiments/go_to_obj/dagger.py
================================================
from allenact.utils.experiment_utils import PipelineStage, LinearDecay
from projects.babyai_baselines.experiments.go_to_obj.base import (
BaseBabyAIGoToObjExperimentConfig,
)
class DaggerBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):
    """Imitation learning on GoToObj, trained with Dagger.

    Teacher forcing is scheduled with `LinearDecay` from 1.0 to 0.0 over
    half of the imitation training steps.
    """

    USE_EXPERT = True

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "BabyAIGoToObjDagger"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single imitation stage with a decaying teacher-forcing schedule."""
        il_steps = cls.TOTAL_IL_TRAIN_STEPS
        il_defaults = cls.rl_loss_default("imitation")

        forcing_schedule = LinearDecay(
            startp=1.0,
            endp=0.0,
            steps=il_steps // 2,
        )
        dagger_stage = PipelineStage(
            loss_names=["imitation_loss"],
            teacher_forcing=forcing_schedule,
            max_stage_steps=il_steps,
        )
        return cls._training_pipeline(
            named_losses={"imitation_loss": il_defaults["loss"]},
            pipeline_stages=[dagger_stage],
            num_mini_batch=il_defaults["num_mini_batch"],
            update_repeats=il_defaults["update_repeats"],
            total_train_steps=il_steps,
        )
================================================
FILE: projects/babyai_baselines/experiments/go_to_obj/ppo.py
================================================
from allenact.utils.experiment_utils import PipelineStage
from projects.babyai_baselines.experiments.go_to_obj.base import (
BaseBabyAIGoToObjExperimentConfig,
)
class PPOBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):
    """GoToObj experiment trained with the PPO loss alone."""

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "BabyAIGoToObjPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single-stage pipeline that optimizes only the PPO loss."""
        rl_steps = cls.TOTAL_RL_TRAIN_STEPS
        loss_info = cls.rl_loss_default("ppo", steps=rl_steps)

        ppo_stage = PipelineStage(
            loss_names=["ppo_loss"],
            max_stage_steps=rl_steps,
        )
        return cls._training_pipeline(
            named_losses={"ppo_loss": loss_info["loss"]},
            pipeline_stages=[ppo_stage],
            num_mini_batch=loss_info["num_mini_batch"],
            update_repeats=loss_info["update_repeats"],
            total_train_steps=rl_steps,
        )
================================================
FILE: projects/gym_baselines/README.md
================================================
# Baseline models Gym (for MuJoCo environments)
This project contains the code for training baseline models for the tasks under the [MuJoCo](https://gym.openai.com/envs/#mujoco) group of Gym environments, including ["Ant-v2"](https://gym.openai.com/envs/Ant-v2/), ["HalfCheetah-v2"](https://gym.openai.com/envs/HalfCheetah-v2/), ["Hopper-v2"](https://gym.openai.com/envs/Hopper-v2/), ["Humanoid-v2"](https://gym.openai.com/envs/Humanoid-v2/), ["InvertedDoublePendulum-v2"](https://gym.openai.com/envs/InvertedDoublePendulum-v2/), ["InvertedPendulum-v2"](https://gym.openai.com/envs/InvertedPendulum-v2/), ["Reacher-v2"](https://gym.openai.com/envs/Reacher-v2/), ["Swimmer-v2"](https://gym.openai.com/envs/Swimmer-v2/), and ["Walker2d-v2"](https://gym.openai.com/envs/Walker2d-v2/).
Provided are experiment configs for training a lightweight implementation with separate MLPs for actors and critic, [MemorylessActorCritic](https://allenact.org/api/allenact_plugins/gym_plugin/gym_models/#memorylessactorcritic), with a [Gaussian distribution](https://allenact.org/api/allenact_plugins/gym_plugin/gym_distributions/#gaussiandistr) to sample actions for all continuous-control environments under the `MuJoCo` group of `Gym` environments.
The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)
Reinforcement Learning Algorithm.
To train an experiment run the following command from the `allenact` root directory:
```bash
python main.py <PATH_TO_EXPERIMENT_CONFIG> -o <PATH_TO_OUTPUT>
```
Where `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights
and logs to be stored and `<PATH_TO_EXPERIMENT_CONFIG>` is the path to the python file containing
the experiment configuration. An example usage of this command would be:
```bash
python main.py projects/gym_baselines/experiments/mujoco/gym_mujoco_ant_ddppo.py -o /YOUR/DESIRED/MUJOCO/OUTPUT/SAVE/PATH/gym_mujoco_ant_ddppo
```
This trains a lightweight implementation with separate MLPs for actors and critic with a Gaussian distribution to sample actions in the "Ant-v2" environment, and stores the model weights and logs
to `/YOUR/DESIRED/MUJOCO/OUTPUT/SAVE/PATH/gym_mujoco_ant_ddppo`.
## Results
In our experiments, the rewards we obtained on the MuJoCo environments after training with PPO are similar to those reported by OpenAI Gym Baselines (1M steps). The Humanoid environment is instead compared with the original PPO paper, which trains for 50M steps using PPO. Due to time constraints, we have only tested our baselines across two seeds so far.
| Environment | Gym Baseline Reward | Ours Reward |
| ----------- | ------------------- | ----------- |
|[Ant-v2](https://gym.openai.com/envs/Ant-v2/)| 1083.2 |1098.6(reached 4719 in 25M steps) |
| [HalfCheetah-v2](https://gym.openai.com/envs/HalfCheetah-v2/) | 1795.43 | 1741(reached 4019 in 18M steps) |
|[Hopper-v2](https://gym.openai.com/envs/Hopper-v2/)|2316.16|2266|
|[Humanoid-v2](https://gym.openai.com/envs/Humanoid-v2/)|4000+|4500+(reached 6500 in 70M steps)|
| [InvertedPendulum-v2](https://gym.openai.com/envs/InvertedPendulum-v2/) | 809.43 | 1000 |
|[Reacher-v2](https://gym.openai.com/envs/Reacher-v2/)|-6.71|-7.045|
|[Swimmer-v2](https://gym.openai.com/envs/Swimmer-v2/)|111.19|124.7|
|[Walker2d](https://gym.openai.com/envs/Walker2d-v2/)|3424.95|2723 in 10M steps|
================================================
FILE: projects/gym_baselines/__init__.py
================================================
================================================
FILE: projects/gym_baselines/experiments/__init__.py
================================================
================================================
FILE: projects/gym_baselines/experiments/gym_base.py
================================================
from abc import ABC
from typing import Dict, Sequence, Optional, List, Any
from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.base_abstractions.sensor import Sensor
class GymBaseConfig(ExperimentConfig, ABC):
    """Base experiment config for the Gym baselines.

    The three `*_task_sampler_args` methods all delegate to
    `_get_sampler_args`, differing only in the `mode` string they pass.
    """

    SENSORS: Optional[Sequence[Sensor]] = None

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Produce task sampler arguments for `mode`; not implemented here."""
        raise NotImplementedError

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for mode "train".

        Only `process_ind` and `seeds` are forwarded.
        """
        sampler_args = self._get_sampler_args(
            process_ind=process_ind, mode="train", seeds=seeds
        )
        return sampler_args

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for mode "valid".

        Only `process_ind` and `seeds` are forwarded.
        """
        sampler_args = self._get_sampler_args(
            process_ind=process_ind, mode="valid", seeds=seeds
        )
        return sampler_args

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for mode "test".

        Only `process_ind` and `seeds` are forwarded.
        """
        sampler_args = self._get_sampler_args(
            process_ind=process_ind, mode="test", seeds=seeds
        )
        return sampler_args
================================================
FILE: projects/gym_baselines/experiments/gym_humanoid_base.py
================================================
from abc import ABC
from typing import Dict, Any
from allenact.utils.viz_utils import VizSuite, AgentViewViz
from projects.gym_baselines.experiments.gym_base import GymBaseConfig
class GymHumanoidBaseConfig(GymBaseConfig, ABC):
    """Machine settings shared by the Humanoid Gym experiments."""

    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        """Return process count, devices and visualizer for `mode`.

        A video visualizer is created only for mode "test". Training uses
        8 processes and every other mode uses 1. The device list is empty.
        """
        if mode == "test":
            episode_video = AgentViewViz(
                label="episode_vid",
                max_clip_length=400,
                vector_task_source=("render", {"mode": "rgb_array"}),
                fps=30,
            )
            visualizer = VizSuite(mode=mode, video_viz=episode_video)
        else:
            visualizer = None

        if mode == "train":
            rollout_processes = 8
        else:
            rollout_processes = 1

        return dict(
            nprocesses=rollout_processes,
            devices=[],
            visualizer=visualizer,
        )
================================================
FILE: projects/gym_baselines/experiments/gym_humanoid_ddppo.py
================================================
from abc import ABC
from typing import cast
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses.ppo import PPO
from allenact.utils.experiment_utils import (
TrainingPipeline,
Builder,
PipelineStage,
LinearDecay,
)
from projects.gym_baselines.experiments.gym_humanoid_base import GymHumanoidBaseConfig
class GymHumanoidPPOConfig(GymHumanoidBaseConfig, ABC):
    """PPO training pipeline for the Humanoid Gym experiments."""

    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        """Build a single-stage PPO pipeline with a constant learning rate."""
        total_steps = int(8e7)  # convergence may be after 1e8

        ppo_loss = PPO(
            clip_param=0.1,
            value_loss_coef=0.5,
            entropy_coef=0.0,
        )
        ppo_stage = PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps)
        adam_builder = Builder(cast(optim.Optimizer, optim.Adam), {"lr": 1e-4})
        # startp == endp == 1 keeps the learning rate multiplier constant.
        scheduler_builder = Builder(
            LambdaLR,
            {"lr_lambda": LinearDecay(steps=total_steps, startp=1, endp=1)},
        )

        return TrainingPipeline(
            named_losses={"ppo_loss": ppo_loss},  # type:ignore
            pipeline_stages=[ppo_stage],
            optimizer_builder=adam_builder,
            num_mini_batch=4,  # optimal 64
            update_repeats=10,
            max_grad_norm=0.5,
            num_steps=2048,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=None,
            save_interval=200000,
            metric_accumulate_interval=50000,
            lr_scheduler_builder=scheduler_builder,
        )
================================================
FILE: projects/gym_baselines/experiments/gym_mujoco_base.py
================================================
from abc import ABC
from typing import Dict, Any
from allenact.utils.viz_utils import VizSuite, AgentViewViz
from projects.gym_baselines.experiments.gym_base import GymBaseConfig
class GymMoJoCoBaseConfig(GymBaseConfig, ABC):
    """Machine settings shared by the MuJoCo Gym experiments."""

    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        """Return process count, devices and visualizer for `mode`.

        A video visualizer is created only for mode "test". Training uses
        8 processes and every other mode uses 1. The device list is empty.
        """
        if mode == "test":
            episode_video = AgentViewViz(
                label="episode_vid",
                max_clip_length=400,
                vector_task_source=("render", {"mode": "rgb_array"}),
                fps=30,
            )
            visualizer = VizSuite(mode=mode, video_viz=episode_video)
        else:
            visualizer = None

        if mode == "train":
            rollout_processes = 8
        else:
            rollout_processes = 1

        return dict(
            nprocesses=rollout_processes,
            devices=[],
            visualizer=visualizer,
        )
================================================
FILE: projects/gym_baselines/experiments/gym_mujoco_ddppo.py
================================================
from abc import ABC
from typing import cast
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses.ppo import PPO
from allenact.utils.experiment_utils import (
TrainingPipeline,
Builder,
PipelineStage,
LinearDecay,
)
from projects.gym_baselines.experiments.gym_mujoco_base import GymMoJoCoBaseConfig
class GymMuJoCoPPOConfig(GymMoJoCoBaseConfig, ABC):
    """PPO training pipeline for the MuJoCo Gym experiments."""

    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        """Build a single-stage PPO pipeline with a `LinearDecay` schedule from 1 to 0."""
        total_steps = int(3e7)

        ppo_loss = PPO(
            clip_param=0.2,
            value_loss_coef=0.5,
            entropy_coef=0.0,
        )
        ppo_stage = PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps)
        adam_builder = Builder(cast(optim.Optimizer, optim.Adam), {"lr": 3e-4})
        scheduler_builder = Builder(
            LambdaLR,
            {"lr_lambda": LinearDecay(steps=total_steps, startp=1, endp=0)},
        )

        return TrainingPipeline(
            named_losses={"ppo_loss": ppo_loss},  # type:ignore
            pipeline_stages=[ppo_stage],
            optimizer_builder=adam_builder,
            num_mini_batch=4,  # optimal 64
            update_repeats=10,
            max_grad_norm=0.5,
            num_steps=2048,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=None,
            save_interval=200000,
            metric_accumulate_interval=50000,
            lr_scheduler_builder=scheduler_builder,
        )
================================================
FILE: projects/gym_baselines/experiments/mujoco/__init__.py
================================================
================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_ant_ddppo.py
================================================
from typing import Dict, List, Any
import gym
import torch.nn as nn
from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor
from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler
from projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig
class GymMuJoCoAntConfig(GymMuJoCoPPOConfig):
    """PPO experiment configuration for the `Ant-v2` Gym MuJoCo environment."""

    SENSORS = [GymMuJoCoSensor(gym_env_name="Ant-v2", uuid="gym_mujoco_data")]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the `MemorylessActorCritic` agent for this experiment.

        `MemorylessActorCritic` is a lightweight implementation with
        separate MLPs for actors and critic. Because this is continuous
        control, its superclass is `ActorCriticModel[GaussianDistr]`
        rather than `ActorCriticModel[CategoricalDistr]`: actions are
        sampled from a Gaussian distribution.
        """
        # NOTE(review): bounds are (-3, 3) here; confirm they match the
        # action space reported by the Ant-v2 environment itself.
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=gym.spaces.Box(-3.0, 3.0, (8,), "float32"),
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate a `GymTaskSampler` for `Ant-v2` from `kwargs`."""
        return GymTaskSampler(gym_env_type="Ant-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Assemble the initialization arguments of a train, valid, or test
        TaskSampler.

        # Parameters

        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : per-process seeds; the entry at `process_ind` is used

        # Returns

        Keyword arguments for the task sampler.
        """
        evaluating = mode != "train"
        if evaluating:
            # Validation/testing: a fixed number of tasks, with one seed per
            # task taken from a block of seeds unique to this sampler, so the
            # sampled tasks are deterministic and differ across samplers.
            max_tasks = 4
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))
        else:
            # Training: infinite tasks and no predefined seeds.
            max_tasks = None
            task_seeds_list = None

        return dict(
            gym_env_types=["Ant-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,
            task_seeds_list=task_seeds_list,
            deterministic_sampling=evaluating,  # random sampling only in training
            seed=seeds[process_ind],
        )

    @classmethod
    def tag(cls) -> str:
        """Return the identifying tag of this experiment."""
        return "Gym-MuJoCo-Ant-v2-PPO"
================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_halfcheetah_ddppo.py
================================================
from typing import Dict, List, Any
import gym
import torch.nn as nn
from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor
from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler
from projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig
class GymMuJoCoHalfCheetahConfig(GymMuJoCoPPOConfig):
    """PPO experiment configuration for the `HalfCheetah-v2` Gym MuJoCo
    environment."""

    SENSORS = [
        GymMuJoCoSensor(gym_env_name="HalfCheetah-v2", uuid="gym_mujoco_data")
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the `MemorylessActorCritic` agent for this experiment.

        `MemorylessActorCritic` is a lightweight implementation with
        separate MLPs for actors and critic. Because this is continuous
        control, its superclass is `ActorCriticModel[GaussianDistr]`
        rather than `ActorCriticModel[CategoricalDistr]`: actions are
        sampled from a Gaussian distribution.
        """
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=gym.spaces.Box(-1.0, 1.0, (6,), "float32"),
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate a `GymTaskSampler` for `HalfCheetah-v2` from `kwargs`."""
        return GymTaskSampler(gym_env_type="HalfCheetah-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Assemble the initialization arguments of a train, valid, or test
        TaskSampler.

        # Parameters

        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : per-process seeds; the entry at `process_ind` is used

        # Returns

        Keyword arguments for the task sampler.
        """
        evaluating = mode != "train"
        if evaluating:
            # Validation/testing: a fixed number of tasks, with one seed per
            # task taken from a block of seeds unique to this sampler, so the
            # sampled tasks are deterministic and differ across samplers.
            max_tasks = 4
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))
        else:
            # Training: infinite tasks and no predefined seeds.
            max_tasks = None
            task_seeds_list = None

        return dict(
            gym_env_types=["HalfCheetah-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,
            task_seeds_list=task_seeds_list,
            deterministic_sampling=evaluating,  # random sampling only in training
            seed=seeds[process_ind],
        )

    @classmethod
    def tag(cls) -> str:
        """Return the identifying tag of this experiment."""
        return "Gym-MuJoCo-HalfCheetah-v2-PPO"
================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_hopper_ddppo.py
================================================
from typing import Dict, List, Any
import gym
import torch.nn as nn
from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor
from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler
from projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig
class GymMuJoCoHopperConfig(GymMuJoCoPPOConfig):
    """PPO experiment configuration for the `Hopper-v2` Gym MuJoCo
    environment."""

    SENSORS = [GymMuJoCoSensor(gym_env_name="Hopper-v2", uuid="gym_mujoco_data")]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the `MemorylessActorCritic` agent for this experiment.

        `MemorylessActorCritic` is a lightweight implementation with
        separate MLPs for actors and critic. Because this is continuous
        control, its superclass is `ActorCriticModel[GaussianDistr]`
        rather than `ActorCriticModel[CategoricalDistr]`: actions are
        sampled from a Gaussian distribution.
        """
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=gym.spaces.Box(-1.0, 1.0, (3,), "float32"),
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate a `GymTaskSampler` for `Hopper-v2` from `kwargs`."""
        return GymTaskSampler(gym_env_type="Hopper-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Assemble the initialization arguments of a train, valid, or test
        TaskSampler.

        # Parameters

        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : per-process seeds; the entry at `process_ind` is used

        # Returns

        Keyword arguments for the task sampler.
        """
        evaluating = mode != "train"
        if evaluating:
            # Validation/testing: a fixed number of tasks, with one seed per
            # task taken from a block of seeds unique to this sampler, so the
            # sampled tasks are deterministic and differ across samplers.
            max_tasks = 4
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))
        else:
            # Training: infinite tasks and no predefined seeds.
            max_tasks = None
            task_seeds_list = None

        return dict(
            gym_env_types=["Hopper-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,
            task_seeds_list=task_seeds_list,
            deterministic_sampling=evaluating,  # random sampling only in training
            seed=seeds[process_ind],
        )

    @classmethod
    def tag(cls) -> str:
        """Return the identifying tag of this experiment."""
        return "Gym-MuJoCo-Hopper-v2-PPO"
================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_humanoid_ddppo.py
================================================
from typing import Dict, List, Any
import gym
import torch.nn as nn
from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor
from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler
from projects.gym_baselines.experiments.gym_humanoid_ddppo import GymHumanoidPPOConfig
class GymMuJoCoHumanoidConfig(GymHumanoidPPOConfig):
    """PPO experiment configuration for the `Humanoid-v2` Gym MuJoCo
    environment."""

    SENSORS = [
        GymMuJoCoSensor(gym_env_name="Humanoid-v2", uuid="gym_mujoco_data")
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the `MemorylessActorCritic` agent for this experiment.

        `MemorylessActorCritic` is a lightweight implementation with
        separate MLPs for actors and critic. Because this is continuous
        control, its superclass is `ActorCriticModel[GaussianDistr]`
        rather than `ActorCriticModel[CategoricalDistr]`: actions are
        sampled from a Gaussian distribution.
        """
        action_bound = 0.4000000059604645
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=gym.spaces.Box(
                -action_bound, action_bound, (17,), "float32"
            ),
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate a `GymTaskSampler` for `Humanoid-v2` from `kwargs`."""
        return GymTaskSampler(gym_env_type="Humanoid-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Assemble the initialization arguments of a train, valid, or test
        TaskSampler.

        # Parameters

        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : per-process seeds; the entry at `process_ind` is used

        # Returns

        Keyword arguments for the task sampler.
        """
        evaluating = mode != "train"
        if evaluating:
            # Validation/testing: a fixed number of tasks, with one seed per
            # task taken from a block of seeds unique to this sampler, so the
            # sampled tasks are deterministic and differ across samplers.
            max_tasks = 4
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))
        else:
            # Training: infinite tasks and no predefined seeds.
            max_tasks = None
            task_seeds_list = None

        return dict(
            gym_env_types=["Humanoid-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,
            task_seeds_list=task_seeds_list,
            deterministic_sampling=evaluating,  # random sampling only in training
            seed=seeds[process_ind],
        )

    @classmethod
    def tag(cls) -> str:
        """Return the identifying tag of this experiment."""
        return "Gym-MuJoCo-Humanoid-v2-PPO"
================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_inverteddoublependulum_ddppo.py
================================================
from typing import Dict, List, Any
import gym
import torch.nn as nn
from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor
from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler
from projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig
# NOTE(review): the class name reads `MuJo` where the sibling configs use
# `MuJoCo`; it is kept unchanged because it is the public name.
class GymMuJoInvertedDoublePendulumConfig(GymMuJoCoPPOConfig):
    """PPO experiment configuration for the `InvertedDoublePendulum-v2` Gym
    MuJoCo environment."""

    SENSORS = [
        GymMuJoCoSensor(
            gym_env_name="InvertedDoublePendulum-v2", uuid="gym_mujoco_data"
        )
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the `MemorylessActorCritic` agent for this experiment.

        `MemorylessActorCritic` is a lightweight implementation with
        separate MLPs for actors and critic. Because this is continuous
        control, its superclass is `ActorCriticModel[GaussianDistr]`
        rather than `ActorCriticModel[CategoricalDistr]`: actions are
        sampled from a Gaussian distribution.
        """
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=gym.spaces.Box(-1.0, 1.0, (1,), "float32"),
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate a `GymTaskSampler` for `InvertedDoublePendulum-v2`
        from `kwargs`."""
        return GymTaskSampler(gym_env_type="InvertedDoublePendulum-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Assemble the initialization arguments of a train, valid, or test
        TaskSampler.

        # Parameters

        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : per-process seeds; the entry at `process_ind` is used

        # Returns

        Keyword arguments for the task sampler.
        """
        evaluating = mode != "train"
        if evaluating:
            # Validation/testing: a fixed number of tasks, with one seed per
            # task taken from a block of seeds unique to this sampler, so the
            # sampled tasks are deterministic and differ across samplers.
            max_tasks = 4
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))
        else:
            # Training: infinite tasks and no predefined seeds.
            max_tasks = None
            task_seeds_list = None

        return dict(
            gym_env_types=["InvertedDoublePendulum-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,
            task_seeds_list=task_seeds_list,
            deterministic_sampling=evaluating,  # random sampling only in training
            seed=seeds[process_ind],
        )

    @classmethod
    def tag(cls) -> str:
        """Return the identifying tag of this experiment."""
        return "Gym-MuJoCo-InvertedDoublePendulum-v2-PPO"
================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_invertedpendulum_ddppo.py
================================================
from typing import Dict, List, Any
import gym
import torch.nn as nn
from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor
from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler
from projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig
class GymMuJoCoInvertedPendulumConfig(GymMuJoCoPPOConfig):
    """PPO experiment configuration for the `InvertedPendulum-v2` Gym MuJoCo
    environment."""

    SENSORS = [
        GymMuJoCoSensor(gym_env_name="InvertedPendulum-v2", uuid="gym_mujoco_data")
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the `MemorylessActorCritic` agent for this experiment.

        `MemorylessActorCritic` is a lightweight implementation with
        separate MLPs for actors and critic. Because this is continuous
        control, its superclass is `ActorCriticModel[GaussianDistr]`
        rather than `ActorCriticModel[CategoricalDistr]`: actions are
        sampled from a Gaussian distribution.
        """
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=gym.spaces.Box(-3.0, 3.0, (1,), "float32"),
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate a `GymTaskSampler` for `InvertedPendulum-v2` from
        `kwargs`."""
        return GymTaskSampler(gym_env_type="InvertedPendulum-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Assemble the initialization arguments of a train, valid, or test
        TaskSampler.

        # Parameters

        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : per-process seeds; the entry at `process_ind` is used

        # Returns

        Keyword arguments for the task sampler.
        """
        evaluating = mode != "train"
        if evaluating:
            # Validation/testing: a fixed number of tasks, with one seed per
            # task taken from a block of seeds unique to this sampler, so the
            # sampled tasks are deterministic and differ across samplers.
            max_tasks = 4
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))
        else:
            # Training: infinite tasks and no predefined seeds.
            max_tasks = None
            task_seeds_list = None

        return dict(
            gym_env_types=["InvertedPendulum-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,
            task_seeds_list=task_seeds_list,
            deterministic_sampling=evaluating,  # random sampling only in training
            seed=seeds[process_ind],
        )

    @classmethod
    def tag(cls) -> str:
        """Return the identifying tag of this experiment."""
        return "Gym-MuJoCo-InvertedPendulum-v2-PPO"
================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_reacher_ddppo.py
================================================
from typing import Dict, List, Any
import gym
import torch.nn as nn
from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor
from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler
from projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig
class GymMuJoCoReacherConfig(GymMuJoCoPPOConfig):
    """PPO experiment configuration for the `Reacher-v2` Gym MuJoCo
    environment."""

    SENSORS = [GymMuJoCoSensor(gym_env_name="Reacher-v2", uuid="gym_mujoco_data")]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the `MemorylessActorCritic` agent for this experiment.

        `MemorylessActorCritic` is a lightweight implementation with
        separate MLPs for actors and critic. Because this is continuous
        control, its superclass is `ActorCriticModel[GaussianDistr]`
        rather than `ActorCriticModel[CategoricalDistr]`: actions are
        sampled from a Gaussian distribution.
        """
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=gym.spaces.Box(-1.0, 1.0, (2,), "float32"),
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate a `GymTaskSampler` for `Reacher-v2` from `kwargs`."""
        return GymTaskSampler(gym_env_type="Reacher-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Assemble the initialization arguments of a train, valid, or test
        TaskSampler.

        # Parameters

        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : per-process seeds; the entry at `process_ind` is used

        # Returns

        Keyword arguments for the task sampler.
        """
        evaluating = mode != "train"
        if evaluating:
            # Validation/testing: a fixed number of tasks, with one seed per
            # task taken from a block of seeds unique to this sampler, so the
            # sampled tasks are deterministic and differ across samplers.
            max_tasks = 4
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))
        else:
            # Training: infinite tasks and no predefined seeds.
            max_tasks = None
            task_seeds_list = None

        return dict(
            gym_env_types=["Reacher-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,
            task_seeds_list=task_seeds_list,
            deterministic_sampling=evaluating,  # random sampling only in training
            seed=seeds[process_ind],
        )

    @classmethod
    def tag(cls) -> str:
        """Return the identifying tag of this experiment."""
        return "Gym-MuJoCo-Reacher-v2-PPO"
================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_swimmer_ddppo.py
================================================
from typing import Dict, List, Any
import gym
import torch.nn as nn
from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor
from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler
from projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig
class GymMuJoCoSwimmerConfig(GymMuJoCoPPOConfig):
    """PPO experiment configuration for the `Swimmer-v2` Gym MuJoCo
    environment."""

    SENSORS = [GymMuJoCoSensor(gym_env_name="Swimmer-v2", uuid="gym_mujoco_data")]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the `MemorylessActorCritic` agent for this experiment.

        `MemorylessActorCritic` is a lightweight implementation with
        separate MLPs for actors and critic. Because this is continuous
        control, its superclass is `ActorCriticModel[GaussianDistr]`
        rather than `ActorCriticModel[CategoricalDistr]`: actions are
        sampled from a Gaussian distribution.
        """
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=gym.spaces.Box(-1.0, 1.0, (2,), "float32"),
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate a `GymTaskSampler` for `Swimmer-v2` from `kwargs`."""
        return GymTaskSampler(gym_env_type="Swimmer-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Assemble the initialization arguments of a train, valid, or test
        TaskSampler.

        # Parameters

        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : per-process seeds; the entry at `process_ind` is used

        # Returns

        Keyword arguments for the task sampler.
        """
        evaluating = mode != "train"
        if evaluating:
            # Validation/testing: a fixed number of tasks, with one seed per
            # task taken from a block of seeds unique to this sampler, so the
            # sampled tasks are deterministic and differ across samplers.
            max_tasks = 4
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))
        else:
            # Training: infinite tasks and no predefined seeds.
            max_tasks = None
            task_seeds_list = None

        return dict(
            gym_env_types=["Swimmer-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,
            task_seeds_list=task_seeds_list,
            deterministic_sampling=evaluating,  # random sampling only in training
            seed=seeds[process_ind],
        )

    @classmethod
    def tag(cls) -> str:
        """Return the identifying tag of this experiment."""
        return "Gym-MuJoCo-Swimmer-v2-PPO"
================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_walker2d_ddppo.py
================================================
from typing import Dict, List, Any
import gym
import torch.nn as nn
from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor
from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler
from projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig
class GymMuJoCoWalkerConfig(GymMuJoCoPPOConfig):
    """PPO experiment configuration for the `Walker2d-v2` Gym MuJoCo
    environment."""

    SENSORS = [
        GymMuJoCoSensor(gym_env_name="Walker2d-v2", uuid="gym_mujoco_data")
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the `MemorylessActorCritic` agent for this experiment.

        `MemorylessActorCritic` is a lightweight implementation with
        separate MLPs for actors and critic. Because this is continuous
        control, its superclass is `ActorCriticModel[GaussianDistr]`
        rather than `ActorCriticModel[CategoricalDistr]`: actions are
        sampled from a Gaussian distribution.
        """
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=gym.spaces.Box(-1.0, 1.0, (6,), "float32"),
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate a `GymTaskSampler` for `Walker2d-v2` from `kwargs`."""
        return GymTaskSampler(gym_env_type="Walker2d-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Assemble the initialization arguments of a train, valid, or test
        TaskSampler.

        # Parameters

        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : per-process seeds; the entry at `process_ind` is used

        # Returns

        Keyword arguments for the task sampler.
        """
        evaluating = mode != "train"
        if evaluating:
            # Validation/testing: a fixed number of tasks, with one seed per
            # task taken from a block of seeds unique to this sampler, so the
            # sampled tasks are deterministic and differ across samplers.
            max_tasks = 4
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))
        else:
            # Training: infinite tasks and no predefined seeds.
            max_tasks = None
            task_seeds_list = None

        return dict(
            gym_env_types=["Walker2d-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,
            task_seeds_list=task_seeds_list,
            deterministic_sampling=evaluating,  # random sampling only in training
            seed=seeds[process_ind],
        )

    @classmethod
    def tag(cls) -> str:
        """Return the identifying tag of this experiment."""
        return "Gym-MuJoCo-Walker2d-v2-PPO"
================================================
FILE: projects/gym_baselines/models/__init__.py
================================================
================================================
FILE: projects/gym_baselines/models/gym_models.py
================================================
"""
Note: This file was added only for format consistency with the other baselines in the project, so for now it is the
same as `allenact_plugins.gym_plugin.gym_models`. However, for Gym Robotics environments, some modifications are needed.
For example, for `state_dim`:
if input_uuid == 'gym_robotics_data':
# consider that the observation space is Dict for robotics env
state_dim = observation_space[self.input_uuid]['observation'].shape[0]
else:
assert len(observation_space[self.input_uuid].shape) == 1
state_dim = observation_space[self.input_uuid].shape[0]
"""
================================================
FILE: projects/manipulathor_baselines/__init__.py
================================================
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/__init__.py
================================================
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/__init__.py
================================================
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_base.py
================================================
from abc import ABC
from typing import Optional, Sequence, Union
from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.base_abstractions.sensor import Sensor
from allenact.utils.experiment_utils import Builder
class ArmPointNavBaseConfig(ExperimentConfig, ABC):
    """Shared base configuration for the ArmPointNav baseline experiments."""

    # Left unset here; expected to be provided by concrete configs.
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    SENSORS: Optional[Sequence[Sensor]] = None

    # Agent movement and visibility settings.
    STEP_SIZE = 0.25
    ROTATION_DEGREES = 45.0
    VISIBILITY_DISTANCE = 1.0
    STOCHASTIC = False

    # Camera and screen dimensions.
    CAMERA_WIDTH = 224
    CAMERA_HEIGHT = 224
    SCREEN_SIZE = 224

    MAX_STEPS = 200

    def __init__(self):
        """Attach the reward configuration to the instance."""
        self.REWARD_CONFIG = dict(
            step_penalty=-0.01,
            goal_success_reward=10.0,
            pickup_success_reward=5.0,
            failed_stop_reward=0.0,
            shaping_weight=1.0,  # we are not using this
            failed_action_penalty=-0.03,
        )

    @classmethod
    def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors to use; the base config defines none."""
        return ()
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_mixin_ddppo.py
================================================
import torch.optim as optim
from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.utils.experiment_utils import (
Builder,
PipelineStage,
TrainingPipeline,
LinearDecay,
)
from torch.optim.lr_scheduler import LambdaLR
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_base import (
ArmPointNavBaseConfig,
)
class ArmPointNavMixInPPOConfig(ArmPointNavBaseConfig):
    """Mix-in providing a PPO training pipeline for ArmPointNav configs."""

    def training_pipeline(self, **kwargs):
        """Build the single-stage PPO `TrainingPipeline`.

        The rollout length is `self.MAX_STEPS` and the scene rollout
        period is `self.ADVANCE_SCENE_ROLLOUT_PERIOD`. The learning rate
        follows a `LambdaLR` schedule driven by `LinearDecay` over the
        whole stage.

        # Parameters

        kwargs : accepted for interface compatibility; not used here.

        # Returns

        The configured `TrainingPipeline`.
        """
        total_steps = int(300000000)

        single_stage = PipelineStage(
            loss_names=["ppo_loss"], max_stage_steps=total_steps
        )
        lr_schedule = Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}
        )

        return TrainingPipeline(
            named_losses={"ppo_loss": PPO(**PPOConfig)},
            pipeline_stages=[single_stage],
            optimizer_builder=Builder(optim.Adam, dict(lr=3e-4)),
            lr_scheduler_builder=lr_schedule,
            # Rollout / update schedule.
            num_steps=self.MAX_STEPS,
            num_mini_batch=1,
            update_repeats=4,
            max_grad_norm=0.5,
            # Return estimation.
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            # Checkpointing and metric reporting intervals.
            save_interval=500000,  # from 50k
            metric_accumulate_interval=1000,
        )
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_mixin_simplegru.py
================================================
from typing import Sequence, Union
import gym
import torch.nn as nn
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import Builder
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_base import (
ArmPointNavBaseConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.models.arm_pointnav_models import (
ArmPointNavBaselineActorCritic,
)
class ArmPointNavMixInSimpleGRUConfig(ArmPointNavBaseConfig):
    """Mix-in that builds the `ArmPointNavBaselineActorCritic` model."""

    # Declared only; concrete configs are expected to assign it.
    TASK_SAMPLER: TaskSampler

    @classmethod
    def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return an empty list: this mix-in defines no preprocessors."""
        return []

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Instantiate the actor-critic model.

        The action space is discrete, with one action per name returned
        by the task type's `class_action_names()`. The observation space
        is read from `kwargs["sensor_preprocessor_graph"]`.
        """
        action_names = cls.TASK_SAMPLER._TASK_TYPE.class_action_names()
        preprocessor_graph = kwargs["sensor_preprocessor_graph"]
        return ArmPointNavBaselineActorCritic(
            action_space=gym.spaces.Discrete(len(action_names)),
            observation_space=preprocessor_graph.observation_spaces,
            hidden_size=512,
        )
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_thor_base.py
================================================
import platform
from abc import ABC
from math import ceil
from typing import Dict, Any, List, Optional, Sequence
import gym
import numpy as np
import torch
from allenact.base_abstractions.experiment_config import MachineParams
from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import evenly_distribute_count_into_bins
from allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS
from allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (
SimpleArmPointNavGeneralSampler,
)
from allenact_plugins.manipulathor_plugin.manipulathor_viz import (
ImageVisualizer,
TestMetricLogger,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_base import (
ArmPointNavBaseConfig,
)
class ArmPointNavThorBaseConfig(ArmPointNavBaseConfig, ABC):
"""The base config for all ManipulaTHOR ArmPointNav experiments."""
# Task sampler class instantiated by `make_sampler_fn`.
TASK_SAMPLER = SimpleArmPointNavGeneralSampler
# Visualization is switched on only when running on macOS ("Darwin");
# when on, `make_sampler_fn` passes visualizers to the task sampler.
VISUALIZE = False
if platform.system() == "Darwin":
VISUALIZE = True
# Number of training processes, spread over the training GPUs in
# `machine_params`; left unset here.
NUM_PROCESSES: Optional[int] = None
# Training (and sampling) use every CUDA device torch reports.
TRAIN_GPU_IDS = list(range(torch.cuda.device_count()))
SAMPLER_GPU_IDS = TRAIN_GPU_IDS
# Validation and testing use the last CUDA device. With no CUDA device this
# is `[-1]`; `machine_params` substitutes an empty device list in that case.
VALID_GPU_IDS = [torch.cuda.device_count() - 1]
TEST_GPU_IDS = [torch.cuda.device_count() - 1]
# Dataset / scene / object settings; all left unset here.
# NOTE(review): presumably filled in by concrete experiment configs - confirm.
TRAIN_DATASET_DIR: Optional[str] = None
VAL_DATASET_DIR: Optional[str] = None
CAP_TRAINING = None
TRAIN_SCENES: Optional[List[str]] = None
VAL_SCENES: Optional[List[str]] = None
TEST_SCENES: Optional[List[str]] = None
# Passed to the task sampler as `objects` by `make_sampler_fn`.
OBJECT_TYPES: Optional[Sequence[str]] = None
VALID_SAMPLES_IN_SCENE = 1
TEST_SAMPLES_IN_SCENE = 1
# Number of processes used in "test" mode when CUDA is available.
NUMBER_OF_TEST_PROCESS = 10
def __init__(self):
    """Initialize the base config and attach the environment arguments.

    Asserts that the camera size, visibility distance and step size
    still hold the values 224, 224, 1 and 0.25, then stores `ENV_ARGS`
    on the instance.
    """
    super().__init__()
    # Checked one at a time, in the same order as before.
    assert self.CAMERA_WIDTH == 224
    assert self.CAMERA_HEIGHT == 224
    assert self.VISIBILITY_DISTANCE == 1
    assert self.STEP_SIZE == 0.25
    self.ENV_ARGS = ENV_ARGS
def machine_params(self, mode="train", **kwargs):
sampler_devices: Sequence[int] = []
if mode == "train":
workers_per_device = 1
gpu_ids = (
[]
if not torch.cuda.is_available()
else self.TRAIN_GPU_IDS * workers_per_device
)
nprocesses = (
1
if not torch.cuda.is_available()
else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))
)
sampler_devices = self.SAMPLER_GPU_IDS
elif mode == "valid":
nprocesses = 1
gpu_ids = [] if not torch.cuda.is_available() else self.VALID_GPU_IDS
elif mode == "test":
nprocesses = self.NUMBER_OF_TEST_PROCESS if torch.cuda.is_available() else 1
gpu_ids = [] if not torch.cuda.is_available() else self.TEST_GPU_IDS
else:
raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")
sensors = [*self.SENSORS]
if mode != "train":
sensors = [s for s in sensors if not isinstance(s, ExpertActionSensor)]
sensor_preprocessor_graph = (
SensorPreprocessorGraph(
source_observation_spaces=SensorSuite(sensors).observation_spaces,
preprocessors=self.preprocessors(),
)
if mode == "train"
or (
(isinstance(nprocesses, int) and nprocesses > 0)
or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
)
else None
)
return MachineParams(
nprocesses=nprocesses,
devices=gpu_ids,
sampler_devices=(
sampler_devices if mode == "train" else gpu_ids
), # ignored with > 1 gpu_ids
sensor_preprocessor_graph=sensor_preprocessor_graph,
)
@classmethod
def make_sampler_fn(cls, **kwargs) -> TaskSampler:
from datetime import datetime
now = datetime.now()
exp_name_w_time = cls.__name__ + "_" + now.strftime("%m_%d_%Y_%H_%M_%S_%f")
if cls.VISUALIZE:
visualizers = [
ImageVisualizer(exp_name=exp_name_w_time),
TestMetricLogger(exp_name=exp_name_w_time),
]
kwargs["visualizers"] = visualizers
kwargs["objects"] = cls.OBJECT_TYPES
kwargs["exp_name"] = exp_name_w_time
return cls.TASK_SAMPLER(**kwargs)
@staticmethod
def _partition_inds(n: int, num_parts: int):
return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
np.int32
)
def _get_sampler_args_for_scene_split(
self,
scenes: List[str],
process_ind: int,
total_processes: int,
seeds: Optional[List[int]] = None,
deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
if total_processes > len(scenes): # oversample some scenes -> bias
if total_processes % len(scenes) != 0:
print(
"Warning: oversampling some of the scenes to feed all processes."
" You can avoid this by setting a number of workers divisible by the number of scenes"
)
scenes = scenes * int(ceil(total_processes / len(scenes)))
scenes = scenes[: total_processes * (len(scenes) // total_processes)]
else:
if len(scenes) % total_processes != 0:
print(
"Warning: oversampling some of the scenes to feed all processes."
" You can avoid this by setting a number of workers divisor of the number of scenes"
)
inds = self._partition_inds(len(scenes), total_processes)
return {
"scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
"env_args": self.ENV_ARGS,
"max_steps": self.MAX_STEPS,
"sensors": self.SENSORS,
"action_space": gym.spaces.Discrete(
len(self.TASK_SAMPLER._TASK_TYPE.class_action_names())
),
"seed": seeds[process_ind] if seeds is not None else None,
"deterministic_cudnn": deterministic_cudnn,
"rewards_config": self.REWARD_CONFIG,
}
def train_task_sampler_args(
self,
process_ind: int,
total_processes: int,
devices: Optional[List[int]] = None,
seeds: Optional[List[int]] = None,
deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
res = self._get_sampler_args_for_scene_split(
self.TRAIN_SCENES,
process_ind,
total_processes,
seeds=seeds,
deterministic_cudnn=deterministic_cudnn,
)
res["scene_period"] = "manual"
res["sampler_mode"] = "train"
res["cap_training"] = self.CAP_TRAINING
res["env_args"] = {}
res["env_args"].update(self.ENV_ARGS)
res["env_args"]["x_display"] = (
("0.%d" % devices[process_ind % len(devices)]) if len(devices) > 0 else None
)
return res
def valid_task_sampler_args(
self,
process_ind: int,
total_processes: int,
devices: Optional[List[int]],
seeds: Optional[List[int]] = None,
deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
res = self._get_sampler_args_for_scene_split(
self.VALID_SCENES,
process_ind,
total_processes,
seeds=seeds,
deterministic_cudnn=deterministic_cudnn,
)
res["scene_period"] = self.VALID_SAMPLES_IN_SCENE
res["sampler_mode"] = "val"
res["cap_training"] = self.CAP_TRAINING
res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * len(res["scenes"])
res["env_args"] = {}
res["env_args"].update(self.ENV_ARGS)
res["env_args"]["x_display"] = (
("0.%d" % devices[process_ind % len(devices)]) if len(devices) > 0 else None
)
return res
def test_task_sampler_args(
self,
process_ind: int,
total_processes: int,
devices: Optional[List[int]],
seeds: Optional[List[int]] = None,
deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
res = self._get_sampler_args_for_scene_split(
self.TEST_SCENES,
process_ind,
total_processes,
seeds=seeds,
deterministic_cudnn=deterministic_cudnn,
)
res["scene_period"] = self.TEST_SAMPLES_IN_SCENE
res["sampler_mode"] = "test"
res["env_args"] = {}
res["cap_training"] = self.CAP_TRAINING
res["env_args"].update(self.ENV_ARGS)
res["env_args"]["x_display"] = (
("0.%d" % devices[process_ind % len(devices)]) if len(devices) > 0 else None
)
return res
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/__init__.py
================================================
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_depth.py
================================================
from allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS
from allenact_plugins.manipulathor_plugin.manipulathor_sensors import (
DepthSensorThor,
RelativeAgentArmToObjectSensor,
RelativeObjectToGoalSensor,
PickedUpObjSensor,
)
from allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (
ArmPointNavTaskSampler,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_ddppo import (
ArmPointNavMixInPPOConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_simplegru import (
ArmPointNavMixInSimpleGRUConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.ithor.armpointnav_ithor_base import (
ArmPointNaviThorBaseConfig,
)
class ArmPointNavDepth(
    ArmPointNaviThorBaseConfig,
    ArmPointNavMixInPPOConfig,
    ArmPointNavMixInSimpleGRUConfig,
):
    """An ArmPointNav experiment configuration in iTHOR with depth input."""

    TASK_SAMPLER = ArmPointNavTaskSampler
    MAX_STEPS = 200

    SENSORS = [
        DepthSensorThor(
            uuid="depth_lowres",
            use_normalization=True,
            height=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            width=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
        ),
        RelativeAgentArmToObjectSensor(),
        RelativeObjectToGoalSensor(),
        PickedUpObjSensor(),
    ]

    def __init__(self):
        """Check the inherited camera/step settings and enable depth rendering."""
        super().__init__()

        # Same checks, same evaluation order, split over two statements.
        assert self.CAMERA_WIDTH == 224 and self.CAMERA_HEIGHT == 224
        assert self.VISIBILITY_DISTANCE == 1 and self.STEP_SIZE == 0.25

        self.ENV_ARGS = dict(ENV_ARGS, renderDepthImage=True)

    @classmethod
    def tag(cls):
        """Return the experiment tag, which is the class name."""
        return cls.__name__
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_disjoint_depth.py
================================================
import gym
import torch.nn as nn
from allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS
from allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (
ArmPointNavTaskSampler,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.ithor.armpointnav_depth import (
ArmPointNavDepth,
)
from projects.manipulathor_baselines.armpointnav_baselines.models.disjoint_arm_pointnav_models import (
DisjointArmPointNavBaselineActorCritic,
)
class ArmPointNavDisjointDepth(ArmPointNavDepth):
    """The depth ArmPointNav experiment using the disjoint actor-critic model."""

    TASK_SAMPLER = ArmPointNavTaskSampler

    def __init__(self):
        """Check the inherited camera/step settings and enable depth rendering."""
        super().__init__()

        # Same checks, same evaluation order, split over two statements.
        assert self.CAMERA_WIDTH == 224 and self.CAMERA_HEIGHT == 224
        assert self.VISIBILITY_DISTANCE == 1 and self.STEP_SIZE == 0.25

        self.ENV_ARGS = dict(ENV_ARGS, renderDepthImage=True)

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the `DisjointArmPointNavBaselineActorCritic` for this experiment.

        Expects `kwargs["sensor_preprocessor_graph"]` to expose
        `observation_spaces`.
        """
        num_actions = len(cls.TASK_SAMPLER._TASK_TYPE.class_action_names())
        observation_space = kwargs["sensor_preprocessor_graph"].observation_spaces
        return DisjointArmPointNavBaselineActorCritic(
            action_space=gym.spaces.Discrete(num_actions),
            observation_space=observation_space,
            hidden_size=512,
        )

    @classmethod
    def tag(cls):
        """Return the experiment tag, which is the class name."""
        return cls.__name__
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_ithor_base.py
================================================
from abc import ABC
from allenact_plugins.manipulathor_plugin.armpointnav_constants import (
TRAIN_OBJECTS,
TEST_OBJECTS,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_thor_base import (
ArmPointNavThorBaseConfig,
)
class ArmPointNaviThorBaseConfig(ArmPointNavThorBaseConfig, ABC):
    """The base config for all iTHOR ArmPointNav experiments.

    Defines the number of processes, the train/test/valid scene split over
    `FloorPlan1_physics` ... `FloorPlan30_physics`, and the object types.
    """

    NUM_PROCESSES = 40
    TOTAL_NUMBER_SCENES = 30

    # Indices with i % 3 in {0, 1} go to training; FloorPlan28 is left out
    # (the original authors flagged it as a bad scene).
    TRAIN_SCENES = [
        f"FloorPlan{i}_physics"
        for i in range(1, TOTAL_NUMBER_SCENES + 1)
        if i % 3 != 2 and i != 28
    ]

    # The remaining indices (i % 3 == 2) alternate between test and valid:
    # i % 6 == 2 implies i % 3 == 2, and likewise for i % 6 == 5.
    TEST_SCENES = [
        f"FloorPlan{i}_physics"
        for i in range(1, TOTAL_NUMBER_SCENES + 1)
        if i % 6 == 2
    ]
    VALID_SCENES = [
        f"FloorPlan{i}_physics"
        for i in range(1, TOTAL_NUMBER_SCENES + 1)
        if i % 6 == 5
    ]

    ALL_SCENES = TRAIN_SCENES + TEST_SCENES + VALID_SCENES

    # Every scene except the excluded one appears exactly once across splits.
    assert len(ALL_SCENES) == TOTAL_NUMBER_SCENES - 1
    assert len(set(ALL_SCENES)) == TOTAL_NUMBER_SCENES - 1

    OBJECT_TYPES = tuple(sorted(TRAIN_OBJECTS))
    UNSEEN_OBJECT_TYPES = tuple(sorted(TEST_OBJECTS))
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_no_vision.py
================================================
from allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS
from allenact_plugins.manipulathor_plugin.manipulathor_sensors import (
NoVisionSensorThor,
RelativeAgentArmToObjectSensor,
RelativeObjectToGoalSensor,
PickedUpObjSensor,
)
from allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (
ArmPointNavTaskSampler,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_ddppo import (
ArmPointNavMixInPPOConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_simplegru import (
ArmPointNavMixInSimpleGRUConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.ithor.armpointnav_ithor_base import (
ArmPointNaviThorBaseConfig,
)
class ArmPointNavNoVision(
    ArmPointNaviThorBaseConfig,
    ArmPointNavMixInPPOConfig,
    ArmPointNavMixInSimpleGRUConfig,
):
    """An ArmPointNav experiment configuration in iTHOR without visual input.

    The `NoVisionSensorThor` is registered under the `rgb_lowres` uuid.
    """

    TASK_SAMPLER = ArmPointNavTaskSampler
    MAX_STEPS = 200

    SENSORS = [
        NoVisionSensorThor(
            uuid="rgb_lowres",
            use_resnet_normalization=False,
            height=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            width=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
        ),
        RelativeAgentArmToObjectSensor(),
        RelativeObjectToGoalSensor(),
        PickedUpObjSensor(),
    ]

    def __init__(self):
        """Check the inherited camera/step settings and disable depth rendering."""
        super().__init__()

        # Same checks, same evaluation order, split over two statements.
        assert self.CAMERA_WIDTH == 224 and self.CAMERA_HEIGHT == 224
        assert self.VISIBILITY_DISTANCE == 1 and self.STEP_SIZE == 0.25

        self.ENV_ARGS = dict(ENV_ARGS, renderDepthImage=False)

    @classmethod
    def tag(cls):
        """Return the experiment tag, which is the class name."""
        return cls.__name__
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_rgb.py
================================================
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS
from allenact_plugins.manipulathor_plugin.manipulathor_sensors import (
RelativeAgentArmToObjectSensor,
RelativeObjectToGoalSensor,
PickedUpObjSensor,
)
from allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (
ArmPointNavTaskSampler,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_ddppo import (
ArmPointNavMixInPPOConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_simplegru import (
ArmPointNavMixInSimpleGRUConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.ithor.armpointnav_ithor_base import (
ArmPointNaviThorBaseConfig,
)
class ArmPointNavRGB(
    ArmPointNaviThorBaseConfig,
    ArmPointNavMixInPPOConfig,
    ArmPointNavMixInSimpleGRUConfig,
):
    """An ArmPointNav experiment configuration in iTHOR with RGB input."""

    TASK_SAMPLER = ArmPointNavTaskSampler
    MAX_STEPS = 200

    SENSORS = [
        RGBSensorThor(
            uuid="rgb_lowres",
            use_resnet_normalization=True,
            height=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            width=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
        ),
        RelativeAgentArmToObjectSensor(),
        RelativeObjectToGoalSensor(),
        PickedUpObjSensor(),
    ]

    def __init__(self):
        """Check the inherited camera/step settings and copy the default env args."""
        super().__init__()

        # Same checks, same evaluation order, split over two statements.
        assert self.CAMERA_WIDTH == 224 and self.CAMERA_HEIGHT == 224
        assert self.VISIBILITY_DISTANCE == 1 and self.STEP_SIZE == 0.25

        # A private copy, so later edits never touch the shared constant.
        self.ENV_ARGS = dict(ENV_ARGS)

    @classmethod
    def tag(cls):
        """Return the experiment tag, which is the class name."""
        return cls.__name__
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_rgbdepth.py
================================================
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS
from allenact_plugins.manipulathor_plugin.manipulathor_sensors import (
DepthSensorThor,
RelativeAgentArmToObjectSensor,
RelativeObjectToGoalSensor,
PickedUpObjSensor,
)
from allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (
ArmPointNavTaskSampler,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_ddppo import (
ArmPointNavMixInPPOConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_simplegru import (
ArmPointNavMixInSimpleGRUConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.ithor.armpointnav_ithor_base import (
ArmPointNaviThorBaseConfig,
)
class ArmPointNavRGBDepth(
    ArmPointNaviThorBaseConfig,
    ArmPointNavMixInPPOConfig,
    ArmPointNavMixInSimpleGRUConfig,
):
    """An ArmPointNav experiment configuration in iTHOR with RGB-D input."""

    TASK_SAMPLER = ArmPointNavTaskSampler
    MAX_STEPS = 200

    SENSORS = [
        DepthSensorThor(
            uuid="depth_lowres",
            use_normalization=True,
            height=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            width=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
        ),
        RGBSensorThor(
            uuid="rgb_lowres",
            use_resnet_normalization=True,
            height=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            width=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
        ),
        RelativeAgentArmToObjectSensor(),
        RelativeObjectToGoalSensor(),
        PickedUpObjSensor(),
    ]

    def __init__(self):
        """Check the inherited camera/step settings and enable depth rendering."""
        super().__init__()

        # Same checks, same evaluation order, split over two statements.
        assert self.CAMERA_WIDTH == 224 and self.CAMERA_HEIGHT == 224
        assert self.VISIBILITY_DISTANCE == 1 and self.STEP_SIZE == 0.25

        self.ENV_ARGS = dict(ENV_ARGS, renderDepthImage=True)

    @classmethod
    def tag(cls):
        """Return the experiment tag, which is the class name."""
        return cls.__name__
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/models/__init__.py
================================================
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/models/arm_pointnav_models.py
================================================
"""Baseline models for use in the Arm Point Navigation task.
Arm Point Navigation is currently available as a Task in ManipulaTHOR.
"""
from typing import Tuple, Optional
import gym
import torch
from gym.spaces.dict import Dict as SpaceDict
from allenact.algorithms.onpolicy_sync.policy import (
ActorCriticModel,
LinearCriticHead,
LinearActorHead,
DistributionType,
Memory,
ObservationType,
)
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput
from allenact.embodiedai.models.basic_models import SimpleCNN, RNNStateEncoder
from projects.manipulathor_baselines.armpointnav_baselines.models.manipulathor_net_utils import (
input_embedding_net,
)
class ArmPointNavBaselineActorCritic(ActorCriticModel[CategoricalDistr]):
    """Baseline recurrent actor critic model for the ArmPointNav task.

    # Attributes

    action_space : The space of actions available to the agent. Currently only discrete
        actions are allowed (so this space will always be of type `gym.spaces.Discrete`).
    observation_space : The observation space expected by the agent. It must contain
        at least one of the `rgb_lowres` / `depth_lowres` sensors.
    hidden_size : The hidden size of the RNN state encoder.
    obj_state_embedding_size : The output size of the relative-distance embedding.
    """

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        hidden_size=512,
        obj_state_embedding_size=512,
        trainable_masked_hidden_state: bool = False,
        num_rnn_layers=1,
        rnn_type="GRU",
    ):
        """Initializer.

        See class documentation for parameter definitions.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        self._hidden_size = hidden_size
        self.object_type_embedding_size = obj_state_embedding_size

        sensor_names = self.observation_space.spaces.keys()
        has_rgb = "rgb_lowres" in sensor_names
        has_depth = "depth_lowres" in sensor_names

        self.visual_encoder = SimpleCNN(
            self.observation_space,
            self._hidden_size,
            rgb_uuid="rgb_lowres" if has_rgb else None,
            depth_uuid="depth_lowres" if has_depth else None,
        )

        # The RNN input grows by one `hidden_size` chunk per visual modality.
        if not (has_rgb or has_depth):
            raise NotImplementedError
        input_visual_feature_num = 2 if (has_rgb and has_depth) else 1

        rnn_input_size = (
            self._hidden_size * input_visual_feature_num + obj_state_embedding_size
        )
        self.state_encoder = RNNStateEncoder(
            rnn_input_size,
            self._hidden_size,
            trainable_masked_hidden_state=trainable_masked_hidden_state,
            num_layers=num_rnn_layers,
            rnn_type=rnn_type,
        )

        self.actor = LinearActorHead(self._hidden_size, action_space.n)
        self.critic = LinearCriticHead(self._hidden_size)

        # Layer sizes 3 -> 100 -> obj_state_embedding_size for the embedding net.
        embedding_layer_sizes = (
            torch.Tensor([3, 100, obj_state_embedding_size]).long().tolist()
        )
        self.relative_dist_embedding = input_embedding_net(
            embedding_layer_sizes, dropout=0
        )

        self.train()

    @property
    def recurrent_hidden_state_size(self) -> int:
        """The recurrent hidden state size of the model."""
        return self._hidden_size

    @property
    def num_recurrent_layers(self) -> int:
        """Number of recurrent hidden layers."""
        return self.state_encoder.num_recurrent_layers

    def _recurrent_memory_specification(self):
        """Describe the single `rnn` memory tensor (layer x sampler x hidden)."""
        rnn_dims = (
            ("layer", self.num_recurrent_layers),
            ("sampler", None),
            ("hidden", self.recurrent_hidden_state_size),
        )
        return dict(rnn=(rnn_dims, torch.float32))

    def get_relative_distance_embedding(
        self, state_tensor: torch.Tensor
    ) -> torch.FloatTensor:
        """Embed a relative-distance observation with the shared embedding net."""
        return self.relative_dist_embedding(state_tensor)

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Compute the policy and value for a batch of observations.

        The arm-to-object embedding is used until the object is picked up;
        afterwards it is overwritten by the object-to-goal embedding.

        # Parameters

        observations : Batched input observations.
        memory : `Memory` containing the hidden states from initial timepoints.
        prev_actions : Tensor of previous actions taken.
        masks : Masks applied to hidden states. See `RNNStateEncoder`.

        # Returns

        Tuple of the `ActorCriticOutput` and recurrent hidden state.
        """
        arm2obj_dist = self.get_relative_distance_embedding(
            observations["relative_agent_arm_to_obj"]
        )
        obj2goal_dist = self.get_relative_distance_embedding(
            observations["relative_obj_to_goal"]
        )

        perception_embed = self.visual_encoder(observations)

        after_pickup = observations["pickedup_object"] == 1

        # In-place overwrite on the arm-to-object embedding (no copy is made).
        distances = arm2obj_dist
        distances[after_pickup] = obj2goal_dist[after_pickup]

        x_cat = torch.cat([distances, perception_embed], dim=-1)
        x_out, rnn_hidden_states = self.state_encoder(
            x_cat, memory.tensor("rnn"), masks
        )

        actor_critic_output = ActorCriticOutput(
            distributions=self.actor(x_out),
            values=self.critic(x_out),
            extras={},
        )

        updated_memory = memory.set_tensor("rnn", rnn_hidden_states)

        return actor_critic_output, updated_memory
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/models/base_models.py
================================================
import torch
import torch.nn as nn
class LinearActorHeadNoCategory(nn.Module):
    """Linear actor head that returns raw outputs rather than a distribution."""

    def __init__(self, num_inputs: int, num_outputs: int):
        """Create the linear layer with orthogonal weights (gain 0.01) and zero bias."""
        super().__init__()

        layer = nn.Linear(num_inputs, num_outputs)
        nn.init.orthogonal_(layer.weight, gain=0.01)
        nn.init.constant_(layer.bias, 0)
        self.linear = layer

    def forward(self, x: torch.FloatTensor):  # type: ignore
        """Apply the linear layer; the result must be a rank-3 tensor."""
        out = self.linear(x)  # type:ignore
        assert out.dim() == 3
        return out
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/models/disjoint_arm_pointnav_models.py
================================================
"""Baseline models for use in the Arm Point Navigation task.
Arm Point Navigation is currently available as a Task in ManipulaTHOR.
"""
from typing import Tuple, Optional
import gym
import torch
from gym.spaces.dict import Dict as SpaceDict
from allenact.algorithms.onpolicy_sync.policy import (
ActorCriticModel,
LinearCriticHead,
DistributionType,
Memory,
ObservationType,
)
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput
from allenact.embodiedai.models.basic_models import SimpleCNN, RNNStateEncoder
from projects.manipulathor_baselines.armpointnav_baselines.models.base_models import (
LinearActorHeadNoCategory,
)
from projects.manipulathor_baselines.armpointnav_baselines.models.manipulathor_net_utils import (
input_embedding_net,
)
class DisjointArmPointNavBaselineActorCritic(ActorCriticModel[CategoricalDistr]):
    """Disjoint baseline recurrent actor critic model for ArmPointNav.

    Keeps separate visual encoders, distance embeddings and actor/critic heads
    for the phase before ("pick") and after ("drop") the object is picked up;
    the RNN state encoder is shared between both phases.

    # Attributes

    action_space : The space of actions available to the agent. Currently only discrete
        actions are allowed (so this space will always be of type `gym.spaces.Discrete`).
    observation_space : The observation space expected by the agent. Both visual
        encoders read the `depth_lowres` sensor.
    hidden_size : The hidden size of the RNN state encoder.
    obj_state_embedding_size : The output size of the relative-distance embeddings.
    """

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        hidden_size=512,
        obj_state_embedding_size=512,
        trainable_masked_hidden_state: bool = False,
        num_rnn_layers=1,
        rnn_type="GRU",
    ):
        """Initializer.

        See class documentation for parameter definitions.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        self._hidden_size = hidden_size
        self.object_type_embedding_size = obj_state_embedding_size

        # One depth encoder per phase.
        self.visual_encoder_pick = SimpleCNN(
            self.observation_space,
            self._hidden_size,
            rgb_uuid=None,
            depth_uuid="depth_lowres",
        )
        self.visual_encoder_drop = SimpleCNN(
            self.observation_space,
            self._hidden_size,
            rgb_uuid=None,
            depth_uuid="depth_lowres",
        )

        rnn_input_size = self._hidden_size + obj_state_embedding_size
        self.state_encoder = RNNStateEncoder(
            rnn_input_size,
            self._hidden_size,
            trainable_masked_hidden_state=trainable_masked_hidden_state,
            num_layers=num_rnn_layers,
            rnn_type=rnn_type,
        )

        num_actions = action_space.n
        self.actor_pick = LinearActorHeadNoCategory(self._hidden_size, num_actions)
        self.critic_pick = LinearCriticHead(self._hidden_size)
        self.actor_drop = LinearActorHeadNoCategory(self._hidden_size, num_actions)
        self.critic_drop = LinearCriticHead(self._hidden_size)

        # Layer sizes 3 -> 100 -> obj_state_embedding_size for both embedding nets.
        embedding_layer_sizes = torch.Tensor([3, 100, obj_state_embedding_size])
        self.relative_dist_embedding_pick = input_embedding_net(
            embedding_layer_sizes.long().tolist(), dropout=0
        )
        self.relative_dist_embedding_drop = input_embedding_net(
            embedding_layer_sizes.long().tolist(), dropout=0
        )

        self.train()

    @property
    def recurrent_hidden_state_size(self) -> int:
        """The recurrent hidden state size of the model."""
        return self._hidden_size

    @property
    def num_recurrent_layers(self) -> int:
        """Number of recurrent hidden layers."""
        return self.state_encoder.num_recurrent_layers

    def _recurrent_memory_specification(self):
        """Describe the single `rnn` memory tensor (layer x sampler x hidden)."""
        rnn_dims = (
            ("layer", self.num_recurrent_layers),
            ("sampler", None),
            ("hidden", self.recurrent_hidden_state_size),
        )
        return dict(rnn=(rnn_dims, torch.float32))

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Compute the policy and value for a batch of observations.

        "Pick" and "drop" outputs are both computed; wherever the object has
        been picked up, the "pick" tensor is overwritten in place by the
        "drop" one.

        # Parameters

        observations : Batched input observations.
        memory : `Memory` containing the hidden states from initial timepoints.
        prev_actions : Tensor of previous actions taken.
        masks : Masks applied to hidden states. See `RNNStateEncoder`.

        # Returns

        Tuple of the `ActorCriticOutput` and recurrent hidden state.
        """
        after_pickup = observations["pickedup_object"] == 1

        def merge_phases(pick_tensor, drop_tensor):
            # In-place overwrite on the "pick" tensor, which is then returned.
            pick_tensor[after_pickup] = drop_tensor[after_pickup]
            return pick_tensor

        arm2obj_dist = self.relative_dist_embedding_pick(
            observations["relative_agent_arm_to_obj"]
        )
        obj2goal_dist = self.relative_dist_embedding_drop(
            observations["relative_obj_to_goal"]
        )

        perception_embed_pick = self.visual_encoder_pick(observations)
        perception_embed_drop = self.visual_encoder_drop(observations)

        distances = merge_phases(arm2obj_dist, obj2goal_dist)
        perception_embed = merge_phases(perception_embed_pick, perception_embed_drop)

        x_cat = torch.cat([distances, perception_embed], dim=-1)  # type: ignore
        x_out, rnn_hidden_states = self.state_encoder(
            x_cat, memory.tensor("rnn"), masks
        )

        actor_out_pick = self.actor_pick(x_out)
        critic_out_pick = self.critic_pick(x_out)
        actor_out_drop = self.actor_drop(x_out)
        critic_out_drop = self.critic_drop(x_out)

        logits = merge_phases(actor_out_pick, actor_out_drop)
        critic_out = merge_phases(critic_out_pick, critic_out_drop)

        actor_critic_output = ActorCriticOutput(
            distributions=CategoricalDistr(logits=logits),
            values=critic_out,
            extras={},
        )

        updated_memory = memory.set_tensor("rnn", rnn_hidden_states)

        return actor_critic_output, updated_memory
================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/models/manipulathor_net_utils.py
================================================
import pdb
import torch.nn as nn
import torch.nn.functional as F
def upshuffle(
    in_planes, out_planes, upscale_factor, kernel_size=3, stride=1, padding=1
):
    """Build a Conv2d -> PixelShuffle -> LeakyReLU upsampling block.

    The convolution emits `out_planes * upscale_factor**2` channels, which the
    pixel shuffle folds back into `out_planes` channels.
    """
    conv_channels = out_planes * upscale_factor**2
    layers = [
        nn.Conv2d(
            in_planes,
            conv_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
        ),
        nn.PixelShuffle(upscale_factor),
        nn.LeakyReLU(),
    ]
    return nn.Sequential(*layers)
def upshufflenorelu(
    in_planes, out_planes, upscale_factor, kernel_size=3, stride=1, padding=1
):
    """Build a Conv2d -> PixelShuffle upsampling block without an activation.

    The convolution emits `out_planes * upscale_factor**2` channels, which the
    pixel shuffle folds back into `out_planes` channels.
    """
    conv_channels = out_planes * upscale_factor**2
    layers = [
        nn.Conv2d(
            in_planes,
            conv_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
        ),
        nn.PixelShuffle(upscale_factor),
    ]
    return nn.Sequential(*layers)
def combine_block_w_bn(in_planes, out_planes):
    """Build a 1x1 Conv2d -> BatchNorm2d -> LeakyReLU block."""
    layers = [
        nn.Conv2d(in_planes, out_planes, 1, 1),
        nn.BatchNorm2d(out_planes),
        nn.LeakyReLU(),
    ]
    return nn.Sequential(*layers)
def conv2d_block(in_planes, out_planes, kernel_size, stride=1, padding=1):
    """Build a two-convolution block with batch norm after each convolution.

    The first convolution uses the given `kernel_size`/`stride`/`padding` and
    is followed by a LeakyReLU; the second is a fixed 3x3, stride-1, padding-1
    convolution with no trailing activation.
    """
    first_conv = nn.Conv2d(
        in_planes, out_planes, kernel_size, stride=stride, padding=padding
    )
    first_norm = nn.BatchNorm2d(out_planes)
    second_conv = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, padding=1)
    second_norm = nn.BatchNorm2d(out_planes)
    return nn.Sequential(
        first_conv, first_norm, nn.LeakyReLU(), second_conv, second_norm
    )
def combine_block_w_do(in_planes, out_planes, dropout=0.0):
    """Build a 1x1 Conv2d -> LeakyReLU -> Dropout block."""
    layers = [
        nn.Conv2d(in_planes, out_planes, 1, 1),
        nn.LeakyReLU(),
        nn.Dropout(dropout),
    ]
    return nn.Sequential(*layers)
def combine_block_no_do(in_planes, out_planes):
    """Build a 1x1 Conv2d -> LeakyReLU block (no dropout)."""
    pointwise_conv = nn.Conv2d(in_planes, out_planes, 1, 1)
    return nn.Sequential(pointwise_conv, nn.LeakyReLU())
def linear_block(in_features, out_features, dropout=0.0):
    """Build a Linear -> LeakyReLU -> Dropout block."""
    layers = [
        nn.Linear(in_features, out_features),
        nn.LeakyReLU(),
        nn.Dropout(dropout),
    ]
    return nn.Sequential(*layers)
def linear_block_norelu(in_features, out_features):
    """Build a block containing a single Linear layer and no activation."""
    projection = nn.Linear(in_features, out_features)
    return nn.Sequential(projection)
def input_embedding_net(list_of_feature_sizes, dropout=0.0):
    """Build an MLP whose layer widths follow `list_of_feature_sizes`.

    Each consecutive pair of sizes becomes one block. Every block except the
    last is a `linear_block` (with activation and dropout); the last one is a
    `linear_block_norelu`. Fewer than two sizes yields an empty `nn.Sequential`.
    """
    size_pairs = list(zip(list_of_feature_sizes[:-1], list_of_feature_sizes[1:]))
    last_index = len(size_pairs) - 1

    blocks = [
        linear_block_norelu(input_size, output_size)
        if index == last_index
        else linear_block(input_size, output_size, dropout=dropout)
        for index, (input_size, output_size) in enumerate(size_pairs)
    ]
    return nn.Sequential(*blocks)
def _upsample_add(x, y):
    """Bilinearly resize `x` to the spatial size of `y` and add the two.

    `y` must be a 4-dimensional tensor; its last two dimensions give the
    target height and width. Uses `F.interpolate`, the replacement for the
    deprecated `F.upsample`, with the same arguments.
    """
    _, _, height, width = y.size()
    return F.interpolate(x, size=(height, width), mode="bilinear") + y
def replace_all_relu_w_leakyrelu(model):
    """Recursively replace every `nn.ReLU` inside `model` with `nn.LeakyReLU`.

    The replacement is done in place on `model._modules`, and `model` itself
    is returned. A cautionary message is printed on every (recursive) call.
    The previous implementation also dropped into the debugger on each call,
    which made it unusable in non-interactive runs; that breakpoint is gone.
    """
    print("Not sure if using this is a good idea")
    modules = model._modules
    for name in list(modules.keys()):
        module = modules[name]
        if isinstance(module, nn.ReLU):
            modules[name] = nn.LeakyReLU()
        elif isinstance(module, nn.Module):
            # Recurse into containers; leaf modules are returned unchanged.
            modules[name] = replace_all_relu_w_leakyrelu(module)
    return model
def replace_all_leakyrelu_w_relu(model):
    """Recursively replace every `nn.LeakyReLU` inside `model` with `nn.ReLU`.

    The replacement is done in place on `model._modules`, and `model` itself
    is returned.
    """
    children = model._modules
    for key in list(children):
        child = children[key]
        if isinstance(child, nn.LeakyReLU):
            children[key] = nn.ReLU()
        elif isinstance(child, nn.Module):
            # Recurse into containers; leaf modules are returned unchanged.
            children[key] = replace_all_leakyrelu_w_relu(child)
    return model
def replace_all_bn_w_groupnorm(model):
    """Recursively replace 1d/2d batch norms in `model` with `nn.GroupNorm`.

    Each `nn.BatchNorm1d`/`nn.BatchNorm2d` becomes a `nn.GroupNorm` with 32
    groups over the same number of features (so the feature count has to be
    compatible with 32 groups, as enforced by `nn.GroupNorm`). The replacement
    is done in place on `model._modules`, and `model` itself is returned. A
    cautionary message is printed on every (recursive) call. The previous
    implementation also dropped into the debugger on each call, which made it
    unusable in non-interactive runs; that breakpoint is gone.

    # Raises

    NotImplementedError : If a `nn.BatchNorm3d` is encountered.
    """
    print("Not sure if using this is a good idea")
    modules = model._modules
    for name in list(modules.keys()):
        module = modules[name]
        if isinstance(module, (nn.BatchNorm2d, nn.BatchNorm1d)):
            modules[name] = nn.GroupNorm(32, module.num_features)
        elif isinstance(module, nn.BatchNorm3d):
            # Still caught by callers using `except Exception`.
            raise NotImplementedError("Not implemented")
        elif isinstance(module, nn.Module):
            # Recurse into containers; leaf modules are returned unchanged.
            modules[name] = replace_all_bn_w_groupnorm(module)
    return model
def flat_temporal(tensor, batch_size, sequence_length):
    """Merge the two leading (batch, sequence) dimensions of `tensor` into one.

    Asserts that the first two dimensions equal `batch_size` and
    `sequence_length`; the remaining dimensions are kept as they are.
    """
    dims = list(tensor.shape)
    assert dims[0] == batch_size and dims[1] == sequence_length
    merged_shape = [batch_size * sequence_length, *dims[2:]]
    return tensor.contiguous().view(merged_shape)
def unflat_temporal(tensor, batch_size, sequence_length):
    """Split the leading dimension of ``tensor`` into (batch, sequence).

    The input must have a leading dimension equal to
    ``batch_size * sequence_length`` (checked with an ``assert``); the result
    is a view of shape ``(batch_size, sequence_length, ...)``.
    """
    dims = list(tensor.shape)
    assert dims[0] == batch_size * sequence_length
    split_shape = [batch_size, sequence_length, *dims[1:]]
    return tensor.contiguous().view(split_shape)
================================================
FILE: projects/objectnav_baselines/README.md
================================================
# Baseline models ObjectNav (for RoboTHOR/iTHOR)
This project contains the code for training baseline models for the ObjectNav task. In ObjectNav, the agent
spawns at a location in an environment and is tasked to explore the environment until it finds an object of a
certain type (such as TV or Basketball). Once the agent is confident that it has the object within sight
it executes the `END` action which terminates the episode. If the agent is within a set
distance to the target (in our case 1.0 meters) and the target is visible within its observation frame
the agent succeeded, otherwise it failed.
Provided are experiment configs for training a simple convolutional model with
a GRU using `RGB`, `Depth` or `RGB-D` (i.e. `RGB+Depth`) as inputs in
[RoboTHOR](https://ai2thor.allenai.org/robothor/) and [iTHOR](https://ai2thor.allenai.org/ithor/).
The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)
Reinforcement Learning Algorithm. For the RoboTHOR environment we also have an experiment
(`objectnav_robothor_rgb_resnetgru_dagger.py`) showing how a model can be trained using DAgger,
a form of imitation learning.
To train an experiment run the following command from the `allenact` root directory:
```bash
python main.py <PATH_TO_EXPERIMENT_CONFIG> -o <PATH_TO_OUTPUT>
```
Where `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights
and logs to be stored and `<PATH_TO_EXPERIMENT_CONFIG>` is the path to the python file containing
the experiment configuration. An example usage of this command would be:
```bash
python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet_ddppo.py -o storage/objectnav-robothor-rgb
```
This trains a simple convolutional neural network with a GRU using RGB input
passed through a pretrained ResNet-18 visual encoder on the
ObjectNav task in the RoboTHOR environment and stores the model weights and logs
to `storage/objectnav-robothor-rgb`.
## RoboTHOR ObjectNav 2021 Challenge
The experiment configs found under the `projects/objectnav_baselines/experiments/robothor` directory are designed
to conform to the requirements of the [RoboTHOR ObjectNav 2021 Challenge](https://ai2thor.allenai.org/robothor/cvpr-2021-challenge).
### Training a baseline
To train a baseline ResNet->GRU model taking RGB-D inputs, run the following command
```bash
python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnet_ddppo.py -o storage/objectnav-robothor-rgbd
```
By default, when using a machine with a GPU, the above experiment will attempt to train using 60 parallel processes
across all available GPUs. See the `TRAIN_GPU_IDS` constant in `experiments/objectnav_thor_base.py` and
the `NUM_PROCESSES` constant in `experiments/robothor/objectnav_robothor_base.py` if you'd like to change which
GPUs are used or how many processes are run respectively.
### Downloading our pretrained model checkpoint
We provide a pretrained model obtained by allowing the above command to run for all 300M training steps and then selecting
the model checkpoint with best validation-set performance (for us occurring at ~170M training steps). You can download
this model checkpoint by running
```bash
bash pretrained_model_ckpts/download_navigation_model_ckpts.sh robothor-objectnav-challenge-2021
```
from the top-level directory. This will download the pretrained model weights and save them at the path
```bash
pretrained_model_ckpts/robothor-objectnav-challenge-2021/Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO/2021-02-09_22-35-15/exp_Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO_0.2.0a_300M__stage_00__steps_000170207237.pt
```
### Running inference on the pretrained model
You can run inference on the above pretrained model (on the test dataset) by running
```bash
export SAVED_MODEL_PATH=pretrained_model_ckpts/robothor-objectnav-challenge-2021/Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO/2021-02-09_22-35-15/exp_Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO_0.2.0a_300M__stage_00__steps_000170207237.pt
python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnetgru_ddppo.py -c $SAVED_MODEL_PATH --eval
```
To discourage "cheating", the test dataset has been scrubbed of the information needed to actually compute the success rate / SPL
of your model and so running the above will only save the trajectories your models take. To evaluate these
trajectories you will have to submit them to our leaderboard, see [here for more details](https://github.com/allenai/robothor-challenge/).
If you'd like to get a sense of if your model is doing well before submitting to the leaderboard, you can obtain the
success rate / SPL of it on our validation dataset. To do this, you can simply comment-out the line
```python
TEST_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-objectnav/test")
```
within the `projects/objectnav_baselines/experiments/robothor/objectnav_robothor_base.py` file and rerun the above
`python main.py ...` command (when the test dataset is not given, the code defaults to using the validation set).
================================================
FILE: projects/objectnav_baselines/__init__.py
================================================
================================================
FILE: projects/objectnav_baselines/experiments/__init__.py
================================================
================================================
FILE: projects/objectnav_baselines/experiments/clip/__init__.py
================================================
================================================
FILE: projects/objectnav_baselines/experiments/clip/mixins.py
================================================
from typing import Sequence, Union, Type, Tuple, Optional, Dict, Any
import attr
import gym
import numpy as np
import torch
import torch.nn as nn
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import (
ObservationType,
Memory,
ActorCriticOutput,
DistributionType,
)
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.base_abstractions.sensor import Sensor
from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor
from allenact.utils.experiment_utils import Builder
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
from allenact_plugins.navigation_plugin.objectnav.models import (
ResnetTensorNavActorCritic,
)
class LookDownFirstResnetTensorNavActorCritic(ResnetTensorNavActorCritic):
    """`ResnetTensorNavActorCritic` variant that overrides the action logits
    wherever `masks` is 0.

    For binary `masks`, positions where `masks == 0` get a fixed logit vector
    that is 99999 at `look_down_action_index` and 0 elsewhere, while positions
    where `masks == 1` keep the parent's logits unchanged.

    NOTE(review): presumably `masks` is 0 exactly on the first step of an
    episode, which would make the agent look down first -- confirm against
    the code producing `masks`.
    """

    def __init__(self, look_down_action_index: int, **kwargs):
        """Build the parent model from `kwargs` and create the override buffer.

        `look_down_action_index` is the position in the last dimension of the
        logits that receives the large value; `kwargs` are forwarded unchanged
        to the parent constructor.
        """
        super().__init__(**kwargs)

        self.look_down_action_index = look_down_action_index
        # Registered as a non-persistent buffer: it is part of the module's
        # buffers but is excluded from the state dict.
        self.register_buffer(
            "look_down_delta", torch.zeros(1, 1, self.action_space.n), persistent=False
        )
        # A very large logit on one action; all other entries stay 0.
        self.look_down_delta[0, 0, self.look_down_action_index] = 99999

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Run the parent's forward pass, then blend its logits with
        `look_down_delta` according to `masks`.

        Returns the new `ActorCriticOutput` (same `values` and `extras` as the
        parent's output, new categorical distribution) and the parent's memory.
        """
        # `locals()` is captured here, before any further local is defined, so
        # only the method's own arguments are handed to the helper.
        ac_out, memory = super(LookDownFirstResnetTensorNavActorCritic, self).forward(
            **prepare_locals_for_super(locals())
        )

        # masks == 1 -> parent's logits; masks == 0 -> `look_down_delta`.
        # assumes `masks` broadcasts against the logits -- TODO confirm shapes.
        logits = ac_out.distributions.logits * masks + self.look_down_delta * (
            1 - masks
        )
        ac_out = ActorCriticOutput(
            distributions=CategoricalDistr(logits=logits),
            values=ac_out.values,
            extras=ac_out.extras,
        )

        return ac_out, memory
@attr.s(kw_only=True)
class ClipResNetPreprocessGRUActorCriticMixin:
    """Builds CLIP-ResNet preprocessors and the matching actor-critic model
    for a given set of sensors.

    `screen_size` is stored but not read by any method of this class.
    """

    sensors: Sequence[Sensor] = attr.ib()
    clip_model_type: str = attr.ib()
    screen_size: int = attr.ib()
    goal_sensor_type: Type[Optional[Sensor]] = attr.ib()
    pool: bool = attr.ib(default=False)

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return one `ClipResNetPreprocessor` per RGB / depth sensor present.

        The first `RGBSensor` in `self.sensors` (if any) yields a preprocessor
        with output uuid `"rgb_clip_resnet"`; the first `DepthSensor` (if any)
        yields one with output uuid `"depth_clip_resnet"`.

        Raises `AssertionError` if the RGB sensor's normalization means or
        standard deviations differ from the CLIP constants.
        """
        preprocessors = []

        rgb_sensor = next((s for s in self.sensors if isinstance(s, RGBSensor)), None)
        if rgb_sensor is not None:
            # The normalization checks only make sense when an RGB sensor
            # exists; running them unconditionally raised `AttributeError` on
            # `None` for depth-only sensor configurations.
            assert (
                np.linalg.norm(
                    np.array(rgb_sensor._norm_means)
                    - np.array(ClipResNetPreprocessor.CLIP_RGB_MEANS)
                )
                < 1e-5
            )
            assert (
                np.linalg.norm(
                    np.array(rgb_sensor._norm_sds)
                    - np.array(ClipResNetPreprocessor.CLIP_RGB_STDS)
                )
                < 1e-5
            )

            preprocessors.append(
                ClipResNetPreprocessor(
                    rgb_input_uuid=rgb_sensor.uuid,
                    clip_model_type=self.clip_model_type,
                    pool=self.pool,
                    output_uuid="rgb_clip_resnet",
                    input_img_height_width=(rgb_sensor.height, rgb_sensor.width),
                )
            )

        depth_sensor = next(
            (s for s in self.sensors if isinstance(s, DepthSensor)), None
        )
        if depth_sensor is not None:
            preprocessors.append(
                ClipResNetPreprocessor(
                    rgb_input_uuid=depth_sensor.uuid,
                    clip_model_type=self.clip_model_type,
                    pool=self.pool,
                    output_uuid="depth_clip_resnet",
                    input_img_height_width=(depth_sensor.height, depth_sensor.width),
                )
            )

        return preprocessors

    def create_model(
        self,
        num_actions: int,
        add_prev_actions: bool,
        look_down_first: bool = False,
        look_down_action_index: Optional[int] = None,
        hidden_size: int = 512,
        rnn_type="GRU",
        model_kwargs: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> nn.Module:
        """Instantiate the actor-critic model for the configured sensors.

        `kwargs` must contain `"sensor_preprocessor_graph"`, whose
        `observation_spaces` become the model's observation space. Entries of
        `model_kwargs` are passed on to the model constructor in addition to
        the arguments assembled here (a key that collides with one of them
        raises `TypeError`).

        Returns a `ResnetTensorNavActorCritic`, or a
        `LookDownFirstResnetTensorNavActorCritic` when `look_down_first` is
        true.

        Raises `ValueError` if `look_down_first` is true but
        `look_down_action_index` is `None`.
        """
        has_rgb = any(isinstance(s, RGBSensor) for s in self.sensors)
        has_depth = any(isinstance(s, DepthSensor) for s in self.sensors)

        goal_sensor_uuid = next(
            (s.uuid for s in self.sensors if isinstance(s, self.goal_sensor_type)),
            None,
        )

        if model_kwargs is None:
            model_kwargs = {}

        model_kwargs = dict(
            action_space=gym.spaces.Discrete(num_actions),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            goal_sensor_uuid=goal_sensor_uuid,
            rgb_resnet_preprocessor_uuid="rgb_clip_resnet" if has_rgb else None,
            depth_resnet_preprocessor_uuid="depth_clip_resnet" if has_depth else None,
            hidden_size=hidden_size,
            goal_dims=32,
            add_prev_actions=add_prev_actions,
            rnn_type=rnn_type,
            **model_kwargs
        )

        if not look_down_first:
            return ResnetTensorNavActorCritic(**model_kwargs)

        if look_down_action_index is None:
            # Indexing the logit-override buffer with `None` would not fail:
            # it would silently set the override for *every* action.
            raise ValueError(
                "`look_down_action_index` must be given when `look_down_first` is True."
            )
        return LookDownFirstResnetTensorNavActorCritic(
            look_down_action_index=look_down_action_index, **model_kwargs
        )
================================================
FILE: projects/objectnav_baselines/experiments/habitat/__init__.py
================================================
================================================
FILE: projects/objectnav_baselines/experiments/habitat/clip/__init__.py
================================================
================================================
FILE: projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo.py
================================================
from typing import Sequence, Union
import torch.nn as nn
from torch.distributions.utils import lazy_property
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
from allenact_plugins.habitat_plugin.habitat_sensors import (
RGBSensorHabitat,
TargetObjectSensorHabitat,
)
from projects.objectnav_baselines.experiments.clip.mixins import (
ClipResNetPreprocessGRUActorCriticMixin,
)
from projects.objectnav_baselines.experiments.habitat.objectnav_habitat_base import (
ObjectNavHabitatBaseConfig,
)
from projects.objectnav_baselines.mixins import ObjectNavPPOMixin
class ObjectNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig(
    ObjectNavHabitatBaseConfig
):
    """An Object Navigation experiment configuration in Habitat."""

    CLIP_MODEL_TYPE = "RN50"

    def __init__(self, lr: float, **kwargs):
        """Store the learning rate and build the CLIP preprocessing/model mixin.

        `kwargs` are forwarded to the base config's constructor.
        """
        super().__init__(**kwargs)

        self.lr = lr

        mixin_settings = dict(
            sensors=self.SENSORS,
            clip_model_type=self.CLIP_MODEL_TYPE,
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=TargetObjectSensorHabitat,
        )
        self.preprocessing_and_model = ClipResNetPreprocessGRUActorCriticMixin(
            **mixin_settings
        )

    @lazy_property
    def SENSORS(self):
        """CLIP-normalized RGB sensor plus the target-object sensor."""
        side = ObjectNavHabitatBaseConfig.SCREEN_SIZE
        rgb_sensor = RGBSensorHabitat(
            height=side,
            width=side,
            use_resnet_normalization=True,
            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
        )
        num_categories = len(self.DEFAULT_OBJECT_CATEGORIES_TO_IND)
        goal_sensor = TargetObjectSensorHabitat(num_categories)
        return [rgb_sensor, goal_sensor]

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline from `ObjectNavPPOMixin`.

        `kwargs` are accepted but not used.
        """
        pipeline_settings = dict(
            lr=self.lr,
            auxiliary_uuids=self.auxiliary_uuids,
            multiple_beliefs=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )
        return ObjectNavPPOMixin.training_pipeline(**pipeline_settings)

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Delegate to the mixin's `preprocessors()`."""
        mixin = self.preprocessing_and_model
        return mixin.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Delegate to the mixin's `create_model`, adding this config's
        action count, `add_prev_actions` and `auxiliary_uuids`."""
        build_model = self.preprocessing_and_model.create_model
        return build_model(
            num_actions=self.ACTION_SPACE.n,
            add_prev_actions=self.add_prev_actions,
            auxiliary_uuids=self.auxiliary_uuids,
            **kwargs,
        )

    def tag(self):
        """Base tag extended with the model description and learning rate."""
        base_tag = super().tag()
        return f"{base_tag}-RGB-ClipResNet50GRU-DDPPO-lr{self.lr}"
================================================
FILE: projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo_increasingrollouts.py
================================================
import torch
import torch.optim as optim
from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.utils.experiment_utils import (
Builder,
TrainingPipeline,
PipelineStage,
TrainingSettings,
)
from projects.objectnav_baselines.experiments.habitat.clip.objectnav_habitat_rgb_clipresnet50gru_ddppo import (
ObjectNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig,
)
from projects.objectnav_baselines.mixins import update_with_auxiliary_losses
class ObjectNavHabitatRGBClipResNet50GRUDDPPOIncreasingLengthExpConfig(
    ObjectNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig
):
    """Variant of the CLIP-ResNet50 Habitat config whose training pipeline
    uses three stages with rollout lengths of 32, 64 and then 128 steps."""

    def __init__(self, lr=1e-4, **kwargs):
        """Forward `lr` and `kwargs` to the parent config and store `lr`."""
        super().__init__(lr, **kwargs)
        self.lr = lr

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Build the three-stage PPO pipeline.

        `kwargs` are accepted but not used.
        """
        on_gpu = torch.cuda.is_available()
        num_processes = self.num_train_processes

        def metric_interval(rollout_length, multiplier):
            # Without CUDA, metrics are accumulated at an interval of 1.
            return num_processes * rollout_length * multiplier if on_gpu else 1

        short_stage_steps = int(10e6)
        medium_stage_steps = int(10e6)
        long_stage_steps = int(1e9) - short_stage_steps - medium_stage_steps

        # (rollout length, max steps in the stage, metric accumulate interval)
        schedule = (
            (32, short_stage_steps, metric_interval(32, 10)),
            (64, medium_stage_steps, metric_interval(64, 5)),
            (128, long_stage_steps, metric_interval(128, 5)),
        )

        losses = update_with_auxiliary_losses(
            named_losses={
                "ppo_loss": (PPO(**PPOConfig, normalize_advantage=False), 1.0)
            },
            auxiliary_uuids=self.auxiliary_uuids,
            multiple_beliefs=False,
        )

        stages = [
            PipelineStage(
                loss_names=list(losses.keys()),
                max_stage_steps=stage_steps,
                training_settings=TrainingSettings(
                    num_steps=rollout_length,
                    metric_accumulate_interval=interval,
                ),
            )
            for rollout_length, stage_steps, interval in schedule
        ]

        return TrainingPipeline(
            save_interval=5000000,
            optimizer_builder=Builder(optim.Adam, dict(lr=self.lr)),
            num_mini_batch=1,
            update_repeats=4,
            max_grad_norm=0.5,
            # Only the loss objects are passed on; the paired weights are not.
            named_losses={name: pair[0] for name, pair in losses.items()},
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=stages,
            lr_scheduler_builder=None,
        )

    def tag(self):
        """Parent tag with "-DDPPO-lr" rewritten to "-DDPPO-IncRollouts-lr"."""
        parent_tag = super().tag()
        return parent_tag.replace("-DDPPO-lr", "-DDPPO-IncRollouts-lr")
================================================
FILE: projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py
================================================
import glob
import math
import os
import warnings
from abc import ABC
from typing import Dict, Any, List, Optional, Sequence, Union, Tuple
import gym
import numpy as np
import torch
from torch.distributions.utils import lazy_property
# noinspection PyUnresolvedReferences
import habitat
from allenact.base_abstractions.experiment_config import MachineParams
from allenact.base_abstractions.preprocessor import (
SensorPreprocessorGraph,
Preprocessor,
)
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor
from allenact.utils.experiment_utils import evenly_distribute_count_into_bins, Builder
from allenact.utils.system import get_logger
from allenact_plugins.habitat_plugin.habitat_constants import (
HABITAT_DATASETS_DIR,
HABITAT_CONFIGS_DIR,
HABITAT_SCENE_DATASETS_DIR,
)
from allenact_plugins.habitat_plugin.habitat_task_samplers import ObjectNavTaskSampler
from allenact_plugins.habitat_plugin.habitat_tasks import ObjectNavTask
from allenact_plugins.habitat_plugin.habitat_utils import (
get_habitat_config,
construct_env_configs,
)
from projects.objectnav_baselines.experiments.objectnav_base import ObjectNavBaseConfig
def create_objectnav_config(
    config_yaml_path: str,
    mode: str,
    scenes_path: str,
    simulator_gpu_ids: Sequence[int],
    rotation_degrees: float,
    step_size: float,
    max_steps: int,
    num_processes: int,
    camera_width: int,
    camera_height: int,
    using_rgb: bool,
    using_depth: bool,
    training: bool,
    num_episode_sample: int,
    horizontal_fov: Optional[int] = None,
) -> habitat.Config:
    """Load the Habitat config at `config_yaml_path` and specialise it.

    The loaded config is defrosted, updated with the dataset paths, sensor
    list, camera resolution (and field of view when `horizontal_fov` is
    given), task sensors and measurements, then frozen and returned.

    `rotation_degrees`, `step_size` and `max_steps` are not written to the
    config's turn angle / step size / environment step limit; they are
    asserted to equal the values already in the YAML, as are the task type,
    success distance and distance-to-goal mode.

    When `training` is false the seed is fixed to 0, shuffling is disabled and,
    if `num_episode_sample > 0`, the number of sampled episodes is set.
    """
    config = get_habitat_config(config_yaml_path)
    config.defrost()

    config.NUM_PROCESSES = num_processes
    config.SIMULATOR_GPU_IDS = simulator_gpu_ids

    dataset_cfg = config.DATASET
    dataset_cfg.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR
    dataset_cfg.DATA_PATH = scenes_path

    simulator_cfg = config.SIMULATOR
    simulator_cfg.AGENT_0.SENSORS = [
        sensor_name
        for sensor_name, enabled in (
            ("RGB_SENSOR", using_rgb),
            ("DEPTH_SENSOR", using_depth),
        )
        if enabled
    ]

    camera_cfgs = (
        simulator_cfg.RGB_SENSOR,
        simulator_cfg.DEPTH_SENSOR,
        simulator_cfg.SEMANTIC_SENSOR,
    )
    for camera_cfg in camera_cfgs:
        camera_cfg.WIDTH = camera_width
        camera_cfg.HEIGHT = camera_height
    if horizontal_fov is not None:
        for camera_cfg in camera_cfgs:
            camera_cfg.HFOV = horizontal_fov

    # Motion parameters must already agree with the YAML file.
    assert rotation_degrees == simulator_cfg.TURN_ANGLE
    assert step_size == simulator_cfg.FORWARD_STEP_SIZE
    assert max_steps == config.ENVIRONMENT.MAX_EPISODE_STEPS
    simulator_cfg.MAX_EPISODE_STEPS = max_steps

    task_cfg = config.TASK
    assert task_cfg.TYPE == "ObjectNav-v1"
    assert task_cfg.SUCCESS.SUCCESS_DISTANCE == 0.1
    assert task_cfg.DISTANCE_TO_GOAL.DISTANCE_TO == "VIEW_POINTS"

    task_cfg.SENSORS = ["OBJECTGOAL_SENSOR", "COMPASS_SENSOR", "GPS_SENSOR"]
    task_cfg.GOAL_SENSOR_UUID = "objectgoal"
    task_cfg.MEASUREMENTS = ["DISTANCE_TO_GOAL", "SUCCESS", "SPL", "SOFT_SPL"]

    if not training:
        config.SEED = 0
        iterator_options = config.ENVIRONMENT.ITERATOR_OPTIONS
        iterator_options.SHUFFLE = False
        if num_episode_sample > 0:
            iterator_options.NUM_EPISODE_SAMPLE = num_episode_sample

    config.MODE = mode
    config.freeze()

    return config
class ObjectNavHabitatBaseConfig(ObjectNavBaseConfig, ABC):
    """The base config for all Habitat ObjectNav experiments."""

    # Selected auxiliary-loss uuids.
    # If every entry below stays commented out, training is vanilla DD-PPO.
    _AUXILIARY_UUIDS = [
        # InverseDynamicsLoss.UUID,
        # TemporalDistanceLoss.UUID,
        # CPCA1Loss.UUID,
        # CPCA4Loss.UUID,
        # CPCA8Loss.UUID,
        # CPCA16Loss.UUID,
    ]
    MULTIPLE_BELIEFS = False
    BELIEF_FUSION = (  # choose one
        None
        # AttentiveFusion
        # AverageFusion
        # SoftmaxFusion
    )

    # Passed to the task sampler as `failed_end_reward` (see `make_sampler_fn`).
    FAILED_END_REWARD = -1.0

    ACTION_SPACE = gym.spaces.Discrete(len(ObjectNavTask.class_action_names()))

    # Five training processes per visible GPU, or a single one without CUDA.
    DEFAULT_NUM_TRAIN_PROCESSES = (
        5 * torch.cuda.device_count() if torch.cuda.is_available() else 1
    )
    # NOTE(review): not referenced inside this class; `__init__` falls back to
    # 10 (CUDA available) or 1 (otherwise) for `num_test_processes` instead.
    DEFAULT_NUM_TEST_PROCESSES = 11

    DEFAULT_TRAIN_GPU_IDS = tuple(range(torch.cuda.device_count()))
    # The highest-indexed device (this is `[-1]` when the device count is 0).
    DEFAULT_VALID_GPU_IDS = [torch.cuda.device_count() - 1]
    DEFAULT_TEST_GPU_IDS = tuple(range(torch.cuda.device_count()))

    def __init__(
        self,
        scene_dataset: str,  # Should be "mp3d" or "hm3d"
        debug: bool = False,
        num_train_processes: Optional[int] = None,
        num_test_processes: Optional[int] = None,
        test_on_validation: bool = False,
        run_valid: bool = True,
        train_gpu_ids: Optional[Sequence[int]] = None,
        val_gpu_ids: Optional[Sequence[int]] = None,
        test_gpu_ids: Optional[Sequence[int]] = None,
        add_prev_actions: bool = False,
        look_constraints: Optional[Tuple[int, int]] = None,
        **kwargs,
    ):
        """Store the experiment options, substituting class defaults for
        every argument left as `None`.

        `look_constraints`, when given, must contain only values in
        `{0, 1, 2, 3}` and its second entry must be positive (both enforced
        with `assert`). Remaining `kwargs` go to the parent constructor.
        """
        super().__init__(**kwargs)

        self.scene_dataset = scene_dataset
        self.debug = debug

        assert look_constraints is None or all(
            lc in [0, 1, 2, 3] for lc in look_constraints
        ), "Look constraints limit the number of times agents can look up/down when starting from the horizon line."
        assert (
            look_constraints is None or look_constraints[1] > 0
        ), "The agent must be allowed to look down from the horizon at least once."
        self.look_constraints = look_constraints

        def v_or_default(v, default):
            # `None` means "not provided"; other falsy values (0, []) are kept.
            return v if v is not None else default

        self.num_train_processes = v_or_default(
            num_train_processes, self.DEFAULT_NUM_TRAIN_PROCESSES
        )
        self.num_test_processes = v_or_default(
            num_test_processes, (10 if torch.cuda.is_available() else 1)
        )
        self.test_on_validation = test_on_validation
        self.run_valid = run_valid
        self.train_gpu_ids = v_or_default(train_gpu_ids, self.DEFAULT_TRAIN_GPU_IDS)
        # No validation devices are assigned when validation is disabled.
        self.val_gpu_ids = v_or_default(
            val_gpu_ids, self.DEFAULT_VALID_GPU_IDS if run_valid else []
        )
        self.test_gpu_ids = v_or_default(test_gpu_ids, self.DEFAULT_TEST_GPU_IDS)

        self.add_prev_actions = add_prev_actions
        self.auxiliary_uuids = self._AUXILIARY_UUIDS

    def _create_config(
        self,
        mode: str,
        scenes_path: str,
        num_processes: int,
        simulator_gpu_ids: Sequence[int],
        training: bool = True,
        num_episode_sample: int = -1,
    ):
        """Call `create_objectnav_config` with this experiment's constants
        (YAML path, motion parameters, camera settings) and the given
        mode-specific arguments. RGB / depth usage is derived from
        `self.SENSORS`."""
        return create_objectnav_config(
            config_yaml_path=self.BASE_CONFIG_YAML_PATH,
            mode=mode,
            scenes_path=scenes_path,
            simulator_gpu_ids=simulator_gpu_ids,
            rotation_degrees=self.ROTATION_DEGREES,
            step_size=self.STEP_SIZE,
            max_steps=self.MAX_STEPS,
            num_processes=num_processes,
            camera_width=self.CAMERA_WIDTH,
            camera_height=self.CAMERA_HEIGHT,
            horizontal_fov=self.HORIZONTAL_FIELD_OF_VIEW,
            using_rgb=any(isinstance(s, RGBSensor) for s in self.SENSORS),
            using_depth=any(isinstance(s, DepthSensor) for s in self.SENSORS),
            training=training,
            num_episode_sample=num_episode_sample,
        )

    @lazy_property
    def DEFAULT_OBJECT_CATEGORIES_TO_IND(self):
        """Mapping from object category name to index for `self.scene_dataset`
        (21 categories for "mp3d", 6 for "hm3d").

        Raises `NotImplementedError` for any other dataset name.
        """
        if self.scene_dataset == "mp3d":
            return {
                "chair": 0,
                "table": 1,
                "picture": 2,
                "cabinet": 3,
                "cushion": 4,
                "sofa": 5,
                "bed": 6,
                "chest_of_drawers": 7,
                "plant": 8,
                "sink": 9,
                "toilet": 10,
                "stool": 11,
                "towel": 12,
                "tv_monitor": 13,
                "shower": 14,
                "bathtub": 15,
                "counter": 16,
                "fireplace": 17,
                "gym_equipment": 18,
                "seating": 19,
                "clothes": 20,
            }
        elif self.scene_dataset == "hm3d":
            return {
                "chair": 0,
                "bed": 1,
                "plant": 2,
                "toilet": 3,
                "tv_monitor": 4,
                "sofa": 5,
            }
        else:
            raise NotImplementedError

    @lazy_property
    def TASK_DATA_DIR_TEMPLATE(self):
        """Path template for the episode files of `self.scene_dataset`.

        The two `{}` placeholders left in the result are filled by the
        `*_scenes_path` methods, both with the split name.
        """
        return os.path.join(
            HABITAT_DATASETS_DIR, f"objectnav/{self.scene_dataset}/v1/{{}}/{{}}.json.gz"
        )

    @lazy_property
    def BASE_CONFIG_YAML_PATH(self):
        """Path of the Habitat task YAML for `self.scene_dataset`."""
        return os.path.join(
            HABITAT_CONFIGS_DIR, f"tasks/objectnav_{self.scene_dataset}.yaml"
        )

    @lazy_property
    def TRAIN_CONFIG(self):
        """Habitat config for training, on the training GPUs."""
        return self._create_config(
            mode="train",
            scenes_path=self.train_scenes_path(),
            num_processes=self.num_train_processes,
            simulator_gpu_ids=self.train_gpu_ids,
            training=True,
        )

    @lazy_property
    def VALID_CONFIG(self):
        """Habitat config for validation: one process, 200 sampled episodes."""
        return self._create_config(
            mode="validate",
            scenes_path=self.valid_scenes_path(),
            num_processes=1,
            simulator_gpu_ids=self.val_gpu_ids,
            training=False,
            num_episode_sample=200,
        )

    @lazy_property
    def TEST_CONFIG(self):
        """Habitat config for testing (created with mode "validate")."""
        return self._create_config(
            mode="validate",
            scenes_path=self.test_scenes_path(),
            num_processes=self.num_test_processes,
            simulator_gpu_ids=self.test_gpu_ids,
            training=False,
        )

    @lazy_property
    def TRAIN_CONFIGS_PER_PROCESS(self):
        """One Habitat config per training process.

        With two or more training GPUs, configs are assigned to devices so
        that the summed on-disk size of each device's scene files is balanced
        (see inline comments). In debug mode every config is then restricted
        to a single fixed scene.
        """
        configs = construct_env_configs(self.TRAIN_CONFIG, allow_scene_repeat=True)

        if len(self.train_gpu_ids) >= 2:
            scenes_dir = configs[0].DATASET.SCENES_DIR

            # Size on disk of each config's scene files, used as the quantity
            # to balance across devices.
            memory_use_per_config = []
            for config in configs:
                assert (
                    len(config.DATASET.CONTENT_SCENES) == 1
                ), config.DATASET.CONTENT_SCENES
                scene_name = config.DATASET.CONTENT_SCENES[0]

                paths = glob.glob(
                    os.path.join(
                        scenes_dir, self.scene_dataset, "**", f"{scene_name}.*"
                    ),
                    recursive=True,
                )

                # Exactly 4 matching files are expected per mp3d scene and 2
                # per scene otherwise.
                if self.scene_dataset == "mp3d":
                    assert len(paths) == 4
                else:
                    assert len(paths) == 2

                memory_use_per_config.append(sum(os.path.getsize(p) for p in paths))

            max_configs_per_device = math.ceil(len(configs) / len(self.train_gpu_ids))
            mem_per_device = np.array([0.0 for _ in range(len(self.train_gpu_ids))])
            configs_per_device = [[] for _ in range(len(mem_per_device))]
            # Greedy assignment: visit configs from smallest to largest and
            # give each to the device with the least accumulated size.
            for mem, config in sorted(
                list(zip(memory_use_per_config, configs)), key=lambda x: x[0]
            ):
                ind = int(np.argmin(mem_per_device))
                config.defrost()
                config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = self.train_gpu_ids[ind]
                config.freeze()
                configs_per_device[ind].append(config)

                mem_per_device[ind] += mem
                # A full device is marked with infinity so `argmin` never
                # selects it again.
                if len(configs_per_device[ind]) >= max_configs_per_device:
                    mem_per_device[ind] = float("inf")

            # Flatten, devices with the fewest configs first.
            configs_per_device.sort(key=lambda x: len(x))
            configs = sum(configs_per_device, [])

        if self.debug:
            warnings.warn(
                "IN DEBUG MODE, WILL ONLY USE `1LXtFkjw3qL` SCENE IN MP3D OR `1S7LAXRdDqK` scene in HM3D!!!"
            )
            for config in configs:
                config.defrost()
                if self.scene_dataset == "mp3d":
                    config.DATASET.CONTENT_SCENES = ["1LXtFkjw3qL"]
                elif self.scene_dataset == "hm3d":
                    config.DATASET.CONTENT_SCENES = ["1S7LAXRdDqK"]
                else:
                    raise NotImplementedError
                config.freeze()
        return configs

    @lazy_property
    def TEST_CONFIG_PER_PROCESS(self):
        """One Habitat config per test process, without repeating scenes."""
        return construct_env_configs(self.TEST_CONFIG, allow_scene_repeat=False)

    def train_scenes_path(self):
        """Episode file path for the "train" split."""
        return self.TASK_DATA_DIR_TEMPLATE.format(*(["train"] * 2))

    def valid_scenes_path(self):
        """Episode file path for the "val" split."""
        return self.TASK_DATA_DIR_TEMPLATE.format(*(["val"] * 2))

    def test_scenes_path(self):
        """Episode file path used for testing.

        Currently this is the "val" split (a warning is logged); the path for
        a "test" split is kept below, commented out.
        """
        get_logger().warning("Running tests on the validation set!")
        return self.TASK_DATA_DIR_TEMPLATE.format(*(["val"] * 2))
        # return self.TASK_DATA_DIR_TEMPLATE.format(*(["test"] * 2))

    def tag(self):
        """Experiment tag: dataset name plus optional "-PrevActions" and
        "-Look<up>,<down>" suffixes."""
        t = f"ObjectNav-Habitat-{self.scene_dataset.upper()}"

        if self.add_prev_actions:
            t = f"{t}-PrevActions"

        if self.look_constraints is not None:
            t = f"{t}-Look{','.join(map(str, self.look_constraints))}"

        return t

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """No preprocessors by default."""
        return tuple()

    def machine_params(self, mode="train", **kwargs):
        """Return the `MachineParams` (process counts, devices, preprocessor
        graph) for `mode`.

        Without CUDA, a single process and no devices are used regardless of
        `mode`. With CUDA, `mode` must be "train", "valid" or "test",
        otherwise `NotImplementedError` is raised.
        """
        has_gpus = torch.cuda.is_available()
        if not has_gpus:
            gpu_ids = []
            nprocesses = 1
        elif mode == "train":
            gpu_ids = self.train_gpu_ids
            nprocesses = self.num_train_processes
        elif mode == "valid":
            gpu_ids = self.val_gpu_ids
            nprocesses = 1 if self.run_valid else 0
        elif mode == "test":
            gpu_ids = self.test_gpu_ids
            nprocesses = self.num_test_processes
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        if has_gpus:
            # Spread the processes across the selected devices.
            nprocesses = evenly_distribute_count_into_bins(nprocesses, len(gpu_ids))

        # The preprocessor graph is always built for training; for other modes
        # it is built only when at least one process will run.
        sensor_preprocessor_graph = (
            SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
                preprocessors=self.preprocessors(),
            )
            if mode == "train"
            or (
                (isinstance(nprocesses, int) and nprocesses > 0)
                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
            )
            else None
        )

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    def make_sampler_fn(self, **kwargs) -> TaskSampler:
        """Create an `ObjectNavTaskSampler` from `kwargs`.

        `failed_end_reward` defaults to `FAILED_END_REWARD` but is overridden
        by an entry of the same name in `kwargs`.
        """
        return ObjectNavTaskSampler(
            task_kwargs={
                "look_constraints": self.look_constraints,
            },
            **{"failed_end_reward": self.FAILED_END_REWARD, **kwargs},  # type: ignore
        )

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Sampler arguments for training process `process_ind`.

        Only `process_ind` is used; the other parameters are ignored.
        """
        config = self.TRAIN_CONFIGS_PER_PROCESS[process_ind]
        return {
            "env_config": config,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": self.ACTION_SPACE,
        }

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Sampler arguments for validation.

        Raises `NotImplementedError` unless `total_processes == 1`.
        """
        if total_processes != 1:
            raise NotImplementedError(
                "In validation, `total_processes` must equal 1 for habitat tasks"
            )
        return {
            "env_config": self.VALID_CONFIG,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(
                len(ObjectNavTask.class_action_names())
            ),
        }

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Sampler arguments for test process `process_ind`.

        Only `process_ind` is used; the other parameters are ignored.
        """
        config = self.TEST_CONFIG_PER_PROCESS[process_ind]
        return {
            "env_config": config,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(
                len(ObjectNavTask.class_action_names())
            ),
        }
================================================
FILE: projects/objectnav_baselines/experiments/ithor/__init__.py
================================================
================================================
FILE: projects/objectnav_baselines/experiments/ithor/objectnav_ithor_base.py
================================================
import os
from abc import ABC
import torch
from projects.objectnav_baselines.experiments.objectnav_thor_base import (
ObjectNavThorBaseConfig,
)
class ObjectNaviThorBaseConfig(ObjectNavThorBaseConfig, ABC):
    """The base config for all iTHOR ObjectNav experiments."""

    # AI2-THOR build and agent mode used by the iTHOR experiments.
    THOR_COMMIT_ID = "9549791ce2e7f472063a10abb1fb7664159fec23"
    AGENT_MODE = "default"

    # A single training process is used when CUDA is unavailable.
    DEFAULT_NUM_TRAIN_PROCESSES = 1 if not torch.cuda.is_available() else 40

    # Dataset locations are resolved against the working directory at the
    # time this class body is executed.
    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-objectnav/train")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val")

    # Target object types, stored as a sorted tuple.
    TARGET_TYPES = tuple(
        sorted(
            (
                "AlarmClock",
                "Apple",
                "Book",
                "Bowl",
                "Box",
                "Candle",
                "GarbageCan",
                "HousePlant",
                "Laptop",
                "SoapBottle",
                "Television",
                "Toaster",
            )
        )
    )
================================================
FILE: projects/objectnav_baselines/experiments/ithor/objectnav_ithor_depth_resnet18gru_ddppo.py
================================================
from typing import Sequence, Union
import torch.nn as nn
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from projects.objectnav_baselines.experiments.ithor.objectnav_ithor_base import (
ObjectNaviThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
ResNetPreprocessGRUActorCriticMixin,
ObjectNavPPOMixin,
)
class ObjectNaviThorDepthPPOExperimentConfig(ObjectNaviThorBaseConfig):
    """An Object Navigation experiment configuration in iThor with Depth
    input."""

    SENSORS = (
        DepthSensorThor(
            uuid="depth_lowres",
            use_normalization=True,
            height=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            width=ObjectNaviThorBaseConfig.SCREEN_SIZE,
        ),
        GoalObjectTypeThorSensor(object_types=ObjectNaviThorBaseConfig.TARGET_TYPES),
    )

    def __init__(self, **kwargs):
        """Initialise the base config and the ResNet-18 preprocessing/model
        mixin for this config's sensors."""
        super().__init__(**kwargs)

        mixin_settings = dict(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )
        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            **mixin_settings
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO pipeline from `ObjectNavPPOMixin` without auxiliary
        losses. `kwargs` are accepted but not used."""
        rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD
        return ObjectNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=rollout_period,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Delegate to the mixin's `preprocessors()`."""
        mixin = self.preprocessing_and_model
        return mixin.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Delegate to the mixin's `create_model` with this config's number of
        actions; `kwargs` are passed through."""
        num_actions = self.ACTION_SPACE.n
        return self.preprocessing_and_model.create_model(
            num_actions=num_actions, **kwargs
        )

    def tag(self):
        """Fixed experiment tag."""
        experiment_tag = "ObjectNav-iTHOR-Depth-ResNet18GRU-DDPPO"
        return experiment_tag
================================================
FILE: projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgb_resnet18gru_ddppo.py
================================================
from typing import Sequence, Union
import torch.nn as nn
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import (
GoalObjectTypeThorSensor,
RGBSensorThor,
)
from projects.objectnav_baselines.experiments.ithor.objectnav_ithor_base import (
ObjectNaviThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
ResNetPreprocessGRUActorCriticMixin,
ObjectNavPPOMixin,
)
class ObjectNaviThorRGBPPOExperimentConfig(ObjectNaviThorBaseConfig):
    """iTHOR ObjectNav experiment that observes RGB images and the goal
    object type, using an "RN18" ResNet preprocessing + GRU actor-critic
    helper and the shared ObjectNav PPO training pipeline."""

    SENSORS = [
        RGBSensorThor(
            uuid="rgb_lowres",
            use_resnet_normalization=True,
            height=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            width=ObjectNaviThorBaseConfig.SCREEN_SIZE,
        ),
        GoalObjectTypeThorSensor(object_types=ObjectNaviThorBaseConfig.TARGET_TYPES),
    ]

    def __init__(self, **kwargs):
        """Pass `kwargs` to the base config, then build the helper that
        supplies this experiment's preprocessors and model."""
        super().__init__(**kwargs)

        helper_args = dict(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )
        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            **helper_args
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the ObjectNav PPO pipeline with no auxiliary losses.

        `kwargs` is accepted for interface compatibility and is not used.
        """
        pipeline = ObjectNavPPOMixin.training_pipeline(
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            multiple_beliefs=False,
            auxiliary_uuids=[],
        )
        return pipeline

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors provided by the model helper."""
        helper = self.preprocessing_and_model
        return helper.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model with one output per action in `ACTION_SPACE`;
        every entry of `kwargs` is forwarded to the helper."""
        n_actions = self.ACTION_SPACE.n
        return self.preprocessing_and_model.create_model(
            num_actions=n_actions, **kwargs
        )

    @classmethod
    def tag(cls):
        """Return the string identifying this experiment."""
        return "ObjectNav-iTHOR-RGB-ResNet18GRU-DDPPO"
================================================
FILE: projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgbd_resnet18gru_ddppo.py
================================================
from typing import Sequence, Union
import torch.nn as nn
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import (
RGBSensorThor,
GoalObjectTypeThorSensor,
)
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from projects.objectnav_baselines.experiments.ithor.objectnav_ithor_base import (
ObjectNaviThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
ResNetPreprocessGRUActorCriticMixin,
ObjectNavPPOMixin,
)
class ObjectNaviThorRGBDPPOExperimentConfig(ObjectNaviThorBaseConfig):
    """iTHOR ObjectNav experiment that observes RGB images, depth images and
    the goal object type, using an "RN18" ResNet preprocessing + GRU
    actor-critic helper and the shared ObjectNav PPO training pipeline."""

    SENSORS = [
        RGBSensorThor(
            uuid="rgb_lowres",
            use_resnet_normalization=True,
            height=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            width=ObjectNaviThorBaseConfig.SCREEN_SIZE,
        ),
        DepthSensorThor(
            uuid="depth_lowres",
            use_normalization=True,
            height=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            width=ObjectNaviThorBaseConfig.SCREEN_SIZE,
        ),
        GoalObjectTypeThorSensor(object_types=ObjectNaviThorBaseConfig.TARGET_TYPES),
    ]

    def __init__(self, **kwargs):
        """Pass `kwargs` to the base config, then build the helper that
        supplies this experiment's preprocessors and model."""
        super().__init__(**kwargs)

        helper_args = dict(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )
        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            **helper_args
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the ObjectNav PPO pipeline with no auxiliary losses.

        `kwargs` is accepted for interface compatibility and is not used.
        """
        pipeline = ObjectNavPPOMixin.training_pipeline(
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            multiple_beliefs=False,
            auxiliary_uuids=[],
        )
        return pipeline

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors provided by the model helper."""
        helper = self.preprocessing_and_model
        return helper.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model with one output per action in `ACTION_SPACE`;
        every entry of `kwargs` is forwarded to the helper."""
        n_actions = self.ACTION_SPACE.n
        return self.preprocessing_and_model.create_model(
            num_actions=n_actions, **kwargs
        )

    def tag(self):
        """Return the string identifying this experiment."""
        return "ObjectNav-iTHOR-RGBD-ResNet18GRU-DDPPO"
================================================
FILE: projects/objectnav_baselines/experiments/objectnav_base.py
================================================
from abc import ABC
from typing import Optional, Sequence, Union
from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.base_abstractions.sensor import Sensor
from allenact.utils.experiment_utils import Builder
class ObjectNavBaseConfig(ExperimentConfig, ABC):
    """Root configuration for the object navigation experiments.

    Holds agent/camera constants as class attributes and creates the reward
    configuration per instance.
    """

    # Agent movement and visibility settings.
    STEP_SIZE = 0.25
    ROTATION_DEGREES = 30.0
    VISIBILITY_DISTANCE = 1.0
    STOCHASTIC = True

    # Camera settings and the size given to the image sensors.
    HORIZONTAL_FIELD_OF_VIEW = 79
    CAMERA_WIDTH = 400
    CAMERA_HEIGHT = 300
    SCREEN_SIZE = 224

    MAX_STEPS = 500
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None

    # Subclasses replace this with the sensors they actually use.
    SENSORS: Sequence[Sensor] = []

    def __init__(self):
        """Create the `REWARD_CONFIG` dictionary on the instance."""
        self.REWARD_CONFIG = dict(
            step_penalty=-0.01,
            goal_success_reward=10.0,
            failed_stop_reward=0.0,
            shaping_weight=1.0,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return an empty tuple: no preprocessors are used by default."""
        return ()
================================================
FILE: projects/objectnav_baselines/experiments/objectnav_thor_base.py
================================================
import glob
import os
import platform
from abc import ABC
from math import ceil
from typing import Dict, Any, List, Optional, Sequence, Tuple, cast
import ai2thor
import ai2thor.build
import gym
import numpy as np
import torch
from packaging import version
from allenact.base_abstractions.experiment_config import MachineParams
from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import evenly_distribute_count_into_bins
from allenact.utils.system import get_logger
from allenact_plugins.ithor_plugin.ithor_util import (
horizontal_to_vertical_fov,
get_open_x_displays,
)
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from allenact_plugins.robothor_plugin.robothor_task_samplers import (
ObjectNavDatasetTaskSampler,
)
from allenact_plugins.robothor_plugin.robothor_tasks import ObjectNavTask
from projects.objectnav_baselines.experiments.objectnav_base import ObjectNavBaseConfig
# Import-time guard: reject ai2thor releases older than 3.2.0. Versions
# reported as "0.0.1", `None`, or starting with "0+" are exempt from the
# comparison and always accepted.
if not (
    ai2thor.__version__ in ["0.0.1", None]
    or ai2thor.__version__.startswith("0+")
    or version.parse(ai2thor.__version__) >= version.parse("3.2.0")
):
    raise ImportError(
        "To run the AI2-THOR ObjectNav baseline experiments you must use"
        " ai2thor version 3.2.0 or higher."
    )
import ai2thor.platform
class ObjectNavThorBaseConfig(ObjectNavBaseConfig, ABC):
    """The base config for all AI2-THOR ObjectNav experiments.

    Subclasses fill in the dataset directories, `TARGET_TYPES` and
    `THOR_COMMIT_ID`; this class turns them into the machine parameters and
    the task-sampler arguments used for training, validation and testing.
    """

    DEFAULT_NUM_TRAIN_PROCESSES: Optional[int] = None
    DEFAULT_TRAIN_GPU_IDS = tuple(range(torch.cuda.device_count()))
    DEFAULT_VALID_GPU_IDS = (torch.cuda.device_count() - 1,)
    DEFAULT_TEST_GPU_IDS = (torch.cuda.device_count() - 1,)

    TRAIN_DATASET_DIR: Optional[str] = None
    VAL_DATASET_DIR: Optional[str] = None
    TEST_DATASET_DIR: Optional[str] = None

    AGENT_MODE = "default"
    TARGET_TYPES: Optional[Sequence[str]] = None

    THOR_COMMIT_ID: Optional[str] = None
    # Used whenever `headless` is not passed to `__init__`.
    DEFAULT_THOR_IS_HEADLESS: bool = False

    ACTION_SPACE = gym.spaces.Discrete(len(ObjectNavTask.class_action_names()))

    def __init__(
        self,
        num_train_processes: Optional[int] = None,
        num_test_processes: Optional[int] = None,
        test_on_validation: bool = False,
        train_gpu_ids: Optional[Sequence[int]] = None,
        val_gpu_ids: Optional[Sequence[int]] = None,
        test_gpu_ids: Optional[Sequence[int]] = None,
        randomize_train_materials: bool = False,
        headless: Optional[bool] = None,
    ):
        """Store the run-time settings, using class defaults for `None`.

        # Parameters

        num_train_processes : Number of training processes; defaults to
            `DEFAULT_NUM_TRAIN_PROCESSES`.
        num_test_processes : Number of test processes; defaults to 10 when
            CUDA is available and 1 otherwise.
        test_on_validation : If `True`, testing uses the validation set.
        train_gpu_ids : Defaults to `DEFAULT_TRAIN_GPU_IDS`.
        val_gpu_ids : Defaults to `DEFAULT_VALID_GPU_IDS`.
        test_gpu_ids : Defaults to `DEFAULT_TEST_GPU_IDS`.
        randomize_train_materials : Forwarded to the training sampler as
            `randomize_materials_in_training`.
        headless : Whether to run AI2-THOR headless; `None` (the default)
            means "use `DEFAULT_THOR_IS_HEADLESS`". Previously the default
            was `False`, which made the class-level default unreachable.
        """
        super().__init__()

        def v_or_default(v, default):
            return v if v is not None else default

        self.num_train_processes = v_or_default(
            num_train_processes, self.DEFAULT_NUM_TRAIN_PROCESSES
        )
        self.num_test_processes = v_or_default(
            num_test_processes, (10 if torch.cuda.is_available() else 1)
        )
        self.test_on_validation = test_on_validation
        self.train_gpu_ids = v_or_default(train_gpu_ids, self.DEFAULT_TRAIN_GPU_IDS)
        self.val_gpu_ids = v_or_default(val_gpu_ids, self.DEFAULT_VALID_GPU_IDS)
        self.test_gpu_ids = v_or_default(test_gpu_ids, self.DEFAULT_TEST_GPU_IDS)

        self.headless = v_or_default(headless, self.DEFAULT_THOR_IS_HEADLESS)

        self.sampler_devices = self.train_gpu_ids
        self.randomize_train_materials = randomize_train_materials

    def env_args(self):
        """Return the keyword arguments used to configure the environment.

        When running headless the commit id comes from
        `ai2thor.build.COMMIT_ID` instead of `THOR_COMMIT_ID`. Depth
        rendering is enabled only if a `DepthSensorThor` is in `SENSORS`.
        """
        assert self.THOR_COMMIT_ID is not None

        return dict(
            width=self.CAMERA_WIDTH,
            height=self.CAMERA_HEIGHT,
            commit_id=(
                self.THOR_COMMIT_ID if not self.headless else ai2thor.build.COMMIT_ID
            ),
            stochastic=True,
            continuousMode=True,
            applyActionNoise=self.STOCHASTIC,
            rotateStepDegrees=self.ROTATION_DEGREES,
            visibilityDistance=self.VISIBILITY_DISTANCE,
            gridSize=self.STEP_SIZE,
            snapToGrid=False,
            agentMode=self.AGENT_MODE,
            fieldOfView=horizontal_to_vertical_fov(
                horizontal_fov_in_degrees=self.HORIZONTAL_FIELD_OF_VIEW,
                width=self.CAMERA_WIDTH,
                height=self.CAMERA_HEIGHT,
            ),
            include_private_scenes=False,
            renderDepthImage=any(isinstance(s, DepthSensorThor) for s in self.SENSORS),
        )

    def machine_params(self, mode="train", **kwargs):
        """Build the `MachineParams` for `mode` ('train', 'valid' or 'test').

        Without CUDA a single CPU device is used. Expert-action sensors are
        dropped outside of training before the observation spaces are
        computed.

        # Raises

        NotImplementedError : If `mode` is not one of the three known modes.
        """
        sampler_devices: Sequence[torch.device] = []
        devices: Sequence[torch.device]
        if mode == "train":
            workers_per_device = 1
            devices = (
                [torch.device("cpu")]
                if not torch.cuda.is_available()
                else cast(Tuple, self.train_gpu_ids) * workers_per_device
            )
            nprocesses = evenly_distribute_count_into_bins(
                self.num_train_processes, max(len(devices), 1)
            )
            sampler_devices = self.sampler_devices
        elif mode == "valid":
            nprocesses = 1
            devices = (
                [torch.device("cpu")]
                if not torch.cuda.is_available()
                else self.val_gpu_ids
            )
        elif mode == "test":
            devices = (
                [torch.device("cpu")]
                if not torch.cuda.is_available()
                else self.test_gpu_ids
            )
            nprocesses = evenly_distribute_count_into_bins(
                self.num_test_processes, max(len(devices), 1)
            )
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        sensors = [*self.SENSORS]
        if mode != "train":
            sensors = [s for s in sensors if not isinstance(s, ExpertActionSensor)]

        # The graph is only skipped when a non-training mode has no processes.
        sensor_preprocessor_graph = (
            SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(sensors).observation_spaces,
                preprocessors=self.preprocessors(),
            )
            if mode == "train"
            or (
                (isinstance(nprocesses, int) and nprocesses > 0)
                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
            )
            else None
        )

        return MachineParams(
            nprocesses=nprocesses,
            devices=devices,
            sampler_devices=(
                sampler_devices if mode == "train" else devices
            ),  # ignored with > 1 gpu_ids
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create an `ObjectNavDatasetTaskSampler` from `kwargs`."""
        return ObjectNavDatasetTaskSampler(**kwargs)

    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        """Return `num_parts + 1` rounded, evenly spaced int32 boundaries
        from 0 to `n` (inclusive) used to slice `n` items into parts."""
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]],
        seeds: Optional[List[int]],
        deterministic_cudnn: bool,
        include_expert_sensor: bool = True,
        allow_oversample: bool = False,
    ) -> Dict[str, Any]:
        """Build the sampler arguments for process `process_ind`.

        Scene names are read from the `*.json.gz` files in `scenes_dir` and
        split across `total_processes`; the slice belonging to
        `process_ind` is returned together with the environment arguments.

        # Raises

        RuntimeError : If `scenes_dir` contains no `*.json.gz` file, or if
            there are more processes than scenes and `allow_oversample` is
            `False`.
        """
        path = os.path.join(scenes_dir, "*.json.gz")
        # `os.path.basename` (rather than splitting on "/") keeps this
        # working with platform-specific path separators.
        scenes = [os.path.basename(scene).split(".")[0] for scene in glob.glob(path)]
        if len(scenes) == 0:
            raise RuntimeError(
                (
                    "Could find no scene dataset information in directory {}."
                    " Are you sure you've downloaded them? "
                    " If not, see https://allenact.org/installation/download-datasets/ information"
                    " on how this can be done."
                ).format(scenes_dir)
            )

        oversample_warning = (
            f"Warning: oversampling some of the scenes ({scenes}) to feed all processes ({total_processes})."
            " You can avoid this by setting a number of workers divisible by the number of scenes"
        )
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if not allow_oversample:
                raise RuntimeError(
                    f"Cannot have `total_processes > len(scenes)`"
                    f" ({total_processes} > {len(scenes)}) when `allow_oversample` is `False`."
                )

            if total_processes % len(scenes) != 0:
                get_logger().warning(oversample_warning)
            # Repeat the scene list, then trim it to a multiple of the
            # process count so every process gets the same number of scenes.
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        elif len(scenes) % total_processes != 0:
            get_logger().warning(oversample_warning)

        inds = self._partition_inds(len(scenes), total_processes)

        if not self.headless:
            x_display: Optional[str] = None
            if platform.system() == "Linux":
                x_displays = get_open_x_displays(throw_error_if_empty=True)

                # `devices` may be `None` (the public methods default to
                # it); treat that as "no GPU devices" for this check.
                non_cpu_devices = [
                    d for d in (devices or []) if d != torch.device("cpu")
                ]
                if len(non_cpu_devices) > len(x_displays):
                    get_logger().warning(
                        f"More GPU devices found than X-displays (devices: `{devices}`, x_displays: `{x_displays}`)."
                        f" This is not necessarily a bad thing but may mean that you're not using GPU memory as"
                        f" efficiently as possible. Consider following the instructions here:"
                        f" https://allenact.org/installation/installation-framework/#installation-of-ithor-ithor-plugin"
                        f" describing how to start an X-display on every GPU."
                    )

                x_display = x_displays[process_ind % len(x_displays)]

            device_dict = dict(x_display=x_display)
        else:
            device_dict = dict(
                gpu_device=devices[process_ind % len(devices)],
                platform=ai2thor.platform.CloudRendering,
            )

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "object_types": self.TARGET_TYPES,
            "max_steps": self.MAX_STEPS,
            "sensors": [
                s
                for s in self.SENSORS
                if (include_expert_sensor or not isinstance(s, ExpertActionSensor))
            ],
            "action_space": self.ACTION_SPACE,
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
            "env_args": {**self.env_args(), **device_dict},
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for one training process.

        Scenes may be oversampled, the dataset is looped, flipping is
        allowed and material randomization follows
        `self.randomize_train_materials`.
        """
        res = self._get_sampler_args_for_scene_split(
            scenes_dir=os.path.join(self.TRAIN_DATASET_DIR, "episodes"),
            process_ind=process_ind,
            total_processes=total_processes,
            devices=devices,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
            allow_oversample=True,
        )
        res["scene_directory"] = self.TRAIN_DATASET_DIR
        res["loop_dataset"] = True
        res["allow_flipping"] = True
        res["randomize_materials_in_training"] = self.randomize_train_materials
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for one validation process.

        Expert sensors are excluded, oversampling is disallowed and the
        dataset is not looped.
        """
        res = self._get_sampler_args_for_scene_split(
            scenes_dir=os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind=process_ind,
            total_processes=total_processes,
            devices=devices,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
            include_expert_sensor=False,
            allow_oversample=False,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for one test process.

        Falls back to the validation arguments (with a warning naming the
        actual reason) when `test_on_validation` is set or no
        `TEST_DATASET_DIR` is configured. Otherwise the test split is used
        with `all_metadata_available` disabled and a zero shaping weight.
        """
        if self.test_on_validation or self.TEST_DATASET_DIR is None:
            # Each message is logged for the condition it describes (the
            # two branches used to be swapped).
            if self.test_on_validation:
                get_logger().warning(
                    "`test_on_validation` is set to `True` and thus we will run evaluation on the validation set instead."
                    " Be careful as the saved metrics json and tensorboard files **will still be labeled as"
                    " 'test' rather than 'valid'**."
                )
            else:
                get_logger().warning(
                    "No test dataset dir detected, running test on validation set instead."
                    " Be careful as the saved metrics json and tensorboard files *will still be labeled as"
                    " 'test' rather than 'valid'**."
                )

            return self.valid_task_sampler_args(
                process_ind=process_ind,
                total_processes=total_processes,
                devices=devices,
                seeds=seeds,
                deterministic_cudnn=deterministic_cudnn,
            )

        else:
            res = self._get_sampler_args_for_scene_split(
                scenes_dir=os.path.join(self.TEST_DATASET_DIR, "episodes"),
                process_ind=process_ind,
                total_processes=total_processes,
                devices=devices,
                seeds=seeds,
                deterministic_cudnn=deterministic_cudnn,
                include_expert_sensor=False,
                allow_oversample=False,
            )
            res["env_args"]["all_metadata_available"] = False
            # Copy before overriding so `self.REWARD_CONFIG` is not mutated.
            res["rewards_config"] = {**res["rewards_config"], "shaping_weight": 0}
            res["scene_directory"] = self.TEST_DATASET_DIR
            res["loop_dataset"] = False
            return res
================================================
FILE: projects/objectnav_baselines/experiments/robothor/__init__.py
================================================
================================================
FILE: projects/objectnav_baselines/experiments/robothor/beta/README.md
================================================
# Beta experiments
This folder contains "beta" experiments, e.g. training experiments meant to be used
to test new features. These experiments may have bugs or not train well.
================================================
FILE: projects/objectnav_baselines/experiments/robothor/beta/__init__.py
================================================
================================================
FILE: projects/objectnav_baselines/experiments/robothor/beta/objectnav_robothor_rgb_resnetgru_ddppo_and_gbc.py
================================================
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.grouped_action_imitation import (
GroupedActionImitation,
)
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.utils.experiment_utils import (
Builder,
PipelineStage,
TrainingPipeline,
LinearDecay,
)
from allenact_plugins.ithor_plugin.ithor_sensors import (
RGBSensorThor,
GoalObjectTypeThorSensor,
)
from allenact_plugins.ithor_plugin.ithor_sensors import TakeEndActionThorNavSensor
from allenact_plugins.robothor_plugin import robothor_constants
from allenact_plugins.robothor_plugin.robothor_tasks import ObjectNavTask
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import ResNetPreprocessGRUActorCriticMixin
class ObjectNavRoboThorResNet18GRURGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """RoboTHOR RGB ObjectNav experiment trained jointly with PPO and a
    grouped-action imitation loss that separates the END action from all
    other actions."""

    SENSORS = (  # type:ignore
        RGBSensorThor(
            uuid="rgb_lowres",
            use_resnet_normalization=True,
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES
        ),
        TakeEndActionThorNavSensor(
            uuid="expert_group_action",
            nactions=len(ObjectNavTask.class_action_names()),
        ),
    )

    def __init__(self, **kwargs):
        """Pass `kwargs` to the base config, then build the helper that
        supplies this experiment's preprocessors and model."""
        super().__init__(**kwargs)

        helper_args = dict(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )
        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            **helper_args
        )

    def preprocessors(self):
        """Return the preprocessors provided by the model helper."""
        helper = self.preprocessing_and_model
        return helper.preprocessors()

    def create_model(self, **kwargs):
        """Create the model with one output per action in `ACTION_SPACE`;
        every entry of `kwargs` is forwarded to the helper."""
        n_actions = self.ACTION_SPACE.n
        return self.preprocessing_and_model.create_model(
            num_actions=n_actions, **kwargs
        )

    def training_pipeline(self, **kwargs):
        """Return a single-stage pipeline optimizing PPO and the grouped
        action imitation loss together, with a linearly decaying learning
        rate. `kwargs` is not used."""
        total_steps = int(300000000)
        learning_rate = 3e-4

        # Two action groups: every action except END, and END on its own.
        action_names = ObjectNavTask.class_action_names()
        non_end_group = {
            ind
            for ind, name in enumerate(action_names)
            if name != robothor_constants.END
        }
        end_group = {action_names.index(robothor_constants.END)}

        losses = {
            "ppo_loss": PPO(**PPOConfig),
            "grouped_action_imitation": GroupedActionImitation(
                nactions=len(ObjectNavTask.class_action_names()),
                action_groups=[non_end_group, end_group],
            ),
        }
        single_stage = PipelineStage(
            loss_names=["ppo_loss", "grouped_action_imitation"],
            max_stage_steps=total_steps,
        )
        lr_schedule = Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}
        )

        return TrainingPipeline(
            save_interval=5000000,
            metric_accumulate_interval=10000,
            optimizer_builder=Builder(optim.Adam, dict(lr=learning_rate)),
            num_mini_batch=1,
            update_repeats=4,
            max_grad_norm=0.5,
            num_steps=128,
            named_losses=losses,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[single_stage],
            lr_scheduler_builder=lr_schedule,
        )

    def tag(self):
        """Return the string identifying this experiment."""
        return "ObjectNav-RoboTHOR-RGB-ResNet18GRU-DDPPOAndGBC"
================================================
FILE: projects/objectnav_baselines/experiments/robothor/beta/objectnav_robothor_rgb_unfrozenresnet18gru_vdr_ddppo.py
================================================
from typing import Union, Optional, Any
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.algorithms.onpolicy_sync.storage import RolloutBlockStorage
# noinspection PyUnresolvedReferences
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact.embodiedai.storage.vdr_storage import (
DiscreteVisualDynamicsReplayStorage,
InverseDynamicsVDRLoss,
)
from allenact.utils.experiment_utils import Builder, TrainingSettings
from allenact.utils.experiment_utils import (
PipelineStage,
LinearDecay,
StageComponent,
)
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
from allenact_plugins.ithor_plugin.ithor_sensors import (
RGBSensorThor,
GoalObjectTypeThorSensor,
)
from allenact_plugins.robothor_plugin.robothor_environment import RoboThorEnvironment
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
ObjectNavUnfrozenResNetWithGRUActorCriticMixin,
update_with_auxiliary_losses,
)
def compute_inv_dyn_action_logits(
    model,
    img0,
    img1,
):
    """Return `model.inv_dyn_mlp` applied to the encodings of two images.

    Each image gets a leading dimension added, is passed to
    `model.visual_encoder` in a dict keyed by the encoder's `rgb_uuid`, and
    has that leading dimension squeezed back out. The two encodings are
    concatenated along dimension 1 before being given to the MLP.
    """
    encoder = model.visual_encoder
    obs_key = encoder.rgb_uuid

    def encode(img):
        return encoder({obs_key: img.unsqueeze(0)}).squeeze(0)

    encodings = (encode(img0), encode(img1))
    return model.inv_dyn_mlp(torch.cat(encodings, dim=1))
class LastActionSuccessSensor(
    Sensor[
        Union[IThorEnvironment, RoboThorEnvironment],
        Union[Task[IThorEnvironment], Task[RoboThorEnvironment]],
    ]
):
    """Sensor exposing the task's `last_action_success` value, multiplied
    by 1, under a `MultiBinary(1)` observation space."""

    def __init__(self, uuid: str = "last_action_success", **kwargs: Any):
        """Register the sensor under `uuid`; `kwargs` go to `Sensor`."""
        space = gym.spaces.MultiBinary(1)
        super().__init__(uuid=uuid, observation_space=space, **kwargs)

    def get_observation(
        self,
        env: Union[IThorEnvironment, RoboThorEnvironment],
        task: Optional[Task],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Return `1 * task.last_action_success`; `env` is not consulted."""
        succeeded = task.last_action_success
        return 1 * succeeded
class VisibleObjectTypesSensor(
    Sensor[
        Union[IThorEnvironment, RoboThorEnvironment],
        Union[Task[IThorEnvironment], Task[RoboThorEnvironment]],
    ]
):
    """Sensor producing one 0/1 entry per RoboTHOR target type, set to 1
    when an object of that type is marked visible in the last event."""

    def __init__(self, uuid: str = "visible_objects", **kwargs: Any):
        """Register the sensor and index the target types by position."""
        target_types = ObjectNavRoboThorBaseConfig.TARGET_TYPES
        space = gym.spaces.Box(low=0, high=1, shape=(len(target_types),))
        super().__init__(uuid=uuid, observation_space=space, **kwargs)

        self.type_to_index = {}
        for position, type_name in enumerate(
            ObjectNavRoboThorBaseConfig.TARGET_TYPES
        ):
            self.type_to_index[type_name] = position

    def get_observation(
        self,
        env: Union[IThorEnvironment, RoboThorEnvironment],
        task: Optional[Task],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Return the visibility indicator vector read from the metadata of
        `env.controller.last_event`; `task` is not consulted."""
        indicator = np.zeros((len(self.type_to_index),))
        for obj in env.controller.last_event.metadata["objects"]:
            if not obj["visible"]:
                continue
            slot = self.type_to_index.get(obj["objectType"])
            if slot is not None:
                indicator[slot] = 1.0
        return indicator
class ObjectNavRoboThorVdrTmpRGBExperimentConfig(ObjectNavRoboThorBaseConfig):
    """RoboTHOR RGB ObjectNav experiment combining on-policy PPO with an
    inverse-dynamics loss trained from a discrete visual-dynamics replay
    storage."""

    SENSORS = [
        RGBSensorThor(
            uuid="rgb_lowres",
            use_resnet_normalization=True,
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES
        ),
        LastActionSuccessSensor(),
        VisibleObjectTypesSensor(),
    ]

    def __init__(self, **kwargs):
        """Pass `kwargs` to the base config and build the model helper."""
        super().__init__(**kwargs)

        handler_args = dict(
            backbone="gnresnet18",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )
        self.model_creation_handler = ObjectNavUnfrozenResNetWithGRUActorCriticMixin(
            **handler_args
        )

    def training_pipeline(self, **kwargs):
        """Return a single-stage pipeline with two components: an on-policy
        one running every loss except `inv_dyn_vdr`, and a `vdr` one running
        only `inv_dyn_vdr` from the replay storage. `kwargs` is not used."""
        cuda_available = torch.cuda.is_available()

        # PPO
        total_steps = int(300000000)
        learning_rate = 3e-4
        rollout_len = 128

        # Each entry maps a loss name to a (loss, weight) pair.
        weighted_losses = update_with_auxiliary_losses(
            named_losses={"ppo_loss": (PPO(**PPOConfig), 1.0)},
            auxiliary_uuids=tuple(),
            multiple_beliefs=False,
        )

        shared_settings = TrainingSettings(
            num_mini_batch=1,
            update_repeats=4,
            max_grad_norm=0.5,
            num_steps=rollout_len,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            save_interval=5000000,
            metric_accumulate_interval=10000 if cuda_available else 1,
        )

        inv_dyn_loss = InverseDynamicsVDRLoss(
            compute_action_logits_fn=compute_inv_dyn_action_logits,
            img0_key="img0",
            img1_key="img1",
            action_key="action",
        )
        weighted_losses = {**weighted_losses, "inv_dyn_vdr": (inv_dyn_loss, 1.0)}

        ordered_names = sorted(weighted_losses.keys())

        storages = {
            "onpolicy": RolloutBlockStorage(init_size=rollout_len),
            "discrete_vdr": DiscreteVisualDynamicsReplayStorage(
                image_uuid="rgb_lowres",
                action_success_uuid="last_action_success",
                extra_targets=["visible_objects"],
                nactions=6,
                num_to_store_per_action=200 if cuda_available else 10,
                max_to_save_per_episode=6,
                target_batch_size=256 if cuda_available else 128,
            ),
        }

        onpolicy_component = StageComponent(
            uuid="onpolicy",
            storage_uuid="onpolicy",
            loss_names=[name for name in ordered_names if name != "inv_dyn_vdr"],
        )
        vdr_component = StageComponent(
            uuid="vdr",
            storage_uuid="discrete_vdr",
            loss_names=["inv_dyn_vdr"],
            training_settings=TrainingSettings(
                num_mini_batch=1,
                update_repeats=1,
            ),
        )
        only_stage = PipelineStage(
            loss_names=ordered_names,
            max_stage_steps=total_steps,
            loss_weights=[weighted_losses[name][1] for name in ordered_names],
            stage_components=[onpolicy_component, vdr_component],
        )

        return TrainingPipeline(
            training_settings=shared_settings,
            optimizer_builder=Builder(optim.Adam, dict(lr=learning_rate)),
            named_losses={
                name: pair[0] for name, pair in weighted_losses.items()
            },
            named_storages=storages,
            pipeline_stages=[only_stage],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}
            ),
        )

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model via the helper and attach `inv_dyn_mlp`, the
        head used by `compute_inv_dyn_action_logits`."""
        net = self.model_creation_handler.create_model(**kwargs)

        # Input is the concatenation of two image encodings (presumably
        # 2 x 512 features - TODO confirm); the 6 outputs match `nactions=6`
        # given to the replay storage.
        net.inv_dyn_mlp = nn.Sequential(
            nn.Linear(1024, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 6),
        )
        return net

    def tag(self):
        """Return the string identifying this experiment."""
        return "Objectnav-RoboTHOR-RGB-UnfrozenResNet18GRU-VDR"
================================================
FILE: projects/objectnav_baselines/experiments/robothor/clip/__init__.py
================================================
================================================
FILE: projects/objectnav_baselines/experiments/robothor/clip/objectnav_robothor_rgb_clipresnet50gru_ddppo.py
================================================
from typing import Sequence, Union
import torch.nn as nn
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
from allenact_plugins.ithor_plugin.ithor_sensors import (
GoalObjectTypeThorSensor,
RGBSensorThor,
)
from projects.objectnav_baselines.experiments.clip.mixins import (
ClipResNetPreprocessGRUActorCriticMixin,
)
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import ObjectNavPPOMixin
class ObjectNavRoboThorClipRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """RoboTHOR RGB ObjectNav experiment using a CLIP "RN50" ResNet
    preprocessing + GRU actor-critic helper and the shared ObjectNav PPO
    training pipeline."""

    CLIP_MODEL_TYPE = "RN50"

    SENSORS = [
        RGBSensorThor(
            uuid="rgb_lowres",
            use_resnet_normalization=True,
            # Normalize with the statistics published by the CLIP
            # preprocessor.
            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES
        ),
    ]

    def __init__(self, add_prev_actions: bool = False, **kwargs):
        """Pass `kwargs` to the base config, build the model helper and
        remember `add_prev_actions` for `create_model`."""
        super().__init__(**kwargs)

        helper_args = dict(
            sensors=self.SENSORS,
            clip_model_type=self.CLIP_MODEL_TYPE,
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )
        self.preprocessing_and_model = ClipResNetPreprocessGRUActorCriticMixin(
            **helper_args
        )
        self.add_prev_actions = add_prev_actions

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the ObjectNav PPO pipeline with no auxiliary losses.

        `kwargs` is accepted for interface compatibility and is not used.
        """
        pipeline = ObjectNavPPOMixin.training_pipeline(
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            multiple_beliefs=False,
            auxiliary_uuids=[],
        )
        return pipeline

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors provided by the model helper."""
        helper = self.preprocessing_and_model
        return helper.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model with one output per action in `ACTION_SPACE`,
        forwarding `add_prev_actions` and every entry of `kwargs`."""
        n_actions = self.ACTION_SPACE.n
        use_prev_actions = self.add_prev_actions
        return self.preprocessing_and_model.create_model(
            num_actions=n_actions, add_prev_actions=use_prev_actions, **kwargs
        )

    @classmethod
    def tag(cls):
        """Return the string identifying this experiment."""
        return "ObjectNav-RoboTHOR-RGB-ClipResNet50GRU-DDPPO"
================================================
FILE: projects/objectnav_baselines/experiments/robothor/clip/objectnav_robothor_rgb_clipresnet50x16gru_ddppo.py
================================================
from typing import Sequence, Union
import torch.nn as nn
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
from allenact_plugins.ithor_plugin.ithor_sensors import (
GoalObjectTypeThorSensor,
RGBSensorThor,
)
from projects.objectnav_baselines.experiments.clip.mixins import (
ClipResNetPreprocessGRUActorCriticMixin,
)
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import ObjectNavPPOMixin
class ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """RoboTHOR RGB ObjectNav experiment using a CLIP "RN50x16" ResNet
    preprocessing + GRU actor-critic helper and the shared ObjectNav PPO
    training pipeline."""

    CLIP_MODEL_TYPE = "RN50x16"

    SENSORS = [
        RGBSensorThor(
            uuid="rgb_lowres",
            use_resnet_normalization=True,
            # Normalize with the statistics published by the CLIP
            # preprocessor.
            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES
        ),
    ]

    def __init__(self, add_prev_actions: bool = False, **kwargs):
        """Pass `kwargs` to the base config, build the model helper and
        remember `add_prev_actions` for `create_model`."""
        super().__init__(**kwargs)

        helper_args = dict(
            sensors=self.SENSORS,
            clip_model_type=self.CLIP_MODEL_TYPE,
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )
        self.preprocessing_and_model = ClipResNetPreprocessGRUActorCriticMixin(
            **helper_args
        )
        self.add_prev_actions = add_prev_actions

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the ObjectNav PPO pipeline with no auxiliary losses.

        `kwargs` is accepted for interface compatibility and is not used.
        """
        pipeline = ObjectNavPPOMixin.training_pipeline(
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            multiple_beliefs=False,
            auxiliary_uuids=[],
        )
        return pipeline

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors provided by the model helper."""
        helper = self.preprocessing_and_model
        return helper.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model with one output per action in `ACTION_SPACE`,
        forwarding `add_prev_actions` and every entry of `kwargs`."""
        n_actions = self.ACTION_SPACE.n
        use_prev_actions = self.add_prev_actions
        return self.preprocessing_and_model.create_model(
            num_actions=n_actions, add_prev_actions=use_prev_actions, **kwargs
        )

    @classmethod
    def tag(cls):
        """Return the string identifying this experiment."""
        return "ObjectNav-RoboTHOR-RGB-ClipResNet50x16GRU-DDPPO"
================================================
FILE: projects/objectnav_baselines/experiments/robothor/objectnav_robothor_base.py
================================================
import os
from abc import ABC
from typing import Optional, List, Any, Dict
import torch
from allenact.utils.misc_utils import prepare_locals_for_super
from projects.objectnav_baselines.experiments.objectnav_thor_base import (
ObjectNavThorBaseConfig,
)
class ObjectNavRoboThorBaseConfig(ObjectNavThorBaseConfig, ABC):
    """The base config for all RoboTHOR ObjectNav experiments.

    Declares the THOR commit ids, agent mode, default training process
    count, dataset directories and target object types shared by the
    RoboTHOR ObjectNav experiments, and overrides
    `train_task_sampler_args` to switch the commit id when training
    materials are randomized.
    """

    # Commit ids of the THOR builds (presumably consumed by the parent
    # config when creating environments -- confirm). The second one is
    # written into the training env args by `train_task_sampler_args`
    # when `self.randomize_train_materials` is truthy.
    THOR_COMMIT_ID = "bad5bc2b250615cb766ffb45d455c211329af17e"
    THOR_COMMIT_ID_FOR_RAND_MATERIALS = "9549791ce2e7f472063a10abb1fb7664159fec23"

    AGENT_MODE = "locobot"

    # 60 training processes when CUDA is available, otherwise a single one.
    DEFAULT_NUM_TRAIN_PROCESSES = 60 if torch.cuda.is_available() else 1

    # Dataset directories are resolved against the working directory at the
    # time this class body is executed (i.e. at import time).
    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-objectnav/train")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-objectnav/val")
    TEST_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-objectnav/test")

    # Target object types, stored as an alphabetically sorted tuple.
    TARGET_TYPES = tuple(
        sorted(
            [
                "AlarmClock",
                "Apple",
                "BaseballBat",
                "BasketBall",
                "Bowl",
                "GarbageCan",
                "HousePlant",
                "Laptop",
                "Mug",
                "SprayBottle",
                "Television",
                "Vase",
            ]
        )
    )

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the parent's training task sampler arguments, with the
        environment commit id replaced when materials are randomized.

        All parameters are forwarded to the parent implementation.

        # Returns

        The dictionary produced by the parent, with
        `["env_args"]["commit_id"]` set to
        `THOR_COMMIT_ID_FOR_RAND_MATERIALS` if
        `self.randomize_train_materials` is truthy.
        """
        # `locals()` is captured here, so the set of local names at this
        # point determines what is forwarded; do not introduce new locals
        # above this call. (Presumably `prepare_locals_for_super` strips
        # entries such as `self` -- confirm against its implementation.)
        kwargs = super(ObjectNavRoboThorBaseConfig, self).train_task_sampler_args(
            **prepare_locals_for_super(locals())
        )
        if self.randomize_train_materials:
            kwargs["env_args"]["commit_id"] = self.THOR_COMMIT_ID_FOR_RAND_MATERIALS
        return kwargs
================================================
FILE: projects/objectnav_baselines/experiments/robothor/objectnav_robothor_depth_resnet18gru_ddppo.py
================================================
from typing import Sequence, Union
import torch.nn as nn
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
ResNetPreprocessGRUActorCriticMixin,
ObjectNavPPOMixin,
)
class ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """An Object Navigation experiment configuration in RoboThor with Depth
    input.

    Depth frames are preprocessed by a ResNet18 and fed to a GRU
    actor-critic through `ResNetPreprocessGRUActorCriticMixin`; the
    training pipeline comes from `ObjectNavPPOMixin`.

    NOTE(review): the class name says "RGB" although only a depth sensor
    is configured; the name is left unchanged so existing references keep
    working.
    """

    SENSORS = (
        DepthSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
    )

    def __init__(self, **kwargs):
        """Create the config; `kwargs` go to the parent constructor."""
        super().__init__(**kwargs)

        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline (`kwargs` are not used)."""
        return ObjectNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors produced by the ResNet mixin."""
        return self.preprocessing_and_model.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Build the model through the ResNet mixin, forwarding `kwargs`."""
        return self.preprocessing_and_model.create_model(
            num_actions=self.ACTION_SPACE.n, **kwargs
        )

    # Declared as a classmethod to match the sibling experiment configs;
    # it remains callable on instances exactly as before.
    @classmethod
    def tag(cls):
        """Return the experiment's tag string."""
        return "ObjectNav-RoboTHOR-Depth-ResNet18GRU-DDPPO"
================================================
FILE: projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet18gru_dagger.py
================================================
from typing import Sequence, Union
import torch.nn as nn
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.base_abstractions.sensor import ExpertActionSensor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import (
GoalObjectTypeThorSensor,
RGBSensorThor,
)
from allenact_plugins.robothor_plugin.robothor_tasks import ObjectNavTask
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
ResNetPreprocessGRUActorCriticMixin,
ObjectNavDAggerMixin,
)
class ObjectNavRoboThorRGBDAggerExperimentConfig(ObjectNavRoboThorBaseConfig):
    """RoboTHOR ObjectNav experiment with RGB input trained by imitation.

    In addition to the RGB and goal sensors, an `ExpertActionSensor` is
    configured; the training pipeline comes from `ObjectNavDAggerMixin`.
    """

    SENSORS = [
        RGBSensorThor(
            uuid="rgb_lowres",
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
        ExpertActionSensor(
            nactions=len(ObjectNavTask.class_action_names()),
        ),
    ]

    def __init__(self, **kwargs):
        """Create the config; `kwargs` go to the parent constructor."""
        super().__init__(**kwargs)

        # NOTE(review): this writes into `REWARD_CONFIG` in place. If that
        # dict is a class-level attribute shared with other configs, the
        # change is visible to them as well -- confirm this is intended.
        self.REWARD_CONFIG["shaping"] = 0

        resnet_mixin = ResNetPreprocessGRUActorCriticMixin(
            goal_sensor_type=GoalObjectTypeThorSensor,
            screen_size=self.SCREEN_SIZE,
            resnet_type="RN18",
            sensors=self.SENSORS,
        )
        self.preprocessing_and_model = resnet_mixin

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the DAgger training pipeline (`kwargs` are not used)."""
        rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD
        return ObjectNavDAggerMixin.training_pipeline(
            advance_scene_rollout_period=rollout_period,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors produced by the ResNet mixin."""
        mixin = self.preprocessing_and_model
        return mixin.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Build the model through the ResNet mixin, forwarding `kwargs`."""
        action_count = self.ACTION_SPACE.n
        return self.preprocessing_and_model.create_model(
            num_actions=action_count, **kwargs
        )

    @classmethod
    def tag(cls):
        """Return the experiment's tag string."""
        return "ObjectNav-RoboTHOR-RGB-ResNet18GRU-DAgger"
================================================
FILE: projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet18gru_ddppo.py
================================================
from typing import Sequence, Union
import torch.nn as nn
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import (
GoalObjectTypeThorSensor,
RGBSensorThor,
)
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
ResNetPreprocessGRUActorCriticMixin,
ObjectNavPPOMixin,
)
class ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """RoboTHOR ObjectNav experiment with RGB input and a ResNet18 encoder.

    Preprocessing and model construction are delegated to
    `ResNetPreprocessGRUActorCriticMixin`; the training pipeline comes
    from `ObjectNavPPOMixin`.
    """

    SENSORS = [
        RGBSensorThor(
            uuid="rgb_lowres",
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
    ]

    def __init__(self, **kwargs):
        """Create the config; `kwargs` go to the parent constructor."""
        super().__init__(**kwargs)

        resnet_mixin = ResNetPreprocessGRUActorCriticMixin(
            goal_sensor_type=GoalObjectTypeThorSensor,
            screen_size=self.SCREEN_SIZE,
            resnet_type="RN18",
            sensors=self.SENSORS,
        )
        self.preprocessing_and_model = resnet_mixin

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline (`kwargs` are not used)."""
        pipeline_options = dict(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )
        return ObjectNavPPOMixin.training_pipeline(**pipeline_options)

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors produced by the ResNet mixin."""
        mixin = self.preprocessing_and_model
        return mixin.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Build the model through the ResNet mixin, forwarding `kwargs`."""
        action_count = self.ACTION_SPACE.n
        return self.preprocessing_and_model.create_model(
            num_actions=action_count, **kwargs
        )

    @classmethod
    def tag(cls):
        """Return the experiment's tag string."""
        return "ObjectNav-RoboTHOR-RGB-ResNet18GRU-DDPPO"
================================================
FILE: projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet50gru_ddppo.py
================================================
from typing import Sequence, Union
import torch.nn as nn
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import (
GoalObjectTypeThorSensor,
RGBSensorThor,
)
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
ResNetPreprocessGRUActorCriticMixin,
ObjectNavPPOMixin,
)
class ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """An Object Navigation experiment configuration in RoboThor with RGB
    input.

    RGB frames are preprocessed by a ResNet50 and fed to a GRU
    actor-critic through `ResNetPreprocessGRUActorCriticMixin`; the
    training pipeline comes from `ObjectNavPPOMixin`.
    """

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
    ]

    def __init__(self, **kwargs):
        """Create the config; `kwargs` go to the parent constructor."""
        super().__init__(**kwargs)

        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            sensors=self.SENSORS,
            resnet_type="RN50",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline (`kwargs` are not used)."""
        return ObjectNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors produced by the ResNet mixin."""
        return self.preprocessing_and_model.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Build the model through the ResNet mixin, forwarding `kwargs`."""
        return self.preprocessing_and_model.create_model(
            num_actions=self.ACTION_SPACE.n, **kwargs
        )

    # Declared as a classmethod to match the sibling experiment configs;
    # it remains callable on instances exactly as before.
    @classmethod
    def tag(cls):
        """Return the experiment's tag string."""
        return "ObjectNav-RoboTHOR-RGB-ResNet50GRU-DDPPO"
================================================
FILE: projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_unfrozenresnet18gru_ddppo.py
================================================
import torch.nn as nn
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import (
RGBSensorThor,
GoalObjectTypeThorSensor,
)
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
ObjectNavUnfrozenResNetWithGRUActorCriticMixin,
ObjectNavPPOMixin,
)
class ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """An Object Navigation experiment configuration in RoboThor with RGB input
    without preprocessing by frozen ResNet (instead, a trainable ResNet).

    Model construction is delegated to
    `ObjectNavUnfrozenResNetWithGRUActorCriticMixin` with the
    `gnresnet18` backbone; the training pipeline comes from
    `ObjectNavPPOMixin`.
    """

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
    ]

    def __init__(self, **kwargs):
        """Create the config; `kwargs` go to the parent constructor."""
        super().__init__(**kwargs)

        self.model_creation_handler = ObjectNavUnfrozenResNetWithGRUActorCriticMixin(
            backbone="gnresnet18",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline (`kwargs` are not used)."""
        return ObjectNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def create_model(self, **kwargs) -> nn.Module:
        """Build the model through the mixin, forwarding `kwargs`."""
        return self.model_creation_handler.create_model(**kwargs)

    # Declared as a classmethod to match the sibling experiment configs;
    # it remains callable on instances exactly as before.
    @classmethod
    def tag(cls):
        """Return the experiment's tag string."""
        return "ObjectNav-RoboTHOR-RGB-UnfrozenResNet18GRU-DDPPO"
================================================
FILE: projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnet18gru_ddppo.py
================================================
from typing import Sequence, Union
import torch.nn as nn
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import (
RGBSensorThor,
GoalObjectTypeThorSensor,
)
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
ResNetPreprocessGRUActorCriticMixin,
ObjectNavPPOMixin,
)
class ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """An Object Navigation experiment configuration in RoboThor with RGBD
    input.

    Both the RGB and depth frames are preprocessed by a ResNet18 and fed
    to a GRU actor-critic through `ResNetPreprocessGRUActorCriticMixin`;
    the training pipeline comes from `ObjectNavPPOMixin`.
    """

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        DepthSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
    ]

    def __init__(self, **kwargs):
        """Create the config; `kwargs` go to the parent constructor."""
        super().__init__(**kwargs)

        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline (`kwargs` are not used)."""
        return ObjectNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors produced by the ResNet mixin."""
        return self.preprocessing_and_model.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Build the model through the ResNet mixin, forwarding `kwargs`."""
        return self.preprocessing_and_model.create_model(
            num_actions=self.ACTION_SPACE.n, **kwargs
        )

    # Declared as a classmethod to match the sibling experiment configs;
    # it remains callable on instances exactly as before.
    @classmethod
    def tag(cls):
        """Return the experiment's tag string."""
        return "ObjectNav-RoboTHOR-RGBD-ResNet18GRU-DDPPO"
================================================
FILE: projects/objectnav_baselines/mixins.py
================================================
from typing import Sequence, Union, Optional, Dict, Tuple, Type
import attr
import gym
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from torchvision import models
from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
AbstractActorCriticLoss,
)
from allenact.algorithms.onpolicy_sync.losses.imitation import Imitation
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.base_abstractions.sensor import Sensor
from allenact.embodiedai.aux_losses.losses import (
InverseDynamicsLoss,
TemporalDistanceLoss,
CPCA1Loss,
CPCA2Loss,
CPCA4Loss,
CPCA8Loss,
CPCA16Loss,
MultiAuxTaskNegEntropyLoss,
CPCA1SoftMaxLoss,
CPCA2SoftMaxLoss,
CPCA4SoftMaxLoss,
CPCA8SoftMaxLoss,
CPCA16SoftMaxLoss,
)
from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor
from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor
from allenact.utils.experiment_utils import (
Builder,
TrainingPipeline,
PipelineStage,
LinearDecay,
)
from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
from allenact_plugins.navigation_plugin.objectnav.models import (
ResnetTensorNavActorCritic,
ObjectNavActorCritic,
)
from allenact_plugins.robothor_plugin.robothor_tasks import ObjectNavTask
@attr.s(kw_only=True)
class ResNetPreprocessGRUActorCriticMixin:
    """Builds ResNet preprocessors and a `ResnetTensorNavActorCritic`.

    # Attributes

    sensors : The sensors of the experiment; the first `RGBSensor` and the
        first `DepthSensor` found (if any) each get a preprocessor.
    resnet_type : One of `"RN18"`, `"RN34"`, `"RN50"`, `"RN101"`, `"RN152"`.
    screen_size : Used as both the input height and width of the
        preprocessors.
    goal_sensor_type : Sensor class used to locate the goal sensor's uuid.
    """

    sensors: Sequence[Sensor] = attr.ib()
    resnet_type: str = attr.ib()
    screen_size: int = attr.ib()
    goal_sensor_type: Type[Sensor] = attr.ib()

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return one `ResNetPreprocessor` per available visual modality.

        The RGB preprocessor (output uuid `"rgb_resnet_imagenet"`) comes
        first, followed by the depth preprocessor (output uuid
        `"depth_resnet_imagenet"`).

        # Raises

        NotImplementedError : If `resnet_type` is not a supported value.
        """
        if self.resnet_type in ("RN18", "RN34"):
            channels, out_h, out_w = 512, 7, 7
        elif self.resnet_type in ("RN50", "RN101", "RN152"):
            channels, out_h, out_w = 2048, 7, 7
        else:
            raise NotImplementedError(
                "`RESNET_TYPE` must be one 'RNx' with x equaling one of"
                " 18, 34, 50, 101, or 152."
            )

        modalities = (
            (RGBSensor, "rgb_resnet_imagenet"),
            (DepthSensor, "depth_resnet_imagenet"),
        )

        built = []
        for sensor_cls, output_uuid in modalities:
            sensor = next(
                (s for s in self.sensors if isinstance(s, sensor_cls)), None
            )
            if sensor is None:
                continue

            # E.g. "RN18" -> torchvision's `models.resnet18`.
            resnet_factory = getattr(
                models, f"resnet{self.resnet_type.replace('RN', '')}"
            )
            built.append(
                ResNetPreprocessor(
                    input_height=self.screen_size,
                    input_width=self.screen_size,
                    output_width=out_w,
                    output_height=out_h,
                    output_dims=channels,
                    pool=False,
                    torchvision_resnet_model=resnet_factory,
                    input_uuids=[sensor.uuid],
                    output_uuid=output_uuid,
                )
            )
        return built

    def create_model(self, **kwargs) -> nn.Module:
        """Create the `ResnetTensorNavActorCritic` for the given sensors.

        `kwargs` must contain `"sensor_preprocessor_graph"`, whose
        `observation_spaces` is used as the model's observation space;
        any other entries are ignored.
        """
        rgb_present = False
        depth_present = False
        for sensor in self.sensors:
            rgb_present = rgb_present or isinstance(sensor, RGBSensor)
            depth_present = depth_present or isinstance(sensor, DepthSensor)

        goal_uuid = None
        for sensor in self.sensors:
            if isinstance(sensor, self.goal_sensor_type):
                goal_uuid = sensor.uuid
                break

        rgb_key = "rgb_resnet_imagenet" if rgb_present else None
        depth_key = "depth_resnet_imagenet" if depth_present else None

        action_space = gym.spaces.Discrete(len(ObjectNavTask.class_action_names()))
        observation_space = kwargs["sensor_preprocessor_graph"].observation_spaces

        return ResnetTensorNavActorCritic(
            action_space=action_space,
            observation_space=observation_space,
            goal_sensor_uuid=goal_uuid,
            rgb_resnet_preprocessor_uuid=rgb_key,
            depth_resnet_preprocessor_uuid=depth_key,
            hidden_size=512,
            goal_dims=32,
        )
@attr.s(kw_only=True)
class ObjectNavUnfrozenResNetWithGRUActorCriticMixin:
    """Builds an `ObjectNavActorCritic` from the configured sensors.

    # Attributes

    backbone : Forwarded to the model as `backbone`.
    sensors : The sensors of the experiment; the uuids of the first
        `RGBSensor`, `DepthSensor` and `GoalObjectTypeThorSensor` found
        are handed to the model.
    auxiliary_uuids : Forwarded to the model; also influences the hidden
        size (see `create_model`).
    add_prev_actions : Forwarded to the model.
    multiple_beliefs : Forwarded to the model; also influences the hidden
        size (see `create_model`).
    belief_fusion : Forwarded to the model as `beliefs_fusion`.
    """

    backbone: str = attr.ib()
    sensors: Sequence[Sensor] = attr.ib()
    auxiliary_uuids: Sequence[str] = attr.ib()
    add_prev_actions: bool = attr.ib()
    multiple_beliefs: bool = attr.ib()
    belief_fusion: Optional[str] = attr.ib()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the `ObjectNavActorCritic`.

        `kwargs` must contain `"sensor_preprocessor_graph"`, whose
        `observation_spaces` is used as the model's observation space.

        # Raises

        ValueError : If `sensors` contains no `GoalObjectTypeThorSensor`.
        """
        rgb_uuid = next(
            (s.uuid for s in self.sensors if isinstance(s, RGBSensor)), None
        )
        depth_uuid = next(
            (s.uuid for s in self.sensors if isinstance(s, DepthSensor)), None
        )

        # A bare `next(...)` would surface a missing goal sensor as a
        # `StopIteration`, which is easy to misread and can be silently
        # swallowed by surrounding iteration code; fail explicitly instead.
        goal_sensor = next(
            (s for s in self.sensors if isinstance(s, GoalObjectTypeThorSensor)),
            None,
        )
        if goal_sensor is None:
            raise ValueError(
                "`sensors` must contain a `GoalObjectTypeThorSensor` to create"
                " the ObjectNav model."
            )
        goal_sensor_uuid = goal_sensor.uuid

        return ObjectNavActorCritic(
            action_space=gym.spaces.Discrete(len(ObjectNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            rgb_uuid=rgb_uuid,
            depth_uuid=depth_uuid,
            goal_sensor_uuid=goal_sensor_uuid,
            # A smaller hidden size is used when several beliefs are kept.
            hidden_size=(
                192 if self.multiple_beliefs and len(self.auxiliary_uuids) > 1 else 512
            ),
            backbone=self.backbone,
            resnet_baseplanes=32,
            object_type_embedding_dim=32,
            num_rnn_layers=1,
            rnn_type="GRU",
            add_prev_actions=self.add_prev_actions,
            action_embed_size=6,
            auxiliary_uuids=self.auxiliary_uuids,
            multiple_beliefs=self.multiple_beliefs,
            beliefs_fusion=self.belief_fusion,
        )
class ObjectNavDAggerMixin:
    """Provides the imitation-learning (DAgger) training pipeline."""

    @staticmethod
    def training_pipeline(
        advance_scene_rollout_period: Optional[int] = None,
    ) -> TrainingPipeline:
        """Return a two-stage imitation training pipeline.

        The first stage keeps the teacher forcing probability at 1.0 for
        `int(5e6)` steps; the second stage linearly decays it from 1.0 to
        0.0 over `int(5e6)` steps and lasts for the rest of the
        300,000,000 total steps. The learning rate is scheduled with a
        `LinearDecay` over the total number of steps.

        # Parameters

        advance_scene_rollout_period : Forwarded to `TrainingPipeline`.
        """
        total_steps = int(300000000)
        forced_steps = int(5e6)
        decay_steps = int(5e6)
        unforced_steps = total_steps - forced_steps - decay_steps
        assert unforced_steps > 0

        # Accumulate metrics far less often when training on GPU.
        if torch.cuda.is_available():
            metric_interval = 10000
        else:
            metric_interval = 1

        fully_forced_stage = PipelineStage(
            loss_names=["imitation_loss"],
            max_stage_steps=forced_steps,
            teacher_forcing=LinearDecay(
                startp=1.0,
                endp=1.0,
                steps=forced_steps,
            ),
        )
        annealed_stage = PipelineStage(
            loss_names=["imitation_loss"],
            max_stage_steps=decay_steps + unforced_steps,
            teacher_forcing=LinearDecay(
                startp=1.0,
                endp=0.0,
                steps=decay_steps,
            ),
        )

        return TrainingPipeline(
            save_interval=5000000,
            metric_accumulate_interval=metric_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=3e-4)),
            num_mini_batch=1,
            update_repeats=4,
            max_grad_norm=0.5,
            num_steps=128,
            named_losses={
                "imitation_loss": Imitation(),
            },
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=advance_scene_rollout_period,
            pipeline_stages=[fully_forced_stage, annealed_stage],
            lr_scheduler_builder=Builder(
                LambdaLR,
                {"lr_lambda": LinearDecay(steps=total_steps)},
            ),
        )
def update_with_auxiliary_losses(
    named_losses: Dict[str, Tuple[AbstractActorCriticLoss, float]],
    auxiliary_uuids: Sequence[str],
    multiple_beliefs: bool,
) -> Dict[str, Tuple[AbstractActorCriticLoss, float]]:
    """Add the selected auxiliary losses (and weights) to `named_losses`.

    # Parameters

    named_losses : Mapping from loss name to `(loss, weight)`; it is
        updated in place.
    auxiliary_uuids : UUIDs of the auxiliary losses to add. Each must be
        one of the UUIDs known to this function, otherwise a `KeyError`
        is raised.
    multiple_beliefs : If truthy, a `MultiAuxTaskNegEntropyLoss` with
        weight 0.01 is added as well.

    # Returns

    The same `named_losses` object that was passed in.
    """
    # Overall scale applied to every auxiliary loss weight.
    aux_loss_total_weight = 2.0
    base_weight = 0.05 * aux_loss_total_weight

    # Every known auxiliary loss is instantiated, whether selected or not.
    catalogue: Dict[str, Tuple[AbstractActorCriticLoss, float]] = {}

    catalogue[InverseDynamicsLoss.UUID] = (
        InverseDynamicsLoss(
            subsample_rate=0.2,
            subsample_min_num=10,  # TODO: test its effects
        ),
        base_weight,
    )
    catalogue[TemporalDistanceLoss.UUID] = (
        TemporalDistanceLoss(
            num_pairs=8,
            epsiode_len_min=5,  # TODO: test its effects
        ),
        0.2 * aux_loss_total_weight,
    )

    cpca_losses = (CPCA1Loss, CPCA2Loss, CPCA4Loss, CPCA8Loss, CPCA16Loss)
    for loss_cls in cpca_losses:
        # TODO: test the effects of the subsample rate
        catalogue[loss_cls.UUID] = (loss_cls(subsample_rate=0.2), base_weight)

    cpca_softmax_losses = (
        CPCA1SoftMaxLoss,
        CPCA2SoftMaxLoss,
        CPCA4SoftMaxLoss,
        CPCA8SoftMaxLoss,
        CPCA16SoftMaxLoss,
    )
    for loss_cls in cpca_softmax_losses:
        catalogue[loss_cls.UUID] = (loss_cls(subsample_rate=1.0), base_weight)

    # Resolve every requested uuid before touching `named_losses`, so an
    # unknown uuid leaves the input unmodified.
    chosen = {}
    for aux_uuid in auxiliary_uuids:
        chosen[aux_uuid] = catalogue[aux_uuid]
    named_losses.update(chosen)

    if multiple_beliefs:
        # The weight entropy loss is added automatically.
        named_losses[MultiAuxTaskNegEntropyLoss.UUID] = (
            MultiAuxTaskNegEntropyLoss(auxiliary_uuids),
            0.01,
        )

    return named_losses
class ObjectNavPPOMixin:
    """Provides the PPO training pipeline used by ObjectNav experiments."""

    @staticmethod
    def training_pipeline(
        auxiliary_uuids: Sequence[str],
        multiple_beliefs: bool,
        normalize_advantage: bool = True,
        advance_scene_rollout_period: Optional[int] = None,
        lr=3e-4,
        num_mini_batch=1,
        update_repeats=4,
        num_steps=128,
        save_interval=5000000,
        log_interval=10000 if torch.cuda.is_available() else 1,
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
        max_grad_norm=0.5,
        anneal_lr: bool = True,
        extra_losses: Optional[Dict[str, Tuple[AbstractActorCriticLoss, float]]] = None,
    ) -> TrainingPipeline:
        """Return a single-stage PPO training pipeline of 300,000,000 steps.

        # Parameters

        auxiliary_uuids : UUIDs of auxiliary losses to train alongside PPO.
        multiple_beliefs : Forwarded to `update_with_auxiliary_losses`.
        normalize_advantage : Forwarded to the `PPO` loss.
        extra_losses : Optional additional `name -> (loss, weight)` entries.
        anneal_lr : If truthy, the learning rate follows a `LinearDecay`
            over the whole stage; otherwise no scheduler is used.

        The remaining parameters are forwarded to `TrainingPipeline` or,
        for `lr`, to the Adam optimizer builder.
        """
        stage_steps = int(300000000)

        additional = {} if extra_losses is None else extra_losses
        weighted_losses = {
            "ppo_loss": (
                PPO(**PPOConfig, normalize_advantage=normalize_advantage),
                1.0,
            ),
            **additional,
        }
        weighted_losses = update_with_auxiliary_losses(
            named_losses=weighted_losses,
            auxiliary_uuids=auxiliary_uuids,
            multiple_beliefs=multiple_beliefs,
        )

        # Split the `(loss, weight)` pairs into what the pipeline and its
        # single stage each expect.
        losses_by_name = {}
        for loss_name, loss_and_weight in weighted_losses.items():
            losses_by_name[loss_name] = loss_and_weight[0]

        single_stage = PipelineStage(
            loss_names=list(weighted_losses.keys()),
            max_stage_steps=stage_steps,
            loss_weights=[pair[1] for pair in weighted_losses.values()],
        )

        if anneal_lr:
            scheduler_builder = Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=stage_steps)}
            )
        else:
            scheduler_builder = None

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses=losses_by_name,
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=advance_scene_rollout_period,
            pipeline_stages=[single_stage],
            lr_scheduler_builder=scheduler_builder,
        )
================================================
FILE: projects/pointnav_baselines/README.md
================================================
# Baseline models for the Point Navigation task in the Habitat, RoboTHOR and iTHOR environments
This project contains the code for training baseline models on the PointNav task. In this setting the agent
spawns at a location in an environment and is tasked to move to another location. The agent is given a "compass"
that tells it the distance and bearing to the target position at every frame. Once the agent is confident that
it has reached the end it executes the `END` action which terminates the episode. If the agent is within a set
distance to the target (in our case 0.2 meters) the agent succeeded, else it failed.
Provided are experiment configs for training a simple convolutional model with
a GRU using `RGB`, `Depth` or `RGBD` as inputs in [Habitat](https://github.com/facebookresearch/habitat-sim),
[RoboTHOR](https://ai2thor.allenai.org/robothor/) and [iTHOR](https://ai2thor.allenai.org/ithor/).
The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)
Reinforcement Learning Algorithm.
To train an experiment run the following command from the `allenact` root directory:
```bash
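python main.py -o <PATH_TO_OUTPUT> -c -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> <EXPERIMENT_NAME>
```
Where `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights
and logs to be stored, `<BASE_DIRECTORY_OF_YOUR_EXPERIMENT>` is the directory where our
experiment file is located and `<EXPERIMENT_NAME>` is the name of the python module containing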
the experiment. An example usage of this command would be:
```bash
python main.py -o storage/pointnav-robothor-depth -b projects/pointnav_baselines/experiments/robothor/ pointnav_robothor_depth_simpleconvgru_ddppo
```
This trains a simple convolutional neural network with a GRU using Depth input on the
PointNav task in the RoboTHOR environment and stores the model weights and logs
to `storage/pointnav-robothor-depth`.
================================================
FILE: projects/pointnav_baselines/__init__.py
================================================
================================================
FILE: projects/pointnav_baselines/experiments/__init__.py
================================================
================================================
FILE: projects/pointnav_baselines/experiments/habitat/__init__.py
================================================
================================================
FILE: projects/pointnav_baselines/experiments/habitat/clip/__init__.py
================================================
================================================
FILE: projects/pointnav_baselines/experiments/habitat/clip/pointnav_habitat_rgb_clipresnet50gru_ddppo.py
================================================
from typing import Sequence, Union
import torch.nn as nn
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
from allenact_plugins.habitat_plugin.habitat_sensors import (
RGBSensorHabitat,
TargetCoordinatesSensorHabitat,
)
from projects.objectnav_baselines.experiments.clip.mixins import (
ClipResNetPreprocessGRUActorCriticMixin,
)
from projects.pointnav_baselines.experiments.habitat.pointnav_habitat_base import (
PointNavHabitatBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin
class PointNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig(PointNavHabitatBaseConfig):
    """A Point Navigation experiment configuration in Habitat with RGB
    input.

    The RGB sensor is normalized with the CLIP mean/stdev statistics and
    the preprocessing/model construction is delegated to a
    `ClipResNetPreprocessGRUActorCriticMixin` configured with the `RN50`
    CLIP model. The training pipeline comes from `PointNavPPOMixin`.
    """

    CLIP_MODEL_TYPE = "RN50"

    SENSORS = [
        RGBSensorHabitat(
            height=PointNavHabitatBaseConfig.SCREEN_SIZE,
            width=PointNavHabitatBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
    ]

    def __init__(self, add_prev_actions: bool = False, **kwargs):
        """Create the config.

        # Parameters

        add_prev_actions : Forwarded to the mixin's `create_model`.
        kwargs : Passed unchanged to the parent config's constructor.
        """
        super().__init__(**kwargs)

        clip_mixin = ClipResNetPreprocessGRUActorCriticMixin(
            goal_sensor_type=TargetCoordinatesSensorHabitat,
            screen_size=self.SCREEN_SIZE,
            clip_model_type=self.CLIP_MODEL_TYPE,
            sensors=self.SENSORS,
        )
        self.preprocessing_and_model = clip_mixin
        self.add_prev_actions = add_prev_actions

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline (`kwargs` are not used)."""
        pipeline_options = dict(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )
        return PointNavPPOMixin.training_pipeline(**pipeline_options)

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors produced by the CLIP mixin."""
        mixin = self.preprocessing_and_model
        return mixin.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Build the model through the CLIP mixin, forwarding `kwargs`."""
        action_count = self.ACTION_SPACE.n
        return self.preprocessing_and_model.create_model(
            num_actions=action_count,
            add_prev_actions=self.add_prev_actions,
            **kwargs,
        )

    @classmethod
    def tag(cls):
        """Return the experiment's tag string."""
        return "PointNav-Habitat-RGB-ClipResNet50GRU-DDPPO"
================================================
FILE: projects/pointnav_baselines/experiments/habitat/pointnav_habitat_base.py
================================================
import os
from abc import ABC
from typing import Dict, Any, List, Optional, Sequence, Union
import gym
import torch
# noinspection PyUnresolvedReferences
import habitat
from allenact.base_abstractions.experiment_config import MachineParams
from allenact.base_abstractions.preprocessor import (
SensorPreprocessorGraph,
Preprocessor,
)
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor
from allenact.utils.experiment_utils import evenly_distribute_count_into_bins, Builder
from allenact.utils.system import get_logger
from allenact_plugins.habitat_plugin.habitat_constants import (
HABITAT_DATASETS_DIR,
HABITAT_CONFIGS_DIR,
HABITAT_SCENE_DATASETS_DIR,
)
from allenact_plugins.habitat_plugin.habitat_task_samplers import PointNavTaskSampler
from allenact_plugins.habitat_plugin.habitat_tasks import PointNavTask
from allenact_plugins.habitat_plugin.habitat_utils import (
get_habitat_config,
construct_env_configs,
)
from projects.pointnav_baselines.experiments.pointnav_base import PointNavBaseConfig
def create_pointnav_config(
    config_yaml_path: str,
    mode: str,
    scenes_path: str,
    simulator_gpu_ids: Sequence[int],
    distance_to_goal: float,
    rotation_degrees: float,
    step_size: float,
    max_steps: int,
    num_processes: int,
    camera_width: int,
    camera_height: int,
    using_rgb: bool,
    using_depth: bool,
    training: bool,
    num_episode_sample: int,
) -> habitat.Config:
    """Load a Habitat config and specialize it for the PointNav task.

    # Parameters

    config_yaml_path : Path handed to `get_habitat_config`.
    mode : Stored as `config.MODE`.
    scenes_path : Stored as the dataset's `DATA_PATH`.
    simulator_gpu_ids : Stored as `config.SIMULATOR_GPU_IDS`.
    distance_to_goal : Success distance for the task, SPL and success
        measurements.
    rotation_degrees : Simulator turn angle.
    step_size : Simulator forward step size.
    max_steps : Maximum number of steps per episode.
    num_processes : Stored as `config.NUM_PROCESSES`.
    camera_width : Width of both the RGB and depth sensors.
    camera_height : Height of both the RGB and depth sensors.
    using_rgb : Whether to enable the `"RGB_SENSOR"`.
    using_depth : Whether to enable the `"DEPTH_SENSOR"`.
    training : If falsy, the seed is fixed to 0 and episode shuffling is
        disabled.
    num_episode_sample : If positive, stored as the iterator's
        `NUM_EPISODE_SAMPLE`.

    # Returns

    The frozen config.
    """
    config = get_habitat_config(config_yaml_path)
    config.defrost()

    # Top-level and dataset settings.
    config.NUM_PROCESSES = num_processes
    config.SIMULATOR_GPU_IDS = simulator_gpu_ids
    config.DATASET.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR
    config.DATASET.DATA_PATH = scenes_path

    # Simulator settings.
    simulator = config.SIMULATOR
    simulator.AGENT_0.SENSORS = [
        sensor_name
        for enabled, sensor_name in (
            (using_rgb, "RGB_SENSOR"),
            (using_depth, "DEPTH_SENSOR"),
        )
        if enabled
    ]
    for camera in (simulator.RGB_SENSOR, simulator.DEPTH_SENSOR):
        camera.WIDTH = camera_width
        camera.HEIGHT = camera_height
    simulator.TURN_ANGLE = rotation_degrees
    simulator.FORWARD_STEP_SIZE = step_size

    config.ENVIRONMENT.MAX_EPISODE_STEPS = max_steps

    # Task settings.
    task = config.TASK
    task.TYPE = "Nav-v0"
    task.SUCCESS_DISTANCE = distance_to_goal
    task.SENSORS = ["POINTGOAL_WITH_GPS_COMPASS_SENSOR"]
    task.POINTGOAL_WITH_GPS_COMPASS_SENSOR.GOAL_FORMAT = "POLAR"
    task.POINTGOAL_WITH_GPS_COMPASS_SENSOR.DIMENSIONALITY = 2
    task.GOAL_SENSOR_UUID = "pointgoal_with_gps_compass"
    task.MEASUREMENTS = ["DISTANCE_TO_GOAL", "SUCCESS", "SPL"]
    task.SPL.TYPE = "SPL"
    task.SPL.SUCCESS_DISTANCE = distance_to_goal
    task.SUCCESS.SUCCESS_DISTANCE = distance_to_goal

    iterator_options = config.ENVIRONMENT.ITERATOR_OPTIONS
    if not training:
        # Fixed seed and unshuffled episodes outside of training.
        config.SEED = 0
        iterator_options.SHUFFLE = False
    if num_episode_sample > 0:
        iterator_options.NUM_EPISODE_SAMPLE = num_episode_sample

    config.MODE = mode
    config.freeze()
    return config
class PointNavHabitatBaseConfig(PointNavBaseConfig, ABC):
    """The base config for all Habitat PointNav experiments.

    On construction this builds frozen Habitat configs for training,
    validation and testing (`TRAIN_CONFIG`, `VALID_CONFIG`, `TEST_CONFIG`)
    through `create_pointnav_config`, and splits the train and test configs
    into per-process configs with `construct_env_configs`.

    `SENSORS` is read while constructing those configs, so concrete
    experiment classes are expected to define it.
    """

    # selected auxiliary uuids
    ## if comment all the keys, then it's vanilla DD-PPO
    AUXILIARY_UUIDS = [
        # InverseDynamicsLoss.UUID,
        # TemporalDistanceLoss.UUID,
        # CPCA1Loss.UUID,
        # CPCA4Loss.UUID,
        # CPCA8Loss.UUID,
        # CPCA16Loss.UUID,
    ]
    ADD_PREV_ACTIONS = False
    MULTIPLE_BELIEFS = False
    BELIEF_FUSION = (  # choose one
        None
        # AttentiveFusion
        # AverageFusion
        # SoftmaxFusion
    )

    # Forwarded to the task sampler as `failed_end_reward` (see `make_sampler_fn`).
    FAILED_END_REWARD = -1.0

    # Formatted with the split name twice, e.g. ".../train/train.json.gz".
    TASK_DATA_DIR_TEMPLATE = os.path.join(
        HABITAT_DATASETS_DIR, "pointnav/gibson/v1/{}/{}.json.gz"
    )
    BASE_CONFIG_YAML_PATH = os.path.join(
        HABITAT_CONFIGS_DIR, "tasks/pointnav_gibson.yaml"
    )

    ACTION_SPACE = gym.spaces.Discrete(len(PointNavTask.class_action_names()))

    # Defaults used by `__init__` when the matching argument is `None`.
    DEFAULT_NUM_TRAIN_PROCESSES = (
        5 * torch.cuda.device_count() if torch.cuda.is_available() else 1
    )
    DEFAULT_NUM_TEST_PROCESSES = 10

    DEFAULT_TRAIN_GPU_IDS = tuple(range(torch.cuda.device_count()))
    # Both evaluate to `[-1]` when `torch.cuda.device_count()` is 0;
    # `machine_params` does not use them in that case.
    DEFAULT_VALID_GPU_IDS = [torch.cuda.device_count() - 1]
    DEFAULT_TEST_GPU_IDS = [torch.cuda.device_count() - 1]

    def __init__(
        self,
        debug: bool = False,
        num_train_processes: Optional[int] = None,
        num_test_processes: Optional[int] = None,
        test_on_validation: bool = False,
        run_valid: bool = True,
        train_gpu_ids: Optional[Sequence[int]] = None,
        val_gpu_ids: Optional[Sequence[int]] = None,
        test_gpu_ids: Optional[Sequence[int]] = None,
        **kwargs,
    ):
        """Resolve process/GPU settings and build the Habitat configs.

        # Parameters

        debug : If true, every per-process training config is restricted to
            the `Adrian` scene.
        num_train_processes : Number of training processes; defaults to
            `DEFAULT_NUM_TRAIN_PROCESSES`.
        num_test_processes : Number of test processes; defaults to
            `DEFAULT_NUM_TEST_PROCESSES` when CUDA is available and to 1
            otherwise.
        test_on_validation : Stored on the instance. NOTE(review):
            `test_scenes_path` currently returns the validation path
            regardless of this flag.
        run_valid : Whether validation should run; when false the default
            validation GPU list is empty.
        train_gpu_ids : GPU ids for training; defaults to
            `DEFAULT_TRAIN_GPU_IDS`.
        val_gpu_ids : GPU ids for validation.
        test_gpu_ids : GPU ids for testing; defaults to
            `DEFAULT_TEST_GPU_IDS`.
        kwargs : Forwarded to the parent constructor.
        """
        super().__init__(**kwargs)

        def v_or_default(v, default):
            # Only `None` triggers the default, so explicit 0 / [] are kept.
            return v if v is not None else default

        self.num_train_processes = v_or_default(
            num_train_processes, self.DEFAULT_NUM_TRAIN_PROCESSES
        )
        # Use the class constant (rather than a literal) so that subclasses
        # overriding `DEFAULT_NUM_TEST_PROCESSES` actually take effect.
        self.num_test_processes = v_or_default(
            num_test_processes,
            (self.DEFAULT_NUM_TEST_PROCESSES if torch.cuda.is_available() else 1),
        )
        self.test_on_validation = test_on_validation
        self.run_valid = run_valid
        self.train_gpu_ids = v_or_default(train_gpu_ids, self.DEFAULT_TRAIN_GPU_IDS)
        self.val_gpu_ids = v_or_default(
            val_gpu_ids, self.DEFAULT_VALID_GPU_IDS if run_valid else []
        )
        self.test_gpu_ids = v_or_default(test_gpu_ids, self.DEFAULT_TEST_GPU_IDS)

        def create_config(
            mode: str,
            scenes_path: str,
            num_processes: int,
            simulator_gpu_ids: Sequence[int],
            training: bool = True,
            num_episode_sample: int = -1,
        ):
            # Fills in everything that is shared by the three configs below
            # from the class-level constants.
            return create_pointnav_config(
                config_yaml_path=self.BASE_CONFIG_YAML_PATH,
                mode=mode,
                scenes_path=scenes_path,
                simulator_gpu_ids=simulator_gpu_ids,
                distance_to_goal=self.DISTANCE_TO_GOAL,
                rotation_degrees=self.ROTATION_DEGREES,
                step_size=self.STEP_SIZE,
                max_steps=self.MAX_STEPS,
                num_processes=num_processes,
                camera_width=self.CAMERA_WIDTH,
                camera_height=self.CAMERA_HEIGHT,
                using_rgb=any(isinstance(s, RGBSensor) for s in self.SENSORS),
                using_depth=any(isinstance(s, DepthSensor) for s in self.SENSORS),
                training=training,
                num_episode_sample=num_episode_sample,
            )

        self.TRAIN_CONFIG = create_config(
            mode="train",
            scenes_path=self.train_scenes_path(),
            num_processes=self.num_train_processes,
            simulator_gpu_ids=self.train_gpu_ids,
            training=True,
        )
        self.VALID_CONFIG = create_config(
            mode="validate",
            scenes_path=self.valid_scenes_path(),
            num_processes=1,
            simulator_gpu_ids=self.val_gpu_ids,
            training=False,
            num_episode_sample=200,
        )
        self.TEST_CONFIG = create_config(
            mode="validate",
            scenes_path=self.test_scenes_path(),
            num_processes=self.num_test_processes,
            simulator_gpu_ids=self.test_gpu_ids,
            training=False,
        )

        self.TRAIN_CONFIGS_PER_PROCESS = construct_env_configs(
            self.TRAIN_CONFIG, allow_scene_repeat=True
        )

        if debug:
            get_logger().warning("IN DEBUG MODE, WILL ONLY USE `Adrian` SCENE!!!")
            for config in self.TRAIN_CONFIGS_PER_PROCESS:
                config.defrost()
                config.DATASET.CONTENT_SCENES = ["Adrian"]
                config.freeze()

        self.TEST_CONFIG_PER_PROCESS = construct_env_configs(
            self.TEST_CONFIG, allow_scene_repeat=False
        )

    def train_scenes_path(self):
        """Return the dataset path for the `train` split."""
        return self.TASK_DATA_DIR_TEMPLATE.format(*(["train"] * 2))

    def valid_scenes_path(self):
        """Return the dataset path for the `val` split."""
        return self.TASK_DATA_DIR_TEMPLATE.format(*(["val"] * 2))

    def test_scenes_path(self):
        """Return the dataset path used for testing.

        This is currently the `val` split; a warning is logged on each call.
        """
        get_logger().warning("Running tests on the validation set!")
        return self.TASK_DATA_DIR_TEMPLATE.format(*(["val"] * 2))
        # return self.TASK_DATA_DIR_TEMPLATE.format(*(["test"] * 2))

    @classmethod
    def tag(cls):
        """Return the experiment tag."""
        return "PointNav"

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors to apply; none by default."""
        return tuple()

    def machine_params(self, mode="train", **kwargs):
        """Return the `MachineParams` for `mode`.

        Without CUDA a single process and no devices are used for every mode
        (the mode string is not validated in that case). With CUDA the
        per-mode process count is spread over the mode's GPU ids with
        `evenly_distribute_count_into_bins`.

        # Parameters

        mode : One of `"train"`, `"valid"` or `"test"`.
        kwargs : Unused.

        # Raises

        NotImplementedError : If CUDA is available and `mode` is not one of
            the three supported values.
        """
        has_gpus = torch.cuda.is_available()
        if not has_gpus:
            gpu_ids = []
            nprocesses = 1
        elif mode == "train":
            gpu_ids = self.train_gpu_ids
            nprocesses = self.num_train_processes
        elif mode == "valid":
            gpu_ids = self.val_gpu_ids
            nprocesses = 1 if self.run_valid else 0
        elif mode == "test":
            gpu_ids = self.test_gpu_ids
            nprocesses = self.num_test_processes
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        if has_gpus:
            nprocesses = evenly_distribute_count_into_bins(nprocesses, len(gpu_ids))

        # The graph is always built for training; for the other modes it is
        # only built when at least one process will run.
        sensor_preprocessor_graph = (
            SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
                preprocessors=self.preprocessors(),
            )
            if mode == "train"
            or (
                (isinstance(nprocesses, int) and nprocesses > 0)
                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
            )
            else None
        )

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `PointNavTaskSampler` from `kwargs`.

        `failed_end_reward` defaults to `cls.FAILED_END_REWARD` and can be
        overridden through `kwargs`.
        """
        return PointNavTaskSampler(
            **{"failed_end_reward": cls.FAILED_END_REWARD, **kwargs}  # type: ignore
        )

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for training process `process_ind`.

        Only `process_ind` is used; it selects the entry of
        `TRAIN_CONFIGS_PER_PROCESS`.
        """
        config = self.TRAIN_CONFIGS_PER_PROCESS[process_ind]
        return {
            "env_config": config,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": self.ACTION_SPACE,
            "distance_to_goal": self.DISTANCE_TO_GOAL,
        }

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for validation.

        # Raises

        NotImplementedError : If `total_processes` is not 1.
        """
        if total_processes != 1:
            raise NotImplementedError(
                "In validation, `total_processes` must equal 1 for habitat tasks"
            )
        return {
            "env_config": self.VALID_CONFIG,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": self.ACTION_SPACE,
            "distance_to_goal": self.DISTANCE_TO_GOAL,
        }

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for test process `process_ind`.

        Only `process_ind` is used; it selects the entry of
        `TEST_CONFIG_PER_PROCESS`.
        """
        config = self.TEST_CONFIG_PER_PROCESS[process_ind]
        return {
            "env_config": config,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": self.ACTION_SPACE,
            "distance_to_goal": self.DISTANCE_TO_GOAL,
        }
================================================
FILE: projects/pointnav_baselines/experiments/habitat/pointnav_habitat_depth_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.habitat_plugin.habitat_sensors import (
DepthSensorHabitat,
TargetCoordinatesSensorHabitat,
)
from projects.pointnav_baselines.experiments.habitat.pointnav_habitat_base import (
PointNavHabitatBaseConfig,
)
from projects.pointnav_baselines.mixins import (
PointNavPPOMixin,
PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
class PointNavHabitatDepthDeterministiSimpleConvGRUDDPPOExperimentConfig(
    PointNavHabitatBaseConfig,
):
    """A Point Navigation experiment configuration in Habitat with Depth
    input."""

    # Normalized depth frames at SCREEN_SIZE x SCREEN_SIZE plus a
    # 2-dimensional target-coordinates sensor.
    SENSORS = [
        DepthSensorHabitat(
            width=PointNavHabitatBaseConfig.SCREEN_SIZE,
            height=PointNavHabitatBaseConfig.SCREEN_SIZE,
            use_normalization=True,
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
    ]

    def __init__(self):
        """Build the base Habitat configs and the model-creation handler."""
        super().__init__()

        model_settings = dict(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )
        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            **model_settings
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the pipeline built by `PointNavPPOMixin`; `kwargs` is unused."""
        pipeline_settings = dict(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )
        return PointNavPPOMixin.training_pipeline(**pipeline_settings)

    def create_model(self, **kwargs):
        """Delegate model construction to the handler created in `__init__`."""
        handler = self.model_creation_handler
        return handler.create_model(**kwargs)

    def tag(self):
        """Return the experiment tag."""
        return "PointNav-Habitat-Depth-SimpleConv-DDPPO"
================================================
FILE: projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgb_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.habitat_plugin.habitat_sensors import RGBSensorHabitat
from allenact_plugins.habitat_plugin.habitat_sensors import (
TargetCoordinatesSensorHabitat,
)
from projects.pointnav_baselines.experiments.habitat.pointnav_habitat_base import (
PointNavHabitatBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin
from projects.pointnav_baselines.mixins import (
PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
class PointNavHabitatDepthDeterministiSimpleConvGRUDDPPOExperimentConfig(
    PointNavHabitatBaseConfig
):
    """A Point Navigation experiment configuration in Habitat with RGB
    input.

    NOTE: the class name mentions "Depth", but the sensors and the tag of
    this configuration are RGB; the name is kept unchanged so existing
    references keep working.
    """

    # ResNet-normalized RGB frames at SCREEN_SIZE x SCREEN_SIZE plus a
    # 2-dimensional target-coordinates sensor.
    SENSORS = [
        RGBSensorHabitat(
            width=PointNavHabitatBaseConfig.SCREEN_SIZE,
            height=PointNavHabitatBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
    ]

    def __init__(self):
        """Build the base Habitat configs and the model-creation handler."""
        super().__init__()

        model_settings = dict(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )
        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            **model_settings
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the pipeline built by `PointNavPPOMixin`; `kwargs` is unused."""
        pipeline_settings = dict(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )
        return PointNavPPOMixin.training_pipeline(**pipeline_settings)

    def create_model(self, **kwargs):
        """Delegate model construction to the handler created in `__init__`."""
        handler = self.model_creation_handler
        return handler.create_model(**kwargs)

    @classmethod
    def tag(cls):
        """Return the experiment tag."""
        return "PointNav-Habitat-RGB-SimpleConv-DDPPO"
================================================
FILE: projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgbd_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.habitat_plugin.habitat_sensors import DepthSensorHabitat
from allenact_plugins.habitat_plugin.habitat_sensors import RGBSensorHabitat
from allenact_plugins.habitat_plugin.habitat_sensors import (
TargetCoordinatesSensorHabitat,
)
from projects.pointnav_baselines.experiments.habitat.pointnav_habitat_base import (
PointNavHabitatBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin
from projects.pointnav_baselines.mixins import (
PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
class PointNavHabitatDepthDeterministiSimpleConvGRUDDPPOExperimentConfig(
    PointNavHabitatBaseConfig
):
    """A Point Navigation experiment configuration in Habitat with RGBD
    input.

    NOTE: the class name only mentions "Depth", but this configuration uses
    both an RGB and a depth sensor; the name is kept unchanged so existing
    references keep working.
    """

    # ResNet-normalized RGB and normalized depth frames, both at
    # SCREEN_SIZE x SCREEN_SIZE, plus a 2-dimensional target-coordinates sensor.
    SENSORS = [
        RGBSensorHabitat(
            width=PointNavHabitatBaseConfig.SCREEN_SIZE,
            height=PointNavHabitatBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
        ),
        DepthSensorHabitat(
            width=PointNavHabitatBaseConfig.SCREEN_SIZE,
            height=PointNavHabitatBaseConfig.SCREEN_SIZE,
            use_normalization=True,
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
    ]

    def __init__(self):
        """Build the base Habitat configs and the model-creation handler."""
        super().__init__()

        model_settings = dict(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )
        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            **model_settings
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the pipeline built by `PointNavPPOMixin`; `kwargs` is unused."""
        pipeline_settings = dict(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )
        return PointNavPPOMixin.training_pipeline(**pipeline_settings)

    def create_model(self, **kwargs):
        """Delegate model construction to the handler created in `__init__`."""
        handler = self.model_creation_handler
        return handler.create_model(**kwargs)

    def tag(self):
        """Return the experiment tag."""
        return "PointNav-Habitat-RGBD-SimpleConv-DDPPO"
================================================
FILE: projects/pointnav_baselines/experiments/ithor/__init__.py
================================================
================================================
FILE: projects/pointnav_baselines/experiments/ithor/pointnav_ithor_base.py
================================================
import os
from abc import ABC
from projects.pointnav_baselines.experiments.pointnav_thor_base import (
PointNavThorBaseConfig,
)
class PointNaviThorBaseConfig(PointNavThorBaseConfig, ABC):
    """The base config for all iTHOR PointNav experiments.

    Fixes the number of training processes and points the train/val dataset
    directories at `datasets/ithor-pointnav/` under the current working
    directory (resolved when this module is imported).
    """

    NUM_PROCESSES = 40

    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-pointnav/train")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-pointnav/val")
================================================
FILE: projects/pointnav_baselines/experiments/ithor/pointnav_ithor_depth_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.robothor_plugin.robothor_sensors import (
DepthSensorThor,
GPSCompassSensorRoboThor,
)
from projects.pointnav_baselines.mixins import (
PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
from projects.pointnav_baselines.experiments.ithor.pointnav_ithor_base import (
PointNaviThorBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin
class PointNaviThorDepthPPOExperimentConfig(PointNaviThorBaseConfig):
    """A Point Navigation experiment configuration in iTHOR with Depth
    input."""

    # Normalized depth frames ("depth_lowres") at SCREEN_SIZE x SCREEN_SIZE
    # plus the GPS/compass sensor.
    SENSORS = [
        DepthSensorThor(
            width=PointNaviThorBaseConfig.SCREEN_SIZE,
            height=PointNaviThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    def __init__(self):
        """Build the base THOR settings and the model-creation handler."""
        super().__init__()

        model_settings = dict(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )
        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            **model_settings
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the pipeline built by `PointNavPPOMixin`; `kwargs` is unused."""
        pipeline_settings = dict(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )
        return PointNavPPOMixin.training_pipeline(**pipeline_settings)

    def create_model(self, **kwargs):
        """Delegate model construction to the handler created in `__init__`."""
        handler = self.model_creation_handler
        return handler.create_model(**kwargs)

    def tag(self):
        """Return the experiment tag."""
        return "PointNav-iTHOR-Depth-SimpleConv-DDPPO"
================================================
FILE: projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgb_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from projects.pointnav_baselines.mixins import (
PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
from projects.pointnav_baselines.experiments.ithor.pointnav_ithor_base import (
PointNaviThorBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin
class PointNaviThorRGBPPOExperimentConfig(PointNaviThorBaseConfig):
    """A Point Navigation experiment configuration in iTHOR with RGB input."""

    # ResNet-normalized RGB frames ("rgb_lowres") at SCREEN_SIZE x SCREEN_SIZE
    # plus the GPS/compass sensor.
    SENSORS = [
        RGBSensorThor(
            width=PointNaviThorBaseConfig.SCREEN_SIZE,
            height=PointNaviThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    def __init__(self):
        """Build the base THOR settings and the model-creation handler."""
        super().__init__()

        model_settings = dict(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )
        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            **model_settings
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the pipeline built by `PointNavPPOMixin`; `kwargs` is unused."""
        pipeline_settings = dict(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )
        return PointNavPPOMixin.training_pipeline(**pipeline_settings)

    def create_model(self, **kwargs):
        """Delegate model construction to the handler created in `__init__`."""
        handler = self.model_creation_handler
        return handler.create_model(**kwargs)

    def tag(self):
        """Return the experiment tag."""
        return "PointNav-iTHOR-RGB-SimpleConv-DDPPO"
================================================
FILE: projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgbd_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from projects.pointnav_baselines.mixins import (
PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
from projects.pointnav_baselines.experiments.ithor.pointnav_ithor_base import (
PointNaviThorBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin
class PointNaviThorRGBDPPOExperimentConfig(PointNaviThorBaseConfig):
    """A Point Navigation experiment configuration in iTHOR with RGBD
    input."""

    # ResNet-normalized RGB ("rgb_lowres") and normalized depth
    # ("depth_lowres") frames, both at SCREEN_SIZE x SCREEN_SIZE, plus the
    # GPS/compass sensor.
    SENSORS = [
        RGBSensorThor(
            width=PointNaviThorBaseConfig.SCREEN_SIZE,
            height=PointNaviThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        DepthSensorThor(
            width=PointNaviThorBaseConfig.SCREEN_SIZE,
            height=PointNaviThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    def __init__(self):
        """Build the base THOR settings and the model-creation handler."""
        super().__init__()

        model_settings = dict(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )
        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            **model_settings
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the pipeline built by `PointNavPPOMixin`; `kwargs` is unused."""
        pipeline_settings = dict(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )
        return PointNavPPOMixin.training_pipeline(**pipeline_settings)

    def create_model(self, **kwargs):
        """Delegate model construction to the handler created in `__init__`."""
        handler = self.model_creation_handler
        return handler.create_model(**kwargs)

    def tag(self):
        """Return the experiment tag."""
        return "PointNav-iTHOR-RGBD-SimpleConv-DDPPO"
================================================
FILE: projects/pointnav_baselines/experiments/pointnav_base.py
================================================
from abc import ABC
from typing import Optional, Sequence
from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.base_abstractions.sensor import Sensor
class PointNavBaseConfig(ExperimentConfig, ABC):
    """Shared base configuration for the Point Navigation experiments.

    Holds the class-level constants that the simulator-specific PointNav
    base configs read, and creates the per-instance reward settings.
    """

    # Left unset here; experiment classes may override these.
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    SENSORS: Optional[Sequence[Sensor]] = None

    # Agent motion and success threshold.
    STEP_SIZE = 0.25
    ROTATION_DEGREES = 30.0
    DISTANCE_TO_GOAL = 0.2
    STOCHASTIC = True

    # Simulator camera resolution and the size used by the image sensors.
    CAMERA_WIDTH = 400
    CAMERA_HEIGHT = 300
    SCREEN_SIZE = 224

    # Maximum number of steps per episode.
    MAX_STEPS = 500

    def __init__(self):
        """Create the reward settings as an instance attribute."""
        self.REWARD_CONFIG = dict(
            step_penalty=-0.01,
            goal_success_reward=10.0,
            failed_stop_reward=0.0,
            reached_max_steps_reward=0.0,
            shaping_weight=1.0,
        )
================================================
FILE: projects/pointnav_baselines/experiments/pointnav_thor_base.py
================================================
import glob
import os
import platform
from abc import ABC
from math import ceil
from typing import Dict, Any, List, Optional, Sequence
import ai2thor
import gym
import numpy as np
import torch
from packaging import version
from allenact.base_abstractions.experiment_config import MachineParams
from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import evenly_distribute_count_into_bins
from allenact.utils.system import get_logger
from allenact_plugins.ithor_plugin.ithor_util import get_open_x_displays
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from allenact_plugins.robothor_plugin.robothor_task_samplers import (
PointNavDatasetTaskSampler,
)
from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask
from projects.pointnav_baselines.experiments.pointnav_base import PointNavBaseConfig
# Import-time guard: refuse to load with an ai2thor release older than 2.7.2.
# The versions "0.0.1" and `None` are exempt from the check (presumably
# development / source builds -- TODO confirm). The error message states the
# same minimum version that the comparison enforces.
if ai2thor.__version__ not in ["0.0.1", None] and version.parse(
    ai2thor.__version__
) < version.parse("2.7.2"):
    raise ImportError(
        "To run the PointNav baseline experiments you must use"
        " ai2thor version 2.7.2 or higher."
    )
class PointNavThorBaseConfig(PointNavBaseConfig, ABC):
    """The base config for the AI2-THOR (iTHOR and RoboTHOR) PointNav
    experiments.

    Subclasses are expected to set `NUM_PROCESSES`, `TRAIN_DATASET_DIR`,
    `VAL_DATASET_DIR` and `SENSORS`; they are `None` here.
    """

    NUM_PROCESSES: Optional[int] = None
    TRAIN_GPU_IDS = list(range(torch.cuda.device_count()))
    # Both evaluate to `[-1]` when `torch.cuda.device_count()` is 0;
    # `machine_params` does not use them in that case.
    VALID_GPU_IDS = [torch.cuda.device_count() - 1]
    TEST_GPU_IDS = [torch.cuda.device_count() - 1]

    TRAIN_DATASET_DIR: Optional[str] = None
    VAL_DATASET_DIR: Optional[str] = None

    TARGET_TYPES: Optional[Sequence[str]] = None

    ACTION_SPACE = gym.spaces.Discrete(len(PointNavTask.class_action_names()))

    def __init__(self):
        """Create the environment arguments from the class-level constants."""
        super().__init__()
        self.ENV_ARGS = dict(
            width=self.CAMERA_WIDTH,
            height=self.CAMERA_HEIGHT,
            continuousMode=True,
            applyActionNoise=self.STOCHASTIC,
            rotateStepDegrees=self.ROTATION_DEGREES,
            gridSize=self.STEP_SIZE,
            snapToGrid=False,
            agentMode="bot",
            include_private_scenes=False,
            # Depth rendering is only requested when a depth sensor is used.
            renderDepthImage=any(isinstance(s, DepthSensorThor) for s in self.SENSORS),
        )

    def preprocessors(self):
        """Return the preprocessors to apply; none by default."""
        return tuple()

    def machine_params(self, mode="train", **kwargs):
        """Return the `MachineParams` for `mode`.

        * `"train"`: `NUM_PROCESSES` spread over `TRAIN_GPU_IDS`, or a single
          process without devices when CUDA is unavailable.
        * `"valid"`: one process on `VALID_GPU_IDS`, or zero processes when
          CUDA is unavailable.
        * `"test"`: ten processes, on `TEST_GPU_IDS` when CUDA is available.

        # Parameters

        mode : One of `"train"`, `"valid"` or `"test"`.
        kwargs : Unused.

        # Raises

        NotImplementedError : If `mode` is not one of the supported values.
        """
        sampler_devices: Sequence[int] = []
        if mode == "train":
            workers_per_device = 1
            gpu_ids = (
                []
                if not torch.cuda.is_available()
                else self.TRAIN_GPU_IDS * workers_per_device
            )
            nprocesses = (
                1
                if not torch.cuda.is_available()
                else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))
            )
            sampler_devices = self.TRAIN_GPU_IDS
        elif mode == "valid":
            nprocesses = 1 if torch.cuda.is_available() else 0
            gpu_ids = [] if not torch.cuda.is_available() else self.VALID_GPU_IDS
        elif mode == "test":
            nprocesses = 10
            gpu_ids = [] if not torch.cuda.is_available() else self.TEST_GPU_IDS
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        # The graph is always built for training; for the other modes it is
        # only built when at least one process will run.
        sensor_preprocessor_graph = (
            SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
                preprocessors=self.preprocessors(),
            )
            if mode == "train"
            or (
                (isinstance(nprocesses, int) and nprocesses > 0)
                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
            )
            else None
        )

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sampler_devices=(
                sampler_devices if mode == "train" else gpu_ids
            ),  # ignored with > 1 gpu_ids
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `PointNavDatasetTaskSampler` from `kwargs`."""
        return PointNavDatasetTaskSampler(**kwargs)

    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        """Return `num_parts + 1` rounded, evenly spaced indices from 0 to `n`.

        Consecutive pairs of the returned int32 array delimit the slices that
        split a length-`n` sequence into `num_parts` parts.
        """
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]],
        seeds: Optional[List[int]],
        deterministic_cudnn: bool,
        include_expert_sensor: bool = True,
    ) -> Dict[str, Any]:
        """Build the task-sampler arguments for one process.

        Scene names are taken from the `*.json.gz` files in `scenes_dir` and
        split between `total_processes` processes; this process receives the
        slice selected by `process_ind`.

        # Parameters

        scenes_dir : Directory containing one `<scene>.json.gz` file per scene.
        process_ind : Index of the process the arguments are built for.
        total_processes : Total number of processes sharing the scenes.
        devices : Devices in use; only consulted (on Linux) to warn when there
            are more non-CPU devices than open X-displays. `None` is treated
            as no devices.
        seeds : Per-process seeds, or `None`.
        deterministic_cudnn : Passed through to the sampler arguments.
        include_expert_sensor : If false, `ExpertActionSensor` instances are
            dropped from the sensor list.

        # Returns

        The keyword arguments for the task sampler.

        # Raises

        RuntimeError : If no `*.json.gz` file is found in `scenes_dir`.
        """
        path = os.path.join(scenes_dir, "*.json.gz")
        # `os.path.basename` keeps this independent of the path separator.
        scenes = [
            os.path.basename(scene_file).split(".")[0]
            for scene_file in glob.glob(path)
        ]
        if len(scenes) == 0:
            raise RuntimeError(
                (
                    "Could find no scene dataset information in directory {}."
                    " Are you sure you've downloaded them? "
                    " If not, see https://allenact.org/installation/download-datasets/ information"
                    " on how this can be done."
                ).format(scenes_dir)
            )

        oversample_warning = (
            f"Warning: oversampling some of the scenes ({scenes}) to feed all processes ({total_processes})."
            " You can avoid this by setting a number of workers divisible by the number of scenes"
        )
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                get_logger().warning(oversample_warning)
            # Repeat the scene list until there is at least one scene per
            # process, then trim to a multiple of `total_processes`.
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        elif len(scenes) % total_processes != 0:
            get_logger().warning(oversample_warning)

        inds = self._partition_inds(len(scenes), total_processes)

        x_display: Optional[str] = None
        if platform.system() == "Linux":
            x_displays = get_open_x_displays(throw_error_if_empty=True)

            non_cpu_devices = [
                d for d in (devices or []) if d != torch.device("cpu")
            ]
            if len(non_cpu_devices) > len(x_displays):
                get_logger().warning(
                    f"More GPU devices found than X-displays (devices: `{devices}`, x_displays: `{x_displays}`)."
                    f" This is not necessarily a bad thing but may mean that you're not using GPU memory as"
                    f" efficiently as possible. Consider following the instructions here:"
                    f" https://allenact.org/installation/installation-framework/#installation-of-ithor-ithor-plugin"
                    f" describing how to start an X-display on every GPU."
                )

            # Processes are assigned to the open X-displays round-robin.
            x_display = x_displays[process_ind % len(x_displays)]

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "object_types": self.TARGET_TYPES,
            "max_steps": self.MAX_STEPS,
            "sensors": [
                s
                for s in self.SENSORS
                if (include_expert_sensor or not isinstance(s, ExpertActionSensor))
            ],
            "action_space": self.ACTION_SPACE,
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
            "env_args": {
                **self.ENV_ARGS,
                "x_display": x_display,
            },
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for training process `process_ind`.

        Uses the scenes under `TRAIN_DATASET_DIR/episodes`, loops over the
        dataset and allows flipping.
        """
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.TRAIN_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            devices=devices,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.TRAIN_DATASET_DIR
        res["loop_dataset"] = True
        res["allow_flipping"] = True
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for validation process `process_ind`.

        Uses the scenes under `VAL_DATASET_DIR/episodes`, drops expert-action
        sensors and does not loop over the dataset.
        """
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            devices=devices,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
            include_expert_sensor=False,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for testing.

        Testing reuses the validation arguments unchanged.
        """
        return self.valid_task_sampler_args(
            process_ind=process_ind,
            total_processes=total_processes,
            devices=devices,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
================================================
FILE: projects/pointnav_baselines/experiments/robothor/__init__.py
================================================
================================================
FILE: projects/pointnav_baselines/experiments/robothor/pointnav_robothor_base.py
================================================
import os
from abc import ABC
from projects.pointnav_baselines.experiments.pointnav_thor_base import (
PointNavThorBaseConfig,
)
class PointNavRoboThorBaseConfig(PointNavThorBaseConfig, ABC):
    """The base config for all RoboTHOR PointNav experiments.

    Fixes the number of training processes and points the train/val dataset
    directories at `datasets/robothor-pointnav/` under the current working
    directory (resolved when this module is imported).
    """

    NUM_PROCESSES = 60

    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/train")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/val")
================================================
FILE: projects/pointnav_baselines/experiments/robothor/pointnav_robothor_depth_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.robothor_plugin.robothor_sensors import (
DepthSensorThor,
GPSCompassSensorRoboThor,
)
from projects.pointnav_baselines.mixins import (
PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
from projects.pointnav_baselines.experiments.robothor.pointnav_robothor_base import (
PointNavRoboThorBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin
class PointNavRoboThorRGBPPOExperimentConfig(
    PointNavRoboThorBaseConfig,
):
    """A Point Navigation experiment configuration in RoboTHOR with Depth
    input.

    NOTE: the class name mentions "RGB", but the sensors and the tag of this
    configuration are depth-based; the name is kept unchanged so existing
    references keep working.
    """

    # Normalized depth frames ("depth_lowres") at SCREEN_SIZE x SCREEN_SIZE
    # plus the GPS/compass sensor.
    SENSORS = [
        DepthSensorThor(
            width=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            height=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    def __init__(self):
        """Build the base THOR settings and the model-creation handler."""
        super().__init__()

        model_settings = dict(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )
        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            **model_settings
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the pipeline built by `PointNavPPOMixin`; `kwargs` is unused."""
        pipeline_settings = dict(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )
        return PointNavPPOMixin.training_pipeline(**pipeline_settings)

    def create_model(self, **kwargs):
        """Delegate model construction to the handler created in `__init__`."""
        handler = self.model_creation_handler
        return handler.create_model(**kwargs)

    def tag(self):
        """Return the experiment tag."""
        return "PointNav-RoboTHOR-Depth-SimpleConv-DDPPO"
================================================
FILE: projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from projects.pointnav_baselines.mixins import (
PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
from projects.pointnav_baselines.experiments.robothor.pointnav_robothor_base import (
PointNavRoboThorBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin
class PointNavRoboThorRGBPPOExperimentConfig(
    PointNavRoboThorBaseConfig,
):
    """A Point Navigation experiment configuration in RoboTHOR with RGB
    input."""

    # ResNet-normalized RGB frames ("rgb_lowres") at SCREEN_SIZE x SCREEN_SIZE
    # plus the GPS/compass sensor.
    SENSORS = [
        RGBSensorThor(
            width=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            height=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    def __init__(self):
        """Build the base THOR settings and the model-creation handler."""
        super().__init__()

        model_settings = dict(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )
        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            **model_settings
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the pipeline built by `PointNavPPOMixin`; `kwargs` is unused."""
        pipeline_settings = dict(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )
        return PointNavPPOMixin.training_pipeline(**pipeline_settings)

    def create_model(self, **kwargs):
        """Delegate model construction to the handler created in `__init__`."""
        handler = self.model_creation_handler
        return handler.create_model(**kwargs)

    def tag(self):
        """Return the experiment tag."""
        return "PointNav-RoboTHOR-RGB-SimpleConv-DDPPO"
================================================
FILE: projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgbd_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from projects.pointnav_baselines.mixins import (
PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
from projects.pointnav_baselines.experiments.robothor.pointnav_robothor_base import (
PointNavRoboThorBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin
class PointNavRoboThorRGBPPOExperimentConfig(
    PointNavRoboThorBaseConfig,
):
    """A Point Navigation experiment configuration in RoboTHOR with RGBD
    input.

    NOTE: the class name only mentions "RGB", but this configuration uses
    both an RGB and a depth sensor; the name is kept unchanged so existing
    references keep working.
    """

    # ResNet-normalized RGB ("rgb_lowres") and normalized depth
    # ("depth_lowres") frames, both at SCREEN_SIZE x SCREEN_SIZE, plus the
    # GPS/compass sensor.
    SENSORS = [
        RGBSensorThor(
            width=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            height=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        DepthSensorThor(
            width=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            height=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    def __init__(self):
        """Build the base THOR settings and the model-creation handler."""
        super().__init__()

        model_settings = dict(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )
        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            **model_settings
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the pipeline built by `PointNavPPOMixin`; `kwargs` is unused."""
        pipeline_settings = dict(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )
        return PointNavPPOMixin.training_pipeline(**pipeline_settings)

    def create_model(self, **kwargs):
        """Delegate model construction to the handler created in `__init__`."""
        handler = self.model_creation_handler
        return handler.create_model(**kwargs)

    def tag(self):
        """Return the experiment tag."""
        return "PointNav-RoboTHOR-RGBD-SimpleConv-DDPPO"
================================================
FILE: projects/pointnav_baselines/mixins.py
================================================
from typing import Optional
from typing import Sequence
import attr
import gym
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.sensor import Sensor
from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor
from allenact.utils.experiment_utils import (
Builder,
TrainingPipeline,
PipelineStage,
LinearDecay,
)
from projects.objectnav_baselines.mixins import update_with_auxiliary_losses
# fmt: off
try:
# Habitat is an optional dependency: when its plugin cannot be imported, a
# placeholder class with the same name is defined below so that code in
# this module referring to `TargetCoordinatesSensorHabitat` (e.g. in
# `isinstance` checks) still works.
from allenact_plugins.habitat_plugin.habitat_sensors import TargetCoordinatesSensorHabitat
except ImportError:
# Empty stand-in used only when the Habitat plugin is unavailable.
class TargetCoordinatesSensorHabitat: #type:ignore
pass
# fmt: on
from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask
from allenact_plugins.navigation_plugin.pointnav.models import PointNavActorCritic
@attr.s(kw_only=True)
class PointNavUnfrozenResNetWithGRUActorCriticMixin:
    """Keyword-only settings holder that builds a `PointNavActorCritic`.

    The sensor uuids passed to the model are looked up by sensor type in
    `sensors`; the remaining fields are forwarded to the model constructor.
    """

    backbone: str = attr.ib()
    sensors: Sequence[Sensor] = attr.ib()
    auxiliary_uuids: Sequence[str] = attr.ib()
    add_prev_actions: bool = attr.ib()
    multiple_beliefs: bool = attr.ib()
    belief_fusion: Optional[str] = attr.ib()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the GRU-based PointNav actor-critic model.

        # Parameters

        kwargs : must contain `sensor_preprocessor_graph`, whose
            `observation_spaces` attribute is used as the observation space.

        # Returns

        A `PointNavActorCritic` instance.

        # Raises

        StopIteration : if `sensors` holds neither a
            `GPSCompassSensorRoboThor` nor a `TargetCoordinatesSensorHabitat`.
        """

        def uuids_of(sensor_types):
            # Lazily yields the uuids of the sensors matching `sensor_types`,
            # in the order they appear in `self.sensors`.
            return (s.uuid for s in self.sensors if isinstance(s, sensor_types))

        # RGB and depth are optional, hence the `None` defaults.
        rgb_uuid = next(uuids_of(RGBSensor), None)
        depth_uuid = next(uuids_of(DepthSensor), None)
        # The goal sensor has no default: a missing one raises `StopIteration`.
        goal_sensor_uuid = next(
            uuids_of((GPSCompassSensorRoboThor, TargetCoordinatesSensorHabitat))
        )

        action_space = gym.spaces.Discrete(len(PointNavTask.class_action_names()))
        observation_space = kwargs["sensor_preprocessor_graph"].observation_spaces

        # A smaller hidden state is used when several beliefs are kept.
        several_beliefs = self.multiple_beliefs and len(self.auxiliary_uuids) > 1
        hidden_size = 228 if several_beliefs else 512

        return PointNavActorCritic(
            # Env and Task
            action_space=action_space,
            observation_space=observation_space,
            rgb_uuid=rgb_uuid,
            depth_uuid=depth_uuid,
            goal_sensor_uuid=goal_sensor_uuid,
            # RNN
            hidden_size=hidden_size,
            num_rnn_layers=1,
            rnn_type="GRU",
            add_prev_actions=self.add_prev_actions,
            action_embed_size=4,
            # CNN
            backbone=self.backbone,
            resnet_baseplanes=32,
            embed_coordinates=False,
            coordinate_dims=2,
            # Aux
            auxiliary_uuids=self.auxiliary_uuids,
            multiple_beliefs=self.multiple_beliefs,
            beliefs_fusion=self.belief_fusion,
        )
class PointNavPPOMixin:
    """Provides the PPO training pipeline shared by PointNav experiments."""

    @staticmethod
    def training_pipeline(
        auxiliary_uuids: Sequence[str],
        multiple_beliefs: bool,
        normalize_advantage: bool,
        advance_scene_rollout_period: Optional[int] = None,
    ) -> TrainingPipeline:
        """Build a single-stage PPO pipeline with a linearly decaying learning rate.

        # Parameters

        auxiliary_uuids : passed to `update_with_auxiliary_losses` to extend
            the set of named losses.
        multiple_beliefs : passed to `update_with_auxiliary_losses`.
        normalize_advantage : forwarded to the `PPO` loss.
        advance_scene_rollout_period : forwarded to `TrainingPipeline`.

        # Returns

        A `TrainingPipeline` with one stage of 75,000,000 steps.
        """
        total_steps = int(75000000)
        base_lr = 3e-4
        # Metrics are accumulated far more often when no GPU is available.
        metric_interval = 10000 if torch.cuda.is_available() else 1

        # Maps each loss name to a `(loss, weight)` pair.
        losses_and_weights = update_with_auxiliary_losses(
            named_losses={
                "ppo_loss": (
                    PPO(**PPOConfig, normalize_advantage=normalize_advantage),
                    1.0,
                )
            },
            auxiliary_uuids=auxiliary_uuids,
            multiple_beliefs=multiple_beliefs,
        )

        losses = {name: pair[0] for name, pair in losses_and_weights.items()}
        single_stage = PipelineStage(
            loss_names=list(losses_and_weights.keys()),
            max_stage_steps=total_steps,
            loss_weights=[pair[1] for pair in losses_and_weights.values()],
        )

        return TrainingPipeline(
            save_interval=5000000,
            metric_accumulate_interval=metric_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=base_lr)),
            num_mini_batch=1,
            update_repeats=4,
            max_grad_norm=0.5,
            num_steps=128,
            named_losses=losses,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=advance_scene_rollout_period,
            pipeline_stages=[single_stage],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}
            ),
        )
================================================
FILE: projects/tutorials/__init__.py
================================================
================================================
FILE: projects/tutorials/distributed_objectnav_tutorial.py
================================================
# literate: tutorials/distributed-objectnav-tutorial.md
# %%
"""# Tutorial: Distributed training across multiple nodes."""
# %%
"""
**Note** The commands provided in this tutorial include a configuration script to
[clone the full library](../installation/installation-allenact.md#full-library). Setting up headless THOR might
require superuser privileges. We also assume [NCCL](https://developer.nvidia.com/nccl) is available for communication
across computation nodes and all nodes have a running `ssh` server.
The below introduced experimental tools and commands for distributed training assume a Linux OS (tested on Ubuntu
18.04).
In this tutorial, we:
1. Introduce the available API for training across multiple nodes, as well as experimental scripts for distributed
configuration, training start and termination, and remote command execution.
1. Introduce the headless mode for [AI2-THOR](https://ai2thor.allenai.org/) in `AllenAct`. Note that, in contrast with
previous tutorials using AI2-THOR, this time we don't require an xserver (in Linux) to be active.
1. Show a training example for RoboTHOR ObjectNav on a cluster, with each node having sufficient GPUs and GPU memory to
host 60 experience samplers collecting rollout data.
Thanks to the massive parallelization of experience collection and model training enabled by
[DD-PPO](https://arxiv.org/abs/1911.00357), we can greatly speed up training by scaling across multiple nodes:

## The task: ObjectNav
In ObjectNav, the goal for the agent is to navigate to an object (possibly unseen during training) of a known given
class and signal task completion when it determines it has reached the goal.
## Implementation
For this tutorial, we'll use the readily available `objectnav_baselines` project, which includes configurations for
a wide variety of object navigation experiments for both iTHOR and RoboTHOR. Since those configuration files are
defined for a single-node setup, we will mainly focus on the changes required in the `machine_params` and
`training_pipeline` methods.
Note that, in order to use the headless version of AI2-THOR, we currently need to install a specific THOR commit,
different from the default one in `robothor_plugin`. Note that this command is included in the configuration script
below, so **we don't need to run this**:
```bash
pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+91139c909576f3bf95a187c5b02c6fd455d06b48
```
The experiment config starts as follows:
"""
# %%
import math
from typing import Optional, Sequence
import torch
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.utils.experiment_utils import (
Builder,
LinearDecay,
MultiLinearDecay,
TrainingPipeline,
PipelineStage,
)
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_rgb_resnet18gru_ddppo import (
ObjectNavRoboThorRGBPPOExperimentConfig as BaseConfig,
)
class DistributedObjectNavRoboThorRGBPPOExperimentConfig(BaseConfig):
def tag(self) -> str:
    """Return the tag string identifying this experiment."""
    experiment_tag = "DistributedObjectNavRoboThorRGBPPO"
    return experiment_tag
# %%
"""We override ObjectNavRoboThorBaseConfig's THOR_COMMIT_ID to match the installed headless one:"""
# %%
# AI2-THOR commit id overriding the base config's value so that it matches
# the installed headless build.
THOR_COMMIT_ID = "91139c909576f3bf95a187c5b02c6fd455d06b48"
# %%
"""Also indicate that we're using headless THOR (for `task_sampler_args` methods):"""
# %%
# Signals that headless THOR is in use (for the `task_sampler_args` methods).
THOR_IS_HEADLESS = True
# %%
"""**Temporary hack** Disable the `commit_id` argument passed to the THOR `Controller`'s `init` method:"""
# %%
def env_args(self):
    """Return the parent's environment arguments without `commit_id`.

    The entry is dropped if present; its absence is not an error.
    """
    base_args = super().env_args()
    # Temporary hack: the THOR `Controller` must not receive `commit_id`.
    base_args.pop("commit_id", None)
    return base_args
# %%
"""
And, of course, define the number of nodes. This will be used by `machine_params` and `training_pipeline` below.
We override the existing `ExperimentConfig`'s `init` method to include control on the number of nodes:
"""
# %%
def __init__(
    self,
    distributed_nodes: int = 1,
    num_train_processes: Optional[int] = None,
    train_gpu_ids: Optional[Sequence[int]] = None,
    val_gpu_ids: Optional[Sequence[int]] = None,
    test_gpu_ids: Optional[Sequence[int]] = None,
):
    """Initialize the base config and record the number of nodes.

    # Parameters

    distributed_nodes : number of nodes taking part in training; stored as
        `self.distributed_nodes`.
    num_train_processes, train_gpu_ids, val_gpu_ids, test_gpu_ids :
        forwarded unchanged to the parent initializer.
    """
    parent_settings = dict(
        num_train_processes=num_train_processes,
        train_gpu_ids=train_gpu_ids,
        val_gpu_ids=val_gpu_ids,
        test_gpu_ids=test_gpu_ids,
    )
    super().__init__(**parent_settings)
    self.distributed_nodes = distributed_nodes
# %%
"""
### Machine parameters
**Note:** We assume that all nodes are identical (same number and model of GPUs and drivers).
The `machine_params` method will be invoked by `runner.py` with different arguments, e.g. to determine the
configuration for validation or training.
When working in distributed settings, `AllenAct` needs to know the total number of trainers across all nodes as well
as the local number of trainers. This is accomplished through the introduction of a `machine_id` keyword argument,
which will be used to define the training parameters as follows:
"""
# %%
def machine_params(self, mode="train", **kwargs):
"""Return the parent's machine parameters adapted to the multi-node setup.

In `train` mode, `devices`, `nprocesses` and `sampler_devices` are each
multiplied by `self.distributed_nodes`; when a `machine_id` keyword
argument is given, the worker ids local to that node are registered via
`params.set_local_worker_ids`. In `valid` mode, `nprocesses` is set to
`(0,)`. Other modes return the parent's parameters unchanged.
"""
params = super().machine_params(mode, **kwargs)
if mode == "train":
# Scale the single-node settings by the number of nodes
# (presumably sequences, so `*` repeats them once per node - TODO confirm).
params.devices = params.devices * self.distributed_nodes
params.nprocesses = params.nprocesses * self.distributed_nodes
params.sampler_devices = params.sampler_devices * self.distributed_nodes
if "machine_id" in kwargs:
machine_id = kwargs["machine_id"]
assert (
0 <= machine_id < self.distributed_nodes
), f"machine_id {machine_id} out of range [0, {self.distributed_nodes - 1}]"
# Node `machine_id` owns a contiguous block of `len(self.train_gpu_ids)`
# worker ids.
local_worker_ids = list(
range(
len(self.train_gpu_ids) * machine_id,
len(self.train_gpu_ids) * (machine_id + 1),
)
)
params.set_local_worker_ids(local_worker_ids)
# Confirm we're setting up train params nicely:
print(
f"devices {params.devices}"
f"\nnprocesses {params.nprocesses}"
f"\nsampler_devices {params.sampler_devices}"
f"\nlocal_worker_ids {params.local_worker_ids}"
)
elif mode == "valid":
# Use all GPUs at their maximum capacity for training
# (you may run validation in a separate machine)
params.nprocesses = (0,)
return params
# %%
"""
In summary, we need to specify which indices in `devices`, `nprocesses` and `sampler_devices` correspond to the
local `machine_id` node (whenever a `machine_id` is given as a keyword argument), otherwise we specify the global
configuration.
### Training pipeline
In preliminary ObjectNav experiments, we observe that small batches are useful during the initial training steps in
terms of sample efficiency, whereas large batches are preferred during the rest of training.
In order to scale to the larger amount of collected data in multi-node settings, we will proceed with a two-stage
pipeline:
1. In the first stage, we'll enforce a number of updates per amount of collected data similar to the
configuration with a single node by enforcing more batches per rollout (for about 30 million steps).
1. In the second stage we'll switch to a configuration with larger learning rate and batch size to be
used up to the grand total of 300 million experience steps.
We first define a helper method to generate a learning rate curve with decay for each stage:
"""
# %%
@staticmethod
def lr_scheduler(small_batch_steps, transition_steps, ppo_steps, lr_scaling):
    """Build the three-phase learning rate multiplier curve.

    # Parameters

    small_batch_steps : steps of the small-batch phase (padded by 2% below).
    transition_steps : steps used to move from the base to the scaled rate.
    ppo_steps : total number of training steps.
    lr_scaling : factor applied to the learning rate in the large-batch phase.

    # Returns

    A `MultiLinearDecay` chaining the small-batch, transition and
    large-batch phases.
    """
    safe_small_batch_steps = int(small_batch_steps * 1.02)
    large_batch_and_lr_steps = ppo_steps - safe_small_batch_steps - transition_steps

    # Multiplier reached at the end of the small-batch phase, following a
    # line that would decay to 0 over `ppo_steps`.
    small_batch_end = 1.0 - safe_small_batch_steps / ppo_steps
    # Multiplier at the start of the large-batch phase: the same line
    # evaluated after the transition, scaled by `lr_scaling`.
    large_batch_start = lr_scaling * (
        1.0 - (safe_small_batch_steps + transition_steps) / ppo_steps
    )

    # Base learning rate phase for small batch (with linear decay towards 0)
    small_batch_phase = LinearDecay(
        steps=safe_small_batch_steps,
        startp=1.0,
        endp=small_batch_end,
    )
    # Lets the optimizer adapt its statistics to the larger learning rate
    transition_phase = LinearDecay(
        steps=transition_steps,
        startp=small_batch_end,
        endp=large_batch_start,
    )
    # Scaled learning rate phase for large batch (with linear decay towards 0)
    large_batch_phase = LinearDecay(
        steps=large_batch_and_lr_steps,
        startp=large_batch_start,
        endp=0,
    )

    return MultiLinearDecay(
        [small_batch_phase, transition_phase, large_batch_phase]
    )
# %%
"""
The training pipeline looks like:
"""
# %%
def training_pipeline(self, **kwargs):
    """Build the two-stage PPO training pipeline for multi-node training.

    The first stage (30 million steps) uses more mini-batches and update
    repeats per rollout; the second stage uses the base configuration for
    the remaining steps, up to 300 million in total. The learning rate
    follows the curve returned by `self.lr_scheduler`, scaled by the square
    root of the number of nodes. `kwargs` are accepted but unused.

    # Returns

    The configured `TrainingPipeline`.

    # Raises

    AssertionError : if `self.num_train_processes` is not a multiple of the
        number of training GPUs.
    """
    # These params are identical to the baseline configuration for 60 samplers (1 machine)
    ppo_steps = int(300e6)
    lr = 3e-4
    num_mini_batch = 1
    update_repeats = 4
    num_steps = 128
    save_interval = 5000000
    log_interval = 10000 if torch.cuda.is_available() else 1
    gamma = 0.99
    use_gae = True
    gae_lambda = 0.95
    max_grad_norm = 0.5

    # We add 30 million steps for small batch learning
    small_batch_steps = int(30e6)
    # And a short transition phase towards large learning rate
    # (see comment in the `lr_scheduler` helper method)
    transition_steps = int(2 / 3 * self.distributed_nodes * 1e6)

    # Find exact number of samplers per GPU
    assert (
        self.num_train_processes % len(self.train_gpu_ids) == 0
    ), "Expected uniform number of samplers per GPU"
    samplers_per_gpu = self.num_train_processes // len(self.train_gpu_ids)

    # Multiply num_mini_batch by the largest divisor of
    # samplers_per_gpu to keep all batches of same size.
    # The search bound is clamped to at least 1: with a single sampler per
    # GPU, `samplers_per_gpu // 2` is 0, which used to leave no candidates
    # and fail with an IndexError. Since 1 divides every integer, the
    # search below always finds a value.
    largest_candidate = max(1, min(samplers_per_gpu // 2, self.distributed_nodes))
    num_mini_batch_multiplier = next(
        i for i in range(largest_candidate, 0, -1) if samplers_per_gpu % i == 0
    )

    # Multiply update_repeats so that the product of this factor and
    # num_mini_batch_multiplier is >= self.distributed_nodes:
    update_repeats_multiplier = int(
        math.ceil(self.distributed_nodes / num_mini_batch_multiplier)
    )

    return TrainingPipeline(
        save_interval=save_interval,
        metric_accumulate_interval=log_interval,
        optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
        num_mini_batch=num_mini_batch,
        update_repeats=update_repeats,
        max_grad_norm=max_grad_norm,
        num_steps=num_steps,
        named_losses={"ppo_loss": PPO(**PPOConfig, show_ratios=False)},
        gamma=gamma,
        use_gae=use_gae,
        gae_lambda=gae_lambda,
        advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=[
            # We increase the number of batches for the first stage to reach an
            # equivalent number of updates per collected rollout data as in the
            # 1 node/60 samplers setting
            PipelineStage(
                loss_names=["ppo_loss"],
                max_stage_steps=small_batch_steps,
                num_mini_batch=num_mini_batch * num_mini_batch_multiplier,
                update_repeats=update_repeats * update_repeats_multiplier,
            ),
            # Then we proceed with the base configuration (leading to larger
            # batches due to the increased number of samplers)
            PipelineStage(
                loss_names=["ppo_loss"],
                max_stage_steps=ppo_steps - small_batch_steps,
            ),
        ],
        # We use the MultiLinearDecay curve defined by the helper function,
        # setting the learning rate scaling as the square root of the number
        # of nodes. Linear scaling might also work, but we leave that
        # check to the reader.
        lr_scheduler_builder=Builder(
            LambdaLR,
            {
                "lr_lambda": self.lr_scheduler(
                    small_batch_steps=small_batch_steps,
                    transition_steps=transition_steps,
                    ppo_steps=ppo_steps,
                    lr_scaling=math.sqrt(self.distributed_nodes),
                )
            },
        ),
    )
# %%
"""
## Multi-node configuration
**Note:** In the following, we'll assume you don't have an available setup for distributed execution, such as
[slurm](https://slurm.schedmd.com/documentation.html). If you do have access to a better alternative to setup and run
distributed processes, we encourage you to use that. The experimental distributed tools included here are intended for
a rather basic usage pattern that might not suit your needs.
If we haven't set up AllenAct with the headless version of AI2-THOR in our nodes, we can define a configuration script
similar to:
```bash
#!/bin/bash
# Prepare a virtualenv for allenact
sudo apt-get install -y python3-venv
python3 -mvenv ~/allenact_venv
source ~/allenact_venv/bin/activate
pip install -U pip wheel
# Install AllenAct
cd ~
git clone https://github.com/allenai/allenact.git
cd allenact
# Install AllenAct + RoboTHOR plugin dependencies
pip install -r requirements.txt
pip install -r allenact_plugins/robothor_plugin/extra_requirements.txt
# Download + setup datasets
bash datasets/download_navigation_datasets.sh robothor-objectnav
# Install headless AI2-THOR and required libvulkan1
sudo apt-get install -y libvulkan1
pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+91139c909576f3bf95a187c5b02c6fd455d06b48
# Download AI2-THOR binaries
python -c "from ai2thor.controller import Controller; c=Controller(); c.stop()"
echo DONE
```
and save it as `headless_robothor_config.sh`. Note that some of the configuration steps in the script assume you have
superuser privileges.
Then, we can just copy this file to the first node in our cluster and run it with:
```bash
source <path/to/headless_robothor_config.sh>
```
If everything went well, we should be able to
```bash
cd ~/allenact && source ~/allenact_venv/bin/activate
```
Note that we might need to install `libvulkan1` in each node (even if the AllenAct setup is shared across nodes) if it
is not already available.
### Local filesystems
If our cluster does not use a shared filesystem, we'll need to propagate the setup to the rest of nodes. Assuming
we can just `ssh` with the current user to all nodes, we can propagate our config with
```bash
scripts/dconfig.py --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \
--config_script <path/to/headless_robothor_config.sh>
```
and we can check the state of the installation with the `scripts/dcommand.py` tool:
```bash
scripts/dcommand.py --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \
--command 'tail -n 5 ~/log_allenact_distributed_config'
```
If everything went fine, all requirements are ready to start running our experiment.
## Run your experiment
**Note:** In this section, we again assume you don't have an available setup for distributed execution, such as
[slurm](https://slurm.schedmd.com/documentation.html). If you do have access to a better alternative to setup/run
distributed processes, we encourage you to use that. The experimental distributed tools included here are intended for
a rather basic usage pattern that might not suit your needs.
Our experimental extension to AllenAct's `main.py` script allows using practically identical commands to the ones
used in a single-node setup to start our experiments. From the root `allenact` directory, we can simply invoke
```bash
scripts/dmain.py projects/tutorials/distributed_objectnav_tutorial.py \
--config_kwargs '{"distributed_nodes":3}' \
--runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \
--env_activate_path ~/allenact_venv/bin/activate \
--allenact_path ~/allenact \
--distributed_ip_and_port <FIRST_IP_ADDRESS_IN_RUNS_ON_LIST>:<FREE_PORT_NUMBER_FOR_THIS_IP_ADDRESS>
```
This script will do several things for you, including synchronization of the changes in the `allenact` directory
to all machines, enabling virtual environments in each node, sharing the same random seed for all `main.py` instances,
assigning `--machine_id` parameters required for multi-node training, and redirecting the process output to a log file
under the output results folder.
Note that by changing the value associated with the `distributed_nodes` key in the `config_kwargs` map and the `runs_on`
list of IPs, we can easily scale our training to e.g. 1, 3, or 8 nodes as shown in the chart above. Note that for this
call to work unmodified, you should have sufficient GPUs/GPU memory to host 60 samplers per node.
## Track and stop your experiment
You might have noticed that, when your experiment started with the above command, a file was created under
`~/.allenact`. This file includes IP addresses and screen session IDs for all nodes. It can be used
by the already introduced `scripts/dcommand.py` script, if we omit the `--runs_on` argument, to call a command on each
node via ssh; but most importantly it is used by the `scripts/dkill.py` script to terminate all screen sessions hosting
our training processes.
### Experiment tracking
A simple way to check all machines are training, assuming you have `nvidia-smi` installed in all nodes, is to just call
```bash
scripts/dcommand.py
```
from the root `allenact` directory. If everything is working well, the GPU usage stats from `nvidia-smi` should reflect
ongoing activity. You can also add different commands to be executed by each node. It is of course also possible to run
tensorboard on any of the nodes, if that's your preference.
### Experiment termination
Just call
```bash
scripts/dkill.py
```
After killing all involved screen sessions, you will be asked about whether you also want to delete the "killfile"
stored under the `~/.allenact` directory (which might be your preferred option once all processes are terminated).
We hope this tutorial will help you start quickly testing new ideas! Even if we've only explored moderate settings of
up to 480 experience samplers, you might want to consider some additional changes (like the
[choice for the optimizer](https://arxiv.org/abs/2103.07013)) if you plan to run at larger scale.
"""
================================================
FILE: projects/tutorials/gym_mujoco_tutorial.py
================================================
# literate: tutorials/gym-mujoco-tutorial.md
# %%
"""# Tutorial: OpenAI gym MuJoCo environment."""
# %%
"""
**Note** The provided commands to execute in this tutorial assume you have
[installed the full library](../installation/installation-allenact.md#full-library) and the requirements for the
`gym_plugin`. The latter can be installed by
```bash
pip install -r allenact_plugins/gym_plugin/extra_requirements.txt
```
The environments for this tutorial use [MuJoCo](http://www.mujoco.org/)(**Mu**lti-**Jo**int dynamics in **Co**ntact)
physics simulator, which is also required to be installed properly with instructions
[here](https://github.com/openai/mujoco-py).
## The task
For this tutorial, we'll focus on one of the continuous-control environments under the `mujoco` group of `gym`
environments: [Ant-v2](https://gym.openai.com/envs/Ant-v2/). In this task, the goal
is to make a four-legged creature, "ant", walk forward as fast as possible. A random agent of "Ant-v2" is shown below.
.
To achieve the goal, we need to provide continuous control for the agent moving forward with four legs with the
`x` velocity as high as possible for at most 1000 episode steps. The agent fails (i.e., the episode is done) if the `z` position
is out of the range [0.2, 1.0]. The action space has 8 dimensions and the observation space has 111 dimensions,
which map to different body parts, including 3D position `(x,y,z)`, orientation (quaternion `x`,`y`,`z`,`w`)
of the torso, and the joint angles, 3D velocity `(x,y,z)`, 3D angular velocity `(x,y,z)`, and joint velocities.
The rewards for the agent "ant" are composed of the forward rewards, healthy rewards, control cost, and contact cost.
## Implementation
For this tutorial, we'll use the readily available `gym_plugin`, which includes a
[wrapper for `gym` environments](../api/allenact_plugins/gym_plugin/gym_environment.md#gymenvironment), a
[task sampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler) and
[task definition](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymcontinuousbox2dtask), a
[sensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to wrap the observations provided by the `gym`
environment, and a simple [model](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic).
The experiment config, similar to the one used for the
[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md), is defined as follows:
"""
# %%
from typing import Dict, Optional, List, Any, cast
import gym
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses.ppo import PPO
from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor
from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler
from allenact.utils.experiment_utils import (
TrainingPipeline,
Builder,
PipelineStage,
LinearDecay,
)
from allenact.utils.viz_utils import VizSuite, AgentViewViz
class HandManipulateTutorialExperimentConfig(ExperimentConfig):
@classmethod
def tag(cls) -> str:
    """Return the tag string identifying this experiment."""
    experiment_tag = "GymMuJoCoTutorial"
    return experiment_tag
# %%
"""
### Sensors and Model
As mentioned above, we'll use a [GymMuJoCoSensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymmujocosensor) to provide
full observations from the state of the `gym` environment to our model.
"""
# %%
# Single sensor for the "Ant-v2" environment; its uuid ("gym_mujoco_data") is
# the same identifier passed to the model as `input_uuid`.
SENSORS = [
GymMuJoCoSensor("Ant-v2", uuid="gym_mujoco_data"),
]
# %%
"""
We define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic,
[MemorylessActorCritic](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). Since
this is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]`
instead of `ActorCriticModel[CategoricalDistr]`, since we'll use a
[Gaussian distribution](../api/allenact_plugins/gym_plugin/gym_distributions.md#gaussiandistr) to sample actions.
"""
# %%
@classmethod
def create_model(cls, **kwargs) -> nn.Module:
    """Create the `MemorylessActorCritic` agent for continuous control.

    The model is a lightweight implementation with separate MLPs for
    actors and critic. Because the actions are continuous, its superclass
    is `ActorCriticModel[GaussianDistr]` rather than
    `ActorCriticModel[CategoricalDistr]`: actions are sampled from a
    Gaussian distribution. `kwargs` are accepted but unused.
    """
    # 8 actors, each in the range [-3.0, 3.0]
    ant_action_space = gym.spaces.Box(-3.0, 3.0, (8,), "float32")
    sensor_spaces = SensorSuite(cls.SENSORS).observation_spaces

    return MemorylessActorCritic(
        input_uuid="gym_mujoco_data",
        action_space=ant_action_space,
        observation_space=sensor_spaces,
        action_std=0.5,
    )
# %%
"""
### Task samplers
We use an available `TaskSampler` implementation for `gym` environments that allows to sample
[GymTasks](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtask):
[GymTaskSampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler). Even though it is possible to let the task
sampler instantiate the proper sensor for the chosen task name (by passing `None`), we use the sensors we created
above, which contain a custom identifier for the actual observation space (`gym_mujoco_data`) also used by the model.
"""
# %%
@classmethod
def make_sampler_fn(cls, **kwargs) -> TaskSampler:
    """Create a `GymTaskSampler` for "Ant-v2", forwarding all `kwargs`."""
    sampler = GymTaskSampler(gym_env_type="Ant-v2", **kwargs)
    return sampler
# %%
"""
For convenience, we will use a `_get_sampler_args` method to generate the task sampler arguments for all three
modes, `train, valid, test`:
"""
# %%
def train_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Return the task sampler arguments for training.

    Only `process_ind` and `seeds` are used; the remaining parameters are
    accepted but ignored.
    """
    sampler_args = self._get_sampler_args(
        process_ind=process_ind, mode="train", seeds=seeds
    )
    return sampler_args
def valid_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Return the task sampler arguments for validation.

    Only `process_ind` and `seeds` are used; the remaining parameters are
    accepted but ignored.
    """
    sampler_args = self._get_sampler_args(
        process_ind=process_ind, mode="valid", seeds=seeds
    )
    return sampler_args
def test_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Return the task sampler arguments for testing.

    Only `process_ind` and `seeds` are used; the remaining parameters are
    accepted but ignored.
    """
    sampler_args = self._get_sampler_args(
        process_ind=process_ind, mode="test", seeds=seeds
    )
    return sampler_args
# %%
"""
Similarly to what we do in the Minigrid navigation tutorial, the task sampler samples random tasks forever during training, while,
during testing (or validation), we sample a fixed number of tasks.
"""
# %%
def _get_sampler_args(
    self, process_ind: int, mode: str, seeds: List[int]
) -> Dict[str, Any]:
    """Generate initialization arguments for train, valid, and test
    TaskSamplers.

    # Parameters

    process_ind : index of the current task sampler
    mode : one of `train`, `valid`, or `test`
    seeds : per-process seeds; the entry at `process_ind` is used
    """
    is_training = mode == "train"

    if is_training:
        # Infinite training tasks, with no predefined random seeds.
        max_tasks = None
        task_seeds_list = None
    else:
        max_tasks = 4
        # One seed for each task to sample:
        # - ensures different seeds for each sampler, and
        # - ensures a deterministic set of sampled tasks.
        first_seed = process_ind * max_tasks
        task_seeds_list = list(range(first_seed, first_seed + max_tasks))

    # Tasks are sampled randomly in training and deterministically in
    # validation/testing.
    deterministic_sampling = not is_training

    return dict(
        gym_env_types=["Ant-v2"],
        sensors=self.SENSORS,  # sensors used to return observations to the agent
        max_tasks=max_tasks,
        task_seeds_list=task_seeds_list,
        deterministic_sampling=deterministic_sampling,
        seed=seeds[process_ind],
    )
# %%
"""
Note that we just sample 4 tasks for validation and testing in this case, which suffice to illustrate the model's
success.
### Machine parameters
In this tutorial, we just train the model on the CPU. We allocate a larger number of samplers for training (8) than
for validation or testing (just 1), and we default to CPU usage by returning an empty list of `devices`. We also
include a video visualizer (`AgentViewViz`) in test mode.
"""
# %%
@classmethod
def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
    """Return the machine parameters for the given mode.

    Training uses 8 processes and every other mode uses 1. `devices` is
    always an empty list. A video visualizer is created only in `test`
    mode. `kwargs` are accepted but unused.
    """
    if mode == "test":
        visualizer = VizSuite(
            mode=mode,
            video_viz=AgentViewViz(
                label="episode_vid",
                max_clip_length=400,
                vector_task_source=("render", {"mode": "rgb_array"}),
                fps=30,
            ),
        )
    else:
        visualizer = None

    rollout_processes = 8 if mode == "train" else 1
    return dict(
        nprocesses=rollout_processes,
        devices=[],
        visualizer=visualizer,
    )
# %%
"""
### Training pipeline
The last definition is the training pipeline. In this case, we use a PPO stage with linearly decaying learning rate
and 10 single-batch update repeats per rollout. The reward should exceed 4,000
within 20M steps at test time. To make the "ant" run at a clearly fast speed, we train the agent with PPO
for 3e7 steps.
"""
# %%
@classmethod
def training_pipeline(cls, **kwargs) -> TrainingPipeline:
    """Build the single-stage PPO training pipeline.

    One PPO stage runs for 3e7 steps, with an Adam optimizer whose
    learning rate decays linearly from its initial value to 0 over those
    steps. `kwargs` are accepted but unused.
    """
    total_steps = int(3e7)
    initial_lr = 3e-4

    ppo_loss = PPO(
        clip_param=0.2,
        value_loss_coef=0.5,
        entropy_coef=0.0,
    )
    single_stage = PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps)
    adam_builder = Builder(cast(optim.Optimizer, optim.Adam), dict(lr=initial_lr))
    decay_builder = Builder(
        LambdaLR,
        {"lr_lambda": LinearDecay(steps=total_steps, startp=1, endp=0)},
    )

    return TrainingPipeline(
        named_losses=dict(ppo_loss=ppo_loss),  # type:ignore
        pipeline_stages=[single_stage],
        optimizer_builder=adam_builder,
        num_mini_batch=4,  # optimal 64
        update_repeats=10,
        max_grad_norm=0.5,
        num_steps=2048,
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
        advance_scene_rollout_period=None,
        save_interval=200000,
        metric_accumulate_interval=50000,
        lr_scheduler_builder=decay_builder,
    )
# %%
"""
## Training and validation
We have a complete implementation of this experiment's configuration class in `projects/tutorials/gym_mujoco_tutorial.py`.
To start training from scratch, we just need to invoke
```bash
PYTHONPATH=. python allenact/main.py gym_mujoco_tutorial -b projects/tutorials -m 8 -o /PATH/TO/gym_mujoco_output -s 0 -e
```
from the `allenact` root directory. Note that we include `-e` to enforce deterministic evaluation. Please refer to the
[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md) if in doubt of the meaning of the rest of parameters.
If we have Tensorboard installed, we can track progress with
```bash
tensorboard --logdir /PATH/TO/gym_mujoco_output
```
which will default to the URL [http://localhost:6006/](http://localhost:6006/).
After 30,000,000 steps, the script will terminate. If everything went well, the `valid` success rate should be 1
and the mean reward should rise above 4,000 within 20,000,000 steps, while the average episode length should stay at or a
little below 1,000.
## Testing
The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the
subfolders in the path to the checkpoints, saved under the output folder.
In order to evaluate (i.e. test) a collection of checkpoints, we need to pass the `--eval` flag and specify the
directory containing the checkpoints with the `--checkpoint CHECKPOINT_DIR` option:
```bash
PYTHONPATH=. python allenact/main.py gym_mujoco_tutorial \
-b projects/tutorials \
-m 1 \
-o /PATH/TO/gym_mujoco_output \
-s 0 \
-e \
--eval \
--checkpoint /PATH/TO/gym_mujoco_output/checkpoints/GymMuJoCoTutorial/YOUR_START_DATE
```
If everything went well, the `test` success rate should be 1
and the mean reward should rise above 4,000 within 20,000,000 steps, while the average episode length should stay at or a
little below 1,000.
The output should be something like this:
.
And the `gif` results can be seen in the image tab of Tensorboard while testing.

If the test command fails with `pyglet.canvas.xlib.NoSuchDisplayException: Cannot connect to "None"`, e.g. when running
remotely, try prepending `DISPLAY=:0.0` to the command above, assuming you have an xserver running with such display
available:
```bash
DISPLAY=:0.0 PYTHONPATH=. python allenact/main.py gym_mujoco_tutorial \
-b projects/tutorials \
-m 1 \
-o /PATH/TO/gym_mujoco_output \
-s 0 \
-e \
--eval \
--checkpoint /PATH/TO/gym_mujoco_output/checkpoints/GymMuJoCoTutorial/YOUR_START_DATE
```
"""
================================================
FILE: projects/tutorials/gym_tutorial.py
================================================
# literate: tutorials/gym-tutorial.md
# %%
"""# Tutorial: OpenAI gym for continuous control."""
# %%
"""
**Note** The provided commands to execute in this tutorial assume you have
[installed the full library](../installation/installation-allenact.md#full-library) and the requirements for the
`gym_plugin`. The latter can be installed by
```bash
pip install -r allenact_plugins/gym_plugin/extra_requirements.txt
```
In this tutorial, we:
1. Introduce the `gym_plugin`, which enables some of the tasks in [OpenAI's gym](https://gym.openai.com/) for training
and inference within AllenAct.
1. Show an example of continuous control with an arbitrary action space covering 2 policies for one of the `gym` tasks.
## The task
For this tutorial, we'll focus on one of the continuous-control environments under the `Box2D` group of `gym`
environments: [LunarLanderContinuous-v2](https://gym.openai.com/envs/LunarLanderContinuous-v2/). In this task, the goal
is to smoothly land a lunar module in a landing pad, as shown below.
.
To achieve this goal, we need to provide continuous control for a main engine and directional one (2 real values). In
order to solve the task, the expected reward is of at least 200 points. The controls for main and directional engines
are both in the range [-1.0, 1.0] and the observation space is composed of 8 scalars indicating `x` and `y` positions,
`x` and `y` velocities, lander angle and angular velocity, and left and right ground contact. Note that these 8 scalars
provide a full observation of the state.
## Implementation
For this tutorial, we'll use the readily available `gym_plugin`, which includes a
[wrapper for `gym` environments](../api/allenact_plugins/gym_plugin/gym_environment.md#gymenvironment), a
[task sampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler) and
[task definition](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymcontinuousbox2dtask), a
[sensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to wrap the observations provided by the `gym`
environment, and a simple [model](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic).
The experiment config, similar to the one used for the
[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md), is defined as follows:
"""
# %%
from typing import Dict, Optional, List, Any, cast
import gym
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses.ppo import PPO
from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymBox2DSensor
from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler
from allenact.utils.experiment_utils import (
TrainingPipeline,
Builder,
PipelineStage,
LinearDecay,
)
from allenact.utils.viz_utils import VizSuite, AgentViewViz
class GymTutorialExperimentConfig(ExperimentConfig):
    @classmethod
    def tag(cls) -> str:
        """Return the tag identifying this experiment."""
        return "GymTutorial"

    # %%
    """
    ### Sensors and Model

    As mentioned above, we'll use a [GymBox2DSensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to provide
    full observations from the state of the `gym` environment to our model.
    """

    # %%
    SENSORS = [
        GymBox2DSensor("LunarLanderContinuous-v2", uuid="gym_box_data"),
    ]

    # %%
    """
    We define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic,
    [MemorylessActorCritic](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). Since
    this is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]`
    instead of `ActorCriticModel[CategoricalDistr]`, since we'll use a
    [Gaussian distribution](../api/allenact_plugins/gym_plugin/gym_distributions.md#gaussiandistr) to sample actions.
    """

    # %%
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Instantiate the memoryless actor-critic reading the `gym_box_data` sensor."""
        return MemorylessActorCritic(
            input_uuid="gym_box_data",
            action_space=gym.spaces.Box(
                -1.0, 1.0, (2,)
            ),  # 2 actors, each in the range [-1.0, 1.0]
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )

    # %%
    """
    ### Task samplers

    We use an available `TaskSampler` implementation for `gym` environments that allows to sample
    [GymTasks](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtask):
    [GymTaskSampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler). Even though it is possible to let the task
    sampler instantiate the proper sensor for the chosen task name (by passing `None`), we use the sensors we created
    above, which contain a custom identifier for the actual observation space (`gym_box_data`) also used by the model.
    """

    # %%
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Build a `GymTaskSampler` from the given keyword arguments."""
        return GymTaskSampler(**kwargs)

    # %%
    """
    For convenience, we will use a `_get_sampler_args` method to generate the task sampler arguments for all three
    modes, `train, valid, test`:
    """

    # %%
    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for training process `process_ind`."""
        return self._get_sampler_args(
            process_ind=process_ind, mode="train", seeds=seeds
        )

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for validation process `process_ind`."""
        return self._get_sampler_args(
            process_ind=process_ind, mode="valid", seeds=seeds
        )

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for testing process `process_ind`."""
        return self._get_sampler_args(process_ind=process_ind, mode="test", seeds=seeds)

    # %%
    """
    Similarly to what we do in the Minigrid navigation tutorial, during training the task sampler samples random tasks
    forever, while, during testing (or validation), we sample a fixed number of tasks.
    """

    # %%
    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: Optional[List[int]] = None
    ) -> Dict[str, Any]:
        """Generate initialization arguments for train, valid, and test
        TaskSamplers.

        # Parameters
        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : optional list of seeds indexed by `process_ind`; when `None`
            (the default of the public `*_task_sampler_args` methods), no
            explicit seed is passed on to the sampler

        # Returns
        A dictionary of keyword arguments for the task sampler.
        """
        if mode == "train":
            max_tasks = None  # infinite training tasks
            task_seeds_list = None  # no predefined random seeds for training
            deterministic_sampling = False  # randomly sample tasks in training
        else:
            max_tasks = 3

            # one seed for each task to sample:
            # - ensures different seeds for each sampler, and
            # - ensures a deterministic set of sampled tasks.
            task_seeds_list = list(
                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)
            )

            deterministic_sampling = (
                True  # deterministically sample task in validation/testing
            )

        # The callers declare `seeds` as optional, so indexing it
        # unconditionally would raise a `TypeError` when it is `None`.
        # NOTE(review): assumes the sampler accepts `seed=None` — confirm.
        seed = seeds[process_ind] if seeds is not None else None

        return dict(
            gym_env_types=["LunarLanderContinuous-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,  # see above
            task_seeds_list=task_seeds_list,  # see above
            deterministic_sampling=deterministic_sampling,  # see above
            seed=seed,
        )

    # %%
    """
    Note that we just sample 3 tasks for validation and testing in this case, which suffice to illustrate the model's
    success.

    ### Machine parameters

    Given the simplicity of the task and model, we can just train the model on the CPU. During training, success should
    reach 100% in less than 10 minutes, whereas solving the task (evaluation reward > 200) might take about 20 minutes
    (on a laptop CPU).

    We allocate a larger number of samplers for training (8) than for validation or testing (just 1), and we default to
    CPU usage by returning an empty list of `devices`. We also include a video visualizer (`AgentViewViz`) in test mode.
    """

    # %%
    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        """Return process count, devices and (test-only) visualizer for `mode`."""
        visualizer = None
        if mode == "test":
            visualizer = VizSuite(
                mode=mode,
                video_viz=AgentViewViz(
                    label="episode_vid",
                    max_clip_length=400,
                    vector_task_source=("render", {"mode": "rgb_array"}),
                    fps=30,
                ),
            )
        return {
            "nprocesses": 8 if mode == "train" else 1,
            "devices": [],
            "visualizer": visualizer,
        }

    # %%
    """
    ### Training pipeline

    The last definition is the training pipeline. In this case, we use a PPO stage with linearly decaying learning rate
    and 80 single-batch update repeats per rollout:
    """

    # %%
    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        """Build the single-stage PPO training pipeline (1.2e6 steps)."""
        ppo_steps = int(1.2e6)
        return TrainingPipeline(
            named_losses=dict(
                ppo_loss=PPO(
                    clip_param=0.2,
                    value_loss_coef=0.5,
                    entropy_coef=0.0,
                ),
            ),  # type:ignore
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps),
            ],
            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-3)),
            num_mini_batch=1,
            update_repeats=80,
            max_grad_norm=100,
            num_steps=2000,
            gamma=0.99,
            use_gae=False,
            gae_lambda=0.95,
            advance_scene_rollout_period=None,
            save_interval=200000,
            metric_accumulate_interval=50000,
            lr_scheduler_builder=Builder(
                LambdaLR,
                {"lr_lambda": LinearDecay(steps=ppo_steps)},  # type:ignore
            ),
        )
# %%
"""
## Training and validation
We have a complete implementation of this experiment's configuration class in `projects/tutorials/gym_tutorial.py`.
To start training from scratch, we just need to invoke
```bash
PYTHONPATH=. python allenact/main.py gym_tutorial -b projects/tutorials -m 8 -o /PATH/TO/gym_output -s 54321 -e
```
from the `allenact` root directory. Note that we include `-e` to enforce deterministic evaluation. Please refer to the
[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md) if in doubt of the meaning of the rest of parameters.
If we have Tensorboard installed, we can track progress with
```bash
tensorboard --logdir /PATH/TO/gym_output
```
which will default to the URL [http://localhost:6006/](http://localhost:6006/).
After 1,200,000 steps, the script will terminate. If everything went well, the `valid` success rate should quickly
converge to 1 and the mean reward to above 250, while the average episode length should stay below or near 300.
## Testing
The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the
subfolders in the path to the checkpoints, saved under the output folder.
In order to evaluate (i.e. test) a collection of checkpoints, we need to pass the `--eval` flag and specify the
directory containing the checkpoints with the `--checkpoint CHECKPOINT_DIR` option:
```bash
PYTHONPATH=. python allenact/main.py gym_tutorial \
-b projects/tutorials \
-m 1 \
-o /PATH/TO/gym_output \
-s 54321 \
-e \
--eval \
--checkpoint /PATH/TO/gym_output/checkpoints/GymTutorial/YOUR_START_DATE \
--approx_ckpt_step_interval 800000 # Skip some checkpoints
```
The option `--approx_ckpt_step_interval 800000` tells AllenAct that we only want to evaluate checkpoints
which were saved every ~800000 steps; this lets us avoid evaluating every saved checkpoint. If everything went well,
the `test` success rate should converge to 1, the episode length below or near 300 steps, and the mean reward to above
250. The images tab in tensorboard will contain videos for the sampled test episodes.
.
If the test command fails with `pyglet.canvas.xlib.NoSuchDisplayException: Cannot connect to "None"`, e.g. when running
remotely, try prepending `DISPLAY=:0.0` to the command above, assuming you have an xserver running with such display
available:
```bash
DISPLAY=:0.0 PYTHONPATH=. python allenact/main.py gym_tutorial \
-b projects/tutorials \
-m 1 \
-o /PATH/TO/gym_output \
-s 54321 \
-e \
--eval \
--checkpoint /PATH/TO/gym_output/checkpoints/GymTutorial/YOUR_START_DATE \
--approx_ckpt_step_interval 800000
```
"""
================================================
FILE: projects/tutorials/minigrid_offpolicy_tutorial.py
================================================
# literate: tutorials/offpolicy-tutorial.md
# %%
"""# Tutorial: Off-policy training."""
# %%
"""
**Note** The provided commands to execute in this tutorial assume you have
[installed the full library](../installation/installation-allenact.md#full-library) and the `extra_requirements`
for the `babyai_plugin` and `minigrid_plugin`. The latter can be installed with:
```bash
pip install -r allenact_plugins/babyai_plugin/extra_requirements.txt; pip install -r allenact_plugins/minigrid_plugin/extra_requirements.txt
```
In this tutorial we'll learn how to train an agent from an external dataset by imitating expert actions via
Behavior Cloning. We'll use a [BabyAI agent](/api/allenact_plugins/babyai_plugin/babyai_models#BabyAIRecurrentACModel) to solve
`GoToLocal` tasks on [MiniGrid](https://github.com/maximecb/gym-minigrid); see the
`projects/babyai_baselines/experiments/go_to_local` directory for more details.
This tutorial assumes `AllenAct`'s [abstractions](../getting_started/abstractions.md) are known.
## The task
In a `GoToLocal` task, the agent immersed in a grid world has to navigate to a specific object in the presence of
multiple distractors, requiring the agent to understand `go to` instructions like "go to the red ball". For further
details, please consult the [original paper](https://arxiv.org/abs/1810.08272).
## Getting the dataset
We will use a large dataset (**more than 4 GB**) including expert demonstrations for `GoToLocal` tasks. To download
the data we'll run
```bash
PYTHONPATH=. python allenact_plugins/babyai_plugin/scripts/download_babyai_expert_demos.py GoToLocal
```
from the project's root directory, which will download `BabyAI-GoToLocal-v0.pkl` and `BabyAI-GoToLocal-v0_valid.pkl` to
the `allenact_plugins/babyai_plugin/data/demos` directory.
We will also generate small versions of the datasets, which will be useful if running on CPU, by calling
```bash
PYTHONPATH=. python allenact_plugins/babyai_plugin/scripts/truncate_expert_demos.py
```
from the project's root directory, which will generate `BabyAI-GoToLocal-v0-small.pkl` under the same
`allenact_plugins/babyai_plugin/data/demos` directory.
## Data storage
In order to train with an off-policy dataset, we need to define an `ExperienceStorage`. In AllenAct, an
`ExperienceStorage` object has two primary functions:
1. It stores/manages relevant data (e.g. similarly to the `Dataset` class in PyTorch).
2. It loads stored data into batches that will be used for loss computation (e.g. similarly to the `Dataloader`
class in PyTorch).
Unlike a PyTorch `Dataset`, however, an `ExperienceStorage` object can build its dataset **at runtime** by processing
rollouts from the agent. This flexibility allows us to, for example, implement the experience replay data structure
used in deep Q-learning. For this tutorial we won't need this additional functionality as our off-policy dataset
is a fixed collection of expert trajectories.
An example of an `ExperienceStorage` for BabyAI expert demos might look as follows:
"""
# %% import_summary allenact_plugins.minigrid_plugin.minigrid_offpolicy.MiniGridExpertTrajectoryStorage
# %%
"""
A complete example can be found in
[MiniGridExpertTrajectoryStorage](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy#MiniGridExpertTrajectoryStorage).
## Loss function
Off-policy losses must implement the
[`GenericAbstractLoss`](/api/allenact/base_abstractions/misc/#genericabstractloss)
interface. In this case, we minimize the cross-entropy between the actor's policy and the expert action:
"""
# %% import allenact_plugins.minigrid_plugin.minigrid_offpolicy.MiniGridOffPolicyExpertCELoss
# %%
"""
A complete example can be found in
[MiniGridOffPolicyExpertCELoss](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy#MiniGridOffPolicyExpertCELoss).
Note that in this case we train the entire actor, but it would also be possible to forward data through a different
subgraph of the ActorCriticModel.
## Experiment configuration
For the experiment configuration, we'll build on top of an existing
[base BabyAI GoToLocal Experiment Config](/api/projects/babyai_baselines/experiments/go_to_local/base/#basebabyaigotolocalexperimentconfig).
The complete `ExperimentConfig` file for off-policy training is
[here](/api/projects/tutorials/minigrid_offpolicy_tutorial/#bcoffpolicybabyaigotolocalexperimentconfig), but let's
focus on the most relevant aspect to enable this type of training:
providing an [OffPolicyPipelineComponent](/api/allenact/utils/experiment_utils/#offpolicypipelinecomponent) object as input to a
`PipelineStage` when instantiating the `TrainingPipeline` in the `training_pipeline` method.
"""
# %% hide
import os
from typing import Optional, List, Tuple
import torch
from gym_minigrid.minigrid import MiniGridEnv
from allenact.algorithms.onpolicy_sync.storage import RolloutBlockStorage
from allenact.utils.experiment_utils import (
PipelineStage,
StageComponent,
TrainingSettings,
)
from allenact_plugins.babyai_plugin.babyai_constants import (
BABYAI_EXPERT_TRAJECTORIES_DIR,
)
from allenact_plugins.minigrid_plugin.minigrid_offpolicy import (
MiniGridOffPolicyExpertCELoss,
MiniGridExpertTrajectoryStorage,
)
from projects.babyai_baselines.experiments.go_to_local.base import (
BaseBabyAIGoToLocalExperimentConfig,
)
# %%
class BCOffPolicyBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):
    """Behavior cloning for BabyAI `GoToLocal` trained purely off-policy."""

    DATASET: Optional[List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]] = None

    GPU_ID = 0 if torch.cuda.is_available() else None

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "BabyAIGoToLocalBCOffPolicy"

    @classmethod
    def METRIC_ACCUMULATE_INTERVAL(cls):
        """Return the metric accumulation interval.

        See BaseBabyAIGoToLocalExperimentConfig for how this is used.
        """
        return 1

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single-stage pipeline that only uses the off-policy expert loss.

        The off-policy component performs
        `num_mini_batch * update_repeats` single-batch updates, with both
        factors taken from the defaults returned by
        `cls.rl_loss_default("ppo", steps=-1)`.
        """
        il_steps = cls.TOTAL_IL_TRAIN_STEPS
        ppo_defaults = cls.rl_loss_default("ppo", steps=-1)
        offpolicy_repeats = (
            ppo_defaults["num_mini_batch"] * ppo_defaults["update_repeats"]
        )

        losses = {
            "offpolicy_expert_ce_loss": MiniGridOffPolicyExpertCELoss(
                total_episodes_in_epoch=int(1e6)
            ),
        }

        # The truncated ("-small") dataset is used when CUDA is unavailable.
        dataset_suffix = "" if torch.cuda.is_available() else "-small"
        storages = {
            "onpolicy": RolloutBlockStorage(),
            "minigrid_offpolicy_expert": MiniGridExpertTrajectoryStorage(
                data_path=os.path.join(
                    BABYAI_EXPERT_TRAJECTORIES_DIR,
                    "BabyAI-GoToLocal-v0{}.pkl".format(dataset_suffix),
                ),
                num_samplers=cls.NUM_TRAIN_SAMPLERS,
                rollout_len=cls.ROLLOUT_STEPS,
                instr_len=cls.INSTR_LEN,
            ),
        }

        offpolicy_component = StageComponent(
            uuid="offpolicy",
            storage_uuid="minigrid_offpolicy_expert",
            loss_names=["offpolicy_expert_ce_loss"],
            training_settings=TrainingSettings(
                update_repeats=offpolicy_repeats,
                num_mini_batch=1,
            ),
        )

        # Single stage, only with off-policy training
        only_stage = PipelineStage(
            loss_names=["offpolicy_expert_ce_loss"],  # no on-policy losses
            max_stage_steps=il_steps,  # keep sampling episodes in the stage
            stage_components=[offpolicy_component],
        )

        return cls._training_pipeline(
            named_losses=losses,
            named_storages=storages,
            pipeline_stages=[only_stage],
            # As we don't have any on-policy losses, we set the next
            # two values to zero to ensure we don't attempt to
            # compute gradients for on-policy rollouts:
            num_mini_batch=0,
            update_repeats=0,
            total_train_steps=il_steps,
        )
# %%
"""
You'll have noted that it is possible to combine on-policy and off-policy training in the same stage, even though here
we apply pure off-policy training.
## Training
We recommend using a machine with a CUDA-capable GPU for this experiment. In order to start training, we just need to
invoke
```bash
PYTHONPATH=. python allenact/main.py -b projects/tutorials minigrid_offpolicy_tutorial -m 8 -o /PATH/TO/minigrid_offpolicy_output
```
Note that with the `-m 8` option we limit to 8 the number of on-policy task sampling processes used between off-policy
updates.
If everything goes well, the training success should quickly reach values around 0.7-0.8 on GPU and converge to values
close to 1 if given sufficient time to train.
If running tensorboard, you'll notice a separate group of scalars named `train-offpolicy-losses` and
`train-offpolicy-misc` with losses, approximate "experiences per second" (i.e. the number of off-policy experiences/steps
being used to update the model per second), and other tracked values in addition to the standard `train-onpolicy-*`
used for on-policy training. In the `train-metrics` and `train-misc` sections you'll find the metrics
quantifying the performance of the agent throughout training and some other plots showing training details.
*Note that the x-axis for these plots is different than for the `train-offpolicy-*` sections*. This
is because these plots use the number of rollout steps as the x-axis (i.e. steps that the trained agent
takes interactively) while the `train-offpolicy-*` plots use the number of off-policy "experiences" that have
been shown to the agent.
A view of the training progress about 5 hours after starting on a CUDA-capable GPU should look similar to the below
(note that training reached >99% success after about 50 minutes).

"""
================================================
FILE: projects/tutorials/minigrid_tutorial.py
================================================
# literate: tutorials/minigrid-tutorial.md
# %%
"""# Tutorial: Navigation in MiniGrid."""
# %%
"""
In this tutorial, we will train an agent to complete the `MiniGrid-Empty-Random-5x5-v0` task within the
[MiniGrid](https://github.com/maximecb/gym-minigrid) environment. We will demonstrate how to:
* Write an experiment configuration file with a simple training pipeline from scratch.
* Use one of the supported environments with minimal user effort.
* Train, validate and test your experiment from the command line.
This tutorial assumes the [installation instructions](../installation/installation-allenact.md) have already been
followed and that, to some extent, this framework's [abstractions](../getting_started/abstractions.md) are known.
The `extra_requirements` for `minigrid_plugin` and `babyai_plugin` can be installed with:
```bash
pip install -r allenact_plugins/minigrid_plugin/extra_requirements.txt; pip install -r allenact_plugins/babyai_plugin/extra_requirements.txt
```
## The task
A `MiniGrid-Empty-Random-5x5-v0` task consists of a grid of dimensions 5x5 where an agent spawned at a random
location and orientation has to navigate to the visitable bottom right corner cell of the grid by sequences of three
possible actions (rotate left/right and move forward). A visualization of the environment with expert steps in a random
`MiniGrid-Empty-Random-5x5-v0` task looks like

The observation for the agent is a subset of the entire grid, simulating a simplified limited field of view, as
depicted by the highlighted rectangle (observed subset of the grid) around the agent (red arrow). Gray cells correspond
to walls.
## Experiment configuration file
Our complete experiment consists of:
* Training a basic actor-critic agent with memory to solve randomly sampled navigation tasks.
* Validation on a fixed set of tasks (running in parallel with training).
* A second stage where we test saved checkpoints with a larger fixed set of tasks.
The entire configuration for the experiment, including training, validation, and testing, is encapsulated in a single
class implementing the `ExperimentConfig` abstraction. For this tutorial, we will follow the config under
`projects/tutorials/minigrid_tutorial.py`.
The `ExperimentConfig` abstraction is used by the
[OnPolicyTrainer](../api/allenact/algorithms/onpolicy_sync/engine.md#onpolicytrainer) class (for training) and the
[OnPolicyInference](../api/allenact/algorithms/onpolicy_sync/engine.md#onpolicyinference) class (for validation and testing)
invoked through the entry script `main.py` that calls an orchestrating
[OnPolicyRunner](../api/allenact/algorithms/onpolicy_sync/runner.md#onpolicyrunner) class. It includes:
* A `tag` method to identify the experiment.
* A `create_model` method to instantiate actor-critic models.
* A `make_sampler_fn` method to instantiate task samplers.
* Three `{train,valid,test}_task_sampler_args` methods describing initialization parameters for task samplers used in
training, validation, and testing; including assignment of workers to devices for simulation.
* A `machine_params` method with configuration parameters that will be used for training, validation, and testing.
* A `training_pipeline` method describing a possibly multi-staged training pipeline with different types of losses,
an optimizer, and other parameters like learning rates, batch sizes, etc.
### Preliminaries
We first import everything we'll need to define our experiment.
"""
# %%
from typing import Dict, Optional, List, Any, cast
import gym
from gym_minigrid.envs import EmptyRandomEnv5x5
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact.utils.experiment_utils import (
TrainingPipeline,
Builder,
PipelineStage,
LinearDecay,
)
from allenact_plugins.minigrid_plugin.minigrid_models import MiniGridSimpleConvRNN
from allenact_plugins.minigrid_plugin.minigrid_sensors import EgocentricMiniGridSensor
from allenact_plugins.minigrid_plugin.minigrid_tasks import (
MiniGridTaskSampler,
MiniGridTask,
)
# %%
"""
We now create the `MiniGridTutorialExperimentConfig` class which we will use to define our experiment.
For pedagogical reasons, we will add methods to this class one at a time below with a description of what
these methods do.
"""
# %%
class MiniGridTutorialExperimentConfig(ExperimentConfig):
# %%
"""An experiment is identified by a `tag`."""
# %%
@classmethod
def tag(cls) -> str:
return "MiniGridTutorial"
# %%
"""
### Sensors and Model
A readily available Sensor type for MiniGrid,
[EgocentricMiniGridSensor](../api/allenact_plugins/minigrid_plugin/minigrid_sensors.md#egocentricminigridsensor),
allows us to extract observations in a format consumable by an `ActorCriticModel` agent:
"""
# %%
# Sensors of this experiment: a single egocentric MiniGrid sensor. The same
# list is read by `create_model` and by `_get_sampler_args`.
SENSORS = [
EgocentricMiniGridSensor(agent_view_size=5, view_channels=3),
]
# %%
"""
The three `view_channels` include objects, colors and states corresponding to a partial observation of the environment
as an image tensor, equivalent to that from `ImgObsWrapper` in
[MiniGrid](https://github.com/maximecb/gym-minigrid#wrappers). The
relatively large `agent_view_size` means the view will only be clipped by the environment walls in the forward and
lateral directions with respect to the agent's orientation.
We define our `ActorCriticModel` agent using a lightweight implementation with recurrent memory for MiniGrid
environments, [MiniGridSimpleConvRNN](../api/allenact_plugins/minigrid_plugin/minigrid_models.md#minigridsimpleconvrnn):
"""
# %%
@classmethod
def create_model(cls, **kwargs) -> nn.Module:
    """Instantiate the `MiniGridSimpleConvRNN` agent for this experiment.

    The action space is discrete with one entry per name returned by
    `MiniGridTask.class_action_names()`; object, color and state counts are
    read from the first entry of `cls.SENSORS`.
    """
    n_actions = len(MiniGridTask.class_action_names())
    action_space = gym.spaces.Discrete(n_actions)
    observation_spaces = SensorSuite(cls.SENSORS).observation_spaces
    grid_sensor = cls.SENSORS[0]

    return MiniGridSimpleConvRNN(
        action_space=action_space,
        observation_space=observation_spaces,
        num_objects=grid_sensor.num_objects,
        num_colors=grid_sensor.num_colors,
        num_states=grid_sensor.num_states,
    )
# %%
"""
### Task samplers
We use an available TaskSampler implementation for MiniGrid environments that allows to sample both random and
deterministic `MiniGridTasks`,
[MiniGridTaskSampler](../api/allenact_plugins/minigrid_plugin/minigrid_tasks.md#minigridtasksampler):
"""
# %%
@classmethod
def make_sampler_fn(cls, **kwargs) -> TaskSampler:
    """Create a `MiniGridTaskSampler`, forwarding all keyword arguments to it."""
    sampler = MiniGridTaskSampler(**kwargs)
    return sampler
# %%
"""
During training (or validation/testing), this task sampler will randomly initialize new tasks for the agent to complete.
While it is not quite as important for this task type (as we test our agent in the same setting it is trained on) there
are a lot of good reasons we would like to sample tasks differently during training than during validation or testing.
One good reason, that is applicable in this tutorial, is that, during training, we would like to be able to sample tasks
forever while, during testing, we would like to sample a fixed number of tasks (as otherwise we would never finish
testing!). In `allenact` this is made possible by defining different arguments for the task sampler:
"""
# %%
def train_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Return task sampler arguments for the training sampler `process_ind`.

    Only `process_ind` is used; the remaining parameters are accepted but
    not read by this method.
    """
    train_args = self._get_sampler_args(mode="train", process_ind=process_ind)
    return train_args
def valid_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Return task sampler arguments for the validation sampler `process_ind`.

    Only `process_ind` is used; the remaining parameters are accepted but
    not read by this method.
    """
    valid_args = self._get_sampler_args(mode="valid", process_ind=process_ind)
    return valid_args
def test_task_sampler_args(
self,
process_ind: int,
total_processes: int,
devices: Optional[List[int]] = None,
seeds: Optional[List[int]] = None,
deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
return self._get_sampler_args(process_ind=process_ind, mode="test")
# %%
"""
where, for convenience, we have defined a `_get_sampler_args` method:
"""
# %%
def _get_sampler_args(self, process_ind: int, mode: str) -> Dict[str, Any]:
"""Generate initialization arguments for train, valid, and test
TaskSamplers.
# Parameters
process_ind : index of the current task sampler
mode: one of `train`, `valid`, or `test`
"""
if mode == "train":
max_tasks = None # infinite training tasks
task_seeds_list = None # no predefined random seeds for training
deterministic_sampling = False # randomly sample tasks in training
else:
max_tasks = 20 + 20 * (mode == "test") # 20 tasks for valid, 40 for test
# one seed for each task to sample:
# - ensures different seeds for each sampler, and
# - ensures a deterministic set of sampled tasks.
task_seeds_list = list(
range(process_ind * max_tasks, (process_ind + 1) * max_tasks)
)
deterministic_sampling = (
True # deterministically sample task in validation/testing
)
return dict(
max_tasks=max_tasks, # see above
env_class=self.make_env, # builder for third-party environment (defined below)
sensors=self.SENSORS, # sensors used to return observations to the agent
env_info=dict(), # parameters for environment builder (none for now)
task_seeds_list=task_seeds_list, # see above
deterministic_sampling=deterministic_sampling, # see above
)
@staticmethod
def make_env(*args, **kwargs):
    """Instantiate the MiniGrid environment; all arguments are ignored."""
    env = EmptyRandomEnv5x5()
    return env
# %%
"""
Note that the `env_class` argument to the Task Sampler is the one determining which task type we are going to train the
model for (in this case, `MiniGrid-Empty-Random-5x5-v0` from
[gym-minigrid](https://github.com/maximecb/gym-minigrid#empty-environment))
. The sparse reward is
[given by the environment](https://github.com/maximecb/gym-minigrid/blob/6e22a44dc67414b647063692258a4f95ce789161/gym_minigrid/minigrid.py#L819)
, and the maximum task length is 100. For training, we opt for a default random sampling, whereas for validation and
test we define fixed sets of randomly sampled tasks without needing to explicitly define a dataset.
In this toy example, the maximum number of different tasks is 32. For validation we sample 320 tasks using 16 samplers,
or 640 for testing, so we can be fairly sure that all possible tasks are visited at least once during evaluation.
### Machine parameters
Given the simplicity of the task and model, we can quickly train the model on the CPU:
"""
# %%
@classmethod
def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
    """Return the number of samplers and the devices to use for `mode`.

    Training uses 128 samplers, any other mode uses 16. The device list is
    always empty.
    """
    if mode == "train":
        num_samplers = 128
    else:
        num_samplers = 16
    return dict(nprocesses=num_samplers, devices=[])
# %%
"""
We allocate a larger number of samplers for training (128) than for validation or testing (16), and we default to CPU
usage by returning an empty list of `devices`.
### Training pipeline
The last definition required before starting to train is a training pipeline. In this case, we just use a single PPO
stage with linearly decaying learning rate:
"""
# %%
@classmethod
def training_pipeline(cls, **kwargs) -> TrainingPipeline:
    """Define a single-stage PPO training pipeline.

    The stage runs for 150000 steps, and the learning rate scheduler is a
    `LinearDecay` over the same number of steps.
    """
    total_steps = 150000

    losses = {"ppo_loss": PPO(**PPOConfig)}  # type:ignore
    stages = [PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps)]
    # Builders postpone construction until the required inputs are available.
    adam_builder = Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4))

    return TrainingPipeline(
        named_losses=losses,
        pipeline_stages=stages,
        optimizer_builder=adam_builder,
        num_mini_batch=4,
        update_repeats=3,
        max_grad_norm=0.5,
        num_steps=16,
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
        advance_scene_rollout_period=None,
        save_interval=10000,
        metric_accumulate_interval=1,
        lr_scheduler_builder=Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}  # type:ignore
        ),
    )
# %%
"""
You can see that we use a `Builder` class to postpone the construction of some of the elements, like the optimizer,
for which the model weights need to be known.
## Training and validation
We have a complete implementation of this experiment's configuration class in `projects/tutorials/minigrid_tutorial.py`.
To start training from scratch, we just need to invoke
```bash
PYTHONPATH=. python allenact/main.py minigrid_tutorial -b projects/tutorials -m 8 -o /PATH/TO/minigrid_output -s 12345
```
from the `allenact` root directory.
* With `-b projects/tutorials` we tell `allenact` that the `minigrid_tutorial` experiment config file
will be found in the `projects/tutorials` directory.
* With `-m 8` we limit the number of subprocesses to 8 (each subprocess will run 16 of the 128 training task samplers).
* With `-o /PATH/TO/minigrid_output` we set the output folder into which results and logs will be saved.
* With `-s 12345` we set the random seed.
If we have Tensorboard installed, we can track progress with
```bash
tensorboard --logdir /PATH/TO/minigrid_output
```
which will default to the URL [http://localhost:6006/](http://localhost:6006/).
After 150,000 steps, the script will terminate and several checkpoints will be saved in the output folder.
The training curves should look similar to:

If everything went well, the `valid` success rate should converge to 1 and the mean episode length to a value below 4.
(For perfectly uniform sampling and complete observation, the expectation for the optimal policy is 3.75 steps.) In the
not-so-unlikely event of the run failing to converge to a near-optimal policy, we can just try to re-run (for example
with a different random seed). The validation curves should look similar to:

## Testing
The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the
subfolders in the path to the checkpoints, saved under the output folder.
In order to evaluate (i.e. test) a particular checkpoint, we need to pass the `--eval` flag and specify the checkpoint with the
`--checkpoint CHECKPOINT_PATH` option:
```bash
PYTHONPATH=. python allenact/main.py minigrid_tutorial \
-b projects/tutorials \
-m 1 \
-o /PATH/TO/minigrid_output \
-s 12345 \
--eval \
--checkpoint /PATH/TO/minigrid_output/checkpoints/MiniGridTutorial/YOUR_START_DATE/exp_MiniGridTutorial__stage_00__steps_000000151552.pt
```
Again, if everything went well, the `test` success rate should converge to 1 and the mean episode length to a value
below 4. Detailed results are saved under a `metrics` subfolder in the output folder.
The test curves should look similar to:

"""
================================================
FILE: projects/tutorials/minigrid_tutorial_conds.py
================================================
from typing import Dict, Optional, List, Any, cast, Callable, Union, Tuple
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from gym_minigrid.envs import EmptyRandomEnv5x5
from gym_minigrid.minigrid import MiniGridEnv
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses.imitation import Imitation
from allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig
from allenact.algorithms.onpolicy_sync.policy import ActorCriticModel, DistributionType
from allenact.base_abstractions.distributions import (
CategoricalDistr,
ConditionalDistr,
SequentialDistr,
)
from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler
from allenact.base_abstractions.misc import ActorCriticOutput, Memory, RLStepResult
from allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor
from allenact.embodiedai.models.basic_models import RNNStateEncoder
from allenact.utils.experiment_utils import (
TrainingPipeline,
Builder,
PipelineStage,
LinearDecay,
)
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact_plugins.minigrid_plugin.minigrid_models import MiniGridSimpleConvBase
from allenact_plugins.minigrid_plugin.minigrid_sensors import EgocentricMiniGridSensor
from allenact_plugins.minigrid_plugin.minigrid_tasks import (
MiniGridTaskSampler,
MiniGridTask,
)
class ConditionedLinearActorCriticHead(nn.Module):
    """Linear actor-critic head with a two-stage ("higher", then "lower") policy.

    One linear layer produces the logits of the "higher" (master) action and
    the value estimate. The "lower" action distribution is built on demand
    from an embedding of the "higher" action concatenated with the input
    state embedding.
    """

    def __init__(
        self, input_size: int, master_actions: int = 2, subpolicy_actions: int = 2
    ):
        """Create the head.

        # Parameters

        input_size : size of the last dimension of the input state embedding
        master_actions : number of "higher" actions
        subpolicy_actions : number of "lower" actions
        """
        super().__init__()

        self.input_size = input_size

        # The last output unit is the value estimate, the others are master logits.
        self.master_and_critic = nn.Linear(input_size, master_actions + 1)
        # One embedding row per "higher" action. This was hard-coded to 2, which
        # would make the lookup fail for any `master_actions` larger than 2.
        self.embed_higher = nn.Embedding(
            num_embeddings=master_actions, embedding_dim=input_size
        )
        # Input is the "higher" action embedding concatenated with the state.
        self.actor = nn.Linear(2 * input_size, subpolicy_actions)

        for layer in (self.master_and_critic, self.actor):
            nn.init.orthogonal_(layer.weight)
            nn.init.constant_(layer.bias, 0)

    def lower_policy(self, *args, **kwargs):
        """Return the "lower" action distribution given the "higher" action.

        Requires the keyword arguments `higher` (indices used to look up
        `embed_higher`) and `state_embedding` (concatenated with that
        embedding along the last dimension).
        """
        assert "higher" in kwargs
        assert "state_embedding" in kwargs
        emb = self.embed_higher(kwargs["higher"])
        logits = self.actor(torch.cat([emb, kwargs["state_embedding"]], dim=-1))
        return CategoricalDistr(logits=logits)

    def forward(self, x):
        """Return the sequential action distribution and the value estimates.

        # Returns

        A tuple `(distribution, values)`, where `distribution` is a
        `SequentialDistr` over the "higher" and "lower" action groups and
        `values` keeps its first two dimensions and flattens the rest.
        """
        out = self.master_and_critic(x)

        master_logits = out[..., :-1]
        values = out[..., -1:]

        # noinspection PyArgumentList
        cond1 = ConditionalDistr(
            distr_conditioned_on_input_fn_or_instance=CategoricalDistr(
                logits=master_logits
            ),
            action_group_name="higher",
        )
        # The "lower" distribution is created lazily by `lower_policy`.
        cond2 = ConditionalDistr(
            distr_conditioned_on_input_fn_or_instance=self.lower_policy,
            action_group_name="lower",
            state_embedding=x,
        )

        return (
            SequentialDistr(cond1, cond2),
            values.view(*values.shape[:2], -1),  # [steps, samplers, flattened]
        )
class ConditionedLinearActorCritic(ActorCriticModel[SequentialDistr]):
    """Memoryless actor-critic applying a `ConditionedLinearActorCriticHead`
    to a single flat observation."""

    def __init__(
        self,
        input_uuid: str,
        action_space: gym.spaces.Dict,
        observation_space: gym.spaces.Dict,
    ):
        """Create the model.

        # Parameters

        input_uuid : key of the observation fed to the head; must be a
            one-dimensional Box space in `observation_space`
        action_space : action space with `higher` and `lower` entries
        observation_space : observation space containing `input_uuid`
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        # The old message described a different condition than the one checked.
        assert (
            input_uuid in observation_space.spaces
        ), "ConditionedLinearActorCritic expects `input_uuid` to be a key of the observation space."
        self.input_uuid = input_uuid

        box_space: gym.spaces.Box = observation_space[self.input_uuid]
        # The old message lacked a space between its two concatenated literals.
        assert isinstance(box_space, gym.spaces.Box), (
            "ConditionedLinearActorCritic requires that the "
            "observation space corresponding to the input uuid is a Box space."
        )
        assert len(box_space.shape) == 1
        self.in_dim = box_space.shape[0]

        self.head = ConditionedLinearActorCriticHead(
            input_size=self.in_dim,
            master_actions=action_space["higher"].n,
            subpolicy_actions=action_space["lower"].n,
        )

    # noinspection PyMethodMayBeStatic
    def _recurrent_memory_specification(self):
        """Return `None`: this model declares no recurrent memory."""
        return None

    def forward(self, observations, memory, prev_actions, masks):
        """Run the head on the observation stored under `input_uuid`.

        `memory`, `prev_actions` and `masks` are accepted but not used.

        # Returns

        A tuple of the `ActorCriticOutput` and `None` (no memory).
        """
        dists, values = self.head(observations[self.input_uuid])

        # noinspection PyArgumentList
        return (
            ActorCriticOutput(
                distributions=dists,
                values=values,
                extras={},
            ),
            None,
        )
class ConditionedRNNActorCritic(ActorCriticModel[SequentialDistr]):
    """Recurrent actor-critic: an `RNNStateEncoder` followed by a
    non-recurrent conditioned actor-critic head."""

    def __init__(
        self,
        input_uuid: str,
        action_space: gym.spaces.Dict,
        observation_space: gym.spaces.Dict,
        hidden_size: int = 128,
        num_layers: int = 1,
        rnn_type: str = "GRU",
        head_type: Callable[
            ..., ActorCriticModel[SequentialDistr]
        ] = ConditionedLinearActorCritic,
    ):
        """Create the model.

        # Parameters

        input_uuid : key of the observation fed to the state encoder; must be
            a one-dimensional Box space in `observation_space`
        action_space : action space handed to the head
        observation_space : observation space containing `input_uuid`
        hidden_size : size of the recurrent hidden state
        num_layers : number of recurrent layers
        rnn_type : recurrent cell type passed to `RNNStateEncoder`
        head_type : factory of the non-recurrent head applied to the RNN output
        """
        super().__init__(action_space=action_space, observation_space=observation_space)
        self.hidden_size = hidden_size
        self.rnn_type = rnn_type

        # The old messages named other classes and described a different check.
        assert (
            input_uuid in observation_space.spaces
        ), "ConditionedRNNActorCritic expects `input_uuid` to be a key of the observation space."
        self.input_uuid = input_uuid

        box_space: gym.spaces.Box = observation_space[self.input_uuid]
        # The old message lacked a space between its two concatenated literals.
        assert isinstance(box_space, gym.spaces.Box), (
            "ConditionedRNNActorCritic requires that the "
            "observation space corresponding to the input uuid is a Box space."
        )
        assert len(box_space.shape) == 1
        self.in_dim = box_space.shape[0]

        self.state_encoder = RNNStateEncoder(
            input_size=self.in_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            rnn_type=rnn_type,
            trainable_masked_hidden_state=True,
        )

        # The head reads the RNN output under this uuid.
        self.head_uuid = "{}_{}".format("rnn", input_uuid)

        self.ac_nonrecurrent_head: ActorCriticModel[SequentialDistr] = head_type(
            input_uuid=self.head_uuid,
            action_space=action_space,
            observation_space=gym.spaces.Dict(
                {
                    self.head_uuid: gym.spaces.Box(
                        low=np.float32(0.0), high=np.float32(1.0), shape=(hidden_size,)
                    )
                }
            ),
        )

        self.memory_key = "rnn"

    @property
    def recurrent_hidden_state_size(self) -> int:
        """Size of the recurrent hidden state."""
        return self.hidden_size

    @property
    def num_recurrent_layers(self) -> int:
        """Number of recurrent layers reported by the state encoder."""
        return self.state_encoder.num_recurrent_layers

    def _recurrent_memory_specification(self):
        """Describe the single memory tensor stored under `memory_key`."""
        return {
            self.memory_key: (
                (
                    ("layer", self.num_recurrent_layers),
                    ("sampler", None),
                    ("hidden", self.recurrent_hidden_state_size),
                ),
                torch.float32,
            )
        }

    def forward(  # type:ignore
        self,
        observations: Dict[str, Union[torch.FloatTensor, Dict[str, Any]]],
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Encode the observation recurrently, then apply the head.

        # Returns

        A tuple of the head's output and `memory` updated with the new
        hidden states under `memory_key`.
        """
        rnn_out, mem_return = self.state_encoder(
            x=observations[self.input_uuid],
            hidden_states=memory.tensor(self.memory_key),
            masks=masks,
        )

        # The head is memoryless, so its returned memory is discarded.
        # noinspection PyCallingNonCallable
        out, _ = self.ac_nonrecurrent_head(
            observations={self.head_uuid: rnn_out},
            memory=None,
            prev_actions=prev_actions,
            masks=masks,
        )

        # noinspection PyArgumentList
        return (
            out,
            memory.set_tensor(self.memory_key, mem_return),
        )
class ConditionedMiniGridSimpleConvRNN(MiniGridSimpleConvBase):
    """MiniGrid model whose actor-critic is a `ConditionedRNNActorCritic`."""

    def __init__(
        self,
        action_space: gym.spaces.Dict,
        observation_space: gym.spaces.Dict,
        num_objects: int,
        num_colors: int,
        num_states: int,
        object_embedding_dim: int = 8,
        hidden_size=512,
        num_layers=1,
        rnn_type="GRU",
        head_type: Callable[
            ..., ActorCriticModel[SequentialDistr]
        ] = ConditionedLinearActorCritic,
        **kwargs,
    ):
        """Create the model; the recurrent actor-critic consumes a flat input
        of size `object_embedding_dim * view_x * view_y * view_channels`."""
        # Must stay the first statement: it forwards the current locals.
        super().__init__(**prepare_locals_for_super(locals()))

        self._hidden_size = hidden_size

        view_shape = observation_space["minigrid_ego_image"].shape
        agent_view_x, agent_view_y, view_channels = view_shape
        flat_input_shape = (
            self.object_embedding_dim * agent_view_x * agent_view_y * view_channels,
        )

        self.actor_critic = ConditionedRNNActorCritic(
            input_uuid=self.ac_key,
            action_space=action_space,
            observation_space=gym.spaces.Dict(
                {
                    self.ac_key: gym.spaces.Box(
                        low=np.float32(-1.0),
                        high=np.float32(1.0),
                        shape=flat_input_shape,
                    )
                }
            ),
            hidden_size=hidden_size,
            num_layers=num_layers,
            rnn_type=rnn_type,
            head_type=head_type,
        )
        self.memory_key = "rnn"

        self.train()

    @property
    def num_recurrent_layers(self):
        """Number of recurrent layers of the wrapped actor-critic."""
        return self.actor_critic.num_recurrent_layers

    @property
    def recurrent_hidden_state_size(self):
        """Size of the recurrent hidden state."""
        return self._hidden_size

    def _recurrent_memory_specification(self):
        """Describe the single memory tensor stored under `memory_key`."""
        dims = (
            ("layer", self.num_recurrent_layers),
            ("sampler", None),
            ("hidden", self.recurrent_hidden_state_size),
        )
        return {self.memory_key: (dims, torch.float32)}
class ConditionedMiniGridTask(MiniGridTask):
    """MiniGrid task whose four actions are factored into two binary choices.

    The flat action index is `lower + 2 * higher`, so `higher == 0` selects
    between "left"/"right" and `higher == 1` between "forward"/"pickup".
    """

    _ACTION_NAMES = ("left", "right", "forward", "pickup")
    _ACTION_IND_TO_MINIGRID_IND = tuple(
        MiniGridEnv.Actions.__members__[name].value for name in _ACTION_NAMES
    )

    @property
    def action_space(self) -> gym.spaces.Dict:
        """Dict space with the binary `higher` and `lower` action groups."""
        return gym.spaces.Dict(
            higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)
        )

    def _step(self, action: Dict[str, int]) -> RLStepResult:
        """Take one environment step for the given `higher`/`lower` action."""
        assert len(action) == 2, "got action={}".format(action)

        flat_action_ind = action["lower"] + 2 * action["higher"]
        minigrid_action = self._ACTION_IND_TO_MINIGRID_IND[flat_action_ind]
        minigrid_obs, reward, self._minigrid_done, info = self.env.step(
            action=minigrid_action
        )

        # self.env.render()

        return RLStepResult(
            observation=self.get_observations(minigrid_output_obs=minigrid_obs),
            reward=reward,
            done=self.is_done(),
            info=info,
        )

    def query_expert(self, **kwargs) -> Tuple[int, bool]:
        """Return the expert action for the requested action group.

        A query for the "higher" group asks the parent class for the flat
        expert action and caches it. A query for any other group consumes
        that cache, so it must follow a "higher" query.
        """
        if kwargs["expert_sensor_group_name"] != "higher":
            # Consume the flat expert action cached by the "higher" query.
            assert hasattr(self, "cached_expert")
            expert = self.cached_expert
            del self.cached_expert
            if expert[1]:
                return expert[0] % 2, True
            return 0, False

        if self._minigrid_done:
            raise ValueError("Episode is completed, but expert is still queried.")
            # return 0, False

        self.cached_expert = super().query_expert(**kwargs)
        if not self.cached_expert[1]:
            return 0, False
        return self.cached_expert[0] // 2, True
class MiniGridTutorialExperimentConfig(ExperimentConfig):
    """MiniGrid tutorial experiment with a conditional (`higher`/`lower`)
    action space, trained with imitation and PPO losses."""

    @classmethod
    def tag(cls) -> str:
        """Name identifying this experiment."""
        return "MiniGridTutorial"

    SENSORS = [
        EgocentricMiniGridSensor(agent_view_size=5, view_channels=3),
        ExpertActionSensor(
            action_space=gym.spaces.Dict(
                higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)
            )
        ),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the recurrent model with the conditional action space."""
        conditional_action_space = gym.spaces.Dict(
            higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)
        )
        model = ConditionedMiniGridSimpleConvRNN(
            action_space=conditional_action_space,
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            num_objects=cls.SENSORS[0].num_objects,
            num_colors=cls.SENSORS[0].num_colors,
            num_states=cls.SENSORS[0].num_states,
        )
        return model

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create the task sampler; all arguments are forwarded unchanged."""
        sampler = MiniGridTaskSampler(**kwargs)
        return sampler

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Sampler arguments for training; only `process_ind` is used."""
        return self._get_sampler_args(mode="train", process_ind=process_ind)

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Sampler arguments for validation; only `process_ind` is used."""
        return self._get_sampler_args(mode="valid", process_ind=process_ind)

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Sampler arguments for testing; only `process_ind` is used."""
        return self._get_sampler_args(mode="test", process_ind=process_ind)

    def _get_sampler_args(self, process_ind: int, mode: str) -> Dict[str, Any]:
        """Build the initialization arguments of a train, valid, or test
        TaskSampler.

        # Parameters

        process_ind : index of the current task sampler
        mode: one of `train`, `valid`, or `test`
        """
        is_training = mode == "train"

        if is_training:
            # Training samples tasks forever, with no predefined seeds.
            max_tasks = None
            task_seeds_list = None
        else:
            # 20 tasks for valid, 40 for test (per sampler).
            max_tasks = 40 if mode == "test" else 20
            # One seed per task; the ranges of different samplers do not
            # overlap, so each sampler gets its own deterministic task set.
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))

        return {
            "max_tasks": max_tasks,
            # builder for the third-party environment
            "env_class": self.make_env,
            # sensors used to return observations to the agent
            "sensors": self.SENSORS,
            # parameters for the environment builder (none for now)
            "env_info": {},
            "task_seeds_list": task_seeds_list,
            # random sampling in training, deterministic in validation/testing
            "deterministic_sampling": not is_training,
            "task_class": ConditionedMiniGridTask,
        }

    @staticmethod
    def make_env(*args, **kwargs):
        """Instantiate the MiniGrid environment; all arguments are ignored."""
        env = EmptyRandomEnv5x5()
        return env

    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        """Use 128 samplers for training and 16 otherwise, with an empty
        device list."""
        if mode == "train":
            num_samplers = 128
        else:
            num_samplers = 16
        return dict(nprocesses=num_samplers, devices=[])

    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        """Define a single stage combining imitation and PPO losses.

        Teacher forcing decays linearly from 1.0 to 0.0 over the first half
        of the 150000 stage steps.
        """
        total_steps = 150000

        losses = dict(
            # SENSORS[0] is the MiniGrid sensor, SENSORS[1] the ExpertActionSensor
            imitation_loss=Imitation(cls.SENSORS[1]),
            ppo_loss=PPO(**PPOConfig, entropy_method_name="conditional_entropy"),
        )  # type:ignore
        stage = PipelineStage(
            teacher_forcing=LinearDecay(
                startp=1.0,
                endp=0.0,
                steps=total_steps // 2,
            ),
            loss_names=["imitation_loss", "ppo_loss"],
            max_stage_steps=total_steps,
        )

        return TrainingPipeline(
            named_losses=losses,
            pipeline_stages=[stage],
            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4)),
            num_mini_batch=4,
            update_repeats=3,
            max_grad_norm=0.5,
            num_steps=16,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=None,
            save_interval=10000,
            metric_accumulate_interval=1,
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}  # type:ignore
            ),
        )
================================================
FILE: projects/tutorials/navtopartner_robothor_rgb_ppo.py
================================================
from math import ceil
from typing import Dict, Any, List, Optional
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import (
Builder,
PipelineStage,
TrainingPipeline,
LinearDecay,
)
from allenact.utils.multi_agent_viz_utils import MultiTrajectoryViz
from allenact.utils.viz_utils import VizSuite, AgentViewViz
from allenact_plugins.robothor_plugin.robothor_models import (
NavToPartnerActorCriticSimpleConvRNN,
)
from allenact_plugins.robothor_plugin.robothor_sensors import RGBSensorMultiRoboThor
from allenact_plugins.robothor_plugin.robothor_task_samplers import (
NavToPartnerTaskSampler,
)
from allenact_plugins.robothor_plugin.robothor_tasks import NavToPartnerTask
from allenact_plugins.robothor_plugin.robothor_viz import ThorMultiViz
class NavToPartnerRoboThorRGBPPOExperimentConfig(ExperimentConfig):
    """A Multi-Agent Navigation experiment configuration in RoboThor."""

    # Task Parameters
    MAX_STEPS = 500
    REWARD_CONFIG = {
        "step_penalty": -0.01,
        "max_success_distance": 0.75,
        "success_reward": 5.0,
    }

    # Simulator Parameters
    CAMERA_WIDTH = 300
    CAMERA_HEIGHT = 300
    SCREEN_SIZE = 224

    # Training Engine Parameters
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    NUM_PROCESSES = 20
    TRAINING_GPUS: List[int] = [0]
    VALIDATION_GPUS: List[int] = [0]
    TESTING_GPUS: List[int] = [0]

    SENSORS = [
        RGBSensorMultiRoboThor(
            agent_count=2,
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb",
        ),
    ]

    OBSERVATIONS = [
        "rgb",
    ]

    ENV_ARGS = {
        "width": CAMERA_WIDTH,
        "height": CAMERA_HEIGHT,
        "rotateStepDegrees": 30.0,
        "visibilityDistance": 1.0,
        "gridSize": 0.25,
        "agentCount": 2,
    }

    @classmethod
    def tag(cls):
        """Name identifying this experiment."""
        return "NavToPartnerRobothorRGBPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Define a single-stage PPO pipeline with a linearly decaying
        learning rate over 1000000 steps."""
        total_steps = int(1000000)

        return TrainingPipeline(
            save_interval=200000,
            metric_accumulate_interval=1,
            optimizer_builder=Builder(optim.Adam, dict(lr=3e-4)),
            num_mini_batch=1,
            update_repeats=3,
            max_grad_norm=0.5,
            num_steps=30,
            named_losses={"ppo_loss": PPO(**PPOConfig)},
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps)
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}
            ),
        )

    def split_num_processes(self, ndevices):
        """Spread `NUM_PROCESSES` over `ndevices` devices round-robin and
        return the per-device counts."""
        assert self.NUM_PROCESSES >= ndevices, "NUM_PROCESSES {} < ndevices {}".format(
            self.NUM_PROCESSES, ndevices
        )
        per_device = [0] * ndevices
        for process in range(self.NUM_PROCESSES):
            per_device[process % ndevices] += 1
        return per_device

    viz: Optional[VizSuite] = None

    def get_viz(self, mode):
        """Return the visualization suite, creating it on first use."""
        if self.viz is None:
            self.viz = VizSuite(
                mode=mode,
                # Basic 2D trajectory visualizer (task output source):
                base_trajectory=MultiTrajectoryViz(),  # plt_colormaps=["cool", "cool"]),
                # Egocentric view visualizer (vector task source):
                egeocentric=AgentViewViz(max_video_length=100, max_episodes_in_group=1),
                # Specialized 2D trajectory visualizer (task output source):
                thor_trajectory=ThorMultiViz(
                    figsize=(16, 8),
                    viz_rows_cols=(448, 448),
                    scenes=("FloorPlan_Train{}_{}", 1, 1, 1, 1),
                ),
            )
        return self.viz

    def machine_params(self, mode="train", **kwargs):
        """Return sampler counts, devices and visualizer for `mode`.

        Falls back to the CPU whenever CUDA is unavailable. Only the test
        mode gets a visualizer.

        # Raises

        NotImplementedError : if `mode` is not `train`, `valid` or `test`.
        """
        use_cpu = not torch.cuda.is_available()
        visualizer = None

        if mode == "train":
            if use_cpu:
                devices = ["cpu"]
                nprocesses = 4
            else:
                devices = list(self.TRAINING_GPUS)
                nprocesses = self.split_num_processes(len(devices))
        elif mode == "valid":
            nprocesses = 0
            devices = ["cpu"] if use_cpu else self.VALIDATION_GPUS
        elif mode == "test":
            nprocesses = 1
            devices = ["cpu"] if use_cpu else self.TESTING_GPUS
            visualizer = self.get_viz(mode=mode)
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        return dict(nprocesses=nprocesses, devices=devices, visualizer=visualizer)

    # TODO Define Model
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the two-agent model; both agents share the same number of
        discrete actions."""
        num_actions = len(NavToPartnerTask.class_action_names())
        two_agent_action_space = gym.spaces.Tuple(
            [gym.spaces.Discrete(num_actions), gym.spaces.Discrete(num_actions)]
        )
        return NavToPartnerActorCriticSimpleConvRNN(
            action_space=two_agent_action_space,
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            hidden_size=512,
        )

    # Define Task Sampler
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create the task sampler; all arguments are forwarded unchanged."""
        sampler = NavToPartnerTaskSampler(**kwargs)
        return sampler

    # Utility Functions for distributing scenes between GPUs
    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        """Return `num_parts + 1` rounded, evenly spaced int32 boundaries
        from 0 to `n` inclusive."""
        boundaries = np.linspace(0, n, num_parts + 1, endpoint=True)
        return np.round(boundaries).astype(np.int32)

    def _get_sampler_args_for_scene_split(
        self,
        scenes: List[str],
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Build sampler arguments for the slice of `scenes` assigned to
        process `process_ind` out of `total_processes`."""
        scene_count = len(scenes)

        if total_processes > scene_count:  # oversample some scenes -> bias
            if total_processes % scene_count != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisible by the number of scenes"
                )
            # Repeat the scenes, then trim to a multiple of the process count.
            scenes = scenes * int(ceil(total_processes / scene_count))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        elif scene_count % total_processes != 0:
            print(
                "Warning: oversampling some of the scenes to feed all processes."
                " You can avoid this by setting a number of workers divisor of the number of scenes"
            )

        inds = self._partition_inds(len(scenes), total_processes)
        first, last = inds[process_ind], inds[process_ind + 1]
        seed = None if seeds is None else seeds[process_ind]

        return {
            "scenes": scenes[first:last],
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Tuple(
                [
                    gym.spaces.Discrete(len(NavToPartnerTask.class_action_names())),
                    gym.spaces.Discrete(len(NavToPartnerTask.class_action_names())),
                ]
            ),
            "seed": seed,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }

    def _single_scene_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]],
        seeds: Optional[List[int]],
        deterministic_cudnn: bool,
    ) -> Dict[str, Any]:
        """Sampler arguments shared by all modes: one scene plus `env_args`."""
        res = self._get_sampler_args_for_scene_split(
            ["FloorPlan_Train1_1"],
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )

        # Devices are assigned to processes round-robin.
        if devices is not None and len(devices) > 0:
            x_display = "0.%d" % devices[process_ind % len(devices)]
        else:
            x_display = None

        res["env_args"] = {**self.ENV_ARGS, "x_display": x_display}
        return res

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Sampler arguments for training (no `max_tasks` entry)."""
        return self._single_scene_sampler_args(
            process_ind, total_processes, devices, seeds, deterministic_cudnn
        )

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Sampler arguments for validation, limited to 20 tasks."""
        res = self._single_scene_sampler_args(
            process_ind, total_processes, devices, seeds, deterministic_cudnn
        )
        res["max_tasks"] = 20
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Sampler arguments for testing, limited to 4 tasks."""
        res = self._single_scene_sampler_args(
            process_ind, total_processes, devices, seeds, deterministic_cudnn
        )
        res["max_tasks"] = 4
        return res
================================================
FILE: projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object.py
================================================
import torch
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.imitation import Imitation
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.utils.experiment_utils import (
Builder,
PipelineStage,
TrainingPipeline,
LinearDecay,
)
from allenact.base_abstractions.sensor import ExpertActionSensor
from projects.tutorials.object_nav_ithor_ppo_one_object import (
ObjectNavThorPPOExperimentConfig,
ObjectNaviThorGridTask,
)
class ObjectNavThorDaggerThenPPOExperimentConfig(ObjectNavThorPPOExperimentConfig):
    """A simple object navigation experiment in THOR.

    Training with DAgger and then PPO.
    """

    # The parent's sensors plus an expert action sensor.
    SENSORS = [
        *ObjectNavThorPPOExperimentConfig.SENSORS,
        ExpertActionSensor(
            action_space=len(ObjectNaviThorGridTask.class_action_names()),
        ),
    ]

    @classmethod
    def tag(cls):
        """Name identifying this experiment."""
        return "ObjectNavThorDaggerThenPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Define a two-stage pipeline: imitation with decaying teacher
        forcing for 1e4 steps, then PPO for 1e6 steps."""
        dagger_steps = int(1e4)
        ppo_steps = int(1e6)

        imitation_stage = PipelineStage(
            loss_names=["imitation_loss"],
            teacher_forcing=LinearDecay(
                startp=1.0,
                endp=0.0,
                steps=dagger_steps,
            ),
            max_stage_steps=dagger_steps,
        )
        ppo_stage = PipelineStage(
            loss_names=["ppo_loss"],
            max_stage_steps=ppo_steps,
        )

        return TrainingPipeline(
            save_interval=10000,
            # Log every 10 max length tasks
            metric_accumulate_interval=cls.MAX_STEPS * 10,
            optimizer_builder=Builder(optim.Adam, dict(lr=2.5e-4)),
            num_mini_batch=6 if torch.cuda.is_available() else 2,
            update_repeats=4,
            max_grad_norm=0.5,
            num_steps=128,
            named_losses={
                "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),
                "imitation_loss": Imitation(),  # We add an imitation loss.
            },
            gamma=0.99,
            use_gae=True,
            gae_lambda=1.0,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[imitation_stage, ppo_stage],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )
================================================
FILE: projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object_viz.py
================================================
from projects.tutorials.object_nav_ithor_dagger_then_ppo_one_object import (
ObjectNavThorDaggerThenPPOExperimentConfig,
)
from allenact.utils.viz_utils import (
VizSuite,
TrajectoryViz,
AgentViewViz,
ActorViz,
TensorViz1D,
)
from allenact_plugins.ithor_plugin.ithor_viz import ThorViz
class ObjectNavThorDaggerThenPPOVizExperimentConfig(
    ObjectNavThorDaggerThenPPOExperimentConfig
):
    """A simple object navigation experiment in THOR.

    Training with DAgger and then PPO + using viz for test.
    """

    TEST_SAMPLES_IN_SCENE = 4

    @classmethod
    def tag(cls):
        """Name identifying this experiment."""
        return "ObjectNavThorDaggerThenPPOViz"

    viz = None

    def get_viz(self, mode):
        """Return the visualization suite, creating it on first use."""
        if self.viz is None:
            self.viz = VizSuite(
                mode=mode,
                base_trajectory=TrajectoryViz(
                    path_to_target_location=None,
                    path_to_rot_degrees=("rotation",),
                ),
                egeocentric=AgentViewViz(max_video_length=100),
                action_probs=ActorViz(figsize=(3.25, 10), fontsize=18),
                taken_action_logprobs=TensorViz1D(),
                episode_mask=TensorViz1D(rollout_source=("masks",)),
                thor_trajectory=ThorViz(
                    path_to_target_location=None,
                    figsize=(8, 8),
                    viz_rows_cols=(448, 448),
                ),
            )
        return self.viz

    def machine_params(self, mode="train", **kwargs):
        """Return the parent's machine parameters, attaching the visualizer
        when `mode` is `test`."""
        params = super().machine_params(mode, **kwargs)
        if mode != "test":
            return params

        params.set_visualizer(self.get_viz(mode))
        return params
================================================
FILE: projects/tutorials/object_nav_ithor_ppo_one_object.py
================================================
from math import ceil
from typing import Dict, Any, List, Optional
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import (
Builder,
PipelineStage,
TrainingPipeline,
LinearDecay,
)
from allenact_plugins.ithor_plugin.ithor_sensors import (
RGBSensorThor,
GoalObjectTypeThorSensor,
)
from allenact_plugins.ithor_plugin.ithor_task_samplers import ObjectNavTaskSampler
from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask
from allenact_plugins.navigation_plugin.objectnav.models import ObjectNavActorCritic
class ObjectNavThorPPOExperimentConfig(ExperimentConfig):
    """A simple object navigation experiment in THOR, trained with PPO.

    Training, validation and testing all use the same single scene and the
    agent searches for a single object type.
    """

    # A simple setting, train/valid/test are all the same single scene
    # and we're looking for a single object
    OBJECT_TYPES = ["Tomato"]
    TRAIN_SCENES = ["FloorPlan1_physics"]
    VALID_SCENES = ["FloorPlan1_physics"]
    TEST_SCENES = ["FloorPlan1_physics"]

    # Setting up sensors and basic environment details
    SCREEN_SIZE = 224
    SENSORS = [
        RGBSensorThor(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
        ),
        GoalObjectTypeThorSensor(object_types=OBJECT_TYPES),
    ]

    ENV_ARGS = {
        "player_screen_height": SCREEN_SIZE,
        "player_screen_width": SCREEN_SIZE,
        "quality": "Very Low",
    }

    MAX_STEPS = 128
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    VALID_SAMPLES_IN_SCENE = 10
    TEST_SAMPLES_IN_SCENE = 100

    @classmethod
    def tag(cls):
        """Return the name identifying this experiment."""
        return "ObjectNavThorPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build the single-stage PPO `TrainingPipeline` for this experiment.

        The PPO clip parameter and the learning rate are both driven by a
        `LinearDecay` over the `ppo_steps` of the one pipeline stage.
        """
        ppo_steps = int(1e6)
        lr = 2.5e-4
        # Fewer mini-batches when no GPU is available.
        num_mini_batch = 2 if not torch.cuda.is_available() else 6
        update_repeats = 4
        num_steps = 128
        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max length tasks
        save_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 1.0
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(
                    loss_names=["ppo_loss"],
                    max_stage_steps=ppo_steps,
                ),
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )

    @classmethod
    def machine_params(cls, mode="train", **kwargs):
        """Return the process count and devices to use for `mode`.

        # Parameters

        mode : One of `"train"`, `"valid"` or `"test"`.

        # Returns

        A `MachineParams` with 20 (GPU) or 4 (CPU-only) processes for
        training and a single process for validation and testing. When no
        GPU is detected the device list is empty.

        # Raises

        NotImplementedError : If `mode` is not one of the supported values.
        """
        num_gpus = torch.cuda.device_count()
        has_gpu = num_gpus != 0

        if mode == "train":
            nprocesses = 20 if has_gpu else 4
            gpu_ids = [0] if has_gpu else []
        elif mode == "valid":
            nprocesses = 1
            # Uses GPU 1 when there are at least two GPUs, otherwise GPU 0.
            gpu_ids = [1 % num_gpus] if has_gpu else []
        elif mode == "test":
            nprocesses = 1
            gpu_ids = [0] if has_gpu else []
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
        )

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Instantiate the `ObjectNavActorCritic` model for this experiment."""
        return ObjectNavActorCritic(
            action_space=gym.spaces.Discrete(
                len(ObjectNaviThorGridTask.class_action_names())
            ),
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            rgb_uuid=cls.SENSORS[0].uuid,
            depth_uuid=None,
            goal_sensor_uuid="goal_object_type_ind",
            hidden_size=512,
            object_type_embedding_dim=8,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create an `ObjectNavTaskSampler` from the given sampler arguments."""
        return ObjectNavTaskSampler(**kwargs)

    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        """Return `num_parts + 1` integer boundaries splitting `range(n)`.

        Part `i` spans `[inds[i], inds[i + 1])`; boundaries are obtained by
        rounding evenly spaced points between 0 and `n`.
        """
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    def _env_args_for_process(
        self, process_ind: int, devices: Optional[List[int]]
    ) -> Dict[str, Any]:
        """Return a copy of `ENV_ARGS` extended with this process' `x_display`.

        `x_display` is `"0.<device>"`, with devices assigned round-robin by
        process index, or `None` when no devices are given.
        """
        env_args: Dict[str, Any] = dict(self.ENV_ARGS)
        env_args["x_display"] = (
            ("0.%d" % devices[process_ind % len(devices)])
            if devices is not None and len(devices) > 0
            else None
        )
        return env_args

    def _get_sampler_args_for_scene_split(
        self,
        scenes: List[str],
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Build the task sampler arguments shared by all modes.

        # Parameters

        scenes : All scenes available for the current mode.
        process_ind : Index of the process these arguments are for.
        total_processes : Total number of sampler processes.
        seeds : Optional per-process seeds, indexed by `process_ind`.
        deterministic_cudnn : Forwarded unchanged to the task sampler.

        # Returns

        A dictionary of task sampler keyword arguments whose `"scenes"`
        entry is the slice of scenes assigned to `process_ind`.
        """
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisible by the number of scenes"
                )
            # Repeat the scene list until every process can get a scene, then
            # trim it to a multiple of the number of processes.
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        else:
            if len(scenes) % total_processes != 0:
                # No scene is repeated in this branch: the scenes are merely
                # split unevenly, so the warning says exactly that.
                print(
                    "Warning: scenes cannot be split evenly among processes, so some"
                    " processes will receive more scenes than others."
                    " You can avoid this by setting a number of workers divisor of the number of scenes"
                )

        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "object_types": self.OBJECT_TYPES,
            "env_args": self.ENV_ARGS,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(
                len(ObjectNaviThorGridTask.class_action_names())
            ),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a training process.

        Uses `TRAIN_SCENES`, sets `"scene_period"` to `"manual"` and adds a
        per-process `x_display` to the environment arguments.
        """
        res = self._get_sampler_args_for_scene_split(
            self.TRAIN_SCENES,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_period"] = "manual"
        res["env_args"] = self._env_args_for_process(process_ind, devices)
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a validation process.

        Uses `VALID_SCENES` with `VALID_SAMPLES_IN_SCENE` tasks per assigned
        scene and adds a per-process `x_display` to the environment arguments.
        """
        res = self._get_sampler_args_for_scene_split(
            self.VALID_SCENES,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_period"] = self.VALID_SAMPLES_IN_SCENE
        res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * len(res["scenes"])
        res["env_args"] = self._env_args_for_process(process_ind, devices)
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a test process.

        Uses `TEST_SCENES` with `TEST_SAMPLES_IN_SCENE` tasks per assigned
        scene and adds a per-process `x_display` to the environment arguments.
        """
        res = self._get_sampler_args_for_scene_split(
            self.TEST_SCENES,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_period"] = self.TEST_SAMPLES_IN_SCENE
        res["max_tasks"] = self.TEST_SAMPLES_IN_SCENE * len(res["scenes"])
        res["env_args"] = self._env_args_for_process(process_ind, devices)
        return res
================================================
FILE: projects/tutorials/pointnav_habitat_rgb_ddppo.py
================================================
import os
from typing import Dict, Any, List, Optional, Sequence
import gym
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from torchvision import models
from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor
from allenact.utils.experiment_utils import (
Builder,
PipelineStage,
TrainingPipeline,
LinearDecay,
evenly_distribute_count_into_bins,
)
from allenact_plugins.habitat_plugin.habitat_constants import (
HABITAT_DATASETS_DIR,
HABITAT_CONFIGS_DIR,
)
from allenact_plugins.habitat_plugin.habitat_sensors import (
RGBSensorHabitat,
TargetCoordinatesSensorHabitat,
)
from allenact_plugins.habitat_plugin.habitat_task_samplers import PointNavTaskSampler
from allenact_plugins.habitat_plugin.habitat_utils import (
construct_env_configs,
get_habitat_config,
)
from allenact_plugins.navigation_plugin.objectnav.models import (
ResnetTensorNavActorCritic,
)
from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask
class PointNavHabitatRGBPPOTutorialExperimentConfig(ExperimentConfig):
    """A Point Navigation experiment configuration in Habitat.

    The Habitat configuration, sensors and preprocessors are all built as
    class attributes, i.e. when this class is defined.
    """

    # Task Parameters
    MAX_STEPS = 500
    REWARD_CONFIG = {
        "step_penalty": -0.01,
        "goal_success_reward": 10.0,
        "failed_stop_reward": 0.0,
        "shaping_weight": 1.0,
    }
    DISTANCE_TO_GOAL = 0.2

    # Simulator Parameters
    CAMERA_WIDTH = 640
    CAMERA_HEIGHT = 480
    SCREEN_SIZE = 224

    # Training Engine Parameters
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    NUM_PROCESSES = max(5 * torch.cuda.device_count() - 1, 4)
    TRAINING_GPUS = list(range(torch.cuda.device_count()))
    # Validation and testing use the last GPU. Without GPUs these lists hold
    # `-1`, but `machine_params` only reads them when CUDA is available.
    VALIDATION_GPUS = [torch.cuda.device_count() - 1]
    TESTING_GPUS = [torch.cuda.device_count() - 1]

    task_data_dir_template = os.path.join(
        HABITAT_DATASETS_DIR, "pointnav/gibson/v1/{}/{}.json.gz"
    )
    TRAIN_SCENES = task_data_dir_template.format(*(["train"] * 2))
    VALID_SCENES = task_data_dir_template.format(*(["val"] * 2))
    TEST_SCENES = task_data_dir_template.format(*(["test"] * 2))

    CONFIG = get_habitat_config(
        os.path.join(HABITAT_CONFIGS_DIR, "tasks/pointnav_gibson.yaml")
    )
    CONFIG.defrost()
    CONFIG.NUM_PROCESSES = NUM_PROCESSES
    CONFIG.SIMULATOR_GPU_IDS = TRAINING_GPUS
    CONFIG.DATASET.SCENES_DIR = "habitat/habitat-api/data/scene_datasets/"
    CONFIG.DATASET.POINTNAVV1.CONTENT_SCENES = ["*"]
    CONFIG.DATASET.DATA_PATH = TRAIN_SCENES
    CONFIG.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR"]
    CONFIG.SIMULATOR.RGB_SENSOR.WIDTH = CAMERA_WIDTH
    CONFIG.SIMULATOR.RGB_SENSOR.HEIGHT = CAMERA_HEIGHT
    CONFIG.SIMULATOR.TURN_ANGLE = 30
    CONFIG.SIMULATOR.FORWARD_STEP_SIZE = 0.25
    CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = MAX_STEPS

    CONFIG.TASK.TYPE = "Nav-v0"
    CONFIG.TASK.SUCCESS_DISTANCE = DISTANCE_TO_GOAL
    CONFIG.TASK.SENSORS = ["POINTGOAL_WITH_GPS_COMPASS_SENSOR"]
    CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.GOAL_FORMAT = "POLAR"
    CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.DIMENSIONALITY = 2
    CONFIG.TASK.GOAL_SENSOR_UUID = "pointgoal_with_gps_compass"
    CONFIG.TASK.MEASUREMENTS = ["DISTANCE_TO_GOAL", "SUCCESS", "SPL"]
    CONFIG.TASK.SPL.TYPE = "SPL"
    CONFIG.TASK.SPL.SUCCESS_DISTANCE = DISTANCE_TO_GOAL
    CONFIG.TASK.SUCCESS.SUCCESS_DISTANCE = DISTANCE_TO_GOAL

    CONFIG.MODE = "train"

    SENSORS = [
        RGBSensorHabitat(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
            # Set explicitly so the sensor's uuid matches the `input_uuids`
            # entry of the ResNet preprocessor declared below.
            uuid="rgb_lowres",
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
    ]

    PREPROCESSORS = [
        Builder(
            ResNetPreprocessor,
            {
                "input_height": SCREEN_SIZE,
                "input_width": SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": ["rgb_lowres"],
                "output_uuid": "rgb_resnet",
            },
        ),
    ]

    OBSERVATIONS = [
        "rgb_resnet",
        "target_coordinates_ind",
    ]

    # Environment configurations derived from `CONFIG`; indexed by process
    # index in `train_task_sampler_args`.
    TRAIN_CONFIGS = construct_env_configs(CONFIG)

    @classmethod
    def tag(cls):
        """Return the name identifying this experiment."""
        return "PointNavHabitatRGBPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build the single-stage PPO `TrainingPipeline` for this experiment.

        The learning rate follows a `LinearDecay` over the `ppo_steps` of the
        one pipeline stage.
        """
        ppo_steps = int(250000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 3
        num_steps = 30
        save_interval = 5000000
        log_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={"ppo_loss": PPO(**PPOConfig)},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps)
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )

    def machine_params(self, mode="train", **kwargs):
        """Return processes, devices and the preprocessor graph for `mode`.

        # Parameters

        mode : One of `"train"`, `"valid"` or `"test"`.

        # Returns

        A `MachineParams`. For training with CUDA, `NUM_PROCESSES` is spread
        over `TRAINING_GPUS`; without CUDA a single process and no devices
        are used. Validation and testing use a single process.

        # Raises

        NotImplementedError : If `mode` is not one of the supported values.
        """
        if mode == "train":
            workers_per_device = 1
            gpu_ids = (
                []
                if not torch.cuda.is_available()
                else self.TRAINING_GPUS * workers_per_device
            )
            nprocesses = (
                1
                if not torch.cuda.is_available()
                else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))
            )
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available() else self.VALIDATION_GPUS
        elif mode == "test":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available() else self.TESTING_GPUS
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        # The graph is always built for training; for other modes only when at
        # least one process will actually run.
        sensor_preprocessor_graph = (
            SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
                preprocessors=self.PREPROCESSORS,
            )
            if mode == "train"
            or (
                (isinstance(nprocesses, int) and nprocesses > 0)
                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
            )
            else None
        )

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    # Define Model
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Instantiate the `ResnetTensorNavActorCritic` model.

        Requires `kwargs["sensor_preprocessor_graph"]`, whose observation
        spaces define the model's inputs.
        """
        return ResnetTensorNavActorCritic(
            action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            goal_sensor_uuid="target_coordinates_ind",
            rgb_resnet_preprocessor_uuid="rgb_resnet",
            hidden_size=512,
            goal_dims=32,
        )

    # Define Task Sampler
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `PointNavTaskSampler` from the given sampler arguments."""
        return PointNavTaskSampler(**kwargs)

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a training process.

        The environment configuration is `TRAIN_CONFIGS[process_ind]`; the
        remaining parameters are accepted for interface compatibility and are
        not used here.
        """
        config = self.TRAIN_CONFIGS[process_ind]
        return {
            "env_config": config,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "distance_to_goal": self.DISTANCE_TO_GOAL,  # type:ignore
        }

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a validation process.

        Works on a clone of `CONFIG` pointed at `VALID_SCENES` with its mode
        set to `"validate"`, so the shared class-level config is not modified.
        """
        config = self.CONFIG.clone()
        config.defrost()
        config.DATASET.DATA_PATH = self.VALID_SCENES
        config.MODE = "validate"
        config.freeze()
        return {
            "env_config": config,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "distance_to_goal": self.DISTANCE_TO_GOAL,  # type:ignore
        }

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Testing is not supported by this tutorial.

        # Raises

        NotImplementedError : Always.
        """
        raise NotImplementedError("Testing not implemented for this tutorial.")
================================================
FILE: projects/tutorials/pointnav_ithor_rgb_ddppo.py
================================================
import glob
import os
from math import ceil
from typing import Dict, Any, List, Optional, Sequence
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from torchvision import models
from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor
from allenact.utils.experiment_utils import (
Builder,
PipelineStage,
TrainingPipeline,
LinearDecay,
evenly_distribute_count_into_bins,
)
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.navigation_plugin.objectnav.models import (
ResnetTensorNavActorCritic,
)
from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from allenact_plugins.robothor_plugin.robothor_task_samplers import (
PointNavDatasetTaskSampler,
)
from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask
class PointNaviThorRGBPPOExperimentConfig(ExperimentConfig):
    """A Point Navigation experiment configuration in iTHOR."""

    # Task Parameters
    MAX_STEPS = 500
    REWARD_CONFIG = {
        "step_penalty": -0.01,
        "goal_success_reward": 10.0,
        "failed_stop_reward": 0.0,
        "shaping_weight": 1.0,
    }

    # Simulator Parameters
    CAMERA_WIDTH = 640
    CAMERA_HEIGHT = 480
    SCREEN_SIZE = 224

    # Training Engine Parameters
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    NUM_PROCESSES = 60
    TRAINING_GPUS = list(range(torch.cuda.device_count()))
    # Validation and testing use the last GPU. Without GPUs these lists hold
    # `-1`, but `machine_params` only reads them when CUDA is available.
    VALIDATION_GPUS = [torch.cuda.device_count() - 1]
    TESTING_GPUS = [torch.cuda.device_count() - 1]

    # Dataset Parameters
    # NOTE(review): these point at the `ithor-objectnav` dataset although this
    # is a PointNav experiment -- confirm this is the intended dataset.
    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-objectnav/train")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val")

    SENSORS = [
        RGBSensorThor(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    PREPROCESSORS = [
        Builder(
            ResNetPreprocessor,
            {
                "input_height": SCREEN_SIZE,
                "input_width": SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": ["rgb_lowres"],
                "output_uuid": "rgb_resnet",
            },
        ),
    ]

    OBSERVATIONS = [
        "rgb_resnet",
        "target_coordinates_ind",
    ]

    ENV_ARGS = dict(
        width=CAMERA_WIDTH,
        height=CAMERA_HEIGHT,
        rotateStepDegrees=30.0,
        visibilityDistance=1.0,
        gridSize=0.25,
    )

    @classmethod
    def tag(cls):
        """Return the name identifying this experiment."""
        return "PointNavithorRGBPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build the single-stage PPO `TrainingPipeline` for this experiment.

        The learning rate follows a `LinearDecay` over the `ppo_steps` of the
        one pipeline stage.
        """
        ppo_steps = int(250000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 3
        num_steps = 30
        save_interval = 5000000
        log_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={"ppo_loss": PPO(**PPOConfig)},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps)
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )

    def machine_params(self, mode="train", **kwargs):
        """Return processes, devices and the preprocessor graph for `mode`.

        # Parameters

        mode : One of `"train"`, `"valid"` or `"test"`.

        # Returns

        A `MachineParams`. For training with CUDA, `NUM_PROCESSES` is spread
        over `TRAINING_GPUS`; without CUDA a single process and no devices
        are used. Validation and testing use a single process.

        # Raises

        NotImplementedError : If `mode` is not one of the supported values.
        """
        sampler_devices: Sequence[int] = []
        if mode == "train":
            workers_per_device = 1
            gpu_ids = (
                []
                if not torch.cuda.is_available()
                else self.TRAINING_GPUS * workers_per_device
            )
            nprocesses = (
                1
                if not torch.cuda.is_available()
                else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))
            )
            sampler_devices = self.TRAINING_GPUS
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available() else self.VALIDATION_GPUS
        elif mode == "test":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available() else self.TESTING_GPUS
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        # The graph is always built for training; for other modes only when at
        # least one process will actually run.
        sensor_preprocessor_graph = (
            SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
                preprocessors=self.PREPROCESSORS,
            )
            if mode == "train"
            or (
                (isinstance(nprocesses, int) and nprocesses > 0)
                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
            )
            else None
        )

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sampler_devices=(
                sampler_devices if mode == "train" else gpu_ids
            ),  # ignored with > 1 gpu_ids
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    # Define Model
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Instantiate the `ResnetTensorNavActorCritic` model.

        Requires `kwargs["sensor_preprocessor_graph"]`, whose observation
        spaces define the model's inputs.
        """
        return ResnetTensorNavActorCritic(
            action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            goal_sensor_uuid="target_coordinates_ind",
            rgb_resnet_preprocessor_uuid="rgb_resnet",
            hidden_size=512,
            goal_dims=32,
        )

    # Define Task Sampler
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `PointNavDatasetTaskSampler` from the given arguments."""
        return PointNavDatasetTaskSampler(**kwargs)

    # Utility Functions for distributing scenes between GPUs
    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        """Return `num_parts + 1` integer boundaries splitting `range(n)`.

        Part `i` spans `[inds[i], inds[i + 1])`; boundaries are obtained by
        rounding evenly spaced points between 0 and `n`.
        """
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    def _env_args_for_process(
        self, process_ind: int, devices: Optional[List[int]]
    ) -> Dict[str, Any]:
        """Return a copy of `ENV_ARGS` extended with this process' `x_display`.

        `x_display` is `"0.<device>"`, with devices assigned round-robin by
        process index, or `None` when no devices are given.
        """
        env_args: Dict[str, Any] = dict(self.ENV_ARGS)
        env_args["x_display"] = (
            ("0.%d" % devices[process_ind % len(devices)])
            if devices is not None and len(devices) > 0
            else None
        )
        return env_args

    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Build the task sampler arguments shared by all modes.

        Scene names are taken from the `*.json.gz` files found in
        `scenes_dir` (file name up to the first dot).

        # Parameters

        scenes_dir : Directory containing one `*.json.gz` file per scene.
        process_ind : Index of the process these arguments are for.
        total_processes : Total number of sampler processes.
        seeds : Optional per-process seeds, indexed by `process_ind`.
        deterministic_cudnn : Forwarded unchanged to the task sampler.

        # Returns

        A dictionary of task sampler keyword arguments whose `"scenes"`
        entry is the slice of scenes assigned to `process_ind`.

        # Raises

        RuntimeError : If no scene file is found in `scenes_dir`.
        """
        path = os.path.join(scenes_dir, "*.json.gz")
        # `glob` returns matches in arbitrary order. Every worker builds this
        # list on its own, so sort it to guarantee that all of them agree on
        # which scenes belong to which process.
        scenes = sorted(
            os.path.basename(scene_file).split(".")[0]
            for scene_file in glob.glob(path)
        )
        if len(scenes) == 0:
            raise RuntimeError(
                (
                    "Could find no scene dataset information in directory {}."
                    " Are you sure you've downloaded them?"
                    " If not, see https://allenact.org/installation/download-datasets/ for information"
                    " on how this can be done."
                ).format(scenes_dir)
            )

        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisible by the number of scenes"
                )
            # Repeat the scene list until every process can get a scene, then
            # trim it to a multiple of the number of processes.
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        else:
            if len(scenes) % total_processes != 0:
                # No scene is repeated in this branch: the scenes are merely
                # split unevenly, so the warning says exactly that.
                print(
                    "Warning: scenes cannot be split evenly among processes, so some"
                    " processes will receive more scenes than others."
                    " You can avoid this by setting a number of workers divisor of the number of scenes"
                )

        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a training process.

        Reads scenes from the `episodes` sub-directory of
        `TRAIN_DATASET_DIR`, loops over the dataset and enables flipping.
        """
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.TRAIN_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.TRAIN_DATASET_DIR
        res["loop_dataset"] = True
        res["env_args"] = self._env_args_for_process(process_ind, devices)
        res["allow_flipping"] = True
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a validation process.

        Reads scenes from the `episodes` sub-directory of `VAL_DATASET_DIR`
        and does not loop over the dataset.
        """
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        res["env_args"] = self._env_args_for_process(process_ind, devices)
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a test process.

        Testing reuses the validation dataset (`VAL_DATASET_DIR`) and does
        not loop over it.
        """
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        # NOTE(review): unlike train/valid, no `x_display` entry is added
        # here -- confirm that this is intentional.
        res["env_args"] = dict(self.ENV_ARGS)
        return res
================================================
FILE: projects/tutorials/running_inference_tutorial.py
================================================
# literate: tutorials/running-inference-on-a-pretrained-model.md
# %%
"""# Tutorial: Inference with a pre-trained model."""
# %%
"""
In this tutorial we will run inference on a pre-trained model for the PointNav task
in the RoboTHOR environment. In this task the agent is tasked with going to a specific location
within a realistic 3D environment.
For information on how to train a PointNav Model see [this tutorial](training-a-pointnav-model.md)
We will need to [install the full AllenAct library](../installation/installation-allenact.md#full-library),
the `robothor_plugin` requirements via
```bash
pip install -r allenact_plugins/robothor_plugin/extra_requirements.txt
```
and [download the
RoboTHOR Pointnav dataset](../installation/download-datasets.md) before we get started.
For this tutorial we will download the weights of a model trained on the debug dataset.
This can be done with a handy script in the `pretrained_model_ckpts` directory:
```bash
bash pretrained_model_ckpts/download_navigation_model_ckpts.sh robothor-pointnav-rgb-resnet
```
This will download the weights for an RGB model that has been
trained on the PointNav task in RoboTHOR to `pretrained_model_ckpts/robothor-pointnav-rgb-resnet`
Next we need to run the inference, using the PointNav experiment config from the
[tutorial on making a PointNav experiment](training-a-pointnav-model.md).
We can do this with the following command:
```bash
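PYTHONPATH=. python allenact/main.py <EXPERIMENT_NAME> -o <PATH_TO_OUTPUT> -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> -c <PATH_TO_CHECKPOINT> --eval
```
Where `<EXPERIMENT_NAME>` is the name of the experiment definition to run, `<PATH_TO_OUTPUT>` is the location
where the results of the test will be dumped, `<PATH_TO_CHECKPOINT>` is the location of the downloaded model weights,
and `<BASE_DIRECTORY_OF_YOUR_EXPERIMENT>` is a path to the directory where our experiment definition is stored.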
For our current setup the following command would work:
```bash
PYTHONPATH=. python allenact/main.py \
training_a_pointnav_model \
-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \
-b projects/tutorials \
-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30/exp_PointNavRobothorRGBPPO__stage_00__steps_000039031200.pt \
--eval
```
For testing on all saved checkpoints we pass a directory to `--checkpoint` rather than just a single file:
```bash
PYTHONPATH=. python allenact/main.py \
training_a_pointnav_model \
-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \
-b projects/tutorials \
-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30 \
--eval
```
## Visualization
We also show examples of visualizations that can be extracted from the `"valid"` and `"test"` modes. Currently,
visualization is still undergoing design changes and does not support multi-agent tasks, but the available functionality
is sufficient for pointnav in RoboThor.
Following up on the example above, we can make a specialized pointnav `ExperimentConfig` where we instantiate
the base visualization class, `VizSuite`, defined in
[`allenact.utils.viz_utils`](https://github.com/allenai/allenact/tree/master/allenact/utils/viz_utils.py), when in `test` mode.
Each visualization type can be thought of as a plugin to the base `VizSuite`. For example, all `episode_ids` passed to
`VizSuite` will be processed with each of the instantiated visualization types (possibly with the exception of the
`AgentViewViz`). In the example below we show how to instantiate different visualization types from 4 different data
sources.
The data sources available to `VizSuite` are:
* Task output (e.g. 2D trajectories)
* Vector task (e.g. egocentric views)
* Rollout storage (e.g. recurrent memory, taken action logprobs...)
* `ActorCriticOutput` (e.g. action probabilities)
The visualization types included below are:
* `TrajectoryViz`: Generic 2D trajectory view.
* `AgentViewViz`: RGB egocentric view.
* `ActorViz`: Action probabilities from `ActorCriticOutput[CategoricalDistr]`.
* `TensorViz1D`: Evolution of a point from RolloutStorage over time.
* `TensorViz2D`: Evolution of a vector from RolloutStorage over time.
* `ThorViz`: Specialized 2D trajectory view
[for RoboThor](https://github.com/allenai/allenact/tree/master/allenact_plugins/robothor_plugin/robothor_viz.py).
Note that we need to explicitly set the `episode_ids` that we wish to visualize. For `AgentViewViz` we have the option
of using a different (typically shorter) list of episodes or enforcing the ones used for the rest of the visualizations.
"""
# %% hide
from typing import Optional
from allenact.utils.viz_utils import (
VizSuite,
TrajectoryViz,
ActorViz,
AgentViewViz,
TensorViz1D,
TensorViz2D,
)
from allenact_plugins.robothor_plugin.robothor_viz import ThorViz
from projects.tutorials.training_a_pointnav_model import (
PointNavRoboThorRGBPPOExperimentConfig,
)
# %%
class PointNavRoboThorRGBPPOVizExperimentConfig(PointNavRoboThorRGBPPOExperimentConfig):
    """ExperimentConfig used to demonstrate how to set up visualization code.

    # Attributes

    viz_ep_ids : Identifiers passed to `VizSuite` as the `episode_ids` to visualize.
    viz_video_ids : Identifiers passed to `AgentViewViz` as the `episode_ids` for
        which video visualizations are produced.
    viz : The cached `VizSuite`, or `None` until `get_viz` is first called.
    """

    viz_ep_ids = [
        "FloorPlan_Train1_1_3",
        "FloorPlan_Train1_1_4",
        "FloorPlan_Train1_1_5",
        "FloorPlan_Train1_1_6",
    ]
    viz_video_ids = [["FloorPlan_Train1_1_3"], ["FloorPlan_Train1_1_4"]]

    viz: Optional[VizSuite] = None

    def get_viz(self, mode):
        """Return the visualization suite, building it on first use.

        The suite is created with the `mode` given on the first call and is
        then stored on the instance; subsequent calls return that same object
        regardless of the `mode` they receive.
        """
        if self.viz is None:
            # Task output source: basic 2D trajectory.
            trajectory_viz = TrajectoryViz(
                path_to_target_location=(
                    "task_info",
                    "target",
                ),
            )
            # Vector task source: egocentric view videos.
            agent_view_viz = AgentViewViz(
                max_video_length=100, episode_ids=self.viz_video_ids
            )
            # Actor critic output source: action probabilities.
            actor_viz = ActorViz(figsize=(3.25, 10), fontsize=18)
            # Rollout storage sources: taken action logprobs (the default
            # source), episode masks and recurrent memory.
            logprobs_viz = TensorViz1D()
            masks_viz = TensorViz1D(rollout_source=("masks",))
            memory_viz = TensorViz2D(
                rollout_source=("memory_first_last", "single_belief")
            )
            # Task output source: specialized 2D trajectory.
            thor_viz = ThorViz(
                figsize=(16, 8),
                viz_rows_cols=(448, 448),
                scenes=("FloorPlan_Train{}_{}", 1, 1, 1, 1),
            )

            # Keyword names (and their order) are forwarded to `VizSuite`
            # exactly as in the original call.
            self.viz = VizSuite(
                episode_ids=self.viz_ep_ids,
                mode=mode,
                base_trajectory=trajectory_viz,
                egeocentric=agent_view_viz,
                action_probs=actor_viz,
                taken_action_logprobs=logprobs_viz,
                episode_mask=masks_viz,
                rnn_memory=memory_viz,
                thor_trajectory=thor_viz,
            )
        return self.viz

    def machine_params(self, mode="train", **kwargs):
        """Return the parent's machine parameters, adding a visualizer for tests."""
        machine = super().machine_params(mode, **kwargs)
        if mode != "test":
            return machine

        machine.set_visualizer(self.get_viz(mode))
        return machine
# %%
"""
Running test on the same downloaded models, but using the visualization-enabled `ExperimentConfig` with
```bash
PYTHONPATH=. python allenact/main.py \
running_inference_tutorial \
-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \
-b projects/tutorials \
-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30/exp_PointNavRobothorRGBPPO__stage_00__steps_000039031200.pt \
--eval
```
generates different types of visualization and logs them in tensorboard. If everything is properly set up and
tensorboard includes the `robothor-pointnav-rgb-resnet` folder, under the `IMAGES` tab, we should see something similar
to

"""
================================================
FILE: projects/tutorials/training_a_pointnav_model.py
================================================
# literate: tutorials/training-a-pointnav-model.md
# %%
"""# Tutorial: PointNav in RoboTHOR."""
# %%
"""

## Introduction
One of the most obvious tasks that an embodied agent should master is navigating the world it inhabits.
Before we can teach a robot to cook or clean it first needs to be able to move around. The simplest
way to formulate "moving around" into a task is by making your agent find a beacon somewhere in the environment.
This beacon transmits its location, such that at any time, the agent can get the direction and Euclidean distance
to the beacon. This particular task is often called Point Navigation, or **PointNav** for short.
#### PointNav
At first glance, this task seems trivial. If the agent is given the direction and distance of the target at
all times, can it not simply follow this signal directly? The answer is no, because agents are often trained
on this task in environments that emulate real-world buildings which are not wide-open spaces, but rather
contain many smaller rooms. Because of this, the agent has to learn to navigate human spaces and use doors
and hallways to efficiently navigate from one side of the building to the other. This task becomes particularly
difficult when the agent is tested in an environment that it is not trained in. If the agent does not know
how the floor plan of an environment looks, it has to learn to predict the design of man-made structures,
to efficiently navigate across them, much like how people instinctively know how to move around a building
they have never seen before based on their experience navigating similar buildings.
#### What is an environment anyways?
Environments are worlds in which embodied agents exist. If our embodied agent is simply a neural network that is being
trained in a simulator, then that simulator is its environment. Similarly, if our agent is a
physical robot then its environment is the real world. The agent interacts with the environment by taking one
of several available actions (such as "move forward", or "turn left"). After each action, the environment
produces a new frame that the agent can analyze to determine its next step. For many tasks, including PointNav
the agent also has a special "stop" action which indicates that the agent thinks it has reached the target.
After this action is called the agent will be reset to a new location, regardless if it reached the
target. The hope is that after enough training the agent will learn to correctly assess that it has successfully
navigated to the target.

There are many simulators designed for the training
of embodied agents. In this tutorial, we will be using a simulator called [RoboTHOR](https://ai2thor.allenai.org/robothor/),
which is designed specifically to train models that can easily be transferred to a real robot, by providing a
photo-realistic virtual environment and a real-world replica of the environment that researchers can have access to.
RoboTHOR contains 60 different virtual scenes with different floor plans and furniture and 15 validation scenes.
It is also important to mention that **AllenAct**
has a class abstraction called Environment. This is not the actual simulator game engine or robotics controller,
but rather a shallow wrapper that provides a uniform interface to the actual environment.
#### Learning algorithm
Finally, let us briefly touch on the algorithm that we will use to train our embodied agent to navigate. While
*AllenAct* offers us great flexibility to train models using complex pipelines, we will be using a simple
pure reinforcement learning approach for this tutorial. More specifically, we will be using DD-PPO,
a decentralized and distributed variant of the ubiquitous PPO algorithm. For those unfamiliar with Reinforcement
Learning we highly recommend [this tutorial](http://karpathy.github.io/2016/05/31/rl/) by Andrej Karpathy, and [this
book](http://www.incompleteideas.net/book/the-book-2nd.html) by Sutton and Barto. Essentially what we are doing
is letting our agent explore the environment on its own, rewarding it for taking actions that bring it closer
to its goal and penalizing it for actions that take it away from its goal. We then optimize the agent's model
to maximize this reward.
## Requirements
To train the model on the PointNav task, we need to [install the RoboTHOR environment](../installation/installation-framework.md)
and [download the RoboTHOR PointNav dataset](../installation/download-datasets.md).
The dataset contains a list of episodes with thousands of randomly generated starting positions and target locations for each of the scenes
as well as a precomputed cache of distances, containing the shortest path from each point in a scene, to every other point in that scene.
This is used to reward the agent for moving closer to the target in terms of geodesic distance - the actual path distance (as opposed to a
straight line distance).
## Config File Setup
Now comes the most important part of the tutorial, we are going to write an experiment config file.
If this is your first experience with experiment config files in AllenAct, we suggest that you
first see our how-to on [defining an experiment](../howtos/defining-an-experiment.md) which will
walk you through creating a simplified experiment config file.
Unlike a library that can be imported into python, **AllenAct** is structured as a framework with a runner script called
`main.py` which will run the experiment specified in a config file. This design forces us to keep meticulous records of
exactly which settings were used to produce a particular result,
which can be very useful given how expensive RL models are to train.
The `projects/` directory is home to different projects using `AllenAct`. Currently it is populated with baselines
of popular tasks and tutorials.
We already have all the code for this tutorial stored in `projects/tutorials/training_a_pointnav_model.py`. We will
be using this file to run our experiments, but you can create a new directory in `projects/` and start writing your
experiment there.
We start off by importing everything we will need:
"""
# %%
import glob
import os
from math import ceil
from typing import Dict, Any, List, Optional, Sequence
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from torchvision import models
from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor
from allenact.utils.experiment_utils import (
Builder,
PipelineStage,
TrainingPipeline,
LinearDecay,
evenly_distribute_count_into_bins,
)
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.navigation_plugin.objectnav.models import (
ResnetTensorNavActorCritic,
)
from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from allenact_plugins.robothor_plugin.robothor_task_samplers import (
PointNavDatasetTaskSampler,
)
from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask
# %%
"""Next we define a new experiment config class:"""
# %%
class PointNavRoboThorRGBPPOExperimentConfig(ExperimentConfig):
"""A Point Navigation experiment configuration in RoboThor."""
# %%
"""
We then define the task parameters. For PointNav, these include the maximum number of steps our agent
can take before being reset (this prevents the agent from wandering on forever), and a configuration
for the reward function that we will be using.
"""
# %%
# Task Parameters
# Maximum number of steps the agent may take before being reset.
MAX_STEPS = 500
# Reward settings, forwarded to the task sampler as `rewards_config`.
REWARD_CONFIG = {
    "step_penalty": -0.01,  # reward given for every action taken
    "goal_success_reward": 10.0,  # reward for successfully reaching the target
    "failed_stop_reward": 0.0,  # reward for selecting `stop` away from the target
    "shaping_weight": 1.0,  # strength of the closer/further-from-target signal
}
# %%
"""
In this case, we set the maximum number of steps to 500.
We give the agent a reward of -0.01 for each action that it takes (this is to encourage it to reach the goal
in as few actions as possible), and a reward of 10.0 if the agent manages to successfully reach its destination.
If the agent selects the `stop` action without reaching the target we do not punish it (although this is
sometimes useful for preventing the agent from stopping prematurely). Finally, our agent gets rewarded if it moves
closer to the target and gets punished if it moves further away. `shaping_weight` controls how strong this signal should
be and is here set to 1.0. These parameters work well for training an agent on PointNav, but feel free to play around
with them.
Next, we set the parameters of the simulator itself. Here we select a resolution at which the engine will render
every frame (640 by 480) and a resolution at which the image will be fed into the neural network (here it is set
to a 224 by 224 box).
"""
# %%
# Simulator Parameters
# Resolution at which the engine renders every frame (passed on via `ENV_ARGS`).
CAMERA_WIDTH = 640
CAMERA_HEIGHT = 480
# Side length of the square image fed into the neural network.
SCREEN_SIZE = 224
# %%
"""
Next, we set the hardware parameters for the training engine. `NUM_PROCESSES` sets the total number of parallel
processes that will be used to train the model. In general, more processes result in faster training,
but since each process is a unique instance of the environment in which we are training they can take up a
lot of memory. Depending on the size of the model, the environment, and the hardware we are using, we may
need to adjust this number, but for a setup with 8 GTX Titans, 60 processes work fine. 60 also happens to
be the number of training scenes in RoboTHOR, which allows each process to load only a single scene into
memory, saving time and space.
`TRAINING_GPUS` takes the ids of the GPUS on which
the model should be trained. Similarly `VALIDATION_GPUS` and `TESTING_GPUS` hold the ids of the GPUS on which
the validation and testing will occur. During training, a validation process is constantly running and evaluating
the current model, to show the progress on the validation set, so reserving a GPU for validation can be a good idea.
If our hardware setup does not include a GPU, these fields can be set to empty lists, as the codebase will default
to running everything on the CPU with only 1 process.
"""
# %%
# Forwarded to `TrainingPipeline` as `advance_scene_rollout_period`.
ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
# Total number of training processes, spread over the training GPUs when CUDA is available.
NUM_PROCESSES = 20
# Ids of the GPUs used for training, validation and testing respectively.
TRAINING_GPUS: Sequence[int] = [0]
VALIDATION_GPUS: Sequence[int] = [0]
TESTING_GPUS: Sequence[int] = [0]
# %%
"""
Since we are using a dataset to train our model we need to define the path to where we have stored it. If we
downloaded the dataset as instructed above, we can define the path as follows:
"""
# %%
# Dataset locations, resolved against the working directory at class-definition time.
# Both currently point at the same `debug` dataset.
TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/debug")
VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/debug")
# %%
"""
Next, we define the sensors. `RGBSensorThor` is the environment's implementation of an RGB sensor. It takes the
raw image outputted by the simulator and resizes it, to the input dimensions for our neural network that we
specified above. It also performs normalization if we want. `GPSCompassSensorRoboThor` is a sensor that tracks
the point our agent needs to move to. It tells us the direction and distance to our goal at every time step.
"""
# %%
# Sensors handed to the task samplers and used to build the preprocessor graph.
SENSORS = [
    # RGB frames at SCREEN_SIZE x SCREEN_SIZE; its uuid is the ResNet preprocessor's input.
    RGBSensorThor(
        height=SCREEN_SIZE,
        width=SCREEN_SIZE,
        use_resnet_normalization=True,
        uuid="rgb_lowres",
    ),
    # Direction and distance to the goal at every time step.
    GPSCompassSensorRoboThor(),
]
# %%
"""
For the sake of this example, we are also going to be using a preprocessor with our model. In *AllenAct*
the preprocessor abstraction is designed with large models with frozen weights in mind. These models often
hail from the ResNet family and transform the raw pixels that our agent observes in the environment, into a
complex embedding, which then gets stored and used as input to our trainable model instead of the original image.
Most other preprocessing work is done in the sensor classes (as we just saw with the RGB
sensor scaling and normalizing our input), but for the sake of efficiency, all neural network preprocessing should
use this abstraction.
"""
# %%
# Preprocessor builders; the ResNet reads the "rgb_lowres" sensor output and
# publishes its result under "rgb_resnet".
PREPROCESSORS = [
    Builder(
        ResNetPreprocessor,
        {
            "input_height": SCREEN_SIZE,
            "input_width": SCREEN_SIZE,
            "output_width": 7,
            "output_height": 7,
            "output_dims": 512,
            "pool": False,
            "torchvision_resnet_model": models.resnet18,
            "input_uuids": ["rgb_lowres"],
            "output_uuid": "rgb_resnet",
        },
    ),
]
# %%
"""
Next, we must define all of the observation inputs that our model will use. These are just
the hardcoded ids of the sensors we are using in the experiment.
"""
# %%
# Ids of the observations the model consumes; these match the uuids passed to
# `create_model` (`rgb_resnet_preprocessor_uuid` and `goal_sensor_uuid`).
OBSERVATIONS = [
    "rgb_resnet",
    "target_coordinates_ind",
]
# %%
"""
Finally, we must define the settings of our simulator. We set the camera dimensions to the values
we defined earlier. We set rotateStepDegrees to 30 degrees, which means that every time the agent takes a
turn action, they will rotate by 30 degrees. We set grid size to 0.25 which means that every time the
agent moves forward, it will do so by 0.25 meters.
"""
# %%
# Simulator settings, copied into the `env_args` entry of every sampler's arguments.
ENV_ARGS = dict(
    width=CAMERA_WIDTH,
    height=CAMERA_HEIGHT,
    rotateStepDegrees=30.0,  # degrees rotated per turn action
    visibilityDistance=1.0,
    gridSize=0.25,  # meters moved per forward action
    agentMode="bot",
)
# %%
"""
Now we move on to the methods that we must define to finish implementing an experiment config. Firstly we
have a simple method that just returns the name of the experiment.
"""
# %%
@classmethod
def tag(cls) -> str:
    """Return the name identifying this experiment."""
    experiment_name = "PointNavRobothorRGBPPO"
    return experiment_name
# %%
"""
Next, we define the training pipeline. In this function, we specify exactly which algorithm or algorithms
we will use to train our model. In this simple example, we are using the PPO loss with a learning rate of 3e-4.
We specify 250 million steps of training and a rollout length of 30 with the `ppo_steps` and `num_steps` parameters
respectively. All the other standard PPO parameters are also present in this function. `metric_accumulate_interval`
sets the frequency at which data is accumulated from all the processes and logged while `save_interval` sets how
often we save the model weights and run validation on them.
"""
# %%
@classmethod
def training_pipeline(cls, **kwargs):
    """Build the single-stage PPO training pipeline for this experiment.

    The pipeline runs one PPO stage for 250 million steps with rollouts of
    length 30, an Adam optimizer (lr 3e-4) and a `LinearDecay` learning-rate
    schedule spanning the whole stage.
    """
    total_steps = int(250000000)

    # Constructed in the same order as they are consumed below.
    adam_builder = Builder(optim.Adam, dict(lr=3e-4))
    losses = {"ppo_loss": PPO(**PPOConfig)}
    stages = [PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps)]
    scheduler_builder = Builder(
        LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}
    )

    return TrainingPipeline(
        # Checkpointing / logging cadence (in steps).
        save_interval=5000000,
        metric_accumulate_interval=1000,
        # Optimization settings.
        optimizer_builder=adam_builder,
        num_mini_batch=1,
        update_repeats=3,
        max_grad_norm=0.5,
        num_steps=30,
        named_losses=losses,
        # Return / advantage estimation.
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
        advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=stages,
        lr_scheduler_builder=scheduler_builder,
    )
# %%
"""
The `machine_params` method returns the hardware parameters of each
process, based on the list of devices we defined above.
"""
# %%
def machine_params(self, mode="train", **kwargs):
    """Return the process/device layout for the given mode.

    # Parameters

    mode : One of 'train', 'valid' or 'test'.

    # Returns

    A `MachineParams` holding the process count(s), the devices, the sampler
    devices and the sensor preprocessor graph.

    # Raises

    NotImplementedError : If `mode` is not one of the three supported values.
    """
    if mode not in ("train", "valid", "test"):
        raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

    has_cuda = torch.cuda.is_available()

    if mode == "train":
        workers_per_device = 1
        if has_cuda:
            gpu_ids = list(self.TRAINING_GPUS) * workers_per_device
            nprocesses = evenly_distribute_count_into_bins(
                self.NUM_PROCESSES, len(gpu_ids)
            )
        else:
            # CPU-only training falls back to a fixed number of processes.
            gpu_ids = []
            nprocesses = 8
        sampler_devices = list(self.TRAINING_GPUS)
    else:
        nprocesses = 1
        if not has_cuda:
            gpu_ids = []
        elif mode == "valid":
            gpu_ids = self.VALIDATION_GPUS
        else:
            gpu_ids = self.TESTING_GPUS
        # Outside of training the samplers live on the inference devices.
        sampler_devices = gpu_ids

    # The graph is always built for training; otherwise only when at least
    # one process will actually run.
    needs_graph = (
        mode == "train"
        or (isinstance(nprocesses, int) and nprocesses > 0)
        or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
    )
    sensor_preprocessor_graph = None
    if needs_graph:
        sensor_preprocessor_graph = SensorPreprocessorGraph(
            source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
            preprocessors=self.PREPROCESSORS,
        )

    return MachineParams(
        nprocesses=nprocesses,
        devices=gpu_ids,
        sampler_devices=sampler_devices,  # ignored with > 1 gpu_ids
        sensor_preprocessor_graph=sensor_preprocessor_graph,
    )
# %%
"""
Now we define the actual model that we will be using. **AllenAct** offers first-class support for PyTorch,
so any PyTorch model that implements the provided `ActorCriticModel` class will work here. Here we borrow a model from the `pointnav_baselines` project (which
unsurprisingly contains several PointNav baselines). It is a small convolutional network that expects the output of a ResNet as its rgb input followed by a single-layered GRU. The model accepts as input the number of different
actions our agent can perform in the environment through the `action_space` parameter, which we get from the task definition. We also define the shape of the inputs we are going to be passing to the model with `observation_space`.
We specify the names of our sensors with `goal_sensor_uuid` and `rgb_resnet_preprocessor_uuid`. Finally, we define
the size of our RNN with `hidden_size` and the size of the embedding of our goal sensor data (the direction and
distance to the target) with `goal_dims`.
"""
# %%
@classmethod
def create_model(cls, **kwargs) -> nn.Module:
    """Instantiate the actor-critic model used by this experiment.

    Expects `kwargs["sensor_preprocessor_graph"]`, whose `observation_spaces`
    describe the model's inputs. The action space has one discrete action per
    entry of `PointNavTask.class_action_names()`.
    """
    action_space = gym.spaces.Discrete(len(PointNavTask.class_action_names()))
    preprocessor_graph = kwargs["sensor_preprocessor_graph"]
    return ResnetTensorNavActorCritic(
        action_space=action_space,
        observation_space=preprocessor_graph.observation_spaces,
        goal_sensor_uuid="target_coordinates_ind",
        rgb_resnet_preprocessor_uuid="rgb_resnet",
        hidden_size=512,
        goal_dims=32,
    )
# %%
"""
We also need to define the task sampler that we will be using. This is a piece of code that generates instances
of tasks for our agent to perform (essentially starting locations and targets for PointNav). Since we are getting
our tasks from a dataset, the task sampler is a very simple piece of code that just reads the specified file and sets
the agent to the next starting locations whenever the agent exceeds the maximum number of steps or selects the
`stop` action.
"""
# %%
@classmethod
def make_sampler_fn(cls, **kwargs) -> TaskSampler:
    """Create a `PointNavDatasetTaskSampler` from the given sampler arguments."""
    sampler = PointNavDatasetTaskSampler(**kwargs)
    return sampler
# %%
"""
You might notice that we did not specify the task sampler's arguments, but are rather passing them in. The
reason for this is that each process will have its own task sampler, and we need to specify exactly which scenes
each process should work with. If we have several GPUS and many scenes this process of distributing the work can be rather complicated so we define a few helper functions to do just this.
"""
# %%
@staticmethod
def _partition_inds(n: int, num_parts: int):
    """Return `num_parts + 1` integer boundaries splitting `range(n)` into parts.

    Consecutive entries delimit one part each; the first is 0 and the last is `n`.
    """
    boundaries = np.linspace(0, n, num=num_parts + 1, endpoint=True)
    rounded = np.round(boundaries)
    return rounded.astype(np.int32)
def _get_sampler_args_for_scene_split(
    self,
    scenes_dir: str,
    process_ind: int,
    total_processes: int,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Build the sampler arguments shared by the train/valid/test samplers.

    Scene names are read from the `*.json.gz` files in `scenes_dir` and split
    among `total_processes` processes; the slice belonging to `process_ind`
    is returned together with the task settings of this config.

    # Parameters

    scenes_dir : Directory containing one `<scene>.json.gz` file per scene.
    process_ind : Index of the process these arguments are for.
    total_processes : Total number of sampler processes.
    seeds : Optional per-process seeds, indexed by `process_ind`.
    deterministic_cudnn : Passed through unchanged to the sampler.

    # Returns

    A dict with the keys `scenes`, `max_steps`, `sensors`, `action_space`,
    `seed`, `deterministic_cudnn` and `rewards_config`.

    # Raises

    RuntimeError : If no scene files are found in `scenes_dir`.
    """
    path = os.path.join(scenes_dir, "*.json.gz")
    # `glob` gives no ordering guarantee, and this function is evaluated
    # once per process: sort so that all processes partition the very same
    # list and their slices stay consistent with each other.
    scenes = sorted(
        os.path.basename(scene).split(".")[0] for scene in glob.glob(path)
    )
    if len(scenes) == 0:
        raise RuntimeError(
            (
                "Could find no scene dataset information in directory {}."
                " Are you sure you've downloaded them? "
                " If not, see https://allenact.org/installation/download-datasets/ information"
                " on how this can be done."
            ).format(scenes_dir)
        )
    if total_processes > len(scenes):  # oversample some scenes -> bias
        if total_processes % len(scenes) != 0:
            print(
                "Warning: oversampling some of the scenes to feed all processes."
                " You can avoid this by setting a number of workers divisible by the number of scenes"
            )
        # Repeat the scene list until every process can get one, then trim
        # it to a multiple of the number of processes.
        scenes = scenes * int(ceil(total_processes / len(scenes)))
        scenes = scenes[: total_processes * (len(scenes) // total_processes)]
    else:
        if len(scenes) % total_processes != 0:
            print(
                "Warning: oversampling some of the scenes to feed all processes."
                " You can avoid this by setting a number of workers divisor of the number of scenes"
            )
    inds = self._partition_inds(len(scenes), total_processes)

    return {
        "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
        "max_steps": self.MAX_STEPS,
        "sensors": self.SENSORS,
        "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
        "seed": seeds[process_ind] if seeds is not None else None,
        "deterministic_cudnn": deterministic_cudnn,
        "rewards_config": self.REWARD_CONFIG,
    }
# %%
"""
The very last things we need to define are the sampler arguments themselves. We define them separately for a train,
validation, and test sampler, but in this case, they are almost the same. The arguments need to include the location
of the dataset and distance cache as well as the environment arguments for our simulator, both of which we defined above
and are just referencing here. The only consequential differences between these task samplers are the path to the dataset
we are using (train or validation) and whether we want to loop over the dataset or not (we want this for training since
we want to train for several epochs, but we do not need this for validation and testing). Since the test scenes of
RoboTHOR are private we are also testing on our validation set.
"""
# %%
def train_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Return the task sampler arguments for one training process.

    Uses the episodes under `TRAIN_DATASET_DIR`, loops over the dataset and
    allows flipping. The simulator's `x_display` is derived from the device
    assigned to this process, or is `None` when no devices are given.
    """
    sampler_args = self._get_sampler_args_for_scene_split(
        os.path.join(self.TRAIN_DATASET_DIR, "episodes"),
        process_ind,
        total_processes,
        seeds=seeds,
        deterministic_cudnn=deterministic_cudnn,
    )

    # Work on a copy so the class-level ENV_ARGS is never mutated.
    env_args = dict(self.ENV_ARGS)
    if devices is not None and len(devices) > 0:
        env_args["x_display"] = "0.%d" % devices[process_ind % len(devices)]
    else:
        env_args["x_display"] = None

    sampler_args.update(
        scene_directory=self.TRAIN_DATASET_DIR,
        loop_dataset=True,
        env_args=env_args,
        allow_flipping=True,
    )
    return sampler_args
def valid_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Return the task sampler arguments for one validation process.

    Uses the episodes under `VAL_DATASET_DIR` without looping over the
    dataset. The simulator's `x_display` is derived from the device assigned
    to this process, or is `None` when no devices are given.
    """
    sampler_args = self._get_sampler_args_for_scene_split(
        os.path.join(self.VAL_DATASET_DIR, "episodes"),
        process_ind,
        total_processes,
        seeds=seeds,
        deterministic_cudnn=deterministic_cudnn,
    )

    # Work on a copy so the class-level ENV_ARGS is never mutated.
    env_args = dict(self.ENV_ARGS)
    if devices is not None and len(devices) > 0:
        env_args["x_display"] = "0.%d" % devices[process_ind % len(devices)]
    else:
        env_args["x_display"] = None

    sampler_args.update(
        scene_directory=self.VAL_DATASET_DIR,
        loop_dataset=False,
        env_args=env_args,
    )
    return sampler_args
def test_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Return the task sampler arguments for one test process.

    Testing reuses the episodes under `VAL_DATASET_DIR` and does not loop
    over the dataset.

    NOTE(review): unlike the train/valid variants, `devices` is unused here
    and no `x_display` entry is added to `env_args` - confirm this is intended.
    """
    sampler_args = self._get_sampler_args_for_scene_split(
        os.path.join(self.VAL_DATASET_DIR, "episodes"),
        process_ind,
        total_processes,
        seeds=seeds,
        deterministic_cudnn=deterministic_cudnn,
    )
    sampler_args.update(
        scene_directory=self.VAL_DATASET_DIR,
        loop_dataset=False,
        # A copy, so the class-level ENV_ARGS is never mutated.
        env_args=dict(self.ENV_ARGS),
    )
    return sampler_args
# %%
"""
This is it! If we copy all of the code into a file we should be able to run our experiment!
## Training Model On Debug Dataset
We can test if our installation worked properly by training our model on a small dataset of 4 episodes. This
should take about 20 minutes on a computer with an NVIDIA GPU.
We can now train a model by running:
```bash
PYTHONPATH=. python allenact/main.py -o <PATH_TO_OUTPUT> -c -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> <EXPERIMENT_NAME>
```
If using the same configuration as we have set up, the following command should work:
```bash
PYTHONPATH=. python allenact/main.py training_a_pointnav_model -o storage/robothor-pointnav-rgb-resnet-resnet -b projects/tutorials
```
If we start up a tensorboard server during training and specify that `output_dir=storage` the output should look
something like this:

## Training Model On Full Dataset
We can also train the model on the full dataset by changing back our dataset path and running the same command as above.
But be aware, training this takes nearly 2 days on a machine with 8 GPUs.
## Testing Model
To test the performance of a model please refer to [this tutorial](running-inference-on-a-pretrained-model.md).
## Conclusion
In this tutorial, we learned how to create a new PointNav experiment using **AllenAct**. There are many simple
and obvious ways to modify the experiment from here - changing the model, the learning algorithm and the environment
each requires very few lines of code changed in the above file, allowing us to explore our embodied ai research ideas
across different frameworks with ease.
"""
================================================
FILE: requirements.txt
================================================
certifi==2020.12.5
chardet==4.0.0
cloudpickle==1.6.0
cycler==0.10.0
decorator==4.4.2
filelock==3.0.12
future==0.18.2
gym==0.17.3
idna==2.10
imageio==2.9.0
imageio-ffmpeg==0.4.3
kiwisolver==1.3.1
matplotlib==3.3.3
moviepy==1.0.3
networkx==2.5
numpy==1.19.5
opencv-python==4.5.1.48
Pillow>=8.2.0,<9.0.0
proglog==0.1.9
protobuf==3.14.0
pyglet==1.5.0
pyparsing==2.4.7
python-dateutil>=2.8.1
requests==2.25.1
scipy==1.5.4
setproctitle==1.2.1
six>=1.15.0
tensorboardX==2.1
torch>=1.6.0,!=1.8.0,<2.0.0
torchvision>=0.7.0,<0.10.0
tqdm==4.56.0
urllib3==1.26.5
attr
attrs
wandb
================================================
FILE: scripts/auto_format.sh
================================================
#!/bin/bash

# Move to the directory containing the directory that this file is in.
# `dirname` must be applied to the script path itself and `/..` appended
# afterwards: `dirname "<script>/.."` is the script file, which cannot be
# entered, and a following `cd ""` would silently stay in the caller's directory.
REPO_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." >/dev/null 2>&1 && pwd )" || exit 1
cd "$REPO_ROOT" || exit 1

echo RUNNING BLACK
black . --exclude src --exclude external_projects
echo BLACK DONE
echo ""

echo RUNNING DOCFORMATTER
# Skip vendored sources, external projects and stored experiment configs.
find . -name "*.py" | grep -v ^./src | grep -v ^./external_projects | grep -v used_configs | xargs docformatter --in-place -r
echo DOCFORMATTER DONE

echo ALL DONE
================================================
FILE: scripts/build_docs.py
================================================
import glob
import os
import shutil
import sys
from pathlib import Path
from subprocess import check_output
from threading import Thread
from typing import Dict, Union, Optional, Set, List, Sequence, Mapping
from git import Git
from ruamel.yaml import YAML # type: ignore
from constants import ABS_PATH_OF_TOP_LEVEL_DIR
# TODO: the scripts directory shouldn't be a module (as it conflicts with
# some local development workflows) but we do want to import scripts/literate.py.
# Temporary solution is just to modify the sys.path when this script is run.
sys.path.append(os.path.abspath(os.path.dirname(Path(__file__))))
from literate import literate_python_to_markdown
class StringColors:
    """ANSI escape sequences used to colorize terminal output."""

    # Colors
    HEADER = "\033[95m"
    OKBLUE = "\033[94m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    FAIL = "\033[91m"

    # Text attributes
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"

    # Resets all colors/attributes set above
    ENDC = "\033[0m"
# File names that `build_docs` never generates documentation for
# (compared against the bare file name of each directory entry).
exclude_files = [
    ".DS_Store",
    "__init__.py",
    "__init__.pyc",
    "README.md",
    "version.py",
    "run.py",
    "setup.py",
    "main.py",
]
def render_file(
    relative_src_path: str, src_file: str, to_file: str, modifier=""
) -> None:
    """Render the documentation of one python file.

    The file is first offered to `literate_python_to_markdown`; if that
    reports success nothing else is done. Otherwise `pydoc-markdown` is run
    on the file's module namespace and its output is written to `to_file`.
    Failures of `pydoc-markdown` are reported on stdout and otherwise ignored.

    The modifier specifies the depth at which to generate docs for
    classes and functions in the file. More information here:
    https://pypi.org/project/pydoc-markdown/

    # Parameters

    relative_src_path : "/"-separated directory of the source file.
    src_file : File name of the python source file.
    to_file : Path of the markdown file to write.
    modifier : Suffix appended to the module namespace given to pydoc-markdown.
    """
    import shlex

    # First try literate
    was_literate = False
    try:
        was_literate = literate_python_to_markdown(
            path=os.path.join(relative_src_path, src_file)
        )
    except Exception as _:
        # Deliberately best-effort: any failure falls back to pydoc-markdown.
        pass

    if was_literate:
        return

    # Now do standard pydocmd
    relative_src_namespace = relative_src_path.replace("/", ".")
    # Strip only the trailing extension, never a ".py" inside the name.
    src_base = src_file[: -len(".py")] if src_file.endswith(".py") else src_file

    if relative_src_namespace == "":
        namespace = f"{src_base}{modifier}"
    else:
        namespace = f"{relative_src_namespace}.{src_base}{modifier}"

    pydoc_config = """{
        renderer: {
            type: markdown,
            code_headers: true,
            descriptive_class_title: false,
            add_method_class_prefix: true,
            source_linker: {type: github, repo: allenai/allenact},
            header_level_by_type: {
                Module: 1,
                Class: 2,
                Method: 3,
                Function: 3,
                Data: 3,
            }
        }
    }"""
    pydoc_config = " ".join(pydoc_config.split())
    # Passed as an argument list (no shell), so neither the config nor the
    # path-derived namespace needs any quoting.
    args = ["pydoc-markdown", "-m", namespace, pydoc_config]
    try:
        call_result = check_output(args, env=os.environ).decode("utf-8")

        # noinspection PyShadowingNames
        with open(to_file, "w", encoding="utf-8") as f:
            doc_split = call_result.split("\n")
            mdlink = ""  # No source link is appended to the title line for now.
            call_result = "\n".join([doc_split[0] + " " + mdlink] + doc_split[1:])
            call_result = call_result.replace("_DOC_COLON_", ":")
            f.write(call_result)
        print(
            f"{StringColors.OKGREEN}[SUCCESS]{StringColors.ENDC} built docs for {src_file} -> {to_file}."
        )
    except Exception as _:
        # Shown shell-quoted so the command can be copy-pasted for debugging.
        cmd = " ".join(shlex.quote(arg) for arg in args)
        print(
            f"{StringColors.WARNING}[SKIPPING]{StringColors.ENDC} could not"
            f" build docs for {src_file} (missing an import?). CMD: '{cmd}'"
        )
# noinspection PyShadowingNames
def build_docs_for_file(
    relative_path: str, file_name: str, docs_dir: str, threads: List
) -> Dict[str, str]:
    """Start rendering the docs of one python file in a background thread.

    The started thread is appended to `threads` so the caller can join it.
    Returns a single-entry nav mapping from the module name to the markdown
    path under `api/`.
    """
    module_name = file_name.replace(".py", "")
    md_name = f"{module_name}.md"

    worker = Thread(
        target=render_file,
        args=(
            relative_path,
            file_name,
            os.path.join(docs_dir, relative_path, md_name),
        ),
    )
    worker.start()
    threads.append(worker)

    return {
        os.path.basename(module_name): os.path.join("api", relative_path, md_name)
    }
# noinspection PyShadowingNames
def build_docs(
    base_dir: Union[Path, str],
    root_path: Union[Path, str],
    docs_dir: Union[Path, str],
    threads: List,
    allowed_dirs: Optional[Set[str]] = None,
):
    """Recursively build docs for every python file below `root_path`.

    # Parameters

    base_dir : Directory that output paths are made relative to.
    root_path : Directory to walk in this call.
    docs_dir : Directory below which the markdown files are written.
    threads : List that collects the rendering threads that get started.
    allowed_dirs : If given, sub-directories whose absolute path is not in
        this set are skipped.

    # Returns

    The nav structure of the walked directory: a list of single-entry dicts,
    one per documented file or non-empty sub-directory.
    """
    base_dir, root_path, docs_dir = str(base_dir), str(root_path), str(docs_dir)

    nav_root = []

    # `os.listdir` order is arbitrary; sort for a reproducible nav.
    for child in sorted(os.listdir(root_path)):
        relative_path = os.path.join(root_path, child)

        if (
            (allowed_dirs is not None)
            and (os.path.isdir(relative_path))
            and (os.path.abspath(relative_path) not in allowed_dirs)
        ):
            print("SKIPPING {}".format(relative_path))
            continue

        # Only the "current directory" marker maps to the docs root; dots
        # that are part of a directory name must be kept.
        new_path = os.path.relpath(root_path, base_dir)
        if new_path == os.curdir:
            new_path = ""
        target_dir = os.path.join(docs_dir, new_path)
        os.makedirs(target_dir, exist_ok=True)

        if os.path.isdir(relative_path):
            nav_subsection = build_docs(
                base_dir,
                relative_path,
                docs_dir,
                threads=threads,
                allowed_dirs=allowed_dirs,
            )
            if not nav_subsection:
                # Nothing documented below this directory: keep it out of the nav.
                continue
            nav_root.append({child: nav_subsection})
        else:
            if child in exclude_files or not child.endswith(".py"):
                continue

            nav = build_docs_for_file(new_path, child, docs_dir, threads=threads)
            nav_root.append(nav)

    return nav_root
def project_readme_paths_to_nav_structure(project_readmes):
    """Turn project README paths into a nested navigation structure.

    Only the directory components that come after a `projects` component are
    used. The last directory component becomes a leaf holding the README path
    with every `docs/` occurrence removed; the other components become nested
    sections.

    # Parameters

    project_readmes : Iterable of `/`-separated README file paths.

    # Returns

    A list of single-key dictionaries whose values are either a path string
    (leaf) or another such list (section).
    """
    tree = {}
    for readme_path in project_readmes:
        dir_parts = os.path.dirname(readme_path).split("/")
        last_index = len(dir_parts) - 1

        node = tree
        inside_projects = False
        for index, part in enumerate(dir_parts):
            if part == "projects":
                inside_projects = True
                continue
            if not inside_projects:
                continue

            if part not in node:
                if index == last_index:
                    node[part] = readme_path.replace("docs/", "")
                    break
                node[part] = {}
            node = node[part]

    def to_nav(subtree):
        # Leaves are path strings, everything else is a section mapping.
        if isinstance(subtree, str):
            return subtree
        return [{name: to_nav(subtree[name])} for name in subtree]

    return to_nav(tree)
def pruned_nav_entries(nav_entries):
    """Drop navigation entries whose markdown file does not exist.

    # Parameters

    nav_entries : A path string (relative to the `docs` directory of the
        current working directory), or a sequence / mapping nesting such
        strings.

    # Returns

    For a string: the string itself if `docs/<string>` exists, else `None`.
    For a sequence / mapping: a new list / dict containing only the entries
    whose pruned value is truthy.

    # Raises

    NotImplementedError : If `nav_entries` is of any other type.
    """
    # Strings must be handled first: they are sequences as well.
    if isinstance(nav_entries, str):
        exists = os.path.exists(os.path.join("docs", nav_entries))
        return nav_entries if exists else None

    if isinstance(nav_entries, Sequence):
        pruned_items = (pruned_nav_entries(item) for item in nav_entries)
        return [item for item in pruned_items if item]

    if isinstance(nav_entries, Mapping):
        pruned_pairs = (
            (key, pruned_nav_entries(value)) for key, value in nav_entries.items()
        )
        return {key: value for key, value in pruned_pairs if value}

    raise NotImplementedError()
def main():
    """Populate the `docs` folder and refresh the `API` section of `mkdocs.yml`.

    Steps, in order: change into the top level directory, copy the README
    (as `docs/index.md`, with `docs/` removed from its text), the project
    READMEs, `LICENSE` and `CONTRIBUTING.md` into `docs`, build the API
    markdown files for the git-tracked directories (minus a fixed ignore
    list) and finally write the resulting navigation entries under the `API`
    key of the `nav` section in `mkdocs.yml`.
    """
    os.chdir(ABS_PATH_OF_TOP_LEVEL_DIR)

    print("Copying all README.md files to docs.")
    with open("README.md") as readme_file:
        index_lines = [
            line.replace("docs/", "") for line in readme_file.readlines()
        ]
    with open("docs/index.md", "w") as index_file:
        index_file.writelines(index_lines)

    project_readmes = []
    for readme_src in glob.glob("projects/**/README.md", recursive=True):
        if "docs/" in readme_src:
            continue
        readme_dst = os.path.join("docs", readme_src)
        os.makedirs(os.path.dirname(readme_dst), exist_ok=True)
        shutil.copy(readme_src, readme_dst)
        project_readmes.append(readme_dst)

    print("Copying LICENSE file to docs.")
    shutil.copy("LICENSE", "docs/LICENSE.md")

    print("Copying CONTRIBUTING.md file to docs.")
    shutil.copy("CONTRIBUTING.md", "docs/CONTRIBUTING.md")

    print("Building the docs.")
    repo_root = Path(__file__).parent.parent
    yaml_path = repo_root / "mkdocs.yml"
    docs_dir = str(repo_root / "docs" / "api")
    if not os.path.exists(docs_dir):
        os.mkdir(docs_dir)

    yaml = YAML()
    mkdocs_yaml = yaml.load(yaml_path)
    site_nav = mkdocs_yaml["nav"]

    # TODO Find a way to add `project_readmes` to the navigation (see
    #  `project_readme_paths_to_nav_structure`) in a way that results in
    #  nice titles. Until then the loaded yaml is written back as is.
    with open(yaml_path, "w") as yaml_file:
        yaml.dump(mkdocs_yaml, yaml_file)

    # Only directories containing git-tracked files are documented ...
    tracked_dirs = {
        os.path.abspath(os.path.split(tracked_file)[0])
        for tracked_file in Git(".").ls_files().split("\n")
    }
    # ... except for the following top level directories.
    ignored_dirs = {
        os.path.abspath(os.path.join(str(repo_root), rel_dir))
        for rel_dir in (
            "docs",
            "scripts",
            "experiments",
            "src",
            ".pip_src",
            "dist",
            "build",
        )
    }
    tracked_dirs.difference_update(ignored_dirs)

    threads: List = []
    nav_entries = build_docs(
        repo_root,
        repo_root,
        docs_dir,
        threads=threads,
        allowed_dirs=tracked_dirs,
    )
    # Order the top level entries by their (single) key.
    nav_entries.sort(key=lambda entry: list(entry)[0])

    # Rendering must be finished before checking which files exist.
    for worker in threads:
        worker.join()
    nav_entries = pruned_nav_entries(nav_entries)

    # Find the yaml entry corresponding to the API and replace its content.
    docs_key = "API"
    nav_obj = next((obj for obj in site_nav if docs_key in obj), None)
    nav_obj[docs_key] = nav_entries

    with open(yaml_path, "w") as yaml_file:
        yaml.dump(mkdocs_yaml, yaml_file)
# Build the documentation only when this file is executed as a script.
if __name__ == "__main__":
main()
================================================
FILE: scripts/build_docs.sh
================================================
#!/usr/bin/env bash
# Builds the documentation by running scripts/build_docs.py.
# NOTE(review): both $PWD and the relative script path below suggest this is
# meant to be invoked from the repository root - confirm.

# Abort as soon as any command fails.
set -e
# Add allenact to the python path
export PYTHONPATH=$PYTHONPATH:$PWD
# (Currently disabled) Alter the relative path of the README image for the docs.
#sed -i '1s/docs/./' docs/README.md
python scripts/build_docs.py
================================================
FILE: scripts/dcommand.py
================================================
#!/usr/bin/env python3
"""Tool to run command on multiple nodes through SSH."""
import argparse
import glob
import os
def get_argument_parser():
    """Build the command line parser of the `dcommand` tool.

    # Returns

    An `argparse.ArgumentParser` accepting the optional string arguments
    `--runs_on`, `--ssh_cmd` and `--command`.
    """
    # (flag, default, help) for each optional string argument.
    option_specs = (
        (
            "--runs_on",
            None,
            "Comma-separated IP addresses of machines. If empty, the tool will scan for lists of IP addresses"
            " in `screen_ids_file`s in the `~/.allenact` directory.",
        ),
        (
            "--ssh_cmd",
            "ssh {addr}",
            "SSH command. Useful to utilize a pre-shared key with 'ssh -i path/to/mykey.pem ubuntu@{addr}'.",
        ),
        (
            "--command",
            "nvidia-smi | head -n 35",
            "Command to be run through ssh onto each machine",
        ),
    )

    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        description="dcommand",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    for flag, default, help_text in option_specs:
        parser.add_argument(
            flag, required=False, type=str, default=default, help=help_text
        )

    return parser
def get_args():
    """Parse the command line arguments with the parser from
    `get_argument_parser`."""
    return get_argument_parser().parse_args()
def wrap_double(text):
    """Return `text` enclosed in double quotes (no escaping is applied)."""
    return '"{}"'.format(text)
def wrap_single(text):
    """Return `text` enclosed in single quotes (no escaping is applied)."""
    return "'{}'".format(text)
def wrap_single_nested(text, quote=r"'\''"):
    """Return `text` with `quote` placed before and after it.

    The default `quote` is the character sequence `'\''`.
    """
    return "{0}{1}{0}".format(quote, text)
if __name__ == "__main__":
    # Runs `--command` through ssh on every selected machine. The machines
    # are either given via `--runs_on` or read from the `*.killfile`s found
    # in `~/.allenact` (newest name first), asking for confirmation per file.

    # Local import: only needed by this entry point.
    import shlex

    args = get_args()

    all_addresses = []
    if args.runs_on is not None:
        all_addresses = args.runs_on.split(",")
    else:
        all_files = sorted(
            glob.glob(os.path.join(os.path.expanduser("~"), ".allenact", "*.killfile")),
            reverse=True,
        )
        if len(all_files) == 0:
            print(
                f"No screen_ids_file found under {os.path.join(os.path.expanduser('~'), '.allenact')}"
            )

        for killfile in all_files:
            with open(killfile, "r") as f:
                # Each line contains 'IP_address screen_ID'. Splitting on
                # whitespace (instead of chopping the last character) keeps a
                # final line without trailing newline intact; blank lines
                # are ignored.
                nodes = [tuple(line.split()) for line in f if line.strip()]
            all_addresses.extend(node[0] for node in nodes)

            use_addresses = ""
            while use_addresses not in ["y", "n"]:
                use_addresses = input(
                    f"Run on {all_addresses} from {killfile}? [Y/n] "
                ).lower()
                # Just pressing enter accepts the addresses.
                if use_addresses == "":
                    use_addresses = "y"

            if use_addresses == "n":
                all_addresses.clear()
            else:
                break

    print(f"Running on IP addresses {all_addresses}")

    for it, addr in enumerate(all_addresses):
        # `shlex.quote` makes the local shell hand the command to ssh as a
        # single argument even when it contains single quotes.
        ssh_command = (
            f"{args.ssh_cmd.format(addr=addr)} {shlex.quote(args.command)}"
        )

        print(f"{it} {addr} SSH command {ssh_command}")
        os.system(ssh_command)

    print("DONE")
================================================
FILE: scripts/dconfig.py
================================================
#!/usr/bin/env python3
import os
import argparse
def get_argument_parser():
    """Build the command line parser of the `dconfig` tool.

    # Returns

    An `argparse.ArgumentParser` with the required `--runs_on` and
    `--config_script` arguments, the optional `--ssh_cmd` argument and the
    `--distribute_public_rsa_key` flag (off by default).
    """
    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        description="dconfig",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    required_options = (
        ("--runs_on", "Comma-separated IP addresses of machines"),
        ("--config_script", "Path to bash script with configuration"),
    )
    for flag, help_text in required_options:
        parser.add_argument(flag, required=True, type=str, help=help_text)

    parser.add_argument(
        "--ssh_cmd",
        required=False,
        type=str,
        default="ssh -f {addr}",
        help="SSH command. Useful to utilize a pre-shared key with 'ssh -i path/to/mykey.pem -f ubuntu@{addr}'. "
        "The option `-f` should be used, since we want a non-interactive session",
    )

    parser.add_argument(
        "--distribute_public_rsa_key",
        dest="distribute_public_rsa_key",
        action="store_true",
        required=False,
        help="if you pass the `--distribute_public_rsa_key` flag, the manager node's public key will be added to the "
        "authorized keys of all workers (this is necessary in default-configured EC2 instances to use "
        "`scripts/dmain.py`)",
    )
    parser.set_defaults(distribute_public_rsa_key=False)

    return parser
def get_args():
    """Parse the command line arguments with the parser from
    `get_argument_parser`."""
    return get_argument_parser().parse_args()
def wrap_double(text):
    """Return `text` enclosed in double quotes (no escaping is applied)."""
    return '"{}"'.format(text)
def wrap_single(text):
    """Return `text` enclosed in single quotes (no escaping is applied)."""
    return "'{}'".format(text)
def wrap_single_nested(text, quote=r"'\''"):
    """Return `text` with `quote` placed before and after it.

    The default `quote` is the character sequence `'\''`.
    """
    return "{0}{1}{0}".format(quote, text)
if __name__ == "__main__":
    # For every machine in `--runs_on`: optionally append the local public
    # rsa key to the machine's authorized keys, copy the configuration
    # script over and source it inside a detached `screen` session.
    args = get_args()

    all_addresses = args.runs_on.split(",")
    print(f"Running on addresses {all_addresses}")

    remote_config_script = f"{args.config_script}.distributed"

    for it, addr in enumerate(all_addresses):
        ssh_prefix = args.ssh_cmd.format(addr=addr)

        if args.distribute_public_rsa_key:
            append_key = wrap_double(
                "echo $(cat ~/.ssh/id_rsa.pub) >> ~/.ssh/authorized_keys"
            )
            key_command = f"{ssh_prefix} {append_key}"
            print(f"Key command {key_command}")
            os.system(f"{key_command}")

        # The scp call is derived from the ssh command: `ssh ` becomes
        # `scp ` and `-f` is replaced by the local script path.
        scp_template = args.ssh_cmd.replace("ssh ", "scp ").replace(
            "-f", args.config_script
        )
        scp_call = f"{scp_template.format(addr=addr)}:{remote_config_script}"
        print(f"SCP command {scp_call}")
        os.system(scp_call)

        screen_name = f"allenact_config_machine{it}"
        bash_command = wrap_single_nested(
            f"source {remote_config_script} &>> log_allenact_distributed_config"
        )
        screen_command = wrap_single(
            f"screen -S {screen_name} -dm bash -c {bash_command}"
        )

        ssh_command = f"{ssh_prefix} {screen_command}"
        print(f"SSH command {ssh_command}")
        os.system(ssh_command)
        print(f"{addr} {screen_name}")

    print("DONE")
================================================
FILE: scripts/dkill.py
================================================
#!/usr/bin/env python3
"""Tool to terminate multi-node (distributed) training."""
import os
import argparse
import glob
def get_argument_parser():
    """Build the command line parser of the `dkill` tool.

    # Returns

    An `argparse.ArgumentParser` accepting the optional string arguments
    `--screen_ids_file` and `--ssh_cmd`.
    """
    # (flag, default, help) for each optional string argument.
    option_specs = (
        (
            "--screen_ids_file",
            None,
            "Path to file generated by dmain.py with IPs and screen ids for nodes running process."
            " If empty, the tool will scan the `~/.allenact` directory for `screen_ids_file`s.",
        ),
        (
            "--ssh_cmd",
            "ssh {addr}",
            "SSH command. Useful to utilize a pre-shared key with 'ssh -i mykey.pem ubuntu@{addr}'. ",
        ),
    )

    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        description="dkill",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    for flag, default, help_text in option_specs:
        parser.add_argument(
            flag, required=False, type=str, default=default, help=help_text
        )

    return parser
def get_args():
    """Parse the command line arguments with the parser from
    `get_argument_parser`."""
    return get_argument_parser().parse_args()
if __name__ == "__main__":
    # For each screen ids file (the one given via `--screen_ids_file`, or all
    # `*.killfile`s in `~/.allenact`, newest name first): after confirmation,
    # quit the listed screen sessions through ssh and optionally delete the
    # file.
    args = get_args()

    all_files = (
        [args.screen_ids_file]
        if args.screen_ids_file is not None
        else sorted(
            glob.glob(os.path.join(os.path.expanduser("~"), ".allenact", "*.killfile")),
            reverse=True,
        )
    )

    if len(all_files) == 0:
        print(
            f"No screen_ids_file found under {os.path.join(os.path.expanduser('~'), '.allenact')}"
        )

    for killfile in all_files:
        with open(killfile, "r") as f:
            # Each line contains 'IP_address screen_ID'. Splitting on
            # whitespace (instead of chopping the last character) keeps a
            # final line without trailing newline intact; blank lines are
            # ignored so that unpacking below cannot fail on them.
            nodes = [tuple(line.split()) for line in f if line.strip()]

        do_kill = ""
        while do_kill not in ["y", "n"]:
            do_kill = input(
                f"Stopping processes on {nodes} from {killfile}? [y/N] "
            ).lower()
            # Just pressing enter declines.
            if do_kill == "":
                do_kill = "n"

        if do_kill == "y":
            for it, node in enumerate(nodes):
                addr, screen_name = node

                print(f"Killing screen {screen_name} on {addr}")

                # Quit the screen session, then list the `Master:` processes
                # that are still running on the machine.
                ssh_command = (
                    f"{args.ssh_cmd.format(addr=addr)} '"
                    f"screen -S {screen_name} -p 0 -X quit ; "
                    f"sleep 1 ; "
                    f"echo Master processes left running: ; "
                    f"ps aux | grep Master: | grep -v grep ; "
                    f"echo ; "
                    f"'"
                )

                # print(f"SSH command {ssh_command}")
                os.system(ssh_command)

            do_delete = ""
            while do_delete not in ["y", "n"]:
                do_delete = input(f"Delete file {killfile}? [y/N] ").lower()
                if do_delete == "":
                    do_delete = "n"

            if do_delete == "y":
                # `os.remove` works for paths containing spaces or shell
                # metacharacters and lets us report failures instead of
                # unconditionally claiming success.
                try:
                    os.remove(killfile)
                except OSError as error:
                    print(f"Could not delete {killfile}: {error}")
                else:
                    print(f"Deleted {killfile}")

    print("DONE")
================================================
FILE: scripts/dmain.py
================================================
#!/usr/bin/env python3
"""Entry point to multi-node (distributed) training for a user given experiment
name."""
import os
import random
import string
import subprocess
import sys
import time
from pathlib import Path
from typing import Optional
# Add to PYTHONPATH the path of the parent directory of the current file's directory
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(Path(__file__)))))
from allenact.main import get_argument_parser as get_main_arg_parser
from allenact.utils.system import init_logging, get_logger
from constants import ABS_PATH_OF_TOP_LEVEL_DIR
def get_argument_parser():
    """Build the command line parser of the distributed entry point.

    The parser of `allenact.main` is extended with the `--runs_on`,
    `--ssh_cmd`, `--env_activate_path` and `--allenact_path` arguments and
    its existing `distributed_ip_and_port` argument is made required.
    """
    parser = get_main_arg_parser()
    parser.description = f"distributed {parser.description}"

    # (flag, keyword arguments) of the additional string arguments.
    extra_options = (
        (
            "--runs_on",
            dict(
                required=True,
                help="Comma-separated IP addresses of machines",
            ),
        ),
        (
            "--ssh_cmd",
            dict(
                required=False,
                default="ssh -f {addr}",
                help="SSH command. Useful to utilize a pre-shared key with 'ssh -i mykey.pem -f ubuntu@{addr}'. "
                "The option `-f` should be used for non-interactive session",
            ),
        ),
        (
            "--env_activate_path",
            dict(
                required=True,
                help="Path to the virtual environment's `activate` script. It must be the same across all machines",
            ),
        ),
        (
            "--allenact_path",
            dict(
                required=False,
                default="allenact",
                help="Path to allenact top directory. It must be the same across all machines",
            ),
        ),
    )
    for flag, options in extra_options:
        parser.add_argument(flag, type=str, **options)

    # Required distributed_ip_and_port
    destinations = [action.dest for action in parser._actions]
    parser._actions[destinations.index("distributed_ip_and_port")].required = True

    return parser
def get_args():
    """Parse the command line arguments with the parser from
    `get_argument_parser`."""
    return get_argument_parser().parse_args()
def get_raw_args():
    """Return the command line arguments to forward to the workers.

    Starting from `sys.argv[1:]`:

    * the options `--runs_on`, `--ssh_cmd`, `--env_activate_path`,
      `--allenact_path`, `--extra_tag` and `--machine_id` are dropped
      together with the argument following them,
    * the argument following `--config_kwargs` has its single and double
      quotes replaced by `\\"` and is enclosed in a quoting sequence,
    * everything else is kept as is.
    """
    dropped_options = (
        "--runs_on",
        "--ssh_cmd",
        "--env_activate_path",
        "--allenact_path",
        "--extra_tag",
        "--machine_id",
    )
    # Within backslash expansion: close former single, open double, create single, close double, reopen single
    inner_quote = r"\'\"\'\"\'"

    forwarded = []
    pending = iter(sys.argv[1:])
    for arg in pending:
        if arg in dropped_options:
            # Also discard the option's value (if there is one).
            next(pending, None)
            continue

        forwarded.append(arg)
        if arg == "--config_kwargs":
            value = next(pending, None)
            if value is not None:
                # Convert double quotes into backslash double for later expansion
                escaped = value.replace('"', r"\"").replace("'", r"\"")
                forwarded.append(inner_quote + escaped + inner_quote)

    return forwarded
def wrap_single(text):
    """Return `text` enclosed in single quotes (no escaping is applied)."""
    return "'{}'".format(text)
def wrap_single_nested(text):
    """Return `text` placed between the `'$'\\'` and `\\'''` quoting
    sequences, for use inside an already single-quoted shell string."""
    return "".join(
        (
            # Close former single, start backslash expansion (via $), create new single quote for expansion:
            r"'$'\'",
            format(text),
            # New closing single quote for expansion, close backslash expansion, reopen former single:
            r"\'''",
        )
    )
def wrap_double(text):
    """Return `text` enclosed in double quotes (no escaping is applied)."""
    return '"{}"'.format(text)
def id_generator(size=4, chars=string.ascii_uppercase + string.digits):
    """Return a random identifier.

    # Parameters

    size : Number of characters of the identifier.
    chars : Characters to draw from (with `random.choice`, one draw per
        character of the identifier).
    """
    picked = []
    for _ in range(size):
        picked.append(random.choice(chars))
    return "".join(picked)
# Assume we can ssh into each of the `runs_on` machines through port 22
if __name__ == "__main__":
    # Launches one training process per machine in `--runs_on`: the code is
    # rsync'ed to each machine, `main.py` is started there inside a detached
    # `screen` session and the `address screen_name` pairs are saved to a
    # killfile in `~/.allenact`.

    # Tool must be called from AllenAct project's root directory
    cwd = os.path.abspath(os.getcwd())
    assert cwd == ABS_PATH_OF_TOP_LEVEL_DIR, (
        f"`dmain.py` called from {cwd}."
        f"\nIt should be called from AllenAct's top level directory {ABS_PATH_OF_TOP_LEVEL_DIR}."
    )

    args = get_args()
    init_logging(args.log_level)

    raw_args = get_raw_args()
    if args.seed is None:
        # All workers must share one seed, so draw it here.
        seed = random.randint(0, 2**31 - 1)
        raw_args += ["-s", str(seed)]
        get_logger().info(f"Using random seed {seed} in all workers (none was given)")

    all_addresses = args.runs_on.split(",")
    get_logger().info(f"Running on IP addresses {all_addresses}")

    listener_ip = args.distributed_ip_and_port.split(":")[0]
    assert listener_ip in all_addresses, (
        f"Missing listener IP address {listener_ip}"
        f" in list of worker addresses {all_addresses}"
    )

    time_str = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime(time.time()))
    global_job_id = id_generator()

    killfilename = os.path.join(
        os.path.expanduser("~"), ".allenact", f"{time_str}_{global_job_id}.killfile"
    )
    os.makedirs(os.path.dirname(killfilename), exist_ok=True)

    code_src = "."
    # Separates a user given extra tag from the per machine suffix.
    tag_separator = "__" if len(args.extra_tag) > 0 else ""

    with open(killfilename, "w") as killfile:
        for it, addr in enumerate(all_addresses):
            code_tget = f"{addr}:{args.allenact_path}/"
            get_logger().info(f"rsync {code_src} to {code_tget}")
            os.system(f"rsync -rz {code_src} {code_tget}")

            job_id = id_generator()

            command = " ".join(
                [
                    "python",
                    "main.py",
                    *raw_args,
                    "--extra_tag",
                    f"{args.extra_tag}{tag_separator}machine{it}",
                    "--machine_id",
                    str(it),
                ]
            )

            logfile = (
                f"{args.output_dir}/log_{time_str}_{global_job_id}_{job_id}_machine{it}"
            )

            # Shell steps executed (chained with `&&`) on the remote machine.
            remote_steps = [
                "for NCCL_SOCKET_IFNAME in $(route | grep default) ; do : ; done",
                "export NCCL_SOCKET_IFNAME",
                f"cd {args.allenact_path}",
                f"mkdir -p {args.output_dir}",
                f"source {args.env_activate_path} &>> {logfile}",
                f"echo pwd=$(pwd) &>> {logfile}",
                f"echo output_dir={args.output_dir} &>> {logfile}",
                f"echo python_version=$(python --version) &>> {logfile}",
                f"echo python_path=$(which python) &>> {logfile}",
                f"set | grep NCCL_SOCKET_IFNAME &>> {logfile}",
                f"echo &>> {logfile}",
                f"{command} &>> {logfile}",
            ]
            env_and_command = wrap_single_nested(" && ".join(remote_steps))

            screen_name = f"allenact_{time_str}_{global_job_id}_{job_id}_machine{it}"
            screen_command = wrap_single(
                f"screen -S {screen_name} -dm bash -c {env_and_command}"
            )

            ssh_command = f"{args.ssh_cmd.format(addr=addr)} {screen_command}"
            get_logger().debug(f"SSH command {ssh_command}")
            subprocess.run(ssh_command, shell=True, executable="/bin/bash")

            get_logger().info(f"{addr} {screen_name}")
            killfile.write(f"{addr} {screen_name}\n")

    get_logger().info("")
    get_logger().info(f"Running screen ids saved to {killfilename}")
    get_logger().info("")

    get_logger().info("DONE")
================================================
FILE: scripts/literate.py
================================================
"""Helper functions used to create literate documentation from python files."""
import importlib
import inspect
import os
from typing import Optional, Sequence, List, cast
from typing.io import TextIO
from constants import ABS_PATH_OF_DOCS_DIR, ABS_PATH_OF_TOP_LEVEL_DIR
def get_literate_output_path(file: TextIO) -> Optional[str]:
    """Determine where the markdown generated from a literate file is written.

    Only the first non-empty line of `file` is considered. It must start with
    `# literate` or `#literate` (case insensitive), optionally followed by
    `: <path relative to the docs directory>`.

    # Parameters

    file : Open text file; it is consumed up to and including its first
        non-empty line.

    # Returns

    `None` if the file has no non-empty line or the first such line is not a
    literate marker. Without an explicit path, the file's own name with
    `.py` replaced by `.md`; otherwise the absolute output path inside the
    docs directory.

    # Raises

    NotImplementedError : If the marker line contains more than one `:`.
    """
    for raw_line in file:
        line = raw_line.strip()
        if line == "":
            continue

        if not line.lower().startswith(("# literate", "#literate")):
            return None

        parts = line.split(":")
        if len(parts) == 1:
            assert (
                file.name[-3:].lower() == ".py"
            ), "Can only run literate on python (*.py) files."
            return file.name[:-3] + ".md"

        if len(parts) == 2:
            rel_outpath = parts[1].strip()
            outpath = os.path.abspath(os.path.join(ABS_PATH_OF_DOCS_DIR, rel_outpath))
            assert outpath.startswith(
                ABS_PATH_OF_DOCS_DIR
            ), f"Path {outpath} is not allowed, must be within {ABS_PATH_OF_DOCS_DIR}."
            return outpath

        raise NotImplementedError(f"Line '{line}' is not of the correct format.")

    return None
def source_to_markdown(dot_path: str, summarize: bool = False):
    """Return the source code of the object found at `dot_path`.

    # Parameters

    dot_path : `module.path.object_name` of a module level object.
    summarize : If `False` the complete source is returned. Otherwise, for a
        class, the first source line plus the signatures of its methods
        (each followed by a `...` line) are returned; for a function, its
        first source line followed by a `...` line.

    # Returns

    The (summarized) source, or `None` when asked to summarize something
    that is neither a class nor a function.
    """
    importlib.invalidate_caches()

    module_path, _, obj_name = dot_path.rpartition(".")
    obj = getattr(importlib.import_module(module_path), obj_name)
    source = inspect.getsource(obj)

    if not summarize:
        return source

    if inspect.isclass(obj):
        source_lines = source.split("\n")
        summary = [source_lines[0]]

        # Smallest indentation of a `def` seen so far; deeper (nested)
        # definitions are not part of the summary.
        def_indent = float("inf")
        # True while inside a signature spanning several lines.
        in_signature = False

        for line in source_lines[1:]:
            indent = len(line) - len(line.lstrip())

            if in_signature:
                summary.append(line)
                in_signature = "):" not in line and "->" not in line
                if not in_signature:
                    summary.append(line[: cast(int, def_indent)] + " ...\n")

            if line.lstrip().startswith("def ") and indent <= def_indent:
                def_indent = indent
                summary.append(line)
                in_signature = "):" not in line and "->" not in line
                if not in_signature:
                    summary.append(line[:def_indent] + " ...\n")

        return "\n".join(summary).strip()

    if inspect.isfunction(obj):
        return source.split("\n")[0] + "\n ..."

    return
def _strip_empty_lines(lines: Sequence[str]) -> List[str]:
lines = list(lines)
if len(lines) == 0:
return lines
for i in range(len(lines)):
if lines[i].strip() != "":
lines = lines[i:]
break
for i in reversed(list(range(len(lines)))):
if lines[i].strip() != "":
lines = lines[: i + 1]
break
return lines
# Convert the "literate" python file at `path` into a markdown file.
#
# Returns False (writing nothing) when `get_literate_output_path` yields no
# output path for the file; otherwise the generated lines are written to that
# path and True is returned.
#
# The remaining lines are processed with a small state machine, `mode` being
# one of None, "change", "markdown", "code" or "hide". A line starting with
# `# %%` / `#%%` ends the current section.
def literate_python_to_markdown(path: str) -> bool:
assert path[-3:].lower() == ".py", "Can only run literate on python (*.py) files."
with open(path, "r") as file:
output_path = get_literate_output_path(file)
if output_path is None:
return False
output_lines = [
f" ",
f"\n",
]
md_lines: List[str] = []
# NOTE: `code_lines` is the very same list object as `md_lines` (an alias,
# not a copy), so clearing either one clears both.
code_lines = md_lines
# Reads whatever is left of the file after `get_literate_output_path`
# consumed its leading line(s).
lines = file.readlines()
mode = None
for line in lines:
line = line.rstrip()
stripped_line = line.strip()
# Blank lines are dropped unless inside a markdown, code or hidden section.
if (mode is None or mode == "change") and line.strip() == "":
continue
if mode == "markdown":
# A line consisting only of a triple quote closes the markdown section.
if stripped_line in ['"""', "'''"]:
output_lines.extend(_strip_empty_lines(md_lines) + [""])
md_lines.clear()
mode = None
# A line ending with a triple quote closes it as well; the text in front
# of the quotes is kept.
elif stripped_line.endswith(('"""', "'''")):
output_lines.extend(
_strip_empty_lines(md_lines) + [stripped_line[:-3]]
)
md_lines.clear()
mode = None
# TODO: Does not account for the case where a string is ended with a comment.
else:
md_lines.append(line.strip())
elif stripped_line.startswith(("# %%", "#%%")):
last_mode = mode
mode = "change"
# Emit the code collected so far as a fenced python block.
if last_mode == "code":
output_lines.extend(
["```python"] + _strip_empty_lines(code_lines) + ["```"]
)
code_lines.clear()
# The marker line may carry a directive: ` import <dot path>`,
# ` import_summary <dot path>` or ` hide`.
if " import " in stripped_line:
# NOTE: rebinds the `path` parameter to the dot path of the object to embed.
path = stripped_line.split(" import ")[-1].strip()
output_lines.append(
"```python\n" + source_to_markdown(path) + "\n```"
)
elif " import_summary " in stripped_line:
path = stripped_line.split(" import_summary ")[-1].strip()
output_lines.append(
"```python\n"
+ source_to_markdown(path, summarize=True)
+ "\n```"
)
elif " hide" in stripped_line:
mode = "hide"
# Lines of a hidden section are skipped until the next `# %%` marker.
elif mode == "hide":
continue
elif mode == "change":
# First non-blank line after a marker: an opening triple quote starts
# markdown, anything else starts code.
if stripped_line.startswith(('"""', "'''")):
mode = "markdown"
if len(stripped_line) != 3:
# Text follows the opening quotes on the same line; if the line also
# ends with a triple quote the markdown is complete on this line.
if stripped_line.endswith(('"""', "'''")):
output_lines.append(stripped_line[3:-3])
mode = "change"
else:
output_lines.append(stripped_line[3:])
else:
mode = "code"
code_lines.append(line)
elif mode == "code":
code_lines.append(line)
else:
raise NotImplementedError(
f"mode {mode} is not implemented. Last 5 lines: "
+ "\n".join(output_lines[-5:])
)
# Flush a code section that runs until the end of the file.
if mode == "code" and len(code_lines) != 0:
output_lines.extend(
["```python"] + _strip_empty_lines(code_lines) + ["```"]
)
with open(output_path, "w") as f:
f.writelines([l + "\n" for l in output_lines])
return True
# When executed as a script, convert the pointnav training tutorial found
# below the top level directory.
if __name__ == "__main__":
# print(
# source_to_markdown(
# "allenact_plugins.minigrid_plugin.minigrid_offpolicy.ExpertTrajectoryIterator",
# True
# )
# )
literate_python_to_markdown(
os.path.join(
ABS_PATH_OF_TOP_LEVEL_DIR,
"projects/tutorials/training_a_pointnav_model.py",
)
)
================================================
FILE: scripts/release.py
================================================
import os
import sys
from pathlib import Path
from subprocess import getoutput
def make_package(name, verbose=False):
    """Prepares sdist for allenact or allenact_plugins.

    The sdist is generated with `<name>/setup.py`, unpacked, extended with a
    top level `setup.py` and a `_version.py` holding the version read from
    `.VERSION`, and packed again.

    # Parameters

    name : Name of the package directory (relative to the parent directory
        of this file's directory).
    verbose : If `True`, print the output of the executed shell commands
        (except for the two `printf` commands).

    The working directory is changed while the package is built and is
    always restored afterwards, also when a step raises an exception.
    """
    orig_dir = os.getcwd()
    base_dir = os.path.join(os.path.abspath(os.path.dirname(Path(__file__))), "..")
    os.chdir(base_dir)

    try:
        with open(".VERSION", "r") as f:
            __version__ = f.readline().strip()

        # generate sdist via setuptools
        output = getoutput(f"{sys.executable} {name}/setup.py sdist")
        if verbose:
            print(output)

        os.chdir(os.path.join(base_dir, "dist"))

        # uncompress the tar.gz sdist
        output = getoutput(f"tar zxvf {name}-{__version__}.tar.gz")
        if verbose:
            print(output)

        # copy setup.py to the top level of the package (required by pip install)
        output = getoutput(
            f"cp {name}-{__version__}/{name}/setup.py {name}-{__version__}/setup.py"
        )
        if verbose:
            print(output)

        # create new source file with version
        getoutput(
            f"printf '__version__ = \"{__version__}\"\n' >> {name}-{__version__}/{name}/_version.py"
        )
        # include it in sources
        getoutput(
            f'printf "\n{name}/_version.py" >> {name}-{__version__}/{name}.egg-info/SOURCES.txt'
        )

        # recompress tar.gz
        output = getoutput(f"tar zcvf {name}-{__version__}.tar.gz {name}-{__version__}/")
        if verbose:
            print(output)

        # remove temporary directory
        output = getoutput(f"rm -r {name}-{__version__}")
        if verbose:
            print(output)
    finally:
        # Never leave the caller in another working directory.
        os.chdir(orig_dir)
if __name__ == "__main__":
    # Build the source distributions of both packages (quietly).
    verbose = False
    for package_name in ("allenact", "allenact_plugins"):
        make_package(package_name, verbose)
================================================
FILE: scripts/run_tests.sh
================================================
#!/usr/bin/env bash
# Runs the tests in tests/ under coverage and generates an HTML coverage
# report (opened automatically on macOS).
echo RUNNING PYTEST WITH COVERAGE
# Run pytest through coverage inside the pipenv environment, measuring the
# sources below the current directory.
pipenv run coverage run -m --source=. pytest tests/
echo DONE
echo ""
echo GENERATING COVERAGE HTML
# NOTE(review): unlike the test run above, this call is not prefixed with
# `pipenv run` - confirm that this is intended.
coverage html
echo HTML GENERATED
# `uname` prints "Darwin" on macOS, where `open` shows the report.
if [ "$(uname)" == "Darwin" ]; then
echo OPENING COVERAGE INFO
open htmlcov/index.html
fi
================================================
FILE: scripts/startx.py
================================================
import atexit
import os
import platform
import re
import shlex
import subprocess
import tempfile
# Turning off automatic black formatting for this script as it breaks quotes.
# fmt: off
def pci_records():
    """Return the PCI devices reported by `lspci -vmm`.

    # Returns

    A list with one dictionary per blank-line separated block of the
    command's output. Each row of a block is split at its tab character; the
    part of the left side before the first `:` is the key, the right side
    the value.
    """
    raw_output = subprocess.check_output(shlex.split("lspci -vmm")).decode()

    records = []
    for device_block in raw_output.strip().split("\n\n"):
        fields = {}
        for row in device_block.split("\n"):
            key, value = row.split("\t")
            fields[key.split(":")[0]] = value
        records.append(fields)

    return records
def generate_xorg_conf(devices):
    """Generate the content of an Xorg configuration file.

    # Parameters

    devices : Sequence of bus ids; a `Device` and a `Screen` section is
        generated for each of them (numbered by position).

    # Returns

    The sections joined by newlines: device and screen section of the first
    bus id, then of the second one, etc., followed by a single
    `ServerLayout` section listing all screens.
    """
    device_template = """
Section "Device"
Identifier "Device{device_id}"
Driver "nvidia"
VendorName "NVIDIA Corporation"
BusID "{bus_id}"
EndSection
"""
    screen_template = """
Section "Screen"
Identifier "Screen{screen_id}"
Device "Device{device_id}"
DefaultDepth 24
Option "AllowEmptyInitialConfiguration" "True"
SubSection "Display"
Depth 24
Virtual 1024 768
EndSubSection
EndSection
"""
    layout_template = """
Section "ServerLayout"
Identifier "Layout0"
{screen_records}
EndSection
"""

    sections = []
    layout_rows = []
    for index, bus_id in enumerate(devices):
        sections += [
            device_template.format(device_id=index, bus_id=bus_id),
            screen_template.format(device_id=index, screen_id=index),
        ]
        layout_rows.append(
            'Screen {screen_id} "Screen{screen_id}" 0 0'.format(screen_id=index)
        )

    sections.append(layout_template.format(screen_records="\n ".join(layout_rows)))
    return "\n".join(sections)
def startx(display=0):
    """Start an Xorg server on `display` and wait until it exits.

    A temporary Xorg configuration is generated covering every PCI device
    whose vendor is `NVIDIA Corporation` and whose class is
    `VGA compatible controller` or `3D controller`.

    # Raises

    Exception : If not running on Linux or if no such device is found.
    """
    if platform.system() != "Linux":
        raise Exception("Can only run startx on linux")

    gpu_classes = ["VGA compatible controller", "3D controller"]
    devices = []
    for record in pci_records():
        if record.get("Vendor", "") != "NVIDIA Corporation":
            continue
        if record["Class"] not in gpu_classes:
            continue
        # The slot's parts are hexadecimal, the bus id uses decimal numbers.
        slot_numbers = [
            str(int(part, 16)) for part in re.split(r"[:\.]", record["Slot"])
        ]
        devices.append("PCI:" + ":".join(slot_numbers))

    if not devices:
        raise Exception("no nvidia cards found")

    fd = None
    path = None
    try:
        fd, path = tempfile.mkstemp()
        with open(path, "w") as conf_file:
            conf_file.write(generate_xorg_conf(devices))

        command = shlex.split(
            "Xorg -noreset +extension GLX +extension RANDR +extension RENDER -config %s :%s"
            % (path, display)
        )
        proc = subprocess.Popen(command)
        # Kill the server at interpreter exit if it is still running.
        atexit.register(lambda: proc.poll() is None and proc.kill())
        proc.wait()
    finally:
        # Close and remove the temporary configuration file (if created).
        if fd is not None:
            os.close(fd)
            os.unlink(path)
# fmt: on
# Start the X server (on the default display) when executed as a script.
if __name__ == "__main__":
startx()
================================================
FILE: tests/.gitignore
================================================
tmp
.DS_Store
!.py
!.gitignore
================================================
FILE: tests/__init__.py
================================================
================================================
FILE: tests/hierarchical_policies/__init__.py
================================================
================================================
FILE: tests/hierarchical_policies/test_minigrid_conditional.py
================================================
import os
from tempfile import mkdtemp
from typing import Dict, Optional, List, Any, cast
import gym
from gym_minigrid.envs import EmptyRandomEnv5x5
from torch import nn
from torch import optim
from torch.optim.lr_scheduler import LambdaLR
from allenact.algorithms.onpolicy_sync.losses.imitation import Imitation
from allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig
from allenact.algorithms.onpolicy_sync.runner import OnPolicyRunner
from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler
from allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor
from allenact.utils.experiment_utils import (
TrainingPipeline,
Builder,
PipelineStage,
LinearDecay,
)
from allenact_plugins.minigrid_plugin.minigrid_sensors import EgocentricMiniGridSensor
from allenact_plugins.minigrid_plugin.minigrid_tasks import MiniGridTaskSampler
from projects.tutorials.minigrid_tutorial_conds import (
ConditionedMiniGridSimpleConvRNN,
ConditionedMiniGridTask,
)
class MiniGridCondTestExperimentConfig(ExperimentConfig):
    """Experiment configuration used to test conditional (hierarchical)
    policies on `EmptyRandomEnv5x5` with imitation and PPO losses."""

    @classmethod
    def tag(cls) -> str:
        return "MiniGridCondTest"

    # Index 0 is the MiniGrid observation, index 1 the expert action.
    SENSORS = [
        EgocentricMiniGridSensor(agent_view_size=5, view_channels=3),
        ExpertActionSensor(
            action_space=gym.spaces.Dict(
                higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)
            )
        ),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the conditioned actor-critic model."""
        minigrid_sensor = cls.SENSORS[0]
        return ConditionedMiniGridSimpleConvRNN(
            action_space=gym.spaces.Dict(
                higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)
            ),
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            num_objects=minigrid_sensor.num_objects,
            num_colors=minigrid_sensor.num_colors,
            num_states=minigrid_sensor.num_states,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        return MiniGridTaskSampler(**kwargs)

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(mode="train", process_ind=process_ind)

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(mode="valid", process_ind=process_ind)

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(mode="test", process_ind=process_ind)

    def _get_sampler_args(self, process_ind: int, mode: str) -> Dict[str, Any]:
        """Generate initialization arguments for train, valid, and test
        TaskSamplers.

        # Parameters

        process_ind : index of the current task sampler
        mode:  one of `train`, `valid`, or `test`
        """
        is_training = mode == "train"

        if is_training:
            # Infinite training tasks without predefined random seeds.
            max_tasks = None
            task_seeds_list = None
        else:
            # 20 tasks for valid, 40 for test (per sampler)
            max_tasks = 40 if mode == "test" else 20
            # One seed for each task to sample; the ranges of different
            # samplers do not overlap and the set of sampled tasks is
            # deterministic.
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))

        return {
            "max_tasks": max_tasks,
            # builder for third-party environment (see `make_env`)
            "env_class": self.make_env,
            # sensors used to return observations to the agent
            "sensors": self.SENSORS,
            # parameters for environment builder (none for now)
            "env_info": {},
            "task_seeds_list": task_seeds_list,
            # randomly sample tasks in training, deterministically otherwise
            "deterministic_sampling": not is_training,
            "task_class": ConditionedMiniGridTask,
        }

    @staticmethod
    def make_env(*args, **kwargs):
        return EmptyRandomEnv5x5()

    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        return dict(nprocesses=4 if mode == "train" else 1, devices=[])

    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        """A single stage trained with both losses, with teacher forcing
        linearly decayed to zero over the first half of the stage."""
        ppo_steps = 512

        named_losses = dict(
            # 0 is Minigrid, 1 is ExpertActionSensor
            imitation_loss=Imitation(cls.SENSORS[1]),
            ppo_loss=PPO(**PPOConfig, entropy_method_name="conditional_entropy"),
        )
        stage = PipelineStage(
            teacher_forcing=LinearDecay(
                startp=1.0,
                endp=0.0,
                steps=ppo_steps // 2,
            ),
            loss_names=["imitation_loss", "ppo_loss"],
            max_stage_steps=ppo_steps,
        )
        optimizer_builder = Builder(
            cast(optim.Optimizer, optim.Adam), dict(lr=1e-4)
        )
        lr_scheduler_builder = Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}  # type:ignore
        )

        return TrainingPipeline(
            named_losses=named_losses,  # type:ignore
            pipeline_stages=[stage],
            optimizer_builder=optimizer_builder,
            num_mini_batch=4,
            update_repeats=3,
            max_grad_norm=0.5,
            num_steps=16,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=None,
            save_interval=10000,
            metric_accumulate_interval=1,
            lr_scheduler_builder=lr_scheduler_builder,
        )
class TestMiniGridCond:
    """End-to-end train-then-test check for the conditioned MiniGrid config."""

    def test_train(self, tmpdir):
        """Train briefly, then evaluate the saved checkpoints with the expert.

        Passes when validation produced at least one result and the last
        test result reports an episode length below 4.
        """
        cfg = MiniGridCondTestExperimentConfig()

        # Settings shared by the training and the testing runner.
        common_runner_kwargs = dict(
            config=cfg,
            output_dir=tmpdir,
            loaded_config_src_files=None,
            seed=12345,
            deterministic_cudnn=False,
            deterministic_agents=False,
            extra_tag="",
            disable_tensorboard=True,
            disable_config_saving=True,
        )

        train_runner = OnPolicyRunner(mode="train", **common_runner_kwargs)
        start_time_str, valid_results = train_runner.start_train(
            checkpoint=None,
            restart_pipeline=False,
            max_sampler_processes_per_worker=1,
            collect_valid_results=True,
        )
        assert len(valid_results) > 0

        # Evaluate only the checkpoints written by the run started above.
        checkpoint_pattern = os.path.join(
            tmpdir, "checkpoints", "**", start_time_str, "*.pt"
        )
        test_runner = OnPolicyRunner(mode="test", **common_runner_kwargs)
        test_results = test_runner.start_test(
            checkpoint_path_dir_or_pattern=checkpoint_pattern,
            max_sampler_processes_per_worker=1,
            inference_expert=True,
        )

        final_result = test_results[-1]
        assert final_result["test-metrics/ep_length"] < 4
# Allow running this test directly as a script, using a fresh temporary
# directory in place of pytest's `tmpdir` fixture.
if __name__ == "__main__":
TestMiniGridCond().test_train(mkdtemp()) # type:ignore
================================================
FILE: tests/manipulathor_plugin/__init__.py
================================================
================================================
FILE: tests/manipulathor_plugin/test_utils.py
================================================
from allenact_plugins.manipulathor_plugin.arm_calculation_utils import (
world_coords_to_agent_coords,
)
class TestArmCalculationUtils(object):
    """Checks for the world-to-agent coordinate conversion helper."""

    def test_translation_functions(self):
        """Convert one object pose into an agent frame and compare positions.

        The agent sits at (1, 0, 2) with a -45 yaw; the object sits at
        (0, 1, 0) with no rotation. The resulting position must match the
        reference values to within 0.01 on every axis.
        """
        agent_coordinate = dict(
            position=dict(x=1, y=0, z=2),
            rotation=dict(x=0, y=-45, z=0),
        )
        obj_coordinate = dict(
            position=dict(x=0, y=1, z=0),
            rotation=dict(x=0, y=0, z=0),
        )

        rotated = world_coords_to_agent_coords(obj_coordinate, agent_coordinate)

        eps = 0.01
        expected_position = {"x": -2.12, "y": 1.0, "z": -0.70}
        assert all(
            abs(rotated["position"][axis] - target) < eps
            for axis, target in expected_position.items()
        )
# Allow running this test directly as a script, without pytest.
if __name__ == "__main__":
TestArmCalculationUtils().test_translation_functions()
================================================
FILE: tests/mapping/__init__.py
================================================
================================================
FILE: tests/mapping/test_ai2thor_mapping.py
================================================
import os
import platform
import random
import sys
import urllib
import urllib.request
import warnings
from collections import defaultdict
# noinspection PyUnresolvedReferences
from tempfile import mkdtemp
from typing import Dict, List, Tuple, cast
# noinspection PyUnresolvedReferences
import ai2thor
# noinspection PyUnresolvedReferences
import ai2thor.wsgi_server
import compress_pickle
import numpy as np
import torch
from allenact.algorithms.onpolicy_sync.storage import RolloutBlockStorage
from allenact.base_abstractions.misc import Memory, ActorCriticOutput
from allenact.embodiedai.mapping.mapping_utils.map_builders import SemanticMapBuilder
from allenact.utils.experiment_utils import set_seed
from allenact.utils.system import get_logger
from allenact.utils.tensor_utils import batch_observations
from allenact_plugins.ithor_plugin.ithor_sensors import (
RelativePositionChangeTHORSensor,
ReachableBoundsTHORSensor,
BinnedPointCloudMapTHORSensor,
SemanticMapTHORSensor,
)
from allenact_plugins.ithor_plugin.ithor_util import get_open_x_displays
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from constants import ABS_PATH_OF_TOP_LEVEL_DIR
class TestAI2THORMapSensors(object):
def setup_path_for_use_with_rearrangement_project(self) -> bool:
    """Prepare the working directory and `sys.path` for the rearrangement project.

    Returns `False`, after emitting a warning through both `warnings` and
    the logger, when no display is available on a non-macOS machine or when
    the `rearrange` package cannot be imported. Returns `True` otherwise.

    Side effects on the success path (and on the import-failure path): the
    working directory is changed to `ABS_PATH_OF_TOP_LEVEL_DIR` and the
    `projects/ithor_rearrangement` directory is appended to `sys.path`.
    """
    running_on_mac = platform.system() == "Darwin"
    if not running_on_mac and len(get_open_x_displays()) == 0:
        wrn_msg = "Cannot run tests as there seem to be no open displays!"
        warnings.warn(wrn_msg)
        get_logger().warning(wrn_msg)
        return False

    os.chdir(ABS_PATH_OF_TOP_LEVEL_DIR)
    rearrangement_dir = os.path.join(
        ABS_PATH_OF_TOP_LEVEL_DIR, "projects/ithor_rearrangement"
    )
    sys.path.append(rearrangement_dir)

    try:
        import rearrange
    except ImportError:
        wrn_msg = (
            "Could not import `rearrange`. Is it possible you have"
            " not initialized the submodules (i.e. by running"
            " `git submodule init; git submodule update;`)?"
        )
        warnings.warn(wrn_msg)
        get_logger().warning(wrn_msg)
        return False
    else:
        return True
def test_binned_and_semantic_mapping(self, tmpdir):
    """Step walkthrough tasks and compare map-sensor observations to references.

    Reference observations are downloaded into `tmpdir`. Five tasks are
    sampled (seeded with their index) and stepped with random actions until
    done; every observation is passed to `compare_recursive` together with
    the matching reference observation.

    Returns early, without testing anything, when
    `setup_path_for_use_with_rearrangement_project` returns `False`. The
    task sampler is always closed on exit if it was created.
    """
    try:
        if not self.setup_path_for_use_with_rearrangement_project():
            return

        from baseline_configs.rearrange_base import RearrangeBaseExperimentConfig
        from baseline_configs.walkthrough.walkthrough_rgb_base import (
            WalkthroughBaseExperimentConfig,
        )
        from rearrange.constants import (
            FOV,
            PICKUPABLE_OBJECTS,
            OPENABLE_OBJECTS,
        )
        from datagen.datagen_utils import get_scenes

        ORDERED_OBJECT_TYPES = list(sorted(PICKUPABLE_OBJECTS + OPENABLE_OBJECTS))

        map_range_sensor = ReachableBoundsTHORSensor(margin=1.0)
        map_info = dict(
            map_range_sensor=map_range_sensor,
            vision_range_in_cm=40 * 5,
            map_size_in_cm=1050,
            resolution_in_cm=5,
        )
        map_sensors = [
            RelativePositionChangeTHORSensor(),
            map_range_sensor,
            DepthSensorThor(
                height=224,
                width=224,
                use_normalization=False,
                uuid="depth",
            ),
            BinnedPointCloudMapTHORSensor(
                fov=FOV,
                ego_only=False,
                **map_info,
            ),
            SemanticMapTHORSensor(
                fov=FOV,
                ego_only=False,
                ordered_object_types=ORDERED_OBJECT_TYPES,
                **map_info,
            ),
        ]
        all_sensors = [*WalkthroughBaseExperimentConfig.SENSORS, *map_sensors]

        # Best effort: fall back to no explicit x display if none can be found.
        open_x_displays = []
        try:
            open_x_displays = get_open_x_displays()
        except (AssertionError, IOError):
            pass
        walkthrough_task_sampler = WalkthroughBaseExperimentConfig.make_sampler_fn(
            stage="train",
            sensors=all_sensors,
            scene_to_allowed_rearrange_inds={s: [0] for s in get_scenes("train")},
            force_cache_reset=True,
            allowed_scenes=None,
            seed=1,
            x_display=open_x_displays[0] if len(open_x_displays) != 0 else None,
            thor_controller_kwargs={
                **RearrangeBaseExperimentConfig.THOR_CONTROLLER_KWARGS,
                # "server_class": ai2thor.wsgi_server.WsgiServer,  # Only for debugging
            },
        )

        targets_path = os.path.join(tmpdir, "rearrange_mapping_examples.pkl.gz")
        urllib.request.urlretrieve(
            "https://ai2-prior-allenact-public-test.s3-us-west-2.amazonaws.com/ai2thor_mapping/rearrange_mapping_examples.pkl.gz",
            targets_path,
        )
        goal_obs_dict = compress_pickle.load(targets_path)

        def compare_recursive(obs, goal_obs, key_list: List):
            """Walk `obs`/`goal_obs` in parallel and compare the numpy leaves.

            `key_list` records the path taken so far and is only used in
            assertion messages. Only the structure check (leaves must be
            numpy arrays) and the rotation sanity check are asserted; the
            stricter value comparisons are currently commented out.
            """
            if isinstance(obs, Dict):
                for k in goal_obs:
                    compare_recursive(
                        obs=obs[k], goal_obs=goal_obs[k], key_list=key_list + [k]
                    )
            elif isinstance(obs, (List, Tuple)):
                for i in range(len(goal_obs)):
                    compare_recursive(
                        obs=obs[i], goal_obs=goal_obs[i], key_list=key_list + [i]
                    )
            else:
                # Should be a numpy array at this point
                assert isinstance(obs, np.ndarray) and isinstance(
                    goal_obs, np.ndarray
                ), f"After {key_list}, not numpy arrays, obs={obs}, goal_obs={goal_obs}"

                # Multiplying by 1.0 yields float copies, so the in-place
                # NaN masking below does not modify the caller's arrays.
                obs = 1.0 * obs
                goal_obs = 1.0 * goal_obs

                goal_where_nan = np.isnan(goal_obs)
                obs_where_nan = np.isnan(obs)

                where_nan_not_equal = (goal_where_nan != obs_where_nan).sum()
                # assert (
                #     where_nan_not_equal.sum() <= 1
                #     and where_nan_not_equal.mean() < 1e3
                # )

                where_nan = np.logical_or(goal_where_nan, obs_where_nan)
                obs[where_nan] = 0.0
                goal_obs[where_nan] = 0.0

                def special_mean(v):
                    # Sum out trailing axes until at most 2 remain, then average.
                    while len(v.shape) > 2:
                        v = v.sum(-1)
                    return v.mean()

                # Relative error; the denominator is at least 1 everywhere.
                numer = np.abs(obs - goal_obs)
                denom = np.abs(
                    np.stack((obs, goal_obs, np.ones_like(obs)), axis=0)
                ).max(0)
                difference = special_mean(numer / denom)
                # assert (
                #     difference < 1.2e-3
                # ), f"Difference of {np.abs(obs - goal_obs).mean()} at {key_list}."

                if (
                    len(obs.shape) >= 2
                    and obs.shape[0] == obs.shape[1]
                    and obs.shape[0] > 1
                ):
                    # Sanity check that rotating the observations makes them not-equal
                    rot_obs = np.rot90(obs)
                    numer = np.abs(rot_obs - goal_obs)
                    denom = np.abs(
                        np.stack((rot_obs, goal_obs, np.ones_like(obs)), axis=0)
                    ).max(0)
                    rot_difference = special_mean(numer / denom)
                    assert (
                        difference < rot_difference or (obs == rot_obs).all()
                    ), f"Too small a difference ({(numer / denom).mean()})."

        observations_dict = defaultdict(lambda: [])
        for i in range(5):  # Why 5, why not 5?
            set_seed(i)
            task = walkthrough_task_sampler.next_task()

            obs_list = observations_dict[i]
            obs_list.append(task.get_observations())
            k = 0
            compare_recursive(
                obs=obs_list[0], goal_obs=goal_obs_dict[i][0], key_list=[i, k]
            )
            while not task.is_done():
                obs = task.step(
                    action=task.action_names().index(
                        random.choice(
                            3
                            * [
                                "move_ahead",
                                "rotate_right",
                                "rotate_left",
                                "look_up",
                                "look_down",
                            ]
                            + ["done"]
                        )
                    )
                ).observation
                k += 1
                obs_list.append(obs)
                compare_recursive(
                    obs=obs,
                    goal_obs=goal_obs_dict[i][task.num_steps_taken()],
                    key_list=[i, k],
                )

                # Free space metric map in RGB using pointclouds coming from depth images. This
                # is built iteratively after every step.
                # R - is used to encode points at a height < 0.02m (i.e. the floor)
                # G - is used to encode points at a height between 0.02m and 2m, i.e. objects the agent would run into
                # B - is used to encode points higher than 2m, i.e. ceiling

                # Uncomment if you wish to visualize the observations.
                # NOTE: this must stay commented out in automated runs:
                # `plt.show()` blocks until the window is closed and requires
                # matplotlib plus a display.
                # import matplotlib.pyplot as plt
                # plt.imshow(
                #     np.flip(255 * (obs["binned_pc_map"]["map"] > 0), 0)
                # )  # np.flip because we expect "up" to be -row
                # plt.title("Free space map")
                # plt.show()
                # plt.close()

                # See also `obs["binned_pc_map"]["egocentric_update"]` to see
                # the metric map from the point of view of the agent before it is
                # rotated into the world-space coordinates and merged with past observations.

                # Semantic map in RGB which is iteratively revealed using depth maps to figure out what
                # parts of the scene the agent has seen so far.
                # This map has shape 210x210x72 with the 72 channels corresponding to the 72
                # object types in `ORDERED_OBJECT_TYPES`
                semantic_map = obs["semantic_map"]["map"]

                # We can't display all 72 channels in an RGB image so instead we randomly assign
                # each object a color and then just allow them to overlap each other
                colored_semantic_map = (
                    SemanticMapBuilder.randomly_color_semantic_map(semantic_map)
                )

                # Here's the full semantic map with nothing masked out because the agent
                # hasn't seen it yet
                colored_semantic_map_no_fog = (
                    SemanticMapBuilder.randomly_color_semantic_map(
                        map_sensors[
                            -1
                        ].semantic_map_builder.ground_truth_semantic_map
                    )
                )

                # Uncomment if you wish to visualize the observations:
                # import matplotlib.pyplot as plt
                # plt.imshow(
                #     np.flip(  # np.flip because we expect "up" to be -row
                #         np.concatenate(
                #             (
                #                 colored_semantic_map,
                #                 255 + 0 * colored_semantic_map[:, :10, :],
                #                 colored_semantic_map_no_fog,
                #             ),
                #             axis=1,
                #         ),
                #         0,
                #     )
                # )
                # plt.title("Semantic map with and without exploration fog")
                # plt.show()
                # plt.close()

                # See also
                # * `obs["semantic_map"]["egocentric_update"]`
                # * `obs["semantic_map"]["explored_mask"]`
                # * `obs["semantic_map"]["egocentric_mask"]`

        # To save observations for comparison against future runs, uncomment the below.
        # os.makedirs("tmp_out", exist_ok=True)
        # compress_pickle.dump(
        #     {**observations_dict}, "tmp_out/rearrange_mapping_examples.pkl.gz"
        # )
    finally:
        # The sampler name is unbound if setup failed or returned early.
        try:
            walkthrough_task_sampler.close()
        except NameError:
            pass
# Run a pretrained walkthrough mapping agent for up to 10 steps on each of 5
# sampled tasks and assert that the binned-map and semantic-map losses stay
# below fixed thresholds at every step.
# Returns early, without testing anything, when the rearrangement project
# cannot be set up. The task sampler is closed on exit if it was created.
def test_pretrained_rearrange_walkthrough_mapping_agent(self, tmpdir):
try:
if not self.setup_path_for_use_with_rearrangement_project():
return
from baseline_configs.rearrange_base import RearrangeBaseExperimentConfig
from baseline_configs.walkthrough.walkthrough_rgb_mapping_ppo import (
WalkthroughRGBMappingPPOExperimentConfig,
)
from rearrange.constants import (
FOV,
PICKUPABLE_OBJECTS,
OPENABLE_OBJECTS,
)
from datagen.datagen_utils import get_scenes
# Best effort: fall back to no explicit x display if none can be found.
open_x_displays = []
try:
open_x_displays = get_open_x_displays()
except (AssertionError, IOError):
pass
walkthrough_task_sampler = (
WalkthroughRGBMappingPPOExperimentConfig.make_sampler_fn(
stage="train",
scene_to_allowed_rearrange_inds={
s: [0] for s in get_scenes("train")
},
force_cache_reset=True,
allowed_scenes=None,
seed=2,
x_display=open_x_displays[0] if len(open_x_displays) != 0 else None,
)
)
# The losses are taken from the experiment's own training pipeline.
named_losses = (
WalkthroughRGBMappingPPOExperimentConfig.training_pipeline()._named_losses
)
ckpt_path = os.path.join(
tmpdir, "pretrained_walkthrough_mapping_agent_75mil.pt"
)
# Download the checkpoint only if it is not already present in `tmpdir`.
if not os.path.exists(ckpt_path):
urllib.request.urlretrieve(
"https://prior-model-weights.s3.us-east-2.amazonaws.com/embodied-ai/rearrangement/walkthrough/pretrained_walkthrough_mapping_agent_75mil.pt",
ckpt_path,
)
state_dict = torch.load(
ckpt_path,
map_location="cpu",
)
walkthrough_model = WalkthroughRGBMappingPPOExperimentConfig.create_model()
walkthrough_model.load_state_dict(state_dict["model_state_dict"])
# Recurrent memory for a single sampler; it is carried across steps and tasks.
memory = RolloutBlockStorage.create_memory(
spec=walkthrough_model.recurrent_memory_specification, num_samplers=1
).step_squeeze(0)
masks = torch.FloatTensor([0]).view(1, 1, 1)
binned_map_losses = []
semantic_map_losses = []
for i in range(5):
# Masks are zero on the first step of every task and set to one afterwards.
# NOTE(review): presumably a zero mask tells the model to reset its
# recurrent state at episode start -- confirm against the model.
masks = 0 * masks
set_seed(i + 1)
task = walkthrough_task_sampler.next_task()
# Recursively add a leading dimension to every tensor in a (nested) dict.
def add_step_dim(input):
if isinstance(input, torch.Tensor):
return input.unsqueeze(0)
elif isinstance(input, Dict):
return {k: add_step_dim(v) for k, v in input.items()}
else:
raise NotImplementedError
batch = add_step_dim(batch_observations([task.get_observations()]))
while not task.is_done():
# noinspection PyTypeChecker
ac_out, memory = cast(
Tuple[ActorCriticOutput, Memory],
walkthrough_model.forward(
observations=batch,
memory=memory,
prev_actions=None,
masks=masks,
),
)
binned_map_losses.append(
named_losses["binned_map_loss"]
.loss(
step_count=0, # Not used in this loss
batch={"observations": batch},
actor_critic_output=ac_out,
)[0]
.item()
)
assert (
binned_map_losses[-1] < 0.16
), f"Binned map loss to large at ({i}, {task.num_steps_taken()})"
semantic_map_losses.append(
named_losses["semantic_map_loss"]
.loss(
step_count=0, # Not used in this loss
batch={"observations": batch},
actor_critic_output=ac_out,
)[0]
.item()
)
assert (
semantic_map_losses[-1] < 0.004
), f"Semantic map loss to large at ({i}, {task.num_steps_taken()})"
masks = masks.fill_(1.0)
# Act by sampling from the policy's action distribution.
obs = task.step(
action=ac_out.distributions.sample().item()
).observation
batch = add_step_dim(batch_observations([obs]))
# Cap every task at 10 steps to keep the test short.
if task.num_steps_taken() >= 10:
break
# NOTE(review): `observations_dict` is not defined in this method, so the
# snippet below would need adapting before it could be uncommented.
# To save observations for comparison against future runs, uncomment the below.
# os.makedirs("tmp_out", exist_ok=True)
# compress_pickle.dump(
# {**observations_dict}, "tmp_out/rearrange_mapping_examples.pkl.gz"
# )
finally:
# The sampler name is unbound if setup failed or returned early.
try:
walkthrough_task_sampler.close()
except NameError:
pass
# Script entry point: runs the binned/semantic mapping test against a fresh
# temporary directory. The commented alternatives are kept for local debugging.
if __name__ == "__main__":
TestAI2THORMapSensors().test_binned_and_semantic_mapping(mkdtemp()) # type:ignore
# TestAI2THORMapSensors().test_binned_and_semantic_mapping("tmp_out") # Used for local debugging
# TestAI2THORMapSensors().test_pretrained_rearrange_walkthrough_mapping_agent(
# mkdtemp() # "tmp_out"
# ) # Used for local debugging
================================================
FILE: tests/multiprocessing/__init__.py
================================================
================================================
FILE: tests/multiprocessing/test_frozen_attribs.py
================================================
from typing import Dict, Any
import torch.multiprocessing as mp
import torch.nn as nn
from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import TrainingPipeline
# Minimal experiment config used by the frozen-attribute tests below.
# It does not define `machine_params`, and `test_frozen_experiment_config`
# expects instantiating it to raise RuntimeError or TypeError.
# noinspection PyAbstractClass,PyTypeChecker
class MyConfig(ExperimentConfig):
# Class-level variable the tests try to reassign.
MY_VAR: int = 3
@classmethod
def tag(cls) -> str:
return ""
# The three factory methods below are stubs that return None.
@classmethod
def training_pipeline(cls, **kwargs) -> TrainingPipeline:
return None
@classmethod
def create_model(cls, **kwargs) -> nn.Module:
return None
@classmethod
def make_sampler_fn(cls, **kwargs) -> TaskSampler:
return None
# Assertion helper: fails unless this object's MY_VAR equals `val`.
def my_var_is(self, val):
assert self.MY_VAR == val
# Specialisation of `MyConfig` that overrides MY_VAR and the tag and adds
# `machine_params`; the tests instantiate this class.
# noinspection PyAbstractClass
class MySpecConfig(MyConfig):
MY_VAR = 6
# Returns an empty parameter dict regardless of `mode`.
@classmethod
def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
return {}
@classmethod
def tag(cls) -> str:
return "SpecTag"
# Module-level config instance; `test_frozen_experiment_config` mutates it
# and passes it to subprocesses.
scfg = MySpecConfig()
class TestFrozenAttribs(object):
    """Tests that class variables are frozen while instance attributes are not."""

    def test_frozen_inheritance(self):
        """Class-level assignment must fail; instance-level assignment must work."""
        from abc import abstractmethod
        from allenact.base_abstractions.experiment_config import FrozenClassVariables

        class SomeBase(metaclass=FrozenClassVariables):
            yar = 3

            @abstractmethod
            def use(self):
                raise NotImplementedError()

        class SomeDerived(SomeBase):
            yar = 33

            def use(self):
                return self.yar

        failed = False
        try:
            SomeDerived.yar = 6  # Error
        except Exception as _:
            failed = True
        assert failed

        inst = SomeDerived()
        inst2 = SomeDerived()
        inst.yar = 12  # No error
        # The instance-level override must not leak to other instances.
        assert inst.use() == 12
        assert inst2.use() == 33

    @staticmethod
    def my_func(config, val):
        """Subprocess target: assert that `config.MY_VAR` equals `val`."""
        config.my_var_is(val)

    def test_frozen_experiment_config(self):
        """Check freezing on the experiment configs, including in subprocesses.

        Instantiating `MyConfig` must fail, class-level assignment on either
        config class must raise `RuntimeError`, and a value assigned on the
        `scfg` instance must be visible both locally and in child processes
        started with the `forkserver` and `fork` start methods.
        """
        val = 5

        failed = False
        try:
            MyConfig()
        except (RuntimeError, TypeError):
            failed = True
        assert failed

        # Instance-level assignment is allowed and takes effect.
        scfg.MY_VAR = val
        scfg.my_var_is(val)

        failed = False
        try:
            MyConfig.MY_VAR = val
        except RuntimeError:
            failed = True
        assert failed

        failed = False
        try:
            MySpecConfig.MY_VAR = val
        except RuntimeError:
            failed = True
        assert failed

        for fork_method in ["forkserver", "fork"]:
            ctxt = mp.get_context(fork_method)
            p = ctxt.Process(target=self.my_func, kwargs=dict(config=scfg, val=val))
            p.start()
            p.join()
            # An assertion failing inside the child only shows up as a
            # non-zero exit code, so it has to be checked explicitly.
            assert p.exitcode == 0, (
                f"Subprocess started with '{fork_method}' exited with code"
                f" {p.exitcode}; MY_VAR was not {val} in the child process."
            )
# Allow running both tests directly as a script, without pytest.
if __name__ == "__main__":
TestFrozenAttribs().test_frozen_inheritance() # type:ignore
TestFrozenAttribs().test_frozen_experiment_config() # type:ignore
================================================
FILE: tests/sync_algs_cpu/__init__.py
================================================
================================================
FILE: tests/sync_algs_cpu/test_to_to_obj_trains.py
================================================
import io
import math
import os
import pathlib
from contextlib import redirect_stdout, redirect_stderr
from typing import Optional, List, Dict, Any
import torch
from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
AbstractActorCriticLoss,
)
from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.algorithms.onpolicy_sync.runner import OnPolicyRunner
from allenact.algorithms.onpolicy_sync.storage import (
StreamingStorageMixin,
ExperienceStorage,
RolloutBlockStorage,
)
from allenact.base_abstractions.experiment_config import MachineParams
from allenact.base_abstractions.misc import (
Memory,
GenericAbstractLoss,
ModelType,
LossOutput,
)
from allenact.utils.experiment_utils import PipelineStage, StageComponent
from allenact.utils.misc_utils import prepare_locals_for_super
from projects.babyai_baselines.experiments.go_to_obj.ppo import (
PPOBabyAIGoToObjExperimentConfig,
)
# Fixture data for `SillyStorage`: batch i holds SILLY_STORAGE_VALUES[i]
# repeated SILLY_STORAGE_REPEATS[i] times. Both lists must have equal length.
SILLY_STORAGE_VALUES = [1.0, 2.0, 3.0, 4.0]
SILLY_STORAGE_REPEATS = [1, 2, 3, 4]
# Loss that ignores all of its inputs and always reports the same constant.
class FixedConstantLoss(AbstractActorCriticLoss):
# name: key under which the value is reported; value: the constant returned.
def __init__(self, name: str, value: float):
super().__init__()
self.name = name
self.value = value
# Returns a (loss value, info dict) pair; the dict maps `name` to `value`.
def loss( # type: ignore
self,
*args,
**kwargs,
):
return self.value, {self.name: self.value}
class SillyStorage(ExperienceStorage, StreamingStorageMixin):
    """Streaming storage that replays a fixed list of constant-valued batches.

    Batch `i` is a dict whose `"value"` entry is a tensor holding
    `values_to_return[i]` repeated `repeats[i]` times. Everything that would
    normally record experience is a no-op.
    """

    def __init__(self, values_to_return: List[float], repeats: List[int]):
        self.values_to_return = values_to_return
        self.repeats = repeats
        assert len(self.values_to_return) == len(self.repeats)
        # Position of the next batch to hand out.
        self.index = 0

    def initialize(self, *, observations: ObservationType, **kwargs):
        """No-op: nothing needs initialising."""

    def add(
        self,
        observations: ObservationType,
        memory: Optional[Memory],
        actions: torch.Tensor,
        action_log_probs: torch.Tensor,
        value_preds: torch.Tensor,
        rewards: torch.Tensor,
        masks: torch.Tensor,
    ):
        """No-op: incoming experience is discarded."""

    def to(self, device: torch.device):
        """No-op: nothing is moved between devices."""

    def set_partition(self, index: int, num_parts: int):
        """No-op: the stream is not partitioned."""

    @property
    def total_experiences(self) -> int:
        """Always 0; assignments to this property are ignored."""
        return 0

    @total_experiences.setter
    def total_experiences(self, value: int):
        pass

    def next_batch(self) -> Dict[str, Any]:
        """Return the next batch, raising `EOFError` once all were returned."""
        position = self.index
        if position >= len(self.values_to_return):
            raise EOFError

        constant = self.values_to_return[position]
        count = self.repeats[position]
        batch = {"value": torch.tensor([constant] * count)}

        self.index = position + 1
        return batch

    def reset_stream(self):
        """Rewind so that `next_batch` starts from the first batch again."""
        self.index = 0

    def empty(self) -> bool:
        """Return True when there are no values to stream at all."""
        return not self.values_to_return
class AverageBatchValueLoss(GenericAbstractLoss):
    """Loss equal to the mean of the batch's `"value"` entry."""

    def loss(
        self,
        *,
        model: ModelType,
        batch: ObservationType,
        batch_memory: Memory,
        stream_memory: Memory,
    ) -> LossOutput:
        """Average `batch["value"]` and report it as `avg_batch_val`.

        Both memories are passed through unchanged; the batch size is the
        length of the first dimension of `batch["value"]`.
        """
        values = batch["value"]
        mean_value = values.mean()
        batch_size = values.shape[0]

        return LossOutput(
            value=mean_value,
            info={"avg_batch_val": mean_value},
            per_epoch_info={},
            batch_memory=batch_memory,
            stream_memory=stream_memory,
            bsize=batch_size,
        )
# Test variant of the BabyAI GoToObj PPO config: adds a constant loss and a
# debug storage/loss pair, uses a single validation process, and saves
# NUM_CKPTS_TO_SAVE checkpoints over the training run.
class PPOBabyAIGoToObjTestExperimentConfig(PPOBabyAIGoToObjExperimentConfig):
NUM_CKPTS_TO_SAVE = 2
@classmethod
def tag(cls):
return "BabyAIGoToObjPPO-TESTING"
# Same as the parent's machine params, except that validation is forced to
# one process (all other fields are copied over unchanged).
@classmethod
def machine_params(cls, mode="train", **kwargs):
mp = super().machine_params(mode=mode, **kwargs)
if mode == "valid":
mp = MachineParams(
nprocesses=1,
devices=mp.devices,
sensor_preprocessor_graph=mp.sensor_preprocessor_graph,
sampler_devices=mp.sampler_devices,
visualizer=mp.visualizer,
local_worker_ids=mp.local_worker_ids,
)
return mp
# Training uses PPO plus the constant "3_loss" on the on-policy storage;
# validation uses the same two losses; testing uses only "avg_value_loss",
# fed from "silly_storage".
@classmethod
def training_pipeline(cls, **kwargs):
total_train_steps = cls.TOTAL_RL_TRAIN_STEPS
ppo_info = cls.rl_loss_default("ppo", steps=total_train_steps)
tp = cls._training_pipeline(
named_losses={
"ppo_loss": ppo_info["loss"],
"3_loss": FixedConstantLoss("3_loss", 3.0),
"avg_value_loss": AverageBatchValueLoss(),
},
named_storages={
"onpolicy": RolloutBlockStorage(),
"silly_storage": SillyStorage(
values_to_return=SILLY_STORAGE_VALUES, repeats=SILLY_STORAGE_REPEATS
),
},
pipeline_stages=[
PipelineStage(
loss_names=["ppo_loss", "3_loss"],
max_stage_steps=total_train_steps,
stage_components=[
StageComponent(
uuid="onpolicy",
storage_uuid="onpolicy",
loss_names=["ppo_loss", "3_loss"],
)
],
),
],
num_mini_batch=ppo_info["num_mini_batch"],
update_repeats=ppo_info["update_repeats"],
total_train_steps=total_train_steps,
valid_pipeline_stage=PipelineStage(
loss_names=["ppo_loss", "3_loss"],
max_stage_steps=-1,
update_repeats=1,
num_mini_batch=1,
),
test_pipeline_stage=PipelineStage(
loss_names=["avg_value_loss"],
stage_components=[
StageComponent(
uuid="debug",
storage_uuid="silly_storage",
loss_names=["avg_value_loss"],
),
],
max_stage_steps=-1,
update_repeats=1,
num_mini_batch=1,
),
)
# Spread NUM_CKPTS_TO_SAVE checkpoints over the run (interval rounded up).
tp.training_settings.save_interval = int(
math.ceil(cls.TOTAL_RL_TRAIN_STEPS / cls.NUM_CKPTS_TO_SAVE)
)
return tp
# Validation reuses the test-time sampler arguments.
# NOTE: the arguments are forwarded via `locals()`, so introducing any local
# variable before the call below would change what gets forwarded.
def valid_task_sampler_args(
self,
process_ind: int,
total_processes: int,
devices: Optional[List[int]] = None,
seeds: Optional[List[int]] = None,
deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
# Also run validation
return self.test_task_sampler_args(**prepare_locals_for_super(locals()))
# Wrapper context manager to redirect stdout and stderr to a file when potentially
# using pytest capsys
class RedirectOutput:
    """Context manager collecting stdout/stderr text written inside a `with` block.

    `sys.stdout` and `sys.stderr` are redirected into one in-memory buffer.
    When pytest's `capsys` and/or `capfd` fixtures are supplied, whatever
    they captured during the block is collected as well.

    # Parameters

    capsys : pytest `capsys` fixture, or `None` when not running under pytest.
    capfd : pytest `capfd` fixture, or `None` when not running under pytest.
    """

    def __init__(self, capsys: Optional[Any], capfd: Optional[Any]):
        self.capsys = capsys
        self.capfd = capfd
        # Single buffer shared by both streams, so ordering is preserved.
        self.f = io.StringIO()
        self.redirect_stdout = redirect_stdout(self.f)
        self.redirect_stderr = redirect_stderr(self.f)
        self.capsys_output = ""
        self.capfd_output = ""

    def get_output(self):
        """Return the redirected text followed by the capsys and capfd text."""
        return self.f.getvalue() + self.capsys_output + self.capfd_output

    def __enter__(self):
        """Start redirecting and return `self` for use in `with ... as`."""
        if self.capsys is not None:
            self.capsys.readouterr()  # Clear out any existing output
        if self.capfd is not None:
            self.capfd.readouterr()  # Clear out any existing output

        self.redirect_stdout.__enter__()
        self.redirect_stderr.__enter__()
        return self

    def __exit__(self, *args):
        """Collect fixture output and restore the original streams.

        Returns `None`, so exceptions raised inside the block propagate.
        """
        try:
            if self.capsys is not None:
                captured = self.capsys.readouterr()
                self.capsys_output = captured.out + captured.err

            if self.capfd is not None:
                captured = self.capfd.readouterr()
                self.capfd_output = captured.out + captured.err
        finally:
            # Always restore the streams, in reverse order of entry, even if
            # reading from a capture fixture failed.
            self.redirect_stderr.__exit__(*args)
            self.redirect_stdout.__exit__(*args)
class TestGoToObjTrains:
    """End-to-end check that PPO trains on the BabyAI GoToObj task."""

    def test_ppo_trains(self, capfd, tmpdir):
        """Train, parse the logged metrics, then test the saved checkpoints.

        # Parameters

        capfd : pytest `capfd` fixture, or `None` when run as a script.
        tmpdir : directory for outputs; a pytest `tmpdir` or a `pathlib.Path`.
        """
        cfg = PPOBabyAIGoToObjTestExperimentConfig()

        d = tmpdir / "test_ppo_trains"
        if isinstance(d, pathlib.Path):
            d.mkdir(parents=True, exist_ok=True)
        else:
            d.mkdir()
        output_dir = str(d)

        train_runner = OnPolicyRunner(
            config=cfg,
            output_dir=output_dir,
            loaded_config_src_files=None,
            seed=1,
            mode="train",
            deterministic_cudnn=True,
        )

        output_redirector = RedirectOutput(capsys=None, capfd=capfd)
        with output_redirector:
            start_time_str = train_runner.start_train(
                max_sampler_processes_per_worker=1
            )
        s = output_redirector.get_output()

        def extract_final_metrics_from_log(s: str, mode: str):
            """Parse the last log line mentioning `mode` into a metrics dict.

            The text between the last `)` and the following `[` is read as
            space-separated `key value` pairs; values that parse as floats
            are converted, all others are kept as strings.
            """
            lines = s.splitlines()
            lines = [l for l in lines if mode.upper() in l]
            try:
                metrics_and_losses_list = (
                    lines[-1].split(")")[-1].split("[")[0].strip().split(" ")
                )
            except IndexError:
                raise RuntimeError(f"Failed to parse log:\n{s}")

            def try_float(f):
                try:
                    return float(f)
                except ValueError:
                    return f

            metrics_and_losses_dict = {
                k: try_float(v)
                for k, v in zip(
                    metrics_and_losses_list[::2], metrics_and_losses_list[1::2]
                )
            }
            return metrics_and_losses_dict

        train_metrics = extract_final_metrics_from_log(s, "train")
        assert train_metrics["global_batch_size"] == 256

        valid_metrics = extract_final_metrics_from_log(s, "valid")
        assert valid_metrics["3_loss/3_loss"] == 3, "Incorrect validation loss"
        assert (
            valid_metrics["new_tasks_completed"] == cfg.NUM_TEST_TASKS
        ), "Incorrect number of tasks evaluated in validation"

        test_runner = OnPolicyRunner(
            config=cfg,
            output_dir=output_dir,
            loaded_config_src_files=None,
            seed=1,
            mode="test",
            deterministic_cudnn=True,
        )
        test_results = test_runner.start_test(
            checkpoint_path_dir_or_pattern=os.path.join(
                output_dir, "checkpoints", "**", start_time_str, "*.pt"
            ),
            max_sampler_processes_per_worker=1,
        )

        assert (
            len(test_results) == 2
        ), f"Too many or too few test results ({test_results})"

        tr = test_results[-1]
        # Training stops at the first multiple of a full rollout batch that
        # reaches TOTAL_RL_TRAIN_STEPS.
        assert (
            tr["training_steps"]
            == round(
                math.ceil(
                    cfg.TOTAL_RL_TRAIN_STEPS
                    / (cfg.ROLLOUT_STEPS * cfg.NUM_TRAIN_SAMPLERS)
                )
            )
            * cfg.ROLLOUT_STEPS
            * cfg.NUM_TRAIN_SAMPLERS
        ), "Incorrect number of training steps"
        assert len(tr["tasks"]) == cfg.NUM_TEST_TASKS, "Incorrect number of test tasks"
        assert tr["test-metrics/success"] == sum(
            task["success"] for task in tr["tasks"]
        ) / len(tr["tasks"]), "Success counts don't seem to match"
        # The message must read the same key as the condition; the results
        # dict is only ever indexed with the "test-metrics/" prefix here.
        assert (
            tr["test-metrics/success"] > 0.95
        ), f"PPO did not seem to converge for the go_to_obj task (success {tr['test-metrics/success']})."
        # Mean over all streamed elements: each value weighted by its repeats.
        assert tr["test-debug-losses/avg_value_loss/avg_batch_val"] == sum(
            ssv * ssr for ssv, ssr in zip(SILLY_STORAGE_VALUES, SILLY_STORAGE_REPEATS)
        ) / sum(SILLY_STORAGE_REPEATS)
        # The size of batch i is SILLY_STORAGE_REPEATS[i], so the expected
        # average batch size is the mean of the repeats.
        assert tr["test-debug-misc/worker_batch_size"] == sum(
            SILLY_STORAGE_REPEATS
        ) / len(SILLY_STORAGE_REPEATS)
if __name__ == "__main__":
    # `test_ppo_trains` takes `(capfd, tmpdir)`. Pass both by keyword so the
    # output path is not bound to `capfd`; `capfd=None` disables the pytest
    # capture handling when run as a script.
    TestGoToObjTrains().test_ppo_trains(
        capfd=None, tmpdir=pathlib.Path("experiment_output/testing")
    )  # type:ignore
================================================
FILE: tests/utils/__init__.py
================================================
================================================
FILE: tests/utils/test_inference_agent.py
================================================
from collections import Counter
import torch
from allenact.utils.experiment_utils import set_seed
from allenact.utils.inference import InferenceAgent
from projects.babyai_baselines.experiments.go_to_obj.ppo import (
PPOBabyAIGoToObjExperimentConfig,
)
from packaging.version import parse
# Reference per-episode metrics for the ten episodes run by the test below.
# The values differ between torch versions, so one table is kept per version
# range.
if parse(torch.__version__) >= parse("2.0.0"):
    # One successful episode followed by nine timeouts.
    expected_results = [
        {
            "ep_length": 39,
            "reward": 0.45999999999999996,
            "task_info": {},
            "success": 1.0,
        },
        *(
            {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0}
            for _ in range(9)
        ),
    ]
else:
    # Three timeouts, two successful episodes, then five more timeouts.
    expected_results = (
        [{"ep_length": 64, "reward": 0.0, "success": 0.0} for _ in range(3)]
        + [
            {"ep_length": 17, "reward": 0.7646153846153846, "success": 1.0},
            {"ep_length": 22, "reward": 0.6953846153846154, "success": 1.0},
        ]
        + [{"ep_length": 64, "reward": 0.0, "success": 0.0} for _ in range(5)]
    )
class TestInferenceAgent(object):
    """Smoke test for `InferenceAgent` built from the BabyAI GoToObj config."""

    def test_inference_agent_from_minigrid_config(self):
        """Run up to ten test tasks with an inference agent on the CPU.

        Checks that every success comes with a positive reward, that no
        episode exceeds 64 steps, that at least one task succeeds, and that
        the least-used action among those taken accounts for at least a
        tenth of all actions.
        """
        set_seed(1)

        exp_config = PPOBabyAIGoToObjExperimentConfig()
        agent = InferenceAgent.from_experiment_config(
            exp_config=exp_config,
            device=torch.device("cpu"),
        )

        sampler_kwargs = exp_config.test_task_sampler_args(
            process_ind=0, total_processes=1
        )
        task_sampler = exp_config.make_sampler_fn(**sampler_kwargs)

        all_actions = []
        successes = 0
        for ind, expected_result in zip(range(10), expected_results):
            agent.reset()

            task = task_sampler.next_task()
            observations = task.get_observations()

            actions = []
            while not task.is_done():
                action = agent.act(observations=observations)
                actions.append(action)
                observations = task.step(action).observation

            metrics = task.metrics()
            successes += metrics["success"]

            assert metrics["success"] == 0 or metrics["reward"] > 0
            assert metrics["ep_length"] <= 64

            # Random seeding seems to not work well when changing linux/mac and torch versions :(
            # assert all(
            #     abs(v - expected_result[k]) < 1e-4
            #     for k, v in task.metrics().items()
            #     if k != "task_info"
            # ), f"Failed on task {ind} with actions {actions} and metrics {task.metrics()} (expected={expected_result})."

            all_actions.append(actions)

        assert successes > 0, "At least one task should be successful hopefully..."

        # Pool the actions of all episodes and look at the rarest one.
        pooled_actions = [action for episode in all_actions for action in episode]
        least_used_count = min(Counter(pooled_actions).values())
        assert least_used_count >= len(pooled_actions) * 1 / (7 + 3), (
            "Statistically, all actions should be taken at around 1/7 * num_actions times. We add 3 to"
            " the denominator for unlikely settings."
        )
# Allow running this test directly as a script, without pytest.
if __name__ == "__main__":
TestInferenceAgent().test_inference_agent_from_minigrid_config()
================================================
FILE: tests/utils/test_spaces.py
================================================
import warnings
from collections import OrderedDict
from typing import Tuple
import numpy as np
import torch
from gym import spaces as gyms
from allenact.utils import spaces_utils as su
class TestSpaces(object):
# Nested space shared by the tests: a Dict of two Tuples that together
# contain Box, MultiDiscrete, Discrete (inside a nested Dict) and MultiBinary
# sub-spaces.
space = gyms.Dict(
{
"first": gyms.Tuple(
[
gyms.Box(-10, 10, (3, 4)),
gyms.MultiDiscrete([2, 3, 4]),
gyms.Box(-1, 1, ()),
]
),
"second": gyms.Tuple(
[
gyms.Dict({"third": gyms.Discrete(11)}),
gyms.MultiBinary(8),
]
),
}
)
@staticmethod
def same(a, b, bidx=None):
    """Recursively compare two (possibly nested) points for equality.

    `OrderedDict` and tuple containers are walked element by element,
    following the keys/length of `a`. Leaves are compared with
    `np.array_equal`. When `bidx` is given, each leaf of `b` is indexed with
    `bidx` before the comparison, which selects one element of a batched `b`.
    """
    if isinstance(a, OrderedDict):
        return all(TestSpaces.same(a[key], b[key], bidx) for key in a)

    if isinstance(a, Tuple):
        return all(TestSpaces.same(a[it], b[it], bidx) for it in range(len(a)))

    # np.array_equal also works for torch tensors and scalars
    other = b if bidx is None else b[bidx]
    return np.array_equal(a, other)
# Round trip: a gym sample converted with `torch_point` and back with
# `numpy_point` must equal the original sample.
def test_conversion(self):
gsample = self.space.sample()
asample = su.torch_point(self.space, gsample)
back = su.numpy_point(self.space, asample)
assert self.same(back, gsample)
def test_flatten(self):
# We flatten Discrete to 1 value
assert su.flatdim(self.space) == 25
# gym flattens Discrete to one-hot
assert gyms.flatdim(self.space) == 35
asample = su.torch_point(self.space, self.space.sample())
flattened = su.flatten(self.space, asample)
unflattened = su.unflatten(self.space, flattened)
assert self.same(asample, unflattened)
# suppress `UserWarning: WARN: Box bound precision lowered by casting to float32`
with warnings.catch_warnings():
warnings.simplefilter("ignore")
flattened_space = su.flatten_space(self.space)
assert flattened_space.shape == (25,)
# The maximum comes from Discrete(11)
assert flattened_space.high.max() == 11.0
assert flattened_space.low.min() == -10.0
gym_flattened_space = gyms.flatten_space(self.space)
assert gym_flattened_space.shape == (35,)
# The maximum comes from Box(-10, 10, (3, 4))
assert gym_flattened_space.high.max() == 10.0
assert gym_flattened_space.low.min() == -10.0
def test_batched(self):
samples = [self.space.sample() for _ in range(10)]
flattened = [
su.flatten(self.space, su.torch_point(self.space, sample))
for sample in samples
]
stacked = torch.stack(flattened, dim=0)
unflattened = su.unflatten(self.space, stacked)
for bidx, refsample in enumerate(samples):
# Compare each torch-ified sample to the corresponding unflattened from the stack
assert self.same(su.torch_point(self.space, refsample), unflattened, bidx)
assert self.same(su.flatten(self.space, unflattened), stacked)
def test_tolist(self):
space = gyms.MultiDiscrete([3, 3])
actions = su.torch_point(space, space.sample()) # single sampler
actions = actions.unsqueeze(0).unsqueeze(0) # add [step, sampler]
flat_actions = su.flatten(space, actions)
al = su.action_list(space, flat_actions)
assert len(al) == 1
assert len(al[0]) == 2
space = gyms.Tuple([gyms.MultiDiscrete([3, 3]), gyms.Discrete(2)])
actions = su.torch_point(space, space.sample()) # single sampler
actions = (
actions[0].unsqueeze(0).unsqueeze(0),
torch.tensor(actions[1]).unsqueeze(0).unsqueeze(0),
) # add [step, sampler]
flat_actions = su.flatten(space, actions)
al = su.action_list(space, flat_actions)
assert len(al) == 1
assert len(al[0][0]) == 2
assert isinstance(al[0][1], int)
space = gyms.Dict(
{"tuple": gyms.MultiDiscrete([3, 3]), "scalar": gyms.Discrete(2)}
)
actions = su.torch_point(space, space.sample()) # single sampler
actions = OrderedDict(
[
("tuple", actions["tuple"].unsqueeze(0).unsqueeze(0)),
("scalar", torch.tensor(actions["scalar"]).unsqueeze(0).unsqueeze(0)),
]
)
flat_actions = su.flatten(space, actions)
al = su.action_list(space, flat_actions)
assert len(al) == 1
assert len(al[0]["tuple"]) == 2
assert isinstance(al[0]["scalar"], int)
if __name__ == "__main__":
    # Script entry point: run each test in turn, each on a fresh instance.
    for _test_name in (
        "test_conversion",
        "test_flatten",
        "test_batched",
        "test_tolist",
    ):
        getattr(TestSpaces(), _test_name)()  # type:ignore
================================================
FILE: tests/vision/__init__.py
================================================
================================================
FILE: tests/vision/test_pillow_rescaling.py
================================================
import hashlib
import os
import imageio
import numpy as np
from torchvision.transforms import transforms
from allenact.utils.tensor_utils import ScaleBothSides
from constants import ABS_PATH_OF_TOP_LEVEL_DIR
# Shared transform used by the tests below to turn numpy arrays into PIL
# images before rescaling.
to_pil = transforms.ToPILImage()  # Same as used by the vision sensors
class TestPillowRescaling(object):
    """Regression tests pinning the exact output of `ScaleBothSides`.

    Each test hashes an input array, rescales the corresponding PIL image to
    two target sizes and compares the SHA-1 of every result against recorded
    digests. Where a list of digests is given, any of them is accepted
    (presumably to tolerate differences between library versions/platforms
    -- TODO confirm).
    """

    def _load_thor_img(self) -> np.ndarray:
        """Read the iTHOR framework figure shipped under `docs/img`."""
        return imageio.v2.imread(
            os.path.join(ABS_PATH_OF_TOP_LEVEL_DIR, "docs/img/iTHOR_framework.jpg")
        )

    def _get_img_hash(self, img: np.ndarray) -> str:
        """Return the SHA-1 hex digest of the (contiguous) array buffer."""
        return hashlib.sha1(np.ascontiguousarray(img)).hexdigest()

    def _random_rgb_image(self, width: int, height: int, seed: int) -> np.ndarray:
        """Return a seeded random uint8 array of shape `(width, height, 3)`.

        The global numpy RNG state is saved before seeding and restored
        afterwards.
        """
        saved_state = np.random.get_state()
        np.random.seed(seed)
        pixels = np.random.randint(
            low=0, high=256, size=(width, height, 3), dtype=np.uint8
        )
        np.random.set_state(saved_state)
        return pixels

    def _random_depthmap(
        self, width: int, height: int, max_depth: float, seed: int
    ) -> np.ndarray:
        """Return a seeded random float32 array of shape `(width, height, 1)`.

        Values are uniform samples scaled by `max_depth`. The global numpy RNG
        state is saved before seeding and restored afterwards.
        """
        saved_state = np.random.get_state()
        np.random.seed(seed)
        depth = max_depth * np.random.rand(width, height, 1)
        np.random.set_state(saved_state)
        return np.float32(depth)

    def test_scaler_rgb_thor(self):
        """Rescale the iTHOR RGB image and compare against recorded digests."""
        thor_img_arr = np.uint8(self._load_thor_img())
        assert self._get_img_hash(thor_img_arr) in [
            "80ff8a342b4f74966796eee91babde31409d0457",
            "eb808b2218ccc2e56144131f9ef596a5c2ae3e2a",
        ]

        pil_img = to_pil(thor_img_arr)
        cases = (
            (
                75,
                75,
                [
                    "2c47057aa188240cb21b2edc39e0f269c1085bac",
                    "b5df3cc03f181cb7be07ddd229cac8d1efd5d077",
                ],
            ),
            (
                500,
                600,
                [
                    "faf0be2b9ec9bfd23a1b7b465c86ad961d03c259",
                    "cccddd7f17b59434dcdd0006dceeffbe1a969dc8",
                ],
            ),
        )
        for target_width, target_height, accepted in cases:
            rescale = ScaleBothSides(width=target_width, height=target_height)
            rescaled = np.array(rescale(pil_img))
            assert self._get_img_hash(rescaled) in accepted

    def test_scaler_rgb_random(self):
        """Rescale a seeded random RGB image and compare against recorded digests."""
        rgb_arr = self._random_rgb_image(width=100, height=100, seed=1)
        assert (
            self._get_img_hash(rgb_arr) == "d01bd8ba151ab790fde9a8cc29aa8a3c63147334"
        )

        pil_img = to_pil(rgb_arr)
        cases = (
            (60, 60, "22473537e50d5e39abeeec4f92dbfde51c754010"),
            (1000, 800, "5e5b955981e4ee3b5e22287536040d001a31fbd3"),
        )
        for target_width, target_height, expected in cases:
            rescale = ScaleBothSides(width=target_width, height=target_height)
            rescaled = np.array(rescale(pil_img))
            assert self._get_img_hash(rescaled) == expected

    def test_scaler_depth_thor(self):
        """Rescale a depth-like map derived from the iTHOR image."""
        # Build a single-channel float32 map by summing the colour channels,
        # then normalise it in place by its maximum.
        thor_depth_arr = 5 * np.float32(self._load_thor_img()).sum(-1)
        thor_depth_arr /= thor_depth_arr.max()
        assert self._get_img_hash(thor_depth_arr) in [
            "d3c1474400ba57ed78f52cf4ba6a4c2a1d90516c",
            "85a18befb2a174403079bf49d149630f829222c2",
        ]

        pil_img = to_pil(thor_depth_arr)
        cases = (
            (
                75,
                75,
                [
                    "6a879beb6bed49021e438c1e3af7a62c428a44d8",
                    "868f1d2b32167bda524ba502158f1ee81c8a24d2",
                ],
            ),
            (
                500,
                600,
                [
                    "79f11fb741ae638afca40125e4c501f54b22cc01",
                    "2d3012e1cced2942f7368e84bf332241fcf9d7fe",
                ],
            ),
        )
        for target_width, target_height, accepted in cases:
            rescale = ScaleBothSides(width=target_width, height=target_height)
            rescaled = np.array(rescale(pil_img))
            assert self._get_img_hash(rescaled) in accepted

    def test_scaler_depth_random(self):
        """Rescale a seeded random depth map and compare against recorded digests."""
        depth_arr = self._random_depthmap(width=96, height=103, max_depth=5.0, seed=1)
        assert (
            self._get_img_hash(depth_arr) == "cbd8ca127951ffafb6848536d9d731970a5397e9"
        )

        pil_img = to_pil(depth_arr)
        cases = (
            (60, 60, "5bed173f2d783fb2badcde9b43904ef85a1a5820"),
            (1000, 800, "9dceb7f77d767888f24a84c00913c0cf4ccd9d49"),
        )
        for target_width, target_height, expected in cases:
            rescale = ScaleBothSides(width=target_width, height=target_height)
            rescaled = np.array(rescale(pil_img))
            assert self._get_img_hash(rescaled) == expected
if __name__ == "__main__":
    # Script entry point: run each test in turn, each on a fresh instance.
    for _test_name in (
        "test_scaler_rgb_thor",
        "test_scaler_rgb_random",
        "test_scaler_depth_thor",
        "test_scaler_depth_random",
    ):
        getattr(TestPillowRescaling(), _test_name)()