Repository: allenai/allenact
Branch: main
Commit: d055fc9d4533
Files: 402
Total size: 2.0 MB

Directory structure:
gitextract_rp45h8jw/

├── .VERSION
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug_report.md
│   │   ├── feature_request.md
│   │   └── support_request.md
│   └── workflows/
│       ├── black.yml
│       ├── codeql.yml
│       ├── publish.yml
│       └── pytest.yml
├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── CNAME
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── ROADMAP.md
├── allenact/
│   ├── __init__.py
│   ├── _constants.py
│   ├── algorithms/
│   │   ├── __init__.py
│   │   ├── offpolicy_sync/
│   │   │   ├── __init__.py
│   │   │   └── losses/
│   │   │       ├── __init__.py
│   │   │       └── abstract_offpolicy_loss.py
│   │   └── onpolicy_sync/
│   │       ├── __init__.py
│   │       ├── engine.py
│   │       ├── losses/
│   │       │   ├── __init__.py
│   │       │   ├── a2cacktr.py
│   │       │   ├── abstract_loss.py
│   │       │   ├── grouped_action_imitation.py
│   │       │   ├── imitation.py
│   │       │   └── ppo.py
│   │       ├── misc.py
│   │       ├── policy.py
│   │       ├── runner.py
│   │       ├── storage.py
│   │       └── vector_sampled_tasks.py
│   ├── base_abstractions/
│   │   ├── __init__.py
│   │   ├── callbacks.py
│   │   ├── distributions.py
│   │   ├── experiment_config.py
│   │   ├── misc.py
│   │   ├── preprocessor.py
│   │   ├── sensor.py
│   │   └── task.py
│   ├── embodiedai/
│   │   ├── __init__.py
│   │   ├── aux_losses/
│   │   │   ├── __init__.py
│   │   │   └── losses.py
│   │   ├── mapping/
│   │   │   ├── __init__.py
│   │   │   ├── mapping_losses.py
│   │   │   ├── mapping_models/
│   │   │   │   ├── __init__.py
│   │   │   │   └── active_neural_slam.py
│   │   │   └── mapping_utils/
│   │   │       ├── __init__.py
│   │   │       ├── map_builders.py
│   │   │       └── point_cloud_utils.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── aux_models.py
│   │   │   ├── basic_models.py
│   │   │   ├── fusion_models.py
│   │   │   ├── resnet.py
│   │   │   └── visual_nav_models.py
│   │   ├── preprocessors/
│   │   │   ├── __init__.py
│   │   │   └── resnet.py
│   │   ├── sensors/
│   │   │   ├── __init__.py
│   │   │   └── vision_sensors.py
│   │   └── storage/
│   │       ├── __init__.py
│   │       └── vdr_storage.py
│   ├── main.py
│   ├── setup.py
│   └── utils/
│       ├── __init__.py
│       ├── cache_utils.py
│       ├── cacheless_frcnn.py
│       ├── experiment_utils.py
│       ├── inference.py
│       ├── misc_utils.py
│       ├── model_utils.py
│       ├── multi_agent_viz_utils.py
│       ├── spaces_utils.py
│       ├── system.py
│       ├── tensor_utils.py
│       └── viz_utils.py
├── allenact_plugins/
│   ├── __init__.py
│   ├── babyai_plugin/
│   │   ├── __init__.py
│   │   ├── babyai_constants.py
│   │   ├── babyai_models.py
│   │   ├── babyai_tasks.py
│   │   ├── configs/
│   │   │   └── __init__.py
│   │   ├── data/
│   │   │   └── __init__.py
│   │   ├── extra_environment.yml
│   │   ├── extra_requirements.txt
│   │   └── scripts/
│   │       ├── __init__.py
│   │       ├── download_babyai_expert_demos.py
│   │       ├── get_instr_length_percentiles.py
│   │       └── truncate_expert_demos.py
│   ├── clip_plugin/
│   │   ├── __init__.py
│   │   ├── clip_preprocessors.py
│   │   ├── extra_environment.yml
│   │   └── extra_requirements.txt
│   ├── gym_plugin/
│   │   ├── __init__.py
│   │   ├── extra_environment.yml
│   │   ├── extra_requirements.txt
│   │   ├── gym_distributions.py
│   │   ├── gym_environment.py
│   │   ├── gym_models.py
│   │   ├── gym_sensors.py
│   │   └── gym_tasks.py
│   ├── habitat_plugin/
│   │   ├── __init__.py
│   │   ├── data/
│   │   │   └── __init__.py
│   │   ├── extra_environment.yml
│   │   ├── extra_environment_headless.yml
│   │   ├── extra_requirements.txt
│   │   ├── habitat_constants.py
│   │   ├── habitat_environment.py
│   │   ├── habitat_preprocessors.py
│   │   ├── habitat_sensors.py
│   │   ├── habitat_task_samplers.py
│   │   ├── habitat_tasks.py
│   │   ├── habitat_utils.py
│   │   └── scripts/
│   │       ├── __init__.py
│   │       ├── agent_demo.py
│   │       └── make_map.py
│   ├── ithor_plugin/
│   │   ├── __init__.py
│   │   ├── extra_environment.yml
│   │   ├── extra_requirements.txt
│   │   ├── ithor_constants.py
│   │   ├── ithor_environment.py
│   │   ├── ithor_sensors.py
│   │   ├── ithor_task_samplers.py
│   │   ├── ithor_tasks.py
│   │   ├── ithor_util.py
│   │   ├── ithor_viz.py
│   │   └── scripts/
│   │       ├── __init__.py
│   │       ├── make_objectnav_debug_dataset.py
│   │       └── make_pointnav_debug_dataset.py
│   ├── lighthouse_plugin/
│   │   ├── __init__.py
│   │   ├── configs/
│   │   │   └── __init__.py
│   │   ├── data/
│   │   │   └── __init__.py
│   │   ├── extra_environment.yml
│   │   ├── extra_requirements.txt
│   │   ├── lighthouse_environment.py
│   │   ├── lighthouse_models.py
│   │   ├── lighthouse_sensors.py
│   │   ├── lighthouse_tasks.py
│   │   ├── lighthouse_util.py
│   │   └── scripts/
│   │       └── __init__.py
│   ├── manipulathor_plugin/
│   │   ├── __init__.py
│   │   ├── arm_calculation_utils.py
│   │   ├── armpointnav_constants.py
│   │   ├── manipulathor_constants.py
│   │   ├── manipulathor_environment.py
│   │   ├── manipulathor_sensors.py
│   │   ├── manipulathor_task_samplers.py
│   │   ├── manipulathor_tasks.py
│   │   ├── manipulathor_utils.py
│   │   └── manipulathor_viz.py
│   ├── minigrid_plugin/
│   │   ├── __init__.py
│   │   ├── configs/
│   │   │   ├── __init__.py
│   │   │   └── minigrid_nomemory.py
│   │   ├── data/
│   │   │   └── __init__.py
│   │   ├── extra_environment.yml
│   │   ├── extra_requirements.txt
│   │   ├── minigrid_environments.py
│   │   ├── minigrid_models.py
│   │   ├── minigrid_offpolicy.py
│   │   ├── minigrid_sensors.py
│   │   ├── minigrid_tasks.py
│   │   └── scripts/
│   │       └── __init__.py
│   ├── navigation_plugin/
│   │   ├── __init__.py
│   │   ├── objectnav/
│   │   │   ├── __init__.py
│   │   │   └── models.py
│   │   └── pointnav/
│   │       ├── __init__.py
│   │       └── models.py
│   ├── robothor_plugin/
│   │   ├── __init__.py
│   │   ├── configs/
│   │   │   └── __init__.py
│   │   ├── extra_environment.yml
│   │   ├── extra_requirements.txt
│   │   ├── robothor_constants.py
│   │   ├── robothor_distributions.py
│   │   ├── robothor_environment.py
│   │   ├── robothor_models.py
│   │   ├── robothor_preprocessors.py
│   │   ├── robothor_sensors.py
│   │   ├── robothor_task_samplers.py
│   │   ├── robothor_tasks.py
│   │   ├── robothor_viz.py
│   │   └── scripts/
│   │       ├── __init__.py
│   │       ├── make_objectnav_debug_dataset.py
│   │       └── make_pointnav_debug_dataset.py
│   └── setup.py
├── conda/
│   ├── environment-10.1.yml
│   ├── environment-10.2.yml
│   ├── environment-11.1.yml
│   ├── environment-9.2.yml
│   ├── environment-base.yml
│   ├── environment-cpu.yml
│   └── environment-dev.yml
├── constants.py
├── datasets/
│   ├── .gitignore
│   ├── .habitat_datasets_download_info.json
│   ├── .habitat_downloader_helper.py
│   ├── download_habitat_datasets.sh
│   └── download_navigation_datasets.sh
├── dev_requirements.txt
├── docs/
│   ├── .gitignore
│   ├── CNAME
│   ├── FAQ.md
│   ├── css/
│   │   └── extra.css
│   ├── getting_started/
│   │   ├── abstractions.md
│   │   ├── running-your-first-experiment.md
│   │   └── structure.md
│   ├── howtos/
│   │   ├── changing-rewards-and-losses.md
│   │   ├── defining-a-new-model.md
│   │   ├── defining-a-new-task.md
│   │   ├── defining-a-new-training-pipeline.md
│   │   ├── defining-an-experiment.md
│   │   ├── running-a-multi-agent-experiment.md
│   │   └── visualizing-results.md
│   ├── installation/
│   │   ├── download-datasets.md
│   │   ├── installation-allenact.md
│   │   └── installation-framework.md
│   ├── javascripts/
│   │   └── extra.js
│   ├── notebooks/
│   │   └── firstbook.md
│   ├── projects/
│   │   ├── advisor_2020/
│   │   │   └── README.md
│   │   ├── babyai_baselines/
│   │   │   └── README.md
│   │   ├── gym_baselines/
│   │   │   └── README.md
│   │   ├── objectnav_baselines/
│   │   │   └── README.md
│   │   ├── pointnav_baselines/
│   │   │   └── README.md
│   │   └── two_body_problem_2019/
│   │       └── README.md
│   └── tutorials/
│       ├── distributed-objectnav-tutorial.md
│       ├── gym-mujoco-tutorial.md
│       ├── gym-tutorial.md
│       ├── index.md
│       ├── minigrid-tutorial.md
│       ├── offpolicy-tutorial.md
│       ├── running-inference-on-a-pretrained-model.md
│       ├── training-a-pointnav-model.md
│       ├── training-pipelines.md
│       └── transfering-to-a-different-environment-framework.md
├── main.py
├── mkdocs.yml
├── mypy.ini
├── overrides/
│   └── main.html
├── pretrained_model_ckpts/
│   ├── .gitignore
│   └── download_navigation_model_ckpts.sh
├── projects/
│   ├── __init__.py
│   ├── babyai_baselines/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   └── experiments/
│   │       ├── __init__.py
│   │       ├── base.py
│   │       ├── go_to_local/
│   │       │   ├── __init__.py
│   │       │   ├── a2c.py
│   │       │   ├── base.py
│   │       │   ├── bc.py
│   │       │   ├── bc_teacher_forcing.py
│   │       │   ├── dagger.py
│   │       │   ├── distributed_bc_offpolicy.py
│   │       │   ├── distributed_bc_teacher_forcing.py
│   │       │   └── ppo.py
│   │       └── go_to_obj/
│   │           ├── __init__.py
│   │           ├── a2c.py
│   │           ├── base.py
│   │           ├── bc.py
│   │           ├── bc_teacher_forcing.py
│   │           ├── dagger.py
│   │           └── ppo.py
│   ├── gym_baselines/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── experiments/
│   │   │   ├── __init__.py
│   │   │   ├── gym_base.py
│   │   │   ├── gym_humanoid_base.py
│   │   │   ├── gym_humanoid_ddppo.py
│   │   │   ├── gym_mujoco_base.py
│   │   │   ├── gym_mujoco_ddppo.py
│   │   │   └── mujoco/
│   │   │       ├── __init__.py
│   │   │       ├── gym_mujoco_ant_ddppo.py
│   │   │       ├── gym_mujoco_halfcheetah_ddppo.py
│   │   │       ├── gym_mujoco_hopper_ddppo.py
│   │   │       ├── gym_mujoco_humanoid_ddppo.py
│   │   │       ├── gym_mujoco_inverteddoublependulum_ddppo.py
│   │   │       ├── gym_mujoco_invertedpendulum_ddppo.py
│   │   │       ├── gym_mujoco_reacher_ddppo.py
│   │   │       ├── gym_mujoco_swimmer_ddppo.py
│   │   │       └── gym_mujoco_walker2d_ddppo.py
│   │   └── models/
│   │       ├── __init__.py
│   │       └── gym_models.py
│   ├── manipulathor_baselines/
│   │   ├── __init__.py
│   │   └── armpointnav_baselines/
│   │       ├── __init__.py
│   │       ├── experiments/
│   │       │   ├── __init__.py
│   │       │   ├── armpointnav_base.py
│   │       │   ├── armpointnav_mixin_ddppo.py
│   │       │   ├── armpointnav_mixin_simplegru.py
│   │       │   ├── armpointnav_thor_base.py
│   │       │   └── ithor/
│   │       │       ├── __init__.py
│   │       │       ├── armpointnav_depth.py
│   │       │       ├── armpointnav_disjoint_depth.py
│   │       │       ├── armpointnav_ithor_base.py
│   │       │       ├── armpointnav_no_vision.py
│   │       │       ├── armpointnav_rgb.py
│   │       │       └── armpointnav_rgbdepth.py
│   │       └── models/
│   │           ├── __init__.py
│   │           ├── arm_pointnav_models.py
│   │           ├── base_models.py
│   │           ├── disjoint_arm_pointnav_models.py
│   │           └── manipulathor_net_utils.py
│   ├── objectnav_baselines/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── experiments/
│   │   │   ├── __init__.py
│   │   │   ├── clip/
│   │   │   │   ├── __init__.py
│   │   │   │   └── mixins.py
│   │   │   ├── habitat/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── clip/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── objectnav_habitat_rgb_clipresnet50gru_ddppo.py
│   │   │   │   │   └── objectnav_habitat_rgb_clipresnet50gru_ddppo_increasingrollouts.py
│   │   │   │   └── objectnav_habitat_base.py
│   │   │   ├── ithor/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── objectnav_ithor_base.py
│   │   │   │   ├── objectnav_ithor_depth_resnet18gru_ddppo.py
│   │   │   │   ├── objectnav_ithor_rgb_resnet18gru_ddppo.py
│   │   │   │   └── objectnav_ithor_rgbd_resnet18gru_ddppo.py
│   │   │   ├── objectnav_base.py
│   │   │   ├── objectnav_thor_base.py
│   │   │   └── robothor/
│   │   │       ├── __init__.py
│   │   │       ├── beta/
│   │   │       │   ├── README.md
│   │   │       │   ├── __init__.py
│   │   │       │   ├── objectnav_robothor_rgb_resnetgru_ddppo_and_gbc.py
│   │   │       │   └── objectnav_robothor_rgb_unfrozenresnet18gru_vdr_ddppo.py
│   │   │       ├── clip/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── objectnav_robothor_rgb_clipresnet50gru_ddppo.py
│   │   │       │   └── objectnav_robothor_rgb_clipresnet50x16gru_ddppo.py
│   │   │       ├── objectnav_robothor_base.py
│   │   │       ├── objectnav_robothor_depth_resnet18gru_ddppo.py
│   │   │       ├── objectnav_robothor_rgb_resnet18gru_dagger.py
│   │   │       ├── objectnav_robothor_rgb_resnet18gru_ddppo.py
│   │   │       ├── objectnav_robothor_rgb_resnet50gru_ddppo.py
│   │   │       ├── objectnav_robothor_rgb_unfrozenresnet18gru_ddppo.py
│   │   │       └── objectnav_robothor_rgbd_resnet18gru_ddppo.py
│   │   └── mixins.py
│   ├── pointnav_baselines/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── experiments/
│   │   │   ├── __init__.py
│   │   │   ├── habitat/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── clip/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── pointnav_habitat_rgb_clipresnet50gru_ddppo.py
│   │   │   │   ├── pointnav_habitat_base.py
│   │   │   │   ├── pointnav_habitat_depth_simpleconvgru_ddppo.py
│   │   │   │   ├── pointnav_habitat_rgb_simpleconvgru_ddppo.py
│   │   │   │   └── pointnav_habitat_rgbd_simpleconvgru_ddppo.py
│   │   │   ├── ithor/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── pointnav_ithor_base.py
│   │   │   │   ├── pointnav_ithor_depth_simpleconvgru_ddppo.py
│   │   │   │   ├── pointnav_ithor_rgb_simpleconvgru_ddppo.py
│   │   │   │   └── pointnav_ithor_rgbd_simpleconvgru_ddppo.py
│   │   │   ├── pointnav_base.py
│   │   │   ├── pointnav_thor_base.py
│   │   │   └── robothor/
│   │   │       ├── __init__.py
│   │   │       ├── pointnav_robothor_base.py
│   │   │       ├── pointnav_robothor_depth_simpleconvgru_ddppo.py
│   │   │       ├── pointnav_robothor_rgb_simpleconvgru_ddppo.py
│   │   │       └── pointnav_robothor_rgbd_simpleconvgru_ddppo.py
│   │   └── mixins.py
│   └── tutorials/
│       ├── __init__.py
│       ├── distributed_objectnav_tutorial.py
│       ├── gym_mujoco_tutorial.py
│       ├── gym_tutorial.py
│       ├── minigrid_offpolicy_tutorial.py
│       ├── minigrid_tutorial.py
│       ├── minigrid_tutorial_conds.py
│       ├── navtopartner_robothor_rgb_ppo.py
│       ├── object_nav_ithor_dagger_then_ppo_one_object.py
│       ├── object_nav_ithor_dagger_then_ppo_one_object_viz.py
│       ├── object_nav_ithor_ppo_one_object.py
│       ├── pointnav_habitat_rgb_ddppo.py
│       ├── pointnav_ithor_rgb_ddppo.py
│       ├── running_inference_tutorial.py
│       └── training_a_pointnav_model.py
├── requirements.txt
├── scripts/
│   ├── auto_format.sh
│   ├── build_docs.py
│   ├── build_docs.sh
│   ├── dcommand.py
│   ├── dconfig.py
│   ├── dkill.py
│   ├── dmain.py
│   ├── literate.py
│   ├── release.py
│   ├── run_tests.sh
│   └── startx.py
└── tests/
    ├── .gitignore
    ├── __init__.py
    ├── hierarchical_policies/
    │   ├── __init__.py
    │   └── test_minigrid_conditional.py
    ├── manipulathor_plugin/
    │   ├── __init__.py
    │   └── test_utils.py
    ├── mapping/
    │   ├── __init__.py
    │   └── test_ai2thor_mapping.py
    ├── multiprocessing/
    │   ├── __init__.py
    │   └── test_frozen_attribs.py
    ├── sync_algs_cpu/
    │   ├── __init__.py
    │   └── test_to_to_obj_trains.py
    ├── utils/
    │   ├── __init__.py
    │   ├── test_inference_agent.py
    │   └── test_spaces.py
    └── vision/
        ├── __init__.py
        └── test_pillow_rescaling.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .VERSION
================================================
0.5.5a


================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: bug
assignees: ''

---

## Problem

A clear and concise description of what the bug is.

## Steps to reproduce

Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error

## Expected behavior

A clear and concise description of what you expected to happen.

## Screenshots

If applicable, add screenshots to help explain your problem.

## Desktop

Please add the following information:
 - OS: [e.g. Ubuntu 16.04.5]
 - AllenAct Version: [e.g. current HEAD of main or v0.1.0]

## Additional context

Add any other context about the problem here.


================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an enhancement or a new feature
title: ''
labels: enhancement
assignees: ''

---

## Problem

Is your feature request related to a problem? Please provide a clear and concise description of what the problem is:

E.g. I would really like to have better support for my favorite environment X.

## Desired solution

A clear and concise description of what you want to happen.

## Alternative solutions

A description of any alternative solutions or features you've considered.

## Additional context

Add any other context or screenshots about the feature request here.


================================================
FILE: .github/ISSUE_TEMPLATE/support_request.md
================================================
---
name: Support request
about: Request support regarding AllenAct
title: ''
labels: ''
assignees: ''

---

## Problem / Question

What do you need help with? E.g. "I'm having trouble running model X" or "when I run command Y I get error Z."

## Additional context

_(Optional)_ - To provide support, it's helpful to have as many details as possible; please add any additional context here.


================================================
FILE: .github/workflows/black.yml
================================================
name: Lint

on: [push, pull_request]

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: psf/black@stable


================================================
FILE: .github/workflows/codeql.yml
================================================
name: "CodeQL"

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]
  schedule:
    - cron: "13 6 * * 4"

jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language: [ python ]

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Initialize CodeQL
        uses: github/codeql-action/init@v2
        with:
          languages: ${{ matrix.language }}
          queries: +security-and-quality

      - name: Autobuild
        uses: github/codeql-action/autobuild@v2

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v2
        with:
          category: "/language:${{ matrix.language }}"


================================================
FILE: .github/workflows/publish.yml
================================================
# This workflow will upload the allenact and allenact_plugins packages using Twine (after manually triggering it)
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

name: Publish PYPI Packages

on:
  workflow_dispatch:

jobs:
  deploy:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v2
    - name: Set up Python
      uses: actions/setup-python@v2
      with:
        python-version: '3.7'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install setuptools twine
    - name: Build and publish
      env:
        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
      run: |
        python scripts/release.py
        twine upload -u __token__ dist/*


================================================
FILE: .github/workflows/pytest.yml
================================================
name: PyTest

on: [push]

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [3.9]

    steps:
    - uses: actions/checkout@v2

    - uses: ouzi-dev/commit-status-updater@v1.1.0 # Updates the commit status badge to pending

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        python -m pip install pytest wandb
        python -m pip install --editable="./allenact"
        python -m pip install --editable="./allenact_plugins[all]"
        python -m pip install -e "git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd#egg=babyai"
        python -m pip install compress_pickle # Needed for some mapping tests
        pip list

    - name: Test with pytest
      run: |
        pytest --capture=tee-sys tests

    - if: always() # Updates the commit status badge to the result of running the tests above
      uses: ouzi-dev/commit-status-updater@v1.1.0
      with:
        status: "${{ job.status }}"


================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
docs/build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pycharm
.idea/

# pytorch
*.pt

# Default output dir
experiment_output
*_out

# PDFs
*.pdf

# PNGs
*.png

# Tensorboard logs
events.out.tfevents.*

# TSV files
*.tsv

# tmp directory
tmp/

# Pickle files
*.pkl
*.pkl.gz

# Zip files
*.zip

# VSCode
.vscode/

# MacOS
.DS_Store

# Docs
docs/index.md
docs/CONTRIBUTING.md
docs/LICENSE.md

# Metrics
metrics__*.json

# Robothor
allenact_plugins/robothor_plugin/data/*

# ithor
allenact_plugins/ithor_plugin/data/*

# Habitat
external_projects/habitat-lab

# Local pip installations
src
.pip_src

# Files created when running training
**/used_configs
*.patch

# Package building
*.egg_info
*.egg-info

# Additional allenact-specific locks and hidden files
*.allenact_last_start_time_string
*.allenact_start_time_string.lock
*.lock
rsync-*

================================================
FILE: .gitmodules
================================================
[submodule "projects/ithor_rearrangement"]
	path = projects/ithor_rearrangement
	url = https://github.com/allenai/ai2thor-rearrangement.git
	branch = active_neural_slam


================================================
FILE: .pre-commit-config.yaml
================================================
repos:
-   repo: https://github.com/ambv/black
    rev: 19.10b0
    hooks:
    - id: black
      language_version: python3.7
-   repo: https://github.com/pre-commit/mirrors-mypy
    rev: 'v0.761'  # Use the sha / tag you want to point at
    hooks:
    -   id: mypy
        args: [--follow-imports=skip]

================================================
FILE: CNAME
================================================
www.allenact.org

================================================
FILE: CONTRIBUTING.md
================================================
# Contributing

We welcome contributions from the greater community. If you would like to make such a contribution, we recommend first submitting an [issue](https://github.com/allenai/allenact/issues) describing your proposed improvement.
Doing so can ensure we can validate your suggestions before you spend a great deal of time
upon them. Improvements and bug fixes should be made via a pull request
from your fork of the repository at [https://github.com/allenai/allenact](https://github.com/allenai/allenact).
 
All code in pull requests should adhere to the following guidelines.

## Found a bug or want to suggest an enhancement?

Please submit an [issue](https://github.com/allenai/allenact/issues) in which you note the steps
to reproduce the bug or in which you detail the enhancement.

## Making a pull request?

When making a pull request we require that any code respects several guidelines detailed below.

### Auto-formatting

All python code in this repository should be formatted using [black](https://black.readthedocs.io/en/stable/).
To use `black` auto-formatting across all files, simply run
```bash
bash scripts/auto_format.sh
``` 
which will run `black` auto-formatting as well as [docformatter](https://pypi.org/project/docformatter/) (used
to auto-format documentation strings).

### Type-checking

Our code makes liberal use of type hints. If you have not had experience with type hinting in python we recommend
reading the [documentation](https://docs.python.org/3/library/typing.html) of the `typing` python module or the 
simplified introduction to type hints found [here](https://www.python.org/dev/peps/pep-0483/). All methods should
have typed arguments and output. Furthermore we use [mypy](https://mypy.readthedocs.io/en/stable/) to perform 
basic static type checking. Before making a pull request, there should be no warnings or errors when running
```bash
dmypy run -- --follow-imports=skip .
```
Explicitly ignoring type checking (for instance using `# type: ignore`) should only be done when it would otherwise
be an extensive burden.

<!-- TODO: This should be updated given how we're changing dependencies in AllenAct.

### Updating, adding, or removing packages?

If you are updating, adding, or removing packages please run:
```bash
pipenv-setup sync --pipfile # Syncs packages to setup.py
pip freeze > requirements.txt # Syncs packages to requirements.txt
``` 
before submitting a pull request. If you are not using `pipenv`, you are still
required to update the file `Pipfile` with newly installed or modified packages. Moreover
you must manually update the `install_requires` field of the `setup.py` file. 
-->

### Setting up pre-commit hooks (optional)

Pre-commit hooks check that, when you attempt to commit changes, your code adheres to a number of
formatting and type-checking guidelines. Pull requests containing code not adhering to these 
guidelines will not be accepted and thus we recommend installing these pre-commit hooks. Assuming you have 
installed all of the project requirements, you can install our recommended
pre-commit hooks by running (from this project's root directory)
```bash
pre-commit install
```
After running the above, each time you run `git commit ...` a set of pre-commit checks will
be run.

================================================
FILE: LICENSE
================================================
MIT License

Original work Copyright (c) 2017 Ilya Kostrikov

Original work Copyright (c) Facebook, Inc. and its affiliates.

Modified work Copyright (c) 2020 Allen Institute for Artificial Intelligence

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
<div align="center">
    <img src="docs/img/AllenAct.svg" width="350" />
    <br>
    <i><h3>An open source framework for research in Embodied AI</h3></i>
    <hr/>
</div>

[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](./LICENSE)
[![Documentation Status](https://img.shields.io/badge/docs-up%20to%20date-Green.svg)](https://allenact.org)
[![Latest Release](https://img.shields.io/github/v/release/allenai/allenact)](https://github.com/allenai/allenact/releases/latest)
[![Python 3.6+](https://img.shields.io/badge/python-3.6+-blue.svg)](https://www.python.org/downloads/release/python-360/)
[![LGTM Grade: Python](https://img.shields.io/lgtm/grade/python/g/allenai/allenact.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/allenai/allenact/context:python)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)

**AllenAct** is a modular and flexible learning framework designed with a focus on the unique requirements of Embodied-AI research. It provides first-class support for a growing collection of embodied environments, tasks and algorithms, provides reproductions of state-of-the-art models and includes extensive documentation, tutorials, start-up code, and pre-trained models.

AllenAct is built and backed by the [Allen Institute for AI (AI2)](https://allenai.org/). AI2 is a non-profit institute with the mission to contribute to humanity through high-impact AI research and engineering.

## Quick Links

- [Website & Docs](https://www.allenact.org/)
- [Github](https://github.com/allenai/allenact)
- [Install](https://www.allenact.org/installation/installation-allenact/)
- [Tutorials](https://www.allenact.org/tutorials/)
- [AllenAct Paper](https://arxiv.org/abs/2008.12760)
- [Citation](#citation)

## Features & Highlights

* _Support for multiple environments_: Support for the [iTHOR](https://ai2thor.allenai.org/ithor/), [RoboTHOR](https://ai2thor.allenai.org/robothor/) and [Habitat](https://aihabitat.org/) embodied environments as well as for grid-worlds including [MiniGrid](https://github.com/maximecb/gym-minigrid).
* _Task Abstraction_: Tasks and environments are decoupled in AllenAct, enabling researchers to easily implement a large variety of tasks in the same environment.
* _Algorithms_: Support for a variety of on-policy algorithms including [PPO](https://arxiv.org/pdf/1707.06347.pdf), [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf), [A2C](https://arxiv.org/pdf/1611.05763.pdf), Imitation Learning and [DAgger](https://www.ri.cmu.edu/pub_files/2011/4/Ross-AISTATS11-NoRegret.pdf) as well as offline training such as offline IL.
* _Sequential Algorithms_: It is trivial to experiment with different sequences of training routines, which are often the key to successful policies.
* _Simultaneous Losses_: Easily combine various losses while training models (e.g. use an external self-supervised loss while optimizing a PPO loss).
* _Multi-agent support_: Support for multi-agent algorithms and tasks.
* _Visualizations_: Out of the box support to easily visualize first and third person views for agents as well as intermediate model tensors, integrated into Tensorboard.
* _Pre-trained models_: Code and models for a number of standard Embodied AI tasks.
* _Tutorials_: Start-up code and extensive tutorials to help ramp up to Embodied AI.
* _First-class PyTorch support_: One of the few RL frameworks to target PyTorch.
* _Arbitrary action spaces_: Supporting both discrete and continuous actions.

|Environments|Tasks|Algorithms|
|------------|-----|----------|
|[iTHOR](https://ai2thor.allenai.org/ithor/), [RoboTHOR](https://ai2thor.allenai.org/robothor/), [Habitat](https://aihabitat.org/), [MiniGrid](https://github.com/maximecb/gym-minigrid), [OpenAI Gym](https://gym.openai.com/)|[PointNav](https://arxiv.org/pdf/1807.06757.pdf), [ObjectNav](https://arxiv.org/pdf/2006.13171.pdf), [MiniGrid tasks](https://github.com/maximecb/gym-minigrid), [Gym Box2D tasks](https://gym.openai.com/envs/#box2d)|[A2C](https://arxiv.org/pdf/1611.05763.pdf), [PPO](https://arxiv.org/pdf/1707.06347.pdf), [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf), [DAgger](https://www.ri.cmu.edu/pub_files/2011/4/Ross-AISTATS11-NoRegret.pdf), Off-policy Imitation|

## Contributions
We welcome contributions from the greater community. If you would like to make such a contribution, we recommend first submitting an [issue](https://github.com/allenai/allenact/issues) describing your proposed improvement. Doing so can ensure we can validate your suggestions before you spend a great deal of time on them. Improvements and bug fixes should be made via a pull request from your fork of the repository at [https://github.com/allenai/allenact](https://github.com/allenai/allenact).

All code in this repository is subject to formatting, documentation, and type-annotation guidelines. For more details, please see our [contribution guidelines](CONTRIBUTING.md).

## Acknowledgments
This work builds upon the [pytorch-a2c-ppo-acktr](https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail) library of Ilya Kostrikov and uses some data structures from FAIR's [habitat-lab](https://github.com/facebookresearch/habitat-lab). We would like to thank Dustin Schwenk for his help with the public release of the framework.

## License
AllenAct is MIT licensed, as found in the [LICENSE](LICENSE) file.

## Team
AllenAct is an open-source project built by members of the PRIOR research group at the Allen Institute for Artificial Intelligence (AI2). 

<div align="left">
    <a href="//prior.allenai.org/" target="_blank">
        <img src="docs/img/ai2-prior.svg" width="400">
    </a>
    <br>
</div>

## Citation
If you use this work, please cite our [paper](https://arxiv.org/abs/2008.12760):

```bibtex
@article{AllenAct,
  author = {Luca Weihs and Jordi Salvador and Klemen Kotar and Unnat Jain and Kuo-Hao Zeng and Roozbeh Mottaghi and Aniruddha Kembhavi},
  title = {AllenAct: A Framework for Embodied AI Research},
  year = {2020},
  journal = {arXiv preprint arXiv:2008.12760},
}
```


================================================
FILE: ROADMAP.md
================================================
# Roadmap

Here we track new features/support to be added in the short/mid-term.  

## New environments
* [SAPIEN](https://sapien.ucsd.edu/)
* [ThreeDWorld](http://www.threedworld.org/)

## New tasks
* [Room-to-room navigation](https://arxiv.org/pdf/1711.07280.pdf)
* [Furniture Lifting](https://arxiv.org/abs/1904.05879) and [Furniture Moving](https://arxiv.org/abs/2007.04979)

## New training methods

* A3C
* Deep Q-Learning 

================================================
FILE: allenact/__init__.py
================================================
# Expose the package version as `allenact.__version__` when the
# `allenact._version` module can be imported.
try:
    # noinspection PyProtectedMember,PyUnresolvedReferences
    from allenact._version import __version__
except ModuleNotFoundError:
    # `allenact._version` is not available: report the version as unknown.
    __version__ = None


================================================
FILE: allenact/_constants.py
================================================
import os
from pathlib import Path

# Absolute path of the directory that contains this `_constants.py` file
# (i.e. the `allenact` package directory).
ALLENACT_INSTALL_DIR = os.path.abspath(os.path.dirname(Path(__file__)))


================================================
FILE: allenact/algorithms/__init__.py
================================================


================================================
FILE: allenact/algorithms/offpolicy_sync/__init__.py
================================================


================================================
FILE: allenact/algorithms/offpolicy_sync/losses/__init__.py
================================================


================================================
FILE: allenact/algorithms/offpolicy_sync/losses/abstract_offpolicy_loss.py
================================================
"""Defining abstract loss classes for actor critic models."""

import abc
from typing import Dict, Tuple, TypeVar, Generic

import torch

from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.base_abstractions.misc import Loss, Memory

# Type variable for the kind of model an off-policy loss is computed on.
ModelType = TypeVar("ModelType")


class AbstractOffPolicyLoss(Generic[ModelType], Loss):
    """Abstract class representing an off-policy loss function used to train a
    model.

    Subclasses must implement `loss`; the class is generic in the type of
    model (`ModelType`) the loss is computed for.
    """

    # noinspection PyMethodOverriding
    @abc.abstractmethod
    def loss(  # type: ignore
        self,
        *,  # No positional arguments
        step_count: int,
        model: ModelType,
        batch: ObservationType,
        memory: Memory,
        **kwargs,
    ) -> Tuple[torch.FloatTensor, Dict[str, float], Memory, int]:
        """Computes the loss.

        Loss after processing a batch of data with (part of) a model (possibly with memory).
        All arguments must be passed by keyword.

        # Parameters

        step_count: integer step counter supplied by the caller (presumably the
            number of training steps taken so far -- TODO confirm against the caller)
        model: model to run on data batch (both assumed to be on the same device)
        batch: data to use as input for model (already on the same device as model)
        memory: model memory before processing current data batch
        kwargs: any additional keyword arguments; their meaning is left to the
            concrete implementation

        # Returns

        A tuple with:

        current_loss: total loss
        current_info: additional information about the current loss
        memory: model memory after processing current data batch
        bsize: batch size

        # Raises

        NotImplementedError: always, in this abstract base implementation.
        """
        raise NotImplementedError()


================================================
FILE: allenact/algorithms/onpolicy_sync/__init__.py
================================================


================================================
FILE: allenact/algorithms/onpolicy_sync/engine.py
================================================
"""Defines the reinforcement learning `OnPolicyRLEngine`."""

import datetime
import logging
import numbers
import os
import random
import time
import traceback
from functools import partial
from multiprocessing.context import BaseContext
from typing import Any, Dict, List, Optional, Sequence, Union, cast

import filelock
import torch
import torch.distributed as dist  # type: ignore
import torch.distributions  # type: ignore
import torch.multiprocessing as mp  # type: ignore
import torch.nn as nn
import torch.optim as optim

# noinspection PyProtectedMember
from torch._C._distributed_c10d import ReduceOp

from allenact.algorithms.onpolicy_sync.misc import TrackingInfo, TrackingInfoType
from allenact.base_abstractions.sensor import Sensor
from allenact.utils.misc_utils import str2bool
from allenact.utils.model_utils import md5_hash_of_state_dict

try:
    # noinspection PyProtectedMember,PyUnresolvedReferences
    from torch.optim.lr_scheduler import _LRScheduler
except (ImportError, ModuleNotFoundError):
    raise ImportError("`_LRScheduler` was not found in `torch.optim.lr_scheduler`")

from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
    AbstractActorCriticLoss,
)
from allenact.algorithms.onpolicy_sync.policy import ActorCriticModel
from allenact.algorithms.onpolicy_sync.storage import (
    ExperienceStorage,
    MiniBatchStorageMixin,
    RolloutStorage,
    StreamingStorageMixin,
)
from allenact.algorithms.onpolicy_sync.vector_sampled_tasks import (
    COMPLETE_TASK_CALLBACK_KEY,
    COMPLETE_TASK_METRICS_KEY,
    SingleProcessVectorSampledTasks,
    VectorSampledTasks,
)
from allenact.base_abstractions.distributions import TeacherForcingDistr
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.misc import (
    ActorCriticOutput,
    GenericAbstractLoss,
    Memory,
    RLStepResult,
)
from allenact.utils import spaces_utils as su
from allenact.utils.experiment_utils import (
    LoggingPackage,
    PipelineStage,
    ScalarMeanTracker,
    StageComponent,
    TrainingPipeline,
    set_deterministic_cudnn,
    set_seed,
)
from allenact.utils.system import get_logger
from allenact.utils.tensor_utils import batch_observations, detach_recursively
from allenact.utils.viz_utils import VizSuite

# `DEBUGGING` is read from the `ALLENACT_DEBUG` environment variable. Its
# default depends on whether `pydevd` can be imported: "true" if it can,
# "false" otherwise.
try:
    # When debugging we don't want to timeout in the VectorSampledTasks

    # noinspection PyPackageRequirements
    import pydevd

    DEBUGGING = str2bool(os.getenv("ALLENACT_DEBUG", "true"))
except ImportError:
    DEBUGGING = str2bool(os.getenv("ALLENACT_DEBUG", "false"))

# Timeout read from the `ALLENACT_DEBUG_VST_TIMEOUT` environment variable and
# converted to `int`; `None` when the variable is not set. It is only used in
# place of the default timeouts when `DEBUGGING` is true.
DEBUG_VST_TIMEOUT: Optional[int] = (lambda x: int(x) if x is not None else x)(
    os.getenv("ALLENACT_DEBUG_VST_TIMEOUT", None)
)

# The three modes an `OnPolicyRLEngine` can run in.
TRAIN_MODE_STR = "train"
VALID_MODE_STR = "valid"
TEST_MODE_STR = "test"


class OnPolicyRLEngine(object):
    """The reinforcement learning primary controller.

    This `OnPolicyRLEngine` class handles all training, validation, and
    testing as well as logging and checkpointing. You are not expected
    to instantiate this class yourself, instead you should define an
    experiment which will then be used to instantiate an
    `OnPolicyRLEngine` and perform any desired tasks.
    """

    def __init__(
        self,
        experiment_name: str,
        config: ExperimentConfig,
        results_queue: mp.Queue,  # to output aggregated results
        checkpoints_queue: Optional[
            mp.Queue
        ],  # to write/read (trainer/evaluator) ready checkpoints
        checkpoints_dir: str,
        mode: str = "train",
        callback_sensors: Optional[Sequence[Sensor]] = None,
        seed: Optional[int] = None,
        deterministic_cudnn: bool = False,
        mp_ctx: Optional[BaseContext] = None,
        worker_id: int = 0,
        num_workers: int = 1,
        device: Union[str, torch.device, int] = "cpu",
        distributed_ip: str = "127.0.0.1",
        distributed_port: int = 0,
        deterministic_agents: bool = False,
        max_sampler_processes_per_worker: Optional[int] = None,
        initial_model_state_dict: Optional[Union[Dict[str, Any], int]] = None,
        try_restart_after_task_error: bool = False,
        **kwargs,
    ):
        """Initializer.

        Creates the model on the requested device, optionally loads/verifies its
        initial weights and, when `num_workers > 1`, joins the distributed
        process group.

        # Parameters

        experiment_name : Name of the experiment being run.
        config : The ExperimentConfig defining the experiment to run.
        results_queue : Queue used to output aggregated results.
        checkpoints_queue : Queue used to write/read (trainer/evaluator) ready checkpoints.
        checkpoints_dir : Directory associated with checkpoints for this run.
        mode : "train", "valid", or "test" (case and surrounding whitespace are ignored).
        callback_sensors : Optional sensors forwarded to the vectorized task samplers.
        seed : Seed used to encourage deterministic behavior (it is difficult to ensure
            completely deterministic behavior due to CUDA issues and nondeterminism
            in environments).
        deterministic_cudnn : Whether to use deterministic cudnn. If `True` this may lower
            training performance this is necessary (but not sufficient) if you desire
            deterministic behavior.
        mp_ctx : Optional multiprocessing context forwarded to the vectorized task samplers.
        worker_id : Index of this worker (also used as its rank when distributed).
        num_workers : Total number of workers; values `> 1` enable distributed mode.
        device : Device on which the model lives; `-1` is interpreted as the CPU.
        distributed_ip : Host name used for the distributed `TCPStore`.
        distributed_port : Port used for the distributed `TCPStore`.
        deterministic_agents : If `True`, `act` uses the mode of the action distribution
            instead of sampling from it.
        max_sampler_processes_per_worker : `None` or a positive integer bounding the number
            of sampler processes of this worker.
        initial_model_state_dict : Either a state dict to load into the freshly created
            model, or an integer hash that the freshly created model's state dict must
            reproduce. Must not be `None` when training with multiple workers.
        try_restart_after_task_error : Stored on the engine as given.
        kwargs : Accepted for compatibility; not used by this initializer.
        """
        self.config = config
        self.results_queue = results_queue
        self.checkpoints_queue = checkpoints_queue
        self.mp_ctx = mp_ctx
        self.checkpoints_dir = checkpoints_dir
        self.worker_id = worker_id
        self.num_workers = num_workers
        self.device = torch.device("cpu") if device == -1 else torch.device(device)  # type: ignore

        if self.device != torch.device("cpu"):
            torch.cuda.set_device(device)

        self.distributed_ip = distributed_ip
        self.distributed_port = distributed_port
        self.try_restart_after_task_error = try_restart_after_task_error

        self.mode = mode.lower().strip()
        assert self.mode in [
            TRAIN_MODE_STR,
            VALID_MODE_STR,
            TEST_MODE_STR,
        ], f"Only {TRAIN_MODE_STR}, {VALID_MODE_STR}, {TEST_MODE_STR}, modes supported"

        self.callback_sensors = callback_sensors
        self.deterministic_cudnn = deterministic_cudnn
        if self.deterministic_cudnn:
            set_deterministic_cudnn()

        self.seed = seed
        set_seed(self.seed)

        self.experiment_name = experiment_name

        assert (
            max_sampler_processes_per_worker is None
            or max_sampler_processes_per_worker >= 1
        ), "`max_sampler_processes_per_worker` must be either `None` or a positive integer."
        self.max_sampler_processes_per_worker = max_sampler_processes_per_worker

        # `config.machine_params` may return either a `MachineParams` instance or
        # a mapping of keyword arguments from which one can be built.
        machine_params = config.machine_params(self.mode)
        self.machine_params: MachineParams
        if isinstance(machine_params, MachineParams):
            self.machine_params = machine_params
        else:
            self.machine_params = MachineParams(**machine_params)

        self.num_samplers_per_worker = self.machine_params.nprocesses
        self.num_samplers = self.num_samplers_per_worker[self.worker_id]

        # Created lazily by the `vector_tasks` property.
        self._vector_tasks: Optional[
            Union[VectorSampledTasks, SingleProcessVectorSampledTasks]
        ] = None

        self.sensor_preprocessor_graph = None
        self.actor_critic: Optional[ActorCriticModel] = None

        create_model_kwargs = {}
        if self.machine_params.sensor_preprocessor_graph is not None:
            self.sensor_preprocessor_graph = (
                self.machine_params.sensor_preprocessor_graph.to(self.device)
            )
            create_model_kwargs["sensor_preprocessor_graph"] = (
                self.sensor_preprocessor_graph
            )

        # Re-seed right before model creation so that initialization is
        # reproducible across workers given the same seed.
        set_seed(self.seed)
        self.actor_critic = cast(
            ActorCriticModel,
            self.config.create_model(**create_model_kwargs),
        ).to(self.device)

        if initial_model_state_dict is not None:
            if isinstance(initial_model_state_dict, int):
                # An integer is a hash of the expected initial weights: verify
                # that this worker reproduced the same initialization.
                assert (
                    md5_hash_of_state_dict(self.actor_critic.state_dict())
                    == initial_model_state_dict
                ), (
                    f"Could not reproduce the correct model state dict on worker {self.worker_id} despite seeding."
                    f" Please ensure that your model's initialization is reproducable when `set_seed(...)`"
                    f"] has been called with a fixed seed before initialization."
                )
            else:
                self.actor_critic.load_state_dict(
                    state_dict=cast(
                        "OrderedDict[str, Tensor]", initial_model_state_dict
                    )
                )
        else:
            # Use the normalized `self.mode` (not the raw `mode` argument) so
            # that e.g. "Train" cannot bypass this check.
            assert self.mode != TRAIN_MODE_STR or self.num_workers == 1, (
                "When training with multiple workers you must pass a,"
                " non-`None` value for the `initial_model_state_dict` argument."
            )

        if get_logger().level == logging.DEBUG:
            model_hash = md5_hash_of_state_dict(self.actor_critic.state_dict())
            get_logger().debug(
                f"[{self.mode} worker {self.worker_id}] model weights hash: {model_hash}"
            )

        self.is_distributed = False
        self.store: Optional[torch.distributed.TCPStore] = None  # type:ignore
        if self.num_workers > 1:
            # `DEBUG_VST_TIMEOUT` is `None` unless `ALLENACT_DEBUG_VST_TIMEOUT`
            # is set; multiplying it would raise a `TypeError`. When debugging
            # without an explicit timeout, fall back to the same long timeout
            # used for the process group below.
            if DEBUGGING and DEBUG_VST_TIMEOUT is None:
                store_timeout = datetime.timedelta(minutes=3000)
            else:
                store_timeout = datetime.timedelta(
                    seconds=3 * (DEBUG_VST_TIMEOUT if DEBUGGING else 1 * 60) + 300
                )

            self.store = torch.distributed.TCPStore(  # type:ignore
                host_name=self.distributed_ip,
                port=self.distributed_port,
                world_size=self.num_workers,
                is_master=self.worker_id == 0,
                timeout=store_timeout,
            )
            cpu_device = self.device == torch.device("cpu")  # type:ignore

            # "gloo" required during testing to ensure that `barrier()` doesn't time out.
            backend = "gloo" if cpu_device or self.mode == TEST_MODE_STR else "nccl"
            get_logger().debug(
                f"Worker {self.worker_id}: initializing distributed {backend} backend with device {self.device}."
            )
            dist.init_process_group(  # type:ignore
                backend=backend,
                store=self.store,
                rank=self.worker_id,
                world_size=self.num_workers,
                # During testing, we sometimes found that default timeout was too short
                # resulting in the run terminating surprisingly, we increase it here.
                timeout=(
                    datetime.timedelta(minutes=3000)
                    if (self.mode == TEST_MODE_STR or DEBUGGING)
                    else dist.default_pg_timeout
                ),
            )
            self.is_distributed = True

        self.deterministic_agents = deterministic_agents

        self._is_closing: bool = (
            False  # Useful for letting the RL runner know if this is closing
        )
        self._is_closed: bool = False

        # Keeping track of metrics and losses during training/inference
        self.single_process_metrics: List = []
        self.single_process_task_callback_data: List = []
        self.tracking_info_list: List[TrackingInfo] = []

        # Variables that wil only be instantiated in the trainer
        self.optimizer: Optional[optim.optimizer.Optimizer] = None
        # noinspection PyProtectedMember
        self.lr_scheduler: Optional[_LRScheduler] = None
        self.insufficient_data_for_update: Optional[torch.distributed.PrefixStore] = (
            None
        )

        # Training pipeline will be instantiated during training and inference.
        # During inference however, it will be instantiated anew on each run of `run_eval`
        # and will be set to `None` after the eval run is complete.
        self.training_pipeline: Optional[TrainingPipeline] = None

    @property
    def vector_tasks(
        self,
    ) -> Union[VectorSampledTasks, SingleProcessVectorSampledTasks]:
        """The vectorized task samplers of this worker, created on first access.

        If no vector tasks exist yet and this worker has a positive number of
        samplers, a `VectorSampledTasks` is built and cached. Otherwise the
        cached value is returned as is (it is `None` when nothing has been
        created, e.g. because this worker has no samplers).
        """
        if self._vector_tasks is not None or self.num_samplers <= 0:
            return self._vector_tasks

        # TODO this will break the fixed seed for multi-device test
        total_processes = (
            sum(self.num_samplers_per_worker)
            if self.is_distributed
            else self.num_samplers
        )

        # `worker_seeds` does not update the RNG state when a seed is given
        # (creation might happen after seed resetting).
        seeds = self.worker_seeds(total_processes, initial_seed=self.seed)

        # TODO: Using a `SingleProcessVectorSampledTasks` when
        #   `self.max_sampler_processes_per_worker == 1` would be great to have
        #   (no need to instantiate new task sampler processes in that case) but
        #   it does not play nicely with us wanting to kill things using
        #   SIGTERM/SIGINT signals. Would be nice to figure out a solution to
        #   this at some point.
        start_method = "forkserver" if self.mp_ctx is None else None
        read_timeout = DEBUG_VST_TIMEOUT if DEBUGGING else 1 * 60

        self._vector_tasks = VectorSampledTasks(
            make_sampler_fn=self.config.make_sampler_fn,
            sampler_fn_args=self.get_sampler_fn_args(seeds),
            callback_sensors=self.callback_sensors,
            multiprocessing_start_method=start_method,
            mp_ctx=self.mp_ctx,
            max_processes=self.max_sampler_processes_per_worker,
            read_timeout=read_timeout,
        )
        return self._vector_tasks

    @staticmethod
    def worker_seeds(nprocesses: int, initial_seed: Optional[int]) -> List[int]:
        """Create a collection of seeds for workers without modifying the RNG
        state."""
        rstate = None  # type:ignore
        if initial_seed is not None:
            rstate = random.getstate()
            random.seed(initial_seed)
        seeds = [random.randint(0, (2**31) - 1) for _ in range(nprocesses)]
        if initial_seed is not None:
            random.setstate(rstate)
        return seeds

    def get_sampler_fn_args(self, seeds: Optional[List[int]] = None):
        """Build the task sampler arguments for each sampler of this worker.

        The experiment config method matching the current mode
        (`train_task_sampler_args`, `valid_task_sampler_args` or
        `test_task_sampler_args`) is called once per sampler of this worker.

        # Parameters

        seeds : Optional list of seeds forwarded unchanged to every call.

        # Returns

        A list with one entry (the value returned by the config method) per
        sampler of this worker.

        # Raises

        NotImplementedError: if `self.mode` is not one of the supported modes.
        """
        sampler_devices = self.machine_params.sampler_devices

        args_fn_name_for_mode = {
            TRAIN_MODE_STR: "train_task_sampler_args",
            VALID_MODE_STR: "valid_task_sampler_args",
            TEST_MODE_STR: "test_task_sampler_args",
        }
        if self.mode not in args_fn_name_for_mode:
            raise NotImplementedError(
                f"self.mode must be one of {TRAIN_MODE_STR}, {VALID_MODE_STR}, or {TEST_MODE_STR}."
            )
        args_fn = getattr(self.config, args_fn_name_for_mode[self.mode])

        # When distributed, process indices are global across all workers.
        total_processes = self.num_samplers
        process_offset = 0
        if self.is_distributed:
            total_processes = sum(self.num_samplers_per_worker)
            process_offset = sum(self.num_samplers_per_worker[: self.worker_id])

        use_worker_device = (
            self.is_distributed or self.mode == TEST_MODE_STR
        ) and self.device.index is not None

        devices: Optional[List[int]]
        if use_worker_device:
            devices = [self.device.index]
        elif sampler_devices is not None:
            # Devices without an index are encoded as -1.
            devices = [
                -1 if device.index is None else device.index
                for device in sampler_devices
            ]
        else:
            devices = None

        return [
            args_fn(
                process_ind=process_offset + local_ind,
                total_processes=total_processes,
                devices=devices,
                seeds=seeds,
            )
            for local_ind in range(self.num_samplers)
        ]

    def checkpoint_load(
        self, ckpt: Union[str, Dict[str, Any]], restart_pipeline: bool
    ) -> Dict[str, Union[Dict[str, Any], torch.Tensor, float, int, str, List]]:
        if isinstance(ckpt, str):
            get_logger().info(
                f"[{self.mode} worker {self.worker_id}] Loading checkpoint from {ckpt}"
            )
            # Map location CPU is almost always better than mapping to a CUDA device.
            ckpt = torch.load(os.path.abspath(ckpt), map_location="cpu")

        ckpt = cast(
            Dict[str, Union[Dict[str, Any], torch.Tensor, float, int, str, List]],
            ckpt,
        )

        self.actor_critic.load_state_dict(ckpt["model_state_dict"])  # type:ignore

        if "training_pipeline_state_dict" in ckpt and not restart_pipeline:
            self.training_pipeline.load_state_dict(
                cast(Dict[str, Any], ckpt["training_pipeline_state_dict"])
            )

        return ckpt

    # aggregates task metrics currently in queue
    def aggregate_task_metrics(
        self,
        logging_pkg: LoggingPackage,
        num_tasks: int = -1,
    ) -> LoggingPackage:
        """Move all collected single-task metrics into a logging package.

        # Parameters

        logging_pkg : Package to which every collected metrics dict is added.
        num_tasks : If positive, the number of metrics dicts expected to be
            available; a mismatch is logged as an error (but does not abort).

        # Returns

        The same `logging_pkg`, after the metrics were added. The list of
        collected metrics on this engine is emptied.
        """
        num_collected = len(self.single_process_metrics)
        if num_tasks > 0 and num_collected != num_tasks:
            relation = "shorter" if num_collected < num_tasks else "longer"
            get_logger().error(
                f"Metrics out is {relation} than expected number of tasks."
                " This should only happen if a positive number of `num_tasks` were"
                " set during testing but the queue did not contain this number of entries."
                " Please file an issue at https://github.com/allenai/allenact/issues."
            )

        # `add_metrics_dict` returns a falsy value for metrics that were not
        # added; count those so that they can be reported.
        num_discarded = sum(
            not logging_pkg.add_metrics_dict(single_task_metrics_dict=task_metrics)
            for task_metrics in self.single_process_metrics
        )

        self.single_process_metrics = []

        if num_discarded != 0:
            get_logger().warning(f"Discarded {num_discarded} empty task metrics")

        return logging_pkg

    def _preprocess_observations(self, batched_observations):
        if self.sensor_preprocessor_graph is None:
            return batched_observations
        return self.sensor_preprocessor_graph.get_observations(batched_observations)

    def remove_paused(self, observations):
        """Pause samplers that returned no observation and batch the rest.

        # Parameters

        observations : Per-sampler observations; a `None` entry marks a
            sampler that should be paused.

        # Returns

        A tuple `(npaused, keep, batch)` with the number of samplers paused by
        this call, the indices of the samplers that keep running and the
        batched observations of those samplers (on `self.device`).
        """
        indexed = list(enumerate(observations))
        paused = [ind for ind, obs in indexed if obs is None]
        keep = [ind for ind, obs in indexed if obs is not None]
        running = [obs for _, obs in indexed if obs is not None]

        # Pause from the highest index down, as in a reversed traversal.
        for paused_ind in paused[::-1]:
            self.vector_tasks.pause_at(paused_ind)

        # Group samplers along new dim:
        batch = batch_observations(running, device=self.device)

        return len(paused), keep, batch

    def initialize_storage_and_viz(
        self,
        storage_to_initialize: Optional[Sequence[ExperienceStorage]],
        visualizer: Optional[VizSuite] = None,
    ):
        """Initialize experience storages and, optionally, the visualizer.

        Initial observations are only fetched from the task samplers when a
        visualizer is given or when at least one of the storages is a
        `RolloutStorage`; otherwise storages are initialized with empty
        observations and zero samplers.

        # Parameters

        storage_to_initialize : Storages to move to `self.device`, partition
            across workers and initialize (may be `None`).
        visualizer : Optional visualizer that collects from the vector tasks
            when there is at least one running sampler.

        # Returns

        The number of samplers paused while fetching the initial observations
        (asserted to be 0 when observations are fetched).
        """
        needs_observations = visualizer is not None or (
            storage_to_initialize is not None
            and any(
                isinstance(storage, RolloutStorage)
                for storage in storage_to_initialize
            )
        )

        keep: Optional[List] = None
        if needs_observations:
            raw_observations = self.vector_tasks.get_observations()

            npaused, keep, batch = self.remove_paused(raw_observations)
            if len(keep) > 0:
                observations = self._preprocess_observations(batch)
            else:
                observations = batch

            assert npaused == 0, f"{npaused} samplers are paused during initialization."

            num_samplers = len(keep)
        else:
            # Nothing requires initial observations from the samplers.
            observations = {}
            num_samplers = 0
            npaused = 0

        recurrent_memory_specification = (
            self.actor_critic.recurrent_memory_specification
        )

        if storage_to_initialize is not None:
            for storage in storage_to_initialize:
                storage.to(self.device)
                storage.set_partition(index=self.worker_id, num_parts=self.num_workers)
                storage.initialize(
                    observations=observations,
                    num_samplers=num_samplers,
                    recurrent_memory_specification=recurrent_memory_specification,
                    action_space=self.actor_critic.action_space,
                )

        if visualizer is not None and num_samplers > 0:
            visualizer.collect(vector_task=self.vector_tasks, alive=keep)

        return npaused

    @property
    def num_active_samplers(self):
        if self.vector_tasks is None:
            return 0
        return self.vector_tasks.num_unpaused_tasks

    def act(
        self,
        rollout_storage: RolloutStorage,
        dist_wrapper_class: Optional[type] = None,
    ):
        """Run the model on the next-step input and choose actions.

        Everything is computed with gradients disabled.

        # Parameters

        rollout_storage : Storage providing the agent's input for the next step.
        dist_wrapper_class : Optional class used to wrap the action
            distributions; it is called with the distributions (`distr`) and
            the input observations (`obs`).

        # Returns

        A tuple `(actions, actor_critic_output, memory, observations)`, where
        actions are the mode of the distribution if `self.deterministic_agents`
        is set and samples from it otherwise.
        """
        with torch.no_grad():
            agent_input = rollout_storage.agent_input_for_next_step()
            actor_critic_output, memory = self.actor_critic(**agent_input)

            action_distr = actor_critic_output.distributions
            if dist_wrapper_class is not None:
                action_distr = dist_wrapper_class(
                    distr=action_distr, obs=agent_input["observations"]
                )

            if self.deterministic_agents:
                actions = action_distr.mode()
            else:
                actions = action_distr.sample()

        return actions, actor_critic_output, memory, agent_input["observations"]

    def aggregate_and_send_logging_package(
        self,
        tracking_info_list: List[TrackingInfo],
        logging_pkg: Optional[LoggingPackage] = None,
        send_logging_package: bool = True,
        checkpoint_file_name: Optional[str] = None,
    ):
        """Collect metrics, callback data and tracking info into a logging package.

        # Parameters

        tracking_info_list : Tracking infos to add to the package; entries
            with a negative `n` are skipped with a warning.
        logging_pkg : Package to fill. If `None`, a new one is created from
            the current state of the training pipeline.
        send_logging_package : Whether to put the package on the results queue.
        checkpoint_file_name : Checkpoint file name recorded in a newly
            created package (unused if `logging_pkg` is given).

        # Returns

        The filled logging package.
        """
        if logging_pkg is None:
            pipeline = self.training_pipeline
            logging_pkg = LoggingPackage(
                mode=self.mode,
                training_steps=pipeline.total_steps,
                pipeline_stage=pipeline.current_stage_index,
                storage_uuid_to_total_experiences=pipeline.storage_uuid_to_total_experiences,
                checkpoint_file_name=checkpoint_file_name,
            )

        self.aggregate_task_metrics(logging_pkg=logging_pkg)

        # Hand over (and then forget) all pending task callback data.
        for callback_data in self.single_process_task_callback_data:
            logging_pkg.task_callback_data.append(callback_data)
        self.single_process_task_callback_data = []

        for info in tracking_info_list:
            if info.n < 0:
                get_logger().warning(
                    f"Obtained a train_info_dict with {info.n} elements."
                    f" Full info: ({info.type}, {info.info}, {info.n})."
                )
                continue

            info_dict = info.info
            if info.type == TrackingInfoType.LOSS:
                # Loss entries are namespaced under "losses/".
                info_dict = {f"losses/{key}": value for key, value in info_dict.items()}

            logging_pkg.add_info_dict(
                info_dict=info_dict,
                n=info.n,
                stage_component_uuid=info.stage_component_uuid,
                storage_uuid=info.storage_uuid,
            )

        if send_logging_package:
            self.results_queue.put(logging_pkg)

        return logging_pkg

    @staticmethod
    def _active_memory(memory, keep):
        return memory.sampler_select(keep) if memory is not None else memory

    def probe(self, dones: List[bool], npaused, period=100000):
        """Debugging util. When called from
        self.collect_step_across_all_task_samplers(...), calls render for the
        0-th task sampler of the 0-th distributed worker for the first
        beginning episode spaced at least period steps from the beginning of
        the previous one.

        For valid, train, it currently renders all episodes for the 0-th task sampler of the
        0-th distributed worker. If this is not wanted, it must be hard-coded for now below.

        State is kept across calls in two attributes created on demand:
        `_probe_npaused` (running total of `npaused`, only outside train mode) and
        `_probe_steps` (see the comments in the body).

        # Parameters

        dones : dones list from self.collect_step_across_all_task_samplers(...)
        npaused : number of newly paused tasks returned by self.remove_paused(...)
        period : minimal spacing in sampled steps between the beginning of episodes to be shown.
        """
        # Only the 0-th task sampler is ever probed.
        sampler_id = 0
        done = dones[sampler_id]
        if self.mode != TRAIN_MODE_STR:
            # Outside of training: accumulate the number of paused samplers and
            # stop (resetting the counter) once all samplers have been paused.
            setattr(
                self, "_probe_npaused", getattr(self, "_probe_npaused", 0) + npaused
            )
            if self._probe_npaused == self.num_samplers:  # type:ignore
                del self._probe_npaused  # type:ignore
                return
            # A period of 0 makes the code below render on every call.
            period = 0
        if self.worker_id == 0:
            if done:
                # `_probe_steps` holds the value of `total_steps` at which the
                # episode being rendered started; it is negated once that
                # episode has finished. A new rendered episode starts if none
                # was rendered so far, or if the last one has finished and at
                # least `period` steps have passed since it started.
                if period > 0 and (
                    getattr(self, "_probe_steps", None) is None
                    or (
                        self._probe_steps < 0  # type:ignore
                        and (
                            self.training_pipeline.total_steps
                            + self._probe_steps  # type:ignore
                        )
                        >= period
                    )
                ):
                    self._probe_steps = self.training_pipeline.total_steps
            # NOTE(review): if a rendered episode lasts `period` or more steps,
            # `_probe_steps` stays non-negative and neither branch here updates
            # it again -- confirm whether this is intended.
            if period == 0 or (
                getattr(self, "_probe_steps", None) is not None
                and self._probe_steps >= 0
                and ((self.training_pipeline.total_steps - self._probe_steps) < period)
            ):
                if (
                    period == 0
                    or not done
                    or self._probe_steps == self.training_pipeline.total_steps
                ):
                    # Render while always-on (`period == 0`), mid-episode, or on
                    # the very step at which the rendered episode started.
                    self.vector_tasks.call_at(sampler_id, "render", ["human"])
                else:
                    # The rendered episode just finished: mark it as such by
                    # negating the stored start step.
                    # noinspection PyAttributeOutsideInit
                    self._probe_steps = -self._probe_steps

    def collect_step_across_all_task_samplers(
        self,
        rollout_storage_uuid: str,
        uuid_to_storage: Dict[str, ExperienceStorage],
        visualizer=None,
        dist_wrapper_class=None,
    ) -> int:
        """Take one step in every task sampler and add the results to all storages.

        Samples actions with `self.act(...)`, sends them to `self.vector_tasks`,
        collects completed-task metrics and callback data from the step infos,
        removes paused samplers, and finally calls `add(...)` on every storage in
        `uuid_to_storage` with the data of the samplers that are kept.

        # Parameters

        rollout_storage_uuid : key in `uuid_to_storage` of the rollout storage passed to `self.act(...)`.
        uuid_to_storage : all storages that should receive the collected step.
        visualizer : optional object whose `collect(...)` method is called after the storages are updated.
        dist_wrapper_class : forwarded unchanged to `self.act(...)`.

        # Returns

        The number of paused samplers reported by `self.remove_paused(...)`.

        # Raises

        NotImplementedError : if the rewards do not form a 1-dimensional tensor, or if a
            sampler was paused while in training mode.
        """
        rollout_storage = cast(RolloutStorage, uuid_to_storage[rollout_storage_uuid])
        actions, actor_critic_output, memory, _ = self.act(
            rollout_storage=rollout_storage,
            dist_wrapper_class=dist_wrapper_class,
        )

        # Flatten actions
        flat_actions = su.flatten(self.actor_critic.action_space, actions)

        assert len(flat_actions.shape) == 3, (
            "Distribution samples must include step and task sampler dimensions [step, sampler, ...]. The simplest way"
            "to accomplish this is to pass param tensors (like `logits` in a `CategoricalDistr`) with these dimensions"
            "to the Distribution."
        )

        # Convert flattened actions into list of actions and send them
        outputs: List[RLStepResult] = self.vector_tasks.step(
            su.action_list(self.actor_critic.action_space, flat_actions)
        )

        # Save after task completion metrics
        # (the entries are removed from `info` once they have been recorded)
        for step_result in outputs:
            if step_result.info is not None:
                if COMPLETE_TASK_METRICS_KEY in step_result.info:
                    self.single_process_metrics.append(
                        step_result.info[COMPLETE_TASK_METRICS_KEY]
                    )
                    del step_result.info[COMPLETE_TASK_METRICS_KEY]
                if COMPLETE_TASK_CALLBACK_KEY in step_result.info:
                    self.single_process_task_callback_data.append(
                        step_result.info[COMPLETE_TASK_CALLBACK_KEY]
                    )
                    del step_result.info[COMPLETE_TASK_CALLBACK_KEY]

        # Transpose the per-sampler step results into per-field lists.
        rewards: Union[List, torch.Tensor]
        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]

        rewards = torch.tensor(
            rewards,
            dtype=torch.float,
            device=self.device,  # type:ignore
        )

        # We want rewards to have dimensions [sampler, reward]
        if len(rewards.shape) == 1:
            # Rewards are of shape [sampler,]
            rewards = rewards.unsqueeze(-1)
        elif len(rewards.shape) > 1:
            raise NotImplementedError()

        # If done then clean the history of observations.
        # (masks are 0.0 where the corresponding entry of `dones` is True and 1.0 otherwise)
        masks = (
            1.0
            - torch.tensor(
                dones,
                dtype=torch.float32,
                device=self.device,  # type:ignore
            )
        ).view(
            -1, 1
        )  # [sampler, 1]

        # `keep` is used below to index the per-sampler tensors of the samplers that remain.
        npaused, keep, batch = self.remove_paused(observations)

        if hasattr(self.actor_critic, "sampler_select"):
            self.actor_critic.sampler_select(keep)

        # TODO self.probe(...) can be useful for debugging (we might want to control it from main?)
        # self.probe(dones, npaused)

        if npaused > 0:
            if self.mode == TRAIN_MODE_STR:
                raise NotImplementedError(
                    "When trying to get a new task from a task sampler (using the `.next_task()` method)"
                    " the task sampler returned `None`. This is not currently supported during training"
                    " (and almost certainly a bug in the implementation of the task sampler or in the "
                    " initialization of the task sampler for training)."
                )

            # Only rollout storages are narrowed down to the kept samplers.
            for s in uuid_to_storage.values():
                if isinstance(s, RolloutStorage):
                    s.sampler_select(keep)

        # Index 0 below selects the single step of the [step, sampler, ...] tensors.
        to_add_to_storage = dict(
            observations=(
                self._preprocess_observations(batch) if len(keep) > 0 else batch
            ),
            memory=self._active_memory(memory, keep),
            actions=flat_actions[0, keep],
            action_log_probs=actor_critic_output.distributions.log_prob(actions)[
                0, keep
            ],
            value_preds=actor_critic_output.values[0, keep],
            rewards=rewards[keep],
            masks=masks[keep],
        )
        for storage in uuid_to_storage.values():
            storage.add(**to_add_to_storage)

        # TODO we always miss tensors for the last action in the last episode of each worker
        if visualizer is not None:
            if len(keep) > 0:
                visualizer.collect(
                    rollout=rollout_storage,
                    vector_task=self.vector_tasks,
                    alive=keep,
                    actor_critic=actor_critic_output,
                )
            else:
                visualizer.collect(actor_critic=actor_critic_output)

        return npaused

    def distributed_weighted_sum(
        self,
        to_share: Union[torch.Tensor, float, int],
        weight: Union[torch.Tensor, float, int],
    ):
        """Sum `to_share * weight` over all distributed workers.

        When the engine is not distributed the local product is returned
        directly (a warning is logged if `weight` differs from 1).

        # Returns

        The resulting value as a Python scalar.
        """
        if not self.is_distributed:
            if abs(1 - weight) > 1e-5:
                get_logger().warning(
                    f"Scaling non-distributed value with weight {weight}"
                )
            local_value = torch.tensor(to_share * weight)
            return local_value.item()

        # Every worker contributes its weighted value; `all_reduce` sums them in place.
        aggregate = torch.tensor(to_share * weight).to(self.device)
        dist.all_reduce(aggregate)
        return aggregate.item()

    def distributed_reduce(
        self, to_share: Union[torch.Tensor, float, int], op: ReduceOp
    ):
        """Reduce a scalar across distributed workers using the operation `op`.

        When the engine is not distributed, `op` is ignored and the local
        value is returned.

        # Returns

        The resulting value as a Python scalar.
        """
        if not self.is_distributed:
            local_value = torch.tensor(to_share)
            return local_value.item()

        # `all_reduce` combines the per-worker values in place according to `op`.
        aggregate = torch.tensor(to_share).to(self.device)
        dist.all_reduce(aggregate, op=op)
        return aggregate.item()

    def backprop_step(
        self,
        total_loss: torch.Tensor,
        max_grad_norm: float,
        local_to_global_batch_size_ratio: float = 1.0,
    ):
        """Abstract hook performing a single backpropagation step.

        Not implemented in this class. `compute_losses_track_them_and_backprop`
        calls it (unless backprop is skipped) with the weighted total loss, the
        stage component's `max_grad_norm`, and the ratio of the local batch size
        to the batch size aggregated over workers; the returned value is tracked
        under the name `total_grad_norm`.
        """
        raise NotImplementedError

    def save_error_data(self, batch: Dict[str, Any]):
        """Abstract hook for saving debugging data about a failing `batch`.

        Not implemented in this class. It is called when the actor-critic raises
        a `ValueError` while processing `batch`; the returned value is reported
        in the error log as the location where the data was saved.
        """
        raise NotImplementedError

    @property
    def step_count(self) -> int:
        """Number of steps taken in the current pipeline stage (0 if there is no current stage)."""
        stage = self.training_pipeline.current_stage
        # A missing stage might occur during testing when all stages are complete.
        return 0 if stage is None else stage.steps_taken_in_stage

    def compute_losses_track_them_and_backprop(
        self,
        stage: PipelineStage,
        stage_component: StageComponent,
        storage: ExperienceStorage,
        skip_backprop: bool = False,
    ):
        """Compute the losses of `stage_component` on batches drawn from `storage`,
        record tracking information and (unless `skip_backprop`) backprop on each batch.

        For every update repeat, batches are drawn from `storage` (mini-batches for a
        `MiniBatchStorageMixin`, a single streamed batch for a `StreamingStorageMixin`).
        For every batch, each loss that is still active for the current repeat is
        evaluated, the weighted sum of the losses is formed, `TrackingInfo` entries are
        appended to `self.tracking_info_list` and, unless `skip_backprop` is set,
        `self.backprop_step(...)` is called.

        A `None` batch signals that no data is available: the batch is never
        passed to the losses and the iteration over batches stops. In distributed
        training the workers agree on this through the `insufficient_data_for_update`
        store so that all of them stop together.

        # Parameters

        stage : the pipeline stage providing the loss weights and the stream memory.
        stage_component : the stage component providing the loss names, training settings and uuids.
        storage : the storage from which batches are drawn.
        skip_backprop : if `True`, losses are computed and tracked but no backprop step is run.
            Must be `True` whenever the engine is not in training mode.

        # Raises

        NotImplementedError : if `storage` is neither a `MiniBatchStorageMixin` nor a
            `StreamingStorageMixin`, if a loss has an unsupported type, or if an
            actor-critic loss returns neither 2 nor 3 values.
        """
        training = self.mode == TRAIN_MODE_STR

        assert training or skip_backprop

        if training and self.is_distributed:
            # Reset the shared counter of workers lacking data, then wait for all workers.
            self.insufficient_data_for_update.set(
                "insufficient_data_for_update", str(0)
            )
            dist.barrier(
                device_ids=(
                    None if self.device == torch.device("cpu") else [self.device.index]
                )
            )

        training_settings = stage_component.training_settings

        loss_names = stage_component.loss_names
        losses = [self.training_pipeline.get_loss(ln) for ln in loss_names]
        loss_weights = [stage.uuid_to_loss_weight[ln] for ln in loss_names]
        loss_update_repeats_list = training_settings.update_repeats
        if isinstance(loss_update_repeats_list, numbers.Integral):
            # A single integer applies to every loss.
            loss_update_repeats_list = [loss_update_repeats_list] * len(loss_names)

        if skip_backprop and isinstance(storage, MiniBatchStorageMixin):
            if loss_update_repeats_list != [1] * len(loss_names):
                # Capture the requested values BEFORE overriding them, otherwise the
                # warning below would always report the overridden value (1).
                requested_update_repeats = dict(
                    zip(loss_names, loss_update_repeats_list)
                )
                loss_update_repeats_list = [1] * len(loss_names)
                get_logger().warning(
                    "Does not make sense to do multiple updates when"
                    " skip_backprop is `True` and you are using a storage of type"
                    " `MiniBatchStorageMixin`. This is likely a problem caused by"
                    " using a custom valid/test stage component that is inheriting its"
                    " TrainingSettings from the TrainingPipeline's TrainingSettings. We will override"
                    " the requested number of updates repeats (which was"
                    f" {requested_update_repeats}) to be 1 for all losses."
                )

        # The list of repeats is not modified below, so its maximum can be computed once.
        max_update_repeats = max(loss_update_repeats_list, default=0)

        enough_data_for_update = True
        for current_update_repeat_index in range(max_update_repeats):
            if isinstance(storage, MiniBatchStorageMixin):
                batch_iterator = storage.batched_experience_generator(
                    num_mini_batch=training_settings.num_mini_batch
                )
            elif isinstance(storage, StreamingStorageMixin):
                assert (
                    training_settings.num_mini_batch is None
                    or training_settings.num_mini_batch == 1
                )

                def single_batch_generator(streaming_storage: StreamingStorageMixin):
                    """Yield one batch from the stream, or `None` if the storage is empty."""
                    try:
                        yield cast(
                            StreamingStorageMixin, streaming_storage
                        ).next_batch()
                    except EOFError:
                        if not training:
                            raise

                        if streaming_storage.empty():
                            yield None
                        else:
                            # End of stream during training: restart the stream, clear
                            # the associated stream memory and try again.
                            cast(
                                StreamingStorageMixin, streaming_storage
                            ).reset_stream()
                            stage.stage_component_uuid_to_stream_memory[
                                stage_component.uuid
                            ].clear()
                            yield cast(
                                StreamingStorageMixin, streaming_storage
                            ).next_batch()

                batch_iterator = single_batch_generator(streaming_storage=storage)
            else:
                raise NotImplementedError(
                    f"Storage {storage} must be a subclass of `MiniBatchStorageMixin` or `StreamingStorageMixin`."
                )

            for batch in batch_iterator:
                if batch is None:
                    # This should only happen in a `StreamingStorageMixin` when it cannot
                    # generate an initial batch or when we are in testing/validation and
                    # we've reached the end of the dataset over which to test/validate.
                    if training:
                        assert isinstance(storage, StreamingStorageMixin)
                        get_logger().warning(
                            f"Worker {self.worker_id}: could not run update in {storage}, potentially because"
                            f" not enough data has been accumulated to be able to fill an initial batch."
                        )
                    enough_data_for_update = False

                if training and self.is_distributed:
                    # Let every worker report whether it lacks data; all workers stop
                    # together as soon as any of them has reported a lack of data.
                    self.insufficient_data_for_update.add(
                        "insufficient_data_for_update",
                        1 * (not enough_data_for_update),
                    )
                    dist.barrier(
                        device_ids=(
                            None
                            if self.device == torch.device("cpu")
                            else [self.device.index]
                        )
                    )

                    if (
                        int(
                            self.insufficient_data_for_update.get(
                                "insufficient_data_for_update"
                            )
                        )
                        != 0
                    ):
                        enough_data_for_update = False
                        break

                if batch is None:
                    # Outside of distributed training nothing above stops the loop, and
                    # a `None` batch cannot be passed to the losses below.
                    break

                info: Dict[str, float] = {}

                bsize: Optional[int] = None
                total_loss: Optional[torch.Tensor] = None
                actor_critic_output_for_batch: Optional[ActorCriticOutput] = None
                batch_memory = Memory()

                for loss, loss_name, loss_weight, max_update_repeats_for_loss in zip(
                    losses, loss_names, loss_weights, loss_update_repeats_list
                ):
                    # This loss has already been updated as many times as requested.
                    if current_update_repeat_index >= max_update_repeats_for_loss:
                        continue

                    if isinstance(loss, AbstractActorCriticLoss):
                        bsize = batch["bsize"]

                        # The actor-critic output is computed once per batch and shared
                        # by all actor-critic losses.
                        if actor_critic_output_for_batch is None:

                            try:
                                actor_critic_output_for_batch, _ = self.actor_critic(
                                    observations=batch["observations"],
                                    memory=batch["memory"],
                                    prev_actions=batch["prev_actions"],
                                    masks=batch["masks"],
                                )
                            except ValueError:
                                save_path = self.save_error_data(batch=batch)
                                get_logger().error(
                                    f"Encountered a value error! Likely because of nans in the output/input."
                                    f" Saving all error information to {save_path}."
                                )
                                raise

                        loss_return = loss.loss(
                            step_count=self.step_count,
                            batch=batch,
                            actor_critic_output=actor_critic_output_for_batch,
                        )

                        per_epoch_info = {}
                        if len(loss_return) == 2:
                            current_loss, current_info = loss_return
                        elif len(loss_return) == 3:
                            current_loss, current_info, per_epoch_info = loss_return
                        else:
                            raise NotImplementedError

                    elif isinstance(loss, GenericAbstractLoss):
                        loss_output = loss.loss(
                            model=self.actor_critic,
                            batch=batch,
                            batch_memory=batch_memory,
                            stream_memory=stage.stage_component_uuid_to_stream_memory[
                                stage_component.uuid
                            ],
                        )
                        current_loss = loss_output.value
                        current_info = loss_output.info
                        per_epoch_info = loss_output.per_epoch_info
                        batch_memory = loss_output.batch_memory
                        stage.stage_component_uuid_to_stream_memory[
                            stage_component.uuid
                        ] = loss_output.stream_memory
                        bsize = loss_output.bsize
                    else:
                        raise NotImplementedError(
                            f"Loss of type {type(loss)} is not supported. Losses must be subclasses of"
                            f" `AbstractActorCriticLoss` or `GenericAbstractLoss`."
                        )

                    if total_loss is None:
                        total_loss = loss_weight * current_loss
                    else:
                        total_loss = total_loss + loss_weight * current_loss

                    for key, value in current_info.items():
                        info[f"{loss_name}/{key}"] = value

                    if per_epoch_info is not None:
                        for key, value in per_epoch_info.items():
                            if max_update_repeats > 1:
                                # Track the value both per epoch and combined over epochs.
                                info[
                                    f"{loss_name}/{key}_epoch{current_update_repeat_index:02d}"
                                ] = value
                                info[f"{loss_name}/{key}_combined"] = value
                            else:
                                info[f"{loss_name}/{key}"] = value

                assert total_loss is not None, (
                    f"No {stage_component.uuid} losses specified for training in stage"
                    f" {self.training_pipeline.current_stage_index}"
                )

                total_loss_scalar = total_loss.item()
                info["total_loss"] = total_loss_scalar

                self.tracking_info_list.append(
                    TrackingInfo(
                        type=TrackingInfoType.LOSS,
                        info=info,
                        n=bsize,
                        storage_uuid=stage_component.storage_uuid,
                        stage_component_uuid=stage_component.uuid,
                    )
                )

                to_track = {
                    "rollout_epochs": max_update_repeats,
                    "worker_batch_size": bsize,
                }

                aggregate_bsize = None
                if training:
                    aggregate_bsize = self.distributed_weighted_sum(bsize, 1)
                    to_track["global_batch_size"] = aggregate_bsize
                    to_track["lr"] = self.optimizer.param_groups[0]["lr"]

                if training_settings.num_mini_batch is not None:
                    to_track["rollout_num_mini_batch"] = (
                        training_settings.num_mini_batch
                    )

                for k, v in to_track.items():
                    # We need to set the bsize to 1 for `worker_batch_size` below as we're trying to record the
                    # average batch size per worker, not the average per worker weighted by the size of the batches
                    # of those workers.
                    self.tracking_info_list.append(
                        TrackingInfo(
                            type=TrackingInfoType.UPDATE_INFO,
                            info={k: v},
                            n=1 if k == "worker_batch_size" else bsize,
                            storage_uuid=stage_component.storage_uuid,
                            stage_component_uuid=stage_component.uuid,
                        )
                    )

                if not skip_backprop:
                    total_grad_norm = self.backprop_step(
                        total_loss=total_loss,
                        max_grad_norm=training_settings.max_grad_norm,
                        local_to_global_batch_size_ratio=bsize / aggregate_bsize,
                    )
                    self.tracking_info_list.append(
                        TrackingInfo(
                            type=TrackingInfoType.UPDATE_INFO,
                            info={"total_grad_norm": total_grad_norm},
                            n=bsize,
                            storage_uuid=stage_component.storage_uuid,
                            stage_component_uuid=stage_component.uuid,
                        )
                    )

                # Detach the stream memory kept between batches.
                stage.stage_component_uuid_to_stream_memory[stage_component.uuid] = (
                    detach_recursively(
                        input=stage.stage_component_uuid_to_stream_memory[
                            stage_component.uuid
                        ],
                        inplace=True,
                    )
                )

    def close(self, verbose=True):
        """Close the engine's vector tasks (if any) and mark the engine as closed.

        Calling `close` on an engine that is already closed is a no-op; in
        particular it leaves the `_is_closing` flag untouched, so `is_closing`
        keeps reporting `False` after repeated calls (e.g. `__exit__` followed
        by `__del__`).

        Exceptions raised while closing the vector tasks are logged (when
        `verbose`) and otherwise swallowed, as closing is best effort.

        # Parameters

        verbose : whether to log progress and errors.
        """
        # Check for "already closed" BEFORE flagging the engine as closing, otherwise
        # the early return would leave `_is_closing` stuck at `True`.
        if "_is_closed" in self.__dict__ and self._is_closed:
            return

        self._is_closing = True

        def logif(s: Union[str, Exception]):
            """Log a message (info) or the current exception traceback (error) if verbose."""
            if verbose:
                if isinstance(s, str):
                    get_logger().info(s)
                elif isinstance(s, Exception):
                    get_logger().error(traceback.format_exc())
                else:
                    raise NotImplementedError()

        try:
            # `__dict__` is inspected directly as `close` may run (through `__del__`)
            # on an engine whose attributes have not all been set.
            if "_vector_tasks" in self.__dict__ and self._vector_tasks is not None:
                try:
                    logif(
                        f"[{self.mode} worker {self.worker_id}] Closing OnPolicyRLEngine.vector_tasks."
                    )
                    self._vector_tasks.close()
                    logif(f"[{self.mode} worker {self.worker_id}] Closed.")
                except Exception as e:
                    logif(
                        f"[{self.mode} worker {self.worker_id}] Exception raised when closing OnPolicyRLEngine.vector_tasks:"
                    )
                    logif(e)

            self._is_closed = True
        finally:
            # Never leave the engine flagged as "closing", whatever happened above.
            self._is_closing = False

    @property
    def is_closed(self):
        """Value of the `_is_closed` flag, which `close` sets to `True` once it has completed."""
        return self._is_closed

    @property
    def is_closing(self):
        """Value of the `_is_closing` flag, which `close` raises while it is running."""
        return self._is_closing

    def __del__(self):
        """Silently close the engine when the object is finalized."""
        self.close(verbose=False)

    def __enter__(self):
        """Enter a `with` block; the engine itself is the context value."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Silently close the engine when leaving a `with` block.

        Returns `None`, so an exception raised inside the block is not suppressed.
        """
        self.close(verbose=False)


class OnPolicyTrainer(OnPolicyRLEngine):
    def __init__(
        self,
        experiment_name: str,
        config: ExperimentConfig,
        results_queue: mp.Queue,
        checkpoints_queue: Optional[mp.Queue],
        checkpoints_dir: str = "",
        seed: Optional[int] = None,
        deterministic_cudnn: bool = False,
        mp_ctx: Optional[BaseContext] = None,
        worker_id: int = 0,
        num_workers: int = 1,
        device: Union[str, torch.device, int] = "cpu",
        distributed_ip: str = "127.0.0.1",
        distributed_port: int = 0,
        deterministic_agents: bool = False,
        distributed_preemption_threshold: float = 0.7,
        max_sampler_processes_per_worker: Optional[int] = None,
        save_ckpt_after_every_pipeline_stage: bool = True,
        first_local_worker_id: int = 0,
        save_ckpt_at_every_host: bool = False,
        **kwargs,
    ):
        """Initialize a training engine.

        The engine is always created in training mode (any `mode` passed through
        `kwargs` is overwritten). After the parent initializer has run, the
        actor-critic is put in train mode and the training pipeline, optimizer and
        (optional) learning rate scheduler are built from `config`.

        # Parameters

        distributed_preemption_threshold : stored when the engine is distributed; for a
            non-distributed engine the stored threshold is always 1.0.
        save_ckpt_after_every_pipeline_stage : stored on the engine.
        first_local_worker_id : stored on the engine.
        save_ckpt_at_every_host : stored on the engine.

        All remaining parameters (and `kwargs`) are passed through unchanged to the
        parent initializer.

        # Raises

        NotImplementedError : if more than one worker is used and some pipeline stage
            defines an early stopping criterion.
        """
        kwargs["mode"] = TRAIN_MODE_STR
        super().__init__(
            experiment_name=experiment_name,
            config=config,
            results_queue=results_queue,
            checkpoints_queue=checkpoints_queue,
            checkpoints_dir=checkpoints_dir,
            seed=seed,
            deterministic_cudnn=deterministic_cudnn,
            mp_ctx=mp_ctx,
            worker_id=worker_id,
            num_workers=num_workers,
            device=device,
            distributed_ip=distributed_ip,
            distributed_port=distributed_port,
            deterministic_agents=deterministic_agents,
            max_sampler_processes_per_worker=max_sampler_processes_per_worker,
            **kwargs,
        )

        self.save_ckpt_after_every_pipeline_stage = save_ckpt_after_every_pipeline_stage

        self.actor_critic.train()

        self.training_pipeline: TrainingPipeline = config.training_pipeline()

        if self.num_workers != 1:
            # Ensure that we're only using early stopping criterions in the non-distributed setting.
            if any(
                stage.early_stopping_criterion is not None
                for stage in self.training_pipeline.pipeline_stages
            ):
                raise NotImplementedError(
                    "Early stopping criterions are currently only allowed when using a single training worker, i.e."
                    " no distributed (multi-GPU) training. If this is a feature you'd like please create an issue"
                    " at https://github.com/allenai/allenact/issues or (even better) create a pull request with this "
                    " feature and we'll be happy to review it."
                )

        # Only parameters that require gradients are handed to the optimizer.
        self.optimizer: optim.optimizer.Optimizer = (
            self.training_pipeline.optimizer_builder(
                params=[p for p in self.actor_critic.parameters() if p.requires_grad]
            )
        )

        # noinspection PyProtectedMember
        self.lr_scheduler: Optional[_LRScheduler] = None
        if self.training_pipeline.lr_scheduler_builder is not None:
            self.lr_scheduler = self.training_pipeline.lr_scheduler_builder(
                optimizer=self.optimizer
            )

        if self.is_distributed:
            # Tracks how many workers have finished their rollout
            self.num_workers_done = torch.distributed.PrefixStore(  # type:ignore
                "num_workers_done", self.store
            )
            # Tracks the number of steps taken by each worker in current rollout
            self.num_workers_steps = torch.distributed.PrefixStore(  # type:ignore
                "num_workers_steps", self.store
            )
            self.distributed_preemption_threshold = distributed_preemption_threshold
            # Flag for finished worker in current epoch
            self.offpolicy_epoch_done = torch.distributed.PrefixStore(  # type:ignore
                "offpolicy_epoch_done", self.store
            )
            # Flag for finished worker in current epoch with custom component
            self.insufficient_data_for_update = (
                torch.distributed.PrefixStore(  # type:ignore
                    "insufficient_data_for_update", self.store
                )
            )
        else:
            self.num_workers_done = None
            self.num_workers_steps = None
            self.distributed_preemption_threshold = 1.0
            self.offpolicy_epoch_done = None
            # Defined (as `None`) like the other stores, so that the attribute exists
            # whether or not the engine is distributed.
            self.insufficient_data_for_update = None

        # Keeping track of training state
        self.former_steps: Optional[int] = None
        self.last_log: Optional[int] = None
        self.last_save: Optional[int] = None
        # The `self._last_aggregated_train_task_metrics` attribute defined
        # below is used for early stopping criterion computations
        self._last_aggregated_train_task_metrics: ScalarMeanTracker = (
            ScalarMeanTracker()
        )

        self.first_local_worker_id = first_local_worker_id
        self.save_ckpt_at_every_host = save_ckpt_at_every_host

    def advance_seed(
        self, seed: Optional[int], return_same_seed_per_worker=False
    ) -> Optional[int]:
        """Derive a new seed from `seed` and the total number of steps taken so far.

        # Parameters

        seed : the seed to advance; `None` is returned unchanged.
        return_same_seed_per_worker : if `True`, every worker obtains the same seed.
            Otherwise, in train and test modes, the seed at index `worker_id` of the
            seeds generated for all workers is returned.

        # Returns

        The advanced seed, or `None` if `seed` is `None`.
        """
        if seed is None:
            return seed

        # Mixing in the step count gives the same seed for all workers.
        modulus = 2**31 - 1
        shared_seed = (seed ^ (self.training_pipeline.total_steps + 1)) % modulus

        per_worker = (not return_same_seed_per_worker) and self.mode in (
            TRAIN_MODE_STR,
            TEST_MODE_STR,
        )

        # Neither call below modifies the current rng state.
        if per_worker:
            return self.worker_seeds(self.num_workers, shared_seed)[self.worker_id]
        return self.worker_seeds(1, shared_seed)[0]

    def deterministic_seeds(self) -> None:
        """Re-seed the engine and its task samplers; does nothing when no seed is set."""
        if self.seed is None:
            return

        # Put the rng in a known state for all workers.
        set_seed(self.advance_seed(self.seed))

        # Passing `None` uses the latest seed for the samplers and updates the rng state.
        sampler_seeds = self.worker_seeds(self.num_samplers, None)
        if self.vector_tasks is not None:
            self.vector_tasks.set_seeds(sampler_seeds)

    def save_error_data(self, batch: Dict[str, Any]) -> str:
        """Save the trainer state together with the offending `batch` for debugging.

        The file is written into `self.checkpoints_dir` under a name built from the
        experiment name, the current stage index and the total number of steps. A file
        lock guards the write, and nothing is written if the file already exists.

        # Returns

        The path of the error file.
        """
        pipeline = self.training_pipeline
        file_name = "error_for_exp_{}__stage_{:02d}__steps_{:012d}.pt".format(
            self.experiment_name,
            pipeline.current_stage_index,
            pipeline.total_steps,
        )
        model_path = os.path.join(self.checkpoints_dir, file_name)
        lock_path = os.path.join(self.checkpoints_dir, "error.lock")

        with filelock.FileLock(lock_path, timeout=60):
            if os.path.exists(model_path):
                # The error data for this step has already been saved.
                return model_path

            save_dict = {
                "model_state_dict": self.actor_critic.state_dict(),  # type:ignore
                "total_steps": pipeline.total_steps,  # Total steps including current stage
                "optimizer_state_dict": self.optimizer.state_dict(),  # type: ignore
                "training_pipeline_state_dict": pipeline.state_dict(),
                "trainer_seed": self.seed,
                "batch": batch,
            }

            scheduler = self.lr_scheduler
            if scheduler is not None:
                save_dict["scheduler_state"] = cast(
                    _LRScheduler, scheduler
                ).state_dict()

            torch.save(save_dict, model_path)

        return model_path

    def aggregate_and_send_logging_package(
        self,
        tracking_info_list: List[TrackingInfo],
        logging_pkg: Optional[LoggingPackage] = None,
        send_logging_package: bool = True,
        checkpoint_file_name: Optional[str] = None,
    ):
        """Delegate to the parent implementation and remember the aggregated train metrics.

        In training mode, the means and counts of the metrics tracker of the resulting
        logging package are added to `self._last_aggregated_train_task_metrics`.

        # Returns

        The logging package returned by the parent implementation.
        """
        logging_pkg = super().aggregate_and_send_logging_package(
            tracking_info_list=tracking_info_list,
            logging_pkg=logging_pkg,
            send_logging_package=send_logging_package,
            checkpoint_file_name=checkpoint_file_name,
        )

        # Technically self.mode should always be "train" here (as this is the training engine),
        # this check is defensive.
        if self.mode != TRAIN_MODE_STR:
            return logging_pkg

        tracker = logging_pkg.metrics_tracker
        self._last_aggregated_train_task_metrics.add_scalars(
            scalars=tracker.means(),
            n=tracker.counts(),
        )
        return logging_pkg

    def checkpoint_save(self, pipeline_stage_index: Optional[int] = None) -> str:
        """Save a checkpoint of the trainer state into `self.checkpoints_dir`.

        # Parameters

        pipeline_stage_index : stage index to use in the file name; defaults to the
            index of the current pipeline stage.

        # Returns

        The path of the saved checkpoint.
        """
        stage_index = pipeline_stage_index
        if stage_index is None:
            stage_index = self.training_pipeline.current_stage_index

        file_name = "exp_{}__stage_{:02d}__steps_{:012d}.pt".format(
            self.experiment_name,
            stage_index,
            self.training_pipeline.total_steps,
        )
        model_path = os.path.join(self.checkpoints_dir, file_name)

        save_dict = {
            "model_state_dict": self.actor_critic.state_dict(),  # type:ignore
            "total_steps": self.training_pipeline.total_steps,  # Total steps including current stage
            "optimizer_state_dict": self.optimizer.state_dict(),  # type: ignore
            "training_pipeline_state_dict": self.training_pipeline.state_dict(),
            "trainer_seed": self.seed,
        }

        scheduler = self.lr_scheduler
        if scheduler is not None:
            # The scheduler state is only stored when a scheduler is in use.
            save_dict["scheduler_state"] = cast(_LRScheduler, scheduler).state_dict()

        torch.save(save_dict, model_path)
        return model_path

    def checkpoint_load(
        self, ckpt: Union[str, Dict[str, Any]], restart_pipeline: bool = False
    ) -> Dict[str, Union[Dict[str, Any], torch.Tensor, float, int, str, List]]:
        """Load a checkpoint and restore (or restart) the trainer state.

        # Parameters

        ckpt : Either a checkpoint given as a `str` (handed to the parent
            implementation as is) or an already loaded checkpoint dictionary.
        restart_pipeline : If `True`, the saved training pipeline state is
            dropped from a dictionary checkpoint and the pipeline is restarted
            via `self.training_pipeline.restart_pipeline()`. If `False`, the
            trainer seed, optimizer state, and (when a scheduler exists and the
            checkpoint holds `"scheduler_state"`) the scheduler state are
            restored from the checkpoint.

        # Returns

        The checkpoint as returned by the parent implementation.
        """
        # `ckpt` may be a string rather than a dictionary. For a `str`, `in` is
        # a substring test and `del ckpt[...]` raises a `TypeError`, so the
        # pipeline state is only stripped from non-string checkpoints.
        if (
            restart_pipeline
            and not isinstance(ckpt, str)
            and "training_pipeline_state_dict" in ckpt
        ):
            del ckpt["training_pipeline_state_dict"]

        ckpt = super().checkpoint_load(ckpt, restart_pipeline=restart_pipeline)

        if restart_pipeline:
            self.training_pipeline.restart_pipeline()
        else:
            self.seed = cast(int, ckpt["trainer_seed"])
            self.optimizer.load_state_dict(ckpt["optimizer_state_dict"])  # type: ignore
            if self.lr_scheduler is not None and "scheduler_state" in ckpt:
                self.lr_scheduler.load_state_dict(ckpt["scheduler_state"])  # type: ignore

        # Re-seed after `self.seed` may have been replaced from the checkpoint.
        self.deterministic_seeds()

        return ckpt

    @property
    def step_count(self):
        """The current pipeline stage's `steps_taken_in_stage` counter."""
        current_stage = self.training_pipeline.current_stage
        return current_stage.steps_taken_in_stage

    @step_count.setter
    def step_count(self, val: int) -> None:
        """Overwrite the current pipeline stage's `steps_taken_in_stage` with `val`."""
        current_stage = self.training_pipeline.current_stage
        current_stage.steps_taken_in_stage = val

    @property
    def log_interval(self):
        """The `metric_accumulate_interval` of the current stage's training settings."""
        stage_settings = self.training_pipeline.current_stage.training_settings
        return stage_settings.metric_accumulate_interval

    @property
    def approx_steps(self):
        """Estimate of the step count, extrapolated across workers if distributed.

        Without distributed training this is simply `self.step_count`. With
        distributed training, the steps taken since `self.former_steps` are
        multiplied by `self.num_workers` and added back onto `self.former_steps`.
        """
        if not self.is_distributed:
            return self.step_count  # this is actually accurate

        # the actual number of steps gets synchronized after each rollout
        steps_since_sync = self.step_count - self.former_steps
        return steps_since_sync * self.num_workers + self.former_steps

    def act(
        self,
        rollout_storage: RolloutStorage,
        dist_wrapper_class: Optional[type] = None,
    ):
        """Take one acting step via the parent engine and advance the step count.

        If the current pipeline stage defines `teacher_forcing`, the action
        distribution is wrapped in a `TeacherForcingDistr` (a caller-supplied
        `dist_wrapper_class` is not allowed in that case) whose tracking
        callback appends `TrackingInfo` records to `self.tracking_info_list`.
        After the parent's `act` returns, `self.step_count` is increased by
        `self.num_active_samplers`.

        # Parameters

        rollout_storage : Forwarded to the parent implementation.
        dist_wrapper_class : Optional distribution wrapper forwarded to the
            parent implementation; must be `None` when teacher forcing is active.

        # Returns

        The tuple `(actions, actor_critic_output, memory, step_observation)`
        produced by the parent implementation.
        """
        teacher_forcing = self.training_pipeline.current_stage.teacher_forcing

        if teacher_forcing is not None:
            assert dist_wrapper_class is None

            def tracking_callback(type: TrackingInfoType, info: Dict[str, Any], n: int):
                record = TrackingInfo(
                    type=type,
                    info=info,
                    n=n,
                    storage_uuid=self.training_pipeline.rollout_storage_uuid,
                    stage_component_uuid=None,
                )
                self.tracking_info_list.append(record)

            dist_wrapper_class = partial(
                TeacherForcingDistr,
                action_space=self.actor_critic.action_space,
                num_active_samplers=self.num_active_samplers,
                approx_steps=self.approx_steps,
                teacher_forcing=teacher_forcing,
                tracking_callback=tracking_callback,
            )

        actions, ac_output, next_memory, observation = super().act(
            rollout_storage=rollout_storage,
            dist_wrapper_class=dist_wrapper_class,
        )

        # One step was taken by every active sampler.
        self.step_count += self.num_active_samplers

        return actions, ac_output, next_memory, observation

    def advantage_stats(self, advantages: torch.Tensor) -> Dict[str, torch.Tensor]:
        """Computes the mean and standard deviation of advantages (possibly over
        multiple workers). For multiple workers, the sums are all-reduced across
        workers so that the result is computed as though all workers' advantages
        had been gathered together first.

        # Parameters

        advantages: Tensor to compute the statistics over. Assumed to be solely the
         worker's local copy of this tensor; in distributed training the resulting
         mean and standard deviation are computed as though _all_ workers' versions
         of this tensor were concatenated together.

        # Returns

        A dictionary with the keys `"mean"` and `"std"`.
        """

        # Step count has already been updated with the steps from all workers
        num_global_steps = self.step_count - self.former_steps

        if not self.is_distributed:
            # noinspection PyArgumentList
            return {"mean": advantages.mean(), "std": advantages.std()}

        # Global mean: all-reduce the local sums, then divide by the global count.
        advantage_total = advantages.sum()
        dist.all_reduce(advantage_total)
        global_mean = advantage_total / num_global_steps

        # Global std: all-reduce the squared deviations from the global mean and
        # normalize by (count - 1).
        squared_deviation_total = (advantages - global_mean).pow(2).sum()
        dist.all_reduce(squared_deviation_total)
        global_std = (squared_deviation_total / (num_global_steps - 1)).sqrt()

        return {"mean": global_mean, "std": global_std}

    def backprop_step(
        self,
        total_loss: torch.Tensor,
        max_grad_norm: float,
        local_to_global_batch_size_ratio: float = 1.0,
    ):
        """Backpropagate a loss, synchronize gradients if distributed, clip, and step.

        # Parameters

        total_loss : The loss to backpropagate. If this is not a `torch.Tensor`
            the backward pass is skipped (gradients are still zeroed, reduced
            when distributed, clipped, and the optimizer is stepped).
        max_grad_norm : Passed as `max_norm` to `nn.utils.clip_grad_norm_`.
        local_to_global_batch_size_ratio : Factor by which every locally computed
            gradient is scaled before gradients are all-reduced across workers.
            Only used in distributed training.

        # Returns

        The value of `self.actor_critic.compute_total_grad_norm()`, taken before
        clipping, or `0.0` if the model does not define that method.
        """
        self.optimizer.zero_grad()  # type: ignore
        if isinstance(total_loss, torch.Tensor):
            total_loss.backward()

        if self.is_distributed:
            # From https://github.com/pytorch/pytorch/issues/43135
            pending_reductions = []
            for p in self.actor_critic.parameters():
                # you can also organize grads to larger buckets to make all_reduce more efficient
                if not p.requires_grad:
                    continue

                if p.grad is None:
                    # Every worker must take part in the reduction of every
                    # trainable parameter, so contribute zeros when this worker
                    # has no gradient for it.
                    p.grad = torch.zeros_like(p.data)
                else:
                    # Weight this worker's gradient by its share of the global batch.
                    p.grad = p.grad * local_to_global_batch_size_ratio

                pending_reductions.append(
                    dist.all_reduce(
                        p.grad,
                        async_op=True,
                    )  # sum
                )

            # synchronize: block until every asynchronous reduction has finished
            for reduction in pending_reductions:
                reduction.wait()

        if hasattr(self.actor_critic, "compute_total_grad_norm"):
            total_grad_norm = self.actor_critic.compute_total_grad_norm().item()
        else:
            total_grad_norm = 0.0

        nn.utils.clip_grad_norm_(
            self.actor_critic.parameters(),
            max_norm=max_grad_norm,  # type: ignore
        )

        self.optimizer.step()  # type: ignore
        return total_grad_norm

    def _save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter(
        self, pipeline_stage_index: Optional[int] = None
    ):
        """Save a checkpoint (on the saving workers), queue it for evaluation, and
        record the current total step count in `self.last_save`.

        Every worker re-seeds via `self.deterministic_seeds()` and updates
        `self.last_save`. Only worker 0, or the first local worker when
        `self.save_ckpt_at_every_host` is set, writes the checkpoint and, if a
        checkpoints queue exists, enqueues `("eval", model_path)`.

        # Parameters

        pipeline_stage_index : Forwarded to `self.checkpoint_save`.

        # Returns

        The path of the saved checkpoint, or `None` on workers that do not save.
        """
        self.deterministic_seeds()

        is_saving_worker = self.worker_id == 0 or (
            self.save_ckpt_at_every_host
            and self.worker_id == self.first_local_worker_id
        )

        saved_path = None
        if is_saving_worker:
            saved_path = self.checkpoint_save(pipeline_stage_index=pipeline_stage_index)
            queue_for_eval = self.checkpoints_queue
            if queue_for_eval is not None:
                queue_for_eval.put(("eval", saved_path))

        self.last_save = self.training_pipeline.total_steps
        return saved_path

    def run_pipeline(self, valid_on_initial_weights: bool = False):
        """Run the training loop until every stage of the training pipeline is complete.

        Each iteration of the main loop:

        1. Calls `self.training_pipeline.before_rollout(...)`, which reports whether
           the pipeline stage changed. On a stage change a checkpoint may be saved,
           new storage is initialized, and stage-dependent engine attributes are
           updated. The loop exits when no current stage remains.
        2. Collects a rollout of up to `num_steps` steps with
           `self.collect_step_across_all_task_samplers` (skipped when the pipeline
           has no rollout storage, in which case `self.step_count` is simply
           incremented by one). In distributed training the per-worker step counts
           are synchronized after the rollout.
        3. Runs `self.compute_losses_track_them_and_backprop` for every stage
           component of the current stage.
        4. Updates per-storage step counts, steps the learning rate scheduler (if
           any), and saves checkpoints / sends logging packages when the
           corresponding intervals have elapsed.

        # Parameters

        valid_on_initial_weights : If `True`, and checkpoint saving is enabled and
            `self.checkpoints_queue` is not `None`, the saving worker(s) write a
            checkpoint before any training happens and enqueue it as
            `("eval", model_path)`.
        """
        cur_stage_training_settings = (
            self.training_pipeline.current_stage.training_settings
        )

        # Change engine attributes that depend on the current stage
        self.training_pipeline.current_stage.change_engine_attributes(self)

        rollout_storage = self.training_pipeline.rollout_storage
        uuid_to_storage = self.training_pipeline.current_stage_storage
        self.initialize_storage_and_viz(
            storage_to_initialize=cast(
                List[ExperienceStorage], list(uuid_to_storage.values())
            )
        )
        self.tracking_info_list.clear()

        self.last_log = self.training_pipeline.total_steps

        if self.last_save is None:
            self.last_save = self.training_pipeline.total_steps

        # Checkpoints are only saved when a checkpoint directory is set and the
        # current stage has a positive `save_interval`.
        should_save_checkpoints = (
            self.checkpoints_dir != ""
            and cur_stage_training_settings.save_interval is not None
            and cur_stage_training_settings.save_interval > 0
        )
        already_saved_checkpoint = False

        # Optionally save (and queue for evaluation) a checkpoint of the initial weights.
        if (
            valid_on_initial_weights
            and should_save_checkpoints
            and self.checkpoints_queue is not None
        ):
            if (
                self.save_ckpt_at_every_host
                and self.worker_id == self.first_local_worker_id
            ) or self.worker_id == 0:
                model_path = self.checkpoint_save()
                if self.checkpoints_queue is not None:
                    self.checkpoints_queue.put(("eval", model_path))

        while True:
            pipeline_stage_changed = self.training_pipeline.before_rollout(
                train_metrics=self._last_aggregated_train_task_metrics
            )  # This is `False` at the very start of training, i.e. pipeline starts with a stage initialized

            self._last_aggregated_train_task_metrics.reset()
            training_is_complete = self.training_pipeline.current_stage is None

            # `training_is_complete` should imply `pipeline_stage_changed`
            assert pipeline_stage_changed or not training_is_complete

            #  Saving checkpoints and initializing storage when the pipeline stage changes
            if pipeline_stage_changed:
                # Here we handle saving a checkpoint after a pipeline stage ends. We
                # do this:
                # (1) after every pipeline stage if the `self.save_ckpt_after_every_pipeline_stage`
                #   boolean is True, and
                # (2) when we have reached the end of ALL training (i.e. all stages are complete).
                if (
                    should_save_checkpoints
                    and (  # Might happen if the `save_interval` was hit just previously, see below
                        not already_saved_checkpoint
                    )
                    and (
                        self.save_ckpt_after_every_pipeline_stage
                        or training_is_complete
                    )
                ):
                    self._save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter(
                        pipeline_stage_index=(
                            self.training_pipeline.current_stage_index - 1
                            if not training_is_complete
                            else len(self.training_pipeline.pipeline_stages) - 1
                        )
                    )

                # If training is complete, break out
                if training_is_complete:
                    break

                # Here we handle updating our training settings after a pipeline stage ends.
                # Update the training settings we're using
                cur_stage_training_settings = (
                    self.training_pipeline.current_stage.training_settings
                )

                # If the pipeline stage changed we must initialize any new custom storage and
                # stop updating any custom storage that is no longer in use (this second bit
                # is done by simply updating `uuid_to_storage` to the new custom storage objects).
                new_uuid_to_storage = self.training_pipeline.current_stage_storage
                storage_to_initialize = [
                    s
                    for uuid, s in new_uuid_to_storage.items()
                    if uuid
                    not in uuid_to_storage  # Don't initialize storage already in use
                ]
                self.initialize_storage_and_viz(
                    storage_to_initialize=storage_to_initialize,
                )
                uuid_to_storage = new_uuid_to_storage

                # Change engine attributes that depend on the current stage
                self.training_pipeline.current_stage.change_engine_attributes(self)

            already_saved_checkpoint = False

            if self.is_distributed:
                # Reset the shared per-rollout counters before the next rollout starts.
                self.num_workers_done.set("done", str(0))
                self.num_workers_steps.set("steps", str(0))
                # Ensure all workers are done before incrementing num_workers_{steps, done}
                dist.barrier(
                    device_ids=(
                        None
                        if self.device == torch.device("cpu")
                        else [self.device.index]
                    )
                )

            # Snapshot the step count and storage experience counts so that the
            # changes caused by this iteration can be computed afterwards.
            self.former_steps = self.step_count
            former_storage_experiences = {
                k: v.total_experiences
                for k, v in self.training_pipeline.current_stage_storage.items()
            }

            if self.training_pipeline.rollout_storage_uuid is None:
                # In this case we're not expecting to collect storage experiences, i.e. everything
                # will be off-policy.

                # self.step_count is normally updated by the `self.collect_step_across_all_task_samplers`
                # call below, but since we're not collecting onpolicy experiences, we need to update
                # it here. The step count here is now just effectively a count of the number of times
                # we've called `compute_losses_track_them_and_backprop` below.
                self.step_count += 1

                before_update_info = dict(
                    next_value=None,
                    use_gae=cur_stage_training_settings.use_gae,
                    gamma=cur_stage_training_settings.gamma,
                    tau=cur_stage_training_settings.gae_lambda,
                    adv_stats_callback=self.advantage_stats,
                )
            else:
                vector_tasks_already_restarted = False
                # `step` starts at -1 and is incremented at the top of the loop so that
                # it can be reset to -1 to redo the whole rollout after a task restart.
                step = -1
                while step < cur_stage_training_settings.num_steps - 1:
                    step += 1

                    try:
                        num_paused = self.collect_step_across_all_task_samplers(
                            rollout_storage_uuid=self.training_pipeline.rollout_storage_uuid,
                            uuid_to_storage=uuid_to_storage,
                        )
                    except (TimeoutError, EOFError) as e:
                        if (
                            not self.try_restart_after_task_error
                        ) or self.mode != TRAIN_MODE_STR:
                            # Apparently you can just call `raise` here and doing so will just raise the exception as though
                            # it was not caught (so the stacktrace isn't messed up)
                            raise
                        elif vector_tasks_already_restarted:
                            raise RuntimeError(
                                f"[{self.mode} worker {self.worker_id}] `vector_tasks` has timed out twice in the same"
                                f" rollout. This suggests that this error was not recoverable. Timeout exception:\n{traceback.format_exc()}"
                            )
                        else:
                            get_logger().warning(
                                f"[{self.mode} worker {self.worker_id}] `vector_tasks` appears to have crashed during"
                                f" training due to an {type(e).__name__} error. You have set"
                                f" `try_restart_after_task_error` to `True` so we will attempt to restart these tasks from"
                                f" the beginning. USE THIS FEATURE AT YOUR OWN"
                                f" RISK. Exception:\n{traceback.format_exc()}."
                            )
                            # Close the vector tasks and clear the cached reference.
                            # NOTE(review): presumably `self.vector_tasks` recreates
                            # them on next access -- confirm.
                            self.vector_tasks.close()
                            self._vector_tasks = None

                            vector_tasks_already_restarted = True
                            for (
                                storage
                            ) in self.training_pipeline.current_stage_storage.values():
                                storage.after_updates()
                            self.initialize_storage_and_viz(
                                storage_to_initialize=cast(
                                    List[ExperienceStorage],
                                    list(uuid_to_storage.values()),
                                )
                            )
                            # Restart the rollout from its first step.
                            step = -1
                            continue

                    # A more informative error message should already have been raised in
                    # `collect_step_across_all_task_samplers` if `num_paused != 0` here, but
                    # this serves as a sanity check.
                    assert num_paused == 0

                    if self.is_distributed:
                        # Preempt stragglers
                        # Each worker will stop collecting steps for the current rollout whenever a
                        # 100 * distributed_preemption_threshold percentage of workers are finished collecting their
                        # rollout steps, and we have collected at least 25% but less than 90% of the steps.
                        num_done = int(self.num_workers_done.get("done"))
                        if (
                            num_done
                            > self.distributed_preemption_threshold * self.num_workers
                            and 0.25 * cur_stage_training_settings.num_steps
                            <= step
                            < 0.9 * cur_stage_training_settings.num_steps
                        ):
                            get_logger().debug(
                                f"[{self.mode} worker {self.worker_id}] Preempted after {step}"
                                f" steps (out of {cur_stage_training_settings.num_steps})"
                                f" with {num_done} workers done"
                            )
                            break

                # Evaluate the model on the input for the next step; its value
                # estimates are passed to the storage as `next_value` below.
                with torch.no_grad():
                    actor_critic_output, _ = self.actor_critic(
                        **rollout_storage.agent_input_for_next_step()
                    )

                self.training_pipeline.rollout_count += 1

                if self.is_distributed:
                    # Mark that a worker is done collecting experience
                    self.num_workers_done.add("done", 1)
                    self.num_workers_steps.add(
                        "steps", self.step_count - self.former_steps
                    )

                    # Ensure all workers are done before updating step counter
                    dist.barrier(
                        device_ids=(
                            None
                            if self.device == torch.device("cpu")
                            else [self.device.index]
                        )
                    )

                    ndone = int(self.num_workers_done.get("done"))
                    assert (
                        ndone == self.num_workers
                    ), f"# workers done {ndone} != # workers {self.num_workers}"

                    # get the actual step_count
                    self.step_count = (
                        int(self.num_workers_steps.get("steps")) + self.former_steps
                    )

                before_update_info = dict(
                    next_value=actor_critic_output.values.detach(),
                    use_gae=cur_stage_training_settings.use_gae,
                    gamma=cur_stage_training_settings.gamma,
                    tau=cur_stage_training_settings.gae_lambda,
                    adv_stats_callback=self.advantage_stats,
                )

            # Prepare storage for iteration during updates
            for storage in self.training_pipeline.current_stage_storage.values():
                storage.before_updates(**before_update_info)

            # Compute losses and backprop once per stage component, each with its own storage.
            for sc in self.training_pipeline.current_stage.stage_components:
                component_storage = uuid_to_storage[sc.storage_uuid]

                self.compute_losses_track_them_and_backprop(
                    stage=self.training_pipeline.current_stage,
                    stage_component=sc,
                    storage=component_storage,
                )

            for storage in self.training_pipeline.current_stage_storage.values():
                storage.after_updates()

            # We update the storage step counts saved in
            # `self.training_pipeline.current_stage.storage_uuid_to_steps_taken_in_stage` here rather than with
            # `self.steps` above because some storage step counts may only change after the update calls above.
            # This may seem a bit weird but consider a storage that corresponds to a fixed dataset
            # used for imitation learning. For such a dataset, the "steps" will only increase as
            # new batches are sampled during update calls.
            # Note: The keys are iterated in sorted order below. An earlier version of this note stated
            #   that sorting is unnecessary because `self.training_pipeline.current_stage_storage` is an
            #   ordered `dict`. NOTE(review): confirm whether the `sorted` call is still required.
            # First we calculate the change in counts (possibly aggregating across devices)
            change_in_storage_experiences = {}
            for k in sorted(self.training_pipeline.current_stage_storage.keys()):
                delta = (
                    self.training_pipeline.current_stage_storage[k].total_experiences
                    - former_storage_experiences[k]
                )
                assert delta >= 0
                change_in_storage_experiences[k] = self.distributed_weighted_sum(
                    to_share=delta, weight=1
                )

            # Then we update `self.training_pipeline.current_stage.storage_uuid_to_steps_taken_in_stage` with the above
            # computed changes.
            for storage_uuid, delta in change_in_storage_experiences.items():
                self.training_pipeline.current_stage.storage_uuid_to_steps_taken_in_stage[
                    storage_uuid
                ] += delta

            if self.lr_scheduler is not None:
                self.lr_scheduler.step(epoch=self.training_pipeline.total_steps)

            # Here we handle saving a checkpoint every `save_interval` steps, saving after
            # a pipeline stage completes is controlled above
            checkpoint_file_name = None
            if should_save_checkpoints and (
                self.training_pipeline.total_steps - self.last_save
                >= cur_stage_training_settings.save_interval
            ):
                checkpoint_file_name = (
                    self._save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter()
                )
                already_saved_checkpoint = True

            # Send a logging package when the log interval has elapsed or the current
            # stage has just completed.
            if (
                self.training_pipeline.total_steps - self.last_log >= self.log_interval
                or self.training_pipeline.current_stage.is_complete
            ):
                self.aggregate_and_send_logging_package(
                    tracking_info_list=self.tracking_info_list,
                    checkpoint_file_name=checkpoint_file_name,
                )
                self.tracking_info_list.clear()
                self.last_log = self.training_pipeline.total_steps

            # Every `advance_scene_rollout_period` rollouts (if set), force all tasks to
            # advance to the next scene and re-initialize the storage.
            if (
                cur_stage_training_settings.advance_scene_rollout_period is not None
            ) and (
                self.training_pipeline.rollout_count
                % cur_stage_training_settings.advance_scene_rollout_period
                == 0
            ):
                get_logger().info(
                    f"[{self.mode} worker {self.worker_id}] Force advance"
                    f" tasks with {self.training_pipeline.rollout_count} rollouts"
                )
                self.vector_tasks.next_task(force_advance_scene=True)
                self.initialize_storage_and_viz(
                    storage_to_initialize=cast(
                        List[ExperienceStorage], list(uuid_to_storage.values())
                    )
                )

    def train(
        self,
        checkpoint_file_name: Optional[str] = None,
        restart_pipeline: bool = False,
        valid_on_initial_weights: bool = False,
    ):
        """Entry point for training: optionally load a checkpoint, then run the pipeline.

        `KeyboardInterrupt` and any other `Exception` raised while loading or
        training are logged and not re-raised. In every case the outcome is
        reported on `self.results_queue` and the engine is closed: on success,
        worker 0 reports `("train_stopped", 0)`; otherwise the worker reports
        `("train_stopped", 1 + worker_id)`.

        # Parameters

        checkpoint_file_name : If not `None`, passed to `self.checkpoint_load`
            before training starts.
        restart_pipeline : Passed to `self.checkpoint_load` together with
            `checkpoint_file_name`.
        valid_on_initial_weights : Passed to `self.run_pipeline`.
        """
        assert (
            self.mode == TRAIN_MODE_STR
        ), "train only to be called from a train instance"

        finished_ok = False
        # noinspection PyBroadException
        try:
            if checkpoint_file_name is not None:
                self.checkpoint_load(checkpoint_file_name, restart_pipeline)

            self.run_pipeline(valid_on_initial_weights=valid_on_initial_weights)
        except KeyboardInterrupt:
            get_logger().info(
                f"[{self.mode} worker {self.worker_id}] KeyboardInterrupt, exiting."
            )
        except Exception as e:
            get_logger().error(
                f"[{self.mode} worker {self.worker_id}] Encountered {type(e).__name__}, exiting."
            )
            get_logger().error(traceback.format_exc())
        else:
            # Only reached when loading and training raised nothing.
            finished_ok = True
        finally:
            if not finished_ok:
                # A non-zero code identifies the worker that stopped abnormally.
                self.results_queue.put(("train_stopped", 1 + self.worker_id))
            else:
                if self.worker_id == 0:
                    self.results_queue.put(("train_stopped", 0))
                get_logger().info(
                    f"[{self.mode} worker {self.worker_id}] Training finished successfully."
                )
            self.close()


class OnPolicyInference(OnPolicyRLEngine):
    def __init__(
        self,
        config: ExperimentConfig,
        results_queue: mp.Queue,  # to output aggregated results
        checkpoints_queue: mp.Queue,  # to write/read (trainer/evaluator) ready checkpoints
        checkpoints_dir: str = "",
        mode: str = "valid",  # or "test"
        seed: Optional[int] = None,
        deterministic_cudnn: bool = False,
        mp_ctx: Optional[BaseContext] = None,
        device: Union[str, torch.device, int] = "cpu",
        deterministic_agents: bool = False,
        worker_id: int = 0,
        num_workers: int = 1,
        distributed_port: int = 0,
        enforce_expert: bool = False,
        **kwargs,
    ):
        """Initializer.

        All arguments other than `enforce_expert` are forwarded unchanged to the parent
        engine's initializer (with `experiment_name` fixed to the empty string).
        `enforce_expert` is stored on the instance and read by `run_eval`.
        """
        engine_kwargs = dict(
            experiment_name="",
            config=config,
            results_queue=results_queue,
            checkpoints_queue=checkpoints_queue,
            checkpoints_dir=checkpoints_dir,
            mode=mode,
            seed=seed,
            deterministic_cudnn=deterministic_cudnn,
            mp_ctx=mp_ctx,
            deterministic_agents=deterministic_agents,
            device=device,
            worker_id=worker_id,
            num_workers=num_workers,
            distributed_port=distributed_port,
        )
        super().__init__(**engine_kwargs, **kwargs)

        self.enforce_expert = enforce_expert

    def run_eval(
        self,
        checkpoint_file_path: str,
        rollout_steps: int = 100,
        visualizer: Optional[VizSuite] = None,
        update_secs: float = 20.0,
        verbose: bool = False,
    ) -> LoggingPackage:
        """Evaluate the checkpoint at `checkpoint_file_path` and return the aggregated results.

        A fresh training pipeline is created for the duration of the call (so that new
        storages are used) and `self.training_pipeline` is reset to `None` before returning.

        # Parameters

        checkpoint_file_path : Checkpoint passed to `self.checkpoint_load`. Its `"total_steps"`
            entry becomes the `training_steps` of the returned package.
        rollout_steps : Number of collected steps after which on-policy losses (if any) are
            computed and `after_updates` is called on the rollout storage.
        visualizer : Optional `VizSuite`; it must be empty on entry. Its contents are read
            (and reset) into `viz_data` of the returned package.
        update_secs : Time threshold (in seconds) used to decide when tracked information is
            aggregated and, if `verbose`, when progress is logged.
        verbose : Whether progress information should be logged.

        # Returns

        A `LoggingPackage` holding everything aggregated during this evaluation, with
        `checkpoint_file_name` set to `checkpoint_file_path`.

        # Raises

        NotImplementedError : If the evaluation stage uses more than one `RolloutStorage` or
            more than one non-rollout storage.
        """
        assert self.actor_critic is not None, "called `run_eval` with no actor_critic"

        # Sanity check that we haven't entered an invalid state. During eval the training_pipeline
        # should be only set in this function and always unset at the end of it.
        assert self.training_pipeline is None, (
            "`training_pipeline` should be `None` before calling `run_eval`."
            " This is necessary as we want to initialize new storages."
        )
        self.training_pipeline = self.config.training_pipeline()

        ckpt = self.checkpoint_load(checkpoint_file_path, restart_pipeline=False)
        total_steps = cast(int, ckpt["total_steps"])

        # The stage to evaluate is looked up by mode, e.g. `valid_pipeline_stage`.
        eval_pipeline_stage = cast(
            PipelineStage,
            getattr(self.training_pipeline, f"{self.mode}_pipeline_stage"),
        )
        assert (
            len(eval_pipeline_stage.stage_components) <= 1
        ), "Only one StageComponent is supported during inference."
        uuid_to_storage = self.training_pipeline.get_stage_storage(eval_pipeline_stage)

        assert len(uuid_to_storage) > 0, (
            "No storage found for eval pipeline stage, this is a bug in AllenAct,"
            " please submit an issue on GitHub (https://github.com/allenai/allenact/issues)."
        )

        # Split the storages by kind: rollout storages are filled by stepping the task
        # samplers, the others are only used for loss computation after rollouts finish.
        uuid_to_rollout_storage = {
            uuid: storage
            for uuid, storage in uuid_to_storage.items()
            if isinstance(storage, RolloutStorage)
        }
        uuid_to_non_rollout_storage = {
            uuid: storage
            for uuid, storage in uuid_to_storage.items()
            if not isinstance(storage, RolloutStorage)
        }

        if len(uuid_to_rollout_storage) > 1 or len(uuid_to_non_rollout_storage) > 1:
            raise NotImplementedError(
                "Only one RolloutStorage and non-RolloutStorage object is allowed within an evaluation pipeline stage."
                " If you'd like to evaluate against multiple storages please"
                " submit an issue on GitHub (https://github.com/allenai/allenact/issues). For the moment you'll need"
                " to evaluate against these storages separately."
            )

        rollout_storage = self.training_pipeline.rollout_storage

        if visualizer is not None:
            assert visualizer.empty()

        num_paused = self.initialize_storage_and_viz(
            storage_to_initialize=cast(
                List[ExperienceStorage], list(uuid_to_storage.values())
            ),
            visualizer=visualizer,
        )
        assert num_paused == 0, f"{num_paused} tasks paused when initializing eval"

        if rollout_storage is not None:
            num_tasks = sum(
                self.vector_tasks.command(
                    "sampler_attr", ["length"] * self.num_active_samplers
                )
            ) + (  # We need to add this as the first tasks have already been sampled
                self.num_active_samplers
            )
        else:
            num_tasks = 0

        # get_logger().debug(f"worker {self.worker_id} number of tasks {num_tasks}")
        steps = 0

        self.actor_critic.eval()

        last_time: float = time.time()
        init_time: float = last_time
        frames: int = 0
        if verbose:
            get_logger().info(
                f"[{self.mode} worker {self.worker_id}] Running evaluation on {num_tasks} tasks"
                f" for ckpt {checkpoint_file_path}"
            )

        if self.enforce_expert:
            # Wrap the agent's distributions so that teacher forcing is always enforced.
            dist_wrapper_class = partial(
                TeacherForcingDistr,
                action_space=self.actor_critic.action_space,
                num_active_samplers=None,
                approx_steps=None,
                teacher_forcing=None,
                tracking_callback=None,
                always_enforce=True,
            )
        else:
            dist_wrapper_class = None

        logging_pkg = LoggingPackage(
            mode=self.mode,
            training_steps=total_steps,
            storage_uuid_to_total_experiences=self.training_pipeline.storage_uuid_to_total_experiences,
        )
        # On-policy losses are computed only if the stage has losses and its (single)
        # stage component reads from the rollout storage.
        should_compute_onpolicy_losses = (
            len(eval_pipeline_stage.loss_names) > 0
            and eval_pipeline_stage.stage_components[0].storage_uuid
            == self.training_pipeline.rollout_storage_uuid
        )
        while self.num_active_samplers > 0:
            frames += self.num_active_samplers
            num_newly_paused = self.collect_step_across_all_task_samplers(
                rollout_storage_uuid=self.training_pipeline.rollout_storage_uuid,
                uuid_to_storage=uuid_to_rollout_storage,
                visualizer=visualizer,
                dist_wrapper_class=dist_wrapper_class,
            )
            steps += 1

            if should_compute_onpolicy_losses and num_newly_paused > 0:
                # The `collect_step_across_all_task_samplers` method will automatically drop
                # parts of the rollout storage that correspond to paused tasks (namely by calling
                # `rollout_storage.sampler_select(UNPAUSED_TASK_INDS)`). This makes sense when you don't need to
                # compute losses for tasks but is a bit limiting here as we're throwing away data before
                # using it to compute losses. As changing this is non-trivial we'll just warn the user
                # for now.
                get_logger().warning(
                    f"[{self.mode} worker {self.worker_id}] {num_newly_paused * rollout_storage.step} steps"
                    f" will be dropped when computing losses in evaluation. This is a limitation of the current"
                    f" implementation of rollout collection in AllenAct. If you'd like to see this"
                    f" functionality improved please submit an issue on GitHub"
                    f" (https://github.com/allenai/allenact/issues)."
                )

            if self.num_active_samplers == 0 or steps % rollout_steps == 0:
                if should_compute_onpolicy_losses and self.num_active_samplers > 0:
                    with torch.no_grad():
                        actor_critic_output, _ = self.actor_critic(
                            **rollout_storage.agent_input_for_next_step()
                        )
                        before_update_info = dict(
                            next_value=actor_critic_output.values.detach(),
                            use_gae=eval_pipeline_stage.training_settings.use_gae,
                            gamma=eval_pipeline_stage.training_settings.gamma,
                            tau=eval_pipeline_stage.training_settings.gae_lambda,
                            adv_stats_callback=lambda advantages: {
                                "mean": advantages.mean(),
                                "std": advantages.std(),
                            },
                        )
                    # Prepare storage for iteration during loss computation
                    for storage in uuid_to_rollout_storage.values():
                        storage.before_updates(**before_update_info)

                    # Compute losses
                    with torch.no_grad():
                        for sc in eval_pipeline_stage.stage_components:
                            self.compute_losses_track_them_and_backprop(
                                stage=eval_pipeline_stage,
                                stage_component=sc,
                                storage=uuid_to_rollout_storage[sc.storage_uuid],
                                skip_backprop=True,
                            )

                for storage in uuid_to_rollout_storage.values():
                    storage.after_updates()

            cur_time = time.time()
            if self.num_active_samplers == 0 or cur_time - last_time >= update_secs:
                logging_pkg = self.aggregate_and_send_logging_package(
                    tracking_info_list=self.tracking_info_list,
                    logging_pkg=logging_pkg,
                    send_logging_package=False,
                )
                self.tracking_info_list.clear()

                if verbose:
                    npending: int
                    lengths: List[int]
                    if self.num_active_samplers > 0:
                        lengths = self.vector_tasks.command(
                            "sampler_attr",
                            ["length"] * self.num_active_samplers,
                        )
                        npending = sum(lengths)
                    else:
                        lengths = []
                        npending = 0
                    # Extrapolate from the time spent so far on the completed tasks; there
                    # is no estimate while no task has completed.
                    est_time_to_complete = (
                        "{:.2f}".format(
                            (
                                (cur_time - init_time)
                                * (npending / (num_tasks - npending))
                                / 60
                            )
                        )
                        if npending != num_tasks
                        else "???"
                    )
                    get_logger().info(
                        f"[{self.mode} worker {self.worker_id}]"
                        f" For ckpt {checkpoint_file_path}"
                        f" {frames / (cur_time - init_time):.1f} fps,"
                        f" {npending}/{num_tasks} tasks pending ({lengths})."
                        f" ~{est_time_to_complete} min. to complete."
                    )
                    if logging_pkg.num_non_empty_metrics_dicts_added != 0:
                        get_logger().info(
                            ", ".join(
                                [
                                    f"[{self.mode} worker {self.worker_id}]"
                                    f" num_{self.mode}_tasks_complete {logging_pkg.num_non_empty_metrics_dicts_added}",
                                    *[
                                        f"{k} {v:.3g}"
                                        for k, v in logging_pkg.metrics_tracker.means().items()
                                    ],
                                    *[
                                        f"{k0[1]}/{k1} {v1:.3g}"
                                        for k0, v0 in logging_pkg.info_trackers.items()
                                        for k1, v1 in v0.means().items()
                                    ],
                                ]
                            )
                        )

                    # NOTE(review): `last_time` is only advanced when `verbose` is true, so in
                    # non-verbose mode the aggregation above runs on every step once
                    # `update_secs` have elapsed - confirm this is intended.
                    last_time = cur_time

        get_logger().info(
            f"[{self.mode} worker {self.worker_id}] Task evaluation complete, all task samplers paused."
        )

        if rollout_storage is not None:
            # Resume, re-seed and reset the samplers after the evaluation rollouts.
            self.vector_tasks.resume_all()
            self.vector_tasks.set_seeds(self.worker_seeds(self.num_samplers, self.seed))
            self.vector_tasks.reset_all()

        logging_pkg = self.aggregate_and_send_logging_package(
            tracking_info_list=self.tracking_info_list,
            logging_pkg=logging_pkg,
            send_logging_package=False,
        )
        self.tracking_info_list.clear()

        logging_pkg.viz_data = (
            visualizer.read_and_reset() if visualizer is not None else None
        )

        should_compute_offpolicy_losses = (
            len(eval_pipeline_stage.loss_names) > 0
            and not should_compute_onpolicy_losses
        )
        if should_compute_offpolicy_losses:
            # In this case we are evaluating a non-rollout storage, e.g. some off-policy data
            get_logger().info(
                f"[{self.mode} worker {self.worker_id}] Non-rollout storage detected, will now compute losses"
                f" using this storage."
            )

            # Keep computing losses until the loss computation raises `EOFError`
            # (presumably the storage's signal that its data is exhausted).
            offpolicy_eval_done = False
            while not offpolicy_eval_done:
                before_update_info = dict(
                    next_value=None,
                    use_gae=eval_pipeline_stage.training_settings.use_gae,
                    gamma=eval_pipeline_stage.training_settings.gamma,
                    tau=eval_pipeline_stage.training_settings.gae_lambda,
                    adv_stats_callback=lambda advantages: {
                        "mean": advantages.mean(),
                        "std": advantages.std(),
                    },
                )
                # Prepare storage for iteration during loss computation
                for storage in uuid_to_non_rollout_storage.values():
                    storage.before_updates(**before_update_info)

                # Compute losses
                assert len(eval_pipeline_stage.stage_components) == 1
                try:
                    for sc in eval_pipeline_stage.stage_components:
                        with torch.no_grad():
                            self.compute_losses_track_them_and_backprop(
                                stage=eval_pipeline_stage,
                                stage_component=sc,
                                storage=uuid_to_non_rollout_storage[sc.storage_uuid],
                                skip_backprop=True,
                            )
                except EOFError:
                    offpolicy_eval_done = True

                for storage in uuid_to_non_rollout_storage.values():
                    storage.after_updates()

                total_bsize = sum(
                    tif.info.get("worker_batch_size", 0)
                    for tif in self.tracking_info_list
                )
                logging_pkg = self.aggregate_and_send_logging_package(
                    tracking_info_list=self.tracking_info_list,
                    logging_pkg=logging_pkg,
                    send_logging_package=False,
                )
                self.tracking_info_list.clear()

                cur_time = time.time()
                if verbose and (cur_time - last_time >= update_secs):
                    get_logger().info(
                        f"[{self.mode} worker {self.worker_id}]"
                        f" For ckpt {checkpoint_file_path}"
                        f" {total_bsize / (cur_time - init_time):.1f} its/sec."
                    )
                    # `info_trackers` is a mapping: test its size (comparing the mapping
                    # itself with `0` is always true).
                    if len(logging_pkg.info_trackers) != 0:
                        get_logger().info(
                            ", ".join(
                                [
                                    f"[{self.mode} worker {self.worker_id}]"
                                    f" num_{self.mode}_iters_complete {total_bsize}",
                                    *[
                                        f"{'/'.join(k0)}/{k1} {v1:.3g}"
                                        for k0, v0 in logging_pkg.info_trackers.items()
                                        for k1, v1 in v0.means().items()
                                    ],
                                ]
                            )
                        )

                    last_time = cur_time

        # Call after_updates here to reset all storages
        for storage in uuid_to_storage.values():
            storage.after_updates()

        # Set the training pipeline to `None` so that the storages do not
        # persist across calls to `run_eval`
        self.training_pipeline = None

        logging_pkg.checkpoint_file_name = checkpoint_file_path

        return logging_pkg

    @staticmethod
    def skip_to_latest(checkpoints_queue: mp.Queue, command: Optional[str], data):
        """Drain `checkpoints_queue`, returning the data of the most recent `command` message.

        A uniquely time-stamped sentinel is put on the queue and messages are then read
        until that sentinel comes back. Every message whose command equals `command`
        overwrites `data`. After a one second pause the procedure repeats for as long as
        the queue is not empty.

        # Parameters

        checkpoints_queue : Queue of `(command, data)` tuples; must not be `None`.
        command : The command whose latest data should be kept.
        data : Value returned if no newer `command` message is found.

        # Returns

        The data of the last `command` message read from the queue, or the input `data`.

        # Raises

        ValueError : If a message is neither `command` nor the sentinel.
        """
        assert (
            checkpoints_queue is not None
        ), "Attempting to process checkpoints queue but this queue is `None`."
        while True:
            marker_name, marker_stamp = "skip.AUTO.sentinel", time.time()
            # Putting the sentinel is valid since a single valid process is the only consumer
            checkpoints_queue.put((marker_name, marker_stamp))

            while True:
                received_command: Optional[str]
                received_data: Any
                # Blocks until the next command arrives
                received_command, received_data = checkpoints_queue.get()

                if received_command == command:
                    data = received_data
                    continue

                if received_command != marker_name:
                    raise ValueError(
                        f"Unexpected command {received_command} with data {received_data}"
                    )

                assert (
                    received_data == marker_stamp
                ), f"Wrong sentinel found: {received_data} vs {marker_stamp}"
                break  # our own sentinel came back: everything before it was consumed

            time.sleep(1)
            if checkpoints_queue.empty():
                return data

    def process_checkpoints(self):
        """Consume commands from `self.checkpoints_queue` until asked to stop.

        An `"eval"` command runs `run_eval` on the accompanying checkpoint path and puts
        the resulting package on `self.results_queue`. In validation mode the queue is
        first skipped forward to the most recent checkpoint. `"quit"`, `"exit"` and
        `"close"` end the loop; any other command raises `NotImplementedError`, which
        (like any other exception) is logged rather than propagated.

        On exit, test workers put `("test_stopped", code)` on the results queue (`0` for a
        requested stop, `worker_id + 1` otherwise) and `self.close` is called.
        """
        assert (
            self.mode != TRAIN_MODE_STR
        ), "process_checkpoints only to be called from a valid or test instance"

        assert (
            self.checkpoints_queue is not None
        ), "Attempting to process checkpoints queue but this queue is `None`."

        viz: Optional[VizSuite] = None

        # Becomes `True` only when a quit/exit/close command was received.
        stopped_on_request = False
        # noinspection PyBroadException
        try:
            while True:
                msg_command: Optional[str]
                msg_payload: Any
                # Blocks until a command arrives
                msg_command, msg_payload = self.checkpoints_queue.get()

                if msg_command in ["quit", "exit", "close"]:
                    stopped_on_request = True
                    break

                if msg_command != "eval":
                    raise NotImplementedError()

                if self.mode == VALID_MODE_STR:
                    # Skipping to the latest checkpoint is possible because
                    # 1. there's only one consumer in valid
                    # 2. there's no quit/exit/close message issued by runner nor trainer
                    msg_payload = self.skip_to_latest(
                        checkpoints_queue=self.checkpoints_queue,
                        command=msg_command,
                        data=msg_payload,
                    )

                if viz is None and self.machine_params.visualizer is not None:
                    viz = self.machine_params.visualizer

                package = self.run_eval(
                    checkpoint_file_path=msg_payload,
                    visualizer=viz,
                    verbose=True,
                    update_secs=20 if self.mode == TEST_MODE_STR else 5 * 60,
                )
                self.results_queue.put(package)

                if self.is_distributed:
                    dist.barrier()
        except KeyboardInterrupt:
            get_logger().info(
                f"[{self.mode} worker {self.worker_id}] KeyboardInterrupt, exiting."
            )
        except Exception as e:
            get_logger().error(
                f"[{self.mode} worker {self.worker_id}] Encountered {type(e).__name__}, exiting."
            )
            get_logger().error(traceback.format_exc())
        finally:
            in_test_mode = self.mode == TEST_MODE_STR
            if in_test_mode:
                exit_code = 0 if stopped_on_request else self.worker_id + 1
                self.results_queue.put(("test_stopped", exit_code))
            if stopped_on_request:
                get_logger().info(
                    f"[{self.mode} worker {self.worker_id}] Complete, all checkpoints processed."
                )
            self.close(verbose=in_test_mode)


================================================
FILE: allenact/algorithms/onpolicy_sync/losses/__init__.py
================================================
from .a2cacktr import A2C, ACKTR, A2CACKTR
from .ppo import PPO


================================================
FILE: allenact/algorithms/onpolicy_sync/losses/a2cacktr.py
================================================
"""Implementation of A2C and ACKTR losses."""

from typing import cast, Tuple, Dict, Optional

import torch

from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
    AbstractActorCriticLoss,
    ObservationType,
)
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput
from allenact.utils.system import get_logger


class A2CACKTR(AbstractActorCriticLoss):
    """Class implementing A2C and ACKTR losses.

    # Attributes

    acktr : `True` if should use ACKTR loss (currently not supported), otherwise uses A2C loss.
    value_loss_coef : Weight of value loss.
    entropy_coef : Weight of entropy (encouraging) loss.
    entropy_method_name : Name of Distr's entropy method name. Default is `entropy`,
                          but we might use `conditional_entropy` for `SequentialDistr`.
    loss_key : Key under which the total loss is reported by `loss`.
    """

    def __init__(
        self,
        value_loss_coef,
        entropy_coef,
        acktr=False,
        entropy_method_name: str = "entropy",
        *args,
        **kwargs,
    ):
        """Initializer.

        See class documentation for parameter definitions.
        """
        super().__init__(*args, **kwargs)
        self.acktr = acktr
        self.loss_key = "a2c_total" if not acktr else "aktr_total"

        self.value_loss_coef = value_loss_coef
        self.entropy_coef = entropy_coef
        self.entropy_method_name = entropy_method_name

    def loss_per_step(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
    ) -> Dict[str, Tuple[torch.Tensor, Optional[float]]]:
        """Compute the un-reduced value, action and entropy losses.

        # Parameters

        step_count : Not used by this loss.
        batch : Must contain the keys `"actions"`, `"adv_targ"` and `"returns"`.
        actor_critic_output : Provides the `values` and `distributions` for the batch.

        # Returns

        A dictionary mapping `"value"`, `"action"` and `"entropy"` to a
        `(loss tensor, weight)` pair, where a weight of `None` means "unweighted".
        """
        actions = cast(torch.LongTensor, batch["actions"])
        values = actor_critic_output.values
        action_log_probs = actor_critic_output.distributions.log_prob(actions)
        # Append trailing singleton dimensions so the log probabilities have as many
        # dimensions as the advantages.
        action_log_probs = action_log_probs.view(
            action_log_probs.shape
            + (1,)
            * (
                len(cast(torch.Tensor, batch["adv_targ"]).shape)
                - len(action_log_probs.shape)
            )
        )

        dist_entropy: torch.FloatTensor = getattr(
            actor_critic_output.distributions, self.entropy_method_name
        )()
        # Same trailing-dimension padding for the entropy.
        dist_entropy = dist_entropy.view(
            dist_entropy.shape
            + ((1,) * (len(action_log_probs.shape) - len(dist_entropy.shape)))
        )

        value_loss = 0.5 * (cast(torch.FloatTensor, batch["returns"]) - values).pow(2)

        # TODO: Decided not to use normalized advantages here,
        #   is this correct? (it's how it's done in Kostrikov's)
        action_loss = -(
            cast(torch.FloatTensor, batch["adv_targ"]).detach() * action_log_probs
        )

        if self.acktr:
            # TODO: Currently acktr doesn't really work because of this natural gradient stuff
            #   that we should figure out how to integrate properly.
            get_logger().warning("acktr is only partially supported.")

        return {
            "value": (value_loss, self.value_loss_coef),
            "action": (action_loss, None),
            # Negate out of place: `dist_entropy` is a view of the tensor returned by the
            # distribution, so an in-place negation would also modify that tensor.
            "entropy": (-dist_entropy, self.entropy_coef),  # type: ignore
        }

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs,
    ):
        """Compute the total loss as the weighted sum of the mean per-step losses.

        # Returns

        A tuple `(total_loss, info)` where `info` maps `self.loss_key` to the total loss
        and `"value"`, `"action"`, `"entropy"` to the (unweighted) mean of each sub-loss,
        all as Python floats.
        """
        losses_per_step = self.loss_per_step(
            step_count=step_count,
            batch=batch,
            actor_critic_output=actor_critic_output,
        )
        losses = {
            key: (loss.mean(), weight)
            for (key, (loss, weight)) in losses_per_step.items()
        }

        total_loss = cast(
            torch.Tensor,
            sum(
                loss * weight if weight is not None else loss
                for loss, weight in losses.values()
            ),
        )

        return (
            total_loss,
            {
                self.loss_key: total_loss.item(),
                **{key: loss.item() for key, (loss, _) in losses.items()},
            },
        )


class A2C(A2CACKTR):
    """A2C Loss."""

    def __init__(
        self,
        value_loss_coef,
        entropy_coef,
        entropy_method_name: str = "entropy",
        *args,
        **kwargs,
    ):
        """Initializer.

        Equivalent to `A2CACKTR` with `acktr=False`; any extra positional and keyword
        arguments are forwarded to `A2CACKTR`.
        """
        # The named parameters are forwarded positionally (in `A2CACKTR`'s parameter
        # order). Passing them by keyword together with `*args` would bind the first
        # extra positional argument to `value_loss_coef` and raise a `TypeError`.
        super().__init__(
            value_loss_coef,
            entropy_coef,
            False,  # acktr
            entropy_method_name,
            *args,
            **kwargs,
        )


class ACKTR(A2CACKTR):
    """ACKTR Loss.

    This code is not supported as it currently lacks an implementation
    for recurrent models.
    """

    def __init__(
        self,
        value_loss_coef,
        entropy_coef,
        entropy_method_name: str = "entropy",
        *args,
        **kwargs,
    ):
        """Initializer.

        Equivalent to `A2CACKTR` with `acktr=True`; any extra positional and keyword
        arguments are forwarded to `A2CACKTR`.
        """
        # The named parameters are forwarded positionally (in `A2CACKTR`'s parameter
        # order). Passing them by keyword together with `*args` would bind the first
        # extra positional argument to `value_loss_coef` and raise a `TypeError`.
        super().__init__(
            value_loss_coef,
            entropy_coef,
            True,  # acktr
            entropy_method_name,
            *args,
            **kwargs,
        )


# Default keyword arguments for the A2C loss.
A2CConfig = {
    "value_loss_coef": 0.5,
    "entropy_coef": 0.01,
}


================================================
FILE: allenact/algorithms/onpolicy_sync/losses/abstract_loss.py
================================================
"""Defining abstract loss classes for actor critic models."""

import abc
from typing import Dict, Tuple, Union

import torch

from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import Loss, ActorCriticOutput


class AbstractActorCriticLoss(Loss):
    """Base class for loss functions that are used to train an
    ActorCriticModel.

    Subclasses must implement `loss`.
    """

    # noinspection PyMethodOverriding
    @abc.abstractmethod
    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs,
    ) -> Union[
        Tuple[torch.FloatTensor, Dict[str, float]],
        Tuple[torch.FloatTensor, Dict[str, float], Dict[str, float]],
    ]:
        """Computes the loss.

        # Parameters

        step_count : An integer step counter supplied by the caller.
        batch : A batch of data corresponding to the information collected when rolling out
            (possibly many) agents over a fixed number of steps. In particular this batch
            should have the same format as that returned by
            `RolloutStorage.batched_experience_generator`.
        actor_critic_output : The output of calling an ActorCriticModel on the observations
            in `batch`.
        args : Extra args.
        kwargs : Extra kwargs.

        # Returns

        A tuple of two or three elements:

        1. A (0-dimensional) torch.FloatTensor corresponding to the computed loss.
           `.backward()` will be called on this tensor in order to compute a gradient
           update to the ActorCriticModel's parameters.
        2. A Dict[str, float] with scalar values corresponding to sub-losses.
        3. Optionally, a Dict[str, float] with scalar values corresponding to extra info
           to be processed per epoch and combined across epochs by the engine.
        """
        # TODO: The above documentation is missing what the batch dimensions are.

        raise NotImplementedError


================================================
FILE: allenact/algorithms/onpolicy_sync/losses/grouped_action_imitation.py
================================================
import functools
from typing import Dict, cast, Sequence, Set

import torch

from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
    AbstractActorCriticLoss,
)
from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput


class GroupedActionImitation(AbstractActorCriticLoss):
    """Imitation loss in which the expert specifies a group of acceptable actions.

    The loss is the negative log of the total probability the agent assigns to the
    actions in the expert's group, averaged over the batch.

    # Attributes

    nactions : Total number of actions.
    action_groups_mask : Float tensor with one row per action group (a 0/1 indicator over
        the `nactions` actions) followed by a final row of all ones.
    """

    def __init__(
        self, nactions: int, action_groups: Sequence[Set[int]], *args, **kwargs
    ):
        """Initializer.

        # Parameters

        nactions : Total number of actions.
        action_groups : Sets of action indices; together they must partition
            `[0, 1, 2, ..., nactions - 1]`.
        args : Forwarded to the parent initializer.
        kwargs : Forwarded to the parent initializer.
        """
        super().__init__(*args, **kwargs)

        assert (
            sum(len(ag) for ag in action_groups) == nactions
            and len(functools.reduce(lambda x, y: x | y, action_groups)) == nactions
        ), f"`action_groups` (==`{action_groups}`) must be a partition of `[0, 1, 2, ..., nactions - 1]`"

        self.nactions = nactions
        # One indicator row per group plus a trailing all-ones row. As the mask is
        # indexed directly with the expert's group index, an index of `-1` selects this
        # last row (presumably the "no expert group available" case, which then
        # contributes log(1) = 0 to the loss - TODO confirm).
        self.action_groups_mask = torch.FloatTensor(
            [
                [i in action_group for i in range(nactions)]
                for action_group in action_groups
            ]
            + [[1] * nactions]  # type:ignore
        )

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs,
    ):
        """Compute the grouped-action cross entropy.

        # Parameters

        step_count : Not used by this loss.
        batch : Its `"observations"` entry must contain `"expert_group_action"`, the
            index of the expert's action group for every element.
        actor_critic_output : Its `distributions.probs_tensor` gives the agent's action
            probabilities.

        # Returns

        A tuple `(total_loss, info)` where `info` maps `"grouped_action_cross_entropy"` to
        the loss as a Python float.
        """
        observations = cast(Dict[str, torch.Tensor], batch["observations"])

        assert "expert_group_action" in observations

        expert_group_actions = observations["expert_group_action"]

        # expert_group_actions = expert_group_actions + (expert_group_actions == -1).long() * (
        #     1 + self.action_groups_mask.shape[0]
        # )

        # Keep the mask on the same device as the expert actions. Comparing `.device`
        # and moving with `.to` works for CPU as well as GPU tensors (`.cuda(-1)` is not
        # a valid way to move a tensor back to the CPU).
        target_device = expert_group_actions.device
        if self.action_groups_mask.device != target_device:
            self.action_groups_mask = cast(
                torch.FloatTensor,
                self.action_groups_mask.to(target_device),
            )

        expert_group_actions_reshaped = expert_group_actions.view(-1, 1)

        # Look up, for every element, the indicator row of the expert's action group.
        expert_group_actions_mask = self.action_groups_mask[
            expert_group_actions_reshaped
        ]

        probs_tensor = actor_critic_output.distributions.probs_tensor
        expert_group_actions_mask = expert_group_actions_mask.view(probs_tensor.shape)

        total_loss = -(
            torch.log((probs_tensor * expert_group_actions_mask).sum(-1))
        ).mean()

        return total_loss, {
            "grouped_action_cross_entropy": total_loss.item(),
        }


================================================
FILE: allenact/algorithms/onpolicy_sync/losses/imitation.py
================================================
"""Defining imitation losses for actor critic type models."""

from collections import OrderedDict
from typing import Dict, cast, Optional, Union

import torch

import allenact.utils.spaces_utils as su
from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
    AbstractActorCriticLoss,
    ObservationType,
)
from allenact.base_abstractions.distributions import (
    Distr,
    CategoricalDistr,
    SequentialDistr,
    ConditionalDistr,
)
from allenact.base_abstractions.misc import ActorCriticOutput
from allenact.base_abstractions.sensor import AbstractExpertSensor


class Imitation(AbstractActorCriticLoss):
    """Cross-entropy loss pulling the agent's policy towards an expert's.

    # Attributes

    expert_sensor : Optional expert sensor. When given and its `use_groups`
        flag is set, expert actions are unflattened into action groups and a
        loss is computed for each group of a `SequentialDistr`.
    """

    def __init__(
        self, expert_sensor: Optional[AbstractExpertSensor] = None, *args, **kwargs
    ):
        """Initializer.

        See the class documentation for parameter definitions.
        """
        super().__init__(*args, **kwargs)

        self.expert_sensor = expert_sensor

    @staticmethod
    def group_loss(
        distribution: Union[CategoricalDistr, ConditionalDistr],
        expert_actions: torch.Tensor,
        expert_actions_masks: torch.Tensor,
    ):
        """Masked negative log likelihood of the expert actions.

        # Parameters

        distribution : A `CategoricalDistr`, or a `ConditionalDistr` wrapping one.
        expert_actions : The expert's actions, passed to `distribution.log_prob`.
        expert_actions_masks : Weights multiplying the log probabilities; their
            sum is used as the number of expert successes.

        # Returns

        A tuple `(group_loss, expert_successes)`: the masked negative log
        likelihood divided by the number of successes (clamped below at 1), and
        the (tensor) sum of the masks.
        """
        is_supported = isinstance(distribution, CategoricalDistr) or (
            isinstance(distribution, ConditionalDistr)
            and isinstance(distribution.distr, CategoricalDistr)
        )
        assert (
            is_supported
        ), "This implementation only supports (groups of) `CategoricalDistr`"

        num_successes = expert_actions_masks.sum()

        log_probs = distribution.log_prob(cast(torch.LongTensor, expert_actions))
        mask_shape = expert_actions_masks.shape
        assert log_probs.shape[: len(mask_shape)] == mask_shape

        # The mask may have fewer dimensions than the log probabilities: pad it
        # with singleton dimensions on the right so that it broadcasts.
        num_missing_dims = len(log_probs.shape) - len(mask_shape)
        assert num_missing_dims >= 0
        broadcast_masks = expert_actions_masks.view(
            *mask_shape, *((1,) * num_missing_dims)
        )

        masked_nll = -(broadcast_masks * log_probs).sum()

        return masked_nll / torch.clamp(num_successes, min=1), num_successes

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[Distr],
        *args,
        **kwargs,
    ):
        """Computes the imitation loss.

        # Parameters

        batch : A batch of data corresponding to the information collected when rolling out (possibly many) agents
            over a fixed number of steps. In particular this batch should have the same format as that returned by
            `RolloutStorage.batched_experience_generator`.
            Here `batch["observations"]` must contain `"expert_action"` observations
            or `"expert_policy"` observations. See `ExpertActionSensor` (or `ExpertPolicySensor`) for an example of
            a sensor producing such observations.
        actor_critic_output : The output of calling an ActorCriticModel on the observations in `batch`.
        args : Extra args. Ignored.
        kwargs : Extra kwargs. Ignored.

        # Returns

        A tuple whose first element is the computed loss (`.backward()` will be called on it in order to compute a
        gradient update to the ActorCriticModel's parameters) and whose second element is a dictionary of scalar
        values to log. The dictionary is empty when no expert supervision was available in the batch.
        """
        observations = cast(Dict[str, torch.Tensor], batch["observations"])

        has_expert_action = "expert_action" in observations
        if not has_expert_action and "expert_policy" not in observations:
            raise NotImplementedError(
                "Imitation loss requires either `expert_action` or `expert_policy`"
                " sensor to be active."
            )

        use_groups = self.expert_sensor is not None and self.expert_sensor.use_groups

        per_group_info = OrderedDict()
        report = False

        if has_expert_action:
            if use_groups:
                # One conditional distribution per action group; each group is
                # conditioned on the expert's actions for the preceding groups.
                grouped_expert = su.unflatten(
                    self.expert_sensor.observation_space, observations["expert_action"]
                )

                total_loss = 0
                conditioning = OrderedDict()

                for group_name, cd in zip(
                    self.expert_sensor.group_spaces,
                    cast(
                        SequentialDistr, actor_critic_output.distributions
                    ).conditional_distrs,
                ):
                    assert group_name == cd.action_group_name

                    cd.reset()
                    cd.condition_on_input(**conditioning)

                    group_expert = grouped_expert[group_name]
                    expert_action = group_expert[
                        AbstractExpertSensor.ACTION_POLICY_LABEL
                    ]
                    expert_action_masks = group_expert[
                        AbstractExpertSensor.EXPERT_SUCCESS_LABEL
                    ]

                    conditioning[group_name] = expert_action

                    current_loss, num_successes = self.group_loss(
                        cd,
                        expert_action,
                        expert_action_masks,
                    )

                    cd.reset()

                    # Groups without any expert supervision are left out of
                    # both the total and the reported values.
                    if num_successes.item() != 0:
                        report = True
                        per_group_info[group_name + "_cross_entropy"] = (
                            current_loss.item()
                        )
                        total_loss = total_loss + current_loss
            else:
                # The last dimension packs (expert action, expert success).
                action_and_mask = observations["expert_action"]

                assert action_and_mask.shape[-1] == 2
                leading_shape = action_and_mask.shape[:-1]
                flat_action_and_mask = action_and_mask.view(-1, 2)

                expert_actions = flat_action_and_mask[:, 0].view(*leading_shape, 1)
                expert_actions_masks = (
                    flat_action_and_mask[:, 1].float().view(*leading_shape, 1)
                )

                total_loss, num_successes = self.group_loss(
                    cast(CategoricalDistr, actor_critic_output.distributions),
                    expert_actions,
                    expert_actions_masks,
                )

                report = num_successes.item() != 0
        else:
            if use_groups:
                raise NotImplementedError(
                    "This implementation currently only supports `CategoricalDistr`"
                )

            assert isinstance(
                actor_critic_output.distributions, CategoricalDistr
            ), "This implementation currently only supports `CategoricalDistr`"

            # The last entry of the final dimension is the expert success mask,
            # everything before it is the expert's policy.
            policy_and_mask = observations["expert_policy"]
            expert_policies = policy_and_mask[..., :-1]
            expert_actions_masks = policy_and_mask[..., -1:]

            num_successes = expert_actions_masks.sum()
            if num_successes.item() > 0:
                report = True

            log_probs = cast(
                CategoricalDistr, actor_critic_output.distributions
            ).log_probs_tensor

            # Pad the mask with singleton dimensions on the right so that it
            # broadcasts against the log probabilities.
            num_missing_dims = len(log_probs.shape) - len(expert_actions_masks.shape)
            assert num_missing_dims >= 0
            expert_actions_masks = expert_actions_masks.view(
                *expert_actions_masks.shape, *((1,) * num_missing_dims)
            )

            masked_cross_entropy = (
                -(log_probs * expert_policies) * expert_actions_masks
            ).sum()
            total_loss = masked_cross_entropy / torch.clamp(num_successes, min=1)

        if report:
            info = {"expert_cross_entropy": total_loss.item(), **per_group_info}
        else:
            info = {}

        return total_loss, info


================================================
FILE: allenact/algorithms/onpolicy_sync/losses/ppo.py
================================================
"""Defining the PPO loss for actor critic type models."""

from typing import Dict, Optional, Callable, cast, Tuple

import torch

from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
    AbstractActorCriticLoss,
    ObservationType,
)
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput


class PPO(AbstractActorCriticLoss):
    """The Proximal Policy Optimization (clipped surrogate) loss.

    # Attributes

    clip_param : The clipping parameter to use.
    value_loss_coef : Weight of the value loss.
    entropy_coef : Weight of the entropy (encouraging) loss.
    use_clipped_value_loss : Whether or not to also clip the value loss.
    clip_decay : Callable for clip param decay factor (function of the current number of steps)
    entropy_method_name : Name of Distr's entropy method name. Default is `entropy`,
                          but we might use `conditional_entropy` for `SequentialDistr`
    show_ratios : If True, adds tracking for the PPO ratio (linear, clamped, and used) in each
                  epoch to be logged by the engine.
    normalize_advantage: Whether or not to use normalized advantage. Default is True.
    """

    def __init__(
        self,
        clip_param: float,
        value_loss_coef: float,
        entropy_coef: float,
        use_clipped_value_loss=True,
        clip_decay: Optional[Callable[[int], float]] = None,
        entropy_method_name: str = "entropy",
        normalize_advantage: bool = True,
        show_ratios: bool = False,
        *args,
        **kwargs
    ):
        """Initializer.

        See the class documentation for parameter definitions.
        """
        super().__init__(*args, **kwargs)
        self.clip_param = clip_param
        self.value_loss_coef = value_loss_coef
        self.entropy_coef = entropy_coef
        self.use_clipped_value_loss = use_clipped_value_loss
        # Without a decay schedule the clip parameter is kept constant.
        self.clip_decay = (lambda x: 1.0) if clip_decay is None else clip_decay
        self.entropy_method_name = entropy_method_name
        self.show_ratios = show_ratios
        # Key of `batch` holding the advantages used by the surrogate objective.
        self.adv_key = "norm_adv_targ" if normalize_advantage else "adv_targ"

    def loss_per_step(
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
    ) -> Tuple[
        Dict[str, Tuple[torch.Tensor, Optional[float]]], Dict[str, torch.Tensor]
    ]:
        """Computes the un-reduced PPO loss terms.

        # Parameters

        step_count : Current number of steps, passed to `clip_decay`.
        batch : Must contain `"actions"`, `"old_action_log_probs"`, `"values"`
            (only if the value loss is clipped), `"returns"`, and the advantages
            under `self.adv_key`.
        actor_critic_output : The model output (distributions and values).

        # Returns

        A tuple of two dictionaries. The first maps `"value"`, `"action"`, and
        `"entropy"` to `(loss tensor, weight)` pairs, a weight of `None` meaning
        that the term is used unweighted. The second holds the ratio tensors
        (`"ratio"`, `"ratio_clamped"`, `"ratio_used"`) when `show_ratios` is set
        and is empty otherwise.
        """
        actions = cast(torch.LongTensor, batch["actions"])
        values = actor_critic_output.values

        action_log_probs = actor_critic_output.distributions.log_prob(actions)
        entropy_method = getattr(
            actor_critic_output.distributions, self.entropy_method_name
        )
        dist_entropy: torch.FloatTensor = entropy_method()

        def add_trailing_dims(t: torch.Tensor):
            # Right-pad `t` with singleton dims up to the rank of the advantages.
            target_rank = len(batch[self.adv_key].shape)
            rank = len(t.shape)
            assert rank <= target_rank
            return t.view(t.shape + ((1,) * (target_rank - rank)))

        dist_entropy = add_trailing_dims(dist_entropy)

        clip_param = self.clip_param * self.clip_decay(step_count)

        ratio = add_trailing_dims(
            torch.exp(action_log_probs - batch["old_action_log_probs"])
        )
        clamped_ratio = torch.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param)

        advantages = batch[self.adv_key]
        unclipped_objective = ratio * advantages
        clipped_objective = clamped_ratio * advantages

        # Pessimistic choice between the two objectives.
        use_clamped = clipped_objective < unclipped_objective
        action_loss = -torch.where(
            cast(torch.Tensor, use_clamped), clipped_objective, unclipped_objective
        )

        if not self.use_clipped_value_loss:
            value_loss = 0.5 * (cast(torch.FloatTensor, batch["returns"]) - values).pow(
                2
            )
        else:
            # Limit how far the new value estimate may move from the old one.
            value_delta = (values - batch["values"]).clamp(-clip_param, clip_param)
            value_pred_clipped = batch["values"] + value_delta
            unclipped_sq_err = (values - batch["returns"]).pow(2)
            clipped_sq_err = (value_pred_clipped - batch["returns"]).pow(2)
            value_loss = 0.5 * torch.max(unclipped_sq_err, clipped_sq_err)

        ratio_info: Dict[str, torch.Tensor] = {}
        if self.show_ratios:
            ratio_info = {
                "ratio": ratio,
                "ratio_clamped": clamped_ratio,
                "ratio_used": torch.where(
                    cast(torch.Tensor, use_clamped), clamped_ratio, ratio
                ),
            }

        # noinspection PyUnresolvedReferences
        loss_terms = {
            "value": (value_loss, self.value_loss_coef),
            "action": (action_loss, None),
            # Negated (in place) so that minimizing the loss increases entropy.
            "entropy": (dist_entropy.mul_(-1.0), self.entropy_coef),  # type: ignore
        }
        return loss_terms, ratio_info

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs
    ):
        """Computes the (scalar) PPO loss.

        # Returns

        `(total_loss, info)` where `info` maps `"ppo_total"` and each loss term
        name to python floats. When `show_ratios` is set, a third element with
        the mean of every tracked ratio is appended.
        """
        losses_per_step, ratio_info = self.loss_per_step(
            step_count=step_count,
            batch=batch,
            actor_critic_output=actor_critic_output,
        )

        mean_losses = {
            name: (per_step_loss.mean(), coef)
            for name, (per_step_loss, coef) in losses_per_step.items()
        }

        total_loss = 0
        for mean_loss, coef in mean_losses.values():
            total_loss = total_loss + (mean_loss if coef is None else mean_loss * coef)

        info = {"ppo_total": cast(torch.Tensor, total_loss).item()}
        for name, (mean_loss, _) in mean_losses.items():
            info[name] = mean_loss.item()

        if not self.show_ratios:
            return total_loss, info

        ratio_means = {
            name: float(tensor.mean().item()) for name, tensor in ratio_info.items()
        }
        return total_loss, info, ratio_means


class PPOValue(AbstractActorCriticLoss):
    """The value-function part of the Proximal Policy Optimization loss.

    # Attributes

    clip_param : The clipping parameter to use.
    use_clipped_value_loss : Whether or not to also clip the value loss.
    clip_decay : Callable for clip param decay factor (function of the current number of steps)
    """

    def __init__(
        self,
        clip_param: float,
        use_clipped_value_loss=True,
        clip_decay: Optional[Callable[[int], float]] = None,
        *args,
        **kwargs
    ):
        """Initializer.

        See the class documentation for parameter definitions.
        """
        super().__init__(*args, **kwargs)
        self.clip_param = clip_param
        self.use_clipped_value_loss = use_clipped_value_loss
        # Without a decay schedule the clip parameter is kept constant.
        self.clip_decay = (lambda x: 1.0) if clip_decay is None else clip_decay

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs
    ):
        """Computes half the mean squared error of the value estimates.

        # Parameters

        step_count : Current number of steps, passed to `clip_decay`.
        batch : Must contain `"returns"` and, if the value loss is clipped,
            the previous value estimates under `"values"`.
        actor_critic_output : The model output; only its `values` are used.
        args : Extra args. Ignored.
        kwargs : Extra kwargs. Ignored.

        # Returns

        `(value_loss, info)` where `info` maps `"value"` to the loss as a float.
        """
        values = actor_critic_output.values
        clip_param = self.clip_param * self.clip_decay(step_count)

        if not self.use_clipped_value_loss:
            squared_error = (cast(torch.FloatTensor, batch["returns"]) - values).pow(2)
        else:
            # Limit how far the new value estimate may move from the old one and
            # keep, elementwise, the larger of the two squared errors.
            value_delta = (values - batch["values"]).clamp(-clip_param, clip_param)
            value_pred_clipped = batch["values"] + value_delta
            unclipped_sq_err = (values - batch["returns"]).pow(2)
            clipped_sq_err = (value_pred_clipped - batch["returns"]).pow(2)
            squared_error = torch.max(unclipped_sq_err, clipped_sq_err)

        value_loss = 0.5 * squared_error.mean()

        return value_loss, {"value": value_loss.item()}


# Default hyperparameters; the keys match the required arguments of `PPO.__init__`.
PPOConfig = {
    "clip_param": 0.1,
    "value_loss_coef": 0.5,
    "entropy_coef": 0.01,
}


================================================
FILE: allenact/algorithms/onpolicy_sync/misc.py
================================================
from enum import Enum
from typing import Dict, Any, Optional

import attr


class TrackingInfoType(Enum):
    """Kinds of information that a `TrackingInfo` record can carry."""

    # NOTE(review): the meanings below are inferred from the member names only;
    # confirm against the code that produces `TrackingInfo` records.
    LOSS = "loss"  # presumably values reported by losses
    TEACHER_FORCING = "teacher_forcing"  # presumably teacher forcing statistics
    UPDATE_INFO = "update_info"  # presumably information about model updates


@attr.s(kw_only=True)
class TrackingInfo:
    """A single record of tracked information.

    All fields are keyword-only and none of them has a default value, so
    every field must be given explicitly (pass `None` for the optional ones).
    """

    # The category of this record.
    type: TrackingInfoType = attr.ib()
    # The tracked values, keyed by name.
    info: Dict[str, Any] = attr.ib()
    # NOTE(review): presumably the number of samples that `info` summarizes
    # (e.g. a weight for averaging) - confirm against callers.
    n: int = attr.ib()
    # NOTE(review): presumably identifies the storage the record relates to,
    # if any - confirm against callers.
    storage_uuid: Optional[str] = attr.ib()
    # NOTE(review): presumably identifies the training stage component the
    # record relates to, if any - confirm against callers.
    stage_component_uuid: Optional[str] = attr.ib()


================================================
FILE: allenact/algorithms/onpolicy_sync/policy.py
================================================
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import abc
from collections import OrderedDict
from typing import TypeVar, Generic, Tuple, Optional, Union, Dict, List, Any

import gym
import torch
from gym.spaces.dict import Dict as SpaceDict
import torch.nn as nn

from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput, Memory

# Type variable for the action-distribution class that an `ActorCriticModel`
# is parametrized by.
DistributionType = TypeVar("DistributionType")

# A single named memory dimension, `(name, size)`; the size may be `None`
# (e.g. the `("sampler", None)` placeholder).
MemoryDimType = Tuple[str, Optional[int]]
# The named shape of one memory tensor: a tuple of named dimensions.
MemoryShapeType = Tuple[MemoryDimType, ...]
# Full description of one memory: its named shape and its data type.
MemorySpecType = Tuple[MemoryShapeType, torch.dtype]
# Map from a memory's string key to its specification.
FullMemorySpecType = Dict[str, MemorySpecType]

# Observations: string keys mapped to tensors or to nested dictionaries.
ObservationType = Dict[str, Union[torch.Tensor, Dict[str, Any]]]
# Actions: a tensor, an ordered dictionary, a tuple, or a plain int.
ActionType = Union[torch.Tensor, OrderedDict, Tuple, int]


class ActorCriticModel(Generic[DistributionType], nn.Module):
    """Base class for deep (possibly recurrent) actor critic agents.

    To define a new agent, subclass this class and implement its abstract
    methods (`_recurrent_memory_specification` and `forward`).

    # Attributes

    action_space : The space of actions available to the agent. This is of type `gym.spaces.Space`.
    observation_space: The observation space expected by the agent. This is of type `gym.spaces.dict`.
    """

    def __init__(self, action_space: gym.Space, observation_space: SpaceDict):
        """Initializer.

        # Parameters

        action_space : The space of actions available to the agent.
        observation_space: The observation space expected by the agent.
        """
        super().__init__()
        self.action_space = action_space
        self.observation_space = observation_space
        # Cache for the memory specification. `None` means "not computed yet";
        # afterwards it is a one-element list so that a cached `None`
        # specification (memory-less model) can be told apart.
        self.memory_spec: Optional[List[Optional[FullMemorySpecType]]] = None

    @property
    def recurrent_memory_specification(self) -> Optional[FullMemorySpecType]:
        """The memory specification for the `ActorCriticModel`. See the docs
        of `_recurrent_memory_specification`.

        The specification is computed and validated on first access and
        cached afterwards.

        # Returns

        The memory specification from `_recurrent_memory_specification`.
        """
        if self.memory_spec is not None:
            return self.memory_spec[0]

        specification = self._recurrent_memory_specification()
        self.memory_spec = [specification]

        if specification is None:
            return None

        for memory_key in specification:
            named_dims, _ = specification[memory_key]
            dim_names = [named_dim[0] for named_dim in named_dims]

            assert (
                "step" not in dim_names
            ), "`step` is automatically added and cannot be reused"

            assert "sampler" in dim_names, "`sampler` dim must be defined"

        return specification

    @abc.abstractmethod
    def _recurrent_memory_specification(self) -> Optional[FullMemorySpecType]:
        """Implementation of memory specification for the `ActorCriticModel`.

        # Returns

        `None` if the model is memory-less. Otherwise a one-level dictionary
        mapping string keys (identifying a memory type) to tuples specifying
        that memory type. Each such tuple contains:

        1. The named shape of the memory, e.g.
        `(("layer", 1), ("sampler", None), ("agent", 2), ("hidden", 32))`
        for a two-agent GRU memory. Here the `sampler` dimension placeholder
        *always* precedes the optional `agent` dimension; the optional `agent`
        dimension holds the number of agents in the model and, if present, is
        *always* the one right after `sampler`; `layer` and `hidden` correspond
        to the standard RNN hidden state parametrization.
        2. The data type, e.g. `torch.float32`.

        The `sampler` dimension placeholder is mandatory for all memories.

        For a single-agent ActorCritic model it is often more convenient to
        skip the agent dimension, e.g.
        `(("layer", 1), ("sampler", None), ("hidden", 32))` for a GRU memory.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: ActionType,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Maps the input observations (and the previous hidden state) to
        action probabilities and the state value.

        # Parameters

        observations : Multi-level map from key strings to tensors of shape [steps, samplers, (agents,) ...] with the
                       current observations.
        memory : `Memory` object with recurrent memory. The shape of each tensor is determined by the corresponding
                 entry in `_recurrent_memory_specification`.
        prev_actions : ActionType with tensors of shape [steps, samplers, ...] with the previous actions.
        masks : tensor of shape [steps, samplers, agents, 1] with zeros indicating steps where a new episode/task
                starts.

        # Returns

        A tuple. Its first element is an `ActorCriticOutput` holding the agents' probability distribution over
        possible actions (shape [steps, samplers, ...]), the agents' value for the state
        (shape [steps, samplers, ..., 1]), and any extra information needed for loss computations. Its second
        element is an optional `Memory`, which is only used in models with recurrent memory.
        """
        raise NotImplementedError()


class LinearActorCriticHead(nn.Module):
    """A single linear layer producing both the action logits and the value.

    The layer has `num_actions + 1` outputs: the last output is the value
    estimate and all preceding outputs are the logits of the actions.
    """

    def __init__(self, input_size: int, num_actions: int):
        """Initializer.

        # Parameters

        input_size : Size of the last dimension of the input features.
        num_actions : Number of actions (i.e. number of logits to produce).
        """
        super().__init__()
        self.input_size = input_size
        self.num_actions = num_actions

        joint_layer = nn.Linear(input_size, 1 + num_actions)
        nn.init.orthogonal_(joint_layer.weight)
        nn.init.constant_(joint_layer.bias, 0)
        self.actor_and_critic = joint_layer

    def forward(self, x) -> Tuple[CategoricalDistr, torch.Tensor]:
        """Returns the action distribution and the values computed from `x`."""
        joint_output = self.actor_and_critic(x)

        logits = joint_output[..., :-1]
        values = joint_output[..., -1:]

        # logits are [step, sampler, ...]
        # noinspection PyArgumentList
        distribution = CategoricalDistr(logits=logits)

        # values are [step, sampler, flattened]
        leading_dims = tuple(values.shape[:2])
        flattened_values = values.view(*leading_dims, -1)

        return distribution, flattened_values


class LinearCriticHead(nn.Module):
    """A linear layer mapping input features to a single value estimate."""

    def __init__(self, input_size: int):
        """Initializer.

        # Parameters

        input_size : Size of the last dimension of the input features.
        """
        super().__init__()
        value_layer = nn.Linear(input_size, 1)
        nn.init.orthogonal_(value_layer.weight)
        nn.init.constant_(value_layer.bias, 0)
        self.fc = value_layer

    def forward(self, x):
        """Returns the values for `x`, keeping the first two dimensions of `x`
        and flattening all remaining ones."""
        values = self.fc(x)
        leading_dims = tuple(x.shape[:2])
        return values.view(*leading_dims, -1)  # [steps, samplers, flattened]


class LinearActorHead(nn.Module):
    """A linear layer mapping input features to a categorical distribution
    over actions."""

    def __init__(self, num_inputs: int, num_outputs: int):
        """Initializer.

        # Parameters

        num_inputs : Size of the last dimension of the input features.
        num_outputs : Number of logits (i.e. actions) to produce.
        """
        super().__init__()

        logits_layer = nn.Linear(num_inputs, num_outputs)
        # A small gain keeps the initial logits close to zero.
        nn.init.orthogonal_(logits_layer.weight, gain=0.01)
        nn.init.constant_(logits_layer.bias, 0)
        self.linear = logits_layer

    def forward(self, x: torch.FloatTensor):  # type: ignore
        """Returns the `CategoricalDistr` whose logits are computed from `x`."""
        logits = self.linear(x)  # type:ignore

        # logits are [step, sampler, ...]
        # noinspection PyArgumentList
        return CategoricalDistr(logits=logits)


================================================
FILE: allenact/algorithms/onpolicy_sync/runner.py
================================================
"""Defines the reinforcement learning `OnPolicyRunner`."""

import copy
import enum
import glob
import importlib.util
import inspect
import itertools
import json
import math
import os
import pathlib
import queue
import random
import signal
import subprocess
import sys
import time
import traceback
from collections import defaultdict
from multiprocessing.context import BaseContext
from multiprocessing.process import BaseProcess
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union, Set

import filelock
import numpy as np
import torch
import torch.multiprocessing as mp
from setproctitle import setproctitle as ptitle
from torch.distributions.utils import lazy_property

from allenact.algorithms.onpolicy_sync.engine import (
    TEST_MODE_STR,
    TRAIN_MODE_STR,
    VALID_MODE_STR,
    OnPolicyInference,
    OnPolicyRLEngine,
    OnPolicyTrainer,
)
from allenact.base_abstractions.callbacks import Callback
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.sensor import Sensor
from allenact.utils.experiment_utils import (
    LoggingPackage,
    ScalarMeanTracker,
    set_deterministic_cudnn,
    set_seed,
    download_checkpoint_from_wandb,
)
from allenact.utils.misc_utils import (
    NumpyJSONEncoder,
    all_equal,
    get_git_diff_of_project,
)
from allenact.utils.model_utils import md5_hash_of_state_dict
from allenact.utils.system import find_free_port, get_logger
from allenact.utils.tensor_utils import SummaryWriter
from allenact.utils.viz_utils import VizSuite

# NOTE(review): presumably the special key/name under which the experiment
# config's keyword arguments are stored or looked up - confirm against its usages.
CONFIG_KWARGS_STR = "__CONFIG_KWARGS__"


class SaveDirFormat(enum.Enum):
    """Directory formats that can be used when saving tensorboard logs,
    checkpoints, etc. during training/evaluation.

    FLAT: the first-level directories are logs, checkpoints, metrics, etc.; the
        second-level directories are the time strings of each experiment.
    NESTED: the opposite of FLAT.
    """

    FLAT = "FLAT"
    NESTED = "NESTED"


# Has results queue (aggregated per trainer), checkpoints queue and mp context
# Instantiates train, validate, and test workers
# Logging
# Saves configs, makes folder for trainer models
class OnPolicyRunner(object):
    def __init__(
        self,
        config: ExperimentConfig,
        output_dir: str,
        loaded_config_src_files: Optional[Dict[str, str]],
        seed: Optional[int] = None,
        mode: str = "train",
        deterministic_cudnn: bool = False,
        deterministic_agents: bool = False,
        mp_ctx: Optional[BaseContext] = None,
        multiprocessing_start_method: str = "default",
        extra_tag: str = "",
        disable_tensorboard: bool = False,
        disable_config_saving: bool = False,
        distributed_ip_and_port: str = "127.0.0.1:0",
        distributed_preemption_threshold: float = 0.7,
        machine_id: int = 0,
        save_dir_fmt: SaveDirFormat = SaveDirFormat.FLAT,
        callbacks_paths: Optional[str] = None,
    ):
        """Store the runner configuration and seed the random number
        generators.

        # Parameters

        config : The experiment configuration driving this runner.
        output_dir : Base directory under which outputs are written.
        loaded_config_src_files : Source files (and possibly keyword arguments)
            of the loaded config, kept so that they can be saved later.
        seed : Seed handed to `set_seed`; drawn at random when `None`.
        mode : Either "train" or "test" (case and surrounding whitespace are
            ignored).
        deterministic_cudnn : If `True`, `set_deterministic_cudnn()` is called.
        deterministic_agents : Stored and later forwarded to inference workers.
        mp_ctx : An existing multiprocessing context to use, if any.
        multiprocessing_start_method : Start method used to create a context
            when `mp_ctx` is `None`. With "default", "forkserver" is chosen if
            CUDA is available and "spawn" otherwise.
        extra_tag : Additional tag used when naming the experiment.
        disable_tensorboard : Stored; when `True` no visualizer is initialized.
        disable_config_saving : Stored; when `True` the project state is not
            saved at the start of training.
        distributed_ip_and_port : An "ip:port" string used for distributed runs.
        distributed_preemption_threshold : Stored and forwarded to trainers.
        machine_id : Index of the machine this runner is executing on.
        save_dir_fmt : Directory layout used for saved outputs.
        callbacks_paths : Comma-separated callback specification, resolved
            lazily through the `callbacks` property.
        """
        # Experiment description and output layout.
        self.config = config
        self.output_dir = output_dir
        self.loaded_config_src_files = loaded_config_src_files

        # Reproducibility settings.
        if seed is None:
            seed = random.randint(0, 2**31 - 1)
        self.seed = seed
        self.deterministic_cudnn = deterministic_cudnn
        self.distributed_preemption_threshold = distributed_preemption_threshold

        # Resolve the "default" start method before building the context.
        if multiprocessing_start_method == "default":
            # Spawn seems to play nicer with cpus and debugging
            multiprocessing_start_method = (
                "forkserver" if torch.cuda.is_available() else "spawn"
            )
        self.mp_ctx = self.init_context(mp_ctx, multiprocessing_start_method)

        self.extra_tag = extra_tag
        self.mode = mode.lower().strip()
        self.visualizer: Optional[VizSuite] = None
        self.deterministic_agents = deterministic_agents
        self.disable_tensorboard = disable_tensorboard
        self.disable_config_saving = disable_config_saving

        supported_modes = (TRAIN_MODE_STR, TEST_MODE_STR)
        assert (
            self.mode in supported_modes
        ), "Only 'train' and 'test' modes supported in runner"

        if self.deterministic_cudnn:
            set_deterministic_cudnn()
        set_seed(self.seed)

        # Runtime state, populated once `start_train`/`start_test` is called.
        self.queues: Optional[Dict[str, mp.Queue]] = None
        self.processes: Dict[str, List[Union[BaseProcess, mp.Process]]] = defaultdict(
            list
        )
        self.current_checkpoint = None
        self._local_start_time_str: Optional[str] = None
        self._is_closed: bool = False
        self._collect_valid_results: bool = False

        # Distributed setup, output format, and callbacks.
        self.distributed_ip_and_port = distributed_ip_and_port
        self.machine_id = machine_id
        self.save_dir_fmt = save_dir_fmt
        self.callbacks_paths = callbacks_paths

    @lazy_property
    def callbacks(self):
        return self.setup_callback_classes(self.callbacks_paths)

    @property
    def local_start_time_str(self) -> str:
        if self._local_start_time_str is None:
            raise RuntimeError(
                "Local start time string does not exist as neither `start_train()` or `start_test()`"
                " has been called on this runner."
            )
        return self._local_start_time_str

    @property
    def running_validation(self):
        """Whether this runner should run validation.

        Validation is only ever run on the machine with `machine_id == 0`,
        and then only if validation processes have been allocated or if the
        training pipeline has no rollout storage uuid while its validation
        stage defines at least one loss.
        """
        pipeline = self.config.training_pipeline()
        valid_machine_params = MachineParams.instance_from(
            self.config.machine_params(VALID_MODE_STR)
        )

        wants_validation = sum(valid_machine_params.nprocesses) > 0
        if not wants_validation:
            wants_validation = (
                pipeline.rollout_storage_uuid is None
                and len(pipeline.valid_pipeline_stage.loss_names) > 0
            )

        return wants_validation and self.machine_id == 0

    @staticmethod
    def init_context(
        mp_ctx: Optional[BaseContext] = None,
        multiprocessing_start_method: str = "forkserver",
        valid_start_methods: Tuple[str, ...] = ("forkserver", "spawn", "fork"),
    ):
        if mp_ctx is None:
            assert multiprocessing_start_method in valid_start_methods, (
                f"multiprocessing_start_method must be one of {valid_start_methods}."
                f" Got '{multiprocessing_start_method}'"
            )

            mp_ctx = mp.get_context(multiprocessing_start_method)
        elif multiprocessing_start_method != mp_ctx.get_start_method():
            get_logger().warning(
                f"ignoring multiprocessing_start_method '{multiprocessing_start_method}'"
                f" and using given context with '{mp_ctx.get_start_method()}'"
            )

        return mp_ctx

    def setup_callback_classes(self, callbacks: Optional[str]) -> Set[Callback]:
        """Get a list of Callback classes from a comma-separated list of files,
        paths, and/or functions.

        After separating the `callbacks` into a list of strings, each string should either
        be a:
        1. Name of a function defined on the experiment config that, when called, returns an
           object with of type `Callback`.
        2. Path to a python file containing a single class that inherits from `Callback`.
        3. Module path (e.g. `path.to.module`) where this module contains a single class that
            inherits from `Callback`.
        """
        if callbacks == "" or callbacks is None:
            return set()

        setup_dict = dict(
            name=f"{self.experiment_name}/{self.local_start_time_str}",
            config=self.config,
            mode=self.mode,
        )

        callback_objects = set()
        files = callbacks.split(",")
        for filename in files:
            # Check if the `filename` is a function on the config
            if not any(k in filename for k in [".", "/"]):
                callback_func = getattr(self.config, filename, None)
                if callback_func is not None:
                    callback = callback_func()
                    callback.setup(**setup_dict)
                    callback_objects.add(callback)
                    continue

            # Otherwise find the Callback class in the file or module
            module_path = filename.replace("/", ".")
            if module_path.endswith(".py"):
                module_path = module_path[:-3]
            module = importlib.import_module(module_path)
            classes = inspect.getmembers(module, inspect.isclass)

            callback_classes = [
                mod_class[1]
                for mod_class in classes
                if issubclass(mod_class[1], Callback)
            ]

            assert callback_classes == 1, (
                f"Expected a single callback class in {filename}, but found {len(callback_classes)}."
                f" These classes were found: {callback_classes}."
            )

            for mod_class in callback_classes:
                # NOTE: initialize the callback class
                callback = mod_class[1]()
                callback.setup(**setup_dict)
                callback_objects.add(callback)

        return callback_objects

    def _acquire_unique_local_start_time_string(self) -> str:
        """Produce a start time string that no other experiment sharing the
        same output directory has recorded as its own.

        A file lock guards a small file holding the most recently issued
        start time string. While holding the lock we wait until the current
        time (at a resolution of one second) differs from that recorded
        string, and then record the new one. Hence, if many experiments are
        started in parallel, at most one will be started every second.
        """
        time_format = "%Y-%m-%d_%H-%M-%S"

        os.makedirs(self.output_dir, exist_ok=True)
        lock_path = os.path.abspath(
            os.path.join(self.output_dir, ".allenact_start_time_string.lock")
        )
        try:
            with filelock.FileLock(lock_path, timeout=60):
                record_path = os.path.join(
                    self.output_dir, ".allenact_last_start_time_string"
                )
                # Make sure the record file exists before reading it.
                pathlib.Path(record_path).touch()

                with open(record_path, "r") as f:
                    recorded_lines = f.readlines()

                previous_str = (
                    recorded_lines[0].strip() if len(recorded_lines) != 0 else None
                )

                candidate_str = time.strftime(
                    time_format, time.localtime(time.time())
                )
                while previous_str is not None and previous_str == candidate_str:
                    time.sleep(0.2)
                    candidate_str = time.strftime(
                        time_format, time.localtime(time.time())
                    )

                with open(record_path, "w") as f:
                    f.write(candidate_str)

        except filelock.Timeout as e:
            get_logger().exception(
                f"Could not acquire the lock for {lock_path} for 60 seconds,"
                " this suggests an unexpected deadlock. Please close all AllenAct training processes,"
                " delete this lockfile, and try again."
            )
            raise e

        assert candidate_str is not None
        return candidate_str

    def worker_devices(self, mode: str):
        """Return the devices assigned to the workers of the given `mode`.

        The devices are read from the experiment config's machine parameters
        and must either all be equal or all have a non-negative index.
        """
        machine_params: MachineParams = MachineParams.instance_from(
            self.config.machine_params(mode)
        )
        devices = machine_params.devices

        devices_are_consistent = all_equal(devices) or all(
            d.index >= 0 for d in devices
        )
        assert (
            devices_are_consistent
        ), f"Cannot have a mix of CPU and GPU devices (`devices == {devices}`)"

        get_logger().info(f"Using {len(devices)} {mode} workers on devices {devices}")
        return devices

    def local_worker_ids(self, mode: str):
        """Return the ids of the `mode` workers that run on this machine, as
        given by the machine parameters for `self.machine_id`."""
        params_for_this_machine: MachineParams = MachineParams.instance_from(
            self.config.machine_params(mode, machine_id=self.machine_id)
        )
        worker_ids = params_for_this_machine.local_worker_ids

        get_logger().info(
            f"Using local worker ids {worker_ids} (total {len(worker_ids)} workers in machine {self.machine_id})"
        )

        return worker_ids

    def init_visualizer(self, mode: str):
        if not self.disable_tensorboard:
            # Note: Avoid instantiating anything in machine_params (use Builder if needed)
            machine_params = MachineParams.instance_from(
                self.config.machine_params(mode)
            )
            self.visualizer = machine_params.visualizer

    @staticmethod
    def init_process(mode: str, id: int, to_close_on_termination: OnPolicyRLEngine):
        """Name the current process and install its SIGTERM/SIGINT handlers.

        The process title is set to `"{mode}-{id}"`. The installed handlers
        close `to_close_on_termination` (if it is neither closed nor already
        closing) and then exit the process.

        # Parameters

        mode : Label of the worker kind, used in the process title and logs.
        id : Id of the worker, used in the process title and logs.
        to_close_on_termination : The engine to close when a termination or
            interrupt signal is received.
        """
        ptitle(f"{mode}-{id}")

        def create_handler(termination_type: str):
            # Builds a signal handler whose log messages start with
            # `termination_type`.
            def handler(_signo, _frame):
                prefix = f"{termination_type} signal sent to worker {mode}-{id}."
                if to_close_on_termination.is_closed:
                    # Nothing left to clean up, exit successfully.
                    get_logger().info(
                        f"{prefix} Worker {mode}-{id} is already closed, exiting."
                    )
                    sys.exit(0)
                elif not to_close_on_termination.is_closing:
                    get_logger().info(
                        f"{prefix} Forcing worker {mode}-{id} to close and exiting."
                    )
                    # noinspection PyBroadException
                    try:
                        to_close_on_termination.close(True)
                    except Exception:
                        get_logger().error(
                            f"Error occurred when closing the RL engine used by work {mode}-{id}."
                            f" We cannot recover from this and will simply exit. The exception:\n"
                            f"{traceback.format_exc()}"
                        )
                        # Closing failed, signal this with a non-zero exit code.
                        sys.exit(1)
                    sys.exit(0)
                else:
                    # A close is already in progress, the signal is ignored
                    # so that this close is not interrupted.
                    get_logger().info(
                        f"{prefix} Worker {mode}-{id} is already closing, ignoring this signal."
                    )

            return handler

        signal.signal(signal.SIGTERM, create_handler("Termination"))
        signal.signal(signal.SIGINT, create_handler("Interrupt"))

    @staticmethod
    def init_worker(engine_class, args, kwargs):
        mode = kwargs["mode"]
        id = kwargs["worker_id"]

        worker = None
        try:
            worker = engine_class(*args, **kwargs)
        except Exception:
            get_logger().error(f"Encountered Exception. Terminating {mode} worker {id}")
            get_logger().exception(traceback.format_exc())
            kwargs["results_queue"].put((f"{mode}_stopped", 1 + id))
        finally:
            return worker

    @lazy_property
    def _get_callback_sensors(self) -> List[Sensor]:
        callback_sensors: List[Sensor] = []
        for c in self.callbacks:
            sensors = c.callback_sensors()
            if sensors is not None:
                callback_sensors.extend(sensors)
        return callback_sensors

    @staticmethod
    def train_loop(
        id: int = 0,
        checkpoint: Optional[str] = None,
        restart_pipeline: bool = False,
        valid_on_initial_weights: bool = False,
        *engine_args,
        **engine_kwargs,
    ):
        """Entry point of a training process.

        Builds an `OnPolicyTrainer` from the engine arguments (with the mode
        and worker id filled in) and, if this succeeds, installs the signal
        handlers of the process and runs training.
        """
        engine_kwargs.update(mode=TRAIN_MODE_STR, worker_id=id)

        # The initial model state dict is not printed.
        engine_kwargs_for_print = dict(engine_kwargs)
        if "initial_model_state_dict" in engine_kwargs_for_print:
            engine_kwargs_for_print["initial_model_state_dict"] = "[SUPPRESSED]"
        get_logger().info(f"train {id} args {engine_kwargs_for_print}")

        trainer: OnPolicyTrainer = OnPolicyRunner.init_worker(
            engine_class=OnPolicyTrainer, args=engine_args, kwargs=engine_kwargs
        )
        if trainer is None:
            return

        OnPolicyRunner.init_process("Train", id, to_close_on_termination=trainer)
        trainer.train(
            checkpoint_file_name=checkpoint,
            restart_pipeline=restart_pipeline,
            valid_on_initial_weights=valid_on_initial_weights,
        )

    @staticmethod
    def valid_loop(id: int = 0, *engine_args, **engine_kwargs):
        """Entry point of a validation process.

        Builds an `OnPolicyInference` engine in validation mode and, if this
        succeeds, installs the signal handlers of the process and starts
        processing checkpoints.
        """
        engine_kwargs.update(mode=VALID_MODE_STR, worker_id=id)
        get_logger().info(f"valid {id} args {engine_kwargs}")

        validator = OnPolicyRunner.init_worker(
            engine_class=OnPolicyInference, args=engine_args, kwargs=engine_kwargs
        )
        if validator is None:
            return

        OnPolicyRunner.init_process("Valid", id, to_close_on_termination=validator)
        validator.process_checkpoints()  # gets checkpoints via queue

    @staticmethod
    def test_loop(id: int = 0, *engine_args, **engine_kwargs):
        """Entry point of a test process.

        Builds an `OnPolicyInference` engine in test mode and, if this
        succeeds, installs the signal handlers of the process and starts
        processing checkpoints.
        """
        engine_kwargs.update(mode=TEST_MODE_STR, worker_id=id)
        get_logger().info(f"test {id} args {engine_kwargs}")

        tester = OnPolicyRunner.init_worker(
            engine_class=OnPolicyInference, args=engine_args, kwargs=engine_kwargs
        )
        if tester is None:
            return

        OnPolicyRunner.init_process("Test", id, to_close_on_termination=tester)
        tester.process_checkpoints()  # gets checkpoints via queue

    def _initialize_start_train_or_start_test(self):
        """Reset the runner state shared by `start_train` and `start_test`.

        Marks the runner as not closed, verifies that queues left over from
        a previous run are empty (raising a `RuntimeError` otherwise),
        creates fresh results and checkpoints queues, and acquires a new
        local start time string.
        """
        self._is_closed = False

        previous_queues = self.queues
        if previous_queues is not None:
            for queue_name, previous_queue in previous_queues.items():
                try:
                    leftover = previous_queue.get(timeout=1)
                except queue.Empty:
                    # Expected: nothing was left in this queue.
                    continue
                raise RuntimeError(
                    f"{queue_name} queue was not empty before starting new training/testing (contained {leftover})."
                    f" This should not happen, please report how you obtained this error"
                    f" by creating an issue at https://github.com/allenai/allenact/issues."
                )

        results_queue = self.mp_ctx.Queue()
        checkpoints_queue = self.mp_ctx.Queue()
        self.queues = {
            "results": results_queue,
            "checkpoints": checkpoints_queue,
        }

        self._local_start_time_str = self._acquire_unique_local_start_time_string()

    def get_port(self):
        """Return the port to use for distributed communication.

        The port is parsed from `self.distributed_ip_and_port`. A port of 0
        means that a free port should be searched for, which only the
        runner with `machine_id == 0` is allowed to do.
        """
        ip_and_port = self.distributed_ip_and_port.split(":")
        distributed_port = int(ip_and_port[1])

        if distributed_port == 0:
            assert (
                self.machine_id == 0
            ), "Only runner with `machine_id` == 0 can search for a free port."
            distributed_port = find_free_port(ip_and_port[0])

        get_logger().info(
            f"Engines on machine_id == {self.machine_id} using port {distributed_port} and seed {self.seed}"
        )

        return distributed_port

    def start_train(
        self,
        checkpoint: Optional[str] = None,
        restart_pipeline: bool = False,
        max_sampler_processes_per_worker: Optional[int] = None,
        save_ckpt_after_every_pipeline_stage: bool = True,
        collect_valid_results: bool = False,
        valid_on_initial_weights: bool = False,
        try_restart_after_task_error: bool = False,
        save_ckpt_at_every_host: bool = False,
    ):
        """Start the training processes (and, if applicable, a validation
        process) and then log results until they are done.

        # Parameters

        checkpoint : Checkpoint handed to every training process. If it starts
            with `"wandb://"` it is first downloaded into `/tmp/wandb_ckpts`.
        restart_pipeline : Forwarded to the `train` call of each trainer.
        max_sampler_processes_per_worker : Forwarded to the train and
            validation engines.
        save_ckpt_after_every_pipeline_stage : Forwarded to the training engines.
        collect_valid_results : If `True`, a metrics file template is created
            and passed to `log_and_close`, and the results it returns are
            included in the return value.
        valid_on_initial_weights : Forwarded to the `train` call of each trainer.
        try_restart_after_task_error : Forwarded to the training engines.
        save_ckpt_at_every_host : Forwarded to the training engines.

        # Returns

        The local start time string if `collect_valid_results` is `False`,
        otherwise a tuple of the local start time string and the value
        returned by `log_and_close`.
        """
        self._initialize_start_train_or_start_test()

        self._collect_valid_results = collect_valid_results

        if not self.disable_config_saving:
            self.save_project_state()

        devices = self.worker_devices(TRAIN_MODE_STR)
        num_workers = len(devices)

        # Be extra careful to ensure that all models start
        # with the same initializations.
        set_seed(self.seed)
        initial_model_state_dict = self.config.create_model(
            sensor_preprocessor_graph=MachineParams.instance_from(
                self.config.machine_params(self.mode)
            ).sensor_preprocessor_graph
        ).state_dict()

        # A port is only needed when there is more than one training worker.
        distributed_port = 0 if num_workers == 1 else self.get_port()

        if (
            num_workers > 1
            and "NCCL_ASYNC_ERROR_HANDLING" not in os.environ
            and "NCCL_BLOCKING_WAIT" not in os.environ
        ):
            # This ensures the NCCL distributed backend will throw errors
            # if we timeout at a call to `barrier()`
            os.environ["NCCL_ASYNC_ERROR_HANDLING"] = "1"

        worker_ids = self.local_worker_ids(TRAIN_MODE_STR)

        if checkpoint is not None:
            if checkpoint[:8] == "wandb://":
                # The checkpoint is replaced by the value returned by the download.
                ckpt_dir = "/tmp/wandb_ckpts"
                os.makedirs(ckpt_dir, exist_ok=True)
                checkpoint = download_checkpoint_from_wandb(
                    checkpoint, ckpt_dir, only_allow_one_ckpt=True
                )

        # Set to the hash of the initial state dict once passing the full
        # state dict to a process has failed (see the `except` clause below).
        model_hash = None
        for trainer_id in worker_ids:
            # `id`, `checkpoint`, `restart_pipeline`, and `valid_on_initial_weights`
            # are named parameters of `train_loop`, all other entries are
            # received by `train_loop` as engine keyword arguments.
            training_kwargs = dict(
                id=trainer_id,
                checkpoint=checkpoint,
                restart_pipeline=restart_pipeline,
                experiment_name=self.experiment_name,
                config=self.config,
                callback_sensors=self._get_callback_sensors,
                results_queue=self.queues["results"],
                checkpoints_queue=(
                    self.queues["checkpoints"] if self.running_validation else None
                ),
                checkpoints_dir=self.checkpoint_dir(),
                seed=self.seed,
                deterministic_cudnn=self.deterministic_cudnn,
                mp_ctx=self.mp_ctx,
                num_workers=num_workers,
                device=devices[trainer_id],
                distributed_ip=self.distributed_ip_and_port.split(":")[0],
                distributed_port=distributed_port,
                max_sampler_processes_per_worker=max_sampler_processes_per_worker,
                save_ckpt_after_every_pipeline_stage=save_ckpt_after_every_pipeline_stage,
                initial_model_state_dict=(
                    initial_model_state_dict if model_hash is None else model_hash
                ),
                first_local_worker_id=worker_ids[0],
                distributed_preemption_threshold=self.distributed_preemption_threshold,
                valid_on_initial_weights=valid_on_initial_weights,
                try_restart_after_task_error=try_restart_after_task_error,
                save_ckpt_at_every_host=save_ckpt_at_every_host,
            )
            train: BaseProcess = self.mp_ctx.Process(
                target=self.train_loop,
                kwargs=training_kwargs,
            )
            try:
                train.start()
            except (ValueError, OSError, ConnectionRefusedError, EOFError) as e:
                # If the `initial_model_state_dict` is too large we sometimes
                # run into errors passing it with multiprocessing. In such cases
                # we instead hash the state_dict and confirm, in each engine worker, that
                # this hash equals the model the engine worker instantiates.
                if (
                    (isinstance(e, ValueError) and e.args[0] == "too many fds")
                    or (isinstance(e, OSError) and e.errno == 22)
                    or (isinstance(e, ConnectionRefusedError) and e.errno == 111)
                    or isinstance(e, EOFError)
                ):
                    model_hash = md5_hash_of_state_dict(initial_model_state_dict)
                    training_kwargs["initial_model_state_dict"] = model_hash
                    train = self.mp_ctx.Process(
                        target=self.train_loop,
                        kwargs=training_kwargs,
                    )
                    train.start()
                else:
                    raise e

            self.processes[TRAIN_MODE_STR].append(train)

        get_logger().info(
            f"Started {len(self.processes[TRAIN_MODE_STR])} train processes"
        )

        # Validation
        if self.running_validation:
            # A single validation process is started, on the first validation device.
            device = self.worker_devices(VALID_MODE_STR)[0]
            self.init_visualizer(VALID_MODE_STR)
            valid: BaseProcess = self.mp_ctx.Process(
                target=self.valid_loop,
                args=(0,),
                kwargs=dict(
                    config=self.config,
                    callback_sensors=self._get_callback_sensors,
                    results_queue=self.queues["results"],
                    checkpoints_queue=self.queues["checkpoints"],
                    seed=12345,  # TODO allow same order for randomly sampled tasks? Is this any useful anyway?
                    deterministic_cudnn=self.deterministic_cudnn,
                    deterministic_agents=self.deterministic_agents,
                    mp_ctx=self.mp_ctx,
                    device=device,
                    max_sampler_processes_per_worker=max_sampler_processes_per_worker,
                ),
            )
            valid.start()
            self.processes[VALID_MODE_STR].append(valid)

            get_logger().info(
                f"Started {len(self.processes[VALID_MODE_STR])} valid processes"
            )
        else:
            get_logger().info(
                "No processes allocated to validation, no validation will be run."
            )

        metrics_file_template: Optional[str] = None

        if self._collect_valid_results:
            metrics_dir = self.metric_path(self.local_start_time_str)
            os.makedirs(metrics_dir, exist_ok=True)
            suffix = f"__valid_{self.local_start_time_str}"
            metrics_file_template = os.path.join(
                metrics_dir, "metrics" + suffix + "{:012d}.json"
            )  # template for training steps

            get_logger().info(
                f"Saving valid metrics with template {metrics_file_template}"
            )

            # Check output file can be written
            with open(metrics_file_template.format(0), "w") as f:
                json.dump([], f, indent=4, sort_keys=True, cls=NumpyJSONEncoder)

        valid_results = self.log_and_close(
            start_time_str=self.local_start_time_str,
            nworkers=len(worker_ids),  # TODO num_workers once we forward metrics,
            metrics_file=metrics_file_template,
        )

        if not self._collect_valid_results:
            return self.local_start_time_str
        else:
            return self.local_start_time_str, valid_results

    def start_test(
        self,
        checkpoint_path_dir_or_pattern: str,
        infer_output_dir: bool = False,
        approx_ckpt_step_interval: Optional[Union[float, int]] = None,
        max_sampler_processes_per_worker: Optional[int] = None,
        inference_expert: bool = False,
    ) -> List[Dict]:
        """Start the test processes, queue the checkpoints to evaluate, and
        log results until the testers are done.

        # Parameters

        checkpoint_path_dir_or_pattern : Passed to `get_checkpoint_files` to
            obtain the checkpoints that will be evaluated.
        infer_output_dir : If `True` and the save directory format is
            `NESTED`, `self.output_dir` is replaced by the log folder inferred
            from the first checkpoint path.
        approx_ckpt_step_interval : Passed to `get_checkpoint_files`.
        max_sampler_processes_per_worker : Forwarded to the test engines.
        inference_expert : If `True`, `"enforced_test_expert"` is appended to
            the extra tag and the test engines are created with
            `enforce_expert=True`.

        # Returns

        The value returned by `log_and_close`.
        """
        # Tester always runs on a single machine
        assert (
            self.machine_id == 0
        ), f"Received `machine_id={self.machine_id} for test. Only one machine supported."
        assert isinstance(
            checkpoint_path_dir_or_pattern, str
        ), "Must provide a --checkpoint path or pattern to test on."

        # Multiplying a string by a boolean keeps it (`True`) or empties it
        # (`False`): the tag is only extended when `inference_expert` is `True`,
        # and the "__" separator is only added if the tag was not empty.
        self.extra_tag += (
            "__" * (len(self.extra_tag) > 0) + "enforced_test_expert"
        ) * inference_expert
        self._initialize_start_train_or_start_test()

        devices = self.worker_devices(TEST_MODE_STR)
        self.init_visualizer(TEST_MODE_STR)
        num_testers = len(devices)

        # A port is only needed when there is more than one tester.
        distributed_port = 0
        if num_testers > 1:
            distributed_port = find_free_port()

        # Tester always runs on a single machine
        for tester_it in range(num_testers):
            test: BaseProcess = self.mp_ctx.Process(
                target=self.test_loop,
                args=(tester_it,),
                kwargs=dict(
                    config=self.config,
                    callback_sensors=self._get_callback_sensors,
                    results_queue=self.queues["results"],
                    checkpoints_queue=self.queues["checkpoints"],
                    seed=12345,  # TODO allow same order for randomly sampled tasks? Is this any useful anyway?
                    deterministic_cudnn=self.deterministic_cudnn,
                    deterministic_agents=self.deterministic_agents,
                    mp_ctx=self.mp_ctx,
                    num_workers=num_testers,
                    device=devices[tester_it],
                    max_sampler_processes_per_worker=max_sampler_processes_per_worker,
                    distributed_port=distributed_port,
                    enforce_expert=inference_expert,
                ),
            )

            test.start()
            self.processes[TEST_MODE_STR].append(test)

        get_logger().info(
            f"Started {len(self.processes[TEST_MODE_STR])} test processes"
        )

        checkpoint_paths = self.get_checkpoint_files(
            checkpoint_path_dir_or_pattern=checkpoint_path_dir_or_pattern,
            approx_ckpt_step_interval=approx_ckpt_step_interval,
        )
        steps = [self.step_from_checkpoint(cp) for cp in checkpoint_paths]

        get_logger().info(f"Running test on {len(steps)} steps {steps}")

        for checkpoint_path in checkpoint_paths:
            # Make all testers work on each checkpoint
            for tester_it in range(num_testers):
                self.queues["checkpoints"].put(("eval", checkpoint_path))

        # Signal all testers to terminate cleanly
        for _ in range(num_testers):
            self.queues["checkpoints"].put(("quit", None))

        # NOTE(review): `checkpoint_paths[0]` is indexed below, this presumably
        # relies on `get_checkpoint_files` never returning an empty
        # collection -- confirm.
        if self.save_dir_fmt == SaveDirFormat.NESTED:
            if infer_output_dir:  # NOTE: we change output_dir here
                self.output_dir = self.checkpoint_log_folder_str(checkpoint_paths[0])
            suffix = ""
        elif self.save_dir_fmt == SaveDirFormat.FLAT:
            suffix = f"__test_{self.local_start_time_str}"
        else:
            raise NotImplementedError
        metrics_dir = self.metric_path(self.local_start_time_str)
        os.makedirs(metrics_dir, exist_ok=True)
        metrics_file_path = os.path.join(metrics_dir, "metrics" + suffix + ".json")

        get_logger().info(f"Saving test metrics in {metrics_file_path}")

        # Check output file can be written
        with open(metrics_file_path, "w") as f:
            json.dump([], f, indent=4, sort_keys=True, cls=NumpyJSONEncoder)

        # The start time string passed on is that of the run which produced
        # the (first) checkpoint, not that of this test run.
        return self.log_and_close(
            start_time_str=self.checkpoint_start_time_str(checkpoint_paths[0]),
            nworkers=num_testers,
            test_steps=steps,
            metrics_file=metrics_file_path,
        )

    @staticmethod
    def checkpoint_start_time_str(checkpoint_file_name):
        """Return the name of the directory containing the checkpoint file,
        which is taken to be the start time string of the run that saved
        it."""
        path_components = checkpoint_file_name.split(os.path.sep)
        assert (
            len(path_components) > 1
        ), f"{checkpoint_file_name} is not a valid checkpoint path"

        # The second to last component is the checkpoint's parent directory.
        containing_dir = path_components[-2]
        get_logger().info(f"Using checkpoint start time {containing_dir}")
        return containing_dir

    @staticmethod
    def checkpoint_log_folder_str(checkpoint_file_name):
        """Return the checkpoint path with its last two components (the
        checkpoint file and the directory containing it) removed."""
        path_components = checkpoint_file_name.split(os.path.sep)
        assert (
            len(path_components) > 1
        ), f"{checkpoint_file_name} is not a valid checkpoint path"

        # remove checkpoints/*.pt
        kept_components = path_components[:-2]
        log_folder = os.path.sep.join(kept_components)
        get_logger().info(f"Using log folder {log_folder}")
        return log_folder

    @property
    def experiment_name(self):
        if len(self.extra_tag) > 0:
            return f"{self.config.tag()}_{self.extra_tag}"
        return self.config.tag()

    def checkpoint_dir(
        self, start_time_str: Optional[str] = None, create_if_none: bool = True
    ):
        """Return the directory in which checkpoints are stored.

        # Parameters

        start_time_str : Start time string of the run, the runner's local
            start time string is used if this is `None` (or empty).
        create_if_none : Whether to create the directory if it does not exist.
        """
        if self.extra_tag == "":
            experiment_subdir = self.config.tag()
        else:
            experiment_subdir = os.path.join(self.config.tag(), self.extra_tag)
        run_subdir = start_time_str or self.local_start_time_str

        # The position of "checkpoints" in the path depends on the format.
        if self.save_dir_fmt == SaveDirFormat.NESTED:
            components = (experiment_subdir, run_subdir, "checkpoints")
        elif self.save_dir_fmt == SaveDirFormat.FLAT:
            components = ("checkpoints", experiment_subdir, run_subdir)
        else:
            raise NotImplementedError

        folder = os.path.join(self.output_dir, *components)
        if create_if_none:
            os.makedirs(folder, exist_ok=True)
        return folder

    def log_writer_path(self, start_time_str: str) -> str:
        """Return the directory to which tensorboard logs are written.

        The path depends on the save directory format and on whether the
        runner is in test mode. In test mode the runner's local start time
        string is also part of the path.
        """
        if self.save_dir_fmt == SaveDirFormat.NESTED:
            if self.mode == TEST_MODE_STR:
                # Note that the extra tag is not used here.
                return os.path.join(
                    self.output_dir,
                    "test",
                    self.config.tag(),
                    self.local_start_time_str,
                )

            if self.extra_tag == "":
                experiment_subdir = self.config.tag()
            else:
                experiment_subdir = os.path.join(self.config.tag(), self.extra_tag)
            return os.path.join(
                self.output_dir, experiment_subdir, start_time_str, "train_tb"
            )

        if self.save_dir_fmt == SaveDirFormat.FLAT:
            if self.extra_tag == "":
                experiment_subdir = self.config.tag()
            else:
                experiment_subdir = os.path.join(self.config.tag(), self.extra_tag)
            flat_path = os.path.join(
                self.output_dir, "tb", experiment_subdir, start_time_str
            )
            if self.mode != TEST_MODE_STR:
                return flat_path
            return os.path.join(flat_path, "test", self.local_start_time_str)

        raise NotImplementedError

    def metric_path(self, start_time_str: str) -> str:
        """Return the directory in which metrics files are saved for the run
        identified by `start_time_str`."""
        save_dir_fmt = self.save_dir_fmt

        if save_dir_fmt == SaveDirFormat.NESTED:
            # Note that the extra tag is not used here.
            components = ("test", self.config.tag(), start_time_str)
        elif save_dir_fmt == SaveDirFormat.FLAT:
            if self.extra_tag == "":
                experiment_subdir = self.config.tag()
            else:
                experiment_subdir = os.path.join(self.config.tag(), self.extra_tag)
            components = ("metrics", experiment_subdir, start_time_str)
        else:
            raise NotImplementedError

        return os.path.join(self.output_dir, *components)

    def save_project_state(self):
        """Save the current git diff and the loaded config sources to disk.

        Everything is written below a `used_configs` directory whose position
        relative to the tag / start-time directories depends on
        `save_dir_fmt`. Once the files are written, every registered callback
        is notified through `after_save_project_state`.

        # Raises
        NotImplementedError : If `save_dir_fmt` is neither `NESTED` nor `FLAT`.
        AssertionError : If `config_kwargs.json` already exists in the target
            directory or a listed config source file cannot be found.
        """
        tag_dir = self.config.tag()
        if self.extra_tag != "":
            tag_dir = os.path.join(tag_dir, self.extra_tag)
        run_parts = (tag_dir, self.local_start_time_str)

        if self.save_dir_fmt == SaveDirFormat.NESTED:
            base_dir = os.path.join(self.output_dir, *run_parts, "used_configs")
        elif self.save_dir_fmt == SaveDirFormat.FLAT:
            base_dir = os.path.join(self.output_dir, "used_configs", *run_parts)
        else:
            raise NotImplementedError
        os.makedirs(base_dir, exist_ok=True)

        # Saving current git diff (best effort: a failing git command only
        # produces a warning).
        try:
            sha, diff_str = get_git_diff_of_project()
        except subprocess.CalledProcessError:
            get_logger().warning(
                "Failed to get a git diff of the current project."
                f" Is it possible that {os.getcwd()} is not under version control?"
            )
        else:
            with open(os.path.join(base_dir, f"{sha}.patch"), "w") as patch_file:
                patch_file.write(diff_str)
            get_logger().info(f"Git diff saved to {base_dir}")

        # Saving configs
        config_sources = self.loaded_config_src_files
        if config_sources is not None:
            for src_path in config_sources:
                if src_path == CONFIG_KWARGS_STR:
                    # This entry is not a file: it holds the (JSON encoded)
                    # key-word arguments passed to the experiment initializer.
                    kwargs_path = os.path.join(base_dir, "config_kwargs.json")
                    assert not os.path.exists(
                        kwargs_path
                    ), f"{kwargs_path} should not already exist."
                    with open(kwargs_path, "w") as kwargs_file:
                        json.dump(json.loads(config_sources[src_path]), kwargs_file)
                    continue

                assert os.path.isfile(src_path), f"Config file {src_path} not found"
                src_path = os.path.abspath(src_path)
                file_name = os.path.basename(src_path)

                # Different source directories may contain files with the same
                # name; prepend `namecollision<k>__` (k = 0, 1, ...) until the
                # destination name is unused so that nothing is overwritten.
                dst_path = os.path.join(base_dir, file_name)
                collision_ind = 0
                while os.path.exists(dst_path):
                    dst_path = os.path.join(
                        base_dir,
                        f"namecollision{collision_ind}__{file_name}",
                    )
                    collision_ind += 1

                os.makedirs(os.path.dirname(dst_path), exist_ok=True)
                with open(src_path, "r") as src_file:
                    file_contents = src_file.read()
                with open(dst_path, "w") as dst_file:
                    # The copy is prefixed with a header recording its origin.
                    dst_file.write(
                        f"### THIS FILE ORIGINALLY LOCATED AT '{src_path}'\n\n{file_contents}"
                    )

        get_logger().info(f"Config files saved to {base_dir}")
        for callback in self.callbacks:
            callback.after_save_project_state(base_dir=base_dir)

    def _update_keys(
        self,
        d: Union[Dict[str, Any], str],
        tag_if_not_a_loss: str,
        mode: str,
        stage_component_uuid: Optional[str] = None,
    ) -> Union[Dict[str, Any], str]:
        midfix = "-" if stage_component_uuid is None else f"-{stage_component_uuid}-"

        def _convert(key: str):
            if key.startswith("losses/"):
                return f"{mode}{midfix}{key}"
            else:
                return f"{mode}{midfix}{tag_if_not_a_loss}/{key}"

        if isinstance(d, str):
            return _convert(d)
        return {_convert(k): v for k, v in d.items()}

    def _process_logging_packages(
        self,
        log_writer: Optional[SummaryWriter],
        pkgs: Union[LoggingPackage, List[LoggingPackage]],
        last_steps: Optional[int],
        last_storage_uuid_to_total_experiences: Optional[Dict[str, int]],
        last_time: Optional[float],
        all_results: Optional[List[Any]] = None,
    ):
        """Aggregate same-mode logging packages and report the result.

        The scalars of all packages are averaged (weighted by their counts),
        written to `log_writer` (if given), summarized in a single log line,
        passed to every callback's `on_<mode>_log` hook and, if a visualizer
        is set, to the visualizer.

        # Parameters
        log_writer : Tensorboard writer, `None` disables tensorboard output.
        pkgs : Packages to aggregate; they are indexed and iterated, and must
            all share the same mode.
        last_steps : Training steps at the previous training log. Only read
            when the packages are training packages.
        last_storage_uuid_to_total_experiences : Per-storage experience counts
            at the previous training log. Only read for training packages.
        last_time : Time stamp of the previous training log. Only read for
            training packages.
        all_results : If given, a dictionary with the aggregated means, the
            training steps and the per-task metric dictionaries is appended.

        # Returns
        The tuple `(training_steps, storage_uuid_to_total_experiences,
        current_time)` taken from the first package / the current time.
        """
        mode = pkgs[0].mode
        assert all(
            pkg.mode == mode for pkg in pkgs
        ), "All logging packages must be the same mode."
        assert mode == self.mode or (
            mode == VALID_MODE_STR and self.mode == TRAIN_MODE_STR
        ), (
            "Logging package mode must match the logger mode except when training where the logging package may"
            " be of mode 'valid'."
        )
        training = mode == TRAIN_MODE_STR  # Are we logging training packages

        current_time = time.time()

        # The first package is taken as the reference for the step counts.
        training_steps = pkgs[0].training_steps
        storage_uuid_to_total_experiences = pkgs[0].storage_uuid_to_total_experiences
        # Everything handed to the callbacks as `metric_means`.
        callback_metric_means = dict()

        def update_keys_misc(
            key_or_dict: Union[str, Dict[str, Any]],
            stage_component_uuid: Optional[str] = None,
        ):
            # Important to use mode and not self.mode here
            return self._update_keys(
                d=key_or_dict,
                tag_if_not_a_loss="misc",
                mode=mode,
                stage_component_uuid=stage_component_uuid,
            )

        def update_keys_metric(
            key_or_dict: Union[str, Dict[str, Any]],
            stage_component_uuid: Optional[str] = None,
        ):
            # Important to use mode and not self.mode here
            return self._update_keys(
                d=key_or_dict,
                tag_if_not_a_loss="metrics",
                mode=mode,
                stage_component_uuid=stage_component_uuid,
            )

        # The pipeline stage is only written to tensorboard while training but
        # is always forwarded to the callbacks.
        if training and log_writer is not None:
            log_writer.add_scalar(
                tag=update_keys_misc("pipeline_stage"),
                scalar_value=pkgs[0].pipeline_stage,
                global_step=training_steps,
            )
        callback_metric_means[update_keys_misc("pipeline_stage")] = pkgs[
            0
        ].pipeline_stage

        # One "<storage_uuid>_total_experiences" scalar per storage.
        storage_uuid_to_total_experiences_key = {}
        for storage_uuid, val in storage_uuid_to_total_experiences.items():
            total_experiences_key = update_keys_misc(
                f"{storage_uuid}_total_experiences"
            )
            storage_uuid_to_total_experiences_key[storage_uuid] = total_experiences_key

            if training and log_writer is not None:
                log_writer.add_scalar(
                    tag=total_experiences_key,
                    scalar_value=val,
                    global_step=training_steps,
                )
            callback_metric_means[total_experiences_key] = val

        metrics_and_info_tracker = ScalarMeanTracker()
        # For every info scalar: the experience count of the storage it came
        # from (used as its tensorboard step) and the name of the scalar
        # holding that count.
        scalar_name_to_total_storage_experience = {}
        scalar_name_to_total_experiences_key = {}
        storage_uuid_to_stage_component_uuids = defaultdict(set)
        metric_dicts_list, render, checkpoint_file_name = [], {}, []
        tasks_callback_data = []

        for pkg in pkgs:
            metrics_and_info_tracker.add_scalars(
                scalars=update_keys_metric(pkg.metrics_tracker.means()),
                n=update_keys_metric(pkg.metrics_tracker.counts()),
            )
            tasks_callback_data.extend(pkg.task_callback_data)
            metric_dicts_list.extend(pkg.metric_dicts)
            if pkg.viz_data is not None:
                render.update(pkg.viz_data)
            checkpoint_file_name.append(pkg.checkpoint_file_name)

            for (
                (stage_component_uuid, storage_uuid),
                info_tracker,
            ) in pkg.info_trackers.items():

                if stage_component_uuid is not None:
                    storage_uuid_to_stage_component_uuids[storage_uuid].add(
                        stage_component_uuid
                    )

                info_means = update_keys_misc(
                    info_tracker.means(),
                    stage_component_uuid,
                )
                info_counts = update_keys_misc(
                    info_tracker.counts(),
                    stage_component_uuid,
                )
                metrics_and_info_tracker.add_scalars(
                    scalars=info_means,
                    n=info_counts,
                )

                total_exp_for_storage = pkg.storage_uuid_to_total_experiences[
                    storage_uuid
                ]

                if stage_component_uuid is None:
                    assert total_exp_for_storage == training_steps

                # All packages must report the same experience count for a
                # given scalar; the first one seen is remembered.
                for scalar_name in info_means:
                    if scalar_name in scalar_name_to_total_storage_experience:
                        assert (
                            total_exp_for_storage
                            == scalar_name_to_total_storage_experience[scalar_name]
                        ), (
                            f"For metric {scalar_name}: there is disagreement between the training steps parameter"
                            f" across different workers ({total_exp_for_storage} !="
                            f" {scalar_name_to_total_storage_experience[scalar_name]}). This suggests an error in "
                            f" AllenAct, please report this issue at https://github.com/allenai/allenact/issues."
                        )
                    else:
                        scalar_name_to_total_storage_experience[scalar_name] = (
                            total_exp_for_storage
                        )
                        scalar_name_to_total_experiences_key[scalar_name] = (
                            storage_uuid_to_total_experiences_key[storage_uuid]
                        )

        # Packages are not required to agree on the checkpoint file name: if
        # any of them carries one, the last non-None name is kept as the
        # single entry of `checkpoint_file_name`.
        if any(checkpoint_file_name):
            ckpt_to_store = None
            for ckpt in checkpoint_file_name:
                if ckpt is not None:
                    ckpt_to_store = ckpt
            assert ckpt_to_store is not None
            checkpoint_file_name = [ckpt_to_store]

        message = [
            f"{mode.upper()}: {training_steps} rollout steps ({pkgs[0].storage_uuid_to_total_experiences})"
        ]
        metrics_and_info_means = metrics_and_info_tracker.means()
        callback_metric_means.update(metrics_and_info_means)

        # Keys with fewer "/" come first, ties are broken alphabetically.
        for k in sorted(
            metrics_and_info_means.keys(),
            key=lambda mean_key: (mean_key.count("/"), mean_key),
        ):
            if log_writer is not None:
                log_writer.add_scalar(
                    tag=k,
                    scalar_value=metrics_and_info_means[k],
                    global_step=scalar_name_to_total_storage_experience.get(
                        k, training_steps
                    ),
                )
            # The log line drops the leading "<mode>-.../" group of the key.
            short_key = (
                "/".join(k.split("/")[1:])
                if k.startswith(f"{mode}-") and "/" in k
                else k
            )
            message.append(f"{short_key} {metrics_and_info_means[k]:.3g}")

        if training:
            # Log information about FPS and EPS (experiences per second, for non-rollout storage).
            # Not needed during testing or validation.
            message += [f"elapsed_time {(current_time - last_time):.3g}s"]

            # No FPS estimate for the first log (there is no previous step
            # count to compare against).
            if last_steps > 0:
                fps = (training_steps - last_steps) / (current_time - last_time)
                message += [f"approx_fps {fps:.3g}"]
                approx_fps_key = update_keys_misc("approx_fps")
                if log_writer is not None:
                    log_writer.add_scalar(approx_fps_key, fps, training_steps)
                callback_metric_means[approx_fps_key] = fps

            for (
                storage_uuid,
                last_total_exp,
            ) in last_storage_uuid_to_total_experiences.items():
                if storage_uuid in storage_uuid_to_total_experiences:
                    cur_total_exp = storage_uuid_to_total_experiences[storage_uuid]
                    eps = (cur_total_exp - last_total_exp) / (current_time - last_time)
                    message += [f"{storage_uuid}/approx_eps {eps:.3g}"]
                    for stage_component_uuid in storage_uuid_to_stage_component_uuids[
                        storage_uuid
                    ]:
                        approx_eps_key = update_keys_misc(
                            "approx_eps",
                            stage_component_uuid,
                        )
                        callback_metric_means[approx_eps_key] = eps
                        scalar_name_to_total_experiences_key[approx_eps_key] = (
                            storage_uuid_to_total_experiences_key[storage_uuid]
                        )

                        if log_writer is not None:
                            log_writer.add_scalar(
                                approx_eps_key,
                                eps,
                                cur_total_exp,
                            )

        # Deep copy so that adding the extra entries below does not touch the
        # dictionary of means itself.
        metrics_and_info_means_with_metrics_dicts_list = copy.deepcopy(
            metrics_and_info_means
        )
        metrics_and_info_means_with_metrics_dicts_list.update(
            {"training_steps": training_steps, "tasks": metric_dicts_list}
        )
        if all_results is not None:
            all_results.append(metrics_and_info_means_with_metrics_dicts_list)

        num_tasks = sum(pkg.num_non_empty_metrics_dicts_added for pkg in pkgs)
        num_tasks_completed_key = update_keys_misc("num_tasks_completed_since_last_log")
        if log_writer is not None:
            log_writer.add_scalar(num_tasks_completed_key, num_tasks, training_steps)
        callback_metric_means[num_tasks_completed_key] = num_tasks

        message.append(f"new_tasks_completed {num_tasks}")
        if not training:
            message.append(f"checkpoint {checkpoint_file_name[0]}")

        get_logger().info(" ".join(message))

        # Note that the training hook receives the raw list of per-task metric
        # dictionaries while the valid / test hooks receive the aggregated
        # dictionary (means + "training_steps" + "tasks").
        for callback in self.callbacks:
            if mode == TRAIN_MODE_STR:
                callback.on_train_log(
                    metrics=metric_dicts_list,
                    metric_means=callback_metric_means,
                    step=training_steps,
                    checkpoint_file_name=checkpoint_file_name[0],
                    tasks_data=tasks_callback_data,
                    scalar_name_to_total_experiences_key=scalar_name_to_total_experiences_key,
                )

            if mode == VALID_MODE_STR:
                callback.on_valid_log(
                    metrics=metrics_and_info_means_with_metrics_dicts_list,
                    metric_means=callback_metric_means,
                    step=training_steps,
                    checkpoint_file_name=checkpoint_file_name[0],
                    tasks_data=tasks_callback_data,
                    scalar_name_to_total_experiences_key=scalar_name_to_total_experiences_key,
                )

            if mode == TEST_MODE_STR:
                callback.on_test_log(
                    metrics=metrics_and_info_means_with_metrics_dicts_list,
                    metric_means=callback_metric_means,
                    step=training_steps,
                    checkpoint_file_name=checkpoint_file_name[0],
                    tasks_data=tasks_callback_data,
                    scalar_name_to_total_experiences_key=scalar_name_to_total_experiences_key,
                )

        if self.visualizer is not None:
            self.visualizer.log(
                log_writer=log_writer,
                task_outputs=metric_dicts_list,
                render=render,
                num_steps=training_steps,
            )

        return training_steps, storage_uuid_to_total_experiences, current_time

    def process_valid_package(
        self,
        log_writer: Optional[SummaryWriter],
        pkg: LoggingPackage,
        all_results: Optional[List[Any]] = None,
    ):
        """Log a single validation package.

        Wraps `pkg` in a one-element list and forwards it to
        `_process_logging_packages`. The `last_*` arguments are passed as
        `None`.

        # Parameters
        log_writer : Tensorboard writer or `None`.
        pkg : The logging package to process.
        all_results : Optional list forwarded unchanged to
            `_process_logging_packages`.

        # Returns
        Whatever `_process_logging_packages` returns.
        """
        return self._process_logging_packages(
            log_writer=log_writer,
            pkgs=[pkg],
            last_steps=None,
            last_storage_uuid_to_total_experiences=None,
            last_time=None,
            all_results=all_results,
        )

    def process_train_packages(
        self,
        log_writer: Optional[SummaryWriter],
        pkgs: List[LoggingPackage],
        last_steps: int,
        last_storage_uuid_to_total_experiences: Dict[str, int],
        last_time: float,
    ):
        """Log a group of training packages.

        Forwards all arguments to `_process_logging_packages`; no
        `all_results` list is passed, so that callee uses its default.

        # Parameters
        log_writer : Tensorboard writer or `None`.
        pkgs : The logging packages to aggregate.
        last_steps : Training steps returned by the previous training log.
        last_storage_uuid_to_total_experiences : Per-storage experience counts
            returned by the previous training log.
        last_time : Time stamp returned by the previous training log.

        # Returns
        Whatever `_process_logging_packages` returns.
        """
        return self._process_logging_packages(
            log_writer=log_writer,
            pkgs=pkgs,
            last_steps=last_steps,
            last_storage_uuid_to_total_experiences=last_storage_uuid_to_total_experiences,
            last_time=last_time,
        )

    def process_test_packages(
        self,
        log_writer: Optional[SummaryWriter],
        pkgs: List[LoggingPackage],
        all_results: Optional[List[Any]] = None,
    ):
        """Log a group of test packages.

        Forwards `pkgs` to `_process_logging_packages`. The `last_*` arguments
        are passed as `None`.

        # Parameters
        log_writer : Tensorboard writer or `None`.
        pkgs : The logging packages to aggregate.
        all_results : Optional list forwarded unchanged to
            `_process_logging_packages`.

        # Returns
        Whatever `_process_logging_packages` returns.
        """
        return self._process_logging_packages(
            log_writer=log_writer,
            pkgs=pkgs,
            last_steps=None,
            last_storage_uuid_to_total_experiences=None,
            last_time=None,
            all_results=all_results,
        )

    def log_and_close(
        self,
        start_time_str: str,
        nworkers: int,
        test_steps: Sequence[int] = (),
        metrics_file: Optional[str] = None,
    ) -> List[Dict]:
        """Consume the results queue, log what arrives and finally shut down.

        Items are read from `self.queues["results"]` until a termination
        condition is met. An item is either a `LoggingPackage` (train, valid or
        test mode) or a tuple whose first entry is one of `"train_stopped"`,
        `"valid_stopped"` or `"test_stopped"` and whose second entry is `0` for
        a regular stop and `worker index + 1` for an abnormal termination.

        Train and test packages are buffered until `nworkers` packages with
        matching step counts are available and are then logged together;
        valid packages are logged one at a time.

        # Parameters
        start_time_str : Start-time string used to build the tensorboard path.
        nworkers : Number of packages aggregated per train / test log and
            number of `"test_stopped"` messages awaited when testing.
        test_steps : Step counts used in the "Updated ..." log message when
            testing; if it has no entry for the current result, the step count
            of the processed packages is reported instead.
        metrics_file : For test packages, the path of the JSON file rewritten
            with all results after every log. For valid packages, a format
            string (formatted with the training steps) naming the JSON file
            of the latest result. `None` disables writing.

        # Returns
        The list of collected evaluation results.
        """
        ptitle(f"AllenAct-Logging-{self.local_start_time_str}")
        finalized = False

        log_writer: Optional[SummaryWriter] = None
        if not self.disable_tensorboard:
            log_writer = SummaryWriter(
                log_dir=self.log_writer_path(start_time_str),
                filename_suffix=f"__{self.mode}_{self.local_start_time_str}",
            )

        # To aggregate/buffer metrics from trainers/testers
        collected: List[LoggingPackage] = []
        last_train_steps = 0
        last_storage_uuid_to_total_experiences = {}
        last_train_time = time.time()
        eval_results: List[Dict] = []
        unfinished_workers = nworkers

        try:
            while True:
                try:
                    package: Union[
                        LoggingPackage, Union[Tuple[str, Any], Tuple[str, Any, Any]]
                    ] = self.queues["results"].get(timeout=1)

                    if isinstance(package, LoggingPackage):
                        pkg_mode = package.mode

                        if pkg_mode == TRAIN_MODE_STR:
                            collected.append(package)
                            if len(collected) >= nworkers:

                                # Sort so that the oldest packages come first
                                # and packages with identical counts are
                                # adjacent.
                                collected = sorted(
                                    collected,
                                    key=lambda pkg: (
                                        pkg.training_steps,
                                        *sorted(
                                            pkg.storage_uuid_to_total_experiences.items()
                                        ),
                                    ),
                                )

                                if (
                                    collected[nworkers - 1].training_steps
                                    == collected[0].training_steps
                                    and collected[
                                        nworkers - 1
                                    ].storage_uuid_to_total_experiences
                                    == collected[0].storage_uuid_to_total_experiences
                                ):  # ensure all workers have provided the same training_steps and total_experiences
                                    (
                                        last_train_steps,
                                        last_storage_uuid_to_total_experiences,
                                        last_train_time,
                                    ) = self.process_train_packages(
                                        log_writer=log_writer,
                                        pkgs=collected[:nworkers],
                                        last_steps=last_train_steps,
                                        last_storage_uuid_to_total_experiences=last_storage_uuid_to_total_experiences,
                                        last_time=last_train_time,
                                    )
                                    collected = collected[nworkers:]
                                elif len(collected) > 2 * nworkers:
                                    get_logger().warning(
                                        f"Unable to aggregate train packages from all {nworkers} workers"
                                        f" after {len(collected)} packages collected"
                                    )
                        elif (
                            pkg_mode == VALID_MODE_STR
                        ):  # they all come from a single worker
                            if (
                                package.training_steps is not None
                            ):  # no validation samplers
                                self.process_valid_package(
                                    log_writer=log_writer,
                                    pkg=package,
                                    all_results=(
                                        eval_results
                                        if self._collect_valid_results
                                        else None
                                    ),
                                )

                                if metrics_file is not None:
                                    # Results are only appended to
                                    # `eval_results` when valid results are
                                    # collected; otherwise there is nothing to
                                    # write (indexing `eval_results[-1]` would
                                    # fail on the empty list).
                                    if self._collect_valid_results and eval_results:
                                        valid_metrics_file = metrics_file.format(
                                            package.training_steps
                                        )
                                        with open(valid_metrics_file, "w") as f:
                                            json.dump(
                                                eval_results[-1],
                                                f,
                                                indent=4,
                                                sort_keys=True,
                                                cls=NumpyJSONEncoder,
                                            )
                                        get_logger().info(
                                            "Written valid results file {}".format(
                                                valid_metrics_file
                                            )
                                        )
                                    else:
                                        get_logger().warning(
                                            "A metrics file was requested but no valid results"
                                            " were collected, no valid results file written."
                                        )

                            if (
                                finalized and self.queues["checkpoints"].empty()
                            ):  # assume queue is actually empty after trainer finished and no checkpoints in queue
                                break
                        elif pkg_mode == TEST_MODE_STR:
                            collected.append(package)
                            if len(collected) >= nworkers:
                                collected = sorted(
                                    collected, key=lambda x: x.training_steps
                                )  # sort by num_steps
                                if (
                                    collected[nworkers - 1].training_steps
                                    == collected[0].training_steps
                                ):  # ensure nworkers have provided the same num_steps
                                    test_pkgs = collected[:nworkers]
                                    collected = collected[nworkers:]

                                    self.process_test_packages(
                                        log_writer=log_writer,
                                        pkgs=test_pkgs,
                                        all_results=eval_results,
                                    )

                                    # `metrics_file` is optional: only write
                                    # when a path was actually given.
                                    if metrics_file is not None:
                                        with open(metrics_file, "w") as f:
                                            json.dump(
                                                eval_results,
                                                f,
                                                indent=4,
                                                sort_keys=True,
                                                cls=NumpyJSONEncoder,
                                            )

                                        # `test_steps` defaults to an empty
                                        # sequence; fall back to the step count
                                        # carried by the packages themselves.
                                        result_ind = len(eval_results) - 1
                                        if 0 <= result_ind < len(test_steps):
                                            reported_step = test_steps[result_ind]
                                        else:
                                            reported_step = test_pkgs[0].training_steps
                                        get_logger().info(
                                            f"Updated {metrics_file} up to checkpoint"
                                            f" {reported_step}"
                                        )
                        else:
                            get_logger().error(
                                f"Runner received unknown package of type {pkg_mode}"
                            )
                    else:
                        pkg_mode = package[0]

                        if pkg_mode == "train_stopped":
                            if package[1] == 0:
                                finalized = True
                                if not self.running_validation:
                                    get_logger().info(
                                        "Terminating runner after trainer done (no validation)"
                                    )
                                    break
                            else:
                                raise Exception(
                                    f"Train worker {package[1] - 1} abnormally terminated"
                                )
                        elif pkg_mode == "valid_stopped":
                            raise Exception(
                                f"Valid worker {package[1] - 1} abnormally terminated"
                            )
                        elif pkg_mode == "test_stopped":
                            if package[1] == 0:
                                unfinished_workers -= 1
                                if unfinished_workers == 0:
                                    get_logger().info(
                                        "Last tester finished. Terminating"
                                    )
                                    finalized = True
                                    break
                            else:
                                raise RuntimeError(
                                    f"Test worker {package[1] - 1} abnormally terminated"
                                )
                        else:
                            get_logger().error(
                                f"Runner received invalid package tuple {package}"
                            )
                except queue.Empty:
                    # Nothing arrived within the timeout: stop once every
                    # worker process has exited, otherwise keep polling.
                    if all(
                        p.exitcode is not None
                        for p in itertools.chain(*self.processes.values())
                    ):
                        break
        except KeyboardInterrupt:
            get_logger().info("KeyboardInterrupt. Terminating runner.")
        except Exception:
            get_logger().error("Encountered Exception. Terminating runner.")
            get_logger().exception(traceback.format_exc())
        finally:
            if finalized:
                get_logger().info("Done")
            if log_writer is not None:
                log_writer.close()
            self.close()
            # NOTE(review): returning from `finally` discards any exception
            # still in flight (including one raised by the close calls above).
            # Kept as is so that callers always receive the collected results.
            return eval_results

    def get_checkpoint_files(
        self,
        checkpoint_path_dir_or_pattern: str,
        approx_ckpt_step_interval: Optional[int] = None,
    ):
        """Return checkpoint paths sorted by their training step count.

        # Parameters
        checkpoint_path_dir_or_pattern : One of
            * a string starting with `wandb://`, in which case the result of
              `download_checkpoint_from_wandb` is returned directly,
            * a directory, whose `*.pt` files are used, or
            * a glob pattern (evaluated with `recursive=True`).
        approx_ckpt_step_interval : If given, only the checkpoints whose step
            counts are closest to `0, interval, 2 * interval, ...` (up to and
            including the first multiple at or beyond the largest step count)
            are kept.

        # Returns
        The list of selected checkpoint paths in increasing step order.

        # Raises
        FileNotFoundError : If no file matches.
        AssertionError : If `approx_ckpt_step_interval` is not positive.
        """
        if checkpoint_path_dir_or_pattern.startswith("wandb://"):
            eval_dir = f"/tmp/wandb_ckpts_to_eval/{self.local_start_time_str}"
            os.makedirs(eval_dir, exist_ok=True)
            return download_checkpoint_from_wandb(
                checkpoint_path_dir_or_pattern, eval_dir, only_allow_one_ckpt=False
            )

        if os.path.isdir(checkpoint_path_dir_or_pattern):
            # A directory was given: search it for "*.pt" files.
            checkpoint_path_dir_or_pattern = os.path.join(
                checkpoint_path_dir_or_pattern, "*.pt"
            )

        matches = glob.glob(checkpoint_path_dir_or_pattern, recursive=True)
        if not matches:
            raise FileNotFoundError(
                f"Could not find any checkpoints at {os.path.abspath(checkpoint_path_dir_or_pattern)}, is it possible"
                f" the path has been mispecified?"
            )

        # Sorting the (step, path) tuples orders by step count first and by
        # path among checkpoints with equal step counts.
        steps_and_paths = sorted(
            (self.step_from_checkpoint(path), path) for path in matches
        )
        ckpts_paths = [path for _, path in steps_and_paths]

        if approx_ckpt_step_interval is None:
            return ckpts_paths

        assert (
            approx_ckpt_step_interval > 0
        ), "`approx_ckpt_step_interval` must be >0"

        step_counts = np.array([steps for steps, _ in steps_and_paths])
        largest_step = steps_and_paths[-1][0]
        num_targets = math.ceil(largest_step / approx_ckpt_step_interval) + 1

        # For each target step count keep the index of the closest checkpoint;
        # the set removes checkpoints chosen for more than one target.
        chosen_inds = {
            int(
                np.argmin(np.abs(step_counts - target_ind * approx_ckpt_step_interval))
            )
            for target_ind in range(num_targets)
        }
        return [ckpts_paths[ind] for ind in sorted(chosen_inds)]

    @staticmethod
    def step_from_checkpoint(ckpt_path: str) -> int:
        parts = os.path.basename(ckpt_path).split("__")
        for part in parts:
            if "steps_" in part:
                possible_num = part.split("_")[-1].split(".")[0]
                if possible_num.isdigit():
                    return int(possible_num)

        get_logger().warning(
            f"The checkpoint {os.path.basename(ckpt_path)} does not follow the checkpoint naming convention"
            f" used by AllenAct. As a fall back we must load the checkpoint into memory to find the"
            f" training step count, this may increase startup time if the checkpoints are large or many"
            f" must be loaded in sequence."
        )
        ckpt = torch.load(ckpt_path, map_location="cpu")
        return ckpt["total_steps"]

    def close(self, verbose=True):
        """Terminate and join all worker processes (at most once).

        Calling this on an already closed runner does nothing. Otherwise all
        processes that are still alive are sent a termination signal, every
        process is joined with a one second timeout, the process registry is
        emptied and the runner is marked as closed.

        # Parameters
        verbose : Whether progress (and join failures) should be logged.
        """
        if self._is_closed:
            return

        def logif(s: Union[str, Exception]):
            # Logging helper that is silent unless `verbose` is set.
            if not verbose:
                return
            if isinstance(s, str):
                get_logger().info(s)
            elif isinstance(s, Exception):
                get_logger().exception(traceback.format_exc())
            else:
                raise NotImplementedError()

        # First send termination signals
        for process_type, workers in self.processes.items():
            for it, process in enumerate(workers):
                if process.is_alive():
                    logif(f"Terminating {process_type} {it}")
                    process.terminate()

        # Now join processes; a failing join is logged and does not prevent
        # the remaining processes from being joined.
        for process_type, workers in self.processes.items():
            for it, process in enumerate(workers):
                try:
                    logif(f"Joining {process_type} {it}")
                    process.join(1)
                    logif(f"Closed {process_type} {it}")
                except Exception as e:
                    logif(f"Exception raised when closing {process_type} {it}")
                    logif(e)

        self.processes.clear()
        self._is_closed = True

    def __del__(self):
        """Close the runner (verbosely) when the object is finalized."""
        self.close(verbose=True)

    def __enter__(self):
        """Enter a `with` block; the runner itself is the context value."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the runner (verbosely) when leaving a `with` block.

        Nothing is returned, so an exception raised inside the block is not
        suppressed.
        """
        self.close(verbose=True)


================================================
FILE: allenact/algorithms/onpolicy_sync/storage.py
================================================
# Original work Copyright (c) Facebook, Inc. and its affiliates.
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import abc
import random
from typing import (
    Union,
    List,
    Dict,
    Tuple,
    Sequence,
    cast,
    Optional,
    Callable,
    Any,
    Generator,
)

import gym
import numpy as np
import torch

import allenact.utils.spaces_utils as su
from allenact.algorithms.onpolicy_sync.policy import (
    FullMemorySpecType,
    ObservationType,
    ActionType,
)
from allenact.base_abstractions.misc import Memory
from allenact.utils.system import get_logger


class ExperienceStorage(abc.ABC):
    """Abstract interface for objects that store agent experience.

    Concrete subclasses must provide `initialize`, `add`, `to`,
    `set_partition` and the `total_experiences` property. `before_updates`
    and `after_updates` are optional hooks which do nothing by default.
    """

    @abc.abstractmethod
    def initialize(self, *, observations: ObservationType, **kwargs):
        """Set up the storage from a first set of `observations`.

        Any additional keyword arguments are implementation specific.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def add(
        self,
        observations: ObservationType,
        memory: Optional[Memory],
        actions: torch.Tensor,
        action_log_probs: torch.Tensor,
        value_preds: torch.Tensor,
        rewards: torch.Tensor,
        masks: torch.Tensor,
    ):
        """Record the outcome of a single step.

        # Parameters

        observations : Observations obtained after taking `actions`.
        memory : Memory after having observed the last set of observations.
        actions : Actions taken to reach the current state, i.e. taking these
            actions has led to a new state with new `observations`.
        action_log_probs : Log probabilities of `actions`.
        value_preds : Value predictions corresponding to the last observations
            (i.e. the states before taking `actions`).
        rewards : Rewards from taking `actions` in the last set of states.
        masks : Masks corresponding to the current states, having 0 entries
            where `observations` correspond to observations from the
            beginning of a new episode.
        """
        raise NotImplementedError

    def before_updates(self, **kwargs):
        """Optional hook; the default implementation does nothing."""

    def after_updates(self, **kwargs) -> int:
        """Optional hook; the default implementation does nothing.

        NOTE(review): despite the `int` annotation this default returns
        `None`.
        """

    @abc.abstractmethod
    def to(self, device: torch.device):
        """Move the stored data to `device` (must be overridden)."""

    @abc.abstractmethod
    def set_partition(self, index: int, num_parts: int):
        """Select partition `index` out of `num_parts` (must be overridden).

        NOTE(review): the exact meaning of a "partition" is defined by the
        implementing subclass.
        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def total_experiences(self) -> int:
        """Total number of experiences held/seen by this storage."""
        raise NotImplementedError


class RolloutStorage(ExperienceStorage, abc.ABC):
    """Abstract `ExperienceStorage` for rollouts gathered from task samplers.

    On top of the base interface, implementations must be able to produce
    the agent's input for the next step and to keep only a subset of the
    samplers.
    """

    # noinspection PyMethodOverriding
    @abc.abstractmethod
    def initialize(
        self,
        *,
        observations: ObservationType,
        num_samplers: int,
        recurrent_memory_specification: FullMemorySpecType,
        action_space: gym.Space,
        **kwargs,
    ):
        """Set up the storage.

        # Parameters

        observations : The first set of observations.
        num_samplers : Number of samplers contributing experience.
        recurrent_memory_specification : Specification of the agent's
            recurrent memory.
        action_space : The agent's action space.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def agent_input_for_next_step(self) -> Dict[str, Any]:
        """Return the keyword inputs the agent needs for its next step."""
        raise NotImplementedError

    @abc.abstractmethod
    def sampler_select(self, keep_list: Sequence[int]):
        """Keep only the samplers whose indices are listed in `keep_list`."""
        raise NotImplementedError


class StreamingStorageMixin(abc.ABC):
    """Mixin interface for storages that are consumed as a stream of batches.

    `next_batch` and `empty` are abstract. `reset_stream` is not marked
    abstract, but its default implementation raises `NotImplementedError`.
    """

    @abc.abstractmethod
    def next_batch(self) -> Dict[str, Any]:
        """Return the next batch of the stream."""
        raise NotImplementedError

    def reset_stream(self):
        """Reset the stream; must be overridden by subclasses that support it."""
        raise NotImplementedError

    @abc.abstractmethod
    def empty(self) -> bool:
        """Report whether the storage is empty."""
        raise NotImplementedError


class MiniBatchStorageMixin(abc.ABC):
    """Mixin interface for storages whose contents are consumed in mini-batches."""

    @abc.abstractmethod
    def batched_experience_generator(
        self,
        num_mini_batch: int,
    ) -> Generator[Dict[str, Any], None, None]:
        """Yield the stored experience as batches.

        # Parameters

        num_mini_batch : Number of mini-batches to split the experience into.
        """
        raise NotImplementedError


class RolloutBlockStorage(RolloutStorage, MiniBatchStorageMixin):
    """Class for storing rollout information for RL trainers."""

    FLATTEN_SEPARATOR: str = "._AUTOFLATTEN_."

    def __init__(self, init_size: int = 50):
        """Create an empty storage.

        No tensors are allocated here: most buffers are created in
        `initialize`, and the reward/value/return/log-prob buffers on the
        first call to `add`.

        # Parameters

        init_size : Initial number of steps the buffers can hold; `add`
            doubles the capacity when it is exhausted.
        """
        # Capacity / progress bookkeeping.
        self.full_size = init_size
        self.step = 0
        self._total_steps = 0
        self._before_update_called = False
        self.device = torch.device("cpu")

        # Name of each leading dimension of the stored tensors.
        self.dim_names = ["step", "sampler", None]

        # Mappings between nested keys and their flattened names, kept
        # separately for memory and observations.
        self.flattened_to_unflattened: Dict[str, Dict[str, List[str]]] = {
            storage_name: dict() for storage_name in ("memory", "observations")
        }
        self.unflattened_to_flattened: Dict[str, Dict[Tuple[str, ...], str]] = {
            storage_name: dict() for storage_name in ("memory", "observations")
        }

        # Set by `initialize`.
        self.memory_specification: Optional[FullMemorySpecType] = None
        self.action_space: Optional[gym.Space] = None
        self.memory_first_last: Optional[Memory] = None
        self._masks_full: Optional[torch.Tensor] = None
        self._actions_full: Optional[torch.Tensor] = None
        self._prev_actions_full: Optional[torch.Tensor] = None

        # Filled lazily as observations are inserted.
        self._observations_full: Memory = Memory()

        # Allocated on the first `add`, once their shapes are known.
        self._value_preds_full: Optional[torch.Tensor] = None
        self._returns_full: Optional[torch.Tensor] = None
        self._rewards_full: Optional[torch.Tensor] = None
        self._action_log_probs_full: Optional[torch.Tensor] = None

        # self._advantages and self._normalized_advantages are only computed
        # when `before_updates` is called
        self._advantages: Optional[torch.Tensor] = None
        self._normalized_advantages: Optional[torch.Tensor] = None

    def initialize(
        self,
        *,
        observations: ObservationType,
        num_samplers: int,
        recurrent_memory_specification: FullMemorySpecType,
        action_space: gym.Space,
        **kwargs,
    ):
        """Allocate buffers (first call only) and store the initial observations.

        On the first call the memory specification and action space are
        recorded and the memory, mask, action and previous-action buffers are
        allocated. On every call the given `observations` are written at step
        0 and the step-0 previous actions and masks are zeroed.

        # Parameters

        observations : Observations to store at step 0.
        num_samplers : Size of the sampler dimension of the buffers.
        recurrent_memory_specification : Memory specification (a falsy value
            is treated as an empty specification).
        action_space : Action space; its flattened size determines the width
            of the action buffers.
        """
        if self.memory_specification is None:
            self.memory_specification = recurrent_memory_specification or {}
            self.action_space = action_space

            fresh_memory = self.create_memory(
                spec=self.memory_specification,
                num_samplers=num_samplers,
            )
            self.memory_first_last = fresh_memory.to(self.device)

            # Memory keys are never nested, so each name maps to itself.
            flat_to_nested = self.flattened_to_unflattened["memory"]
            nested_to_flat = self.unflattened_to_flattened["memory"]
            for key in self.memory_specification:
                flat_to_nested[key] = [key]
                nested_to_flat[(key,)] = key

            def zero_buffer(num_steps: int, width: int) -> torch.Tensor:
                return torch.zeros(num_steps, num_samplers, width, device=self.device)

            self._masks_full = zero_buffer(self.full_size + 1, 1)
            action_flat_dim = su.flatdim(self.action_space)
            self._actions_full = zero_buffer(self.full_size, action_flat_dim)
            self._prev_actions_full = zero_buffer(self.full_size + 1, action_flat_dim)

        assert self.step == 0, "Must call `after_updates` before calling `initialize`"
        self.insert_observations(observations=observations, time_step=0)
        self.prev_actions[0].zero_()  # Have to zero previous actions
        self.masks[0].zero_()  # Have to zero masks

    @property
    def total_experiences(self) -> int:
        """Running count of experiences; `add` increases it by `masks.shape[0]` per call."""
        return self._total_steps

    @total_experiences.setter
    def total_experiences(self, value: int):
        """Overwrite the running count of experiences with `value`."""
        self._total_steps = value

    def set_partition(self, index: int, num_parts: int):
        pass

    @property
    def value_preds(self) -> torch.Tensor:
        return self._value_preds_full[: self.step + 1]

    @property
    def rewards(self) -> torch.Tensor:
        return self._rewards_full[: self.step]

    @property
    def returns(self) -> torch.Tensor:
        return self._returns_full[: self.step + 1]

    @property
    def action_log_probs(self) -> torch.Tensor:
        return self._action_log_probs_full[: self.step]

    @property
    def actions(self) -> torch.Tensor:
        return self._actions_full[: self.step]

    @property
    def prev_actions(self) -> torch.Tensor:
        return self._prev_actions_full[: self.step + 1]

    @property
    def masks(self) -> torch.Tensor:
        return self._masks_full[: self.step + 1]

    @property
    def observations(self) -> Memory:
        """Stored observations for steps `0` through `self.step` (inclusive)."""
        num_entries = self.step + 1
        return self._observations_full.slice(dim=0, start=0, stop=num_entries)

    @staticmethod
    def create_memory(
        spec: Optional[FullMemorySpecType],
        num_samplers: int,
    ) -> Memory:
        """Build a zero-initialized `Memory` from a memory specification.

        For every key in `spec` a tensor is created whose leading "step"
        dimension has size 2 and whose "sampler" dimension has size
        `num_samplers`; the remaining sizes come from the specification.

        # Parameters

        spec : Maps each key to a `(dims_template, dtype)` pair, where
            `dims_template` is a sequence of `(dim_name, dim_size)` entries,
            one of which must be named "sampler". `None` yields an empty
            memory.
        num_samplers : Size to use for the sampler dimension.
        """
        memory = Memory()
        if spec is None:
            return memory

        for key in spec:
            dims_template, dtype = spec[key]

            # The implicit "step" dimension comes first, so positions in the
            # template are shifted by one.
            sampler_dim = 1 + [d[0] for d in dims_template].index("sampler")

            shape = [2, *(d[1] for d in dims_template)]
            shape[sampler_dim] = num_samplers

            memory.check_append(
                key=key,
                tensor=torch.zeros(*shape, dtype=dtype),
                sampler_dim=sampler_dim,
            )

        return memory

    def to(self, device: torch.device):
        for key in [
            "_observations_full",
            "memory_first_last",
            "_actions_full",
            "_prev_actions_full",
            "_masks_full",
            "_rewards_full",
            "_value_preds_full",
            "_returns_full",
            "_action_log_probs_full",
        ]:
            val = getattr(self, key)
            if val is not None:
                setattr(self, key, val.to(device))

        self.device = device

    def insert_observations(
        self,
        observations: ObservationType,
        time_step: int,
    ):
        """Write `observations` into the observation buffers at `time_step`.

        Thin wrapper around `insert_tensors` targeting `_observations_full`
        under the "observations" storage name.
        """
        self.insert_tensors(
            storage=self._observations_full,
            storage_name="observations",
            unflattened=observations,
            time_step=time_step,
        )

    def insert_memory(
        self,
        memory: Optional[Memory],
        time_step: int,
    ):
        """Store `memory` in the first (0) or last (1) memory slot.

        When `memory` is `None` nothing is stored, and it is asserted that
        the storage was created without any memory keys.
        """
        if memory is not None:
            # Only the first and the last memories are stored, hence the
            # index is clamped with `min(time_step, 1)`:
            #  * the first memory is used for loss computation, when the agent
            #    model has to compute all its outputs again given the full batch;
            #  * the last memory is used by the agent when collecting rollouts.
            slot = min(time_step, 1)
            self.insert_tensors(
                storage=self.memory_first_last,
                storage_name="memory",
                unflattened=memory,
                time_step=slot,
            )
            return

        assert len(self.memory_first_last) == 0

    def insert_tensors(
        self,
        storage: Memory,
        storage_name: str,
        unflattened: Union[ObservationType, Memory],
        prefix: str = "",
        path: Sequence[str] = (),
        time_step: int = 0,
    ):
        """Recursively copy the tensors in `unflattened` into `storage`.

        Nested dictionaries are walked recursively; a leaf is stored under a
        flattened name built by joining the nested keys with
        `FLATTEN_SEPARATOR`. Leaves given as tuples are interpreted as
        `(tensor, sampler_dim)` pairs.

        # Parameters

        storage : Destination container, mapping flattened names to
            `(tensor, sampler_dim)` pairs.
        storage_name : Either "observations" or "memory"; selects the
            name-mapping tables and how `time_step` is used for indexing.
        unflattened : (Possibly nested) data to insert.
        prefix : Flattened-name prefix accumulated during recursion.
        path : Nested keys traversed so far during recursion.
        time_step : Index along the step dimension at which to write.

        # Raises

        NotImplementedError : If `storage_name` is not one of the two
            supported names. Any error raised while copying is logged and
            re-raised.
        """
        path = list(path)

        for name in unflattened:
            current_data = unflattened[name]

            if isinstance(current_data, Dict):
                # Nested dictionary: recurse, extending the flattened prefix
                # and the path of nested keys.
                self.insert_tensors(
                    storage=storage,
                    storage_name=storage_name,
                    unflattened=cast(ObservationType, current_data),
                    prefix=prefix + name + self.FLATTEN_SEPARATOR,
                    path=path + [name],
                    time_step=time_step,
                )
                continue

            # Default sampler dimension, overridden when the entry carries
            # its own as the second element of a tuple.
            sampler_dim = self.dim_names.index("sampler")
            if isinstance(current_data, tuple):
                sampler_dim = current_data[1]
                current_data = current_data[0]

            flatten_name = prefix + name
            if flatten_name not in storage:
                # Only observation buffers may be created lazily here.
                assert storage_name == "observations"
                storage[flatten_name] = (
                    torch.zeros_like(current_data)  # type:ignore
                    .repeat(
                        self.full_size + 1,  # required for observations (and memory)
                        *(1 for _ in range(len(current_data.shape))),
                    )
                    .to(self.device),
                    sampler_dim,
                )

                # Register the new name in both lookup tables.
                assert (
                    flatten_name not in self.flattened_to_unflattened[storage_name]
                ), f"new flattened name {flatten_name} already existing in flattened spaces[{storage_name}]"
                self.flattened_to_unflattened[storage_name][flatten_name] = path + [
                    name
                ]
                self.unflattened_to_flattened[storage_name][
                    tuple(path + [name])
                ] = flatten_name

            try:
                if storage_name == "observations":
                    # current_data has a step dimension
                    assert time_step >= 0
                    storage[flatten_name][0][time_step : time_step + 1].copy_(
                        current_data
                    )
                elif storage_name == "memory":
                    # current_data does not have a step dimension
                    storage[flatten_name][0][time_step].copy_(current_data)
                else:
                    raise NotImplementedError
            except:
                # Report which entry failed, then propagate the original error.
                get_logger().error(
                    f"Error while inserting data in storage for name {flatten_name}"
                )
                raise

    def create_tensor_storage(
        self, num_steps: int, template: torch.Tensor
    ) -> torch.Tensor:
        return torch.cat([torch.zeros_like(template).to(self.device)] * num_steps)

    def _double_storage_size(self):
        def pad_tensor_with_zeros(old_t: Optional[torch.Tensor]):
            if old_t is None:
                return None

            assert old_t.shape[0] in [self.full_size, self.full_size + 1]
            padded_t = torch.zeros(
                old_t.shape[0] + self.full_size,
                *old_t.shape[1:],
                dtype=old_t.dtype,
                device=old_t.device,
            )
            padded_t[: old_t.shape[0]] = old_t
            return padded_t

        for key in list(self._observations_full.keys()):
            obs_tensor, sampler_dim = self._observations_full[key]
            self._observations_full[key] = (
                pad_tensor_with_zeros(obs_tensor),
                sampler_dim,
            )

        self._actions_full = pad_tensor_with_zeros(self._actions_full)
        self._prev_actions_full = pad_tensor_with_zeros(self._prev_actions_full)
        self._masks_full = pad_tensor_with_zeros(self._masks_full)

        self._rewards_full = pad_tensor_with_zeros(self._rewards_full)
        self._value_preds_full = pad_tensor_with_zeros(self._value_preds_full)
        self._returns_full = pad_tensor_with_zeros(self._returns_full)
        self._action_log_probs_full = pad_tensor_with_zeros(self._action_log_probs_full)

        self.full_size *= 2

    def add(
        self,
        observations: ObservationType,
        memory: Optional[Memory],
        actions: torch.Tensor,
        action_log_probs: torch.Tensor,
        value_preds: torch.Tensor,
        rewards: torch.Tensor,
        masks: torch.Tensor,
    ):
        """Append a single step of experience.

        See `ExperienceStorage.add` for the meaning of the arguments. The
        buffers are doubled in size when full, and the reward, value, return
        and log-probability buffers are allocated on the first call.
        """
        assert (
            len(masks.shape) == 2 and masks.shape[1] == 1
        ), f"Can only add a single step worth of data at a time (mask shape = {masks.shape})."

        self.total_experiences += masks.shape[0]

        if self.step > self.full_size:
            raise RuntimeError
        if self.step == self.full_size:
            self._double_storage_size()

        cur_step = self.step
        next_step = cur_step + 1

        # Observations, memory, previous actions and masks describe the state
        # reached *after* the action, hence they are written one step ahead.
        self.insert_observations(observations, time_step=next_step)
        self.insert_memory(memory, time_step=next_step)

        assert actions.shape == self._actions_full.shape[1:]

        self._actions_full[cur_step].copy_(actions)  # type:ignore
        self._prev_actions_full[next_step].copy_(actions)  # type:ignore
        self._masks_full[next_step].copy_(masks)  # type:ignore

        if self._rewards_full is None:
            # We delay the instantiation of storage for `rewards`, `value_preds`, `action_log_probs` and `returns`
            # as we do not, a priori, know what shape these will be. For instance, if we are in a multi-agent setting
            # then there may be many rewards (one for each agent).
            capacity = self.full_size

            # `unsqueeze(0)` adds the step dimension to each template.
            self._rewards_full = self.create_tensor_storage(
                capacity, rewards.unsqueeze(0)
            )

            value_returns_template = value_preds.unsqueeze(0)
            self._value_preds_full = self.create_tensor_storage(
                capacity + 1, value_returns_template
            )
            self._returns_full = self.create_tensor_storage(
                capacity + 1, value_returns_template
            )

            self._action_log_probs_full = self.create_tensor_storage(
                capacity, action_log_probs.unsqueeze(0)
            )

        self._value_preds_full[cur_step].copy_(value_preds)  # type:ignore
        self._rewards_full[cur_step].copy_(rewards)  # type:ignore
        self._action_log_probs_full[cur_step].copy_(  # type:ignore
            action_log_probs
        )

        self.step = next_step
        self._before_update_called = False

        # We set the below to be None just for extra safety.
        self._advantages = None
        self._normalized_advantages = None

    def sampler_select(self, keep_list: Sequence[int]):
        keep_list = list(keep_list)
        if self._actions_full.shape[1] == len(keep_list):  # samplers dim
            return  # we are keeping everything, no need to copy

        self._observations_full = self._observations_full.sampler_select(keep_list)
        self.memory_first_last = self.memory_first_last.sampler_select(keep_list)
        self._actions_full = self._actions_full[:, keep_list]
        self._prev_actions_full = self._prev_actions_full[:, keep_list]
        self._action_log_probs_full = self._action_log_probs_full[:, keep_list]
        self._masks_full = self._masks_full[:, keep_list]

        if self._rewards_full is not None:
            self._value_preds_full = self._value_preds_full[:, keep_list]
            self._rewards_full = self._rewards_full[:, keep_list]
            self._returns_full = self._returns_full[:, keep_list]

    def before_updates(
        self,
        *,
        next_value: torch.Tensor,
        use_gae: bool,
        gamma: float,
        tau: float,
        adv_stats_callback: Callable[[torch.Tensor], Dict[str, torch.Tensor]],
        **kwargs,
    ):
        assert len(kwargs) == 0
        self.compute_returns(
            next_value=next_value,
            use_gae=use_gae,
            gamma=gamma,
            tau=tau,
        )

        self._advantages = self.returns[:-1] - self.value_preds[:-1]

        adv_stats = adv_stats_callback(self._advantages)
        self._normalized_advantages = (self._advantages - adv_stats["mean"]) / (
            adv_stats["std"] + 1e-5
        )

        self._before_update_called = True

    def after_updates(self, **kwargs):
        assert len(kwargs) == 0

        for storage in [self.observations, self.memory_first_last]:
            for key in storage:
                storage[key][0][0].copy_(storage[key][0][-1])

        if self._masks_full is not None:
            self.masks[0].copy_(self.masks[-1])

        if self._prev_actions_full is not None:
            self.prev_actions[0].copy_(self.prev_actions[-1])

        self._before_update_called = False
        self._advantages = None
        self._normalized_advantages = None
        self.step = 0

    @staticmethod
    def _extend_tensor_with_ones(stored_tensor: torch.Tensor, desired_num_dims: int):
        # Ensure broadcast to all flattened dimensions
        extended_shape = stored_tensor.shape + (1,) * (
            desired_num_dims - len(stored_tensor.shape)
        )
        return stored_tensor.view(*extended_shape)

    def compute_returns(
        self, next_value: torch.Tensor, use_gae: bool, gamma: float, tau: float
    ):
        extended_mask = self._extend_tensor_with_ones(
            self.masks, desired_num_dims=len(self.value_preds.shape)
        )
        extended_rewards = self._extend_tensor_with_ones(
            self.rewards, desired_num_dims=len(self.value_preds.shape)
        )

        if use_gae:
            self.value_preds[-1] = next_value
            gae = 0
            for step in reversed(range(extended_rewards.shape[0])):
                delta = (
                    extended_rewards[step]
                    + gamma * self.value_preds[step + 1] * extended_mask[step + 1]
                    - self.value_preds[step]
                )
                gae = delta + gamma * tau * extended_mask[step + 1] * gae  # type:ignore
                self.returns[step] = gae + self.value_preds[step]
        else:
            self.returns[-1] = next_value
            for step in reversed(range(extended_rewards.shape[0])):
                self.returns[step] = (
                    self.returns[step + 1] * gamma * extended_mask[step + 1]
                    + extended_rewards[step]
                )

    def batched_experience_generator(
        self,
        num_mini_batch: int,
    ):
        """Yield the stored rollout split into mini-batches of samplers.

        The samplers are divided into `num_mini_batch` contiguous groups
        which are visited in random order. Each yielded dictionary holds the
        full rollout for one group: observations, initial memory, actions,
        previous actions, values, returns, masks, old action log
        probabilities, raw and normalized advantages and the batch size
        (number of steps times number of samplers in the group).

        `before_updates` must have been called since the last `add`, and
        there must be at least as many samplers as mini-batches.
        """
        assert self._before_update_called, (
            "self._before_update_called() must be called before"
            " attempting to generated batched rollouts."
        )
        num_samplers = self.rewards.shape[1]
        assert num_samplers >= num_mini_batch, (
            f"The number of task samplers ({num_samplers}) "
            f"must be greater than or equal to the number of "
            f"mini batches ({num_mini_batch})."
        )

        # Boundaries of (roughly) equally sized, contiguous sampler groups.
        boundaries = np.round(
            np.linspace(0, num_samplers, num_mini_batch + 1, endpoint=True)
        ).astype(np.int32)
        groups = list(zip(boundaries[:-1], boundaries[1:]))
        random.shuffle(groups)

        def gather(tensor: torch.Tensor, samplers: List[int]) -> torch.Tensor:
            # Collect the per-sampler slices into a new tensor, stacked along
            # the sampler dimension.
            return torch.stack([tensor[:, ind] for ind in samplers], 1)

        for start_ind, end_ind in groups:
            cur_samplers = list(range(start_ind, end_ind))

            memory_batch = self.memory_first_last.step_squeeze(0).sampler_select(
                cur_samplers
            )
            observations_batch = self.unflatten_observations(
                self.observations.slice(dim=0, stop=-1).sampler_select(cur_samplers)
            )

            # Buffers holding `step + 1` entries drop their final (bootstrap)
            # entry so that every tensor covers the same steps.
            actions_batch = gather(self.actions, cur_samplers)
            prev_actions_batch = gather(self.prev_actions[:-1], cur_samplers)
            value_preds_batch = gather(self.value_preds[:-1], cur_samplers)
            return_batch = gather(self.returns[:-1], cur_samplers)
            masks_batch = gather(self.masks[:-1], cur_samplers)
            old_action_log_probs_batch = gather(self.action_log_probs, cur_samplers)
            adv_targ = gather(self._advantages, cur_samplers)
            norm_adv_targ = gather(self._normalized_advantages, cur_samplers)

            yield {
                "observations": observations_batch,
                "memory": memory_batch,
                "actions": su.unflatten(self.action_space, actions_batch),
                "prev_actions": su.unflatten(self.action_space, prev_actions_batch),
                "values": value_preds_batch,
                "returns": return_batch,
                "masks": masks_batch,
                "old_action_log_probs": old_action_log_probs_batch,
                "adv_targ": adv_targ,
                "norm_adv_targ": norm_adv_targ,
                "bsize": int(np.prod(masks_batch.shape[:2])),
            }

    def unflatten_observations(self, flattened_batch: Memory) -> ObservationType:
        """Rebuild the nested observation dictionary from flattened storage.

        Every flattened name in `flattened_batch` is mapped back to its path
        of nested keys (as recorded when the observation was first inserted)
        and its tensor, the first element of the stored pair, is placed at
        that path.
        """
        nested: ObservationType = {}
        name_to_path = self.flattened_to_unflattened["observations"]

        for flat_name in flattened_batch:
            full_path = name_to_path[flat_name]
            parents, leaf = full_path[:-1], full_path[-1]

            # Descend to the innermost dictionary, creating levels as needed.
            target = nested
            for part in parents:
                target = cast(ObservationType, target.setdefault(part, {}))

            target[leaf] = flattened_batch[flat_name][0]

        return nested

    def pick_observation_step(self, step: int) -> ObservationType:
        """Return the (nested) observations stored at `step`."""
        selected = self.observations.step_select(step)
        return self.unflatten_observations(selected)

    def pick_memory_step(self, step: int) -> Memory:
        """Return the first or the last stored memory.

        Only two memories are kept, so `step` must be 0 (first), the current
        step or -1 (last). The index actually used is `min(step, 1)`.
        """
        allowed_steps = [0, self.step, -1]
        assert step in allowed_steps, "Can only access the first or last memory."
        slot = min(step, 1)
        return self.memory_first_last.step_squeeze(slot)

    def pick_prev_actions_step(self, step: int) -> ActionType:
        """Return the previous actions at `step`, unflattened into the action space.

        The step dimension is kept (with length 1) in the selected slice.
        """
        flat_prev_actions = self.prev_actions[step : step + 1]
        return su.unflatten(self.action_space, flat_prev_actions)

    def agent_input_for_next_step(self) -> Dict[str, Any]:
        """Collect what the agent needs to act at the current step.

        Returns a dictionary with the observations, memory, previous actions
        and masks stored at `self.step`.
        """
        cur_step = self.step
        observations = self.pick_observation_step(cur_step)
        memory = self.pick_memory_step(cur_step)
        prev_actions = self.pick_prev_actions_step(cur_step)
        masks = self.masks[cur_step : cur_step + 1]
        return {
            "observations": observations,
            "memory": memory,
            "prev_actions": prev_actions,
            "masks": masks,
        }


================================================
FILE: allenact/algorithms/onpolicy_sync/vector_sampled_tasks.py
================================================
# Original work Copyright (c) Facebook, Inc. and its affiliates.
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os
import signal
import time
import traceback
from multiprocessing.connection import Connection
from multiprocessing.context import BaseContext
from multiprocessing.process import BaseProcess
from threading import Thread
from typing import (
    Any,
    Callable,
    Dict,
    Generator,
    Iterator,
    List,
    Optional,
    Sequence,
    Set,
    Tuple,
    Union,
    cast,
)

import numpy as np
from gym.spaces.dict import Dict as SpaceDict
from setproctitle import setproctitle as ptitle

from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import SensorSuite, Sensor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.misc_utils import partition_sequence
from allenact.utils.system import get_logger
from allenact.utils.tensor_utils import tile_images

try:
    # Use torch.multiprocessing if we can.
    # We have yet to find a reason to not use it and
    # you are required to use it when sending a torch.Tensor
    # between processes
    import torch.multiprocessing as mp
except ImportError:
    import multiprocessing as mp  # type: ignore

# Name of the default multiprocessing start method.
DEFAULT_MP_CONTEXT_TYPE = "forkserver"
# Reserved string keys.
# NOTE(review): presumably used to tag end-of-task metrics / callback data in
# the results returned by workers -- their usage is outside this excerpt.
COMPLETE_TASK_METRICS_KEY = "__AFTER_TASK_METRICS__"
COMPLETE_TASK_CALLBACK_KEY = "__AFTER_TASK_CALLBACK__"

# Command identifiers.
# NOTE(review): presumably the message names sent to the worker processes --
# confirm against the worker loop, which is outside this excerpt.
STEP_COMMAND = "step"
NEXT_TASK_COMMAND = "next_task"
RENDER_COMMAND = "render"
CLOSE_COMMAND = "close"
OBSERVATION_SPACE_COMMAND = "observation_space"
ACTION_SPACE_COMMAND = "action_space"
CALL_COMMAND = "call"
SAMPLER_COMMAND = "call_sampler"
ATTR_COMMAND = "attr"
SAMPLER_ATTR_COMMAND = "sampler_attr"
RESET_COMMAND = "reset"
SEED_COMMAND = "seed"
PAUSE_COMMAND = "pause"
RESUME_COMMAND = "resume"


class DelaySignalHandling:
    """Context manager that postpones SIGINT / SIGTERM handling until exit.

    While the context is active, incoming SIGINT and SIGTERM signals are only
    recorded. On exit the previously installed handlers are restored and any
    recorded signal is replayed (SIGTERM first, then SIGINT).
    """

    # Modified from https://stackoverflow.com/a/21919644
    def __init__(self):
        self.old_int_handler = None
        self.old_term_handler = None
        self.int_signal_received: Optional[Any] = None
        self.term_signal_received: Optional[Any] = None

    def __enter__(self):
        # Forget anything recorded during an earlier use, then swap in the
        # recording handlers while remembering the ones they replace.
        self.int_signal_received = None
        self.term_signal_received = None
        self.old_int_handler = signal.signal(signal.SIGINT, self.int_handler)
        self.old_term_handler = signal.signal(signal.SIGTERM, self.term_handler)

    def int_handler(self, sig, frame):
        """Record a SIGINT instead of acting on it."""
        self.int_signal_received = (sig, frame)
        get_logger().debug("SIGINT received. Delaying KeyboardInterrupt.")

    def term_handler(self, sig, frame):
        """Record a SIGTERM instead of acting on it."""
        self.term_signal_received = (sig, frame)
        get_logger().debug("SIGTERM received. Delaying termination.")

    def __exit__(self, type, value, traceback):
        # Put the original handlers back before replaying anything.
        signal.signal(signal.SIGINT, self.old_int_handler)
        signal.signal(signal.SIGTERM, self.old_term_handler)

        if self.term_signal_received:
            previous_term = self.old_term_handler
            if not callable(previous_term):
                # For some reason there appear to be cases where the original
                # termination handler is not callable. It is unclear exactly
                # why this is the case, so we guard against it: when the
                # handler cannot be called we re-send the termination signal
                # to the process and let the python internals handle it (note
                # that the termination handler has already been reset to what
                # it was originally by the `signal.signal(...)` calls above).
                get_logger().debug(
                    "Termination handler could not be called after delaying signal handling."
                    f" Resending the SIGTERM signal. Last (sig, frame) == ({self.term_signal_received})."
                )
                os.kill(os.getpid(), signal.SIGTERM)
            else:
                previous_term(*self.term_signal_received)

        if self.int_signal_received:
            # Fall back to python's default SIGINT handler when the previous
            # handler is not callable.
            replay_int = (
                self.old_int_handler
                if callable(self.old_int_handler)
                else signal.default_int_handler
            )
            replay_int(*self.int_signal_received)


class VectorSampledTasks:
    """Vectorized collection of tasks. Creates multiple processes where each
    process runs its own TaskSampler. Each process generates one Task from its
    TaskSampler at a time and this class allows for interacting with these
    tasks in a vectorized manner. When a task on a process completes, the
    process samples another task from its task sampler. All the tasks are
    synchronized (for step and new_task methods).

    # Attributes

    make_sampler_fn : function which creates a single TaskSampler.
    sampler_fn_args : sequence of dictionaries describing the args
        to pass to make_sampler_fn on each individual process.
    auto_resample_when_done : automatically sample a new Task from the TaskSampler when
        the Task completes. If False, a new Task will not be resampled until all
        Tasks on all processes have completed. This functionality is provided for seamless training
        of vectorized Tasks.
    multiprocessing_start_method : the multiprocessing method used to
        spawn worker processes. Valid methods are
        ``{'spawn', 'forkserver', 'fork'}``. ``'forkserver'`` is the
        recommended method as it works well with CUDA. If
        ``'fork'`` is used, the subprocess must be started before
        any other GPU usage.
    """

    observation_space: SpaceDict
    _workers: List[Union[mp.Process, Thread, BaseProcess]]
    _is_waiting: bool
    _num_task_samplers: int
    _auto_resample_when_done: bool
    _mp_ctx: BaseContext
    _connection_read_fns: List[Callable[[], Any]]
    _connection_write_fns: List[Callable[[Any], None]]
    _read_timeout: Optional[float]

    def __init__(
        self,
        make_sampler_fn: Callable[..., TaskSampler],
        sampler_fn_args: Sequence[Dict[str, Any]] = None,
        callback_sensors: Optional[Sequence[Sensor]] = None,
        auto_resample_when_done: bool = True,
        multiprocessing_start_method: Optional[str] = "forkserver",
        mp_ctx: Optional[BaseContext] = None,
        should_log: bool = True,
        max_processes: Optional[int] = None,
        read_timeout: Optional[
            float
        ] = 60,  # Seconds to wait for a task to return a response before timing out
    ) -> None:
        """Spawn the worker processes and query them for their observation and
        action spaces.

        # Parameters

        make_sampler_fn : function which creates a single TaskSampler, forwarded to
            every worker.
        sampler_fn_args : one dictionary of arguments per task sampler. Must be
            non-empty. NOTE: every dictionary is modified in place, its `"mp_ctx"`
            key is set to the multiprocessing context used by this object.
        callback_sensors : if this is a `Sequence` it is wrapped in a `SensorSuite`
            before being handed to the workers, otherwise it is handed over as is.
        auto_resample_when_done : forwarded to every worker.
        multiprocessing_start_method : name of the start method used to build the
            multiprocessing context. Exactly one of this argument and `mp_ctx` must
            be not `None`; as this argument defaults to `"forkserver"`, it has to be
            set to `None` explicitly when passing `mp_ctx`.
        mp_ctx : an already created multiprocessing context to use.
        should_log : forwarded to the workers and used to decide whether worker
            start-up is logged.
        max_processes : upper bound on the number of worker processes. When `None`
            one process is created per task sampler.
        read_timeout : seconds to wait for a worker response before a
            `TimeoutError` is raised, `None` disables the timeout. The very first
            read (of the observation spaces) uses 5 times this value.

        # Raises

        AssertionError : if `sampler_fn_args` is `None` or empty, if not exactly one
            of `multiprocessing_start_method` and `mp_ctx` is given, or if the start
            method is not a valid one.
        NotImplementedError : if a worker reports a `None` observation space or if
            the reported observation spaces are not all equal.
        """

        self._is_waiting = False
        # Marked as closed until the workers are running so that `close` (also
        # called from `__del__`) is a no-op if construction fails early.
        self._is_closed = True
        self.should_log = should_log
        self.max_processes = max_processes
        self.read_timeout = read_timeout

        assert (
            sampler_fn_args is not None and len(sampler_fn_args) > 0
        ), "number of processes to be created should be greater than 0"

        self._num_task_samplers = len(sampler_fn_args)
        # Never create more processes than there are task samplers.
        self._num_processes = (
            self._num_task_samplers
            if max_processes is None
            else min(max_processes, self._num_task_samplers)
        )

        self._auto_resample_when_done = auto_resample_when_done

        assert (multiprocessing_start_method is None) != (
            mp_ctx is None
        ), "Exactly one of `multiprocessing_start_method`, and `mp_ctx` must be not None."
        if multiprocessing_start_method is not None:
            assert multiprocessing_start_method in self._valid_start_methods, (
                "multiprocessing_start_method must be one of {}. Got '{}'"
            ).format(self._valid_start_methods, multiprocessing_start_method)
            self._mp_ctx = mp.get_context(multiprocessing_start_method)
        else:
            self._mp_ctx = cast(BaseContext, mp_ctx)

        # Number of paused task samplers in each worker process.
        self.npaused_per_process = [0] * self._num_processes
        # Entry `i` is `[process index, index within that process]` for the
        # `i`-th unpaused task sampler.
        self.sampler_index_to_process_ind_and_subprocess_ind: Optional[
            List[List[int]]
        ] = None
        self._reset_sampler_index_to_process_ind_and_subprocess_ind()

        self._workers: Optional[List[Union[mp.Process, Thread, BaseProcess]]] = None
        # NOTE: this writes into the dictionaries supplied by the caller.
        for args in sampler_fn_args:
            args["mp_ctx"] = self._mp_ctx
        (
            connection_poll_fns,
            connection_read_fns,
            self._connection_write_fns,
        ) = self._spawn_workers(  # noqa
            make_sampler_fn=make_sampler_fn,
            sampler_fn_args_list=[
                args_list for args_list in self._partition_to_processes(sampler_fn_args)
            ],
            callback_sensor_suite=(
                SensorSuite(callback_sensors)
                if isinstance(callback_sensors, Sequence)
                else callback_sensors
            ),
        )

        # Pair every raw read function with its poll function so that reads time out.
        self._connection_read_fns = [
            self._create_read_function_with_timeout(
                read_fn=read_fn, poll_fn=poll_fn, timeout=self.read_timeout
            )
            for read_fn, poll_fn in zip(connection_read_fns, connection_poll_fns)
        ]

        self._is_closed = False

        for write_fn in self._connection_write_fns:
            write_fn((OBSERVATION_SPACE_COMMAND, None))

        # Note that we increase the read timeout below as initialization can take some time
        observation_spaces = [
            space
            for read_fn in self._connection_read_fns
            for space in read_fn(timeout_to_use=5 * self.read_timeout if self.read_timeout is not None else None)  # type: ignore
        ]

        # NOTE: the loop variable `os` below is local to the generator expressions.
        if any(os is None for os in observation_spaces):
            raise NotImplementedError(
                "It appears that the `all_observation_spaces_equal`"
                " is not True for some task sampler created by"
                " VectorSampledTasks. This is not currently supported."
            )

        if any(observation_spaces[0] != os for os in observation_spaces):
            raise NotImplementedError(
                "It appears that the observation spaces of the samplers"
                " created in VectorSampledTasks are not equal."
                " This is not currently supported."
            )

        self.observation_space = observation_spaces[0]
        for write_fn in self._connection_write_fns:
            write_fn((ACTION_SPACE_COMMAND, None))
        # One action space per task sampler, read with the regular timeout.
        self.action_spaces = [
            space for read_fn in self._connection_read_fns for space in read_fn()
        ]

    @staticmethod
    def _create_read_function_with_timeout(
        *,
        read_fn: Callable[[], Any],
        poll_fn: Callable[[float], bool],
        timeout: Optional[float],
    ) -> Callable[[], Any]:
        def read_with_timeout(timeout_to_use: Optional[float] = timeout):
            if timeout_to_use is not None:
                # noinspection PyArgumentList
                if not poll_fn(timeout=timeout_to_use):
                    raise TimeoutError(
                        f"Did not receive output from `VectorSampledTask` worker for {timeout_to_use} seconds."
                    )

            return read_fn()

        return read_with_timeout

    def _reset_sampler_index_to_process_ind_and_subprocess_ind(self):
        """Rebuild the mapping from sampler index to `[process index, index
        within that process]` for all `self._num_task_samplers` samplers, as
        distributed by `partition_sequence` over `self._num_processes` parts."""
        parts = partition_sequence([1] * self._num_task_samplers, self._num_processes)

        index_pairs = []
        for process_ind, part in enumerate(parts):
            for subprocess_ind in range(len(part)):
                index_pairs.append([process_ind, subprocess_ind])

        self.sampler_index_to_process_ind_and_subprocess_ind = index_pairs

    def _partition_to_processes(self, seq: Union[Iterator, Sequence]):
        subparts_list: List[List] = [[] for _ in range(self._num_processes)]

        seq = list(seq)
        assert len(seq) == len(self.sampler_index_to_process_ind_and_subprocess_ind)

        for sampler_index, (process_ind, subprocess_ind) in enumerate(
            self.sampler_index_to_process_ind_and_subprocess_ind
        ):
            assert len(subparts_list[process_ind]) == subprocess_ind
            subparts_list[process_ind].append(seq[sampler_index])

        return subparts_list

    @property
    def is_closed(self) -> bool:
        """Has the vector task been closed.

        This is also `True` while `__init__` has not yet finished spawning the
        worker processes.
        """
        return self._is_closed

    @property
    def num_unpaused_tasks(self) -> int:
        """Number of unpaused processes.

        # Returns

        Number of unpaused processes.
        """
        return self._num_task_samplers - sum(self.npaused_per_process)

    @property
    def mp_ctx(self):
        """Get the multiprocessing context used by the vector task.

        # Returns

        The multiprocessing context.
        """
        return self._mp_ctx

    @staticmethod
    def _task_sampling_loop_worker(
        worker_id: Union[int, str],
        connection_read_fn: Callable,
        connection_write_fn: Callable,
        make_sampler_fn: Callable[..., TaskSampler],
        sampler_fn_args_list: List[Dict[str, Any]],
        callback_sensor_suite: Optional[SensorSuite],
        auto_resample_when_done: bool,
        should_log: bool,
        child_pipe: Optional[Connection] = None,
        parent_pipe: Optional[Connection] = None,
    ) -> None:
        """Process worker for creating and interacting with the
        Tasks/TaskSampler.

        Builds a `SingleProcessVectorSampledTasks` from the given arguments and
        then serves messages obtained from `connection_read_fn` until a close
        command arrives. Two message shapes are understood:

        * `(sampler_index, command, data)`: run `command` on a single task
          sampler. A pause command is answered with `"done"`, any other command
          with the result of `command_at`.
        * `(commands, data_list)`: run on all unpaused task samplers. A close
          command ends the loop (nothing is written back), a resume command is
          answered with `"done"`, anything else with the result of `command`.

        # Parameters

        worker_id : identifier used in the process title and in log messages.
        connection_read_fn : called without arguments to get the next message.
        connection_write_fn : called with a single argument to send a response.
        make_sampler_fn : forwarded to `SingleProcessVectorSampledTasks`.
        sampler_fn_args_list : forwarded to `SingleProcessVectorSampledTasks`.
        callback_sensor_suite : forwarded to `SingleProcessVectorSampledTasks`.
        auto_resample_when_done : forwarded to `SingleProcessVectorSampledTasks`.
        should_log : forwarded to `SingleProcessVectorSampledTasks`, also decides
            whether keyboard interrupts and closing are logged here.
        child_pipe : if given, closed when the worker finishes.
        parent_pipe : if given, closed right after the samplers were created.
        """

        ptitle(f"VectorSampledTask: {worker_id}")

        sp_vector_sampled_tasks = SingleProcessVectorSampledTasks(
            make_sampler_fn=make_sampler_fn,
            sampler_fn_args_list=sampler_fn_args_list,
            callback_sensor_suite=callback_sensor_suite,
            auto_resample_when_done=auto_resample_when_done,
            should_log=should_log,
        )

        if parent_pipe is not None:
            parent_pipe.close()  # Means this pipe will close when the calling process closes it
        try:
            while True:
                read_input = connection_read_fn()

                # TODO: Was the below necessary?
                # with DelaySignalHandling():
                #     # Delaying signal handling here is necessary to ensure that we don't
                #     # (when processing a SIGTERM/SIGINT signal) attempt to send data to
                #     # a generator while it is already processing other data.
                if len(read_input) == 3:
                    # Message targeting a single task sampler.
                    sampler_index, command, data = read_input

                    assert command != CLOSE_COMMAND, "Must close all processes at once."
                    assert (
                        command != RESUME_COMMAND
                    ), "Must resume all task samplers at once."

                    if command == PAUSE_COMMAND:
                        sp_vector_sampled_tasks.pause_at(sampler_index=sampler_index)
                        connection_write_fn("done")
                    else:
                        connection_write_fn(
                            sp_vector_sampled_tasks.command_at(
                                sampler_index=sampler_index,
                                command=command,
                                data=data,
                            )
                        )
                else:
                    # Message targeting all (unpaused) task samplers of this worker.
                    commands, data_list = read_input

                    assert (
                        commands != PAUSE_COMMAND
                    ), "Cannot pause all task samplers at once."

                    if commands == CLOSE_COMMAND:
                        # Will close the `sp_vector_sampled_tasks` in the `finally` clause below
                        break

                    elif commands == RESUME_COMMAND:
                        sp_vector_sampled_tasks.resume_all()
                        connection_write_fn("done")
                    else:
                        # A single command name is applied to every unpaused task sampler.
                        if isinstance(commands, str):
                            commands = [
                                commands
                            ] * sp_vector_sampled_tasks.num_unpaused_tasks

                        connection_write_fn(
                            sp_vector_sampled_tasks.command(
                                commands=commands, data_list=data_list
                            )
                        )

        except KeyboardInterrupt:
            # Not re-raised, the worker just shuts down via the `finally` clause.
            if should_log:
                get_logger().info(f"Worker {worker_id} KeyboardInterrupt")
        except Exception as e:
            # Logged regardless of `should_log` and then propagated.
            get_logger().error(
                f"Worker {worker_id} encountered an exception:\n{traceback.format_exc()}"
            )
            raise e
        finally:
            # Best effort clean up, failures while closing the samplers are ignored.
            try:
                sp_vector_sampled_tasks.close()
            except Exception:
                pass

            if child_pipe is not None:
                child_pipe.close()
            if should_log:
                get_logger().info(f"Worker {worker_id} closing.")

    def _spawn_workers(
        self,
        make_sampler_fn: Callable[..., TaskSampler],
        sampler_fn_args_list: Sequence[Sequence[Dict[str, Any]]],
        callback_sensor_suite: Optional[SensorSuite],
    ) -> Tuple[
        List[Callable[[], bool]], List[Callable[[], Any]], List[Callable[[Any], None]]
    ]:
        parent_connections, worker_connections = zip(
            *[self._mp_ctx.Pipe(duplex=True) for _ in range(self._num_processes)]
        )
        self._workers = []
        k = 0
        id: Union[int, str]
        for id, (worker_conn, parent_conn, current_sampler_fn_args_list) in enumerate(
            zip(worker_connections, parent_connections, sampler_fn_args_list)
        ):
            if len(current_sampler_fn_args_list) != 1:
                id = f"{id}({k}-{k + len(current_sampler_fn_args_list) - 1})"
                k += len(current_sampler_fn_args_list)

            if self.should_log:
                get_logger().info(
                    f"Starting {id}-th VectorSampledTask worker with args {current_sampler_fn_args_list}"
                )

            ps = self._mp_ctx.Process(  # type: ignore
                target=self._task_sampling_loop_worker,
                kwargs=dict(
                    worker_id=id,
                    connection_read_fn=worker_conn.recv,
                    connection_write_fn=worker_conn.send,
                    make_sampler_fn=make_sampler_fn,
                    sampler_fn_args_list=current_sampler_fn_args_list,
                    callback_sensor_suite=callback_sensor_suite,
                    auto_resample_when_done=self._auto_resample_when_done,
                    should_log=self.should_log,
                    child_pipe=worker_conn,
                    parent_pipe=parent_conn,
                ),
            )
            self._workers.append(ps)
            ps.daemon = True
            ps.start()
            worker_conn.close()  # Means this pipe will close when the child process closes it
            time.sleep(
                0.1
            )  # Useful to ensure things don't lock up when spawning many envs
        return (
            [p.poll for p in parent_connections],
            [p.recv for p in parent_connections],
            [p.send for p in parent_connections],
        )

    def next_task(self, **kwargs):
        """Move to the next Task for all unpaused TaskSamplers.

        # Parameters

        kwargs : key word arguments sent along with the next task command, the
            same dictionary is used for every unpaused task sampler.

        # Returns

        List with the response of every unpaused task sampler.
        """
        kwargs_per_sampler = [kwargs for _ in range(self.num_unpaused_tasks)]
        return self.command(commands=NEXT_TASK_COMMAND, data_list=kwargs_per_sampler)

    def get_observations(self):
        """Get observations for all unpaused tasks.

        # Returns

        List with the result of calling `get_observations` for each of the
        unpaused tasks.
        """
        function_names = ["get_observations" for _ in range(self.num_unpaused_tasks)]
        return self.call(function_names)

    def command_at(
        self, sampler_index: int, command: str, data: Optional[Any] = None
    ) -> Any:
        """Runs the command on the selected task and returns the result.

        # Parameters


        # Returns

        Result of the command.
        """
        self._is_waiting = True
        (
            process_ind,
            subprocess_ind,
        ) = self.sampler_index_to_process_ind_and_subprocess_ind[sampler_index]
        self._connection_write_fns[process_ind]((subprocess_ind, command, data))
        result = self._connection_read_fns[process_ind]()
        self._is_waiting = False
        return result

    def call_at(
        self,
        sampler_index: int,
        function_name: str,
        function_args: Optional[List[Any]] = None,
    ) -> Any:
        """Calls a function (which is passed by name) on the selected task and
        returns the result.

        # Parameters

        sampler_index : Which task to call the function on.
        function_name : The name of the function to call on the task.
        function_args : Optional function args.

        # Returns

        Result of calling the function.
        """
        name_and_args = (function_name, function_args)
        return self.command_at(
            sampler_index=sampler_index, command=CALL_COMMAND, data=name_and_args
        )

    def next_task_at(self, sampler_index: int) -> List[RLStepResult]:
        """Move to the next Task of the task sampler at `sampler_index`.

        # Parameters

        sampler_index : Index of the task sampler which should move to its next task.

        # Returns

        List of length one containing the response to the next task command.
        """
        response = self.command_at(
            sampler_index=sampler_index, command=NEXT_TASK_COMMAND, data=None
        )
        return [response]

    def step_at(self, sampler_index: int, action: Any) -> List[RLStepResult]:
        """Step in the task of the task sampler at `sampler_index`.

        # Parameters

        sampler_index : Index of the task sampler whose task should take a step.
        action : The action to take.

        # Returns

        List of length one containing the response to the step command.
        """
        step_result = self.command_at(
            sampler_index=sampler_index, command=STEP_COMMAND, data=action
        )
        return [step_result]

    def async_step(self, actions: Sequence[Any]) -> None:
        """Asynchronously step in the vectorized Tasks.

        The actions are sent to the workers without waiting for their responses,
        use `wait_step` to collect those.

        # Parameters

        actions : actions to be performed in the vectorized Tasks, one per
            unpaused task sampler.
        """
        self._is_waiting = True
        actions_per_process = self._partition_to_processes(actions)
        for send_to_worker, process_actions in zip(
            self._connection_write_fns, actions_per_process
        ):
            send_to_worker((STEP_COMMAND, process_actions))

    def wait_step(self) -> List[Dict[str, Any]]:
        """Wait until all the asynchronized processes have synchronized."""
        observations = []
        for read_fn in self._connection_read_fns:
            observations.extend(read_fn())
        self._is_waiting = False
        return observations

    def step(self, actions: Sequence[Any]):
        """Perform actions in the vectorized tasks and wait for the results.

        # Parameters

        actions: Sequence containing the action to be taken in each unpaused task.

        # Returns

        List of outputs from the step method of tasks.
        """
        self.async_step(actions)
        step_results = self.wait_step()
        return step_results

    def reset_all(self):
        """Reset all task samplers to their initial state (except for the RNG
        seed).

        The reset command is sent to all unpaused task samplers, the responses
        are discarded.
        """
        self.command(commands=RESET_COMMAND, data_list=None)

    def set_seeds(self, seeds: List[int]):
        """Sets new tasks' RNG seeds.

        The responses to the seed command are discarded.

        # Parameters

        seeds: List containing one new RNG seed per unpaused task sampler.
        """
        self.command(commands=SEED_COMMAND, data_list=seeds)

    def close(self) -> None:
        if self._is_closed:
            return

        if self._is_waiting:
            for read_fn in self._connection_read_fns:
                try:
                    # noinspection PyArgumentList
                    read_fn(0)  # Time out immediately
                except Exception:
                    pass

        for write_fn in self._connection_write_fns:
            try:
                write_fn((CLOSE_COMMAND, None))
            except Exception:
                pass

        for process in self._workers:
            try:
                process.join(timeout=0.1)
            except Exception:
                pass

        for process in self._workers:
            if process.is_alive():
                process.kill()

        self._is_closed = True

    def pause_at(self, sampler_index: int) -> None:
        """Pauses computation on the Task of the task sampler at `sampler_index`
        without destroying the Task. This is useful for not needing to call steps
        on all Tasks when only some are active (for example during the last
        samples of running eval).

        # Parameters

        sampler_index : which task sampler to pause. All indexes after this
            one will be shifted down by one.
        """
        if self._is_waiting:
            # Consume (and drop) the responses that are still outstanding.
            for read_from_worker in self._connection_read_fns:
                read_from_worker()

        index_pairs = self.sampler_index_to_process_ind_and_subprocess_ind
        process_ind, _ = index_pairs[sampler_index]

        self.command_at(sampler_index=sampler_index, command=PAUSE_COMMAND, data=None)

        # The samplers following the paused one in the same process move up by
        # one position within that process.
        for later_index in range(sampler_index + 1, len(index_pairs)):
            later_pair = index_pairs[later_index]
            if later_pair[0] != process_ind:
                break
            later_pair[1] -= 1

        index_pairs.pop(sampler_index)

        self.npaused_per_process[process_ind] += 1

    def resume_all(self) -> None:
        """Resumes all paused task samplers in all worker processes.

        Sends the resume command to every worker, waits for every worker to
        respond, and then resets the sampler index mapping and the paused counts.
        """
        self._is_waiting = True
        for send_to_worker in self._connection_write_fns:
            send_to_worker((RESUME_COMMAND, None))

        for read_from_worker in self._connection_read_fns:
            read_from_worker()
        self._is_waiting = False

        self._reset_sampler_index_to_process_ind_and_subprocess_ind()

        # Zero the counts in place so that the list object itself is kept.
        self.npaused_per_process[:] = [0] * len(self.npaused_per_process)

    def command(
        self, commands: Union[List[str], str], data_list: Optional[List]
    ) -> List[Any]:
        """"""
        self._is_waiting = True

        if isinstance(commands, str):
            commands = [commands] * self.num_unpaused_tasks

        if data_list is None:
            data_list = [None] * self.num_unpaused_tasks

        for write_fn, subcommands, subdata_list in zip(
            self._connection_write_fns,
            self._partition_to_processes(commands),
            self._partition_to_processes(data_list),
        ):
            write_fn((subcommands, subdata_list))
        results = []
        for read_fn in self._connection_read_fns:
            results.extend(read_fn())
        self._is_waiting = False
        return results

    def call(
        self,
        function_names: Union[str, List[str]],
        function_args_list: Optional[List[Any]] = None,
    ) -> List[Any]:
        """Calls a list of functions (which are passed by name) on the
        corresponding task (by index).

        # Parameters

        function_names : The names of the functions to call on the tasks, a single
            name is used for every unpaused task.
        function_args_list : List of function args for each function.
            If provided, len(function_args_list) should be as long as len(function_names).

        # Returns

        List of results of calling the functions.
        """
        self._is_waiting = True

        if isinstance(function_names, str):
            function_names = [function_names] * self.num_unpaused_tasks

        if function_args_list is None:
            function_args_list = [None] * len(function_names)
        assert len(function_names) == len(function_args_list)

        # Pair every function name with its arguments before splitting by process.
        names_with_args = list(zip(function_names, function_args_list))
        names_with_args_per_process = self._partition_to_processes(names_with_args)
        for send_to_worker, process_names_with_args in zip(
            self._connection_write_fns, names_with_args_per_process
        ):
            send_to_worker((CALL_COMMAND, process_names_with_args))

        results = [
            result
            for read_from_worker in self._connection_read_fns
            for result in read_from_worker()
        ]
        self._is_waiting = False
        return results

    def attr_at(self, sampler_index: int, attr_name: str) -> Any:
        """Gets the attribute (specified by name) on the selected task and
        returns it.

        # Parameters

        sampler_index : Which task to get the attribute from.
        attr_name : The name of the attribute to get.

        # Returns

        The response to the attribute command.
        """
        return self.command_at(
            sampler_index=sampler_index, command=ATTR_COMMAND, data=attr_name
        )

    def attr(self, attr_names: Union[List[str], str]) -> List[Any]:
        """Gets the attributes (specified by name) on the tasks.

        # Parameters

        attr_names : The names of the attributes to get, one per unpaused task.
            A single name is used for every unpaused task.

        # Returns

        List with the responses to the attribute command.
        """
        names_per_task = (
            [attr_names] * self.num_unpaused_tasks
            if isinstance(attr_names, str)
            else attr_names
        )
        return self.command(commands=ATTR_COMMAND, data_list=names_per_task)

    def render(
        self, mode: str = "human", *args, **kwargs
    ) -> Union[np.ndarray, None, List[np.ndarray]]:
        """Render observations from all Tasks in a tiled image or list of
        images.

        # Parameters

        mode : `"raw_rgb_list"` returns the list of images as received from the
            tasks, `"rgb_array"` returns them tiled into a single image, and
            `"human"` shows the tiled image in an OpenCV window and returns `None`.
        args : positional arguments sent along with the render command.
        kwargs : key word arguments sent along with the render command, a
            `"mode"` of `"rgb"` is used unless overridden here.

        # Raises

        NotImplementedError : for any other `mode`.
        """
        render_kwargs = {"mode": "rgb", **kwargs}
        images = self.command(
            commands=RENDER_COMMAND,
            data_list=[(args, render_kwargs)] * self.num_unpaused_tasks,
        )

        if mode == "raw_rgb_list":
            return images

        tile = tile_images(images)
        if mode == "rgb_array":
            return tile
        if mode != "human":
            raise NotImplementedError

        import cv2

        # Reverse the channel order for display.
        cv2.imshow("vectask", tile[:, :, ::-1])
        cv2.waitKey(1)
        return None

    @property
    def _valid_start_methods(self) -> Set[str]:
        return {"forkserver", "spawn", "fork"}

    def __del__(self):
        # Make sure the worker processes are shut down when this object is
        # garbage collected (`close` does nothing if already closed).
        self.close()

    def __enter__(self):
        # Allows `with VectorSampledTasks(...) as vector_tasks:` usage.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Close on leaving the `with` block. Nothing is returned, so an
        # exception raised inside the block is not suppressed.
        self.close()


class SingleProcessVectorSampledTasks(object):
    """Vectorized collection of tasks.

    Simultaneously handles the state of multiple TaskSamplers and their associated tasks.
    Allows for interacting with these tasks in a vectorized manner. When a task completes,
    another task is sampled from the appropriate task sampler. All the tasks are
    synchronized (for step and new_task methods).

    # Attributes

    make_sampler_fn : function which creates a single TaskSampler.
    sampler_fn_args_list : sequence of dictionaries describing the args
        to pass to make_sampler_fn for each individual task sampler.
    auto_resample_when_done : automatically sample a new Task from the TaskSampler when
        the Task completes. If False, a new Task will not be resampled until all
        Tasks on all processes have completed. This functionality is provided for seamless training
        of vectorized Tasks.
    """

    observation_space: SpaceDict
    _vector_task_generators: List[Generator]
    _num_task_samplers: int
    _auto_resample_when_done: bool

    def __init__(
        self,
        make_sampler_fn: Callable[..., TaskSampler],
        sampler_fn_args_list: Optional[Sequence[Dict[str, Any]]] = None,
        callback_sensor_suite: Optional[SensorSuite] = None,
        auto_resample_when_done: bool = True,
        should_log: bool = True,
    ) -> None:
        """Creates one task-sampling generator per entry of
        `sampler_fn_args_list` and queries their observation/action spaces.

        # Parameters

        make_sampler_fn : function which creates a single TaskSampler.
        sampler_fn_args_list : non-empty sequence of keyword-argument
            dictionaries, one per task sampler to create. Each dictionary is
            passed to `make_sampler_fn` with an `"mp_ctx": None` entry added
            (unless the dictionary provides its own `"mp_ctx"`).
        callback_sensor_suite : optional sensor suite forwarded to every
            generator.
        auto_resample_when_done : forwarded to every generator; see the
            class docstring.
        should_log : whether informational messages should be logged.

        # Raises

        AssertionError : if `sampler_fn_args_list` is `None` or empty.
        NotImplementedError : if any generator reports `None` as its
            observation space or if the reported observation spaces differ.
        """

        # Marked as closed until the generators exist so that `close()`
        # (also reached through `__del__`) returns early if anything below
        # fails before the generators have been created.
        self._is_closed = True

        assert (
            sampler_fn_args_list is not None and len(sampler_fn_args_list) > 0
        ), "number of processes to be created should be greater than 0"

        self._num_task_samplers = len(sampler_fn_args_list)
        self._auto_resample_when_done = auto_resample_when_done

        self.should_log = should_log

        # `"mp_ctx": None` comes first in the literal so that a value supplied
        # in `args` takes precedence.
        self._vector_task_generators: List[Generator] = self._create_generators(
            make_sampler_fn=make_sampler_fn,
            sampler_fn_args=[{"mp_ctx": None, **args} for args in sampler_fn_args_list],
            callback_sensor_suite=callback_sensor_suite,
        )

        self._is_closed = False

        observation_spaces = [
            vsi.send((OBSERVATION_SPACE_COMMAND, None))
            for vsi in self._vector_task_generators
        ]

        if any(os is None for os in observation_spaces):
            raise NotImplementedError(
                "It appears that the `all_observation_spaces_equal`"
                " is not True for some task sampler created by"
                " VectorSampledTasks. This is not currently supported."
            )

        # All samplers must agree on a single observation space.
        if any(observation_spaces[0] != os for os in observation_spaces):
            raise NotImplementedError(
                "It appears that the observation spaces of the samplers"
                " created in VectorSampledTasks are not equal."
                " This is not currently supported."
            )

        self.observation_space = observation_spaces[0]
        # Unlike the observation space, one action space is kept per sampler.
        self.action_spaces = [
            vsi.send((ACTION_SPACE_COMMAND, None))
            for vsi in self._vector_task_generators
        ]
        # Entries are `(index the generator was removed from, generator)`,
        # in the order in which `pause_at` was called.
        self._paused: List[Tuple[int, Generator]] = []

    @property
    def is_closed(self) -> bool:
        """Has the vector task been closed.

        `True` before the generators have been created in `__init__` and
        again once `close()` has completed.
        """
        return self._is_closed

    @property
    def mp_ctx(self) -> Optional[BaseContext]:
        """Multiprocessing context of this vector task; always `None` for
        this class."""
        return None

    @property
    def num_unpaused_tasks(self) -> int:
        """Number of unpaused task samplers.

        # Returns

        Total number of task samplers minus the number currently held in
        the paused list.
        """
        return self._num_task_samplers - len(self._paused)

    @staticmethod
    def _task_sampling_loop_generator_fn(
        worker_id: int,
        make_sampler_fn: Callable[..., TaskSampler],
        sampler_fn_args: Dict[str, Any],
        callback_sensor_suite: Optional[SensorSuite],
        auto_resample_when_done: bool,
        should_log: bool,
    ) -> Generator:
        """Generator for working with Tasks/TaskSampler.

        The generator creates a task sampler, samples its first task and
        yields the string `"started"`. From then on it is driven with
        `send((command, data))`: every branch below handles one command,
        yields the result and receives the next `(command, data)` pair at
        that same `yield`. The loop ends when `CLOSE_COMMAND` is received.

        # Parameters

        worker_id : identifier only used in log messages.
        make_sampler_fn : function which creates the TaskSampler.
        sampler_fn_args : keyword arguments passed to `make_sampler_fn`.
        callback_sensor_suite : if given, its observations for a finished
            task are stored in the step result's `info` under
            `COMPLETE_TASK_CALLBACK_KEY`.
        auto_resample_when_done : if `True`, a new task is sampled as soon as
            the current one is done and the step result's observation is
            replaced by the new task's observations (or `None` if the sampler
            has no further task).
        should_log : whether informational messages are logged (errors are
            always logged).

        # Raises

        RuntimeError : if the sampler's first task, or its first task after a
            `RESET_COMMAND`, is `None`.
        NotImplementedError : if an unknown command is received.
        """

        task_sampler = make_sampler_fn(**sampler_fn_args)
        current_task = task_sampler.next_task()

        if current_task is None:
            raise RuntimeError(
                "Newly created task sampler had `None` as it's first task. This likely means that"
                " it was not provided with any tasks to generate. This can happen if, e.g., during testing"
                " you have started more processes than you had tasks to test. Currently this is not supported:"
                " every task sampler must be able to generate at least one task."
            )

        try:
            # Handshake: the creator advances the generator once and expects
            # to receive "started".
            command, data = yield "started"

            while command != CLOSE_COMMAND:
                if command == STEP_COMMAND:
                    step_result: RLStepResult = current_task.step(data)
                    if current_task.is_done():
                        # Attach the finished task's metrics (if non-empty)
                        # to the step result's `info`, creating `info` when
                        # it is missing.
                        metrics = current_task.metrics()
                        if metrics is not None and len(metrics) != 0:
                            if step_result.info is None:
                                step_result = step_result.clone({"info": {}})
                            step_result.info[COMPLETE_TASK_METRICS_KEY] = metrics

                        if callback_sensor_suite is not None:
                            task_callback_data = callback_sensor_suite.get_observations(
                                env=current_task.env, task=current_task
                            )
                            if step_result.info is None:
                                step_result = step_result.clone({"info": {}})
                            step_result.info[COMPLETE_TASK_CALLBACK_KEY] = (
                                task_callback_data
                            )

                        if auto_resample_when_done:
                            current_task = task_sampler.next_task()
                            if current_task is None:
                                # The sampler is exhausted: there is no new
                                # observation to report.
                                step_result = step_result.clone({"observation": None})
                            else:
                                step_result = step_result.clone(
                                    {"observation": current_task.get_observations()}
                                )

                    command, data = yield step_result

                elif command == NEXT_TASK_COMMAND:
                    # `data`, when given, holds keyword arguments for
                    # `next_task`.
                    if data is not None:
                        current_task = task_sampler.next_task(**data)
                    else:
                        current_task = task_sampler.next_task()
                    # NOTE(review): unlike the STEP branch there is no check
                    # for `next_task` returning `None` here.
                    observations = current_task.get_observations()

                    command, data = yield observations

                elif command == RENDER_COMMAND:
                    # `data` is an `(args, kwargs)` pair for `render`.
                    command, data = yield current_task.render(*data[0], **data[1])

                elif (
                    command == OBSERVATION_SPACE_COMMAND
                    or command == ACTION_SPACE_COMMAND
                ):
                    # The command string itself is used as the name of the
                    # attribute read from the current task.
                    res = getattr(current_task, command)
                    command, data = yield res

                elif command == CALL_COMMAND:
                    # Call a method of the current task by name; the
                    # arguments (if any) are passed positionally.
                    function_name, function_args = data
                    if function_args is None or len(function_args) == 0:
                        result = getattr(current_task, function_name)()
                    else:
                        result = getattr(current_task, function_name)(*function_args)
                    command, data = yield result

                elif command == SAMPLER_COMMAND:
                    # Same as CALL_COMMAND, but on the task sampler.
                    function_name, function_args = data
                    if function_args is None or len(function_args) == 0:
                        result = getattr(task_sampler, function_name)()
                    else:
                        result = getattr(task_sampler, function_name)(*function_args)

                    command, data = yield result

                elif command == ATTR_COMMAND:
                    # Read an attribute of the current task by name.
                    property_name = data
                    result = getattr(current_task, property_name)

                    command, data = yield result

                elif command == SAMPLER_ATTR_COMMAND:
                    # Read an attribute of the task sampler by name.
                    property_name = data
                    result = getattr(task_sampler, property_name)

                    command, data = yield result

                elif command == RESET_COMMAND:
                    task_sampler.reset()
                    current_task = task_sampler.next_task()

                    if current_task is None:
                        raise RuntimeError(
                            "After resetting the task sampler it seems to have"
                            " no new tasks (the `task_sampler.next_task()` call"
                            " returned `None` after the reset). This suggests that"
                            " the task sampler's reset method was not implemented"
                            f" correctly (task sampler type is {type(task_sampler)})."
                        )

                    command, data = yield "done"
                elif command == SEED_COMMAND:
                    task_sampler.set_seed(data)

                    command, data = yield "done"
                else:
                    raise NotImplementedError()

        except KeyboardInterrupt:
            # A keyboard interrupt ends the generator without re-raising.
            if should_log:
                get_logger().info(
                    "SingleProcessVectorSampledTask {} KeyboardInterrupt".format(
                        worker_id
                    )
                )
        except Exception as e:
            # Errors are logged regardless of `should_log`, then re-raised.
            get_logger().error(traceback.format_exc())
            raise e
        finally:
            # Runs however the loop ended, so the sampler is always closed.
            if should_log:
                get_logger().info(
                    "SingleProcessVectorSampledTask {} closing.".format(worker_id)
                )
            task_sampler.close()

    def _create_generators(
        self,
        make_sampler_fn: Callable[..., TaskSampler],
        sampler_fn_args: Sequence[Dict[str, Any]],
        callback_sensor_suite: Optional[SensorSuite],
    ) -> List[Generator]:
        """Creates and starts one task-sampling generator per entry of
        `sampler_fn_args`.

        # Parameters

        make_sampler_fn : function which creates a single TaskSampler.
        sampler_fn_args : keyword arguments for `make_sampler_fn`, one
            dictionary per generator. The position in this sequence is used
            as the generator's `worker_id`.
        callback_sensor_suite : optional sensor suite forwarded to every
            generator.

        # Returns

        The started generators, in the same order as `sampler_fn_args`.

        # Raises

        RuntimeError : if a generator's first yielded value is not
            `"started"`.
        """

        generators = []
        # The loop variable is deliberately not called `id` so that the
        # builtin of that name is not shadowed.
        for worker_id, current_sampler_fn_args in enumerate(sampler_fn_args):
            if self.should_log:
                get_logger().info(
                    f"Starting {worker_id}-th SingleProcessVectorSampledTasks generator with args {current_sampler_fn_args}."
                )
            generator = self._task_sampling_loop_generator_fn(
                worker_id=worker_id,
                make_sampler_fn=make_sampler_fn,
                sampler_fn_args=current_sampler_fn_args,
                callback_sensor_suite=callback_sensor_suite,
                auto_resample_when_done=self._auto_resample_when_done,
                should_log=self.should_log,
            )
            generators.append(generator)

            # Advance the generator to its first `yield`; this is where the
            # task sampler is actually created and its first task sampled.
            if next(generator) != "started":
                raise RuntimeError("Generator failed to start.")

        return generators

    def next_task(self, **kwargs):
        """Advance every unpaused TaskSampler to its next Task.

        # Parameters

        kwargs : key word arguments passed to the `next_task` function of the samplers.

        # Returns

        List of initial observations for each of the new tasks.
        """
        # The generators only unpack the message, so one tuple can be shared.
        message = (NEXT_TASK_COMMAND, kwargs)
        return [gen.send(message) for gen in self._vector_task_generators]

    def get_observations(self):
        """Collect the current observations of all unpaused tasks.

        # Returns

        List of observations for each of the unpaused tasks.
        """
        function_names = ["get_observations"] * self.num_unpaused_tasks
        return self.call(function_names)

    def next_task_at(self, index_process: int) -> List[RLStepResult]:
        """Advance a single TaskSampler (the one at position `index_process`
        among the unpaused generators) to its next Task.

        # Parameters

        index_process : Index of the generator whose sampler should move on.

        # Returns

        List of length one containing the observations of the newly sampled task.
        """
        selected_generator = self._vector_task_generators[index_process]
        first_observations = selected_generator.send((NEXT_TASK_COMMAND, None))
        return [first_observations]

    def step_at(self, index_process: int, action: int) -> RLStepResult:
        """Step in the index_process task in the vector.

        # Parameters

        index_process : Index (among the unpaused generators) of the task to step.
        action : The action to take.

        # Returns

        The value yielded by the indexed generator in response to the step
        command (a single result, not a list).
        """
        return self._vector_task_generators[index_process].send((STEP_COMMAND, action))

    def step(self, actions: List[List[int]]):
        """Take one step in each unpaused task.

        # Parameters

        actions: One action per unpaused task, in generator order. Generators
            and actions are paired with `zip`, so surplus entries on either
            side are ignored.

        # Returns

        List of outputs from the step method of tasks.
        """
        paired = zip(self._vector_task_generators, actions)
        return [
            generator.send((STEP_COMMAND, task_action))
            for generator, task_action in paired
        ]

    def reset_all(self):
        """Send the reset command to every unpaused generator, returning the
        list of their replies.

        Per the original documentation this resets all task samplers to
        their initial state (except for the RNG seed).
        """
        reset_message = (RESET_COMMAND, None)
        return [gen.send(reset_message) for gen in self._vector_task_generators]

    def set_seeds(self, seeds: List[int]):
        """Send a new RNG seed to each unpaused task sampler.

        # Parameters

        seeds: One seed per unpaused generator, in generator order.

        # Returns

        List of the generators' replies to the seed command.
        """
        paired = zip(self._vector_task_generators, seeds)
        return [generator.send((SEED_COMMAND, new_seed)) for generator, new_seed in paired]

    def close(self) -> None:
        """Closes all task-sampling generators, including paused ones.

        Does nothing if the vector task is already closed. Every generator
        is sent the close command, which makes it leave its command loop and
        close its task sampler. Generators that were removed from the active
        list by `pause_at` are closed as well; otherwise their task samplers
        would only be released whenever the paused generator objects happen
        to be garbage collected.
        """
        if self._is_closed:
            return

        # `_paused` is the last attribute assigned in `__init__`, so it can be
        # missing if construction failed after the generators were created
        # (this method is also reached through `__del__`).
        paused_generators = [
            generator for _, generator in getattr(self, "_paused", [])
        ]

        for generator in [*self._vector_task_generators, *paused_generators]:
            try:
                generator.send((CLOSE_COMMAND, None))
            except (StopIteration, KeyboardInterrupt):
                # StopIteration is the expected outcome: the generator
                # finishes in response to the close command (and is also
                # raised by a generator that had already finished).
                # Interrupts are ignored so that closing stays best effort.
                pass

        self._is_closed = True

    def pause_at(self, sampler_index: int) -> None:
        """Pauses the Task at position `sampler_index` without destroying
        it. Useful when only some Tasks are still active (for example during
        the last samples of running eval), so that the others need not be
        stepped.

        # Parameters

        sampler_index : which generator to pause. All indexes after this
            one will be shifted down by one.
        """
        # Remember where the generator came from so `resume_all` can put it
        # back in the same place.
        paused_entry = (sampler_index, self._vector_task_generators.pop(sampler_index))
        self._paused.append(paused_entry)

    def resume_all(self) -> None:
        """Puts every paused generator back into the active list."""
        # Undo the pauses newest-first: each recorded index refers to the
        # active list as it was at the moment of that pause.
        for original_index, paused_generator in self._paused[::-1]:
            self._vector_task_generators.insert(original_index, paused_generator)
        self._paused = []

    def command_at(
        self, sampler_index: int, command: str, data: Optional[Any] = None
    ) -> Any:
        """Sends a single command (with optional data) to the selected
        generator and returns its reply.

        # Parameters

        sampler_index : Which (unpaused) generator to send the command to.
        command : The command to send.
        data : Optional payload accompanying the command.

        # Returns

        The value the generator yields in response.
        """
        target_generator = self._vector_task_generators[sampler_index]
        return target_generator.send((command, data))

    def command(
        self, commands: Union[List[str], str], data_list: Optional[List]
    ) -> List[Any]:
        """Sends one command (with its data) to each unpaused generator.

        # Parameters

        commands : Either one command per generator, or a single command
            which is then repeated `num_unpaused_tasks` times.
        data_list : One payload per generator, or `None` to send `None` as
            the payload to every generator.

        # Returns

        List of the generators' replies. Generators, commands and payloads
        are paired with `zip`, so the shortest of the three determines how
        many commands are sent.
        """
        per_task_commands = (
            [commands] * self.num_unpaused_tasks
            if isinstance(commands, str)
            else commands
        )
        per_task_data = (
            [None] * self.num_unpaused_tasks if data_list is None else data_list
        )

        triples = zip(self._vector_task_generators, per_task_commands, per_task_data)
        return [generator.send((cmd, payload)) for generator, cmd, payload in triples]

    def call_at(
        self,
        sampler_index: int,
        function_name: str,
        function_args: Optional[List[Any]] = None,
    ) -> Any:
        """Calls a function (passed by name) on the task of the selected
        generator and returns the result.

        # Parameters

        sampler_index : Which (unpaused) generator's task to call the function on.
        function_name : The name of the function to call on the task.
        function_args : Optional function args.

        # Returns

        Result of calling the function.
        """
        call_request = (CALL_COMMAND, (function_name, function_args))
        target_generator = self._vector_task_generators[sampler_index]
        return target_generator.send(call_request)

    def call(
        self,
        function_names: Union[str, List[str]],
        function_args_list: Optional[List[Any]] = None,
    ) -> List[Any]:
        """Calls functions (passed by name) on the tasks, one function per
        unpaused generator.

        # Parameters

        function_names : Either one function name per task, or a single name
            which is then repeated `num_unpaused_tasks` times.
        function_args_list : List of function args for each function.
            If provided, it must be exactly as long as the list of function
            names; if omitted, every function is called without arguments.

        # Returns

        List of results of calling the functions.
        """
        names = (
            [function_names] * self.num_unpaused_tasks
            if isinstance(function_names, str)
            else function_names
        )
        args_per_call = (
            [None] * len(names) if function_args_list is None else function_args_list
        )

        assert len(names) == len(args_per_call)

        triples = zip(self._vector_task_generators, names, args_per_call)
        return [
            generator.send((CALL_COMMAND, (name, call_args)))
            for generator, name, call_args in triples
        ]

    def attr_at(self, sampler_index: int, attr_name: str) -> Any:
        """Reads one attribute (specified by name) from the task of the
        selected generator.

        # Parameters

        sampler_index : Which (unpaused) generator's task to read from.
        attr_name : The name of the attribute to read.

        # Returns

        The attribute's value.
        """
        target_generator = self._vector_task_generators[sampler_index]
        return target_generator.send((ATTR_COMMAND, attr_name))

    def attr(self, attr_names: Union[List[str], str]) -> List[Any]:
        """Reads attributes (specified by name) from the tasks.

        # Parameters

        attr_names : Either one attribute name per task, or a single name
            which is then repeated `num_unpaused_tasks` times.

        # Returns

        List of attribute values, one per unpaused task.
        """
        names = (
            [attr_names] * self.num_unpaused_tasks
            if isinstance(attr_names, str)
            else attr_names
        )
        paired = zip(self._vector_task_generators, names)
        return [generator.send((ATTR_COMMAND, name)) for generator, name in paired]

    def render(
        self, mode: str = "human", *args, **kwargs
    ) -> Union[np.ndarray, None, List[np.ndarray]]:
        """Render all Tasks, either as a list of images or as a single
        tiled image.

        # Parameters

        mode : `"raw_rgb_list"` returns the images of the unpaused tasks
            untouched, `"rgb_array"` returns the tiled image and `"human"`
            shows the tiled image in an OpenCV window (returning `None`).
        args : Extra positional arguments forwarded to each task's `render`.
        kwargs : Extra keyword arguments forwarded to each task's `render`
            (a `"mode": "rgb"` entry is always included and can be overridden).

        # Returns

        See `mode`. Any other mode raises `NotImplementedError` (after the
        images have already been collected and tiled).
        """
        # The generators only unpack the request, so it can be shared.
        render_request = (RENDER_COMMAND, (args, {"mode": "rgb", **kwargs}))
        images = [gen.send(render_request) for gen in self._vector_task_generators]

        if mode == "raw_rgb_list":
            return images

        # Paused tasks are shown as all-zero images at their original
        # positions so that the tiling keeps a stable layout.
        for paused_index, _ in self._paused[::-1]:
            images.insert(paused_index, np.zeros_like(images[0]))

        tile = tile_images(images)
        if mode == "rgb_array":
            return tile
        if mode != "human":
            raise NotImplementedError

        import cv2

        # Last axis is reversed before display (presumably RGB -> BGR, the
        # channel order OpenCV expects -- confirm against `tile_images`).
        cv2.imshow("vectask", tile[:, :, ::-1])
        cv2.waitKey(1)
        return None

    def __del__(self):
        """Calls `close()` when the object is finalized."""
        self.close()

    def __enter__(self):
        """Context-manager entry: returns this object unchanged."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context-manager exit: calls `close()`.

        The exception arguments are ignored and `None` is returned, so an
        exception raised inside the `with` body is not suppressed.
        """
        self.close()


================================================
FILE: allenact/base_abstractions/__init__.py
================================================


================================================
FILE: allenact/base_abstractions/callbacks.py
================================================
import abc
from typing import List, Dict, Any, Sequence, Optional

from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.base_abstractions.sensor import Sensor

try:
    from typing import Literal
except ImportError:
    from typing_extensions import Literal


class Callback(abc.ABC):
    """Base class for callbacks.

    Every hook below has a body consisting solely of its docstring, so the
    default behaviour of each hook is to do nothing and return `None`.
    Subclasses override the hooks they are interested in. All logging hooks
    take keyword-only arguments and accept additional `**kwargs`.
    """

    def setup(
        self,
        name: str,
        config: ExperimentConfig,
        mode: Literal["train", "valid", "test"],
        **kwargs,
    ) -> None:
        """Called once before training begins."""

    def on_train_log(
        self,
        *,
        metrics: List[Dict[str, Any]],
        metric_means: Dict[str, float],
        tasks_data: List[Any],
        step: int,
        scalar_name_to_total_experiences_key: Dict[str, str],
        checkpoint_file_name: str,
        **kwargs,
    ) -> None:
        """Called once train is supposed to log.

        Note that, unlike in `on_valid_log` and `on_test_log`, `metrics` is
        annotated as a list of dictionaries here.
        """

    def on_valid_log(
        self,
        *,
        metrics: Dict[str, Any],
        metric_means: Dict[str, float],
        tasks_data: List[Any],
        step: int,
        scalar_name_to_total_experiences_key: Dict[str, str],
        checkpoint_file_name: str,
        **kwargs,
    ) -> None:
        """Called after validation ends."""

    def on_test_log(
        self,
        *,
        metrics: Dict[str, Any],
        metric_means: Dict[str, float],
        tasks_data: List[Any],
        step: int,
        scalar_name_to_total_experiences_key: Dict[str, str],
        checkpoint_file_name: str,
        **kwargs,
    ) -> None:
        """Called after test ends."""

    def after_save_project_state(self, base_dir: str) -> None:
        """Called after saving the project state in base_dir."""

    def callback_sensors(self) -> Optional[Sequence[Sensor]]:
        """Determines the data returned to the `tasks_data` parameter in the
        above *_log functions.

        The default implementation returns `None`.
        """


================================================
FILE: allenact/base_abstractions/distributions.py
================================================
import abc
from collections import OrderedDict
from typing import Any, Union, Callable, TypeVar, Dict, Optional, cast, Protocol

import gym
import torch
import torch.nn as nn
from torch.distributions.utils import lazy_property

from allenact.algorithms.onpolicy_sync.misc import TrackingInfoType
from allenact.base_abstractions.sensor import AbstractExpertActionSensor as Expert
from allenact.utils import spaces_utils as su
from allenact.utils.misc_utils import all_unique

TeacherForcingAnnealingType = TypeVar("TeacherForcingAnnealingType")

"""
Modify standard PyTorch distributions so they are compatible with this code.
"""


class Distr(abc.ABC):
    """Abstract interface for action distributions.

    Subclasses must implement `log_prob`, `entropy` and `sample`. `mode` is
    not abstract: its default implementation raises `NotImplementedError`,
    so it only needs to be overridden when it is actually used.
    """

    @abc.abstractmethod
    def log_prob(self, actions: Any):
        """Return the log probability/ies of the provided action/s."""
        raise NotImplementedError()

    @abc.abstractmethod
    def entropy(self):
        """Return the entropy or entropies."""
        raise NotImplementedError()

    @abc.abstractmethod
    def sample(self, sample_shape=torch.Size()):
        """Sample actions."""
        raise NotImplementedError()

    def mode(self):
        """If available, return the action(s) with highest probability.

        It will only be called if using deterministic agents.
        """
        raise NotImplementedError()


class CategoricalDistr(torch.distributions.Categorical, Distr):
    """PyTorch's Categorical distribution, adapted to the `Distr` interface.

    probs or logits are assumed to be passed with step and sampler
    dimensions as in: [step, samplers, ...]
    """

    def mode(self):
        """Index of the largest parameter along the last dimension."""
        # keepdim=False so that the result has the same shape as sample()'s.
        return self._param.argmax(dim=-1, keepdim=False)

    def log_prob(self, value: torch.Tensor):
        """Log probability of `value`, with broadcasting disabled.

        `value` must have exactly the shape of the logits without their last
        dimension, optionally followed by a trailing dimension of size one
        (which is then preserved in the result). Any other shape raises
        `NotImplementedError`.
        """
        expected_shape = self.logits.shape[:-1]

        if value.shape == expected_shape:
            return super().log_prob(value=value)

        if value.shape == expected_shape + (1,):
            # Drop the trailing singleton for the computation, then restore it.
            squeezed_log_prob = super().log_prob(value=value.squeeze(-1))
            return squeezed_log_prob.unsqueeze(-1)

        raise NotImplementedError(
            "Broadcasting in categorical distribution is disabled as it often leads"
            f" to unexpected results. We have that `value.shape == {value.shape}` but"
            f" expected a shape of "
            f" `self.logits.shape[:-1] == {self.logits.shape[:-1]}` or"
            f" `self.logits.shape[:-1] + (1,) == {self.logits.shape[:-1] + (1,)}`"
        )

    @lazy_property
    def log_probs_tensor(self):
        """Log-softmax of the logits over the last dimension (computed lazily)."""
        return torch.log_softmax(self.logits, dim=-1)

    @lazy_property
    def probs_tensor(self):
        """Softmax of the logits over the last dimension (computed lazily)."""
        return torch.softmax(self.logits, dim=-1)


class ConditionalDistr(Distr):
    """Action distribution which may be conditioned on other information
    (i.e. part of a hierarchical distribution).

    The wrapped distribution is either given directly, or built on demand by
    `condition_on_input` from a callable.

    # Attributes
    action_group_name : the identifier of the group of actions (`OrderedDict`) produced by this `ConditionalDistr`
    """

    action_group_name: str

    def __init__(
        self,
        distr_conditioned_on_input_fn_or_instance: Union[Callable, Distr],
        action_group_name: str,
        *distr_conditioned_on_input_args,
        **distr_conditioned_on_input_kwargs,
    ):
        """Initialize a ConditionalDistr.

        # Parameters
        distr_conditioned_on_input_fn_or_instance : either a ready `Distr`, or a Callable which generates
            the distribution once the previously sampled actions are known.
        action_group_name : the identifier of the group of actions (`OrderedDict`) produced by this `ConditionalDistr`
        distr_conditioned_on_input_args : positional arguments for Callable `distr_conditioned_on_input_fn_or_instance`
        distr_conditioned_on_input_kwargs : keyword arguments for Callable `distr_conditioned_on_input_fn_or_instance`
        """
        self.action_group_name = action_group_name
        self.distr_conditioned_on_input_args = distr_conditioned_on_input_args
        self.distr_conditioned_on_input_kwargs = distr_conditioned_on_input_kwargs

        # Exactly one of `distr` / `distr_conditioned_on_input_fn` is set here.
        given_instance = isinstance(distr_conditioned_on_input_fn_or_instance, Distr)
        self.distr: Optional[Distr] = (
            distr_conditioned_on_input_fn_or_instance if given_instance else None
        )
        self.distr_conditioned_on_input_fn: Optional[Callable] = (
            None if given_instance else distr_conditioned_on_input_fn_or_instance
        )

    def log_prob(self, actions):
        """Log probability of `actions` under the wrapped distribution."""
        return self.distr.log_prob(actions)

    def entropy(self):
        """Entropy of the wrapped distribution."""
        return self.distr.entropy()

    def condition_on_input(self, **ready_actions):
        """Build the wrapped distribution from the stored callable, unless a
        distribution is already available.

        `ready_actions` are passed to the callable as keyword arguments, in
        addition to the stored positional and keyword arguments; their names
        must not clash with the stored keyword arguments.
        """
        if self.distr is not None:
            return

        assert not any(
            key in self.distr_conditioned_on_input_kwargs for key in ready_actions
        )
        build_distr = self.distr_conditioned_on_input_fn
        self.distr = build_distr(
            *self.distr_conditioned_on_input_args,
            **self.distr_conditioned_on_input_kwargs,
            **ready_actions,
        )

    def reset(self):
        """Forget the wrapped distribution, but only if it can be rebuilt
        from the stored callable."""
        if self.distr is None or self.distr_conditioned_on_input_fn is None:
            return
        self.distr = None

    def sample(self, sample_shape=torch.Size()) -> OrderedDict:
        """Sample from the wrapped distribution, keyed by `action_group_name`."""
        drawn = self.distr.sample(sample_shape)
        return OrderedDict([(self.action_group_name, drawn)])

    def mode(self) -> OrderedDict:
        """Mode of the wrapped distribution, keyed by `action_group_name`."""
        most_likely = self.distr.mode()
        return OrderedDict([(self.action_group_name, most_likely)])


class SequentialDistr(Distr):
    """Chain of `ConditionalDistr`s which are evaluated in order, each one
    being conditioned on the actions gathered so far."""

    def __init__(self, *conditional_distrs: ConditionalDistr):
        """Store the conditional distributions, whose `action_group_name`s
        must all be different."""
        action_group_names = [cd.action_group_name for cd in conditional_distrs]
        assert all_unique(
            action_group_names
        ), f"All conditional distribution `action_group_name`, must be unique, given names {action_group_names}"
        self.conditional_distrs = conditional_distrs

    def _gather_actions(self, pick_actions):
        """Walk the chain, conditioning each distribution on the actions
        gathered so far and merging in whatever `pick_actions` returns for it."""
        gathered = OrderedDict()
        for conditional in self.conditional_distrs:
            conditional.condition_on_input(**gathered)
            gathered.update(pick_actions(conditional))
        return gathered

    def sample(self, sample_shape=torch.Size()):
        """Sample every action group in order; returns an `OrderedDict`
        from action group name to sampled actions."""
        return self._gather_actions(
            lambda conditional: conditional.sample(sample_shape=sample_shape)
        )

    def mode(self):
        """Take the mode of every action group in order; returns an
        `OrderedDict` from action group name to actions."""
        return self._gather_actions(lambda conditional: conditional.mode())

    def conditional_entropy(self):
        """Sum of the entropies of all conditional distributions."""
        return sum(conditional.entropy() for conditional in self.conditional_distrs)

    def entropy(self):
        """Not supported; use `conditional_entropy` instead."""
        raise NotImplementedError(
            "Please use 'conditional_entropy' instead of 'entropy' as the `entropy_method_name` "
            "parameter in your loss when using `SequentialDistr`."
        )

    def log_prob(
        self, actions: Dict[str, Any], return_dict: bool = False
    ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
        """Log probability of `actions`, which must hold one entry per
        conditional distribution (keyed by action group name).

        Returns the sum of the individual log probabilities, or, if
        `return_dict` is set, an `OrderedDict` from action group name to
        that group's log probability.
        """
        assert len(actions) == len(
            self.conditional_distrs
        ), f"{len(self.conditional_distrs)} conditional distributions for {len(actions)} action groups"

        named_log_probs = []
        for conditional in self.conditional_distrs:
            conditional.condition_on_input(**actions)
            group_name = conditional.action_group_name
            named_log_probs.append((group_name, conditional.log_prob(actions[group_name])))

        if return_dict:
            return OrderedDict(named_log_probs)

        return sum(group_log_prob for _, group_log_prob in named_log_probs)


class TrackingCallback(Protocol):
    """Structural type of a tracking callback: any callable accepting the
    arguments `type`, `info` and `n` matches this protocol."""

    def __call__(self, type: TrackingInfoType, info: Dict[str, Any], n: int): ...


class TeacherForcingDistr(Distr):
    """Distribution wrapper whose samples can be overridden by expert actions.

    `log_prob`, `entropy` and `conditional_entropy` are delegated to the
    wrapped distribution. `sample` draws from the wrapped distribution and
    then, entry by entry, replaces the sampled action with the expert's action
    wherever the teacher forcing mask is set (see `enforce`).
    """

    def __init__(
        self,
        distr: Distr,
        obs: Dict[str, Any],
        action_space: gym.spaces.Space,
        num_active_samplers: Optional[int],
        approx_steps: Optional[int],
        teacher_forcing: Optional[TeacherForcingAnnealingType],
        tracking_callback: Optional[TrackingCallback],
        always_enforce: bool = False,
    ):
        """Initializer.

        # Parameters

        distr : The wrapped distribution.
        obs : Observations; must contain an `"expert_action"` entry.
        action_space : Action space (a `gym.spaces.Dict` when `distr` is a `SequentialDistr`).
        num_active_samplers : If not `None`, the expected size of dimension 1 of the
            flattened actions; also passed as `n` to `tracking_callback`.
        approx_steps : Value passed to `teacher_forcing` to obtain the enforcement
            probability. If `None`, no tracking info dictionary is created.
        teacher_forcing : Callable mapping `approx_steps` to the probability of
            enforcing the expert action. Required unless `always_enforce` is `True`.
        tracking_callback : Optional callback receiving the teacher forcing statistics.
        always_enforce : If `True`, the expert action is used whenever it exists.
        """
        self.distr = distr
        self.is_sequential = isinstance(self.distr, SequentialDistr)

        # action_space is a gym.spaces.Dict for SequentialDistr, or any gym.Space for other Distr
        self.action_space = action_space
        self.num_active_samplers = num_active_samplers
        self.approx_steps = approx_steps
        self.teacher_forcing = teacher_forcing
        self.tracking_callback = tracking_callback
        self.always_enforce = always_enforce

        assert (
            "expert_action" in obs
        ), "When using teacher forcing, obs must contain an `expert_action` uuid"

        obs_space = Expert.flagged_space(
            self.action_space, use_dict_as_groups=self.is_sequential
        )
        self.expert = su.unflatten(obs_space, obs["expert_action"])

    def enforce(
        self,
        sample: Any,
        action_space: gym.spaces.Space,
        teacher: OrderedDict,
        teacher_force_info: Optional[Dict[str, Any]],
        action_name: Optional[str] = None,
    ):
        """Replace sampled actions with expert actions where teacher forcing applies.

        # Parameters

        sample : Actions sampled from the (conditional) distribution.
        action_space : Space used to flatten/unflatten `sample` and the expert actions.
        teacher : Expert data holding the expert actions (`Expert.ACTION_POLICY_LABEL`)
            and the expert success flags (`Expert.EXPERT_SUCCESS_LABEL`).
        teacher_force_info : If not `None`, updated in place with the mean of the
            teacher forcing mask under a `teacher_ratio/sampled...` key.
        action_name : Optional suffix for the key written to `teacher_force_info`.

        # Returns

        The unflattened actions after enforcing the expert actions.
        """
        actions = su.flatten(action_space, sample)

        assert (
            len(actions.shape) == 3
        ), f"Got flattened actions with shape {actions.shape} (it should be [1 x `samplers` x `flatdims`])"

        if self.num_active_samplers is not None:
            assert actions.shape[1] == self.num_active_samplers

        expert_actions = su.flatten(action_space, teacher[Expert.ACTION_POLICY_LABEL])
        assert (
            expert_actions.shape == actions.shape
        ), f"expert actions shape {expert_actions.shape} doesn't match the model's {actions.shape}"

        # expert_success is 0 if the expert action could not be computed and otherwise equals 1.
        expert_action_exists_mask = teacher[Expert.EXPERT_SUCCESS_LABEL]

        if not self.always_enforce:
            # Enforce with probability `teacher_forcing(approx_steps)`, but only
            # where an expert action actually exists.
            teacher_forcing_mask = (
                torch.distributions.bernoulli.Bernoulli(
                    torch.tensor(self.teacher_forcing(self.approx_steps))
                )
                .sample(expert_action_exists_mask.shape)
                .long()
                .to(actions.device)
            ) * expert_action_exists_mask
        else:
            teacher_forcing_mask = expert_action_exists_mask

        if teacher_force_info is not None:
            teacher_force_info[
                "teacher_ratio/sampled{}".format(
                    f"_{action_name}" if action_name is not None else ""
                )
            ] = (teacher_forcing_mask.float().mean().item())

        # Append trailing singleton dims so the mask broadcasts against `actions`.
        extended_shape = teacher_forcing_mask.shape + (1,) * (
            len(actions.shape) - len(teacher_forcing_mask.shape)
        )

        # `torch.where` expects a boolean condition; uint8 conditions are deprecated.
        actions = torch.where(
            teacher_forcing_mask.bool().view(extended_shape), expert_actions, actions
        )

        return su.unflatten(action_space, actions)

    def log_prob(self, actions: Any):
        """Return the wrapped distribution's log probability of `actions`."""
        return self.distr.log_prob(actions)

    def entropy(self):
        """Return the wrapped distribution's entropy."""
        return self.distr.entropy()

    def conditional_entropy(self):
        """Return the wrapped distribution's conditional entropy.

        # Raises

        NotImplementedError : if the wrapped distribution has no `conditional_entropy`.
        """
        if hasattr(self.distr, "conditional_entropy"):
            return self.distr.conditional_entropy()

        raise NotImplementedError(
            f"`conditional_entropy` is not defined for {self.distr}."
        )

    def sample(self, sample_shape=torch.Size()):
        """Sample actions, enforcing expert actions according to the teacher forcing mask.

        For a `SequentialDistr`, each conditional distribution is conditioned on
        the (already enforced) actions of the preceding groups before being
        sampled and enforced itself.

        If both `tracking_callback` and `num_active_samplers` are set, the
        callback is invoked once with the collected teacher forcing statistics.

        # Parameters

        sample_shape : Shape forwarded to the wrapped distribution's `sample`.

        # Returns

        The enforced actions: an `OrderedDict` keyed by action group name for a
        `SequentialDistr`, otherwise whatever `enforce` returns.
        """
        teacher_force_info: Optional[Dict[str, Any]] = None
        if self.approx_steps is not None:
            teacher_force_info = {
                "teacher_ratio/enforced": self.teacher_forcing(self.approx_steps),
            }

        if self.is_sequential:
            res = OrderedDict()
            for cd in cast(SequentialDistr, self.distr).conditional_distrs:
                cd.condition_on_input(**res)
                action_group_name = cd.action_group_name
                res[action_group_name] = self.enforce(
                    cd.sample(sample_shape)[action_group_name],
                    cast(gym.spaces.Dict, self.action_space)[action_group_name],
                    self.expert[action_group_name],
                    teacher_force_info,
                    action_group_name,
                )
        else:
            res = self.enforce(
                self.distr.sample(sample_shape),
                self.action_space,
                self.expert,
                teacher_force_info,
            )

        # NOTE(review): `teacher_force_info` is `None` here when `approx_steps`
        # is `None`; confirm that callbacks tolerate `info=None`.
        if self.tracking_callback is not None and self.num_active_samplers is not None:
            self.tracking_callback(
                type=TrackingInfoType.TEACHER_FORCING,
                info=teacher_force_info,
                n=self.num_active_samplers,
            )

        return res


class AddBias(nn.Module):
    """Module that adds a learnable bias to its input."""

    def __init__(self, bias: torch.FloatTensor):
        """Initializer.

        # Parameters

        bias : Initial bias values; stored as a trainable parameter with an
            extra trailing dimension of size 1.
        """
        super().__init__()
        column_bias = bias.unsqueeze(1)
        self._bias = nn.Parameter(column_bias, requires_grad=True)

    def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:  # type: ignore
        """Return `x` plus the bias, broadcast along dimension 1 of `x`.

        `x` must have either 2 or 4 dimensions.
        """
        assert x.dim() in [2, 4]

        # Reshape the bias so that its values line up with dimension 1 of `x`.
        target_shape = (1, -1) if x.dim() == 2 else (1, -1, 1, 1)
        broadcast_bias = self._bias.t().view(*target_shape)

        return x + broadcast_bias  # type:ignore


================================================
FILE: allenact/base_abstractions/experiment_config.py
================================================
"""Defines the `ExperimentConfig` abstract class used as the basis of all
experiments."""

import abc
from typing import Dict, Any, Optional, List, Union, Sequence, Tuple, cast

import torch
import torch.nn as nn

from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import TrainingPipeline, Builder
from allenact.utils.system import get_logger
from allenact.utils.viz_utils import VizSuite


def split_processes_onto_devices(nprocesses: int, ndevices: int):
    """Distribute `nprocesses` processes over `ndevices` devices round-robin.

    # Parameters

    nprocesses : Total number of processes; must be 0 or at least `ndevices`.
    ndevices : Number of devices.

    # Returns

    A list of length `ndevices` whose i-th entry is the number of processes
    assigned to the i-th device.
    """
    assert (
        nprocesses == 0 or nprocesses >= ndevices
    ), f"NUM_PROCESSES {nprocesses} < ndevices {ndevices}"

    per_device = [0 for _ in range(ndevices)]
    for process_ind in range(nprocesses):
        device_ind = process_ind % ndevices
        per_device[device_ind] = per_device[device_ind] + 1
    return per_device


class MachineParams(object):
    """Describes the processes and devices used to run an experiment.

    Stores, per worker, the number of processes and the `torch.device` to use,
    together with an optional sensor preprocessor graph and visualizer. The
    latter two may be given as `Builder`s, in which case they are built on
    first access and cached.
    """

    def __init__(
        self,
        nprocesses: Union[int, Sequence[int]],
        devices: Union[
            None, int, str, torch.device, Sequence[Union[int, str, torch.device]]
        ] = None,
        sensor_preprocessor_graph: Optional[
            Union[SensorPreprocessorGraph, Builder[SensorPreprocessorGraph]]
        ] = None,
        sampler_devices: Union[
            None, int, str, torch.device, Sequence[Union[int, str, torch.device]]
        ] = None,
        visualizer: Optional[Union[VizSuite, Builder[VizSuite]]] = None,
        gpu_ids: Optional[Union[int, Sequence[int]]] = None,
        local_worker_ids: Optional[List[int]] = None,
    ):
        """Initializer.

        # Parameters

        nprocesses : Number of processes per worker (a single int means one worker).
        devices : Device(s) for the workers: a single device specification shared
            by all workers or one per worker. `None` or an empty sequence means cpu.
        sensor_preprocessor_graph : Optional preprocessor graph or a builder for it.
        sampler_devices : Optional device(s) for the samplers, standardized like `devices`.
        visualizer : Optional visualizer or a builder for it.
        gpu_ids : Deprecated alias of `devices`; only one of the two may be set.
        local_worker_ids : Indices of the local workers (defaults to all workers).
        """
        assert (
            gpu_ids is None or devices is None
        ), "only one of `gpu_ids` or `devices` should be set."
        if gpu_ids is not None:
            get_logger().warning(
                "The `gpu_ids` parameter will be deprecated, use `devices` instead."
            )
            devices = gpu_ids

        self.nprocesses = (
            nprocesses if isinstance(nprocesses, Sequence) else (nprocesses,)
        )

        self.devices: Tuple[torch.device, ...] = self._standardize_devices(
            devices=devices, nworkers=len(self.nprocesses)
        )

        self._sensor_preprocessor_graph_maybe_builder = sensor_preprocessor_graph
        self.sampler_devices: Optional[Tuple[torch.device, ...]] = (
            None
            if sampler_devices is None
            else self._standardize_devices(
                devices=sampler_devices, nworkers=len(self.nprocesses)
            )
        )
        self._visualizer_maybe_builder = visualizer

        self._sensor_preprocessor_graph_cached: Optional[SensorPreprocessorGraph] = None
        self._visualizer_cached: Optional[VizSuite] = None

        self.local_worker_ids: Optional[List[int]] = None
        self.set_local_worker_ids(local_worker_ids)

    def set_local_worker_ids(self, local_worker_ids: Optional[List[int]]):
        """Set the local worker ids, defaulting to all workers if none are given.

        Every id must index into `self.devices`.
        """
        self.local_worker_ids = local_worker_ids or list(range(len(self.devices)))

        assert all(
            0 <= worker_id < len(self.devices) for worker_id in self.local_worker_ids
        ), (
            f"Passed {len(self.local_worker_ids)} local worker ids {self.local_worker_ids}"
            f" for {len(self.devices)} total devices (workers)"
        )

    @classmethod
    def instance_from(
        cls, machine_params: Union["MachineParams", Dict[str, Any]]
    ) -> "MachineParams":
        """Return `machine_params` itself if it is an instance of this class,
        otherwise build an instance using the dictionary as keyword arguments."""
        if isinstance(machine_params, cls):
            return machine_params
        assert isinstance(machine_params, Dict)
        return cls(**machine_params)

    @staticmethod
    def _standardize_devices(
        devices: Optional[
            Union[int, str, torch.device, Sequence[Union[int, str, torch.device]]]
        ],
        nworkers: int,
    ) -> Tuple[torch.device, ...]:
        """Convert a device specification into one `torch.device` per worker.

        # Parameters

        devices : `None`/empty (cpu), a single int/str/`torch.device` shared by all
            workers, or a sequence with one entry per worker. The value `-1` maps to cpu.
        nworkers : Number of workers.

        # Returns

        A tuple of `nworkers` devices.

        # Raises

        RuntimeError : if querying the capability of a non-cpu device fails.
        """
        if devices is None or (isinstance(devices, Sequence) and len(devices) == 0):
            devices = torch.device("cpu")

        # A `str` (e.g. "cuda:0") is itself a `Sequence`; it must be handled as a
        # single device specification rather than as a sequence of characters.
        if isinstance(devices, str) or not isinstance(devices, Sequence):
            devices = (devices,) * nworkers

        assert len(devices) == nworkers, (
            f"The number of devices (len({devices})={len(devices)})"
            f" must equal the number of workers ({nworkers})"
        )

        devices = tuple(
            torch.device("cpu") if d == -1 else torch.device(d) for d in devices  # type: ignore
        )
        for d in devices:
            if d != torch.device("cpu"):
                try:
                    torch.cuda.get_device_capability(d)  # type: ignore
                except Exception as err:
                    raise RuntimeError(
                        f"It appears the cuda device {d} is not available on your system."
                    ) from err

        return cast(Tuple[torch.device, ...], devices)

    @property
    def sensor_preprocessor_graph(self) -> Optional[SensorPreprocessorGraph]:
        """The sensor preprocessor graph (built from its builder on first access), if any."""
        if self._sensor_preprocessor_graph_maybe_builder is None:
            return None

        if self._sensor_preprocessor_graph_cached is None:
            if isinstance(self._sensor_preprocessor_graph_maybe_builder, Builder):
                self._sensor_preprocessor_graph_cached = (
                    self._sensor_preprocessor_graph_maybe_builder()
                )
            else:
                self._sensor_preprocessor_graph_cached = (
                    self._sensor_preprocessor_graph_maybe_builder
                )

        return self._sensor_preprocessor_graph_cached

    def set_visualizer(self, viz: VizSuite):
        """Replace the visualizer unless one has already been instantiated."""
        if self._visualizer_cached is None:
            self._visualizer_maybe_builder = viz
        else:
            get_logger().warning("Ignoring viz (already instantiated)")

    @property
    def visualizer(self) -> Optional[VizSuite]:
        """The visualizer (built from its builder on first access), if any."""
        if self._visualizer_maybe_builder is None:
            return None

        if self._visualizer_cached is None:
            if isinstance(self._visualizer_maybe_builder, Builder):
                self._visualizer_cached = self._visualizer_maybe_builder()
            else:
                self._visualizer_cached = self._visualizer_maybe_builder

        return self._visualizer_cached


class FrozenClassVariables(abc.ABCMeta):
    """Metaclass that forbids assigning class-level attributes.

    Used as the metaclass of ExperimentConfig: attributes of the class
    itself cannot be (re)assigned, while attributes of instances remain
    freely assignable. The bookkeeping attributes written by the `abc`
    machinery are exempt.
    """

    def __setattr__(cls, attr, value):
        # Guard clause: allow what `abc` itself needs to write.
        if (
            not isinstance(cls, type)
            or attr == "__abstractmethods__"
            or attr.startswith("_abc_")
        ):
            super().__setattr__(attr, value)
            return

        raise RuntimeError(
            "Cannot edit class-level attributes.\n"
            "Changing the values of class-level attributes is disabled in ExperimentConfig classes.\n"
            "This is to prevent problems that can occur otherwise when using multiprocessing.\n"
            "If you wish to change the value of a configuration, please do so for an instance of that"
            f" configuration.\nTriggered by attempting to modify {cls.__name__}"
        )


class ExperimentConfig(metaclass=FrozenClassVariables):
    """Abstract class used to define experiments.

    Instead of using yaml or text files, experiments in our framework
    are defined as a class. In particular, to define an experiment one
    must define a new class inheriting from this class which implements
    all of the below methods. The below methods will then be called when
    running the experiment.

    Because of the `FrozenClassVariables` metaclass, class-level
    attributes cannot be reassigned; change values on instances instead.
    """

    @abc.abstractmethod
    def tag(self) -> str:
        """A string describing the experiment."""
        raise NotImplementedError()

    @abc.abstractmethod
    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Creates the training pipeline.

        # Parameters

        kwargs : Extra kwargs. Currently unused.

        # Returns

        An instantiated `TrainingPipeline` object.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def machine_params(
        self, mode="train", **kwargs
    ) -> Union[MachineParams, Dict[str, Any]]:
        """Parameters used to specify machine information.

        Machine information includes at least (1) the number of processes
        to train with and (2) the gpu devices indices to use.

        # Parameters

        mode : Whether or not the machine parameters should be those for
            "train", "valid", or "test".
        kwargs : Extra kwargs.

        # Returns

        Either a `MachineParams` object or a dictionary of keyword arguments
        for the `MachineParams` initializer, e.g. of the form
        `{"nprocesses": ..., "gpu_ids": ..., ...}`.
        Here `nprocesses` must be a non-negative integer, `gpu_ids` must
        be a sequence of non-negative integers (if empty, then everything
        will be run on the cpu). Note that `MachineParams` logs a warning
        that `gpu_ids` will be deprecated in favor of `devices`.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def create_model(self, **kwargs) -> nn.Module:
        """Create the neural model."""
        raise NotImplementedError()

    @abc.abstractmethod
    def make_sampler_fn(self, **kwargs) -> TaskSampler:
        """Create the TaskSampler given keyword arguments.

        These `kwargs` will be generated by one of
        `ExperimentConfig.train_task_sampler_args`,
        `ExperimentConfig.valid_task_sampler_args`, or
        `ExperimentConfig.test_task_sampler_args` depending on whether
        the user has chosen to train, validate, or test.
        """
        raise NotImplementedError()

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Specifies the training parameters for the `process_ind`th training
        process.

        These parameters are meant to be passed as keyword arguments to `ExperimentConfig.make_sampler_fn`
        to generate a task sampler.

        This method is not abstract: the default implementation raises
        `NotImplementedError`.

        # Parameters

        process_ind : The unique index of the training process (`0 ≤ process_ind < total_processes`).
        total_processes : The total number of training processes.
        devices : Gpu devices (if any) to use.
        seeds : The seeds to use, if any.
        deterministic_cudnn : Whether or not to use deterministic cudnn.

        # Returns

        The parameters for `make_sampler_fn`
        """
        raise NotImplementedError()

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Specifies the validation parameters for the `process_ind`th
        validation process.

        See `ExperimentConfig.train_task_sampler_args` for parameter
        definitions.
        """
        raise NotImplementedError()

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Specifies the test parameters for the `process_ind`th test process.

        See `ExperimentConfig.train_task_sampler_args` for parameter
        definitions.
        """
        raise NotImplementedError()


================================================
FILE: allenact/base_abstractions/misc.py
================================================
import abc
from typing import (
    Dict,
    Any,
    TypeVar,
    Sequence,
    NamedTuple,
    Optional,
    List,
    Union,
    Generic,
)

import attr
import torch

# Type variables standing in for the concrete environment, action
# distribution, and model types used with the abstractions in this module.
EnvType = TypeVar("EnvType")
DistributionType = TypeVar("DistributionType")
ModelType = TypeVar("ModelType")
# Observations: string keys mapped to tensors or to nested dictionaries.
ObservationType = Dict[str, Union[torch.Tensor, Dict[str, Any]]]


class RLStepResult(NamedTuple):
    """Result of a step: observation, reward, done flag and info dictionary.

    Every field may be `None`.
    """

    observation: Optional[Any]
    reward: Optional[Union[float, List[float]]]
    done: Optional[bool]
    info: Optional[Dict[str, Any]]

    def clone(self, new_info: Dict[str, Any]):
        """Return a copy with the fields named in `new_info` replaced.

        # Parameters

        new_info : Maps field names ("observation", "reward", "done", "info") to
            their replacement values. A field is replaced whenever its name is
            present, even if the new value is `None`.

        # Returns

        A new `RLStepResult`.
        """
        return RLStepResult(
            observation=new_info.get("observation", self.observation),
            reward=new_info.get("reward", self.reward),
            done=new_info.get("done", self.done),
            info=new_info.get("info", self.info),
        )

    def merge(self, other: "RLStepResult"):
        """Return a new result preferring the non-`None` fields of `other`.

        `observation`, `reward` and `done` are taken from `other` unless they
        are `None` there. The `info` dictionaries are merged, with entries from
        `other` taking precedence; a missing (`None`) `info` on either side is
        treated as empty, so the merged `info` is always a dictionary.

        # Parameters

        other : The result whose fields take precedence.

        # Returns

        A new `RLStepResult`.
        """
        return RLStepResult(
            observation=(
                self.observation if other.observation is None else other.observation
            ),
            reward=self.reward if other.reward is None else other.reward,
            done=self.done if other.done is None else other.done,
            info={
                **(self.info if self.info is not None else {}),
                # The check must be on `other.info`: unpacking `None` raises.
                **(other.info if other.info is not None else {}),
            },
        )


class ActorCriticOutput(tuple, Generic[DistributionType]):
    """Tuple `(distributions, values, extras)` whose items are also attributes.

    Instances unpack like a regular 3-tuple while additionally exposing the
    three items through the `distributions`, `values` and `extras` attributes.
    """

    distributions: DistributionType
    values: torch.FloatTensor
    extras: Dict[str, Any]

    # noinspection PyTypeChecker
    def __new__(
        cls,
        distributions: DistributionType,
        values: torch.FloatTensor,
        extras: Dict[str, Any],
    ):
        """Create the tuple and mirror its three items as attributes."""
        self = tuple.__new__(cls, (distributions, values, extras))
        self.distributions = distributions
        self.values = values
        self.extras = extras
        return self

    def __getnewargs__(self):
        """Return the three items as the arguments for `__new__`.

        Used by `copy` and `pickle`. The default inherited from `tuple` passes
        a single tuple argument, which does not match this class's `__new__`.
        """
        return tuple(self)

    def __repr__(self) -> str:
        """Return a string naming the actual class and its three items."""
        return (
            f"{type(self).__name__}(distributions={self.distributions},"
            f" values={self.values},"
            f" extras={self.extras})"
        )


class Memory(Dict):
    """Dictionary of named memory tensors together with their sampler dimension.

    Every value is a `(tensor, sampler_dim)` tuple, where `sampler_dim` is the
    index of the tensor dimension along which `sampler_select` operates.
    """

    def __init__(self, *args, **kwargs):
        """Initializer.

        Accepts either a single unnamed argument (a sequence of
        `(key, (tensor, sampler_dim))` pairs or a dictionary mapping keys to
        `(tensor, sampler_dim)`) or keyword arguments mapping keys to
        `(tensor, sampler_dim)`. Keyword arguments are only considered when no
        unnamed argument is given.
        """
        super().__init__()

        if args:
            assert len(args) == 1, (
                "Only one of Sequence[Tuple[str, Tuple[torch.Tensor, int]]]"
                "or Dict[str, Tuple[torch.Tensor, int]] accepted as unnamed args"
            )
            source = args[0]
            if isinstance(source, Sequence):
                for key, tensor_dim in source:
                    assert (
                        len(tensor_dim) == 2
                    ), "Only Tuple[torch.Tensor, int]] accepted as second item in Tuples"
                    tensor, dim = tensor_dim
                    self.check_append(key, tensor, dim)
            elif isinstance(source, Dict):
                for key in source:
                    entry = source[key]
                    assert (
                        len(entry) == 2
                    ), "Only Tuple[torch.Tensor, int]] accepted as values in Dict"
                    tensor, dim = entry
                    self.check_append(key, tensor, dim)
            return

        for key in kwargs:
            entry = kwargs[key]
            assert (
                len(entry) == 2
            ), "Only Tuple[torch.Tensor, int]] accepted as keyword arg"
            tensor, dim = entry
            self.check_append(key, tensor, dim)

    def check_append(
        self, key: str, tensor: torch.Tensor, sampler_dim: int
    ) -> "Memory":
        """Validates and adds a new memory type.

        # Parameters

        key: string identifier of the memory type (must not be in use yet)
        tensor: memory tensor
        sampler_dim: sampler dimension (a valid dimension index of `tensor`)

        # Returns

        Updated Memory
        """
        assert isinstance(key, str), f"key {key} must be str"
        assert isinstance(
            tensor, torch.Tensor
        ), f"tensor {tensor} must be torch.Tensor"
        assert isinstance(sampler_dim, int), f"sampler_dim {sampler_dim} must be int"

        assert key not in self, f"Reused key {key}"
        assert (
            0 <= sampler_dim < len(tensor.shape)
        ), f"Got sampler_dim {sampler_dim} for tensor with shape {tensor.shape}"

        self[key] = (tensor, sampler_dim)

        return self

    def tensor(self, key: str) -> torch.Tensor:
        """Returns the memory tensor stored for memory type `key`.

        # Parameters

        key: string identifier of the memory type

        # Returns

        Memory tensor for type `key`
        """
        assert key in self, f"Missing key {key}"
        stored_tensor, _ = self[key][0], self[key][1]
        return stored_tensor

    def sampler_dim(self, key: str) -> int:
        """Returns the sampler dimension stored for memory type `key`.

        # Parameters

        key: string identifier of the memory type

        # Returns

        The sampler dim
        """
        assert key in self, f"Missing key {key}"
        return self[key][1]

    def sampler_select(self, keep: Sequence[int]) -> "Memory":
        """Applies PyTorch's index_select along the `sampler_dim` of each
        memory type.

        Only memory types with more samplers than `len(keep)` are selected
        (and included in the result). If no memory type is selected, this
        memory itself is returned.

        # Parameters

        keep: a list of sampler indices to keep

        # Returns

        Selected memory
        """
        selected = Memory()
        any_selected = False

        for name in self:
            dim = self.sampler_dim(name)
            tensor = self.tensor(name)
            assert len(keep) == 0 or (
                0 <= min(keep) and max(keep) < tensor.shape[dim]
            ), f"Got min(keep)={min(keep)} max(keep)={max(keep)} for memory type {name} with shape {tensor.shape}, dim {dim}"

            if tensor.shape[dim] <= len(keep):
                continue

            index = torch.as_tensor(
                list(keep), dtype=torch.int64, device=tensor.device
            )
            selected.check_append(
                name, tensor.index_select(dim=dim, index=index), dim
            )
            any_selected = True

        return selected if any_selected else self

    def set_tensor(self, key: str, tensor: torch.Tensor) -> "Memory":
        """Replaces the tensor of an existing memory type, keeping its
        sampler dim. The new tensor must have the same shape as the old one.

        # Parameters

        key: memory type identifier to update
        tensor: updated tensor

        # Returns

        Updated memory
        """
        assert key in self, f"Missing key {key}"
        previous_tensor, dim = self[key][0], self[key][1]
        assert (
            tensor.shape == previous_tensor.shape
        ), f"setting tensor with shape {tensor.shape} for former {previous_tensor.shape}"
        self[key] = (tensor, dim)

        return self

    def step_select(self, step: int) -> "Memory":
        """Slices every memory tensor to the single step `step` along its
        first dimension, keeping that dimension (with length 1).

        # Parameters

        step: step to keep

        # Returns

        Sliced memory with a single step
        """
        # `-1 + 1 == 0` would give an empty slice, so the last step is open-ended.
        stop = None if step == -1 else step + 1

        sliced = Memory()
        for key in self:
            tensor = self.tensor(key)
            assert (
                tensor.shape[0] > step
            ), f"attempting to access step {step} for memory type {key} of shape {tensor.shape}"
            sliced.check_append(key, tensor[step:stop, ...], self.sampler_dim(key))
        return sliced

    def step_squeeze(self, step: int) -> "Memory":
        """Indexes every memory tensor at `step` along its first dimension,
        removing that dimension (the sampler dim is decreased by one).

        # Parameters

        step: step to keep

        # Returns

        Sliced memory with a single step (and squeezed step dimension)
        """
        squeezed = Memory()
        for key in self:
            tensor = self.tensor(key)
            assert (
                tensor.shape[0] > step
            ), f"attempting to access step {step} for memory type {key} of shape {tensor.shape}"
            squeezed.check_append(key, tensor[step, ...], self.sampler_dim(key) - 1)
        return squeezed

    def slice(
        self,
        dim: int,
        start: Optional[int] = None,
        stop: Optional[int] = None,
        step: int = 1,
    ) -> "Memory":
        """Slices all memory tensors along a dimension whose extent is the
        same in every memory type. It also accepts negative indices.

        # Parameters

        dim: the dimension to slice
        start: the index of the first item to keep if given (default 0 if None)
        stop: the index of the first item to discard if given (default tensor size along `dim` if None)
        step: the increment between consecutive indices (default 1)

        # Returns

        Sliced memory
        """
        keep_everything = start is None and stop is None and step == 1
        extent: Optional[int] = None

        sliced = Memory()
        for key in self:
            tensor = self.tensor(key)
            assert (
                len(tensor.shape) > dim
            ), f"attempting to access dim {dim} for memory type {key} of shape {tensor.shape}"

            # The first memory type fixes the extent all others must share.
            if extent is None:
                extent = tensor.shape[dim]

            assert (
                extent == tensor.shape[dim]
            ), f"attempting to slice along non-uniform dimension {dim}"

            if keep_everything:
                sliced.check_append(key, tensor, self.sampler_dim(key))
                continue

            leading = (slice(None),) * dim
            trailing = (slice(None),) * (len(tensor.shape) - (1 + dim))
            index = leading + (slice(start, stop, step),) + trailing
            sliced.check_append(
                key=key,
                tensor=tensor[index],
                sampler_dim=self.sampler_dim(key),
            )

        return sliced

    def to(self, device: torch.device) -> "Memory":
        """Moves, in place, every tensor not already on `device` to `device`
        and returns this memory."""
        for key in self:
            current = self.tensor(key)
            if current.device == device:
                continue
            self.set_tensor(key, current.to(device))
        return self


class Loss(abc.ABC):
    """Common abstract base class for losses; declares no members of its own."""


@attr.s(kw_only=True)
class LossOutput:
    """Keyword-only container for the result of a loss computation.

    This is the declared return type of `GenericAbstractLoss.loss`.
    """

    # The loss value (presumably the total loss to backpropagate -- TODO confirm).
    value: torch.Tensor = attr.ib()
    # Additional numeric information about the current loss.
    info: Dict[str, Union[float, int]] = attr.ib()
    # Numeric information that is presumably aggregated per epoch rather than
    # per batch -- TODO confirm against the training engine.
    per_epoch_info: Dict[str, Union[float, int]] = attr.ib()
    # `batch_memory` after processing the current batch.
    batch_memory: Memory = attr.ib()
    # `stream_memory` after processing the current batch.
    stream_memory: Memory = attr.ib()
    # Batch size.
    bsize: int = attr.ib()


class GenericAbstractLoss(Loss):
    """Abstract loss computed from a model, a batch of data, and two memories."""

    # noinspection PyMethodOverriding
    @abc.abstractmethod
    def loss(  # type: ignore
        self,
        *,  # No positional arguments
        model: ModelType,
        batch: ObservationType,
        batch_memory: Memory,
        stream_memory: Memory,
    ) -> LossOutput:
        """Computes the loss.

        Loss after processing a batch of data with (part of) a model (possibly with memory).

        We support two different types of memory: `batch_memory` and `stream_memory` that can be
        used to compute losses and share computation.

        ## `batch_memory`
        During the update phase of training, the following
        steps happen in order:
        1. A `batch` of data is sampled from an `ExperienceStorage` (which stores data possibly collected during previous
             rollout steps).
        2. This `batch` is passed to each of the specified `GenericAbstractLoss`'s and is used, along with the `model`,
             to compute each such loss.
        3. The losses are summed together, gradients are computed by backpropagation, and an update step is taken.
        4. The process loops back to (1) with a new batch.

        Now suppose that the computation used by a `GenericAbstractLoss` (`LossA`) can be shared across multiple of the
        `GenericAbstractLoss`'s (`LossB`, ...). For instance, `LossA` might run the visual encoder of `model` across
        all the images contained in `batch` so that it can compute a classification loss while `LossB` would like to
        run the same visual encoder on the same images to compute a depth-prediction loss. Without having some sort
        of memory, you would need to rerun this visual encoder on all images multiple times, wasting computational
        resources. This is where `batch_memory` comes in: `LossA` can store the visual representations it computed
        in `batch_memory` and then `LossB` can access them. Note that the `batch_memory` will be reinitialized after
        each new `batch` is sampled.

        ## `stream_memory`
        As described above, `batch_memory` treats each batch as its own independent collection of data. But what if
        your `ExperienceStorage` samples its batches in a streaming fashion? E.g. your `ExperienceStorage`
        might be a fixed collection of expert trajectories for use with imitation learning. In this case you can't
        simply treat each batch independently: you might want to save information from one batch to use in another.
        The simplest case of this would be if your agent `model` uses an RNN and produces a recurrent hidden state.
        In this case, the hidden state from the end of one batch should be used at the start of computations for the
        next batch. To allow for this, you can use the `stream_memory`. `stream_memory` is not cleared across
        batches but, **importantly**, `stream_memory` is detached from the computation graph after each backpropagation
        step so that the size of the computation graph does not grow unboundedly.

        # Parameters

        model: model to run on data batch (both assumed to be on the same device)
        batch: data to use as input for model (already on the same device as model)
        batch_memory: See above.
        stream_memory: See above.

        # Returns

        A `LossOutput` with:

        value: total loss
        info: additional information about the current loss
        per_epoch_info: additional per-epoch information (see `LossOutput`)
        batch_memory: `batch_memory` memory after processing current data batch, see above.
        stream_memory: `stream_memory` memory after processing current data batch, see above.
        bsize: batch size
        """
        raise NotImplementedError()


================================================
FILE: allenact/base_abstractions/preprocessor.py
================================================
import abc
from typing import List, Any, Dict
from typing import Sequence
from typing import Union

import gym
import networkx as nx
import torch
from gym.spaces import Dict as SpaceDict

from allenact.utils.experiment_utils import Builder


class Preprocessor(abc.ABC):
    """Abstract transformation applied to sensor (or preprocessor) outputs.

    A preprocessor consumes the observations identified by `input_uuids` and
    produces a new observation, identified by `uuid`, that can be fed to an
    agent or to further preprocessors. Subclasses must implement `process`
    and `to`, and must provide the attributes listed below.

    # Attributes:
        input_uuids : List of the unique ids of the observations this preprocessor reads.
        uuid : Unique id of the observation this preprocessor produces.
        observation_space : ``gym.Space`` describing the produced observation.
    """

    input_uuids: List[str]
    uuid: str
    observation_space: gym.Space

    def __init__(
        self,
        input_uuids: List[str],
        output_uuid: str,
        observation_space: gym.Space,
        **kwargs: Any
    ) -> None:
        """Store the ids and output space of this preprocessor.

        # Parameters

        input_uuids : Unique ids of the observations used as input.
        output_uuid : Unique id under which the output is exposed (stored as `uuid`).
        observation_space : Space of the processed observation.
        kwargs : Extra keyword arguments; accepted but not used here.
        """
        (self.uuid, self.input_uuids, self.observation_space) = (
            output_uuid,
            input_uuids,
            observation_space,
        )

    @abc.abstractmethod
    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:
        """Compute the processed observation.

        # Parameters

        obs : Dict holding the observations (raw and already processed) that are available.

        # Returns

        The processed observation.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def to(self, device: torch.device) -> "Preprocessor":
        """Move the preprocessor to `device`; must be implemented by subclasses."""
        raise NotImplementedError()


class SensorPreprocessorGraph:
    """Represents a graph of preprocessors, with each preprocessor being
    identified through a universally unique id.

    Allows for the construction of observations that are a function of
    sensor readings. For instance, perhaps rather than giving your agent
    a raw RGB image, you'd rather first pass that image through a pre-trained
    convolutional network and only give your agent the resulting features
    (see e.g. the `ResNetPreprocessor` class).

    # Attributes

    preprocessors : Dict mapping each preprocessor's output uuid to the preprocessor itself, the
        output uuid of each preprocessor must be unique.
    observation_spaces: The observation spaces of the values returned when calling `get_observations`.
        By default (see the `additional_output_uuids` parameter to change this default) the observations
        returned by the `SensorPreprocessorGraph` **include only the sink nodes** of the graph (i.e.
        those that are not used by any other preprocessor).
        Thus if one of the input preprocessors takes as input the `'YOUR_SENSOR_UUID'` sensor, then
        `'YOUR_SENSOR_UUID'` will not be returned when calling `get_observations`.
    device: The `torch.device` upon which the preprocessors are run.
    compute_order: The uuids of all nodes of the graph, listed so that every node appears after
        all of the nodes it takes as input.
    """

    preprocessors: Dict[str, Preprocessor]
    observation_spaces: SpaceDict
    device: torch.device

    def __init__(
        self,
        source_observation_spaces: SpaceDict,
        preprocessors: Sequence[Union[Preprocessor, Builder[Preprocessor]]],
        additional_output_uuids: Sequence[str] = tuple(),
    ) -> None:
        """Initializer.

        # Parameters

        source_observation_spaces : The observation spaces of all sensors before preprocessing.
            This generally should be the output of `SensorSuite.observation_spaces`.
        preprocessors : The preprocessors that will be included in the graph. `Builder` instances
            are called to obtain the actual preprocessor.
        additional_output_uuids: As described in the documentation for this class, the observations
            returned when calling `get_observations` only include, by default, those observations
            that are not processed by any preprocessor. If you'd like to include observations that
            would otherwise not be included, the uuids of these sensors should be included as
            a sequence of strings here.
        """
        self.device: torch.device = torch.device("cpu")

        obs_spaces: Dict[str, gym.Space] = {
            k: source_observation_spaces[k] for k in source_observation_spaces
        }

        self.preprocessors: Dict[str, Preprocessor] = {}
        for preprocessor in preprocessors:
            if isinstance(preprocessor, Builder):
                preprocessor = preprocessor()

            assert (
                preprocessor.uuid not in self.preprocessors
            ), "'{}' is duplicated preprocessor uuid".format(preprocessor.uuid)

            self.preprocessors[preprocessor.uuid] = preprocessor
            obs_spaces[preprocessor.uuid] = preprocessor.observation_space

        # Nodes are all known observations (sensors and preprocessors); an edge
        # `j -> k` means that preprocessor `k` takes observation `j` as input.
        g = nx.DiGraph()
        for k in obs_spaces:
            g.add_node(k)
        for k in self.preprocessors:
            for j in self.preprocessors[k].input_uuids:
                g.add_edge(j, k)

        assert nx.is_directed_acyclic_graph(
            g
        ), "preprocessors do not form a direct acyclic graph"

        # noinspection PyCallingNonCallable
        self.observation_spaces = SpaceDict(
            spaces={
                uuid: obs_spaces[uuid]
                for uuid in obs_spaces
                if uuid in additional_output_uuids or g.out_degree(uuid) == 0
            }
        )

        # Ensure dependencies are precomputed: a topological order guarantees that
        # every preprocessor is run only after all of its inputs are available.
        # (A DFS pre-order does not give this guarantee when a preprocessor consumes
        # the outputs of several other preprocessors.)
        self.compute_order = list(nx.topological_sort(g))

    def get(self, uuid: str) -> Preprocessor:
        """Return preprocessor with the given `uuid`.

        # Parameters

        uuid : The unique id of the preprocessor.

        # Returns

        The preprocessor with unique id `uuid`.
        """
        return self.preprocessors[uuid]

    def to(self, device: torch.device) -> "SensorPreprocessorGraph":
        """Move every preprocessor to `device` and record it as the graph's device.

        # Parameters

        device : The target `torch.device`.

        # Returns

        This graph (to allow chaining).
        """
        for k, v in self.preprocessors.items():
            self.preprocessors[k] = v.to(device)
        self.device = device
        return self

    def get_observations(
        self, obs: Dict[str, Any], *args: Any, **kwargs: Any
    ) -> Dict[str, Any]:
        """Get processed observations.

        # Parameters

        obs : Dict of the observations available so far (e.g. raw sensor readings). Note that
            this dict is updated in place with the output of every preprocessor that is run.

        # Returns

        Collect observations processed from all sensors and return them packaged inside a Dict
        (restricted to the uuids in `observation_spaces`).
        """

        for uuid in self.compute_order:
            # Anything already present (e.g. raw sensor readings) is not recomputed.
            if uuid not in obs:
                obs[uuid] = self.preprocessors[uuid].process(obs)

        return {uuid: obs[uuid] for uuid in self.observation_spaces}


class PreprocessorGraph(SensorPreprocessorGraph):
    """Deprecated alias of `SensorPreprocessorGraph`.

    Instantiation runs the parent initializer and then always raises a
    `DeprecationWarning`.
    """

    def __init__(self, *args, **kwargs):
        super(PreprocessorGraph, self).__init__(*args, **kwargs)
        deprecation_message = "`PreprocessorGraph` has been deprecated, use `SensorPreprocessorGraph` instead."
        raise DeprecationWarning(deprecation_message)


class ObservationSet:
    """Deprecated class kept only so that old code fails with a clear message.

    Any attempt to instantiate it raises a `DeprecationWarning`.
    """

    def __init__(self, *args, **kwargs) -> None:
        deprecation_message = "`ObservationSet` has been deprecated. Use `SensorPreprocessorGraph` instead."
        raise DeprecationWarning(deprecation_message)


================================================
FILE: allenact/base_abstractions/sensor.py
================================================
# Original work Copyright (c) Facebook, Inc. and its affiliates.
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from collections import OrderedDict
from typing import (
    Generic,
    Dict,
    Any,
    Optional,
    TYPE_CHECKING,
    TypeVar,
    Sequence,
    Union,
    Tuple,
    cast,
)
import abc

import gym
import gym.spaces as gyms
import numpy as np
from torch.distributions.utils import lazy_property

from allenact.base_abstractions.misc import EnvType
from allenact.utils import spaces_utils as su
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact.utils.system import get_logger

# `SubTaskType` is imported from the task module only while static type checking;
# at runtime a stand-alone `TypeVar` with a string (forward-reference) bound is
# defined instead.
# NOTE(review): presumably this avoids a circular import, since the task module
# imports `Sensor`/`SensorSuite` from this module -- confirm.
if TYPE_CHECKING:
    from allenact.base_abstractions.task import SubTaskType
else:
    SubTaskType = TypeVar("SubTaskType", bound="Task")

# Short alias for gym's dictionary space.
SpaceDict = gyms.Dict


class Sensor(Generic[EnvType, SubTaskType]):
    """Source of observations that an agent receives from the environment.

    Subclasses are expected to implement `get_observation` and to provide the
    attributes listed below.

    # Attributes

    uuid : Unique id of the sensor.
    observation_space : ``gym.Space`` describing the observations produced by
        the sensor.
    """

    uuid: str
    observation_space: gym.Space

    def __init__(self, uuid: str, observation_space: gym.Space, **kwargs: Any) -> None:
        """Store the sensor's id and observation space.

        # Parameters

        uuid : Unique id of the sensor.
        observation_space : Space of the observations produced by the sensor.
        kwargs : Extra keyword arguments; accepted but not used here.
        """
        (self.uuid, self.observation_space) = (uuid, observation_space)

    def get_observation(
        self, env: EnvType, task: Optional[SubTaskType], *args: Any, **kwargs: Any
    ) -> Any:
        """Produce the sensor's current observation.

        # Parameters

        env : Environment the sensor reads from.
        task : (Optionally) a Task the sensor may read data from.

        # Returns

        The current observation of this sensor.
        """
        raise NotImplementedError()


class SensorSuite(Generic[EnvType]):
    """Collection of sensors in which every sensor is addressed by its unique
    id.

    # Attributes

    sensors: ordered mapping from sensor uuid to sensor; the uuid of each
        sensor must be unique.
    observation_spaces: dictionary space built from the observation space of
        every sensor, keyed by sensor uuid.
    """

    sensors: Dict[str, Sensor[EnvType, Any]]
    observation_spaces: gyms.Dict

    def __init__(self, sensors: Sequence[Sensor]) -> None:
        """Initializer.

        # Parameters

        param sensors: the sensors to include in the suite, duplicated uuids
            trigger an assertion error.
        """
        self.sensors = OrderedDict()
        space_by_uuid: OrderedDict[str, gym.Space] = OrderedDict()
        for candidate in sensors:
            assert (
                candidate.uuid not in self.sensors
            ), f"'{candidate.uuid}' is duplicated sensor uuid"
            self.sensors[candidate.uuid] = candidate
            space_by_uuid[candidate.uuid] = candidate.observation_space
        self.observation_spaces = SpaceDict(spaces=space_by_uuid)

    def get(self, uuid: str) -> Sensor:
        """Look up a sensor by its unique id.

        # Parameters

        uuid : The unique id of the requested sensor

        # Returns

        The sensor registered under `uuid`.
        """
        requested = self.sensors[uuid]
        return requested

    def get_observations(
        self, env: EnvType, task: Optional[SubTaskType], **kwargs: Any
    ) -> Dict[str, Any]:
        """Query every sensor of the suite for its current observation.

        # Parameters

        env : The environment from which to get the observation.
        task : (Optionally) the task from which to get the observation.

        # Returns

        A Dict mapping each sensor uuid to that sensor's observation.
        """
        readings: Dict[str, Any] = {}
        for sensor_uuid, sensor in self.sensors.items():
            readings[sensor_uuid] = sensor.get_observation(  # type: ignore
                env=env, task=task, **kwargs
            )
        return readings


class AbstractExpertSensor(Sensor[EnvType, SubTaskType], abc.ABC):
    """Base class for sensors that obtain the expert action for a given task
    (if available).

    Subclasses must implement `flagged_group_space` (which defines the
    observation space of a single action group) and `query_expert` (which
    actually obtains the expert's output).
    """

    # Key under which the expert action (or policy) is stored in an observation.
    ACTION_POLICY_LABEL: str = "action_or_policy"
    # Key under which the expert success flag is stored in an observation.
    EXPERT_SUCCESS_LABEL: str = "expert_success"
    # Group name used internally when the action space is not split into groups.
    _NO_GROUPS_LABEL: str = "__dummy_expert_group__"

    def __init__(
        self,
        action_space: Optional[Union[gym.Space, int]] = None,
        uuid: str = "expert_sensor_type_uuid",
        expert_args: Optional[Dict[str, Any]] = None,
        nactions: Optional[int] = None,
        use_dict_as_groups: bool = True,
        **kwargs: Any,
    ) -> None:
        """Initialize an `ExpertSensor`.

        # Parameters
        action_space : The action space of the agent. This is necessary in order for this sensor
            to know what its output observation space is. An `int` is interpreted as
            `gym.spaces.Discrete(action_space)`.
        uuid : A string specifying the unique ID of this sensor.
        expert_args : This sensor obtains an expert action from the task by calling the `query_expert`
            method of the task. `expert_args` are any keyword arguments that should be passed to the
            `query_expert` method when called. The key `expert_sensor_group_name` is reserved.
        nactions : [DEPRECATED] The number of actions available to the agent, corresponds to an `action_space`
            of `gym.spaces.Discrete(nactions)`. Only used when `action_space` is `None`.
        use_dict_as_groups : Whether to use the top-level action_space of type `gym.spaces.Dict` as action groups.
        """
        if isinstance(action_space, int):
            action_space = gym.spaces.Discrete(action_space)
        elif action_space is None:
            # Deprecated path: fall back to `nactions` to build a discrete action space.
            assert (
                nactions is not None
            ), "One of `action_space` or `nactions` must be not `None`."
            get_logger().warning(
                "The `nactions` parameter to `AbstractExpertSensor` is deprecated and will be removed, please use"
                " the `action_space` parameter instead."
            )
            action_space = gym.spaces.Discrete(nactions)

        self.action_space = action_space

        # Groups are only used for a `Dict` action space, and only if requested.
        self.use_groups = (
            isinstance(action_space, gym.spaces.Dict) and use_dict_as_groups
        )

        # Without groups, the whole action space is wrapped as a single dummy group
        # so that `get_observation` can treat both cases uniformly.
        self.group_spaces = (
            self.action_space
            if self.use_groups
            else OrderedDict(
                [
                    (
                        self._NO_GROUPS_LABEL,
                        self.action_space,
                    )
                ]
            )
        )

        self.expert_args: Dict[str, Any] = expert_args or {}

        assert (
            "expert_sensor_group_name" not in self.expert_args
        ), "`expert_sensor_group_name` is reserved for `AbstractExpertSensor`"

        observation_space = self._get_observation_space()

        # NOTE(review): `prepare_locals_for_super` is defined elsewhere; it appears to turn
        # this function's local variables (e.g. `uuid` and the `observation_space` computed
        # above) into keyword arguments for the parent initializer, so the names of the
        # locals in this method likely matter -- confirm before renaming or adding any.
        super().__init__(**prepare_locals_for_super(locals()))

    @classmethod
    @abc.abstractmethod
    def flagged_group_space(cls, group_space: gym.spaces.Space) -> gym.spaces.Dict:
        """gym space resulting from wrapping the given action space (or a
        derived space, as in `AbstractExpertPolicySensor`) together with a
        binary action space corresponding to an expert success flag, in a Dict
        space.

        # Parameters
        group_space : The source action space to be (optionally used to derive a policy space,) flagged and wrapped
        """
        raise NotImplementedError

    @classmethod
    def flagged_space(
        cls, action_space: gym.spaces.Space, use_dict_as_groups: bool = True
    ) -> gym.spaces.Dict:
        """gym space resulting from wrapping the given action space (or every
        highest-level entry in a Dict action space), together with binary
        action space corresponding to an expert success flag, in a Dict space.

        # Parameters
        action_space : The agent's action space (to be flagged and wrapped)
        use_dict_as_groups : Flag enabling every highest-level entry in a Dict action space to be independently flagged.

        # Returns
        The flagged space of `action_space` itself when groups are not used, otherwise a Dict space
        with one flagged space per top-level entry of `action_space`.
        """
        use_groups = isinstance(action_space, gym.spaces.Dict) and use_dict_as_groups

        if not use_groups:
            return cls.flagged_group_space(action_space)
        else:
            # Note: despite its name, the loop variable `group_space` is used as a
            # key into `action_space` (i.e. it is the name of the group).
            return gym.spaces.Dict(
                [
                    (
                        group_space,
                        cls.flagged_group_space(action_space[group_space]),
                    )
                    for group_space in cast(gym.spaces.Dict, action_space)
                ]
            )

    def _get_observation_space(self) -> gym.spaces.Dict:
        """The observation space of the expert sensor.

        For the most basic discrete agent's ExpertActionSensor, it will
        equal `gym.spaces.Dict([ (self.ACTION_POLICY_LABEL,
        self.action_space), (self.EXPERT_SUCCESS_LABEL,
        gym.spaces.Discrete(2))])`, where the first entry hosts the
        expert action index and the second equals 0 if and only if the
        expert failed to generate a true expert action.
        """
        return self.flagged_space(self.action_space, use_dict_as_groups=self.use_groups)

    @lazy_property
    def _zeroed_observation(self) -> Union[OrderedDict, Tuple]:
        """An all-zeros observation with the (unflattened) structure of
        `self.observation_space`, used when no expert output is queried."""
        # AllenAct-style flattened space (to easily generate an all-zeroes action as an array)
        flat_space = su.flatten_space(self.observation_space)
        # torch point to correctly unflatten `Discrete` for zeroed output
        flat_zeroed = su.torch_point(flat_space, np.zeros_like(flat_space.sample()))
        # unflatten zeroed output and convert to numpy
        return su.numpy_point(
            self.observation_space, su.unflatten(self.observation_space, flat_zeroed)
        )

    def flatten_output(self, unflattened):
        """Flatten an observation structured like `self.observation_space`.

        The observation is converted with `su.torch_point`, flattened with
        `su.flatten`, and the result is moved to the CPU and converted to numpy.

        # Parameters
        unflattened : The structured (unflattened) observation.

        # Returns
        The flattened observation, converted to numpy.
        """
        return (
            su.flatten(
                self.observation_space,
                su.torch_point(self.observation_space, unflattened),
            )
            .cpu()
            .numpy()
        )

    @abc.abstractmethod
    def query_expert(
        self,
        task: SubTaskType,
        expert_sensor_group_name: Optional[str],
    ) -> Tuple[Any, bool]:
        """Query the expert for the given task (and optional group name).

        # Parameters

        task : The task for which the expert is queried.
        expert_sensor_group_name : Name of the action group being queried (when groups are not
            used, `get_observation` passes the internal `_NO_GROUPS_LABEL` here).

        # Returns

         A tuple (x, y) where x is the expert action or policy and y is False \
            if the expert could not determine the optimal action (otherwise True). Here y \
            is used for masking. Even when y is False, x should still lie in the space of \
            possible values (e.g. if x is the expert policy then x should be the correct length, \
            sum to 1, and have non-negative entries).
        """
        raise NotImplementedError

    def get_observation(
        self, env: EnvType, task: SubTaskType, *args: Any, **kwargs: Any
    ) -> Union[OrderedDict, Tuple]:
        """Query the expert once per action group and return the flattened result.

        # Parameters

        env : The environment (not used directly by this method).
        task : The task for which the expert is queried.

        # Returns

        The output of `flatten_output` applied to the expert observation; if the task
        is already done, the flattened all-zeros observation is returned instead.
        """
        # If the task is completed, we needn't (perhaps can't) find the expert
        # action from the (current) terminal state.
        if task.is_done():
            return self.flatten_output(self._zeroed_observation)

        actions_or_policies = OrderedDict()
        for group_name in self.group_spaces:
            action_or_policy, expert_was_successful = self.query_expert(
                task=task, expert_sensor_group_name=group_name
            )

            actions_or_policies[group_name] = OrderedDict(
                [
                    (self.ACTION_POLICY_LABEL, action_or_policy),
                    (self.EXPERT_SUCCESS_LABEL, expert_was_successful),
                ]
            )

        # Without groups, unwrap the single dummy group before flattening.
        return self.flatten_output(
            actions_or_policies
            if self.use_groups
            else actions_or_policies[self._NO_GROUPS_LABEL]
        )


class AbstractExpertActionSensor(AbstractExpertSensor, abc.ABC):
    """Abstract expert sensor whose observations hold an expert *action*
    together with an expert success flag."""

    def __init__(
        self,
        action_space: Optional[Union[gym.Space, int]] = None,
        uuid: str = "expert_action",
        expert_args: Optional[Dict[str, Any]] = None,
        nactions: Optional[int] = None,
        use_dict_as_groups: bool = True,
        **kwargs: Any,
    ) -> None:
        # The local variables of this method are forwarded as-is to the parent
        # initializer, hence no additional locals are introduced here.
        super().__init__(**prepare_locals_for_super(locals()))

    @classmethod
    def flagged_group_space(cls, group_space: gym.spaces.Space) -> gym.spaces.Dict:
        """Dict space pairing the given action space with a binary expert
        success flag.

        # Parameters
        group_space : The action space that is wrapped together with the flag
        """
        labelled_spaces = [
            (cls.ACTION_POLICY_LABEL, group_space),
            (cls.EXPERT_SUCCESS_LABEL, gym.spaces.Discrete(2)),
        ]
        return gym.spaces.Dict(labelled_spaces)


class ExpertActionSensor(AbstractExpertActionSensor):
    """(Deprecated) Expert action sensor that delegates to the `query_expert`
    method of the task itself (if available)."""

    def query_expert(
        self, task: SubTaskType, expert_sensor_group_name: Optional[str]
    ) -> Tuple[Any, bool]:
        """Forward the query, along with the stored `expert_args`, to the task."""
        forwarded_kwargs = self.expert_args
        expert_output = task.query_expert(
            **forwarded_kwargs, expert_sensor_group_name=expert_sensor_group_name
        )
        return expert_output


class AbstractExpertPolicySensor(AbstractExpertSensor, abc.ABC):
    """Abstract expert sensor whose observations hold an expert *policy*
    together with an expert success flag."""

    def __init__(
        self,
        action_space: Optional[Union[gym.Space, int]] = None,
        uuid: str = "expert_policy",
        expert_args: Optional[Dict[str, Any]] = None,
        nactions: Optional[int] = None,
        use_dict_as_groups: bool = True,
        **kwargs: Any,
    ) -> None:
        # The local variables of this method are forwarded as-is to the parent
        # initializer, hence no additional locals are introduced here.
        super().__init__(**prepare_locals_for_super(locals()))

    @classmethod
    def flagged_group_space(cls, group_space: gym.spaces.Space) -> gym.spaces.Dict:
        """Dict space pairing the policy space
        `allenact.utils.spaces_utils.policy_space(group_space)` with a binary
        expert success flag.

        # Parameters
        group_space : The source action space from which the policy space is derived before being flagged and wrapped
        """
        labelled_spaces = [
            (cls.ACTION_POLICY_LABEL, su.policy_space(group_space)),
            (cls.EXPERT_SUCCESS_LABEL, gym.spaces.Discrete(2)),
        ]
        return gym.spaces.Dict(labelled_spaces)


class ExpertPolicySensor(AbstractExpertPolicySensor):
    """(Deprecated) Expert policy sensor that delegates to the `query_expert`
    method of the task itself (if available)."""

    def query_expert(
        self, task: SubTaskType, expert_sensor_group_name: Optional[str]
    ) -> Tuple[Any, bool]:
        """Forward the query, along with the stored `expert_args`, to the task."""
        forwarded_kwargs = self.expert_args
        expert_output = task.query_expert(
            **forwarded_kwargs, expert_sensor_group_name=expert_sensor_group_name
        )
        return expert_output


================================================
FILE: allenact/base_abstractions/task.py
================================================
# Original work Copyright (c) Facebook, Inc. and its affiliates.
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

"""Defines the primary data structures by which agents interact with their
environment."""

import abc
from typing import Any, Dict, Generic, List, Optional, Sequence, Tuple, TypeVar, Union

import gym
import numpy as np
from gym.spaces.dict import Dict as SpaceDict

from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor, SensorSuite
from allenact.utils.misc_utils import deprecated

# Type variable standing for the environment type a `Task` is parameterized by.
EnvType = TypeVar("EnvType")


class Task(Generic[EnvType]):
    """Abstract, goal directed, 'task' through which agents interact with
    their environment: an agent takes a `step` and, in return, receives new
    observations, rewards, and (potentially) other useful information.

    A Task generalizes the OpenAI gym's `Env` class and makes it possible
    to define several tasks (e.g. point and object navigation) on top of
    a single environment (e.g. AI2-THOR).

    # Attributes

    env : The environment.
    sensor_suite: Collection of sensors formed from the `sensors` argument in the initializer.
    task_info : Dictionary of (k, v) pairs defining task goals and other task information.
    max_steps : The maximum number of steps an agent can take in the task before it is considered failed.
    observation_space: The observation space returned on each step from the sensors.
    """

    env: EnvType
    sensor_suite: SensorSuite[EnvType]
    task_info: Dict[str, Any]
    max_steps: int
    observation_space: SpaceDict

    def __init__(
        self,
        env: EnvType,
        sensors: Union[SensorSuite, Sequence[Sensor]],
        task_info: Dict[str, Any],
        max_steps: int,
        **kwargs
    ) -> None:
        """Initializer.

        # Parameters

        env : The environment.
        sensors : Either a ready-made `SensorSuite` or a sequence of sensors from which one is built.
        task_info : Dictionary defining task goals and other task information.
        max_steps : Maximum number of steps allowed in the task.
        kwargs : Extra keyword arguments; accepted but not used here.
        """
        self.env = env

        # Wrap plain sequences of sensors into a suite, reuse existing suites as they are.
        if isinstance(sensors, SensorSuite):
            suite = sensors
        else:
            suite = SensorSuite(sensors)
        self.sensor_suite = suite

        self.task_info = task_info
        self.max_steps = max_steps
        self.observation_space = suite.observation_spaces

        # Bookkeeping updated by `step`.
        self._num_steps_taken = 0
        self._total_reward: Union[float, List[float]] = 0.0

    def get_observations(self, **kwargs) -> Any:
        """Observations of all sensors in the suite for the current state."""
        suite = self.sensor_suite
        return suite.get_observations(env=self.env, task=self, **kwargs)

    @property
    @abc.abstractmethod
    def action_space(self) -> gym.Space:
        """Task's action space.

        # Returns

        The action space for the task.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Render the current task state.

        Any supported mode may be used for the rendered task state.

        # Parameters

        mode : The mode in which to render. For example, you might have a 'rgb'
            mode that renders the agent's egocentric viewpoint or a 'dev' mode
            returning additional information.
        args : Extra args.
        kwargs : Extra kwargs.

        # Returns

        A numpy array corresponding to the requested render.
        """
        raise NotImplementedError()

    def _increment_num_steps_taken(self) -> None:
        """Helper function that advances the steps counter by one."""
        self._num_steps_taken = self._num_steps_taken + 1

    def step(self, action: Any) -> RLStepResult:
        """Take an action in the environment (one per agent).

        The action is executed in the environment and the
        observations (& rewards and any additional information)
        corresponding to the agent's new state are returned. Note that this
        function should not be overwritten without care (instead
        implement the `_step` function).

        # Parameters

        action : The action to take, should be of the same form as specified by `self.action_space`.

        # Returns

        A `RLStepResult` object encoding the new observations, reward, and
        (possibly) additional information.
        """
        assert not self.is_done()
        step_result = self._step(action=action)

        # If reward is Sequence, it's assumed to follow the same order imposed by spaces' flatten operation
        if not isinstance(step_result.reward, Sequence):
            self._total_reward += float(step_result.reward)  # type:ignore
        elif isinstance(self._total_reward, Sequence):
            # Accumulate the rewards entry by entry.
            for index, entry in enumerate(step_result.reward):
                self._total_reward[index] += float(entry)
        else:
            # First sequence reward: switch from a scalar total to a per-entry total.
            self._total_reward = [float(entry) for entry in step_result.reward]

        self._increment_num_steps_taken()
        # TODO: We need a better solution to the below. It's not a good idea
        #   to pre-increment the step counter as this might play poorly with `_step`
        #   if it relies on some aspect of the current number of steps taken.
        done_update = {"done": step_result.done or self.is_done()}
        return step_result.clone(done_update)

    @abc.abstractmethod
    def _step(self, action: Any) -> RLStepResult:
        """Helper function called by `step` to take a step by each agent in the
        environment.

        The action is executed in the environment and the
        observations (& rewards and any additional information)
        corresponding to the agent's new state are returned. The (public)
        `step` function calls this function, which is the one to implement
        when defining a new task. Keeping `_step` separate from `step`
        lets `step` perform bookkeeping (e.g. keeping track of the number
        of steps); without `_step` as a separate method, everyone
        implementing `step` would need to copy this bookkeeping code.

        # Parameters

        action : The action to take.

        # Returns

        A `RLStepResult` object encoding the new observations, reward, and
        (possibly) additional information.
        """
        raise NotImplementedError()

    def reached_max_steps(self) -> bool:
        """Has the agent reached the maximum number of steps."""
        steps_so_far = self.num_steps_taken()
        return steps_so_far >= self.max_steps

    @abc.abstractmethod
    def reached_terminal_state(self) -> bool:
        """Has the agent reached a terminal state (excluding reaching the
        maximum number of steps)."""
        raise NotImplementedError()

    def is_done(self) -> bool:
        """Did the agent reach a terminal state or perform the maximum number
        of steps."""
        # The terminal state is checked first; the step limit is only
        # consulted when no terminal state has been reached.
        return self.reached_terminal_state() or self.reached_max_steps()

    def num_steps_taken(self) -> int:
        """Number of steps taken by the agent in the task so far."""
        return self._num_steps_taken

    @deprecated
    def action_names(self) -> Tuple[str, ...]:
        """Action names of the Task instance.

        This function has been deprecated and will be removed.

        It is a hold-over from when the `Task` abstraction only considered
        `gym.space.Discrete` action spaces (in which case it makes sense
        to name these actions).

        This implementation of `action_names` requires that a `class_action_names`
        method has been defined. This method should be overwritten if `class_action_names`
        requires key word arguments to determine the number of actions.
        """
        if not hasattr(self, "class_action_names"):
            raise NotImplementedError(
                "`action_names` requires that a function `class_action_names` be defined."
                " This said, please do not use this functionality as it has been deprecated and will be removed."
                " If you would like an `action_names` function for your task, feel free to define one"
                " with the knowledge that the AllenAct internals will ignore it."
            )
        return self.class_action_names()

    @abc.abstractmethod
    def close(self) -> None:
        """Closes the environment and any other files opened by the Task (if
        applicable)."""
        raise NotImplementedError()

    def metrics(self) -> Dict[str, Any]:
        """Computes metrics related to the task after the task's completion.

        By default this function is automatically called during training
        and the reported metrics logged to tensorboard.

        # Returns

        A dictionary where every key is a string (the metric's
            name) and the value is the value of the metric.
        """
        episode_length = self.num_steps_taken()
        total_reward = self.cumulative_reward
        return {
            "ep_length": episode_length,
            "reward": total_reward,
            "task_info": self.task_info,
        }

    def query_expert(self, **kwargs) -> Tuple[Any, bool]:
        """(Deprecated) Query the expert policy for this task.

        The new correct way to include this functionality is through the definition of a class
        derived from `allenact.base_abstractions.sensor.AbstractExpertActionSensor` or
        `allenact.base_abstractions.sensor.AbstractExpertPolicySensor`, where a
        `query_expert` method must be defined.

        # Returns

        A tuple (x, y) where x is the expert action (or policy) and y is False \
            if the expert could not determine the optimal action (otherwise True). Here y \
            is used for masking. Even when y is False, x should still lie in the space of \
            possible values (e.g. if x is the expert policy then x should be the correct length, \
            sum to 1, and have non-negative entries).
        """
        # Default implementation: no expert is available.
        return None, False

    @property
    def cumulative_reward(self) -> float:
        """Mean per-agent total cumulative reward in the task so far.

        # Returns

        Mean per-agent cumulative reward as a float.
        """
        accumulated = self._total_reward
        if isinstance(accumulated, Sequence):
            # Sequence of totals: report their mean as a Python scalar.
            return np.mean(accumulated).item()
        return accumulated


# Type variable standing for any concrete subclass of `Task`.
SubTaskType = TypeVar("SubTaskType", bound=Task)


class TaskSampler(abc.ABC):
    """Abstract class defining how new tasks are sampled.

    Every member below is abstract: concrete samplers must implement all of
    them.
    """

    @property
    @abc.abstractmethod
    def length(self) -> Union[int, float]:
        """Number of tasks that can still be sampled.

        # Returns

        Number of total tasks remaining that can be sampled. Can be
            float('inf').
        """
        raise NotImplementedError()

    @property
    @abc.abstractmethod
    def last_sampled_task(self) -> Optional[Task]:
        """Get the most recently sampled Task.

        # Returns

        The most recently sampled Task.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def next_task(self, force_advance_scene: bool = False) -> Optional[Task]:
        """Get the next task in the sampler's stream.

        # Parameters

        force_advance_scene : Used to (if applicable) force the task sampler to
            use a new scene for the next task. This is useful if, during training,
            you would like to train with one scene for some number of steps and
            then explicitly control when you begin training with the next scene.

        # Returns

        The next Task in the sampler's stream if a next task exists. Otherwise None.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def close(self) -> None:
        """Closes any open environments or streams.

        Should be run when done sampling.
        """
        raise NotImplementedError()

    @property
    @abc.abstractmethod
    def all_observation_spaces_equal(self) -> bool:
        """Checks if all observation spaces of tasks that can be sampled are
        equal.

        This will almost always simply return `True`. A case in which it should
        return `False` includes, for example, a setting where you design
        a `TaskSampler` that can generate different types of tasks, e.g.
        point navigation tasks and object navigation tasks. In this case, these
        different tasks may output different types of observations.

        # Returns

        True if all Tasks that can be sampled by this sampler have the
            same observation space. Otherwise False.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def reset(self) -> None:
        """Resets the task sampler to its original state (except for any
        seed)."""
        raise NotImplementedError()

    @abc.abstractmethod
    def set_seed(self, seed: int) -> None:
        """Sets a new RNG seed.

        # Parameters

        seed : New seed.
        """
        raise NotImplementedError()


================================================
FILE: allenact/embodiedai/__init__.py
================================================


================================================
FILE: allenact/embodiedai/aux_losses/__init__.py
================================================


================================================
FILE: allenact/embodiedai/aux_losses/losses.py
================================================
# Original work Copyright (c) Facebook, Inc. and its affiliates.
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Defining the auxiliary loss for actor critic type models.

Several of the losses defined in this file are modified versions of those found in
    https://github.com/joel99/habitat-pointnav-aux/blob/master/habitat_baselines/
"""


import abc
from typing import Dict, cast, Tuple, Sequence

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
    AbstractActorCriticLoss,
    ObservationType,
)
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput


def _bernoulli_subsample_mask_like(masks, p=0.1):
    return (torch.rand_like(masks) <= p).float()


class MultiAuxTaskNegEntropyLoss(AbstractActorCriticLoss):
    """Negative-entropy loss over the weights assigned to auxiliary tasks.

    Intended for the setting with multiple auxiliary tasks: the task weights
    found in `actor_critic_output.extras[UUID]` are treated as a categorical
    distribution and the mean negative entropy of that distribution is
    returned as the loss.
    """

    UUID = "multitask_entropy"  # make sure this is unique

    def __init__(self, task_names: Sequence[str], *args, **kwargs):
        """Record the auxiliary task names; remaining arguments go to the base
        class."""
        super().__init__(*args, **kwargs)
        self.num_tasks = len(task_names)
        self.task_names = task_names

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs,
    ) -> Tuple[torch.FloatTensor, Dict[str, float]]:
        """Compute the mean negative entropy of the task weights.

        # Returns

        A tuple of the scalar loss and a dictionary containing the loss value
            (`"entropy_loss"`) plus the batch-averaged weight of every task
            (`"weight_<task name>"`).
        """
        # Flatten all leading dims so that each row holds one set of K weights.
        weights = actor_critic_output.extras[self.UUID].view(-1, self.num_tasks)
        neg_entropy = -CategoricalDistr(weights).entropy()

        avg_loss = neg_entropy.mean()
        mean_weights = weights.mean(dim=0)  # (K)

        info = {"entropy_loss": cast(torch.Tensor, avg_loss).item()}
        info.update(
            {
                "weight_"
                + self.task_names[idx]: cast(torch.Tensor, mean_weights[idx]).item()
                for idx in range(self.num_tasks)
            }
        )

        return avg_loss, info


class AuxiliaryLoss(AbstractActorCriticLoss):
    """Common base for auxiliary task losses.

    Subclasses implement `get_aux_loss`; `loss` only gathers the inputs and
    delegates to it.
    """

    def __init__(self, auxiliary_uuid: str, *args, **kwargs):
        """Store the key used to look this task up in the model's extras."""
        super().__init__(*args, **kwargs)

        self.auxiliary_uuid = auxiliary_uuid

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs,
    ) -> Tuple[torch.Tensor, Dict[str, float]]:
        """Forward the batch and this task's model outputs to `get_aux_loss`.

        The entry `actor_critic_output.extras[self.auxiliary_uuid]` is unpacked
        as keyword arguments, alongside the `"observations"`, `"actions"` and
        `"masks"` entries of `batch`.
        """
        task_extras = actor_critic_output.extras[self.auxiliary_uuid]
        observations = batch["observations"]
        actions = batch["actions"]
        masks = batch["masks"]

        return self.get_aux_loss(
            observations=observations,
            actions=actions,
            masks=masks,
            **task_extras,
        )

    @abc.abstractmethod
    def get_aux_loss(
        self,
        aux_model: nn.Module,
        observations: ObservationType,
        obs_embeds: torch.Tensor,
        actions: torch.Tensor,
        beliefs: torch.Tensor,
        masks: torch.Tensor,
        *args,
        **kwargs,
    ):
        """Compute the auxiliary loss; must be provided by subclasses."""
        raise NotImplementedError()


def _propagate_final_beliefs_to_all_steps(
    beliefs: torch.Tensor,
    masks: torch.Tensor,
    num_sampler: int,
    num_steps: int,
):
    final_beliefs = torch.zeros_like(beliefs)  # (T, B, *)
    start_locs_list = []
    end_locs_list = []

    for i in range(num_sampler):
        # right shift: to locate the 1 before 0 and ignore the 1st element
        end_locs = torch.where(masks[1:, i] == 0)[0]  # maybe [], dtype=torch.Long

        start_locs = torch.cat(
            [torch.tensor([0]).to(end_locs), end_locs + 1]
        )  # add the first element
        start_locs_list.append(start_locs)

        end_locs = torch.cat(
            [end_locs, torch.tensor([num_steps - 1]).to(end_locs)]
        )  # add the last element
        end_locs_list.append(end_locs)

        for st, ed in zip(start_locs, end_locs):
            final_beliefs[st : ed + 1, i] = beliefs[ed, i]

    return final_beliefs, start_locs_list, end_locs_list


class InverseDynamicsLoss(AuxiliaryLoss):
    """Auxiliary task of Inverse Dynamics from Auxiliary Tasks Speed Up
    Learning PointGoal Navigation (Ye, 2020) https://arxiv.org/abs/2007.04561
    originally from Curiosity-driven Exploration by Self-supervised Prediction
    (Pathak, 2017) https://arxiv.org/abs/1705.05363."""

    UUID = "InvDyn"

    def __init__(
        self, subsample_rate: float = 0.2, subsample_min_num: int = 10, *args, **kwargs
    ):
        """Configure subsampling of the valid transitions.

        Valid samples are subsampled at rate `subsample_rate`, unless fewer
        than `subsample_min_num` of them are available, in which case all of
        them are kept.
        """
        super().__init__(auxiliary_uuid=self.UUID, *args, **kwargs)

        self.subsample_min_num = subsample_min_num
        self.subsample_rate = subsample_rate
        self.cross_entropy_loss = nn.CrossEntropyLoss(reduction="none")

    def get_aux_loss(
        self,
        aux_model: nn.Module,
        observations: ObservationType,
        obs_embeds: torch.FloatTensor,
        actions: torch.FloatTensor,
        beliefs: torch.FloatTensor,
        masks: torch.FloatTensor,
        *args,
        **kwargs,
    ):
        """Cross-entropy loss for predicting the action between two steps.

        `aux_model` receives the observation embeddings of two consecutive
        steps together with the final belief of the corresponding episode
        segment, and its output is scored against the action taken at the
        first of the two steps.

        # Returns

        A tuple of the averaged loss and a dictionary holding its value under
            `"total"`.
        """
        num_steps, num_sampler = actions.shape  # T, B
        num_transitions = num_steps - 1

        # The last action of the batch has no following observation: drop it.
        taken_actions = cast(torch.LongTensor, actions)[:-1]  # (T-1, B)

        # Final beliefs are located from the masks; nothing is recomputed here
        # because model.forward is compute-heavy.
        masks = masks.squeeze(-1)  # (T, B)
        final_beliefs, _, _ = _propagate_final_beliefs_to_all_steps(
            beliefs,
            masks,
            num_sampler,
            num_steps,
        )

        decoder_in = torch.cat(
            [obs_embeds[:-1], obs_embeds[1:], final_beliefs[:-1]], dim=2
        )  # (T-1, B, *)
        preds = aux_model(decoder_in)  # (T-1, B, A)

        # The cross entropy loss requires the class dim at position 1.
        flat_loss = self.cross_entropy_loss(
            preds.view(num_transitions * num_sampler, -1),  # ((T-1)*B, A)
            taken_actions.flatten(),  # ((T-1)*B,)
        )
        per_step_loss = flat_loss.view(num_transitions, num_sampler)  # (T-1, B)

        # Multiplying by masks[1:] is the vectorized way of keeping only the
        # transitions that stay within one episode; an explicit per-episode
        # loop was previously kept here, commented out, purely as a cross-check.
        transition_masks = masks[1:]
        if torch.count_nonzero(transition_masks) < self.subsample_min_num:
            subsample_rate = 1.0  # too few valid samples: don't subsample
        else:
            subsample_rate = self.subsample_rate

        loss_masks = transition_masks * _bernoulli_subsample_mask_like(
            transition_masks, subsample_rate
        )
        num_valid_losses = torch.count_nonzero(loss_masks)
        avg_loss = (per_step_loss * loss_masks).sum() / torch.clamp(
            num_valid_losses, min=1.0
        )

        return avg_loss, {"total": cast(torch.Tensor, avg_loss).item()}


class TemporalDistanceLoss(AuxiliaryLoss):
    """Auxiliary task of Temporal Distance from Auxiliary Tasks Speed Up
    Learning PointGoal Navigation (Ye, 2020)
    https://arxiv.org/abs/2007.04561."""

    UUID = "TempDist"

    def __init__(self, num_pairs: int = 8, epsiode_len_min: int = 5, *args, **kwargs):
        """Set the number of step pairs sampled per episode segment and the
        segment length that must be exceeded for a segment to contribute."""
        super().__init__(auxiliary_uuid=self.UUID, *args, **kwargs)
        self.epsiode_len_min = float(epsiode_len_min)
        self.num_pairs = num_pairs

    def get_aux_loss(
        self,
        aux_model: nn.Module,
        observations: ObservationType,
        obs_embeds: torch.FloatTensor,
        actions: torch.FloatTensor,
        beliefs: torch.FloatTensor,
        masks: torch.FloatTensor,
        *args,
        **kwargs,
    ):
        """Squared-error loss on the predicted step offset between two steps.

        For every episode segment in the batch, `num_pairs` pairs of steps are
        drawn uniformly inside the segment; `aux_model` predicts the signed
        step difference of each pair, and the error is scaled by the inverse
        of the segment's length (or by zero for segments that are too short).

        # Returns

        A tuple of the averaged loss and a dictionary holding its value under
            `"total"`.
        """
        num_steps, num_sampler = actions.shape  # T, B

        # Final beliefs are located from the masks; nothing is recomputed here
        # because model.forward is compute-heavy.
        masks = masks.squeeze(-1)  # (T, B)
        (
            final_beliefs,
            start_locs_list,
            end_locs_list,
        ) = _propagate_final_beliefs_to_all_steps(
            beliefs,
            masks,
            num_sampler,
            num_steps,
        )

        # One row per episode segment: [sampler index, start step, end step],
        # with the end step included in the segment.
        locs_batch = torch.cat(
            [
                torch.stack(
                    [sampler_idx * torch.ones_like(starts), starts, ends],
                    dim=-1,
                )  # shape (M[i], 3)
                for sampler_idx, (starts, ends) in enumerate(
                    zip(start_locs_list, end_locs_list)
                )
            ]
        )  # shape (M, 3)
        segment_starts = locs_batch[:, 1]
        segment_ends = locs_batch[:, 2]

        temporal_dist_max = (segment_ends - segment_starts).float()  # (M)
        # 1/length normalizer that zeroes out segments that are too short.
        normalizer = torch.where(
            temporal_dist_max > self.epsiode_len_min,
            1.0 / temporal_dist_max,
            torch.tensor([0]).to(temporal_dist_max),
        )  # (M)

        # Per-row integer bounds are needed for sampling, which is done with
        # numpy because torch.randint only accepts scalar bounds.
        draws_per_segment = 2 * self.num_pairs
        locs = locs_batch.cpu().numpy()
        lower = np.repeat(locs[:, [1]], draws_per_segment, axis=-1)  # (M, 2*k)
        upper = np.repeat(locs[:, [2]] + 1, draws_per_segment, axis=-1)  # (M, 2*k)
        sampled_pairs = np.random.randint(lower, upper).reshape(
            (-1, self.num_pairs, 2)
        )  # (M, k, 2)
        sampled_pairs_batch = torch.from_numpy(sampled_pairs).to(
            locs_batch
        )  # (M, k, 2)

        num_sampler_batch = (
            locs_batch[:, [0]]
            .expand(-1, draws_per_segment)  # (M, 1) -> (M, 2*k)
            .reshape(-1, self.num_pairs, 2)  # (M, k, 2)
        )

        sampled_obs_embeds = obs_embeds[
            sampled_pairs_batch, num_sampler_batch
        ]  # (M, k, 2, H1)
        sampled_final_beliefs = final_beliefs[
            sampled_pairs_batch, num_sampler_batch
        ]  # (M, k, 2, H2)

        first_obs = sampled_obs_embeds[:, :, 0]
        second_obs = sampled_obs_embeds[:, :, 1]
        segment_belief = sampled_final_beliefs[:, :, 0]
        features = torch.cat(
            [first_obs, second_obs, segment_belief], dim=-1
        )  # (M, k, 2*H1 + H2)

        pred_temp_dist = aux_model(features).squeeze(-1)  # (M, k)
        true_temp_dist = (
            sampled_pairs_batch[:, :, 1] - sampled_pairs_batch[:, :, 0]
        ).float()  # (M, k)

        pred_error = (pred_temp_dist - true_temp_dist) * normalizer.unsqueeze(1)
        avg_loss = (0.5 * pred_error.pow(2)).mean()

        return avg_loss, {"total": cast(torch.Tensor, avg_loss).item()}


class CPCALoss(AuxiliaryLoss):
    """Auxiliary task of CPC|A from Auxiliary Tasks Speed Up Learning PointGoal
    Navigation (Ye, 2020) https://arxiv.org/abs/2007.04561 originally from
    Neural Predictive Belief Representations (Guo, 2018)
    https://arxiv.org/abs/1811.06407."""

    UUID = "CPCA"

    def __init__(
        self, planning_steps: int = 8, subsample_rate: float = 0.2, *args, **kwargs
    ):
        """Store the prediction horizon (`planning_steps`) and the rate at
        which valid loss terms are subsampled; the per-element loss is a binary
        cross entropy on logits."""
        super().__init__(auxiliary_uuid=self.UUID, *args, **kwargs)
        self.planning_steps = planning_steps
        self.subsample_rate = subsample_rate
        self.cross_entropy_loss = nn.BCEWithLogitsLoss(reduction="none")

    def get_aux_loss(
        self,
        aux_model: nn.Module,
        observations: ObservationType,
        obs_embeds: torch.Tensor,
        actions: torch.Tensor,
        beliefs: torch.Tensor,
        masks: torch.Tensor,
        *args,
        **kwargs,
    ):
        """Binary-classification CPC|A loss over `planning_steps` future steps.

        Future contexts are rolled out by `aux_model.context_model` from the
        beliefs and the embedded actions. `aux_model.classifier` then scores
        each context against the observation embedding that actually followed
        (target 1) and against an embedding taken from a random permutation of
        all (step, sampler) positions in the batch (target 0). Only entries
        allowed by the validity mask, and kept by random subsampling,
        contribute to the averages.

        Requires `0 < planning_steps <= num_steps` (checked with an `assert`).

        # Returns

        A tuple of the total loss (positive + negative average) and a
            dictionary with `"total"`, `"positive_loss"` and
            `"negative_loss"`.
        """
        # prepare for autoregressive inputs: c_{t+1:t+k} = GRU(b_t, a_{t:t+k-1}) <-> z_{t+k}
        ## where b_t = RNN(b_{t-1}, z_t, a_{t-1}), prev action is optional
        num_steps, num_sampler, obs_embed_size = obs_embeds.shape  # T, N, H_O
        assert 0 < self.planning_steps <= num_steps

        ## prepare positives and negatives; negatives are sampled from the whole batch
        positives = obs_embeds  # (T, N, -1)
        negative_inds = torch.randperm(num_steps * num_sampler).to(positives.device)
        negatives = torch.gather(  # input[index[i,j]][j]
            positives.view(num_steps * num_sampler, -1),
            dim=0,
            index=negative_inds.view(num_steps * num_sampler, 1).expand(
                num_steps * num_sampler, positives.shape[-1]
            ),
        ).view(
            num_steps, num_sampler, -1
        )  # (T, N, -1)

        ## prepare action sequences and initial beliefs
        action_embedding = aux_model.action_embedder(actions)  # (T, N, -1)
        action_embed_size = action_embedding.size(-1)
        action_padding = torch.zeros(
            self.planning_steps - 1, num_sampler, action_embed_size
        ).to(
            action_embedding
        )  # (k-1, N, -1)
        action_padded = torch.cat(
            (action_embedding, action_padding), dim=0
        )  # (T+k-1, N, -1)

        ## unfold creates, for every step, the window of k consecutive actions starting there
        action_seq = (
            action_padded.unfold(dimension=0, size=self.planning_steps, step=1)
            .permute(3, 0, 1, 2)
            .view(self.planning_steps, num_steps * num_sampler, action_embed_size)
        )  # (k, T*N, -1)

        ## beliefs (GRU output) are used as the initial hidden state of the context model
        beliefs = beliefs.view(num_steps * num_sampler, -1).unsqueeze(0)  # (1, T*N, -1)

        # get future contexts c_{t+1:t+k} = GRU(b_t, a_{t:t+k-1})
        future_contexts_all, _ = aux_model.context_model(
            action_seq, beliefs
        )  # (k, T*N, -1)

        ## NOTE: future_contexts_all starting from next step t+1 to t+k, not t to t+k-1
        future_contexts_all = future_contexts_all.view(
            self.planning_steps, num_steps, num_sampler, -1
        ).permute(
            1, 0, 2, 3
        )  # (T, k, N, -1) after swapping the first two dims

        # get all the classifier scores I(c_{t+1:t+k}; z_{t+1:t+k})
        positives_padding = torch.zeros(
            self.planning_steps, num_sampler, obs_embed_size
        ).to(
            positives
        )  # (k, N, -1)
        positives_padded = torch.cat(
            (positives[1:], positives_padding), dim=0
        )  # (T+k-1, N, -1)
        positives_expanded = positives_padded.unfold(
            dimension=0, size=self.planning_steps, step=1
        ).permute(
            0, 3, 1, 2
        )  # (T, k, N, -1)
        positives_logits = aux_model.classifier(
            torch.cat([positives_expanded, future_contexts_all], -1)
        )  # (T, k, N, 1)
        positive_loss = self.cross_entropy_loss(
            positives_logits, torch.ones_like(positives_logits)
        )  # (T, k, N, 1)

        negatives_padding = torch.zeros(
            self.planning_steps, num_sampler, obs_embed_size
        ).to(
            negatives
        )  # (k, N, -1)
        negatives_padded = torch.cat(
            (negatives[1:], negatives_padding), dim=0
        )  # (T+k-1, N, -1)
        negatives_expanded = negatives_padded.unfold(
            dimension=0, size=self.planning_steps, step=1
        ).permute(
            0, 3, 1, 2
        )  # (T, k, N, -1)
        negatives_logits = aux_model.classifier(
            torch.cat([negatives_expanded, future_contexts_all], -1)
        )  # (T, k, N, 1)
        negative_loss = self.cross_entropy_loss(
            negatives_logits, torch.zeros_like(negatives_logits)
        )  # (T, k, N, 1)

        # Masking to get valid scores
        ## masks: Note which timesteps [1, T+k+1] could have valid queries, at distance (k) (note offset by 1)
        ## we will extract the **diagonals** as valid_masks from masks later as below
        ## the vertical axis is (absolute) real timesteps, the horizontal axis is (relative) planning timesteps
        ## | - - - - - |
        ## | .         |
        ## | , .       |
        ## | . , .     |
        ## | , . , .   |
        ## |   , . , . |
        ## |     , . , |
        ## |       , . |
        ## |         , |
        ## | - - - - - |
        masks = masks.squeeze(-1)  # (T, N)
        pred_masks = torch.ones(
            num_steps + self.planning_steps,
            self.planning_steps,
            num_sampler,
            1,
            dtype=torch.bool,
        ).to(
            beliefs.device
        )  # (T+k, k, N, 1)

        pred_masks[num_steps - 1 :] = (
            False  # GRU(b_t, a_{t:t+k-1}) is invalid when t >= T, as we don't have real z_{t+1}
        )
        for j in range(1, self.planning_steps + 1):  # for j-step predictions
            pred_masks[: j - 1, j - 1] = (
                False  # Remove the upper triangle above the diagonal (original author's note: probably unnecessary for valid_masks)
            )
            for n in range(num_sampler):
                has_zeros_batch = torch.where(masks[:, n] == 0)[0]
                # in j-step prediction, timesteps z -> z + j are disallowed as those are the first j timesteps of a new episode
                # z-> z-1 because of pred_masks being offset by 1
                for z in has_zeros_batch:
                    pred_masks[z - 1 : z - 1 + j, j - 1, n] = (
                        False  # can affect j timesteps
                    )

        # instead of the whole range, we actually are only comparing a window i:i+k for each query/target i - for each, select the appropriate k
        # we essentially gather diagonals from this full mask, t of them, k long
        valid_diagonals = [
            torch.diagonal(pred_masks, offset=-i) for i in range(num_steps)
        ]  # pull the appropriate k per timestep
        valid_masks = (
            torch.stack(valid_diagonals, dim=0).permute(0, 3, 1, 2).float()
        )  # (T, N, 1, k) -> (T, k, N, 1)
        # print(valid_masks.int().squeeze(-1)); print(masks) # verify its correctness

        # Randomly keep only a fraction (subsample_rate) of the valid entries.
        loss_masks = valid_masks * _bernoulli_subsample_mask_like(
            valid_masks, self.subsample_rate
        )  # (T, k, N, 1)
        num_valid_losses = torch.count_nonzero(loss_masks)
        # The clamp avoids dividing by zero when no entry survived the masking.
        avg_positive_loss = (positive_loss * loss_masks).sum() / torch.clamp(
            num_valid_losses, min=1.0
        )
        avg_negative_loss = (negative_loss * loss_masks).sum() / torch.clamp(
            num_valid_losses, min=1.0
        )

        avg_loss = avg_positive_loss + avg_negative_loss

        return (
            avg_loss,
            {
                "total": cast(torch.Tensor, avg_loss).item(),
                "positive_loss": cast(torch.Tensor, avg_positive_loss).item(),
                "negative_loss": cast(torch.Tensor, avg_negative_loss).item(),
            },
        )


class CPCASoftMaxLoss(AuxiliaryLoss):
    """Auxiliary task of CPC|A with multi class softmax.

    Every future context is scored (by dot product) against the projected
    observation embeddings of *all* (step, sampler) positions in the batch,
    and is trained to pick out the one that actually followed.
    """

    UUID = "cpcA_SOFTMAX"

    def __init__(
        self,
        planning_steps: int = 8,
        subsample_rate: float = 1,
        allow_skipping: bool = True,
        *args,
        **kwargs,
    ):
        """Configure the loss.

        # Parameters

        planning_steps : Number of future steps predicted from each step.
        subsample_rate : Threshold passed to the random subsampling of valid
            loss terms.
        allow_skipping : If True, a batch for which `planning_steps` is not in
            `(0, num_steps]` yields `(0, {})` instead of raising.
        """
        super().__init__(auxiliary_uuid=self.UUID, *args, **kwargs)
        self.planning_steps = planning_steps
        self.subsample_rate = subsample_rate
        self.cross_entropy_loss = nn.CrossEntropyLoss(
            reduction="none"
        )  # nn.BCEWithLogitsLoss(reduction="none")
        self.allow_skipping = allow_skipping

    def get_aux_loss(
        self,
        aux_model: nn.Module,
        observations: ObservationType,
        obs_embeds: torch.Tensor,
        actions: torch.Tensor,
        beliefs: torch.Tensor,
        masks: torch.Tensor,
        *args,
        **kwargs,
    ):
        """Softmax (multi-class) CPC|A loss.

        # Returns

        A tuple of the averaged loss and a dictionary holding its value under
            `"total"`. When the batch is too short for `planning_steps` and
            `allow_skipping` is set, `(0, {})` is returned instead.

        # Raises

        RuntimeError : If `planning_steps` is not in `(0, num_steps]` and
            `allow_skipping` is False.
        """
        # prepare for autoregressive inputs: c_{t+1:t+k} = GRU(b_t, a_{t:t+k-1}) <-> z_{t+k}
        ## where b_t = RNN(b_{t-1}, z_t, a_{t-1}), prev action is optional
        num_steps, num_samplers, _ = obs_embeds.shape  # T, N, H_O

        if not (0 < self.planning_steps <= num_steps):
            if self.allow_skipping:
                return 0, {}
            raise RuntimeError(
                f"Insufficient planning steps: self.planning_steps {self.planning_steps} must"
                f" be greater than zero and less than or equal to num_steps {num_steps}."
            )

        ## prepare action sequences and initial beliefs
        action_embedding = aux_model.action_embedder(actions)  # (T, N, -1)
        action_embed_size = action_embedding.size(-1)
        action_padding = torch.zeros(
            self.planning_steps - 1,
            num_samplers,
            action_embed_size,
            device=action_embedding.device,
        )  # (k-1, N, -1)
        action_padded = torch.cat(
            (action_embedding, action_padding), dim=0
        )  # (T+k-1, N, -1)

        ## unfold creates, for every step, the window of k consecutive actions
        action_seq = (
            action_padded.unfold(dimension=0, size=self.planning_steps, step=1)
            .permute(3, 0, 1, 2)
            .view(self.planning_steps, num_steps * num_samplers, action_embed_size)
        )  # (k, T*N, -1)

        ## project the observation embeddings into the comparison space
        obs_embeds = aux_model.visual_mlp(obs_embeds)  # (T, N, D)

        beliefs = beliefs.view(1, num_steps * num_samplers, -1)  # (1, T*N, -1)

        # get future contexts c_{t+1:t+k} = GRU(b_t, a_{t:t+k-1})
        future_contexts_all, _ = aux_model.context_model(
            action_seq, beliefs
        )  # (k, T*N, -1)

        future_contexts_all = aux_model.belief_mlp(future_contexts_all)  # (k, T*N, D)
        # Use the projection's real width rather than assuming a fixed size, so
        # the loss does not depend on one particular `belief_mlp` output dim.
        future_contexts_all = future_contexts_all.view(
            -1, future_contexts_all.shape[-1]
        )  # (k*T*N, D)

        obs_embeds = obs_embeds.view(
            num_steps * num_samplers, obs_embeds.shape[-1]
        ).permute(
            1, 0
        )  # (D, T*N)

        visual_logits = torch.matmul(future_contexts_all, obs_embeds)
        visual_log_probs = F.log_softmax(visual_logits, dim=1)  ## (k*T*N, T*N)

        # Targets and validity mask are assembled on the host and moved to the
        # device once: reading `masks` element by element and writing single
        # entries into device tensors inside the loops below would otherwise
        # require a device round trip per element.
        mask_rows = masks.reshape(masks.shape[0], masks.shape[1]).tolist()
        target = torch.zeros(
            (self.planning_steps, num_steps, num_samplers),
            dtype=torch.long,
        )  # (k, T, N)
        loss_mask = torch.zeros(
            (self.planning_steps, num_steps, num_samplers)
        )  # (k, T, N)

        for j in range(num_samplers):
            # Number of earlier steps in the batch that belong to the same
            # episode as step i; step 0 never has any.
            num_valid_before = 0
            for i in range(1, num_steps):
                if mask_rows[i][j] == 0:  # step i starts a new episode
                    num_valid_before = 0
                    continue

                num_valid_before += 1
                # Row index of step i / sampler j in the flattened (T*N) obs.
                index = i * num_samplers + j
                for back in range(min(num_valid_before, self.planning_steps)):
                    # The context rolled out (back+1) steps from step
                    # i-(back+1) should identify the observation at step i.
                    target[back, i - (back + 1), j] = index
                    loss_mask[back, i - (back + 1), j] = 1.0

        target = target.to(beliefs.device).view(-1)  # (k*T*N,)
        loss_mask = loss_mask.to(beliefs.device)

        # NOTE: nn.CrossEntropyLoss applies log_softmax itself; on rows that
        # are already normalized log-probabilities that is mathematically a
        # no-op, so the value equals the negative log-likelihood of the target.
        loss_value = self.cross_entropy_loss(visual_log_probs, target)
        loss_value = loss_value.view(
            self.planning_steps, num_steps, num_samplers, 1
        )  # (k, T, N, 1)

        loss_mask = loss_mask.unsqueeze(-1)  # (k, T, N, 1)
        loss_valid_masks = loss_mask * _bernoulli_subsample_mask_like(
            loss_mask, self.subsample_rate
        )  # (k, T, N, 1)

        num_valid_losses = torch.count_nonzero(loss_valid_masks)

        # The clamp avoids dividing by zero when no entry is valid.
        avg_multi_class_loss = (loss_value * loss_valid_masks).sum() / torch.clamp(
            num_valid_losses, min=1.0
        )

        return (
            avg_multi_class_loss,
            {
                "total": cast(torch.Tensor, avg_multi_class_loss).item(),
            },
        )


######## CPCA Softmax variants ######


class CPCA1SoftMaxLoss(CPCASoftMaxLoss):
    """`CPCASoftMaxLoss` with `planning_steps` fixed to 1 and its own UUID."""

    UUID = "cpcA_SOFTMAX_1"

    def __init__(self, subsample_rate: float = 1, *args, **kwargs):
        super().__init__(
            *args, planning_steps=1, subsample_rate=subsample_rate, **kwargs
        )


class CPCA2SoftMaxLoss(CPCASoftMaxLoss):
    """`CPCASoftMaxLoss` with `planning_steps` fixed to 2 and its own UUID."""

    UUID = "cpcA_SOFTMAX_2"

    def __init__(self, subsample_rate: float = 1, *args, **kwargs):
        super().__init__(
            *args, planning_steps=2, subsample_rate=subsample_rate, **kwargs
        )


class CPCA4SoftMaxLoss(CPCASoftMaxLoss):
    """`CPCASoftMaxLoss` with `planning_steps` fixed to 4 and its own UUID."""

    UUID = "cpcA_SOFTMAX_4"

    def __init__(self, subsample_rate: float = 1, *args, **kwargs):
        super().__init__(
            *args, planning_steps=4, subsample_rate=subsample_rate, **kwargs
        )


class CPCA8SoftMaxLoss(CPCASoftMaxLoss):
    """`CPCASoftMaxLoss` with `planning_steps` fixed to 8 and its own UUID."""

    UUID = "cpcA_SOFTMAX_8"

    def __init__(self, subsample_rate: float = 1, *args, **kwargs):
        super().__init__(
            *args, planning_steps=8, subsample_rate=subsample_rate, **kwargs
        )


class CPCA16SoftMaxLoss(CPCASoftMaxLoss):
    """`CPCASoftMaxLoss` with `planning_steps` fixed to 16 and its own UUID."""

    UUID = "cpcA_SOFTMAX_16"

    def __init__(self, subsample_rate: float = 1, *args, **kwargs):
        super().__init__(
            *args, planning_steps=16, subsample_rate=subsample_rate, **kwargs
        )


######## CPCA (binary cross-entropy) variants ######


class CPCA1Loss(CPCALoss):
    """`CPCALoss` with `planning_steps` fixed to 1 and its own UUID."""

    UUID = "CPCA_1"

    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):
        super().__init__(
            *args, planning_steps=1, subsample_rate=subsample_rate, **kwargs
        )


class CPCA2Loss(CPCALoss):
    """`CPCALoss` with `planning_steps` fixed to 2 and its own UUID."""

    UUID = "CPCA_2"

    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):
        super().__init__(
            *args, planning_steps=2, subsample_rate=subsample_rate, **kwargs
        )


class CPCA4Loss(CPCALoss):
    """`CPCALoss` with `planning_steps` fixed to 4 and its own UUID."""

    UUID = "CPCA_4"

    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):
        super().__init__(
            *args, planning_steps=4, subsample_rate=subsample_rate, **kwargs
        )


class CPCA8Loss(CPCALoss):
    """`CPCALoss` with `planning_steps` fixed to 8 and its own UUID."""

    UUID = "CPCA_8"

    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):
        super().__init__(
            *args, planning_steps=8, subsample_rate=subsample_rate, **kwargs
        )


class CPCA16Loss(CPCALoss):
    """`CPCALoss` with `planning_steps` fixed to 16 and its own UUID."""

    UUID = "CPCA_16"

    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):
        super().__init__(
            *args, planning_steps=16, subsample_rate=subsample_rate, **kwargs
        )


================================================
FILE: allenact/embodiedai/mapping/__init__.py
================================================


================================================
FILE: allenact/embodiedai/mapping/mapping_losses.py
================================================
import torch
from torch.nn import functional as F

from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
    AbstractActorCriticLoss,
)
from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput


class BinnedPointCloudMapLoss(AbstractActorCriticLoss):
    """A (binary cross entropy) loss for training metric maps for free space
    prediction."""

    def __init__(
        self,
        binned_pc_uuid: str,
        map_logits_uuid: str,
    ):
        """Initializer.

        # Parameters
        binned_pc_uuid : The uuid of a sensor returning
            a dictionary with an "egocentric_update"
            key with the same format as returned by
            `allenact.embodiedai.mapping.mapping_utils.map_builders.BinnedPointCloudMapBuilder`. Such a sensor
            can be found in the `allenact_plugins` library: see
            `allenact_plugins.ithor_plugin.ithor_sensors.BinnedPointCloudMapTHORSensor`.
        map_logits_uuid : key used to index into `actor_critic_output.extras` (returned by the model)
            whose value should be a tensor of the same shape as the tensor corresponding to the above
            "egocentric_update" key.
        """
        super().__init__()
        self.binned_pc_uuid = binned_pc_uuid
        self.map_logits_uuid = map_logits_uuid

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs,
    ):
        """Compute the binary cross entropy between predicted and true maps.

        The ground-truth map is read from
        `batch["observations"][self.binned_pc_uuid]["egocentric_update"]` and is
        treated as channels-last (`... x h x w x c`); the predicted logits are
        read from `actor_critic_output.extras[self.map_logits_uuid]`. All leading
        dimensions are flattened into a single batch dimension. Ground-truth
        values are binarized with a 0.5 threshold before computing the loss.

        # Returns
        A tuple `(loss, info)` where `loss` is the scalar loss tensor and `info`
        is a dictionary holding the loss value (as a float) under the key
        `"binned_pc_map_ce"`.
        """
        ego_map_gt = batch["observations"][self.binned_pc_uuid][
            "egocentric_update"
        ].float()
        *_, h, w, c = ego_map_gt.shape
        # `reshape` (rather than `view`) so that inputs whose leading dimensions
        # cannot be merged without a copy (e.g. sliced / permuted batches) are
        # handled instead of raising a RuntimeError.
        ego_map_gt = ego_map_gt.reshape(-1, h, w, c).permute(0, 3, 1, 2).contiguous()

        ego_map_logits = actor_critic_output.extras[self.map_logits_uuid]
        # The predicted maps are taken to be square and channels-first.
        vision_range = ego_map_logits.shape[-1]
        ego_map_logits = ego_map_logits.reshape(-1, c, vision_range, vision_range)

        assert ego_map_gt.shape == ego_map_logits.shape, (
            f"Ground truth map shape {tuple(ego_map_gt.shape)} does not match"
            f" predicted map shape {tuple(ego_map_logits.shape)}."
        )

        ego_map_gt_thresholded = (ego_map_gt > 0.5).float()
        total_loss = F.binary_cross_entropy_with_logits(
            ego_map_logits, ego_map_gt_thresholded
        )

        return (
            total_loss,
            {"binned_pc_map_ce": total_loss.item()},
        )

        # FOR DEBUGGING: Save all the ground-truth & predicted maps side by side
        # import numpy as np
        # import imageio
        # for i in range(ego_map_gt_thresholded.shape[0]):
        #     a = ego_map_gt_thresholded[i].permute(1, 2, 0).flip(0).detach().numpy()
        #     b = torch.sigmoid(ego_map_logits)[i].permute(1, 2, 0).flip(0).detach().numpy()
        #
        #     imageio.imwrite(
        #         f"z_occupancy_maps/{i}.png",
        #         np.concatenate((a, 1 + 0 * a[:, :10], b), axis=1),
        #     )


class SemanticMapFocalLoss(AbstractActorCriticLoss):
    """A (focal-loss based) loss for training semantic metric maps.

    As semantic maps tend to be quite sparse this loss uses the focal
    loss (https://arxiv.org/abs/1708.02002) rather than binary cross
    entropy (BCE). If the `gamma` parameter is 0.0 then this is just the
    normal BCE, larger values of `gamma` result in less and less emphasis
    being paid to examples that are already well classified.
    """

    def __init__(
        self, semantic_map_uuid: str, map_logits_uuid: str, gamma: float = 2.0
    ):
        """Initializer.

        # Parameters
        semantic_map_uuid : The uuid of a sensor returning
            a dictionary with an "egocentric_update"
            key with the same format as returned by
            `allenact.embodiedai.mapping.mapping_utils.map_builders.SemanticMapBuilder`. Such a sensor
            can be found in the `allenact_plugins` library: see
            `allenact_plugins.ithor_plugin.ithor_sensors.SemanticMapTHORSensor`.
        map_logits_uuid : key used to index into `actor_critic_output.extras` (returned by the model)
            whose value should be a tensor of the same shape as the tensor corresponding to the above
            "egocentric_update" key.
        gamma : The (non-negative) focusing exponent of the focal loss. A value of 0.0 recovers
            the usual binary cross entropy.
        """
        super().__init__()
        assert gamma >= 0, f"`gamma` (=={gamma}) must be >= 0"
        self.semantic_map_uuid = semantic_map_uuid
        self.map_logits_uuid = map_logits_uuid
        self.gamma = gamma

    def loss(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
        *args,
        **kwargs,
    ):
        """Compute the focal loss between predicted and true semantic maps.

        The ground-truth map is read from
        `batch["observations"][self.semantic_map_uuid]["egocentric_update"]` and
        is treated as channels-last (its last three dimensions are permuted to
        channels-first); the predicted logits are read from
        `actor_critic_output.extras[self.map_logits_uuid]`. All leading
        dimensions are flattened into a single batch dimension.

        # Returns
        A tuple `(loss, info)` where `loss` is the scalar loss tensor and `info`
        is a dictionary holding the loss value (as a float) under the key
        `"sem_map_focal_loss"`.
        """
        ego_map_gt = batch["observations"][self.semantic_map_uuid]["egocentric_update"]
        # `reshape` (rather than `view`) so that inputs whose leading dimensions
        # cannot be merged without a copy (e.g. sliced / permuted batches) are
        # handled instead of raising a RuntimeError.
        ego_map_gt = (
            ego_map_gt.reshape(-1, *ego_map_gt.shape[-3:])
            .permute(0, 3, 1, 2)
            .contiguous()
        )

        ego_map_logits = actor_critic_output.extras[self.map_logits_uuid]
        ego_map_logits = ego_map_logits.reshape(-1, *ego_map_logits.shape[-3:])

        assert ego_map_gt.shape == ego_map_logits.shape, (
            f"Ground truth map shape {tuple(ego_map_gt.shape)} does not match"
            f" predicted map shape {tuple(ego_map_logits.shape)}."
        )

        # Probabilities and log-probabilities are computed directly from the
        # logits (sigmoid(-x) == 1 - sigmoid(x)).
        p = torch.sigmoid(ego_map_logits)
        one_minus_p = torch.sigmoid(-ego_map_logits)

        log_p = F.logsigmoid(ego_map_logits)
        log_one_minus_p = F.logsigmoid(-ego_map_logits)

        ego_map_gt = ego_map_gt.float()
        # Focal loss: each log-likelihood term is down-weighted by how
        # confidently the corresponding example is already classified.
        total_loss = -(
            ego_map_gt * (log_p * (one_minus_p**self.gamma))
            + (1 - ego_map_gt) * (log_one_minus_p * (p**self.gamma))
        ).mean()

        return (
            total_loss,
            {"sem_map_focal_loss": total_loss.item()},
        )

        # FOR DEBUGGING: Save all the ground-truth & predicted maps side by side
        # import numpy as np
        # import imageio
        # from allenact.embodiedai.mapping.mapping_utils.map_builders import SemanticMapBuilder
        #
        # print("\n" * 3)
        # for i in range(ego_map_gt.shape[0]):
        #     pred_sem_map = torch.sigmoid(ego_map_logits)[i].permute(1, 2, 0).flip(0).detach()
        #     a = SemanticMapBuilder.randomly_color_semantic_map(ego_map_gt[i].permute(1, 2, 0).flip(0).detach())
        #     b = SemanticMapBuilder.randomly_color_semantic_map(pred_sem_map)
        #     imageio.imwrite(
        #         f"z_semantic_maps/{i}.png",
        #         np.concatenate((a, 255 + a[:, :10] * 0, b), axis=1),
        #     )
        #


================================================
FILE: allenact/embodiedai/mapping/mapping_models/__init__.py
================================================


================================================
FILE: allenact/embodiedai/mapping/mapping_models/active_neural_slam.py
================================================
# MIT License
#
# Original Copyright (c) 2020 Devendra Chaplot
#
# Modified work Copyright (c) 2021 Allen Institute for Artificial Intelligence
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import math
from typing import Optional, Tuple, Dict, Any, cast

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

from allenact.utils.model_utils import simple_conv_and_linear_weights_init

DEGREES_TO_RADIANS = np.pi / 180.0
RADIANS_TO_DEGREES = 180.0 / np.pi


def _inv_sigmoid(x: torch.Tensor):
    """Return the elementwise inverse of the sigmoid, `log(x) - log(1 - x)`."""
    log_of_x = x.log()
    log_of_one_minus_x = (-x).log1p()
    return log_of_x - log_of_one_minus_x


class ActiveNeuralSLAM(nn.Module):
    """Active Neural SLAM module.

    This is an implementation of the Active Neural SLAM module
    from:
    ```
    Chaplot, D.S., Gandhi, D., Gupta, S., Gupta, A. and Salakhutdinov, R., 2020.
    Learning To Explore Using Active Neural SLAM.
    In International Conference on Learning Representations (ICLR).
    ```
    Note that this is purely the mapping component and does not include the planning
    components from the above paper.

    This implementation is adapted from `https://github.com/devendrachaplot/Neural-SLAM`,
    we have extended this implementation to allow for an arbitrary number of output map
    channels (enabling semantic mapping).

    At a high level, this model takes as input RGB egocentric images and outputs metric
    map tensors of shape (# channels) x height x width where height/width correspond to the
    ground plane of the environment.
    """

    def __init__(
        self,
        frame_height: int,
        frame_width: int,
        n_map_channels: int,
        resolution_in_cm: int = 5,
        map_size_in_cm: int = 2400,
        vision_range_in_cm: int = 300,
        use_pose_estimation: bool = False,
        pretrained_resnet: bool = True,
        freeze_resnet_batchnorm: bool = True,
        use_resnet_layernorm: bool = False,
    ):
        """Initialize an Active Neural SLAM module.

        # Parameters

        frame_height : The height of the RGB images given to this module on calls to `forward`.
        frame_width : The width of the RGB images given to this module on calls to `forward`.
        n_map_channels : The number of output channels in the output maps.
        resolution_in_cm : The resolution of the output map, see `map_size_in_cm`.
        map_size_in_cm : The height & width of the map in centimeters. The size of the map
            tensor returned on calls to forward will be `map_size_in_cm/resolution_in_cm`. Note
            that `map_size_in_cm` must be divisible by `resolution_in_cm`.
        vision_range_in_cm : Given an RGB image input, this module will transform this image into
            an "egocentric map" with height and width equaling `vision_range_in_cm/resolution_in_cm`.
            This egocentric map corresponds to the area of the world directly in front of the agent.
            This "egocentric map" will be rotated/translated into the allocentric reference frame and
            used to update the larger, allocentric, map whose
            height and width equal `map_size_in_cm/resolution_in_cm`. Thus this parameter controls
            how much of the map will be updated on every step. Note that
            `vision_range_in_cm/resolution_in_cm` must be divisible by 8.
        use_pose_estimation : Whether or not we should estimate the agent's change in position/rotation.
            If `False`, you'll need to provide the ground truth changes in position/rotation.
        pretrained_resnet : Whether or not to use ImageNet pre-trained model weights for the ResNet18
            backbone.
        freeze_resnet_batchnorm : Whether or not the batch normalization layers in the ResNet18 backbone
            should be frozen and batchnorm updates disabled. You almost certainly want this to be `True`
            as using batch normalization during RL training results in all sorts of issues unless you're
            very careful.
        use_resnet_layernorm : If you've enabled `freeze_resnet_batchnorm` (recommended) you'll likely want
            to normalize the output from the ResNet18 model as we've found that these values can otherwise
            grow quite large harming learning.
        """
        super(ActiveNeuralSLAM, self).__init__()
        self.frame_height = frame_height
        self.frame_width = frame_width
        self.n_map_channels = n_map_channels
        self.resolution_in_cm = resolution_in_cm
        self.map_size_in_cm = map_size_in_cm
        self.input_channels = 3
        self.vision_range_in_cm = vision_range_in_cm
        self.dropout = 0.5
        self.use_pose_estimation = use_pose_estimation
        self.freeze_resnet_batchnorm = freeze_resnet_batchnorm

        self.max_abs_map_logit_value = 20

        # Visual Encoding
        resnet = models.resnet18(pretrained=pretrained_resnet)
        self.resnet_l5 = nn.Sequential(*list(resnet.children())[0:8])
        self.conv = nn.Sequential(nn.Conv2d(512, 64, (1, 1), stride=(1, 1)), nn.ReLU())
        # Kept in a plain list (the modules are already registered through
        # `self.resnet_l5`) so that `train` can force them back into eval mode.
        self.bn_modules = [
            module
            for module in self.resnet_l5.modules()
            if "BatchNorm" in type(module).__name__
        ]
        if freeze_resnet_batchnorm:
            for bn in self.bn_modules:
                bn.momentum = 0

        # Layernorm (if requested)
        self.use_resnet_layernorm = use_resnet_layernorm
        if self.use_resnet_layernorm:
            assert (
                self.freeze_resnet_batchnorm
            ), "When using layernorm, we require that set `freeze_resnet_batchnorm` to True."
            # NOTE: The hard-coded `normalized_shape` requires the ResNet feature
            # maps to be of shape 512 x 7 x 7.
            self.resnet_normalizer = nn.Sequential(
                nn.Conv2d(512, 512, 1),
                nn.LayerNorm(
                    normalized_shape=[512, 7, 7],
                    elementwise_affine=True,
                ),
            )
            self.resnet_normalizer.apply(simple_conv_and_linear_weights_init)
        else:
            self.resnet_normalizer = nn.Identity()

        # convolution output size
        input_test = torch.randn(
            1, self.input_channels, self.frame_height, self.frame_width
        )
        # This forward pass is only used to discover the flattened output size. It
        # is run in eval mode and without gradient tracking so that the random
        # input cannot modify the batch normalization running statistics of the
        # (possibly pretrained) backbone. The `self.train()` call at the end of
        # this initializer restores the training mode of all submodules.
        self.resnet_l5.eval()
        with torch.no_grad():
            # Have to explicitly call .forward to get past LGTM checks as it thinks nn.Sequential isn't callable
            conv_output = self.conv.forward(self.resnet_l5.forward(input_test))

        self.conv_output_size = conv_output.view(-1).size(0)

        # projection layer
        self.proj1 = nn.Linear(self.conv_output_size, 1024)
        # The three stride-2 transposed convolutions below upsample by a factor of 8
        assert self.vision_range % 8 == 0
        self.deconv_in_height = self.vision_range // 8
        self.deconv_in_width = self.deconv_in_height
        self.n_input_channels_for_deconv = 64
        proj2_out_size = 64 * self.deconv_in_height * self.deconv_in_width
        self.proj2 = nn.Linear(1024, proj2_out_size)

        if self.dropout > 0:
            self.dropout1 = nn.Dropout(self.dropout)
            self.dropout2 = nn.Dropout(self.dropout)

        # Deconv layers to predict map
        self.deconv = nn.Sequential(
            nn.ConvTranspose2d(
                self.n_input_channels_for_deconv,
                32,
                (4, 4),
                stride=(2, 2),
                padding=(1, 1),
            ),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 16, (4, 4), stride=(2, 2), padding=(1, 1)),
            nn.ReLU(),
            nn.ConvTranspose2d(
                16, self.n_map_channels, (4, 4), stride=(2, 2), padding=(1, 1)
            ),
        )

        # Pose Estimator
        self.pose_conv = nn.Sequential(
            nn.Conv2d(2 * self.n_map_channels, 64, (4, 4), stride=(2, 2)),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 32, (4, 4), stride=(2, 2)),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 16, (3, 3), stride=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Flatten(),
        )

        # Only the output size is needed, no gradients need to be tracked
        with torch.no_grad():
            self.pose_conv_output_dim = (
                self.pose_conv.forward(
                    torch.zeros(
                        1, 2 * self.n_map_channels, self.vision_range, self.vision_range
                    )
                )
                .view(-1)
                .size(0)
            )

        # projection layer
        # NOTE: `pose_proj3_y` is the head used for the z-displacement (see
        # `estimate_egocentric_dx_dz_dr`). Its name is kept as is because module
        # attribute names determine the keys of the `state_dict`.
        self.pose_proj1 = nn.Linear(self.pose_conv_output_dim, 1024)
        self.pose_proj2_x = nn.Linear(1024, 128)
        self.pose_proj2_z = nn.Linear(1024, 128)
        self.pose_proj2_o = nn.Linear(1024, 128)
        self.pose_proj3_x = nn.Linear(128, 1)
        self.pose_proj3_y = nn.Linear(128, 1)
        self.pose_proj3_o = nn.Linear(128, 1)

        if self.dropout > 0:
            self.pose_dropout1 = nn.Dropout(self.dropout)

        self.train()

    @property
    def device(self):
        """The `torch.device` on which the `pose_proj1` weights are stored."""
        return self.pose_proj1.weight.device

    def train(self, mode: bool = True):
        """Set the training mode, keeping frozen batchnorm layers in eval mode.

        # Parameters
        mode : `True` for training mode, `False` for evaluation mode.

        # Returns
        This module (as is done by `torch.nn.Module.train`), this is required
        so that `.train()` and `.eval()` can be chained.
        """
        super().train(mode=mode)
        if mode and self.freeze_resnet_batchnorm:
            for module in self.bn_modules:
                module.eval()
        return self

    @property
    def map_size(self):
        """Height/width (in cells) of the allocentric map."""
        return self.map_size_in_cm // self.resolution_in_cm

    @property
    def vision_range(self):
        """Height/width (in cells) of the egocentric map."""
        return self.vision_range_in_cm // self.resolution_in_cm

    def image_to_egocentric_map_logits(
        self,
        images: Optional[torch.Tensor],
        resnet_image_features: Optional[torch.Tensor] = None,
    ):
        """Predict egocentric map logits from images (or ResNet features).

        # Parameters
        images : Batch of images, only the first 3 channels are used. Ignored
            (and so may be `None`) if `resnet_image_features` is given.
        resnet_image_features : Optional precomputed (and normalized) ResNet
            features to use in place of `images`.

        # Returns
        The output of the deconvolutional map decoder, one map per batch element.
        """
        if resnet_image_features is None:
            bs, _, _, _ = images.size()
            resnet_image_features = self.resnet_normalizer(
                self.resnet_l5(images[:, :3, :, :])
            )
        else:
            bs = resnet_image_features.shape[0]

        conv_output = self.conv(resnet_image_features)

        proj1 = F.relu(self.proj1(conv_output.reshape(-1, self.conv_output_size)))
        if self.dropout > 0:
            proj1 = self.dropout1(proj1)
        proj3 = F.relu(self.proj2(proj1))

        deconv_input = proj3.view(
            bs,
            self.n_input_channels_for_deconv,
            self.deconv_in_height,
            self.deconv_in_width,
        )
        deconv_output = self.deconv(deconv_input)
        return deconv_output

    def allocentric_map_to_egocentric_view(
        self, allocentric_map: torch.Tensor, xzr: torch.Tensor, padding_mode: str
    ):
        """Extract egocentric views from allocentric maps (without gradients).

        # Parameters
        allocentric_map : Batch of allocentric maps, given to `F.grid_sample` as
            its input.
        xzr : A (# batches) x 3 tensor whose first two columns are positions and
            whose last column is a rotation in degrees.
        padding_mode : The `padding_mode` to use in `F.grid_sample`.

        # Returns
        The rotated/translated maps cropped to their first `self.vision_range`
        rows and to the `self.vision_range` columns around the center column.
        """
        # Index the egocentric viewpoints at the given xzr locations
        with torch.no_grad():
            allocentric_map = allocentric_map.float()
            xzr = xzr.float()

            theta = xzr[:, 2].float() * float(np.pi / 180)

            # Here form the rotation matrix
            cos_theta = torch.cos(theta)
            sin_theta = torch.sin(theta)
            rot_mat = torch.stack(
                (
                    torch.stack((cos_theta, -sin_theta), -1),
                    torch.stack((sin_theta, cos_theta), -1),
                ),
                1,
            )

            # Presumably converts positions in meters to the normalized
            # [-1, 1] coordinates used by `F.affine_grid`.
            scaler = 2 * (100 / (self.resolution_in_cm * self.map_size))
            offset_to_center_the_agent = scaler * xzr[:, :2].unsqueeze(-1) - 1

            offset_to_top_of_image = rot_mat @ torch.FloatTensor([0, 1.0]).unsqueeze(
                1
            ).to(self.device)
            rotation_and_translate_mat = torch.cat(
                (
                    rot_mat,
                    offset_to_top_of_image + offset_to_center_the_agent,
                ),
                dim=-1,
            )

            # `align_corners` is passed explicitly to `affine_grid` so that it is
            # guaranteed to agree with the value used in `grid_sample`.
            ego_map = F.grid_sample(
                allocentric_map,
                F.affine_grid(
                    rotation_and_translate_mat.to(self.device),
                    allocentric_map.shape,
                    align_corners=False,
                ),
                padding_mode=padding_mode,
                align_corners=False,
            )

            vr = self.vision_range
            half_vr = vr // 2
            center = self.map_size_in_cm // (2 * self.resolution_in_cm)
            cropped = ego_map[:, :, :vr, (center - half_vr) : (center + half_vr)]
            return cropped

    def estimate_egocentric_dx_dz_dr(
        self,
        map_probs_egocentric: torch.Tensor,
        last_map_probs_egocentric: torch.Tensor,
    ):
        """Estimate the agent's egocentric movement from two egocentric maps.

        Both inputs are detached (so no gradients flow back into the map
        predictions) and concatenated along the channel dimension.

        # Parameters
        map_probs_egocentric : The current egocentric map probabilities.
        last_map_probs_egocentric : The egocentric map probabilities from the
            previous step, must have the same shape as `map_probs_egocentric`.

        # Returns
        A (# batches) x 3 tensor with the predicted dx, dz, and change in rotation.
        """
        assert last_map_probs_egocentric.shape == map_probs_egocentric.shape

        pose_est_input = torch.cat(
            (map_probs_egocentric.detach(), last_map_probs_egocentric.detach()), dim=1
        )
        pose_conv_output = self.pose_conv(pose_est_input)

        proj1 = F.relu(self.pose_proj1(pose_conv_output))

        if self.dropout > 0:
            proj1 = self.pose_dropout1(proj1)

        proj2_x = F.relu(self.pose_proj2_x(proj1))
        pred_dx = self.pose_proj3_x(proj2_x)

        # `pose_proj3_y` is the output head for the z-displacement
        proj2_z = F.relu(self.pose_proj2_z(proj1))
        pred_dz = self.pose_proj3_y(proj2_z)

        proj2_o = F.relu(self.pose_proj2_o(proj1))
        pred_do = self.pose_proj3_o(proj2_o)

        return torch.cat((pred_dx, pred_dz, pred_do), dim=1)

    @staticmethod
    def update_allocentric_xzrs_with_egocentric_movement(
        last_xzrs_allocentric: torch.Tensor,
        dx_dz_drs_egocentric: torch.Tensor,
    ):
        """Apply egocentric movements to allocentric poses.

        # Parameters
        last_xzrs_allocentric : A (# batches) x 3 tensor of allocentric
            (x, z, rotation in degrees) poses. This tensor is not modified.
        dx_dz_drs_egocentric : A (# batches) x 3 tensor of egocentric changes in
            x, z, and rotation (in degrees).

        # Returns
        A new (# batches) x 3 tensor of updated allocentric poses whose
        rotations lie in the interval [-180, 180).
        """
        new_xzrs_allocentric = last_xzrs_allocentric.clone()

        # Rotate the egocentric displacement by the agent's previous rotation
        theta = new_xzrs_allocentric[:, 2] * DEGREES_TO_RADIANS
        sin_theta = torch.sin(theta)
        cos_theta = torch.cos(theta)
        new_xzrs_allocentric[:, :2] += torch.matmul(
            torch.stack([cos_theta, -sin_theta, sin_theta, cos_theta], dim=-1).view(
                -1, 2, 2
            ),
            dx_dz_drs_egocentric[:, :2].unsqueeze(-1),
        ).squeeze(-1)

        new_xzrs_allocentric[:, 2] += dx_dz_drs_egocentric[:, 2]
        # `torch.fmod` keeps the sign of its first argument, the first wrap below
        # guarantees a value > -180 so that the second wrap returns a value
        # in [-180, 180).
        new_xzrs_allocentric[:, 2] = (
            torch.fmod(new_xzrs_allocentric[:, 2] - 180.0, 360.0) + 180.0
        )
        new_xzrs_allocentric[:, 2] = (
            torch.fmod(new_xzrs_allocentric[:, 2] + 180.0, 360.0) - 180.0
        )

        return new_xzrs_allocentric

    def forward(
        self,
        images: Optional[torch.Tensor],
        last_map_probs_allocentric: Optional[torch.Tensor],
        last_xzrs_allocentric: Optional[torch.Tensor],
        dx_dz_drs_egocentric: Optional[torch.Tensor],
        last_map_logits_egocentric: Optional[torch.Tensor],
        return_allocentric_maps=True,
        resnet_image_features: Optional[torch.Tensor] = None,
    ) -> Dict[str, Any]:
        """Create allocentric/egocentric maps predictions given RGB image
        inputs.

        Here it is assumed that `last_xzrs_allocentric` has been re-centered so that (x, z) == (0,0)
        corresponds to the top left of the returned map (with increasing x/z moving to the bottom right of the map).

        Note that all maps are oriented so that:
        * **Increasing x values** correspond to **increasing columns** in the map(s).
        * **Increasing z values** correspond to **increasing rows** in the map(s).
        Note that this may seem a bit weird as:
        * "north" is pointing downwards in the map,
        * if you picture yourself as the agent facing north (i.e. down) in the map, then moving to the right from
            the agent's perspective will correspond to **increasing** which column the agent is at:
        ```
        agent facing downwards - - > (dir. to the right of the agent, i.e. moving right corresponds to +cols)
            |
            |
            v (dir. agent faces, i.e. moving ahead corresponds to +rows)
        ```
            This may be the opposite of what you expect.

        # Parameters
        images : A (# batches) x 3 x height x width tensor of RGB images. These should be
            normalized for use with a resnet model. See [here](https_DOC_COLON_//pytorch.org/vision/stable/models.html)
            for information (see also the `use_resnet_normalization` parameter of the
            `allenact.base_abstractions.sensor.RGBSensor` sensor).
        last_map_probs_allocentric : A (# batches) x (map channels) x (map height) x (map width)
            tensor representing the collection of allocentric maps to be updated.
        last_xzrs_allocentric : A (# batches) x 3 tensor where `last_xzrs_allocentric[_DOC_COLON_, 0]`
            are the agent's (allocentric) x-coordinates on the previous step,
            `last_xzrs_allocentric[_DOC_COLON_, 1]` are the agent's (allocentric) z-coordinates from the previous
            step, and `last_xzrs_allocentric[_DOC_COLON_, 2]` are the agent's rotations (allocentric, in degrees)
            from the previous step.
        dx_dz_drs_egocentric : A (# batches) x 3 tensor representing the agent's change in x (in meters), z (in meters),
            and rotation (in degrees) from the previous step. Note that these changes are "egocentric" so that if the
            agent moved 1 meter ahead from its perspective this should correspond to a dz of +1.0 regardless of
            the agent's orientation (similarly moving right would result in a dx of +1.0). This
            is ignored (and thus can be `None`) if you are using pose estimation
            (i.e. `self.use_pose_estimation` is `True`) or if `return_allocentric_maps` is `False`.
        last_map_logits_egocentric : The "egocentric_update" output when calling this function
            on the last agent's step. I.e. this should be the egocentric map view of the agent
            from the last step. This is used to compute the change in the agent's position rotation.
            This is ignored (and thus can be `None`) if you do not wish to estimate the agent's pose
            (i.e. `self.use_pose_estimation` is `False`).
        return_allocentric_maps : Whether or not to generate new allocentric maps given `last_map_probs_allocentric`
            and the new map estimates. Creating these new allocentric maps is expensive so better avoided when
            not needed.
        resnet_image_features : Sometimes you may wish to compute the ResNet image features yourself for use
            in another part of your model. Rather than having to recompute them multiple times, you can
            instead compute them once and pass them into this forward call (in this case the input `images`
            parameter is ignored). Note that if you're using the `self.resnet_l5` module to compute these
            features, be sure to also normalize them with `self.resnet_normalizer` if you have opted to
            `use_resnet_layernorm` when initializing this module).

        # Returns
        A dictionary with keys/values:
        * "egocentric_update" - The egocentric map view for the given RGB image. This is what should
            be used for computing losses in general.
        * "map_probs_allocentric_update_no_grad" - The egocentric map view after it has been
            rotated, translated, and moved into a full-sized allocentric map. This map has been
            detached from the computation graph and so should not be used for gradient computations.
            This will be `None` if `return_allocentric_maps` was `False`.
        * "map_probs_allocentric_no_grad" - The newly updated allocentric map, this corresponds to
            performing a pointwise maximum between `last_map_probs_allocentric` and the
            above returned `map_probs_allocentric_update_no_grad`.
            This will be `None` if `return_allocentric_maps` was `False`.
        * "dx_dz_dr_egocentric_preds" - The predicted change in x, z, and rotation of the agent (from the
            egocentric perspective of the agent). This will be `None` if `last_map_logits_egocentric` is `None`.
        *  "xzr_allocentric_preds" - The (predicted if `self.use_pose_estimation == True`) allocentric
            (x, z) position and rotation of the agent. This will equal `None` if `self.use_pose_estimation == False`
            and `dx_dz_drs_egocentric` is `None`.
        """
        # TODO: For consistency we should update things so that:
        #  "Furthermore, the rotation component of `last_xzrs_allocentric` and `dx_dz_drs_egocentric`
        #  should be specified in **degrees* with positive rotation corresponding to a **CLOCKWISE**
        #  rotation (this is the default used by the many game engines)."
        map_logits_egocentric = self.image_to_egocentric_map_logits(
            images=images, resnet_image_features=resnet_image_features
        )
        map_probs_egocentric = torch.sigmoid(map_logits_egocentric)

        dx_dz_dr_egocentric_preds = None
        if last_map_logits_egocentric is not None:
            dx_dz_dr_egocentric_preds = self.estimate_egocentric_dx_dz_dr(
                map_probs_egocentric=map_probs_egocentric,
                last_map_probs_egocentric=torch.sigmoid(last_map_logits_egocentric),
            )

        if self.use_pose_estimation:
            updated_xzrs_allocentrc = (
                self.update_allocentric_xzrs_with_egocentric_movement(
                    last_xzrs_allocentric=last_xzrs_allocentric,
                    dx_dz_drs_egocentric=dx_dz_dr_egocentric_preds,
                )
            )
        elif dx_dz_drs_egocentric is not None:
            updated_xzrs_allocentrc = (
                self.update_allocentric_xzrs_with_egocentric_movement(
                    last_xzrs_allocentric=last_xzrs_allocentric,
                    dx_dz_drs_egocentric=dx_dz_drs_egocentric,
                )
            )
        else:
            updated_xzrs_allocentrc = None

        if return_allocentric_maps:
            # Aggregate egocentric map prediction in the allocentric map
            # using the predicted pose (if `self.use_pose_estimation`) or the ground
            # truth pose (if not `self.use_pose_estimation`)
            with torch.no_grad():
                # Rotate and translate the egocentric map view, we do this grid sampling
                # at the level of probabilities as bad results can occur at the logit level
                full_size_allocentric_map_probs_update = (
                    _move_egocentric_map_view_into_allocentric_position(
                        map_probs_egocentric=map_probs_egocentric,
                        xzrs_allocentric=updated_xzrs_allocentrc,
                        allocentric_map_height_width=(self.map_size, self.map_size),
                        resolution_in_cm=self.resolution_in_cm,
                    )
                )

                map_probs_allocentric = torch.max(
                    last_map_probs_allocentric, full_size_allocentric_map_probs_update
                )
        else:
            full_size_allocentric_map_probs_update = None
            map_probs_allocentric = None

        return {
            "egocentric_update": map_logits_egocentric,
            "map_probs_allocentric_update_no_grad": full_size_allocentric_map_probs_update,
            "map_probs_allocentric_no_grad": map_probs_allocentric,
            "dx_dz_dr_egocentric_preds": dx_dz_dr_egocentric_preds,
            "xzr_allocentric_preds": updated_xzrs_allocentrc,
        }


def _move_egocentric_map_view_into_allocentric_position(
    map_probs_egocentric: torch.Tensor,
    xzrs_allocentric: torch.Tensor,
    allocentric_map_height_width: Tuple[int, int],
    resolution_in_cm: float,
):
    """Translate/rotate an egocentric map view into an allocentric map.

    Let's say you have a collection of egocentric maps in a tensor of shape
    `(# batches) x (# channels) x (# ego rows) x (# ego columns)`
    where these are "egocentric" as we assume the agent is always
    at the center of the map and facing "downwards", namely
    * **ahead** of the agent should correspond to **increasing rows** in the map(s).
    * **right** of the agent should correspond to **increasing columns** in the map(s).
    Note that the above is a bit weird as, if you picture yourself as the agent facing
    downwards in the map, then moving to the right from the agent's perspective would
    intuitively correspond to moving left in the image, whereas here it corresponds to
    increasing columns. Here's how things should look if you plotted one of these
    egocentric maps:
    ```
    center of map - - > (dir. to the right of the agent, i.e. moving right corresponds to +cols)
        |
        |
        v (dir. agent faces, i.e. moving ahead corresponds to +rows)
    ```

    This function is used to translate/rotate the above ego maps so that
    they are in the right position/rotation in an allocentric map of size
    `(# batches) x (# channels) x (# allocentric_map_height_width[0]) x (# allocentric_map_height_width[1])`.

    Adapted from the get_grid function in https://github.com/devendrachaplot/Neural-SLAM.

    # Parameters
    map_probs_egocentric : Egocentric map views.
    xzrs_allocentric : (# batches)x3 tensor with `xzrs_allocentric[:, 0]` being the x-coordinates (in meters),
        `xzrs_allocentric[:, 1]` being the z-coordinates (in meters), and `xzrs_allocentric[:, 2]` being the rotation
        (in degrees) of the agent in the allocentric reference frame. Here it is assumed that `xzrs_allocentric` has
        been re-centered so that (x, z) == (0,0) corresponds to the top left of the returned map (with increasing
        x/z moving to the bottom right of the map). Note that positive rotations are in the counterclockwise direction.
    allocentric_map_height_width : Height/width of the allocentric map to be returned
    resolution_in_cm : Resolution (in cm) of map to be returned (and of map_probs_egocentric). I.e.
        `map_probs_egocentric[0,0,0:1,0:1]` should correspond to a `resolution_in_cm x resolution_in_cm`
        square on the ground plane in the world.

    # Returns
    `(# batches) x (# channels) x (# allocentric_map_height_width[0]) x (# allocentric_map_height_width[1])`
    tensor where the input `map_probs_egocentric` maps have been rotated/translated so that they
    are in the positions specified by `xzrs_allocentric`.

    # Raises
    NotImplementedError : If the allocentric map is too small to contain every possible
        rotation of the egocentric view.
    AssertionError : If any of the egocentric/allocentric heights/widths is odd.
    """
    # TODO: For consistency we should update the rotations so they are in the clockwise direction.

    nbatch, c, ego_h, ego_w = cast(
        Tuple[int, int, int, int], map_probs_egocentric.shape
    )
    allo_h, allo_w = allocentric_map_height_width

    # Largest distance (in map cells) from the agent to any cell of its egocentric view.
    # The agent sits at the middle of the first row of the view, hence `ego_w / 2` and `ego_h`.
    max_view_range = math.sqrt((ego_w / 2.0) ** 2 + ego_h**2)
    if min(allo_h, allo_w) / 2.0 < max_view_range:
        raise NotImplementedError(
            f"The shape of your egocentric view (ego_h, ego_w)==({ego_h}, {ego_w})"
            f" is too large relative the size of the allocentric map (allo_h, allo_w)==({allo_h}, {allo_w})."
            f" The height/width of your allocentric map should be at least {2 * max_view_range} to allow"
            f" for no information to be lost when rotating the egocentric map."
        )

    assert (ego_h % 2, ego_w % 2, allo_h % 2, allo_w % 2) == (
        0,
    ) * 4, "All map heights/widths should be divisible by 2."

    # First we place the egocentric map view into a zero-filled map that has the
    # same size as the allocentric map: columns are centered and rows start at the
    # center row so that the agent ends up at the center of the full-size map.
    full_size_ego_map_update_probs = map_probs_egocentric.new_zeros(
        (nbatch, c, allo_h, allo_w)
    )

    x1 = allo_w // 2 - ego_w // 2
    x2 = x1 + ego_w
    z1 = allo_h // 2
    z2 = z1 + ego_h
    full_size_ego_map_update_probs[:, :, z1:z2, x1:x2] = map_probs_egocentric

    # Now we'll rotate and translate `full_size_ego_map_update_probs`
    # so that the egocentric map view is positioned where it should be
    # in the allocentric coordinate frame

    # To do this we first need to rescale our allocentric xz coordinates
    # so that the center of the map is (0,0) and the top left corner is (-1, -1)
    # as this is what's expected by the `affine_grid` function below.
    rescaled_xzrs_allocentric = xzrs_allocentric.clone().detach().float()
    rescaled_xzrs_allocentric[:, :2] *= (
        100.0 / resolution_in_cm
    )  # Put x / z into map units rather than meters
    rescaled_xzrs_allocentric[:, 0] /= allo_w / 2  # x corresponds to columns
    rescaled_xzrs_allocentric[:, 1] /= allo_h / 2  # z corresponds to rows
    rescaled_xzrs_allocentric[:, :2] -= 1.0  # Re-center

    x = rescaled_xzrs_allocentric[:, 0]
    z = rescaled_xzrs_allocentric[:, 1]
    theta = (
        -rescaled_xzrs_allocentric[:, 2] * DEGREES_TO_RADIANS
    )  # Notice the negative sign

    cos_theta = theta.cos()
    sin_theta = theta.sin()
    zeroes = torch.zeros_like(cos_theta)
    ones = torch.ones_like(cos_theta)

    # Batched 2x3 affine matrix performing a pure rotation
    theta11 = torch.stack([cos_theta, -sin_theta, zeroes], 1)
    theta12 = torch.stack([sin_theta, cos_theta, zeroes], 1)
    theta1 = torch.stack([theta11, theta12], 1)

    # Batched 2x3 affine matrix performing a pure translation
    theta21 = torch.stack([ones, zeroes, x], 1)
    theta22 = torch.stack([zeroes, ones, z], 1)
    theta2 = torch.stack([theta21, theta22], 1)

    grid_size = [nbatch, c, allo_h, allo_w]
    # `align_corners` is given explicitly so that the sampling grids agree with the
    # `grid_sample` calls below (and to avoid the warning emitted when it is omitted).
    rot_grid = F.affine_grid(theta1, grid_size, align_corners=False)
    trans_grid = F.affine_grid(theta2, grid_size, align_corners=False)

    # Rotate first, then translate the rotated result.
    return F.grid_sample(
        F.grid_sample(
            full_size_ego_map_update_probs,
            rot_grid,
            padding_mode="zeros",
            align_corners=False,
        ),
        trans_grid,
        padding_mode="zeros",
        align_corners=False,
    )


================================================
FILE: allenact/embodiedai/mapping/mapping_utils/__init__.py
================================================


================================================
FILE: allenact/embodiedai/mapping/mapping_utils/map_builders.py
================================================
# MIT License
#
# Original Copyright (c) 2020 Devendra Chaplot
#
# Modified work Copyright (c) 2021 Allen Institute for Artificial Intelligence
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import random
from typing import Optional, Sequence, Union, Dict

import cv2
import numpy as np
import torch
import torch.nn.functional as F

from allenact.embodiedai.mapping.mapping_utils.point_cloud_utils import (
    depth_frame_to_world_space_xyz,
    project_point_cloud_to_map,
)


class BinnedPointCloudMapBuilder(object):
    """Class used to iteratively construct a map of "free space" based on input
    depth maps (i.e. pointclouds).

    Adapted from https://github.com/devendrachaplot/Neural-SLAM

    This class can be used to (iteratively) construct a metric map of free space in an environment as
    an agent moves around. After every step the agent takes, you should call the `update` function and
    pass the agent's egocentric depth image along with the agent's new position. This depth map will
    be converted into a pointcloud, binned along the up/down axis, and then projected
    onto a 3-dimensional tensor of shape (HxWxC) where HxW represent the ground plane
    and where C equals the number of bins the up-down coordinate was binned into. This 3d map counts the
    number of points in each bin. Thus a lack of points within a region can be used to infer that
    that region is free space.

    # Attributes

    fov : FOV of the camera used to produce the depth images given when calling `update`.
    vision_range_in_map_units : The maximum distance (in number of rows/columns) that will
        be updated when calling `update`, points outside of this map vision range are ignored.
    map_size_in_cm : Total map size in cm.
    resolution_in_cm : Number of cm per row/column in the map.
    height_bins : The bins used to bin the up-down coordinate (for us the y-coordinate). For example,
        if `height_bins = [0.1, 1]` then
        all y-values < 0.1 will be mapped to 0, all y values in [0.1, 1) will be mapped to 1, and
        all y-values >= 1 will be mapped to 2.
        **Importantly:** these y-values will first be recentered by the `min_xyz` value passed when
        calling `reset(...)`.
    return_egocentric_local_context : If `True`, `update` additionally returns an
        `"egocentric_local_context"` entry (see `update`).
    device : A `torch.device` on which to run computations. If this device is a GPU you can potentially
        obtain significant speed-ups.
    """

    def __init__(
        self,
        fov: float,
        vision_range_in_cm: int,
        map_size_in_cm: int,
        resolution_in_cm: int,
        height_bins: Sequence[float],
        return_egocentric_local_context: bool = False,
        device: torch.device = torch.device("cpu"),
    ):
        # The vision range must cover a whole number of map cells.
        assert vision_range_in_cm % resolution_in_cm == 0

        self.fov = fov
        self.vision_range_in_map_units = vision_range_in_cm // resolution_in_cm
        self.map_size_in_cm = map_size_in_cm
        self.resolution_in_cm = resolution_in_cm
        self.height_bins = height_bins
        self.device = device
        self.return_egocentric_local_context = return_egocentric_local_context

        # Accumulated (rows x cols x height-bin) point counts since the last `reset`.
        self.binned_point_cloud_map = np.zeros(
            (
                self.map_size_in_cm // self.resolution_in_cm,
                self.map_size_in_cm // self.resolution_in_cm,
                len(self.height_bins) + 1,
            ),
            dtype=np.float32,
        )

        # Set by `reset`; `update` refuses to run while this is `None`.
        self.min_xyz: Optional[np.ndarray] = None

    def update(
        self,
        depth_frame: np.ndarray,
        camera_xyz: np.ndarray,
        camera_rotation: float,
        camera_horizon: float,
    ) -> Dict[str, np.ndarray]:
        """Updates the map with the input depth frame from the agent.

        See the `allenact.embodiedai.mapping.mapping_utils.point_cloud_utils.project_point_cloud_to_map`
        function for more information input parameter definitions. **We assume that the input
        `depth_frame` has depths recorded in meters**. The input `depth_frame` is copied
        before being processed and is therefore never modified.

        # Returns
        Let `map_size = self.map_size_in_cm // self.resolution_in_cm`. Returns a dictionary with keys-values:

        * `"egocentric_update"` - A tensor of shape
            `(vision_range_in_map_units)x(vision_range_in_map_units)x(len(self.height_bins) + 1)` corresponding
            to the binned pointcloud after having been centered on the agent and rotated so that
            points ahead of the agent correspond to larger row indices and points further to the right of the agent
            correspond to larger column indices. Note that by "centered" we mean that one can picture
            the agent as being positioned at (0, vision_range_in_map_units/2) and facing downward. Each entry in this tensor
            is a count equaling the number of points in the pointcloud that, once binned, fell into this
            entry. This is likely the output you want to use if you want to build a model to predict free space from an image.
        * `"allocentric_update"` - A `(map_size)x(map_size)x(len(self.height_bins) + 1)` corresponding
            to `"egocentric_update"` but rotated to the world-space coordinates. This `allocentric_update`
            is what is used to update the internally stored representation of the map.
        *  `"map"` -  A `(map_size)x(map_size)x(len(self.height_bins) + 1)` tensor corresponding
            to the sum of all `"allocentric_update"` values since the last `reset()`.
        * `"egocentric_local_context"` - Only present if `self.return_egocentric_local_context` is `True`.
            The accumulated `"map"` rotated into the orientation of the agent and cropped to the same
            region around the agent as `"egocentric_update"`.
        """
        with torch.no_grad():
            assert self.min_xyz is not None, "Please call `reset` before `update`."

            camera_xyz = (
                torch.from_numpy(camera_xyz - self.min_xyz).float().to(self.device)
            )

            # Work on a private, C-contiguous copy of the depth frame: `torch.from_numpy`
            # shares memory with its input so, without the copy, the NaN masking below
            # would silently overwrite the caller's array. The copy also handles inputs
            # with negative strides which `torch.from_numpy` cannot wrap directly.
            depth_frame = torch.from_numpy(np.array(depth_frame, order="C")).to(
                self.device
            )

            # Ignore everything beyond the vision range (converted from map units to meters).
            depth_frame[
                depth_frame
                > self.vision_range_in_map_units * self.resolution_in_cm / 100
            ] = np.nan

            world_space_point_cloud = depth_frame_to_world_space_xyz(
                depth_frame=depth_frame,
                camera_world_xyz=camera_xyz,
                rotation=camera_rotation,
                horizon=camera_horizon,
                fov=self.fov,
            )

            world_binned_map_update = project_point_cloud_to_map(
                xyz_points=world_space_point_cloud,
                bin_axis="y",
                bins=self.height_bins,
                map_size=self.binned_point_cloud_map.shape[0],
                resolution_in_cm=self.resolution_in_cm,
                flip_row_col=True,
            )

            # Center the cloud on the agent (only x and z are shifted, y is left untouched)
            recentered_point_cloud = world_space_point_cloud - (
                torch.FloatTensor([1.0, 0.0, 1.0]).to(self.device) * camera_xyz
            ).reshape((1, 1, 3))
            # Rotate the cloud so that positive-z is the direction the agent is looking
            theta = (
                np.pi * camera_rotation / 180
            )  # No negative since THOR rotations are already backwards
            cos_theta = np.cos(theta)
            sin_theta = np.sin(theta)
            rotation_transform = torch.FloatTensor(
                [
                    [cos_theta, 0, -sin_theta],
                    [0, 1, 0],  # unchanged
                    [sin_theta, 0, cos_theta],
                ]
            ).to(self.device)
            rotated_point_cloud = recentered_point_cloud @ rotation_transform.T
            # Shift x by half of the map size (in meters) so that the agent ends up
            # in the middle column of the projected map.
            xoffset = (self.map_size_in_cm / 100) / 2
            agent_centric_point_cloud = rotated_point_cloud + torch.FloatTensor(
                [xoffset, 0, 0]
            ).to(self.device)

            # Accumulate the world-space update into the stored map
            allocentric_update_numpy = world_binned_map_update.cpu().numpy()
            self.binned_point_cloud_map = (
                self.binned_point_cloud_map + allocentric_update_numpy
            )

            agent_centric_binned_map = project_point_cloud_to_map(
                xyz_points=agent_centric_point_cloud,
                bin_axis="y",
                bins=self.height_bins,
                map_size=self.binned_point_cloud_map.shape[0],
                resolution_in_cm=self.resolution_in_cm,
                flip_row_col=True,
            )
            # Crop to the first `vr` rows (ahead of the agent) and the `vr` columns
            # centered on the agent.
            vr = self.vision_range_in_map_units
            vr_div_2 = self.vision_range_in_map_units // 2
            width_div_2 = agent_centric_binned_map.shape[1] // 2
            agent_centric_binned_map = agent_centric_binned_map[
                :vr, (width_div_2 - vr_div_2) : (width_div_2 + vr_div_2), :
            ]

            to_return = {
                "egocentric_update": agent_centric_binned_map.cpu().numpy(),
                "allocentric_update": allocentric_update_numpy,
                "map": self.binned_point_cloud_map,
            }

            if self.return_egocentric_local_context:
                # See the update function of the semantic map sensor for in depth comments regarding the below
                # Essentially we are simply rotating the full map into the orientation of the agent and then
                # selecting a smaller region around the agent.
                theta = -np.pi * camera_rotation / 180
                cos_theta = np.cos(theta)
                sin_theta = np.sin(theta)
                rot_mat = torch.FloatTensor(
                    [[cos_theta, -sin_theta], [sin_theta, cos_theta]]
                ).to(self.device)

                # Half of the vision range (in meters) backwards along the rotated z direction
                move_back_offset = (
                    -0.5
                    * (self.vision_range_in_map_units * self.resolution_in_cm / 100)
                ) * (
                    rot_mat
                    @ torch.tensor(
                        [0, 1], dtype=torch.float, device=self.device
                    ).unsqueeze(-1)
                )

                map_size = self.binned_point_cloud_map.shape[0]
                # Converts meters into the [-1, 1] coordinates used by `affine_grid`
                scaler = 2 * (100 / (self.resolution_in_cm * map_size))
                offset_to_center_the_agent = (
                    scaler
                    * (
                        torch.tensor(
                            [
                                camera_xyz[0],
                                camera_xyz[2],
                            ],
                            dtype=torch.float,
                            device=self.device,
                        ).unsqueeze(-1)
                        + move_back_offset
                    )
                    - 1
                )
                offset_to_top_of_image = rot_mat @ torch.FloatTensor(
                    [0, 1.0]
                ).unsqueeze(1).to(self.device)
                rotation_and_translate_mat = torch.cat(
                    (
                        rot_mat,
                        offset_to_top_of_image + offset_to_center_the_agent,
                    ),
                    dim=1,
                )

                # (rows x cols x channels) -> (1 x channels x rows x cols) as expected by `grid_sample`
                full_map_tensor = (
                    torch.tensor(
                        self.binned_point_cloud_map,
                        dtype=torch.float,
                        device=self.device,
                    )
                    .unsqueeze(0)
                    .permute(0, 3, 1, 2)
                )
                full_ego_map = (
                    F.grid_sample(
                        full_map_tensor,
                        F.affine_grid(
                            rotation_and_translate_mat.to(self.device).unsqueeze(0),
                            full_map_tensor.shape,
                            align_corners=False,
                        ),
                        align_corners=False,
                    )
                    .squeeze(0)
                    .permute(1, 2, 0)
                )

                egocentric_local_context = full_ego_map[
                    :vr, (width_div_2 - vr_div_2) : (width_div_2 + vr_div_2), :
                ]

                to_return["egocentric_local_context"] = (
                    egocentric_local_context.cpu().numpy()
                )

            return to_return

    def reset(self, min_xyz: np.ndarray):
        """Reset the map.

        Resets the internally stored map.

        # Parameters
        min_xyz : An array of size (3,) corresponding to the minimum possible x, y, and z values that will be observed
            as a point in a pointcloud when calling `.update(...)`. The (world-space) maps returned by calls to `update`
            will have been normalized so the (0,0,:) entry corresponds to these minimum values.
        """
        self.min_xyz = min_xyz
        self.binned_point_cloud_map = np.zeros_like(self.binned_point_cloud_map)


class ObjectHull2d:
    """2d convex hull of an object after projection onto the ground plane."""

    def __init__(
        self,
        object_id: str,
        object_type: str,
        hull_points: Union[np.ndarray, Sequence[Sequence[float]]],
    ):
        """Record the identity, type and hull vertices of an object.

        # Parameters
        object_id : A unique id for the object.
        object_type : The type of the object.
        hull_points : A Nx2 matrix with `hull_points[:, 0]` being the x coordinates and `hull_points[:, 1]` being
            the `z` coordinates (this is using the Unity game engine conventions where the `y` axis is up/down).
            Numpy arrays are stored as given, anything else is converted to a numpy array.
        """
        # Only non-array inputs are converted; an ndarray is kept as the very same object.
        if not isinstance(hull_points, np.ndarray):
            hull_points = np.array(hull_points)

        self.hull_points = hull_points
        self.object_type = object_type
        self.object_id = object_id


class SemanticMapBuilder(object):
    """Class used to iteratively construct a semantic map based on input depth
    maps (i.e. pointclouds).

    Adapted from https://github.com/devendrachaplot/Neural-SLAM

    This class can be used to (iteratively) construct a semantic map of objects in the environment.

    This map is similar to that generated by `BinnedPointCloudMapBuilder` (see its documentation for
    more information) but the various channels correspond to different object types. Thus
    if the `(i,j,k)` entry of a map generated by this function is `True`, this means that an
    object of type `k` is present in position `i,j` in the map. In particular, by "present" we mean that,
    after projecting the object to the ground plane and taking the convex hull of the resulting
    2d object, a non-trivial portion of this convex hull overlaps the `i,j` position.

    For attribute information, see the documentation of the `BinnedPointCloudMapBuilder` class. The
    only attribute present in this class that is not present in `BinnedPointCloudMapBuilder` is
    `ordered_object_types` which corresponds to a list of unique object types where
    object type `ordered_object_types[i]` will correspond to the `i`th channel of the map
    generated by this class.
    """

    def __init__(
        self,
        fov: float,
        vision_range_in_cm: int,
        map_size_in_cm: int,
        resolution_in_cm: int,
        ordered_object_types: Sequence[str],
        device: torch.device = torch.device("cpu"),
    ):
        self.fov = fov
        self.vision_range_in_map_units = vision_range_in_cm // resolution_in_cm
        self.map_size_in_cm = map_size_in_cm
        self.resolution_in_cm = resolution_in_cm
        self.ordered_object_types = tuple(ordered_object_types)
        self.device = device

        # Maps an object type to the channel representing it in the semantic map
        self.object_type_to_index = {
            ot: i for i, ot in enumerate(self.ordered_object_types)
        }

        # (rows x cols x object types) map, non-zero where an object hull was drawn
        self.ground_truth_semantic_map = np.zeros(
            (
                self.map_size_in_cm // self.resolution_in_cm,
                self.map_size_in_cm // self.resolution_in_cm,
                len(self.ordered_object_types),
            ),
            dtype=np.uint8,
        )
        # (rows x cols x 1) mask of the map cells observed since the last `reset`
        self.explored_mask = np.zeros(
            (
                self.map_size_in_cm // self.resolution_in_cm,
                self.map_size_in_cm // self.resolution_in_cm,
                1,
            ),
            dtype=bool,
        )

        # Set by `reset`; `update` refuses to run while this is `None`.
        self.min_xyz: Optional[np.ndarray] = None

    @staticmethod
    def randomly_color_semantic_map(
        map: Union[np.ndarray, torch.Tensor], threshold: float = 0.5, seed: int = 1
    ) -> np.ndarray:
        """Convert a semantic map into an RGB image with one random color per channel.

        # Parameters
        map : A `(rows)x(cols)x(# channels)` semantic map, converted to a numpy array if it is not one.
        threshold : A channel is considered "present" at a position if its value is `>= threshold`.
        seed : Seed of the random number generator used to pick the channel colors.

        # Returns
        A `(rows)x(cols)x3` `uint8` array. Positions where no channel is present are black, otherwise
        the position takes the color of the present channel with the largest index.
        """
        if not isinstance(map, np.ndarray):
            map = np.array(map)

        rnd = random.Random(seed)
        # 0 where no channel is present, otherwise 1 + (index of the last present channel)
        semantic_int_mat = (
            (map >= threshold)
            * np.array(list(range(1, map.shape[-1] + 1))).reshape((1, 1, -1))
        ).max(-1)
        # `randint` includes its upper bound so it can return 256 which does not fit in
        # a uint8 (it would wrap around to 0). We clamp rather than change the bound so
        # that the colors generated for a given seed stay the same as before.
        palette = np.array(
            [(0, 0, 0)]
            + [
                tuple(min(rnd.randint(0, 256), 255) for _ in range(3))
                for _ in range(map.shape[-1])
            ]
        )
        # noinspection PyTypeChecker
        return np.uint8(palette[semantic_int_mat])

    def _xzs_to_colrows(self, xzs: np.ndarray):
        """Convert world-space (x, z) coordinates (in meters) into (col, row) map indices.

        The coordinates are recentered using `self.min_xyz`, scaled to map units, truncated
        to integers, and clipped so that they lie within the map.
        """
        height, width, _ = self.ground_truth_semantic_map.shape
        return np.clip(
            np.int32(
                (
                    (100 / self.resolution_in_cm)
                    * (xzs - np.array([[self.min_xyz[0], self.min_xyz[2]]]))
                )
            ),
            a_min=0,
            a_max=np.array(
                [width - 1, height - 1]
            ),  # width then height as we're returning cols then rows
        )

    def build_ground_truth_map(self, object_hulls: Sequence["ObjectHull2d"]):
        """Redraw the ground truth semantic map from the given object hulls.

        The map is first cleared. Then, for every hull whose object type is one of
        `self.ordered_object_types`, the convex polygon defined by the hull's points is
        filled (with value 255) in the channel corresponding to that object type. Hulls
        of other object types are ignored.
        """
        self.ground_truth_semantic_map.fill(0)

        for object_hull in object_hulls:
            ot = object_hull.object_type

            if ot in self.object_type_to_index:
                ind = self.object_type_to_index[ot]

                # Draw on a copy of the (rows x cols x 1) channel and write the result back
                self.ground_truth_semantic_map[:, :, ind : (ind + 1)] = (
                    cv2.fillConvexPoly(
                        img=np.array(
                            self.ground_truth_semantic_map[:, :, ind : (ind + 1)],
                            dtype=np.uint8,
                        ),
                        points=self._xzs_to_colrows(np.array(object_hull.hull_points)),
                        color=255,
                    )
                )

    def update(
        self,
        depth_frame: np.ndarray,
        camera_xyz: np.ndarray,
        camera_rotation: float,
        camera_horizon: float,
    ) -> Dict[str, np.ndarray]:
        """Updates the map with the input depth frame from the agent.

        See the documentation for `BinnedPointCloudMapBuilder.update`,
        the inputs and outputs are similar except that channels are used
        to represent the presence/absence of objects of given types.
        The input `depth_frame` is copied before being processed and is
        therefore never modified.

        # Returns
        A dictionary with keys-values:

        * `"egocentric_update"` - The portion of the ground truth semantic map observed
            in this step, in the egocentric reference frame of the agent.
        * `"egocentric_mask"` - Boolean mask of the positions observed in this step,
            in the egocentric reference frame of the agent.
        * `"explored_mask"` - Boolean mask (a copy) of the positions observed since the last
            `reset`, in the world-space reference frame.
        * `"map"` - The ground truth semantic map restricted to the explored positions,
            in the world-space reference frame.
        """
        with torch.no_grad():
            assert self.min_xyz is not None

            camera_xyz = torch.from_numpy(camera_xyz - self.min_xyz).to(self.device)
            map_size = self.ground_truth_semantic_map.shape[0]

            # Work on a private, C-contiguous copy of the depth frame: `torch.from_numpy`
            # shares memory with its input so, without the copy, the NaN masking below
            # would silently overwrite the caller's array. The copy also handles inputs
            # with negative strides which `torch.from_numpy` cannot wrap directly.
            depth_frame = torch.from_numpy(np.array(depth_frame, order="C")).to(
                self.device
            )
            # Ignore everything beyond the vision range (converted from map units to meters).
            depth_frame[
                depth_frame
                > self.vision_range_in_map_units * self.resolution_in_cm / 100
            ] = np.nan

            world_space_point_cloud = depth_frame_to_world_space_xyz(
                depth_frame=depth_frame,
                camera_world_xyz=camera_xyz,
                rotation=camera_rotation,
                horizon=camera_horizon,
                fov=self.fov,
            )

            # With no bins every point falls in a single channel, so this is
            # simply a mask of the map cells containing at least one point.
            world_newly_explored = (
                project_point_cloud_to_map(
                    xyz_points=world_space_point_cloud,
                    bin_axis="y",
                    bins=[],
                    map_size=map_size,
                    resolution_in_cm=self.resolution_in_cm,
                    flip_row_col=True,
                )
                > 0.001
            )
            # Semantic channels restricted to the newly explored cells, followed by
            # the newly explored mask itself as the last channel.
            world_update_and_mask = torch.cat(
                (
                    torch.logical_and(
                        torch.from_numpy(self.ground_truth_semantic_map).to(
                            self.device
                        ),
                        world_newly_explored,
                    ),
                    world_newly_explored,
                ),
                dim=-1,
            ).float()
            world_update_and_mask_for_sample = world_update_and_mask.unsqueeze(
                0
            ).permute(0, 3, 1, 2)

            # We now use grid sampling to rotate world_update_for_sample into the egocentric coordinate
            # frame of the agent so that the agent's forward direction is downwards in the tensor
            # (and its right side is to the right in the image, this means that right/left are
            # flipped when taking the perspective of the agent in the image). This convention aligns with
            # what's expected by grid_sample where +x corresponds to +cols and +z corresponds to +rows.
            # Here also the rows/cols have been normalized so that the center of the image is at (0,0)
            # and the bottom right is at (1,1).

            # Mentally you can think of the output from the F.affine_grid function as you
            # rotating/translating an axis-aligned square on the image-to-be-sampled and then
            # copying whatever is in this square to a new image. Note that the translation always
            # happens in the global reference frame after the rotation. We'll start by rotating
            # the square so that the agent's z direction is downwards in the image.
            # Since the global axis of the map and the grid sampling are aligned, this requires
            # rotating the square by the rotation of the agent. As rotation is negative the usual
            # standard in THOR, we need to negate the rotation of the agent.
            theta = -np.pi * camera_rotation / 180

            # Here form the rotation matrix
            cos_theta = np.cos(theta)
            sin_theta = np.sin(theta)
            rot_mat = torch.FloatTensor(
                [[cos_theta, -sin_theta], [sin_theta, cos_theta]]
            ).to(self.device)

            # Now we need to figure out the translation. For an intuitive understanding, we break this
            # translation into two different "offsets". The first offset centers the square on the
            # agent's current location:
            scaler = 2 * (100 / (self.resolution_in_cm * map_size))
            offset_to_center_the_agent = (
                scaler
                * torch.FloatTensor([camera_xyz[0], camera_xyz[2]])
                .unsqueeze(-1)
                .to(self.device)
                - 1
            )
            # The second offset moves the square in the direction of the agent's z direction
            # so that the output image will have the agent's view starting directly at the
            # top of the image.
            offset_to_top_of_image = rot_mat @ torch.FloatTensor([0, 1.0]).unsqueeze(
                1
            ).to(self.device)
            rotation_and_translate_mat = torch.cat(
                (
                    rot_mat,
                    offset_to_top_of_image + offset_to_center_the_agent,
                ),
                dim=1,
            )

            ego_update_and_mask = F.grid_sample(
                world_update_and_mask_for_sample.to(self.device),
                F.affine_grid(
                    rotation_and_translate_mat.to(self.device).unsqueeze(0),
                    world_update_and_mask_for_sample.shape,
                    align_corners=False,
                ),
                align_corners=False,
            )

            # All that's left now is to crop out the portion of the transformed tensor that we actually
            # care about (i.e. the portion corresponding to the agent's `self.vision_range_in_map_units`).
            vr = self.vision_range_in_map_units
            half_vr = vr // 2
            center = self.map_size_in_cm // (2 * self.resolution_in_cm)
            cropped = ego_update_and_mask[
                :, :, :vr, (center - half_vr) : (center + half_vr)
            ]

            # Accumulate the newly explored cells into the stored mask (in place)
            np.logical_or(
                self.explored_mask,
                world_newly_explored.cpu().numpy(),
                out=self.explored_mask,
            )

            return {
                # All channels but the last are the semantic channels...
                "egocentric_update": cropped[0, :-1].permute(1, 2, 0).cpu().numpy(),
                # ...and the last channel is the mask of newly explored cells.
                "egocentric_mask": (cropped[0, -1:].permute(1, 2, 0) > 0.001)
                .cpu()
                .numpy(),
                "explored_mask": np.array(self.explored_mask),
                "map": np.logical_and(
                    self.explored_mask, (self.ground_truth_semantic_map > 0)
                ),
            }

    def reset(self, min_xyz: np.ndarray, object_hulls: Sequence["ObjectHull2d"]):
        """Reset the map.

        Resets the internally stored map: the mask of explored positions is cleared
        and the ground truth semantic map is rebuilt from `object_hulls`.

        # Parameters
        min_xyz : An array of size (3,) corresponding to the minimum possible x, y, and z values that will be observed
            as a point in a pointcloud when calling `.update(...)`. The (world-space) maps returned by calls to `update`
            will have been normalized so the (0,0,:) entry corresponds to these minimum values.
        object_hulls : The object hulls corresponding to objects in the scene. These will be used to
            construct the map.
        """
        self.min_xyz = min_xyz
        # Forget what was explored before the reset, otherwise positions observed
        # earlier would be reported as explored in the maps returned by `update`.
        self.explored_mask.fill(False)
        self.build_ground_truth_map(object_hulls=object_hulls)


================================================
FILE: allenact/embodiedai/mapping/mapping_utils/point_cloud_utils.py
================================================
# MIT License
#
# Original Copyright (c) 2020 Devendra Chaplot
#
# Modified work Copyright (c) 2021 Allen Institute for Artificial Intelligence
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import math
from typing import Optional, Sequence, cast

import numpy as np
import torch

from allenact_plugins.ithor_plugin.ithor_util import vertical_to_horizontal_fov


def camera_space_xyz_to_world_xyz(
    camera_space_xyzs: torch.Tensor,
    camera_world_xyz: torch.Tensor,
    rotation: float,
    horizon: float,
) -> torch.Tensor:
    """Maps points expressed in the camera's coordinate frame into the world-
    space (global) coordinate frame.

    This code has been adapted from https://github.com/devendrachaplot/Neural-SLAM.

    **IMPORTANT:** The conventions of the Unity game engine are used. In particular:

    * A rotation of 0 corresponds to facing north.
    * Positive rotations are CLOCKWISE, i.e. a rotation of 90 degrees corresponds to
        facing east. **THIS IS THE OPPOSITE OF THE USUAL MATHEMATICAL CONVENTION.**
    * When facing NORTH (rotation==0) moving ahead by 1 meter increases the z coordinate
        by 1, moving right by 1 meter increases the x coordinate by 1, and moving up by
        1 meter increases the y coordinate by 1. **Using x,z as the ground plane is common
        in computer graphics but differs from the usual mathematical convention of z
        being "up".**
    * The horizon is how far below the horizontal the camera is facing, e.g. a horizon
        of 30 means the camera is tilted downwards by 30 degrees.

    # Parameters
    camera_space_xyzs : A 3xN matrix of xyz coordinates in the camera's reference frame,
        `x, y, z = camera_space_xyzs[:, i]` are the coordinates of the ith point.
    camera_world_xyz : The camera's xyz position in the world reference frame.
    rotation : The world-space rotation (in degrees) of the camera.
    horizon : The horizon (in degrees) of the camera.

    # Returns
    A 3xN tensor whose column [:, i] is the world-space xyz coordinate of the camera-space
    point camera_space_xyzs[:, i].
    """
    # Both angles are negated before being converted to radians and plugged
    # into the rotation matrices below.
    tilt_rad = -horizon * np.pi / 180
    yaw_rad = -rotation * np.pi / 180

    cos_tilt, sin_tilt = np.cos(tilt_rad), np.sin(tilt_rad)
    cos_yaw, sin_yaw = np.cos(yaw_rad), np.sin(yaw_rad)

    # Rotation about the x-axis induced by the camera's horizon (x is unchanged).
    # `.new(...)` builds the matrix with the dtype/device of the input points.
    tilt_matrix = camera_space_xyzs.new(
        [
            [1, 0, 0],
            [0, cos_tilt, sin_tilt],
            [0, -sin_tilt, cos_tilt],
        ]
    )

    # Rotation about the y-axis induced by the agent's rotation (y is unchanged).
    yaw_matrix = camera_space_xyzs.new(
        [
            [cos_yaw, 0, -sin_yaw],
            [0, 1, 0],
            [sin_yaw, 0, cos_yaw],
        ]
    )

    # The horizon tilt is applied first, then the yaw.
    camera_to_world_rotation = yaw_matrix @ tilt_matrix
    rotated_points = camera_to_world_rotation @ camera_space_xyzs

    # Finally shift every point by the camera's world-space position.
    return rotated_points + camera_world_xyz[:, None]


def depth_frame_to_camera_space_xyz(
    depth_frame: torch.Tensor, mask: Optional[torch.Tensor], fov: float = 90
) -> torch.Tensor:
    """Converts a depth map into a point cloud (a collection of xyz points)
    expressed in the camera's coordinate frame.

    # Parameters
    depth_frame : An HxW depth map. Every returned point is `(x, y, 1) * depth_frame[i, j]`,
        i.e. the depth value scales a point lying on the plane at unit distance in front
        of the camera.
    mask : An optional boolean mask with the same shape as `depth_frame`. Only pixels where
        the mask is true contribute a point. If `None`, every pixel is used (so the result
        has H*W columns).
    fov: The field of view of the camera in degrees. It is used directly for the vertical
        (row) axis; the value used for the horizontal (column) axis is obtained from
        `vertical_to_horizontal_fov`.

    # Returns

    A 3xN matrix whose column [:, i] holds the xyz coordinates (in the camera's coordinate
    frame) of the ith selected pixel, with "up" being positive y.
    """
    h, w = depth_frame.shape[:2]
    if mask is None:
        mask = torch.ones_like(depth_frame, dtype=torch.bool)

    # (row, col) indices of the selected pixels, shifted by 0.5 so that they
    # refer to pixel centers rather than pixel corners.
    pixel_centers = torch.stack(torch.where(mask)) + 0.5

    # Re-center around the middle of the image; rows are negated so that
    # "up" in the image is positive y.
    y_offsets = -(pixel_centers[:1] - h / 2.0)
    x_offsets = pixel_centers[1:] - w / 2.0

    # Scale the pixel offsets so that they lie on the plane at depth 1.
    y_scale = (2.0 / h) * math.tan((fov / 2) / 180 * math.pi)
    x_scale = (2.0 / w) * math.tan(
        (vertical_to_horizontal_fov(fov, height=h, width=w) / 2) / 180 * math.pi
    )
    y_offsets = y_offsets * y_scale
    x_offsets = x_offsets * x_scale

    unit_depth_points = torch.cat(
        [x_offsets, y_offsets, torch.ones_like(y_offsets)],
        dim=0,
    )

    # Push each unit-depth point out to its measured depth.
    selected_depths = depth_frame[mask][None, :]
    return unit_depth_points * selected_depths


def depth_frame_to_world_space_xyz(
    depth_frame: torch.Tensor,
    camera_world_xyz: torch.Tensor,
    rotation: float,
    horizon: float,
    fov: float,
):
    """Converts a depth map into a per-pixel point cloud expressed in the
    world-space coordinate frame.

    **IMPORTANT:** The conventions of the Unity game engine are used. In particular:

    * A rotation of 0 corresponds to facing north.
    * Positive rotations are CLOCKWISE, i.e. a rotation of 90 degrees corresponds to
        facing east. **THIS IS THE OPPOSITE OF THE USUAL MATHEMATICAL CONVENTION.**
    * When facing NORTH (rotation==0) moving ahead by 1 meter increases the z coordinate
        by 1, moving right by 1 meter increases the x coordinate by 1, and moving up by
        1 meter increases the y coordinate by 1. **Using x,z as the ground plane is common
        in computer graphics but differs from the usual mathematical convention of z
        being "up".**
    * The horizon is how far below the horizontal the camera is facing, e.g. a horizon
        of 30 means the camera is tilted downwards by 30 degrees.

    # Parameters
    depth_frame : The depth map to convert; every pixel is used (no masking is applied).
    camera_world_xyz : The camera's xyz position in the world reference frame.
    rotation : The world-space rotation (in degrees) of the camera.
    horizon : The horizon (in degrees) of the camera.
    fov: The field of view of the camera.

    # Returns

    A tensor of shape `(*depth_frame.shape, 3)` whose entry `[i, j, :]` holds the world-space
    xyz coordinates of the point observed at pixel `(i, j)`.
    """
    # `mask=None` keeps every pixel, so the cloud has exactly one column per
    # pixel, which is what makes the final `view` back to image layout valid.
    world_xyz = camera_space_xyz_to_world_xyz(
        camera_space_xyzs=depth_frame_to_camera_space_xyz(
            depth_frame=depth_frame, mask=None, fov=fov
        ),
        camera_world_xyz=camera_world_xyz,
        rotation=rotation,
        horizon=horizon,
    )

    # (3, N) -> (3, *image shape) -> (*image shape, 3)
    channels_first_shape = (3, *depth_frame.shape)
    return world_xyz.view(*channels_first_shape).permute(1, 2, 0)


def project_point_cloud_to_map(
    xyz_points: torch.Tensor,
    bin_axis: str,
    bins: Sequence[float],
    map_size: int,
    resolution_in_cm: int,
    flip_row_col: bool,
):
    """Bins an input point cloud into a map tensor with the bins equaling the
    channels.

    This code has been adapted from https://github.com/devendrachaplot/Neural-SLAM.

    # Parameters
    xyz_points : (x,y,z) pointcloud(s) as a torch.Tensor of shape (... x height x width x 3).
        All operations are vectorized across the `...` dimensions. Points with a NaN in
        ANY coordinate are ignored.
    bin_axis : Either "x", "y", or "z", the axis which should be binned by the values in `bins`.
        If you have generated your point clouds with any of the other functions in the `point_cloud_utils`
        module you almost certainly want this to be "y" as this is the default upwards dimension.
    bins: The values by which to bin along `bin_axis`, see the `boundaries` parameter of
        `torch.bucketize` for more info.
    map_size : The axes not specified by `bin_axis` will be multiplied by `100 / resolution_in_cm`
        and then rounded to an integer. Points whose rounded values fall outside of
        [0, ..., map_size - 1] are ignored.
    resolution_in_cm: The resolution, in cm, of the map output from this function. Every
        grid square of the map corresponds to a (`resolution_in_cm`x`resolution_in_cm`) square
        in space.
    flip_row_col: Should the rows/cols of the map be flipped? See the 'Returns' section below for more
        info.

    # Returns
    A collection of maps of shape (... x map_size x map_size x (len(bins)+1)) where every entry
    counts the points falling into that cell/bin. Note that bin_axis has been moved to the last
    index of this returned map, the other two axes stay in their original order unless
    `flip_row_col` is true in which case they are reversed (useful as often rows should
    correspond to y or z instead of x).

    # Raises
    ValueError : If `bin_axis` is not one of "x", "y", "z".
    """
    bin_dim = ["x", "y", "z"].index(bin_axis)

    # Flatten all leading batch dimensions into a single "cloud" dimension.
    start_shape = xyz_points.shape
    xyz_points = xyz_points.reshape([-1, *start_shape[-3:]])
    num_clouds, h, w, _ = xyz_points.shape

    # Reorder the coordinates as (row axis, col axis, binned axis).
    axis_order = [2, 1, 0] if flip_row_col else [0, 1, 2]
    new_order = [i for i in axis_order if i != bin_dim] + [bin_dim]

    uvw_points = cast(
        torch.Tensor, torch.stack([xyz_points[..., i] for i in new_order], dim=-1)
    )

    num_bins = len(bins) + 1

    # A point is unusable if ANY of its coordinates is NaN. Checking only x
    # (as was done previously) lets a point with a NaN on the binned axis
    # through, and the result of casting NaN to an integer is platform
    # dependent, so it cannot be relied upon to invalidate such points.
    isnotnan = ~torch.isnan(xyz_points).any(dim=-1)

    boundaries = torch.as_tensor(
        bins, dtype=uvw_points.dtype, device=uvw_points.device
    )
    uvw_points_binned: torch.Tensor = torch.cat(
        (
            torch.round(100 * uvw_points[..., :-1] / resolution_in_cm).long(),
            torch.bucketize(uvw_points[..., -1:].contiguous(), boundaries=boundaries),
        ),
        dim=-1,
    )

    # Exclusive upper bounds for the (row, col, bin) indices.
    maxes = torch.tensor(
        [map_size, map_size, num_bins], dtype=torch.long, device=xyz_points.device
    ).reshape((1, 1, 1, 3))

    isvalid = (
        (uvw_points_binned >= 0).all(-1)
        & (uvw_points_binned < maxes).all(-1)
        & isnotnan
    )
    isvalid_flat = isvalid.reshape(-1)

    # Prepend the index of the cloud every point belongs to, giving rows of
    # (cloud, row, col, bin).
    cloud_index = torch.repeat_interleave(
        torch.arange(num_clouds, device=xyz_points.device), h * w
    ).reshape(-1, 1)
    uvw_points_binned_with_index_mat = torch.cat(
        (cloud_index, uvw_points_binned.reshape(-1, 3)), dim=1
    )

    # Invalid points are redirected to flat index 0 (a legal index) and are
    # given weight 0 in the bincount below so they never contribute.
    uvw_points_binned_with_index_mat[~isvalid_flat, :] = 0
    ind = (
        uvw_points_binned_with_index_mat[:, 0] * (map_size * map_size * num_bins)
        + uvw_points_binned_with_index_mat[:, 1] * (map_size * num_bins)
        + uvw_points_binned_with_index_mat[:, 2] * num_bins
        + uvw_points_binned_with_index_mat[:, 3]
    )
    count = torch.bincount(
        ind,
        isvalid_flat.long(),
        minlength=num_clouds * map_size * map_size * num_bins,
    )

    return count.view(*start_shape[:-3], map_size, map_size, num_bins)


#################
# FOR DEBUGGING #
#################
# The below functions are versions of the above which, because of their reliance on
# numpy functions, cannot use GPU acceleration. These are possibly useful for debugging,
# performance comparisons, or for validating that the above GPU variants work properly.


def _cpu_only_camera_space_xyz_to_world_xyz(
    camera_space_xyzs: np.ndarray,
    camera_world_xyz: np.ndarray,
    rotation: float,
    horizon: float,
):
    # Adapted from https://github.com/devendrachaplot/Neural-SLAM.

    # view_position = 3, world_points = 3 x N
    # NOTE: camera_position is not equal to agent_position!!

    # First compute the transformation that points undergo
    # due to the camera's horizon
    psi = -horizon * np.pi / 180
    cos_psi = np.cos(psi)
    sin_psi = np.sin(psi)
    # fmt: off
    horizon_transform = np.array(
        [
            [1, 0, 0], # unchanged
            [0, cos_psi, sin_psi],
            [0, -sin_psi, cos_psi,],
        ],
        np.float64,
    )
    # fmt: on

    # Next compute the transformation that points undergo
    # due to the agent's rotation about the y-axis
    phi = -rotation * np.pi / 180
    cos_phi = np.cos(phi)
    sin_phi = np.sin(phi)
    # fmt: off
    rotation_transform = np.array(
        [
            [cos_phi, 0, -sin_phi],
            [0, 1, 0], # unchanged
            [sin_phi, 0, cos_phi],],
        np.float64,
    )
    # fmt: on

    # Apply the above transformations
    view_points = (rotation_transform @ horizon_transform) @ camera_space_xyzs

    # Translate the points w.r.t. the camera's position in world space.
    world_points = view_points + camera_world_xyz[:, None]
    return world_points


def _cpu_only_depth_frame_to_camera_space_xyz(
    depth_frame: np.ndarray, mask: Optional[np.ndarray], fov: float = 90
):
    """"""
    assert (
        len(depth_frame.shape) == 2 and depth_frame.shape[0] == depth_frame.shape[1]
    ), f"depth has shape {depth_frame.shape}, we only support (N, N) shapes for now."

    resolution = depth_frame.shape[0]
    if mask is None:
        mask = np.ones(depth_frame.shape, dtype=bool)

    # pixel centers
    camera_space_yx_offsets = (
        np.stack(np.where(mask))
        + 0.5  # Offset by 0.5 so that we are in the middle of the pixel
    )

    # Subtract center
    camera_space_yx_offsets -= resolution / 2.0

    # Make "up" in y be positive
    camera_space_yx_offsets[0, :] *= -1

    # Put points on the clipping plane
    camera_space_yx_offsets *= (2.0 / resolution) * math.tan((fov / 2) / 180 * math.pi)

    camera_space_xyz = np.concatenate(
        [
            camera_space_yx_offsets[1:, :],  # This is x
            camera_space_yx_offsets[:1, :],  # This is y
            np.ones_like(camera_space_yx_offsets[:1, :]),
        ],
        axis=0,
    )

    return camera_space_xyz * depth_frame[mask][None, :]


def _cpu_only_depth_frame_to_world_space_xyz(
    depth_frame: np.ndarray,
    camera_world_xyz: np.ndarray,
    rotation: float,
    horizon: float,
    fov: float,
):
    """Numpy-only conversion of a depth map into a per-pixel world-space point
    cloud.

    # Parameters
    depth_frame : The depth map to convert; every pixel is used (no masking is applied).
    camera_world_xyz : The camera's xyz position in the world reference frame.
    rotation : The world-space rotation (in degrees) of the camera.
    horizon : The horizon (in degrees) of the camera.
    fov: The field of view of the camera.

    # Returns
    An array of shape `(*depth_frame.shape, 3)` whose entry `[i, j, :]` holds the
    world-space xyz coordinates of the point observed at pixel `(i, j)`.
    """
    # `mask=None` keeps every pixel, so the cloud has exactly one column per
    # pixel, which is what makes the reshape back to image layout valid.
    world_xyz = _cpu_only_camera_space_xyz_to_world_xyz(
        camera_space_xyzs=_cpu_only_depth_frame_to_camera_space_xyz(
            depth_frame=depth_frame, mask=None, fov=fov
        ),
        camera_world_xyz=camera_world_xyz,
        rotation=rotation,
        horizon=horizon,
    )

    # (3, N) -> (3, *image shape) -> (*image shape, 3)
    channels_first = world_xyz.reshape((3, *depth_frame.shape))
    return channels_first.transpose((1, 2, 0))


def _cpu_only_project_point_cloud_to_map(
    xyz_points: np.ndarray,
    bin_axis: str,
    bins: Sequence[float],
    map_size: int,
    resolution_in_cm: int,
    flip_row_col: bool,
):
    """Bins points into  bins.

    Adapted from https://github.com/devendrachaplot/Neural-SLAM.

    # Parameters
    xyz_points : (x,y,z) point clouds as a np.ndarray of shape (... x height x width x 3). (x,y,z)
        should be coordinates specified in meters.
    bin_axis : Either "x", "y", or "z", the axis which should be binned by the values in `bins`
    bins: The values by which to bin along `bin_axis`, see the `bins` parameter of `np.digitize`
        for more info.
    map_size : The axes not specified by `bin_axis` will be be divided by `resolution_in_cm / 100`
        and then rounded to the nearest integer. They are then expected to have their values
        within the interval [0, ..., map_size - 1].
    resolution_in_cm: The resolution_in_cm, in cm, of the map output from this function. Every
        grid square of the map corresponds to a (`resolution_in_cm`x`resolution_in_cm`) square
        in space.
    flip_row_col: Should the rows/cols of the map be flipped

    # Returns
    A collection of maps of shape (... x map_size x map_size x (len(bins)+1)), note that bin_axis
    has been moved to the last index of this returned map, the other two axes stay in their original
    order unless `flip_row_col` has been called in which case they are reversed (useful if you give
    points as often rows should correspond to y or z instead of x).
    """
    bin_dim = ["x", "y", "z"].index(bin_axis)

    start_shape = xyz_points.shape
    xyz_points = xyz_points.reshape([-1, *start_shape[-3:]])
    num_clouds, h, w, _ = xyz_points.shape

    if not flip_row_col:
        new_order = [i for i in [0, 1, 2] if i != bin_dim] + [bin_dim]
    else:
        new_order = [i for i in [2, 1, 0] if i != bin_dim] + [bin_dim]

    uvw_points: np.ndarray = np.stack([xyz_points[..., i] for i in new_order], axis=-1)

    num_bins = len(bins) + 1

    isnotnan = ~np.isnan(xyz_points[..., 0])

    uvw_points_binned = np.concatenate(
        (
            np.round(100 * uvw_points[..., :-1] / resolution_in_cm).astype(np.int32),
            np.digitize(uvw_points[..., -1:], bins=bins).astype(np.int32),
        ),
        axis=-1,
    )

    maxes = np.array([map_size, map_size, num_bins]).reshape((1, 1, 1, 3))

    isvalid = np.logical_and.reduce(
        (
            (uvw_points_binned >= 0).all(-1),
            (uvw_points_binned < maxes).all(-1),
            isnotnan,
        )
    )

    uvw_points_binned_with_index_mat = np.concatenate(
        (
            np.repeat(np.arange(0, num_clouds), h * w).reshape(-1, 1),
            uvw_points_binned.reshape(-1, 3),
        ),
        axis=1,
    )

    uvw_points_binned_with_index_mat[~isvalid.reshape(-1), :] = 0
    ind = np.ravel_multi_index(
        uvw_points_binned_with_index_mat.transpose(),
        (num_clouds, map_size, map_size, num_bins),
    )
    ind[~isvalid.reshape(-1)] = 0
    count = np.bincount(
        ind.ravel(),
        isvalid.ravel().astype(np.int32),
        minlength=num_clouds * map_size * map_size * num_bins,
    )

    return count.reshape([*start_shape[:-3], map_size, map_size, num_bins])


================================================
FILE: allenact/embodiedai/models/__init__.py
================================================


================================================
FILE: allenact/embodiedai/models/aux_models.py
================================================
# Original work Copyright (c) Facebook, Inc. and its affiliates.
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Several of the models defined in this file are modified versions of those
found in
https://github.com/joel99/habitat-pointnav-aux/blob/master/habitat_baselines/"""

import torch
import torch.nn as nn

from allenact.embodiedai.aux_losses.losses import (
    InverseDynamicsLoss,
    TemporalDistanceLoss,
    CPCALoss,
    CPCASoftMaxLoss,
)
from allenact.utils.model_utils import FeatureEmbedding


class AuxiliaryModel(nn.Module):
    """Container for the sub-networks needed by the self-supervised auxiliary
    tasks.

    Which sub-networks are created depends on the `aux_uuid` given at
    construction time, see `initialize_model_given_aux_uuid`.
    """

    def __init__(
        self,
        aux_uuid: str,
        action_dim: int,
        obs_embed_dim: int,
        belief_dim: int,
        action_embed_size: int = 4,
        cpca_classifier_hidden_dim: int = 32,
        cpca_softmax_dim: int = 128,
    ):
        """Stores the hyperparameters and builds the sub-networks for the
        auxiliary task identified by `aux_uuid`.

        # Parameters

        aux_uuid : UUID of the auxiliary loss this model serves.
        action_dim : Number of actions (output size of the inverse dynamics decoder,
            and, plus one, the size of the action embedding table).
        obs_embed_dim : Size of the observation embedding.
        belief_dim : Size of the belief (also the hidden size of the CPC|A GRU).
        action_embed_size : Size of the action embeddings fed to the CPC|A GRU.
        cpca_classifier_hidden_dim : Hidden size of the CPC|A MLPs.
        cpca_softmax_dim : Output size of the MLPs used by the softmax CPC|A variant.
        """
        super().__init__()
        self.aux_uuid = aux_uuid
        self.action_dim = action_dim
        self.obs_embed_dim = obs_embed_dim
        self.belief_dim = belief_dim
        self.action_embed_size = action_embed_size
        self.cpca_classifier_hidden_dim = cpca_classifier_hidden_dim
        self.cpca_softmax_dim = cpca_softmax_dim

        self.initialize_model_given_aux_uuid(self.aux_uuid)

    def initialize_model_given_aux_uuid(self, aux_uuid: str):
        """Builds the sub-networks required by the auxiliary loss `aux_uuid`.

        Raises a `ValueError` if the UUID matches none of the known losses.
        """
        if aux_uuid == InverseDynamicsLoss.UUID:
            self.init_inverse_dynamics()
            return
        if aux_uuid == TemporalDistanceLoss.UUID:
            self.init_temporal_distance()
            return
        # The CPC|A checks are substring matches (the CPCA family comes with
        # various k), and the plain CPCA check takes precedence.
        if CPCALoss.UUID in aux_uuid:
            self.init_cpca()
            return
        if CPCASoftMaxLoss.UUID in aux_uuid:
            self.init_cpca_softmax()
            return
        raise ValueError("Unknown Auxiliary Loss UUID")

    def init_inverse_dynamics(self):
        """Creates the linear decoder producing one output per action."""
        decoder_input_dim = 2 * self.obs_embed_dim + self.belief_dim
        self.decoder = nn.Linear(decoder_input_dim, self.action_dim)

    def init_temporal_distance(self):
        """Creates the linear decoder producing a single scalar output."""
        decoder_input_dim = 2 * self.obs_embed_dim + self.belief_dim
        self.decoder = nn.Linear(decoder_input_dim, 1)

    def _init_action_conditioned_context(self):
        """Creates the action embedder and the auto-regressive GRU (used to
        predict future context) shared by both CPC|A variants."""
        # NOTE: add extra 1 in embedding dict cuz we will pad zero actions?
        self.action_embedder = FeatureEmbedding(
            self.action_dim + 1, self.action_embed_size
        )
        self.context_model = nn.GRU(self.action_embed_size, self.belief_dim)

    def init_cpca(self):
        """Creates the CPC|A sub-networks: action embedder, context GRU, and a
        binary classifier over concatenated belief/observation features."""
        self._init_action_conditioned_context()

        # Classifier to estimate mutual information
        classifier_input_dim = self.belief_dim + self.obs_embed_dim
        self.classifier = nn.Sequential(
            nn.Linear(classifier_input_dim, self.cpca_classifier_hidden_dim),
            nn.ReLU(),
            nn.Linear(self.cpca_classifier_hidden_dim, 1),
        )

    def init_cpca_softmax(self):
        """Creates the softmax CPC|A sub-networks: the same embedder/GRU as
        CPC|A plus separate MLPs for the observation embedding and the belief
        (used by the contrastive loss)."""
        self._init_action_conditioned_context()

        hidden_dim = self.cpca_classifier_hidden_dim
        self.visual_mlp = nn.Sequential(
            nn.Linear(self.obs_embed_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, self.cpca_softmax_dim),
        )

        self.belief_mlp = nn.Sequential(
            nn.Linear(self.belief_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, self.cpca_softmax_dim),
        )

    def forward(self, features: torch.FloatTensor):
        """Applies the linear decoder to `features`.

        Only available for the inverse dynamics and temporal distance
        tasks, any other task raises a `NotImplementedError`.
        """
        decoder_tasks = (InverseDynamicsLoss.UUID, TemporalDistanceLoss.UUID)
        if self.aux_uuid not in decoder_tasks:
            raise NotImplementedError(
                f"Auxiliary model with UUID {self.aux_uuid} does not support `forward` call."
            )
        return self.decoder(features)


================================================
FILE: allenact/embodiedai/models/basic_models.py
================================================
"""Basic building block torch networks that can be used across a variety of
tasks."""

from typing import (
    Sequence,
    Dict,
    Union,
    cast,
    List,
    Callable,
    Optional,
    Tuple,
    Any,
)

import gym
import numpy as np
import torch
from gym.spaces.dict import Dict as SpaceDict
import torch.nn as nn

from allenact.algorithms.onpolicy_sync.policy import ActorCriticModel, DistributionType
from allenact.base_abstractions.distributions import CategoricalDistr, Distr
from allenact.base_abstractions.misc import ActorCriticOutput, Memory
from allenact.utils.model_utils import make_cnn, compute_cnn_output
from allenact.utils.system import get_logger


class SimpleCNN(nn.Module):
    """A simple N-layer convolutional network followed by a fully connected
    layer. It takes observations (a dict of tensors) and embeds the `rgb_uuid`
    and/or `depth_uuid` entries.

    # Attributes

    observation_space : The observation_space of the agent, should have `rgb_uuid` or `depth_uuid` as
        a component (otherwise it is a blind model).
    output_size : The size of the embedding vector to produce.
    """

    def __init__(
        self,
        observation_space: SpaceDict,
        output_size: int,
        rgb_uuid: Optional[str],
        depth_uuid: Optional[str],
        layer_channels: Sequence[int] = (32, 64, 32),
        kernel_sizes: Sequence[Tuple[int, int]] = ((8, 8), (4, 4), (3, 3)),
        layers_stride: Sequence[Tuple[int, int]] = ((4, 4), (2, 2), (1, 1)),
        paddings: Sequence[Tuple[int, int]] = ((0, 0), (0, 0), (0, 0)),
        dilations: Sequence[Tuple[int, int]] = ((1, 1), (1, 1), (1, 1)),
        flatten: bool = True,
        output_relu: bool = True,
    ):
        """Initializer.

        # Parameters

        observation_space : See class attributes documentation.
        output_size : See class attributes documentation.
        rgb_uuid : Key of the rgb observation, `None` if rgb is not used.
        depth_uuid : Key of the depth observation, `None` if depth is not used.
        layer_channels, kernel_sizes, layers_stride, paddings, dilations : Per-layer
            convolution hyperparameters, forwarded to `make_cnn`.
        flatten, output_relu : Forwarded to `make_cnn`.
        """
        super().__init__()

        def channels_of(uuid: Optional[str]) -> int:
            # A sensor that is not used contributes zero input channels,
            # otherwise the count is read from the third entry of the
            # sensor's observation space shape.
            if uuid is None:
                return 0
            assert uuid in observation_space.spaces
            n_channels = observation_space.spaces[uuid].shape[2]
            assert n_channels >= 0
            return n_channels

        self.rgb_uuid = rgb_uuid
        self._n_input_rgb = channels_of(self.rgb_uuid)

        self.depth_uuid = depth_uuid
        self._n_input_depth = channels_of(self.depth_uuid)

        if self.is_blind:
            # Blind models own no CNNs (and no CNN hyperparameter attributes).
            return

        # hyperparameters for layers
        self._cnn_layers_channels = list(layer_channels)
        self._cnn_layers_kernel_size = list(kernel_sizes)
        self._cnn_layers_stride = list(layers_stride)
        self._cnn_layers_paddings = list(paddings)
        self._cnn_layers_dilations = list(dilations)

        def build_cnn(uuid: str, n_channels: int) -> nn.Module:
            # The first two entries of the space's shape are taken to be the
            # spatial (height, width) dimensions of the input.
            spatial_dims = np.array(
                observation_space.spaces[uuid].shape[:2], dtype=np.float32
            )
            return self.make_cnn_from_params(
                output_size=output_size,
                input_dims=spatial_dims,
                input_channels=n_channels,
                flatten=flatten,
                output_relu=output_relu,
            )

        if self._n_input_rgb > 0:
            self.rgb_cnn = build_cnn(self.rgb_uuid, self._n_input_rgb)

        if self._n_input_depth > 0:
            self.depth_cnn = build_cnn(self.depth_uuid, self._n_input_depth)

    def make_cnn_from_params(
        self,
        output_size: int,
        input_dims: np.ndarray,
        input_channels: int,
        flatten: bool,
        output_relu: bool,
    ) -> nn.Module:
        """Builds (and initializes) a CNN for inputs with spatial size
        `input_dims` and `input_channels` channels using the stored per-layer
        hyperparameters."""
        # Track how the spatial size shrinks through the conv layers so that
        # `make_cnn` knows the height/width of the final feature map.
        spatial_dims = input_dims
        per_layer_params = zip(
            self._cnn_layers_kernel_size,
            self._cnn_layers_stride,
            self._cnn_layers_paddings,
            self._cnn_layers_dilations,
        )
        for kernel_size, stride, padding, dilation in per_layer_params:
            # noinspection PyUnboundLocalVariable
            spatial_dims = self._conv_output_dim(
                dimension=spatial_dims,
                padding=np.array(padding, dtype=np.float32),
                dilation=np.array(dilation, dtype=np.float32),
                kernel_size=np.array(kernel_size, dtype=np.float32),
                stride=np.array(stride, dtype=np.float32),
            )

        # noinspection PyUnboundLocalVariable
        cnn = make_cnn(
            input_channels=input_channels,
            layer_channels=self._cnn_layers_channels,
            kernel_sizes=self._cnn_layers_kernel_size,
            strides=self._cnn_layers_stride,
            paddings=self._cnn_layers_paddings,
            dilations=self._cnn_layers_dilations,
            output_height=spatial_dims[0],
            output_width=spatial_dims[1],
            output_channels=output_size,
            flatten=flatten,
            output_relu=output_relu,
        )
        self.layer_init(cnn)

        return cnn

    @staticmethod
    def _conv_output_dim(
        dimension: Sequence[int],
        padding: Sequence[int],
        dilation: Sequence[int],
        kernel_size: Sequence[int],
        stride: Sequence[int],
    ) -> Tuple[int, ...]:
        """Computes the output (height, width) of a convolution layer from its
        input (height, width). For parameter definitions see.

        [here](https://pytorch.org/docs/master/nn.html#torch.nn.Conv2d).

        # Parameters

        dimension : See above link.
        padding : See above link.
        dilation : See above link.
        kernel_size : See above link.
        stride : See above link.
        """
        assert len(dimension) == 2

        def out_size(axis: int) -> int:
            receptive_extent = dilation[axis] * (kernel_size[axis] - 1)
            return int(
                np.floor(
                    (
                        (dimension[axis] + 2 * padding[axis] - receptive_extent - 1)
                        / stride[axis]
                    )
                    + 1
                )
            )

        return tuple(out_size(axis) for axis in range(2))

    @staticmethod
    def layer_init(cnn) -> None:
        """Initializes the weights of all Conv2d/Linear layers in `cnn` with
        `kaiming_normal_` and sets their biases (if any) to zero."""
        for layer in cnn:
            if not isinstance(layer, (nn.Conv2d, nn.Linear)):
                continue
            # NOTE(review): the second positional argument of
            # `kaiming_normal_` is `a` (the leaky-relu negative slope), not a
            # gain, so the relu gain is used as a slope here. Left untouched
            # as changing it would alter the initialization of existing
            # experiments - confirm whether this is intended.
            nn.init.kaiming_normal_(layer.weight, nn.init.calculate_gain("relu"))
            if layer.bias is not None:
                nn.init.constant_(layer.bias, val=0)

    @property
    def is_blind(self):
        """True if the model has no visual input channels at all, i.e. both
        the rgb and the depth channel counts are zero."""
        return self._n_input_rgb + self._n_input_depth == 0

    def forward(self, observations: Dict[str, torch.Tensor]):  # type: ignore
        """Embeds the rgb and/or depth observations and concatenates the
        embeddings along the channel dimension. Returns `None` for blind
        models."""
        if self.is_blind:
            return None

        embeddings: List[torch.Tensor] = []
        has_agent_dim: Optional[bool] = None

        sensors = ((self.rgb_uuid, "rgb_cnn"), (self.depth_uuid, "depth_cnn"))
        for uuid, cnn_name in sensors:
            if uuid is None:
                continue

            frames = observations[uuid]

            # Six dimensional inputs carry an additional agent dimension,
            # all sensors must agree on whether it is present.
            is_six_dim = len(frames.shape) == 6
            if has_agent_dim is not None:
                assert (
                    has_agent_dim is is_six_dim
                ), "rgb and depth must both use an agent dim or none"
            has_agent_dim = is_six_dim

            embeddings.append(compute_cnn_output(getattr(self, cnn_name), frames))

        # [step, sampler, agent, channel (, height, width)] with an agent dim,
        # [step, sampler, channel (, height, width)] without.
        channels_dim = 3 if has_agent_dim else 2

        return torch.cat(embeddings, dim=channels_dim)


class RNNStateEncoder(nn.Module):
    """A simple RNN-based model playing a role in many baseline embodied-
    navigation agents.

    See `seq_forward` for more details of how this model is used.
    """

    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        num_layers: int = 1,
        rnn_type: str = "GRU",
        trainable_masked_hidden_state: bool = False,
    ):
        """An RNN for encoding the state in RL. Supports masking the hidden
        state during various timesteps in the forward pass.

        # Parameters

        input_size : The input size of the RNN.
        hidden_size : The hidden size.
        num_layers : The number of recurrent layers.
        rnn_type : The RNN cell type.  Must be GRU or LSTM.
        trainable_masked_hidden_state : If `True` the initial hidden state (used at the start of a Task)
            is trainable (as opposed to being a vector of zeros).
        """

        super().__init__()
        self._num_recurrent_layers = num_layers
        self._rnn_type = rnn_type

        # The recurrent module is looked up by name in `torch.nn`.
        self.rnn = getattr(torch.nn, rnn_type)(
            input_size=input_size, hidden_size=hidden_size, num_layers=num_layers
        )

        self.trainable_masked_hidden_state = trainable_masked_hidden_state
        if trainable_masked_hidden_state:
            # One learnable vector per layer, shared by all samplers (it is
            # repeated along dim 1 in `_mask_hidden`).
            self.init_hidden_state = nn.Parameter(
                0.1 * torch.randn((num_layers, 1, hidden_size)), requires_grad=True
            )

        self.layer_init()

    def layer_init(self):
        """Initialize the RNN parameters in the model.

        Parameters whose name contains "weight" are orthogonally
        initialized, those whose name contains "bias" are set to zero.
        """
        for name, param in self.rnn.named_parameters():
            if "weight" in name:
                nn.init.orthogonal_(param)
            elif "bias" in name:
                nn.init.constant_(param, 0)

    @property
    def num_recurrent_layers(self) -> int:
        """The number of recurrent layers in the network.

        Doubled for LSTMs, whose two state tensors are stacked along the
        layer dimension by `_pack_hidden`.
        """
        return self._num_recurrent_layers * (2 if "LSTM" in self._rnn_type else 1)

    def _pack_hidden(
        self, hidden_states: Union[torch.FloatTensor, Sequence[torch.FloatTensor]]
    ) -> torch.FloatTensor:
        """Stacks hidden states in an LSTM together (if using a GRU rather than
        an LSTM this is just the identity).

        # Parameters

        hidden_states : The hidden states to (possibly) stack.
        """
        if "LSTM" in self._rnn_type:
            hidden_states = cast(
                torch.FloatTensor,
                torch.cat([hidden_states[0], hidden_states[1]], dim=0),
            )
        return cast(torch.FloatTensor, hidden_states)

    def _unpack_hidden(
        self, hidden_states: torch.FloatTensor
    ) -> Union[torch.FloatTensor, Tuple[torch.FloatTensor, torch.FloatTensor]]:
        """Inverse of `_pack_hidden`.

        For an LSTM the packed tensor is split along dim 0 into its first
        `num_layers` rows and the remaining rows; otherwise the input is
        returned unchanged.
        """
        if "LSTM" in self._rnn_type:
            new_hidden_states = (
                hidden_states[0 : self._num_recurrent_layers],
                hidden_states[self._num_recurrent_layers :],
            )
            return cast(Tuple[torch.FloatTensor, torch.FloatTensor], new_hidden_states)
        return cast(torch.FloatTensor, hidden_states)

    def _mask_hidden(
        self,
        hidden_states: Union[Tuple[torch.FloatTensor, ...], torch.FloatTensor],
        masks: torch.FloatTensor,
    ) -> Union[Tuple[torch.FloatTensor, ...], torch.FloatTensor]:
        """Mask input hidden states given `masks`. Useful when masks represent
        steps on which a task has completed.

        # Parameters

        hidden_states : The hidden states.
        masks : Masks to apply to hidden states (see seq_forward).

        # Returns

        Masked hidden states. Here masked hidden states will be replaced with
        either all zeros (if `trainable_masked_hidden_state` was False) and will
        otherwise be a learnable collection of parameters.
        """
        if not self.trainable_masked_hidden_state:
            if isinstance(hidden_states, tuple):
                hidden_states = tuple(
                    cast(torch.FloatTensor, v * masks) for v in hidden_states
                )
            else:
                hidden_states = cast(torch.FloatTensor, masks * hidden_states)
        else:
            if isinstance(hidden_states, tuple):
                # noinspection PyTypeChecker
                hidden_states = tuple(
                    v * masks  # type:ignore
                    + (1.0 - masks) * (self.init_hidden_state.repeat(1, v.shape[1], 1))  # type: ignore
                    for v in hidden_states  # type:ignore
                )  # type: ignore
            else:
                # noinspection PyTypeChecker
                hidden_states = masks * hidden_states + (1 - masks) * (  # type: ignore
                    self.init_hidden_state.repeat(1, hidden_states.shape[1], 1)
                )

        return hidden_states

    def single_forward(
        self,
        x: torch.FloatTensor,
        hidden_states: torch.FloatTensor,
        masks: torch.FloatTensor,
    ) -> Tuple[
        torch.FloatTensor, Union[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]
    ]:
        """Forward for a single-step input.

        Only `masks[0]` is used to mask the incoming hidden state.
        """
        (
            x,
            hidden_states,
            masks,
            mem_agent,
            obs_agent,
            nsteps,
            nsamplers,
            nagents,
        ) = self.adapt_input(x, hidden_states, masks)

        unpacked_hidden_states = self._unpack_hidden(hidden_states)

        x, unpacked_hidden_states = self.rnn(
            x,
            self._mask_hidden(
                unpacked_hidden_states, cast(torch.FloatTensor, masks[0].view(1, -1, 1))
            ),
        )

        return self.adapt_result(
            x,
            self._pack_hidden(unpacked_hidden_states),
            mem_agent,
            obs_agent,
            nsteps,
            nsamplers,
            nagents,
        )

    def adapt_input(
        self,
        x: torch.FloatTensor,
        hidden_states: torch.FloatTensor,
        masks: torch.FloatTensor,
    ) -> Tuple[
        torch.FloatTensor,
        torch.FloatTensor,
        torch.FloatTensor,
        bool,
        bool,
        int,
        int,
        int,
    ]:
        """Flatten the (sampler, agent) dimensions of all inputs.

        # Parameters

        x : Observations, [step, sampler, data] or [step, sampler, agent, data].
        hidden_states : Packed hidden states, [layer, sampler, hidden] or
            [layer, sampler, agent, hidden].
        masks : Masks whose first two dimensions are (step, sampler).

        # Returns

        The flattened `x`, `hidden_states` and `masks`, followed by whether the
        hidden states / the observations had an agent dimension and by the
        number of steps, samplers and agents. The number of agents is taken
        from `hidden_states` and is 1 when it has no agent dimension.
        """
        nsteps, nsamplers = masks.shape[:2]

        assert len(hidden_states.shape) in [
            3,
            4,
        ], "hidden_states must be [layer, sampler, hidden] or [layer, sampler, agent, hidden]"

        assert len(x.shape) in [
            3,
            4,
        ], "observations must be [step, sampler, data] or [step, sampler, agent, data]"

        nagents = 1
        mem_agent: bool
        if len(hidden_states.shape) == 4:  # [layer, sampler, agent, hidden]
            mem_agent = True
            nagents = hidden_states.shape[2]
        else:  # [layer, sampler, hidden]
            mem_agent = False

        obs_agent: bool
        if len(x.shape) == 4:  # [step, sampler, agent, dims]
            obs_agent = True
        else:  # [step, sampler, dims]
            obs_agent = False

        # Flatten (nsamplers, nagents). `reshape` (rather than `view`) is used
        # so that non-contiguous inputs, e.g. permuted tensors, are accepted.
        x = x.reshape(nsteps, nsamplers * nagents, -1)  # type:ignore
        masks = masks.expand(-1, -1, nagents).reshape(  # type:ignore
            nsteps, nsamplers * nagents
        )

        # Flatten (nsamplers, nagents) of the hidden states as well.
        hidden_states = hidden_states.reshape(  # type:ignore
            self.num_recurrent_layers, nsamplers * nagents, -1
        )

        # noinspection PyTypeChecker
        return x, hidden_states, masks, mem_agent, obs_agent, nsteps, nsamplers, nagents

    def adapt_result(
        self,
        outputs: torch.FloatTensor,
        hidden_states: torch.FloatTensor,
        mem_agent: bool,
        obs_agent: bool,
        nsteps: int,
        nsamplers: int,
        nagents: int,
    ) -> Tuple[
        torch.FloatTensor,
        torch.FloatTensor,
    ]:
        """Undo the flattening done by `adapt_input`.

        The agent dimension is restored in `outputs` only if the
        observations had one (`obs_agent`) and in `hidden_states` only if
        the memory had one (`mem_agent`).
        """
        output_dims = (nsteps, nsamplers) + ((nagents, -1) if obs_agent else (-1,))
        hidden_dims = (self.num_recurrent_layers, nsamplers) + (
            (nagents, -1) if mem_agent else (-1,)
        )

        outputs = cast(torch.FloatTensor, outputs.view(*output_dims))
        hidden_states = cast(
            torch.FloatTensor,
            hidden_states.view(*hidden_dims),
        )

        return outputs, hidden_states

    def seq_forward(  # type: ignore
        self,
        x: torch.FloatTensor,
        hidden_states: torch.FloatTensor,
        masks: torch.FloatTensor,
    ) -> Tuple[
        torch.FloatTensor, Union[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]
    ]:
        """Forward for a sequence of length T.

        The sequence is split into chunks at every step where some mask is
        zero; each chunk is run through the RNN in one call, with the hidden
        state masked at the chunk's first step.

        # Parameters

        x : (Steps, Samplers, Agents, -1) tensor.
        hidden_states : The starting hidden states.
        masks : A (Steps, Samplers, Agents) tensor.
            The masks to be applied to hidden state at every timestep, equal to 0 whenever the previous step finalized
            the task, 1 elsewhere.
        """
        (
            x,
            hidden_states,
            masks,
            mem_agent,
            obs_agent,
            nsteps,
            nsamplers,
            nagents,
        ) = self.adapt_input(x, hidden_states, masks)

        # Steps t >= 1 at which at least one flattened entry has a zero mask.
        # Step 0 is always a chunk boundary, so it does not need detecting.
        reset_steps = (masks[1:] == 0.0).any(dim=-1).nonzero().flatten()
        # +1 corrects for the `masks[1:]` offset; t=0 and t=T are added so that
        # consecutive entries delimit the chunks.
        chunk_bounds: List[int] = [0] + (reset_steps + 1).cpu().tolist() + [nsteps]

        unpacked_hidden_states = self._unpack_hidden(
            cast(torch.FloatTensor, hidden_states)
        )

        outputs = []
        for start_idx, end_idx in zip(chunk_bounds[:-1], chunk_bounds[1:]):
            # process steps that don't have any zeros in masks together

            # noinspection PyTypeChecker
            rnn_scores, unpacked_hidden_states = self.rnn(
                x[start_idx:end_idx],
                self._mask_hidden(
                    unpacked_hidden_states,
                    cast(torch.FloatTensor, masks[start_idx].view(1, -1, 1)),
                ),
            )

            outputs.append(rnn_scores)

        return self.adapt_result(
            cast(torch.FloatTensor, torch.cat(outputs, dim=0)),
            self._pack_hidden(unpacked_hidden_states),
            mem_agent,
            obs_agent,
            nsteps,
            nsamplers,
            nagents,
        )

    def forward(  # type: ignore
        self,
        x: torch.FloatTensor,
        hidden_states: torch.FloatTensor,
        masks: torch.FloatTensor,
    ) -> Tuple[
        torch.FloatTensor, Union[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]
    ]:
        """Dispatch to `single_forward` when `masks` has a single step and to
        `seq_forward` otherwise."""
        nsteps = masks.shape[0]
        if nsteps == 1:
            return self.single_forward(x, hidden_states, masks)
        return self.seq_forward(x, hidden_states, masks)


class LinearActorCritic(ActorCriticModel[CategoricalDistr]):
    """Memory-less actor-critic made of a single linear layer.

    The layer maps the 1-dimensional `Box` observation stored under
    `input_uuid` to `action_space.n + 1` outputs: the first
    `action_space.n` are used as action logits and the last one as the
    value estimate.
    """

    def __init__(
        self,
        input_uuid: str,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
    ):
        """
        # Parameters

        input_uuid : Key of the observation consumed by the model; must be
            present in `observation_space`.
        action_space : The discrete action space.
        observation_space : The observation space; the entry for
            `input_uuid` must be a `Box` with a 1-dimensional shape.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        assert (
            input_uuid in observation_space.spaces
        ), "LinearActorCritic expects only a single observational input."
        self.input_uuid = input_uuid

        box_space: gym.spaces.Box = observation_space[self.input_uuid]
        # The two literals below are concatenated, hence the leading space.
        assert isinstance(box_space, gym.spaces.Box), (
            "LinearActorCritic requires that"
            " observation space corresponding to the input uuid is a Box space."
        )
        assert len(box_space.shape) == 1
        self.in_dim = box_space.shape[0]

        # One output per action logit plus one for the value.
        self.linear = nn.Linear(self.in_dim, action_space.n + 1)

        nn.init.orthogonal_(self.linear.weight)
        nn.init.constant_(self.linear.bias, 0)

    # noinspection PyMethodMayBeStatic
    def _recurrent_memory_specification(self):
        """Return `None`: this model specifies no recurrent memory."""
        return None

    def forward(self, observations, memory, prev_actions, masks):
        """Compute the action distribution and value estimate.

        Only `observations[self.input_uuid]` is read; `memory`,
        `prev_actions` and `masks` are ignored.

        # Returns

        A tuple of the `ActorCriticOutput` and `None` (no memory).
        """
        out = self.linear(observations[self.input_uuid])

        # noinspection PyArgumentList
        return (
            ActorCriticOutput(
                # ensure [steps, samplers, ...]
                distributions=CategoricalDistr(logits=out[..., :-1]),
                # ensure [steps, samplers, flattened]
                values=cast(torch.FloatTensor, out[..., -1:].view(*out.shape[:2], -1)),
                extras={},
            ),
            None,
        )


class RNNActorCritic(ActorCriticModel[Distr]):
    """Recurrent actor-critic: an `RNNStateEncoder` followed by a non-recurrent
    actor-critic head.

    The 1-dimensional `Box` observation stored under `input_uuid` is
    encoded by the RNN; the RNN output is then passed to the head (by
    default a `LinearActorCritic`) under the key `"rnn_<input_uuid>"`.
    """

    def __init__(
        self,
        input_uuid: str,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        hidden_size: int = 128,
        num_layers: int = 1,
        rnn_type: str = "GRU",
        head_type: Callable[..., ActorCriticModel[Distr]] = LinearActorCritic,
    ):
        """
        # Parameters

        input_uuid : Key of the observation consumed by the model; must be
            present in `observation_space`.
        action_space : The discrete action space.
        observation_space : The observation space; the entry for
            `input_uuid` must be a `Box` with a 1-dimensional shape.
        hidden_size : Hidden size of the RNN (and input size of the head).
        num_layers : Number of recurrent layers.
        rnn_type : Name of the recurrent module, forwarded to
            `RNNStateEncoder`.
        head_type : Callable building the non-recurrent head; it is called
            with `input_uuid`, `action_space` and `observation_space`.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)
        self.hidden_size = hidden_size
        self.rnn_type = rnn_type

        assert (
            input_uuid in observation_space.spaces
        ), "RNNActorCritic expects only a single observational input."
        self.input_uuid = input_uuid

        box_space: gym.spaces.Box = observation_space[self.input_uuid]
        # The two literals below are concatenated, hence the leading space.
        assert isinstance(box_space, gym.spaces.Box), (
            "RNNActorCritic requires that"
            " observation space corresponding to the input uuid is a Box space."
        )
        assert len(box_space.shape) == 1
        self.in_dim = box_space.shape[0]

        self.state_encoder = RNNStateEncoder(
            input_size=self.in_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            rnn_type=rnn_type,
            trainable_masked_hidden_state=True,
        )

        # Key under which the RNN output is handed to the head.
        self.head_uuid = "{}_{}".format("rnn", input_uuid)

        self.ac_nonrecurrent_head: ActorCriticModel[Distr] = head_type(
            input_uuid=self.head_uuid,
            action_space=action_space,
            observation_space=SpaceDict(
                {
                    self.head_uuid: gym.spaces.Box(
                        low=np.float32(0.0), high=np.float32(1.0), shape=(hidden_size,)
                    )
                }
            ),
        )

        self.memory_key = "rnn"

    @property
    def recurrent_hidden_state_size(self) -> int:
        """Size of the RNN hidden state."""
        return self.hidden_size

    @property
    def num_recurrent_layers(self) -> int:
        """Number of recurrent layers as reported by the state encoder."""
        return self.state_encoder.num_recurrent_layers

    def _recurrent_memory_specification(self):
        """Describe the single memory tensor stored under `self.memory_key`:
        (layer, sampler, hidden) dimensions with dtype `torch.float32`."""
        return {
            self.memory_key: (
                (
                    ("layer", self.num_recurrent_layers),
                    ("sampler", None),
                    ("hidden", self.recurrent_hidden_state_size),
                ),
                torch.float32,
            )
        }

    def forward(  # type:ignore
        self,
        observations: Dict[str, Union[torch.FloatTensor, Dict[str, Any]]],
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Run the RNN encoder and then the non-recurrent head.

        If `self.memory_key` is missing from `memory`, a warning is logged
        and a zero tensor is appended to `memory` under that key before
        the RNN is run.

        # Returns

        The head's output and `memory` updated with the new RNN state.
        """

        if self.memory_key not in memory:
            get_logger().warning(
                f"Key {self.memory_key} not found in memory,"
                f" initializing this as all zeros."
            )

            obs = observations[self.input_uuid]
            # `obs.shape[1]` is used as the size of the sampler dimension.
            memory.check_append(
                key=self.memory_key,
                tensor=obs.new(
                    self.num_recurrent_layers,
                    obs.shape[1],
                    self.recurrent_hidden_state_size,
                )
                .float()
                .zero_(),
                sampler_dim=1,
            )

        rnn_out, mem_return = self.state_encoder(
            x=observations[self.input_uuid],
            hidden_states=memory.tensor(self.memory_key),
            masks=masks,
        )

        # noinspection PyCallingNonCallable
        out, _ = self.ac_nonrecurrent_head(
            observations={self.head_uuid: rnn_out},
            memory=None,
            prev_actions=prev_actions,
            masks=masks,
        )

        # noinspection PyArgumentList
        return (
            out,
            memory.set_tensor(self.memory_key, mem_return),
        )


================================================
FILE: allenact/embodiedai/models/fusion_models.py
================================================
# Original work Copyright (c) Facebook, Inc. and its affiliates.
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Adapted from https://github.com/joel99/habitat-pointnav-aux/blob/master/habitat_baselines/

import math
from typing import Tuple

import torch
import torch.nn as nn


class Fusion(nn.Module):
    """Base class for belief fusion, following "Auxiliary Tasks Speed Up
    Learning PointGoal Navigation" (Ye, 2020).

    The beliefs of all auxiliary tasks are combined into a single belief as
    a weighted sum. Subclasses provide the weights by implementing
    `get_belief_weights`.
    """

    def __init__(self, hidden_size, obs_embed_size, num_tasks):
        super().__init__()
        self.num_tasks = num_tasks  # k
        self.obs_embed_size = obs_embed_size  # Z
        self.hidden_size = hidden_size  # H

    def forward(
        self,
        all_beliefs: torch.FloatTensor,  # (T, N, H, K)
        obs_embeds: torch.FloatTensor,  # (T, N, Z)
    ) -> Tuple[torch.FloatTensor, torch.FloatTensor]:  # (T, N, H), (T, N, K)
        """Fuse the per-task beliefs into one belief per (step, sampler).

        # Returns

        The fused beliefs, (T, N, H), and the weights used, (T, N, K).
        """
        num_steps, num_samplers, _, _ = all_beliefs.shape
        flat_batch = num_steps * num_samplers

        # Merge the step and sampler dimensions for the batched product.
        flat_beliefs = all_beliefs.view(flat_batch, self.hidden_size, self.num_tasks)
        flat_embeds = obs_embeds.view(flat_batch, -1)

        task_weights = self.get_belief_weights(
            all_beliefs=flat_beliefs,  # (T*N, H, K)
            obs_embeds=flat_embeds,  # (T*N, Z)
        )
        weight_column = task_weights.unsqueeze(-1)  # (T*N, K, 1)

        fused = torch.bmm(flat_beliefs, weight_column)  # (T*N, H, 1)

        fused = fused.squeeze(-1).view(num_steps, num_samplers, self.hidden_size)
        unflat_weights = weight_column.squeeze(-1).view(
            num_steps, num_samplers, self.num_tasks
        )

        return fused, unflat_weights

    def get_belief_weights(
        self,
        all_beliefs: torch.FloatTensor,  # (T*N, H, K)
        obs_embeds: torch.FloatTensor,  # (T*N, Z)
    ) -> torch.FloatTensor:  # (T*N, K)
        """Return one weight per task for every flattened batch entry.

        Must be implemented by subclasses.
        """
        raise NotImplementedError()


class AverageFusion(Fusion):
    """Fusion giving every task the same weight, `1 / num_tasks`."""

    UUID = "avg"

    def get_belief_weights(
        self,
        all_beliefs: torch.FloatTensor,  # (T*N, H, K)
        obs_embeds: torch.FloatTensor,  # (T*N, Z)
    ) -> torch.FloatTensor:  # (T*N, K)
        """Return uniform weights with the dtype and device of
        `all_beliefs`; `obs_embeds` is not used."""
        flat_batch = all_beliefs.shape[0]
        uniform = all_beliefs.new_ones((flat_batch, self.num_tasks))
        uniform.div_(self.num_tasks)
        return uniform


class SoftmaxFusion(Fusion):
    """Fusion whose weights are a softmax over a linear map of the observation
    embedding.

    See "Situational Fusion of Visual Representation for Visual Navigation",
    https://arxiv.org/abs/1908.09073.
    """

    UUID = "smax"

    def __init__(self, hidden_size, obs_embed_size, num_tasks):
        super().__init__(
            hidden_size=hidden_size,
            obs_embed_size=obs_embed_size,
            num_tasks=num_tasks,
        )
        # Maps the observation embedding (the rnn input) to one score per
        # task; the beliefs themselves are ignored.
        self.linear = nn.Linear(obs_embed_size, num_tasks)

    def get_belief_weights(
        self,
        all_beliefs: torch.Tensor,  # (T*N, H, K)
        obs_embeds: torch.Tensor,  # (T*N, Z)
    ) -> torch.Tensor:  # (T*N, K)
        """Return softmax-normalized task scores computed from `obs_embeds`
        only."""
        task_scores = self.linear(obs_embeds)  # (T*N, K)
        return torch.softmax(task_scores, dim=-1)


class AttentiveFusion(Fusion):
    """Fusion using scaled dot-product attention ("Attention is All You Need",
    https://arxiv.org/abs/1706.03762).

    The query is a linear map of the observation embedding and the keys are
    the per-task beliefs.
    """

    UUID = "attn"

    def __init__(self, hidden_size, obs_embed_size, num_tasks):
        super().__init__(
            hidden_size=hidden_size,
            obs_embed_size=obs_embed_size,
            num_tasks=num_tasks,
        )
        # Projects the observation embedding into the belief space.
        self.linear = nn.Linear(obs_embed_size, hidden_size)

    def get_belief_weights(
        self,
        all_beliefs: torch.Tensor,  # (T*N, H, K)
        obs_embeds: torch.Tensor,  # (T*N, Z)
    ) -> torch.Tensor:  # (T*N, K)
        """Return the softmax of the query/belief dot products, scaled by
        `sqrt(hidden_size)`."""
        scale = math.sqrt(self.hidden_size)

        query = self.linear(obs_embeds).unsqueeze(1)  # (T*N, 1, H)
        dot_products = torch.bmm(query, all_beliefs).squeeze(1)  # (T*N, K)

        return torch.softmax(dot_products / scale, dim=-1)  # (T*N, K)


================================================
FILE: allenact/embodiedai/models/resnet.py
================================================
# Original work Copyright (c) Facebook, Inc. and its affiliates.
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Adapted from https://github.com/joel99/habitat-pointnav-aux/blob/master/habitat_baselines/

from typing import Optional

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from gym.spaces.dict import Dict as SpaceDict

from allenact.utils.model_utils import Flatten
from allenact.utils.system import get_logger


def conv3x3(in_planes, out_planes, stride=1, groups=1):
    """Build a bias-free 3x3 `nn.Conv2d` with padding 1.

    `stride` and `groups` are forwarded to the convolution.
    """
    conv_kwargs = dict(
        kernel_size=3,
        padding=1,
        stride=stride,
        groups=groups,
        bias=False,
    )
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)


def conv1x1(in_planes, out_planes, stride=1):
    """Build a bias-free 1x1 `nn.Conv2d` with the given stride."""
    conv_kwargs = dict(kernel_size=1, stride=stride, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by a
    `GroupNorm`, with a ReLU after the first norm and after the residual sum."""

    expansion = 1
    resneXt = False

    def __init__(
        self,
        inplanes,
        planes,
        ngroups,
        stride=1,
        downsample=None,
        cardinality=1,
    ):
        super().__init__()
        # `stride` only applies to the first convolution.
        branch = [
            conv3x3(inplanes, planes, stride, groups=cardinality),
            nn.GroupNorm(ngroups, planes),
            nn.ReLU(True),
            conv3x3(planes, planes, groups=cardinality),
            nn.GroupNorm(ngroups, planes),
        ]
        self.convs = nn.Sequential(*branch)
        self.downsample = downsample
        self.relu = nn.ReLU(True)

    def forward(self, x):
        """Return `relu(convs(x) + shortcut)`, where the shortcut is `x` itself
        or `downsample(x)` when a downsample module was given."""
        out = self.convs(x)
        shortcut = x if self.downsample is None else self.downsample(x)
        return self.relu(out + shortcut)


def _build_bottleneck_branch(inplanes, planes, ngroups, stride, expansion, groups=1):
    """Build the 1x1 -> 3x3 -> 1x1 convolutional branch of a bottleneck block.

    Every convolution is followed by a `GroupNorm`; a ReLU follows the first
    two norms only. `stride` and `groups` apply to the 3x3 convolution and
    the last convolution outputs `planes * expansion` channels.
    """
    out_planes = planes * expansion
    branch = [
        # reduce
        conv1x1(inplanes, planes),
        nn.GroupNorm(ngroups, planes),
        nn.ReLU(True),
        # spatial
        conv3x3(planes, planes, stride, groups=groups),
        nn.GroupNorm(ngroups, planes),
        nn.ReLU(True),
        # expand
        conv1x1(planes, out_planes),
        nn.GroupNorm(ngroups, out_planes),
    ]
    return nn.Sequential(*branch)


class SE(nn.Module):
    """Squeeze-and-excite gate.

    The input is average-pooled to one value per channel, passed through a
    two-layer bottleneck MLP (width `int(planes / r)`) ending in a sigmoid,
    and returned as a `(batch, channels, 1, 1)` tensor of per-channel gates.
    """

    def __init__(self, planes, r=16):
        super().__init__()
        bottleneck_width = int(planes / r)
        self.squeeze = nn.AdaptiveAvgPool2d(1)
        self.excite = nn.Sequential(
            nn.Linear(planes, bottleneck_width),
            nn.ReLU(True),
            nn.Linear(bottleneck_width, planes),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return the per-channel gates for a 4-dimensional input `x`."""
        batch, channels, _, _ = x.size()
        pooled = self.squeeze(x).view(batch, channels)
        gates = self.excite(pooled)
        return gates.view(batch, channels, 1, 1)


def _build_se_branch(planes, r=16):
    """Build an `SE` gate for `planes` channels with reduction ratio `r`."""
    return SE(planes=planes, r=r)


class Bottleneck(nn.Module):
    """Residual bottleneck block whose conv branch is built by
    `_build_bottleneck_branch`; it outputs `planes * expansion` channels."""

    expansion = 4
    resneXt = False

    def __init__(
        self,
        inplanes,
        planes,
        ngroups,
        stride=1,
        downsample=None,
        cardinality=1,
    ):
        super().__init__()
        self.convs = _build_bottleneck_branch(
            inplanes, planes, ngroups, stride, self.expansion, groups=cardinality
        )
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def _impl(self, x):
        """Return `relu(convs(x) + shortcut)`, where the shortcut is `x` itself
        or `downsample(x)` when a downsample module was given."""
        out = self.convs(x)
        shortcut = x if self.downsample is None else self.downsample(x)
        return self.relu(out + shortcut)

    def forward(self, x):
        """Delegate to `_impl`, which subclasses override."""
        return self._impl(x)


class SEBottleneck(Bottleneck):
    """`Bottleneck` whose conv branch output is rescaled per channel by a
    squeeze-and-excite gate before the residual sum."""

    def __init__(
        self,
        inplanes,
        planes,
        ngroups,
        stride=1,
        downsample=None,
        cardinality=1,
    ):
        super().__init__(inplanes, planes, ngroups, stride, downsample, cardinality)

        # The gate acts on the branch output, i.e. on the expanded channels.
        self.se = _build_se_branch(planes * self.expansion)

    def _impl(self, x):
        """Return `relu(se(convs(x)) * convs(x) + shortcut)`."""
        branch_out = self.convs(x)
        gated = self.se(branch_out) * branch_out

        shortcut = x if self.downsample is None else self.downsample(x)
        return self.relu(gated + shortcut)


class SEResNeXtBottleneck(SEBottleneck):
    """`SEBottleneck` variant flagged as ResNeXt, with a channel expansion of
    2."""

    resneXt = True
    expansion = 2


class ResNeXtBottleneck(Bottleneck):
    """`Bottleneck` variant flagged as ResNeXt, with a channel expansion of
    2."""

    resneXt = True
    expansion = 2


class GroupNormResNet(nn.Module):
    """ResNet-style backbone that uses `GroupNorm` for normalization.

    It consists of a 7x7 stride-2 stem, a stride-2 max-pool and four stages
    of residual blocks; stages 2-4 use stride 2 and double the planes of the
    previous stage.

    # Attributes

    final_channels : Number of channels output by the last stage.
    final_spatial_compress : Set to `1 / 32`.
    """

    def __init__(self, in_channels, base_planes, ngroups, block, layers, cardinality=1):
        """
        # Parameters

        in_channels : Number of input channels of the stem convolution.
        base_planes : Number of channels output by the stem.
        ngroups : Number of groups for every `GroupNorm`.
        block : Residual block class; must expose `expansion` and `resneXt`.
        layers : Number of blocks in each of the four stages.
        cardinality : Forwarded to the first block of every stage.
        """
        super().__init__()
        stem_conv = nn.Conv2d(
            in_channels,
            base_planes,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
        )
        self.conv1 = nn.Sequential(
            stem_conv,
            nn.GroupNorm(ngroups, base_planes),
            nn.ReLU(True),
        )
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.cardinality = cardinality

        # Channels entering the next stage; updated by `_make_layer`.
        self.inplanes = base_planes
        # ResNeXt blocks start from twice the stem width.
        stage_planes = base_planes * 2 if block.resneXt else base_planes

        self.layer1 = self._make_layer(block, ngroups, stage_planes, layers[0])
        self.layer2 = self._make_layer(
            block, ngroups, stage_planes * 2, layers[1], stride=2
        )
        self.layer3 = self._make_layer(
            block, ngroups, stage_planes * 4, layers[2], stride=2
        )
        self.layer4 = self._make_layer(
            block, ngroups, stage_planes * 8, layers[3], stride=2
        )

        self.final_channels = self.inplanes
        self.final_spatial_compress = 1.0 / 32

    def _make_layer(self, block, ngroups, planes, blocks, stride=1):
        """Build one stage of `blocks` residual blocks.

        Only the first block receives `stride`, the downsample shortcut and
        `self.cardinality`. As a side effect `self.inplanes` is set to the
        stage's output channel count.
        """
        out_planes = planes * block.expansion

        # A projection shortcut is needed whenever the first block changes the
        # resolution or the number of channels.
        downsample = None
        if not (stride == 1 and self.inplanes == out_planes):
            downsample = nn.Sequential(
                conv1x1(self.inplanes, out_planes, stride),
                nn.GroupNorm(ngroups, out_planes),
            )

        stage = [
            block(
                self.inplanes,
                planes,
                ngroups,
                stride,
                downsample,
                cardinality=self.cardinality,
            )
        ]
        self.inplanes = out_planes
        # NOTE(review): the follow-up blocks are built without `cardinality`
        # and so use the block's default -- confirm this is intended.
        for _ in range(1, blocks):
            stage.append(block(self.inplanes, planes, ngroups))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Apply the stem, the max-pool and the four stages in order."""
        x = self.maxpool(self.conv1(x))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        return x


def gnresnet18(in_channels, base_planes, ngroups):
    """Build a `GroupNormResNet` of `BasicBlock`s with stage depths
    [2, 2, 2, 2]."""
    return GroupNormResNet(
        in_channels, base_planes, ngroups, block=BasicBlock, layers=[2, 2, 2, 2]
    )


def gnresnet50(in_channels, base_planes, ngroups):
    """Build a `GroupNormResNet` of `Bottleneck` blocks with stage depths
    [3, 4, 6, 3]."""
    return GroupNormResNet(
        in_channels, base_planes, ngroups, block=Bottleneck, layers=[3, 4, 6, 3]
    )


def gnresneXt50(in_channels, base_planes, ngroups):
    """Build a `GroupNormResNet` of `ResNeXtBottleneck` blocks with stage
    depths [3, 4, 6, 3] and cardinality `int(base_planes / 2)`."""
    half_planes = int(base_planes / 2)
    return GroupNormResNet(
        in_channels,
        base_planes,
        ngroups,
        block=ResNeXtBottleneck,
        layers=[3, 4, 6, 3],
        cardinality=half_planes,
    )


def se_gnresnet50(in_channels, base_planes, ngroups):
    """Build a `GroupNormResNet` of `SEBottleneck` blocks with stage depths
    [3, 4, 6, 3]."""
    return GroupNormResNet(
        in_channels, base_planes, ngroups, block=SEBottleneck, layers=[3, 4, 6, 3]
    )


def se_gnresneXt50(in_channels, base_planes, ngroups):
    """Build a `GroupNormResNet` of `SEResNeXtBottleneck` blocks with stage
    depths [3, 4, 6, 3] and cardinality `int(base_planes / 2)`."""
    half_planes = int(base_planes / 2)
    return GroupNormResNet(
        in_channels,
        base_planes,
        ngroups,
        block=SEResNeXtBottleneck,
        layers=[3, 4, 6, 3],
        cardinality=half_planes,
    )


def se_gnresneXt101(in_channels, base_planes, ngroups):
    """Build a `GroupNormResNet` of `SEResNeXtBottleneck` blocks with stage
    depths [3, 4, 23, 3] and cardinality `int(base_planes / 2)`."""
    half_planes = int(base_planes / 2)
    return GroupNormResNet(
        in_channels,
        base_planes,
        ngroups,
        block=SEResNeXtBottleneck,
        layers=[3, 4, 23, 3],
        cardinality=half_planes,
    )


class GroupNormResNetEncoder(nn.Module):
    """Visual encoder: a group-norm ResNet backbone, a channel-compression
    convolution and a linear head.

    The rgb and depth observations (whichever are configured) are
    concatenated along the channel dimension and fed to a single backbone.
    When neither is configured the encoder is "blind", builds no layers and
    its `forward` returns `None`.
    """

    def __init__(
        self,
        observation_space: SpaceDict,
        rgb_uuid: Optional[str],
        depth_uuid: Optional[str],
        output_size: int,
        baseplanes=32,
        ngroups=32,
        make_backbone=None,
    ):
        """
        # Parameters

        observation_space : Space containing the rgb / depth entries.
        rgb_uuid : Key of the rgb observation, or `None` to not use rgb.
        depth_uuid : Key of the depth observation, or `None` to not use depth.
        output_size : Size of the embedding produced by the head.
        baseplanes : Forwarded to `make_backbone`.
        ngroups : Forwarded to `make_backbone`.
        make_backbone : Callable `(input_channels, baseplanes, ngroups)`
            returning the backbone; it is called whenever the encoder is not
            blind.
        """
        super().__init__()

        # uuids of the observations actually consumed, in concatenation order.
        self._inputs = []

        self.rgb_uuid = rgb_uuid
        if rgb_uuid is None:
            self._n_input_rgb = 0
        else:
            assert self.rgb_uuid in observation_space.spaces
            # The channel count is read from the third entry of the shape.
            self._n_input_rgb = observation_space.spaces[self.rgb_uuid].shape[2]
            assert self._n_input_rgb >= 0
            self._inputs.append(self.rgb_uuid)

        self.depth_uuid = depth_uuid
        if depth_uuid is None:
            self._n_input_depth = 0
        else:
            assert self.depth_uuid in observation_space.spaces
            self._n_input_depth = observation_space.spaces[self.depth_uuid].shape[2]
            assert self._n_input_depth >= 0
            self._inputs.append(self.depth_uuid)

        if self.is_blind:
            return

        # H (=W) / 2: `forward` average-pools the input by 2 before the
        # backbone. Only the first input's first shape entry is consulted.
        first_space = observation_space.spaces[self._inputs[0]]
        spatial_size = first_space.shape[0] // 2

        # RGBD into one model
        input_channels = self._n_input_rgb + self._n_input_depth  # C

        self.backbone = make_backbone(input_channels, baseplanes, ngroups)

        # fix bug in habitat that uses int()
        final_spatial = int(
            np.ceil(spatial_size * self.backbone.final_spatial_compress)
        )
        # Pick the channel count so the flattened compressed map has roughly
        # this many features.
        after_compression_flat_size = 2048
        num_compression_channels = int(
            round(after_compression_flat_size / (final_spatial**2))
        )
        compression_conv = nn.Conv2d(
            self.backbone.final_channels,
            num_compression_channels,
            kernel_size=3,
            padding=1,
            bias=False,
        )
        self.compression = nn.Sequential(
            compression_conv,
            nn.GroupNorm(1, num_compression_channels),
            nn.ReLU(True),
        )

        self.output_shape = (
            num_compression_channels,
            final_spatial,
            final_spatial,
        )

        self.head = nn.Sequential(
            Flatten(),
            nn.Linear(np.prod(self.output_shape), output_size),
            nn.ReLU(True),
        )

        self.layer_init()

    @property
    def is_blind(self):
        """True when the rgb and depth input channel counts sum to zero."""
        total_input_channels = self._n_input_rgb + self._n_input_depth
        return total_input_channels == 0

    def layer_init(self):
        """Re-initialize every `nn.Conv2d` / `nn.Linear` submodule and log a
        debug message.

        Weights are re-drawn with `nn.init.kaiming_normal_`; biases (when
        present) are set to zero.
        """
        # NOTE(review): the second positional argument of `kaiming_normal_` is
        # `a` (the leaky-ReLU negative slope), so the ReLU gain computed here is
        # consumed as a slope rather than as a gain -- confirm this is intended.
        for module in self.modules():
            if not isinstance(module, (nn.Conv2d, nn.Linear)):
                continue
            nn.init.kaiming_normal_(module.weight, nn.init.calculate_gain("relu"))
            if module.bias is not None:
                nn.init.constant_(module.bias, val=0)
        get_logger().debug("Initializing resnet encoder")

    def forward(self, observations):
        """Encode the configured visual observations.

        # Parameters

        observations : Mapping from uuid to a tensor of shape
            [STEP, SAMPLER, (AGENT,) dim1, dim2, dim3]; the last dimension is
            moved to the channel position before the convolutions.

        # Returns

        `None` if blind, otherwise the head's output reshaped to
        [STEP, SAMPLER, (AGENT,) ...].
        """
        if self.is_blind:
            return None

        # TODO: the reshape follows compute_cnn_output()
        # but it's hard to make the forward as a nn.Module as cnn param
        nagents: Optional[int] = None
        nsteps: Optional[int] = None
        nsamplers: Optional[int] = None
        assert len(self._inputs) > 0

        flat_inputs = []
        for uuid in self._inputs:
            obs = observations[uuid]
            rank = len(obs.shape)
            assert rank in [
                5,
                6,
            ], "CNN input must have shape [STEP, SAMPLER, (AGENT,) dim1, dim2, dim3]"
            if rank == 6:
                nsteps, nsamplers, nagents = obs.shape[:3]
            else:
                nsteps, nsamplers = obs.shape[:2]
            # Make FLAT_BATCH = nsteps * nsamplers (* nagents)
            n_leading = 3 if nagents is not None else 2
            obs = obs.view((-1,) + obs.shape[n_leading:])
            # permute tensor to dimension [BATCH x CHANNEL x HEIGHT X WIDTH]
            flat_inputs.append(obs.permute(0, 3, 1, 2))

        stacked = torch.cat(flat_inputs, dim=1)
        pooled = F.avg_pool2d(stacked, 2)  # 2x downsampling

        features = self.backbone(pooled)
        features = self.compression(features)
        features = self.head(features)  # flattened -> output_size

        # Restore the leading [STEP, SAMPLER, (AGENT,)] dimensions.
        if nagents is None:
            leading = (nsteps, nsamplers)
        else:
            leading = (nsteps, nsamplers, nagents)

        return features.reshape(leading + features.shape[1:])


================================================
FILE: allenact/embodiedai/models/visual_nav_models.py
================================================
from collections import OrderedDict
from typing import Tuple, Dict, Optional, List, Sequence
from typing import TypeVar

import gym
import torch
import torch.nn as nn
from gym.spaces.dict import Dict as SpaceDict

from allenact.algorithms.onpolicy_sync.policy import (
    ActorCriticModel,
    LinearCriticHead,
    LinearActorHead,
    ObservationType,
    DistributionType,
)
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput, Memory
from allenact.embodiedai.aux_losses.losses import MultiAuxTaskNegEntropyLoss
from allenact.embodiedai.models.aux_models import AuxiliaryModel
from allenact.embodiedai.models.basic_models import RNNStateEncoder
from allenact.embodiedai.models.fusion_models import Fusion
from allenact.utils.model_utils import FeatureEmbedding
from allenact.utils.system import get_logger

# Type variable used to annotate the belief-fusion argument of
# `VisualNavActorCritic`; it is bounded by `Fusion`.
FusionType = TypeVar("FusionType", bound=Fusion)


class VisualNavActorCritic(ActorCriticModel[CategoricalDistr]):
    """Base class of visual navigation / manipulation (or broadly, embodied AI)
    model.

    `forward_encoder` function requires implementation.

    The constructor only declares the sub-modules (`state_encoders`,
    `aux_models`, `actor`, `critic`, `prev_action_embedder`, `fusion_model`)
    as `None` placeholders. They are built by `create_state_encoders`,
    `create_actorcritic_head` and `create_aux_models`. `forward` dereferences
    the state encoders, the previous-action embedder and both heads, so those
    must have been created before the model is run.
    """

    action_space: gym.spaces.Discrete

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        hidden_size=512,
        multiple_beliefs=False,
        beliefs_fusion: Optional[FusionType] = None,
        auxiliary_uuids: Optional[List[str]] = None,
        auxiliary_model_class=AuxiliaryModel,
    ):
        """Store the configuration and declare (but do not build) sub-modules.

        # Parameters
        action_space : Discrete action space; `action_space.n` sizes the actor
            head and the previous-action embedding.
        observation_space : Observation space forwarded to the parent class.
        hidden_size : Size of each recurrent belief state.
        multiple_beliefs : Whether one belief RNN per auxiliary uuid is used.
            Must be `True` exactly when `beliefs_fusion` is given.
        beliefs_fusion : Called in `create_state_encoders` to build the fusion
            model when `multiple_beliefs` is `True`.
        auxiliary_uuids : uuids of the auxiliary tasks. An empty list is
            treated the same as `None`.
        auxiliary_model_class : Instantiated once per auxiliary uuid in
            `create_aux_models`.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)
        self._hidden_size = hidden_size
        assert multiple_beliefs == (beliefs_fusion is not None)
        self.multiple_beliefs = multiple_beliefs
        self.beliefs_fusion = beliefs_fusion
        self.auxiliary_uuids = auxiliary_uuids
        if isinstance(self.auxiliary_uuids, list) and len(self.auxiliary_uuids) == 0:
            self.auxiliary_uuids = None

        # Define the placeholders in init function
        self.state_encoders: Optional[nn.ModuleDict] = None
        self.aux_models: Optional[nn.ModuleDict] = None
        self.actor: Optional[LinearActorHead] = None
        self.critic: Optional[LinearCriticHead] = None
        self.prev_action_embedder: Optional[FeatureEmbedding] = None

        self.fusion_model: Optional[nn.Module] = None
        self.belief_names: Optional[Sequence[str]] = None
        self.auxiliary_model_class = auxiliary_model_class

    def create_state_encoders(
        self,
        obs_embed_size: int,
        prev_action_embed_size: int,
        num_rnn_layers: int,
        rnn_type: str,
        add_prev_actions: bool,
        add_prev_action_null_token: bool,
        trainable_masked_hidden_state=False,
    ):
        """Build the previous-action embedder and the belief RNN(s).

        With `multiple_beliefs` one `RNNStateEncoder` is created per entry of
        `auxiliary_uuids` (which therefore must not be `None`) together with
        the fusion model; otherwise a single encoder named `"single_belief"`
        is created. `belief_names` is set to the resulting encoder keys.

        # Parameters
        obs_embed_size : Size of the observation embedding fed to the RNN(s).
        prev_action_embed_size : Size of the previous-action embedding; only
            added to the RNN input size when `add_prev_actions` is `True`.
        num_rnn_layers : Number of layers of each `RNNStateEncoder`.
        rnn_type : RNN type forwarded to `RNNStateEncoder`.
        add_prev_actions : Whether previous actions are embedded (the
            embedder is given output size 0 otherwise).
        add_prev_action_null_token : Whether to reserve one extra embedding
            index, used by `forward` for the start of an episode.
        trainable_masked_hidden_state : Forwarded to `RNNStateEncoder`.
        """
        rnn_input_size = obs_embed_size
        self.prev_action_embedder = FeatureEmbedding(
            input_size=int(add_prev_action_null_token) + self.action_space.n,
            output_size=prev_action_embed_size if add_prev_actions else 0,
        )
        if add_prev_actions:
            rnn_input_size += prev_action_embed_size

        state_encoders = OrderedDict()  # preserve insertion order in py3.6
        if self.multiple_beliefs:  # multiple belief model
            for aux_uuid in self.auxiliary_uuids:
                state_encoders[aux_uuid] = RNNStateEncoder(
                    rnn_input_size,
                    self._hidden_size,
                    num_layers=num_rnn_layers,
                    rnn_type=rnn_type,
                    trainable_masked_hidden_state=trainable_masked_hidden_state,
                )
            # create fusion model
            self.fusion_model = self.beliefs_fusion(
                hidden_size=self._hidden_size,
                obs_embed_size=obs_embed_size,
                num_tasks=len(self.auxiliary_uuids),
            )

        else:  # single belief model
            state_encoders["single_belief"] = RNNStateEncoder(
                rnn_input_size,
                self._hidden_size,
                num_layers=num_rnn_layers,
                rnn_type=rnn_type,
                trainable_masked_hidden_state=trainable_masked_hidden_state,
            )

        self.state_encoders = nn.ModuleDict(state_encoders)

        self.belief_names = list(self.state_encoders.keys())

        get_logger().info(
            "there are {} belief models: {}".format(
                len(self.belief_names), self.belief_names
            )
        )

    def load_state_dict(self, state_dict, **kwargs):
        """Load `state_dict` after renaming keys saved under older names.

        Keys containing `"state_encoder."` are mapped to
        `"state_encoders.single_belief."` and keys containing
        `"goal_visual_encoder.embed_class"` to
        `"goal_visual_encoder.embed_goal"`; all other keys are kept as they
        are. Extra keyword arguments are forwarded to the parent
        implementation, whose result is returned.
        """
        new_state_dict = OrderedDict()
        for key, value in state_dict.items():
            if "state_encoder." in key:  # old key name
                new_key = key.replace("state_encoder.", "state_encoders.single_belief.")
            elif "goal_visual_encoder.embed_class" in key:
                new_key = key.replace(
                    "goal_visual_encoder.embed_class", "goal_visual_encoder.embed_goal"
                )
            else:
                new_key = key
            new_state_dict[new_key] = value

        return super().load_state_dict(new_state_dict, **kwargs)  # compatible in keys

    def create_actorcritic_head(self):
        """Build the linear actor and critic heads on top of the belief."""
        self.actor = LinearActorHead(self._hidden_size, self.action_space.n)
        self.critic = LinearCriticHead(self._hidden_size)

    def create_aux_models(self, obs_embed_size: int, action_embed_size: int):
        """Build one auxiliary model per auxiliary uuid.

        Does nothing (leaving `aux_models` as `None`) when no auxiliary uuids
        were configured.
        """
        if self.auxiliary_uuids is None:
            return
        aux_models = OrderedDict()
        for aux_uuid in self.auxiliary_uuids:
            aux_models[aux_uuid] = self.auxiliary_model_class(
                aux_uuid=aux_uuid,
                action_dim=self.action_space.n,
                obs_embed_dim=obs_embed_size,
                belief_dim=self._hidden_size,
                action_embed_size=action_embed_size,
            )

        self.aux_models = nn.ModuleDict(aux_models)

    @property
    def num_recurrent_layers(self):
        """Number of recurrent hidden layers."""
        # Read from the first state encoder.
        return next(iter(self.state_encoders.values())).num_recurrent_layers

    @property
    def recurrent_hidden_state_size(self):
        """The recurrent hidden state size of a single model."""
        return self._hidden_size

    def _recurrent_memory_specification(self):
        """Describe one float32 `(layer, sampler, hidden)` memory per belief."""
        return {
            memory_key: (
                (
                    ("layer", self.num_recurrent_layers),
                    ("sampler", None),
                    ("hidden", self.recurrent_hidden_state_size),
                ),
                torch.float32,
            )
            for memory_key in self.belief_names
        }

    def forward_encoder(self, observations: ObservationType) -> torch.FloatTensor:
        """Embed the observations; must be implemented by subclasses."""
        raise NotImplementedError("Obs Encoder Not Implemented")

    def fuse_beliefs(
        self,
        beliefs_dict: Dict[str, torch.FloatTensor],
        obs_embeds: torch.FloatTensor,
    ) -> Tuple[torch.FloatTensor, Optional[torch.FloatTensor]]:
        """Combine the per-encoder beliefs into a single belief.

        # Returns
        For multiple beliefs, whatever the fusion model returns (unpacked by
        `forward` as the fused beliefs and the task weights). For a single
        belief, that belief and `None`.
        """
        all_beliefs = torch.stack(list(beliefs_dict.values()), dim=-1)  # (T, N, H, k)

        if self.multiple_beliefs:  # call the fusion model
            return self.fusion_model(all_beliefs=all_beliefs, obs_embeds=obs_embeds)
        # single belief
        beliefs = all_beliefs.squeeze(-1)  # (T,N,H)
        return beliefs, None

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Processes input batched observations to produce new actor and critic
        values. Processes input batched observations (along with prior hidden
        states, previous actions, and masks denoting which recurrent hidden
        states should be masked) and returns an `ActorCriticOutput` object
        containing the model's policy (distribution over actions) and
        evaluation of the current state (value).

        # Parameters
        observations : Batched input observations.
        memory : `Memory` containing the hidden states from initial timepoints.
        prev_actions : Tensor of previous actions taken.
        masks : Masks applied to hidden states. See `RNNStateEncoder`.
        # Returns
        Tuple of the `ActorCriticOutput` and recurrent hidden state.
        """

        # 1.1 use perception model (i.e. encoder) to get observation embeddings
        obs_embeds = self.forward_encoder(observations)

        # 1.2 use embedding model to get prev_action embeddings
        if self.prev_action_embedder.input_size == self.action_space.n + 1:
            # In this case we have a unique embedding for the start of an episode
            prev_actions_embeds = self.prev_action_embedder(
                torch.where(
                    condition=0 != masks.view(*prev_actions.shape),
                    input=prev_actions + 1,
                    other=torch.zeros_like(prev_actions),
                )
            )
        else:
            prev_actions_embeds = self.prev_action_embedder(prev_actions)
        joint_embeds = torch.cat((obs_embeds, prev_actions_embeds), dim=-1)  # (T, N, *)

        # 2. use RNNs to get single/multiple beliefs
        beliefs_dict = {}
        for key, model in self.state_encoders.items():
            beliefs_dict[key], rnn_hidden_states = model(
                joint_embeds, memory.tensor(key), masks
            )
            memory.set_tensor(key, rnn_hidden_states)  # update memory here

        # 3. fuse beliefs for multiple belief models
        beliefs, task_weights = self.fuse_beliefs(
            beliefs_dict, obs_embeds
        )  # fused beliefs

        # 4. prepare output
        # `aux_models` stays `None` until `create_aux_models` has built it; a
        # missing auxiliary model is reported as `None` rather than raising
        # on the membership test.
        aux_models = self.aux_models
        extras = (
            {
                aux_uuid: {
                    "beliefs": (
                        beliefs_dict[aux_uuid] if self.multiple_beliefs else beliefs
                    ),
                    "obs_embeds": obs_embeds,
                    "aux_model": (
                        aux_models[aux_uuid]
                        if aux_models is not None and aux_uuid in aux_models
                        else None
                    ),
                }
                for aux_uuid in self.auxiliary_uuids
            }
            if self.auxiliary_uuids is not None
            else {}
        )

        if self.multiple_beliefs:
            extras[MultiAuxTaskNegEntropyLoss.UUID] = task_weights

        actor_critic_output = ActorCriticOutput(
            distributions=self.actor(beliefs),
            values=self.critic(beliefs),
            extras=extras,
        )

        return actor_critic_output, memory


================================================
FILE: allenact/embodiedai/preprocessors/__init__.py
================================================


================================================
FILE: allenact/embodiedai/preprocessors/resnet.py
================================================
from typing import List, Callable, Optional, Any, cast, Dict

import gym
import numpy as np
import torch
import torch.nn as nn
from torchvision import models

from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.misc_utils import prepare_locals_for_super


class ResNetEmbedder(nn.Module):
    """Wrap a ResNet-like model and run it as a gradient-free feature extractor.

    The wrapped model must expose `conv1`, `bn1`, `relu`, `maxpool`,
    `layer1`..`layer4` and (when pooling) `avgpool`. The wrapper is put in
    eval mode on construction.
    """

    def __init__(self, resnet, pool=True):
        """
        # Parameters
        resnet : The model providing the stages listed in the class docstring.
        pool : If `True`, apply `avgpool` and flatten from dim 1 onwards;
            otherwise return the `layer4` feature map unchanged.
        """
        super().__init__()
        self.model = resnet
        self.pool = pool
        self.eval()

    def forward(self, x):
        """Run the stem and the four residual stages under `torch.no_grad()`."""
        net = self.model
        stages = (
            net.conv1,
            net.bn1,
            net.relu,
            net.maxpool,
            net.layer1,
            net.layer2,
            net.layer3,
            net.layer4,
        )
        with torch.no_grad():
            features = x
            for stage in stages:
                features = stage(features)

            if self.pool:
                features = torch.flatten(net.avgpool(features), 1)
            return features


class ResNetPreprocessor(Preprocessor):
    """Preprocess RGB or depth image using a ResNet model."""

    def __init__(
        self,
        input_uuids: List[str],
        output_uuid: str,
        input_height: int,
        input_width: int,
        output_height: int,
        output_width: int,
        output_dims: int,
        pool: bool,
        torchvision_resnet_model: Callable[..., models.ResNet] = models.resnet18,
        device: Optional[torch.device] = None,
        device_ids: Optional[List[torch.device]] = None,
        **kwargs: Any,
    ):
        """Initializer.

        # Parameters

        input_uuids : Must contain exactly one uuid: the observation to embed.
        output_uuid : uuid of the produced observation (handled by the parent
            class).
        input_height : Stored on the instance; not otherwise used here.
        input_width : Stored on the instance; not otherwise used here.
        output_height : Last-but-one entry of the declared observation shape.
        output_width : Last entry of the declared observation shape.
        output_dims : First entry of the declared observation shape.
        pool : Forwarded to `ResNetEmbedder` (whether to pool and flatten).
        torchvision_resnet_model : Factory called with `pretrained=True` to
            build the ResNet the first time `resnet` is accessed.
        device : Device for the model and inputs (CPU when `None`).
        device_ids : Defaults to the indices of all visible CUDA devices when
            `None` or empty.
        kwargs : Extra kwargs.
        """

        # NOTE(review): `f` and `optf` are never called in this class, but all
        # locals of this function (these helpers, `low`, `high`, `shape`, ...)
        # are handed to `prepare_locals_for_super(locals())` below, so removing
        # or renaming them may change what the parent constructor receives.
        def f(x, k):
            assert k in x, "{} must be set in ResNetPreprocessor".format(k)
            return x[k]

        def optf(x, k, default):
            return x[k] if k in x else default

        self.input_height = input_height
        self.input_width = input_width
        self.output_height = output_height
        self.output_width = output_width
        self.output_dims = output_dims
        self.pool = pool
        self.make_model = torchvision_resnet_model

        self.device = torch.device("cpu") if device is None else device
        self.device_ids = device_ids or cast(
            List[torch.device], list(range(torch.cuda.device_count()))
        )

        # Built lazily by the `resnet` property.
        self._resnet: Optional[ResNetEmbedder] = None

        # Unbounded box with shape (output_dims, output_height, output_width).
        low = -np.inf
        high = np.inf
        shape = (self.output_dims, self.output_height, self.output_width)

        assert (
            len(input_uuids) == 1
        ), "resnet preprocessor can only consume one observation type"

        observation_space = gym.spaces.Box(low=low, high=high, shape=shape)

        # presumably forwards the locals above as keyword arguments to the
        # parent constructor; verify against `prepare_locals_for_super`.
        super().__init__(**prepare_locals_for_super(locals()))

    @property
    def resnet(self) -> ResNetEmbedder:
        """The embedder, created on first access on `self.device` from a model
        built with `pretrained=True`."""
        if self._resnet is None:
            self._resnet = ResNetEmbedder(
                self.make_model(pretrained=True).to(self.device), pool=self.pool
            )
        return self._resnet

    def to(self, device: torch.device) -> "ResNetPreprocessor":
        """Move the embedder to `device`, remember it, and return `self`."""
        self._resnet = self.resnet.to(device)
        self.device = device
        return self

    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:
        """Embed the single configured input observation with the ResNet.

        The input is moved to `self.device`, permuted from channels-last to
        channels-first and, if it has a single channel, repeated to three
        channels before being passed to the embedder.
        """
        x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2)  # bhwc -> bchw
        # If the input is depth, repeat it across all 3 channels
        if x.shape[1] == 1:
            x = x.repeat(1, 3, 1, 1)
        return self.resnet(x.to(self.device))


================================================
FILE: allenact/embodiedai/sensors/__init__.py
================================================


================================================
FILE: allenact/embodiedai/sensors/vision_sensors.py
================================================
from abc import abstractmethod, ABC
from typing import Optional, Tuple, Any, cast, Union, Sequence

import PIL
import gym
import numpy as np
from torchvision import transforms

from allenact.base_abstractions.misc import EnvType
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import SubTaskType
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact.utils.tensor_utils import ScaleBothSides

# Per-channel RGB mean / standard deviation used as the default normalization
# statistics of `RGBSensor` (the standard resnet normalization values).
IMAGENET_RGB_MEANS: Tuple[float, float, float] = (0.485, 0.456, 0.406)
IMAGENET_RGB_STDS: Tuple[float, float, float] = (0.229, 0.224, 0.225)


class VisionSensor(Sensor[EnvType, SubTaskType]):
    """Base sensor for image-like observations.

    Subclasses implement `frame_from_env`; `get_observation` passes the
    returned frame through `process_img`, which optionally rescales the image
    and normalizes it with the configured mean / standard deviation.
    """

    def __init__(
        self,
        mean: Union[Sequence[float], np.ndarray, None] = None,
        stdev: Union[Sequence[float], np.ndarray, None] = None,
        height: Optional[int] = None,
        width: Optional[int] = None,
        uuid: str = "vision",
        output_shape: Optional[Tuple[int, ...]] = None,
        output_channels: Optional[int] = None,
        unnormalized_infimum: float = -np.inf,
        unnormalized_supremum: float = np.inf,
        scale_first: bool = True,
        **kwargs: Any
    ):
        """Initializer.

        # Parameters

        mean : The images will be normalized with the given mean
        stdev : The images will be normalized with the given standard deviations.
        height : If it's a non-negative integer and `width` is also non-negative integer, the image returned from the
                environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.
        width : If it's a non-negative integer and `height` is also non-negative integer, the image returned from the
                environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.
        uuid : The universally unique identifier for the sensor.
        output_shape : Optional observation space shape (alternative to `output_channels`).
        output_channels : Optional observation space number of channels (alternative to `output_shape`).
        unnormalized_infimum : Lower limit(s) for the observation space range.
        unnormalized_supremum : Upper limit(s) for the observation space range.
        scale_first : Whether to scale image before normalization (if needed).
        kwargs : Extra kwargs. Currently unused.
        """

        self._norm_means = np.array(mean) if mean is not None else None
        self._norm_sds = np.array(stdev) if stdev is not None else None

        assert (self._norm_means is None) == (self._norm_sds is None), (
            "In VisionSensor's config, "
            "either both mean/stdev must be None or neither."
        )
        # Normalization is enabled exactly when mean and stdev were provided.
        self._should_normalize = self._norm_means is not None

        self._height = height
        self._width = width
        assert (self._width is None) == (self._height is None), (
            "In VisionSensor's config, "
            "either both height/width must be None or neither."
        )

        self._scale_first = scale_first

        # A scaler only exists when a target height/width was given.
        self.scaler: Optional[ScaleBothSides] = None
        if self._width is not None:
            self.scaler = ScaleBothSides(
                width=cast(int, self._width), height=cast(int, self._height)
            )

        self.to_pil = transforms.ToPILImage()  # assumes mode="RGB" for 3 channels

        self._observation_space = self._make_observation_space(
            output_shape=output_shape,
            output_channels=output_channels,
            unnormalized_infimum=unnormalized_infimum,
            unnormalized_supremum=unnormalized_supremum,
        )

        # Only major version 7 of Pillow is rejected.
        assert int(PIL.__version__.split(".")[0]) != 7, (
            "We found that Pillow version >=7.* has broken scaling,"
            " please downgrade to version 6.2.1 or upgrade to >=8.0.0"
        )

        # NOTE(review): this local, together with every argument above, is
        # passed on through `locals()`; presumably that is how the parent
        # constructor receives `uuid` and `observation_space` - confirm
        # against `prepare_locals_for_super`.
        observation_space = self._get_observation_space()

        super().__init__(**prepare_locals_for_super(locals()))

    def _make_observation_space(
        self,
        output_shape: Optional[Tuple[int, ...]],
        output_channels: Optional[int],
        unnormalized_infimum: float,
        unnormalized_supremum: float,
    ) -> gym.spaces.Box:
        """Build the `gym.spaces.Box` describing this sensor's output.

        The shape is `output_shape` if given, else
        `(height, width, output_channels)` when both a height and
        `output_channels` are available, else `None`. Without normalization
        (or when the shape is `None` or one-dimensional) the unnormalized
        bounds are used directly; otherwise the bounds are normalized with the
        configured mean / standard deviation and tiled over the leading
        dimensions of the shape.
        """
        assert output_shape is None or output_channels is None, (
            "In VisionSensor's config, "
            "only one of output_shape and output_channels can be not None."
        )

        shape: Optional[Tuple[int, ...]] = None
        if output_shape is not None:
            shape = output_shape
        elif self._height is not None and output_channels is not None:
            shape = (
                cast(int, self._height),
                cast(int, self._width),
                cast(int, output_channels),
            )

        if not self._should_normalize or shape is None or len(shape) == 1:
            return gym.spaces.Box(
                low=np.float32(unnormalized_infimum),
                high=np.float32(unnormalized_supremum),
                shape=shape,
            )
        else:
            # Repeat the normalized bounds along every axis except the last.
            out_shape = shape[:-1] + (1,)
            low = np.tile(
                (unnormalized_infimum - cast(np.ndarray, self._norm_means))
                / cast(np.ndarray, self._norm_sds),
                out_shape,
            )
            high = np.tile(
                (unnormalized_supremum - cast(np.ndarray, self._norm_means))
                / cast(np.ndarray, self._norm_sds),
                out_shape,
            )
            return gym.spaces.Box(low=np.float32(low), high=np.float32(high))

    def _get_observation_space(self):
        """Return the observation space built in the constructor."""
        return self._observation_space

    @property
    def height(self) -> Optional[int]:
        """Height that input image will be rescale to have.

        # Returns

        The height as a non-negative integer or `None` if no rescaling is done.
        """
        return self._height

    @property
    def width(self) -> Optional[int]:
        """Width that input image will be rescale to have.

        # Returns

        The width as a non-negative integer or `None` if no rescaling is done.
        """
        return self._width

    @abstractmethod
    def frame_from_env(self, env: EnvType, task: Optional[SubTaskType]) -> np.ndarray:
        """Return the raw frame for the current state; implemented by
        subclasses."""
        raise NotImplementedError

    def process_img(self, img: np.ndarray):
        """Rescale and normalize a raw frame.

        The frame must be either `np.float32` with two dimensions or a
        trailing dimension of size 1, or `np.uint8` with a trailing dimension
        of size 3. `np.uint8` frames are converted to `np.float32` and divided
        by 255. Rescaling to `(height, width)` happens before normalization
        when `scale_first` is set and afterwards otherwise, and only if a
        scaler exists and the frame does not already have the target size.
        """
        assert (
            np.issubdtype(img.dtype, np.float32)
            and (len(img.shape) == 2 or img.shape[-1] == 1)
        ) or (img.shape[-1] == 3 and np.issubdtype(img.dtype, np.uint8)), (
            "Input frame must either have 3 channels and be of"
            " type np.uint8 or have one channel and be of type np.float32"
        )

        if (
            self._scale_first
            and self.scaler is not None
            and img.shape[:2] != (self._height, self._width)
        ):
            img = np.array(self.scaler(self.to_pil(img)), dtype=img.dtype)  # hwc
        elif np.issubdtype(img.dtype, np.float32):
            # presumably copied so that the in-place normalization below does
            # not modify the array returned by the environment.
            img = img.copy()

        assert img.dtype in [np.uint8, np.float32]

        if np.issubdtype(img.dtype, np.uint8):
            img = img.astype(np.float32) / 255.0

        if self._should_normalize:
            # In-place normalization.
            img -= self._norm_means
            img /= self._norm_sds

        if (
            (not self._scale_first)
            and self.scaler is not None
            and img.shape[:2] != (self._height, self._width)
        ):
            img = np.array(self.scaler(self.to_pil(img)), dtype=np.float32)  # hwc

        return img

    def get_observation(
        self, env: EnvType, task: Optional[SubTaskType], *args: Any, **kwargs: Any
    ) -> Any:
        """Return the processed frame obtained from `frame_from_env`."""
        return self.process_img(self.frame_from_env(env=env, task=task))


class RGBSensor(VisionSensor[EnvType, SubTaskType], ABC):
    """Abstract `VisionSensor` with defaults for 3-channel RGB images in
    `[0, 1]` and optional resnet-style normalization."""

    def __init__(
        self,
        use_resnet_normalization: bool = False,
        mean: Optional[Union[np.ndarray, Sequence[float]]] = IMAGENET_RGB_MEANS,
        stdev: Optional[Union[np.ndarray, Sequence[float]]] = IMAGENET_RGB_STDS,
        height: Optional[int] = None,
        width: Optional[int] = None,
        uuid: str = "rgb",
        output_shape: Optional[Tuple[int, ...]] = None,
        output_channels: int = 3,
        unnormalized_infimum: float = 0.0,
        unnormalized_supremum: float = 1.0,
        scale_first: bool = True,
        **kwargs: Any
    ):
        """Initializer.

        # Parameters

        use_resnet_normalization : Whether to apply image normalization with the given `mean` and `stdev`.
        mean : The images will be normalized with the given mean if `use_resnet_normalization` is True (default
               `[0.485, 0.456, 0.406]`, i.e. the standard resnet normalization mean).
        stdev : The images will be normalized with the given standard deviation if `use_resnet_normalization` is True
                (default `[0.229, 0.224, 0.225]`, i.e. the standard resnet normalization standard deviation).
        height: If it's a non-negative integer and `width` is also non-negative integer, the image returned from the
                environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.
        width: If it's a non-negative integer and `height` is also non-negative integer, the image returned from the
                environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.
        uuid: The universally unique identifier for the sensor.
        output_shape: Optional observation space shape (alternative to `output_channels`).
        output_channels: Optional observation space number of channels (alternative to `output_shape`).
        unnormalized_infimum: Lower limit(s) for the observation space range.
        unnormalized_supremum: Upper limit(s) for the observation space range.
        scale_first: Whether to scale image before normalization (if needed).
        kwargs : Extra kwargs. Currently unused.
        """

        # Without normalization the statistics are dropped entirely.
        if not use_resnet_normalization:
            mean, stdev = None, None

        # Tuples are reshaped to (1, 1, C); other sequence types are passed
        # through unchanged.
        if isinstance(mean, tuple):
            mean = np.array(mean, dtype=np.float32).reshape((1, 1, len(mean)))
        if isinstance(stdev, tuple):
            stdev = np.array(stdev, dtype=np.float32).reshape((1, 1, len(stdev)))

        # The (possibly rebound) locals above are what gets forwarded, so the
        # argument names must not be changed.
        super().__init__(**prepare_locals_for_super(locals()))


class DepthSensor(VisionSensor[EnvType, SubTaskType], ABC):
    """Abstract `VisionSensor` with defaults for single-channel depth images
    and optional normalization."""

    def __init__(
        self,
        use_normalization: bool = False,
        mean: Optional[Union[np.ndarray, float]] = 0.5,
        stdev: Optional[Union[np.ndarray, float]] = 0.25,
        height: Optional[int] = None,
        width: Optional[int] = None,
        uuid: str = "depth",
        output_shape: Optional[Tuple[int, ...]] = None,
        output_channels: int = 1,
        unnormalized_infimum: float = 0.0,
        unnormalized_supremum: float = 5.0,
        scale_first: bool = True,
        **kwargs: Any
    ):
        """Initializer.

        # Parameters

        use_normalization : Whether to apply image normalization with the given `mean` and `stdev`.
        mean : The images will be normalized with the given mean if `use_normalization` is True (default 0.5).
        stdev : The images will be normalized with the given standard deviation if `use_normalization` is True
                (default 0.25).
        height: If it's a non-negative integer and `width` is also non-negative integer, the image returned from the
                environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.
        width: If it's a non-negative integer and `height` is also non-negative integer, the image returned from the
                environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.
        uuid: The universally unique identifier for the sensor.
        output_shape: Optional observation space shape (alternative to `output_channels`).
        output_channels: Optional observation space number of channels (alternative to `output_shape`).
        unnormalized_infimum: Lower limit(s) for the observation space range.
        unnormalized_supremum: Upper limit(s) for the observation space range.
        scale_first: Whether to scale image before normalization (if needed).
        kwargs : Extra kwargs. Currently unused.
        """

        # Without normalization the statistics are dropped entirely.
        if not use_normalization:
            mean, stdev = None, None

        # Python floats are turned into (1, 1) float32 arrays; arrays are
        # passed through unchanged.
        if isinstance(mean, float):
            mean = np.array(mean, dtype=np.float32).reshape(1, 1)
        if isinstance(stdev, float):
            stdev = np.array(stdev, dtype=np.float32).reshape(1, 1)

        # The (possibly rebound) locals above are what gets forwarded, so the
        # argument names must not be changed.
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(  # type: ignore
        self, env: EnvType, task: Optional[SubTaskType], *args: Any, **kwargs: Any
    ) -> Any:
        """Return the processed depth frame with a new axis inserted at
        position 2."""
        depth = super().get_observation(env, task, *args, **kwargs)
        depth = np.expand_dims(depth, 2)

        return depth


================================================
FILE: allenact/embodiedai/storage/__init__.py
================================================


================================================
FILE: allenact/embodiedai/storage/vdr_storage.py
================================================
import math
import random
from collections import defaultdict
from typing import Union, Tuple, Optional, Dict, Callable, cast, Sequence

import torch
import torch.nn.functional as F

from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.algorithms.onpolicy_sync.storage import (
    MiniBatchStorageMixin,
    ExperienceStorage,
)
from allenact.base_abstractions.misc import (
    GenericAbstractLoss,
    ModelType,
    Memory,
    LossOutput,
)
from allenact.utils.misc_utils import unzip, partition_sequence


def _index_recursive(d: Dict, key: Union[str, Tuple[str, ...]]):
    if isinstance(key, str):
        return d[key]
    for k in key:
        d = d[k]
    return d


class InverseDynamicsVDRLoss(GenericAbstractLoss):
    """Cross-entropy loss between predicted action logits for an image pair
    and the action stored in the batch."""

    def __init__(
        self,
        compute_action_logits_fn: Callable,
        img0_key: str,
        img1_key: str,
        action_key: str,
    ):
        """
        # Parameters
        compute_action_logits_fn : Called with keyword arguments `model`,
            `img0` and `img1`; its result is used as the logits.
        img0_key : Batch key of the first image.
        img1_key : Batch key of the second image.
        action_key : Batch key of the target actions.
        """
        self.img0_key, self.img1_key = img0_key, img1_key
        self.action_key = action_key
        self.compute_action_logits_fn = compute_action_logits_fn

    def loss(
        self,
        *,
        model: ModelType,
        batch: ObservationType,
        batch_memory: Memory,
        stream_memory: Memory,
    ) -> LossOutput:
        """Compute the loss for one batch.

        Both memories are returned unchanged in the `LossOutput`; the batch
        size is taken from the first dimension of the first image entry.
        """
        first_imgs = batch[self.img0_key]
        second_imgs = batch[self.img1_key]

        logits = self.compute_action_logits_fn(
            model=model, img0=first_imgs, img1=second_imgs
        )
        cross_entropy = F.cross_entropy(logits, target=batch[self.action_key])

        return LossOutput(
            value=cross_entropy,
            info={"cross_entropy": cross_entropy.item()},
            per_epoch_info={},
            batch_memory=batch_memory,
            stream_memory=stream_memory,
            bsize=int(first_imgs.shape[0]),
        )


class DiscreteVisualDynamicsReplayStorage(ExperienceStorage, MiniBatchStorageMixin):
    """Replay storage of `(image, next image)` pairs bucketed by discrete
    action.

    Every call to `add` compares the images from the previous call with the
    current ones and, subject to a few filters, stores the pair under the
    action that was taken in between. `batched_experience_generator` then
    yields shuffled mini-batches of `{"img0", "action", "img1"}` tensors.
    """

    def __init__(
        self,
        image_uuid: Union[str, Tuple[str, ...]],
        action_success_uuid: Optional[Union[str, Tuple[str, ...]]],
        nactions: int,
        num_to_store_per_action: int,
        max_to_save_per_episode: int,
        target_batch_size: int,
        extra_targets: Optional[Sequence] = None,
    ):
        """
        # Parameters

        image_uuid : Key (or nested key path) of the image inside the
            observations passed to `add`.
        action_success_uuid : Key (or nested key path) of a per-sampler
            "last action succeeded" flag inside the observations. If `None`
            every action is treated as successful.
        nactions : Number of discrete actions; one bucket is created for each
            of `0, ..., nactions - 1`.
        num_to_store_per_action : Capacity of each action bucket.
        max_to_save_per_episode : Threshold on the number of distinct actions
            recorded for a sampler before its per-episode record is reset (see
            `add`).
        target_batch_size : Approximate size of the mini-batches yielded by
            `batched_experience_generator`.
        extra_targets : Optional additional observation keys read in `add`.
        """
        self.image_uuid = image_uuid
        self.action_success_uuid = action_success_uuid
        self.nactions = nactions
        self.num_to_store_per_action = num_to_store_per_action
        self.max_to_save_per_episode = max_to_save_per_episode
        self.target_batch_size = target_batch_size
        self.extra_targets = extra_targets if extra_targets is not None else []

        # Images seen at the previous call to `add` (None until the first call).
        self._prev_imgs: Optional[torch.Tensor] = None

        # action index -> list of (previous image, current image) pairs
        self.action_to_saved_transitions = {i: [] for i in range(nactions)}
        # action index -> number of transitions accepted for that action
        self.action_to_num_seen = {i: 0 for i in range(nactions)}
        # sampler index -> actions already stored since the record was last cleared
        self.task_sampler_to_actions_already_sampled = defaultdict(lambda: set())

        # Device that yielded batches are moved to; stored images stay on CPU.
        self.device = torch.device("cpu")

        self._total_samples_returned_in_batches = 0

    @property
    def total_experiences(self):
        """Total number of samples yielded so far in mini-batches."""
        return self._total_samples_returned_in_batches

    def set_partition(self, index: int, num_parts: int):
        """Shrink the per-action capacity and batch size by a factor of
        `num_parts` (rounding up).

        `index` is accepted for interface compatibility but not used.
        """
        self.num_to_store_per_action = math.ceil(
            self.num_to_store_per_action / num_parts
        )
        self.target_batch_size = math.ceil(self.target_batch_size / num_parts)

    def initialize(self, *, observations: ObservationType, **kwargs):
        """Reset the previous-image buffer and record the initial
        observations (no transition is stored for them)."""
        self._prev_imgs = None
        self.add(observations=observations, actions=None, masks=None)

    def batched_experience_generator(self, num_mini_batch: int):
        """Yield shuffled mini-batches of all stored transitions.

        The number of batches is `ceil(num_transitions / target_batch_size)`;
        `num_mini_batch` is not used. Each yielded item is a dict with keys
        `"img0"`, `"action"` and `"img1"` whose tensors live on `self.device`.
        Nothing is yielded when no transitions have been stored.
        """
        triples = [
            (i0, a, i1)
            for a, v in self.action_to_saved_transitions.items()
            for (i0, i1) in v
        ]
        if len(triples) == 0:
            return

        random.shuffle(triples)

        parts = partition_sequence(
            triples, math.ceil(len(triples) / self.target_batch_size)
        )
        for part in parts:
            img0s, actions, img1s = unzip(part, n=3)

            img0 = torch.stack([i0.to(self.device) for i0 in img0s], 0)
            action = torch.tensor(actions, device=self.device)
            img1 = torch.stack([i1.to(self.device) for i1 in img1s], 0)

            self._total_samples_returned_in_batches += img0.shape[0]
            yield {"img0": img0, "action": action, "img1": img1}

    def add(
        self,
        *,
        observations: ObservationType,
        actions: Optional[torch.Tensor],
        masks: Optional[torch.Tensor],
        **kwargs,
    ):
        """Record the transitions leading to `observations`.

        # Parameters

        observations : Current observations; must contain the image under
            `image_uuid` (and the success flag under `action_success_uuid`
            when that is configured).
        actions : Actions taken since the previous call, one per sampler
            after flattening. May be `None` only on the very first call (when
            there are no previous images).
        masks : Per-sampler masks after flattening; a value of `0` is taken
            to mean the sampler just started a new episode/task.
        """
        cur_imgs = cast(
            torch.Tensor, _index_recursive(d=observations, key=self.image_uuid).cpu()
        )

        if self._prev_imgs is not None:
            actions = actions.view(-1).cpu().numpy()
            masks = masks.view(-1).cpu().numpy()

            if self.action_success_uuid is not None:
                # `action_success_uuid` may be a nested key path, just like
                # `image_uuid`, so it must be resolved the same way.
                action_successes = (
                    _index_recursive(d=observations, key=self.action_success_uuid)
                    .cpu()
                    .view(-1)
                    .numpy()
                )
            else:
                action_successes = [True] * actions.shape[0]

            # NOTE(review): the extra targets are read here but are not
            # currently stored alongside the transitions.
            extra = {}
            for et in self.extra_targets:
                extra[et] = observations[et][0].cpu().numpy()

            nsamplers = actions.shape[0]
            assert nsamplers == masks.shape[0]

            for i, (a, m, action_success) in enumerate(
                zip(actions, masks, action_successes)
            ):
                actions_already_sampled_in_ep = (
                    self.task_sampler_to_actions_already_sampled[i]
                )

                if (
                    m != 0
                    and action_success
                    and (
                        len(actions_already_sampled_in_ep)
                        <= self.max_to_save_per_episode
                    )
                    and a not in actions_already_sampled_in_ep
                ):  # Not the start of a new episode/task -> self._prev_imgs[i] corresponds to cur_imgs[i]
                    saved_transitions = self.action_to_saved_transitions[a]
                    transition = (self._prev_imgs[i], cur_imgs[i])

                    if len(saved_transitions) < self.num_to_store_per_action:
                        saved_transitions.append(transition)
                    else:
                        # Bucket is full: overwrite a uniformly random entry.
                        # NOTE: this is not reservoir sampling;
                        # `action_to_num_seen` is tracked below but does not
                        # influence which entry is replaced.
                        saved_transitions[
                            random.randint(0, len(saved_transitions) - 1)
                        ] = transition

                    actions_already_sampled_in_ep.add(a)
                    self.action_to_num_seen[a] += 1
                else:
                    # Any rejected step (new episode, failed action, repeated
                    # action, or too many actions stored) resets the record
                    # of actions stored for this sampler.
                    actions_already_sampled_in_ep.clear()

        self._prev_imgs = cur_imgs

    def before_updates(self, **kwargs):
        """No-op hook."""
        pass

    def after_updates(self, **kwargs):
        """No-op hook."""
        pass

    def to(self, device: torch.device):
        """Set the device that yielded mini-batches are moved to."""
        self.device = device


================================================
FILE: allenact/main.py
================================================
"""Entry point to training/validating/testing for a user given experiment
name."""

import os

# Respect a value the user already exported; otherwise pick a default. This
# runs before the remaining imports below so the variable is already in place
# when they execute.
if "CUDA_DEVICE_ORDER" not in os.environ:
    # Necessary to order GPUs correctly in some cases
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

import argparse
import ast
import importlib
import inspect
import json
from typing import Dict, List, Optional, Tuple, Type

from setproctitle import setproctitle as ptitle

from allenact import __version__
from allenact.algorithms.onpolicy_sync.runner import (
    CONFIG_KWARGS_STR,
    OnPolicyRunner,
    SaveDirFormat,
)
from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.utils.system import HUMAN_LOG_LEVELS, get_logger, init_logging


def get_argument_parser():
    """Creates the argument parser.

    The parser defines the positional `experiment` argument, all
    training/evaluation options, and a small set of deprecated flags that are
    still registered so that `get_args` can reject them with a helpful
    message.

    # Returns

    The configured `argparse.ArgumentParser`.
    """

    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        description="allenact",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument(
        "experiment",
        type=str,
        help="the path to experiment config file relative to the 'experiment_base' directory"
        " (see the `--experiment_base` flag).",
    )

    parser.add_argument(
        "--eval",
        dest="eval",
        action="store_true",
        required=False,
        help="if you pass the `--eval` flag, AllenAct will run inference on your experiment configuration."
        " You will need to specify which experiment checkpoints to run evaluation using the `--checkpoint`"
        " flag.",
    )
    parser.set_defaults(eval=False)

    parser.add_argument(
        "--config_kwargs",
        type=str,
        default=None,
        required=False,
        help="sometimes it is useful to be able to pass additional key-word arguments"
        " to `__init__` when initializing an experiment configuration. This flag can be used"
        " to pass such key-word arguments by specifying them with json, e.g."
        '\n\t--config_kwargs \'{"gpu_id": 0, "my_important_variable": [1,2,3]}\''
        "\nTo see which arguments are supported for your experiment see the experiment"
        " config's `__init__` function. If the value passed to this function is a file path"
        " then we will try to load this file path as a json object and use this json object"
        " as key-word arguments.",
    )

    parser.add_argument(
        "--extra_tag",
        type=str,
        default="",
        required=False,
        help="Add an extra tag to the experiment when trying out new ideas (will be used"
        " as a subdirectory of the tensorboard path so you will be able to"
        " search tensorboard logs using this extra tag). This can also be used to add an extra"
        " organization when running evaluation (e.g. `--extra_tag running_eval_on_great_idea_12`)",
    )

    parser.add_argument(
        "-o",
        "--output_dir",
        required=False,
        type=str,
        default="experiment_output",
        help="experiment output folder",
    )

    parser.add_argument(
        "--save_dir_fmt",
        required=False,
        # The string default below is also passed through this converter by
        # argparse, so `args.save_dir_fmt` is always a `SaveDirFormat` member.
        type=lambda s: SaveDirFormat[s.upper()],
        default="flat",
        help="The file structure to use when saving results from allenact."
        " See documentation of `SaveDirFormat` for more details."
        " Allowed values are ('flat' and 'nested'). Default: 'flat'.",
    )

    parser.add_argument(
        "-s",
        "--seed",
        required=False,
        default=None,
        type=int,
        help="random seed",
    )
    parser.add_argument(
        "-b",
        "--experiment_base",
        required=False,
        default=os.getcwd(),
        type=str,
        help="experiment configuration base folder (default: working directory)",
    )
    parser.add_argument(
        "-c",
        "--checkpoint",
        required=False,
        default=None,
        type=str,
        help="optional checkpoint file name to resume training on or run testing with. When testing (see the `--eval` flag) this"
        " argument can be used very flexibly as:"
        "\n(1) the path to a particular individual checkpoint file,"
        "\n(2) the path to a directory of checkpoint files all of which you'd like to be evaluated"
        " (checkpoints are expected to have a `.pt` file extension),"
        '\n(3) a "glob" pattern (https://tldp.org/LDP/abs/html/globbingref.html) that will be expanded'
        " using python's `glob.glob` function and should return a collection of checkpoint files."
        "\nIf you'd like to only evaluate a subset of the checkpoints specified by the above directory/glob"
        " (e.g. every checkpoint saved after 5mil steps) you'll likely want to use the `--approx_ckpt_step_interval`"
        " flag.",
    )
    parser.add_argument(
        "--infer_output_dir",
        dest="infer_output_dir",
        action="store_true",
        required=False,
        help="applied when evaluating checkpoint(s) in nested save_dir_fmt: if specified, the output dir will be inferred from checkpoint path.",
    )
    parser.add_argument(
        "--approx_ckpt_step_interval",
        required=False,
        default=None,
        type=float,
        help="if running tests on a collection of checkpoints (see the `--checkpoint` flag) this argument can be"
        " used to skip checkpoints. In particular, if this value is specified and equals `n` then we will"
        " only evaluate checkpoints whose step count is closest to each of `0*n`, `1*n`, `2*n`, `3*n`, ... "
        " n * ceil(max training steps in ckpts / n). Note that 'closest to' is important here as AllenAct does"
        " not generally save checkpoints at exact intervals (doing so would result in performance degradation"
        " in distributed training).",
    )
    parser.add_argument(
        "-r",
        "--restart_pipeline",
        dest="restart_pipeline",
        action="store_true",
        required=False,
        help="for training, if checkpoint is specified, DO NOT continue the training pipeline from where"
        " training had previously ended. Instead restart the training pipeline from scratch but"
        " with the model weights from the checkpoint.",
    )
    parser.set_defaults(restart_pipeline=False)

    parser.add_argument(
        "-d",
        "--deterministic_cudnn",
        dest="deterministic_cudnn",
        action="store_true",
        required=False,
        help="sets CuDNN to deterministic mode",
    )
    parser.set_defaults(deterministic_cudnn=False)

    parser.add_argument(
        "-m",
        "--max_sampler_processes_per_worker",
        required=False,
        default=None,
        type=int,
        help="maximal number of sampler processes to spawn for each worker",
    )

    parser.add_argument(
        "-e",
        "--deterministic_agents",
        dest="deterministic_agents",
        action="store_true",
        required=False,
        help="enable deterministic agents (i.e. always taking the mode action) during validation/testing",
    )
    parser.set_defaults(deterministic_agents=False)

    parser.add_argument(
        "-l",
        "--log_level",
        default="info",
        type=str,
        required=False,
        help="sets the log_level. it must be one of {}.".format(
            ", ".join(HUMAN_LOG_LEVELS)
        ),
    )

    parser.add_argument(
        "-i",
        "--disable_tensorboard",
        dest="disable_tensorboard",
        action="store_true",
        required=False,
        help="disable tensorboard logging",
    )
    parser.set_defaults(disable_tensorboard=False)

    parser.add_argument(
        "-a",
        "--disable_config_saving",
        dest="disable_config_saving",
        action="store_true",
        required=False,
        help="disable saving the used config in the output directory",
    )
    parser.set_defaults(disable_config_saving=False)

    parser.add_argument(
        "--collect_valid_results",
        dest="collect_valid_results",
        action="store_true",
        required=False,
        help="enables returning and saving valid results during training",
    )
    parser.set_defaults(collect_valid_results=False)

    parser.add_argument(
        "--valid_on_initial_weights",
        dest="valid_on_initial_weights",
        action="store_true",
        required=False,
        help="enables running validation on the model with initial weights",
    )
    parser.set_defaults(valid_on_initial_weights=False)

    parser.add_argument(
        "--test_expert",
        dest="test_expert",
        action="store_true",
        required=False,
        help="use expert during test",
    )
    parser.set_defaults(test_expert=False)

    parser.add_argument(
        "--version", action="version", version=f"allenact {__version__}"
    )

    parser.add_argument(
        "--distributed_ip_and_port",
        dest="distributed_ip_and_port",
        required=False,
        type=str,
        default="127.0.0.1:0",
        help="IP address and port of listener for distributed process with rank 0."
        " Port number 0 lets runner choose a free port. For more details, please follow the"
        " tutorial https://allenact.org/tutorials/distributed-objectnav-tutorial/.",
    )

    parser.add_argument(
        "--machine_id",
        dest="machine_id",
        required=False,
        type=int,
        default=0,
        help="ID for machine in distributed runs. For more details, please follow the"
        " tutorial https://allenact.org/tutorials/distributed-objectnav-tutorial/",
    )

    parser.add_argument(
        "--save_ckpt_at_every_host",
        dest="save_ckpt_at_every_host",
        action="store_true",
        required=False,
        help="if you pass the `--save_ckpt_at_every_host` flag, AllenAct will save checkpoints at every host as"
        " the training progresses in distributed training mode.",
    )
    parser.set_defaults(save_ckpt_at_every_host=False)

    parser.add_argument(
        "--callbacks",
        dest="callbacks",
        required=False,
        type=str,
        default="",
        help="Comma-separated list of files with Callback classes to use.",
    )

    parser.add_argument(
        "--enable_crash_recovery",
        dest="enable_crash_recovery",
        default=False,
        action="store_true",
        required=False,
        help="Whether or not to try recovering when a task crashes (use at your own risk).",
    )

    ### DEPRECATED FLAGS
    # The help strings of these flags double as the error messages raised by
    # `get_args` when one of them is used.
    parser.add_argument(
        "-t",
        "--test_date",
        default=None,
        type=str,
        required=False,
        help="`--test_date` has been deprecated. Please use `--eval` instead.",
    )
    parser.add_argument(
        "--approx_ckpt_steps_count",
        required=False,
        default=None,
        type=float,
        help="`--approx_ckpt_steps_count` has been deprecated."
        " Please specify the checkpoint directly using the '--checkpoint' flag.",
    )
    parser.add_argument(
        "-k",
        "--skip_checkpoints",
        required=False,
        default=0,
        type=int,
        help="`--skip_checkpoints` has been deprecated. Please use `--approx_ckpt_step_interval` instead.",
    )
    ### END DEPRECATED FLAGS

    return parser


def get_args():
    """Creates the argument parser and parses any input arguments.

    # Returns

    The parsed `argparse.Namespace`.

    # Raises

    RuntimeError : If any deprecated flag was given a non-default value. The
        error message is the help text of the offending flag.
    """

    parser = get_argument_parser()
    args = parser.parse_args()

    # check for deprecated
    deprecated_flags = ("test_date", "skip_checkpoints", "approx_ckpt_steps_count")
    for df in deprecated_flags:
        df_info = parser._option_string_actions[f"--{df}"]
        # Compare by value: an identity check on parsed ints/floats would only
        # work by accident of the interpreter's small-integer caching.
        if getattr(args, df) != df_info.default:
            raise RuntimeError(df_info.help)

    return args


def _config_source(config_type: Type) -> Dict[str, str]:
    """Map source file paths to module dot-paths for a config class and its
    ancestors.

    The walk goes up through `__bases__` and stops at `ExperimentConfig`
    itself (which contributes nothing). If the source file of a class cannot
    be determined (`inspect.getfile` raises `TypeError`), that class
    contributes an empty mapping.
    """
    if config_type is ExperimentConfig:
        return {}

    try:
        collected = {inspect.getfile(config_type): config_type.__module__}
        for parent in config_type.__bases__:
            collected.update(_config_source(parent))
    except TypeError:
        return {}

    return collected


def find_sub_modules(path: str, module_list: Optional[List] = None):
    """Recursively collect absolute paths of `.py` files under `path`.

    A path ending in `.py` is recorded directly. A directory is descended
    into only if it directly contains an `__init__.py` or a `setup.py`;
    anything else is ignored.

    # Parameters

    path : File or directory to inspect.
    module_list : Optional list to append results to (it is mutated and
        returned). A new list is created when omitted.

    # Returns

    The list of collected absolute paths.
    """
    collected = [] if module_list is None else module_list

    abs_path = os.path.abspath(path)
    if abs_path.endswith(".py"):
        collected.append(abs_path)
        return collected

    if not os.path.isdir(abs_path):
        return collected

    entries = os.listdir(abs_path)
    if "__init__.py" in entries or "setup.py" in entries:
        for entry in entries:
            find_sub_modules(os.path.join(abs_path, entry), collected)
    return collected


def load_config(args) -> Tuple[ExperimentConfig, Dict[str, str]]:
    """Import the experiment module named by `args` and instantiate its
    config.

    # Parameters

    args : Parsed command line arguments. Reads `experiment_base`,
        `experiment` and `config_kwargs`.

    # Returns

    A tuple `(config, sources)` where `config` is the instantiated experiment
    config and `sources` maps source file paths to module dot-paths for the
    config class and its ancestors, plus the json-serialized `config_kwargs`
    under the `CONFIG_KWARGS_STR` key.

    # Raises

    AssertionError : If `experiment_base` does not exist, if the module does
        not define exactly one `ExperimentConfig` subclass, or if
        `--config_kwargs` does not evaluate to a dictionary.
    ModuleNotFoundError : If the experiment module cannot be imported.
    """
    assert os.path.exists(
        args.experiment_base
    ), "The path '{}' does not seem to exist (your current working directory is '{}').".format(
        args.experiment_base, os.getcwd()
    )
    rel_base_dir = os.path.relpath(  # Normalizing string representation of path
        os.path.abspath(args.experiment_base), os.getcwd()
    )
    rel_base_dot_path = rel_base_dir.replace("/", ".")
    if rel_base_dot_path == ".":
        rel_base_dot_path = ""

    exp_dot_path = args.experiment
    if exp_dot_path[-3:] == ".py":
        exp_dot_path = exp_dot_path[:-3]
    exp_dot_path = exp_dot_path.replace("/", ".")

    module_path = (
        f"{rel_base_dot_path}.{exp_dot_path}"
        if len(rel_base_dot_path) != 0
        else exp_dot_path
    )

    try:
        importlib.invalidate_caches()
        module = importlib.import_module(module_path)
    except ModuleNotFoundError as e:
        # Only rewrite the error when it is the experiment module itself that
        # is missing; a failing import *inside* the experiment is re-raised
        # unchanged so its traceback stays intact.
        if not any(isinstance(arg, str) and module_path in arg for arg in e.args):
            raise
        all_sub_modules = set(find_sub_modules(os.getcwd()))
        desired_config_name = module_path.split(".")[-1]
        relevant_submodules = [
            sm for sm in all_sub_modules if desired_config_name in os.path.basename(sm)
        ]
        raise ModuleNotFoundError(
            f"Could not import experiment '{module_path}', are you sure this is the right path?"
            f" Possibly relevant files include {relevant_submodules}."
            f" Note that the experiment must be reachable along your `PYTHONPATH`, it might"
            f" be helpful for you to run `export PYTHONPATH=$PYTHONPATH:$PWD` in your"
            f" project's top level directory."
        ) from e

    # Only classes defined in the experiment module itself count (imported
    # config base classes are skipped).
    experiments = [
        m[1]
        for m in inspect.getmembers(module, inspect.isclass)
        if m[1].__module__ == module.__name__ and issubclass(m[1], ExperimentConfig)
    ]
    assert (
        len(experiments) == 1
    ), "Too many or too few experiments defined in {}".format(module_path)

    config_kwargs = {}
    if args.config_kwargs is not None:
        if os.path.exists(args.config_kwargs):
            with open(args.config_kwargs, "r") as f:
                config_kwargs = json.load(f)
        else:
            try:
                config_kwargs = json.loads(args.config_kwargs)
            except json.JSONDecodeError:
                get_logger().warning(
                    f"The input for --config_kwargs ('{args.config_kwargs}')"
                    f" does not appear to be valid json. Often this is due to"
                    f" json requiring very specific syntax (e.g. double quoted strings)"
                    f" we'll try to get around this by evaluating with `ast.literal_eval`"
                    f" (a safer version of the standard `eval` function)."
                )
                config_kwargs = ast.literal_eval(args.config_kwargs)

        assert isinstance(
            config_kwargs, dict
        ), "`--config_kwargs` must be a json string (or a path to a .json file) that evaluates to a dictionary."

    config = experiments[0](**config_kwargs)
    sources = _config_source(config_type=experiments[0])
    sources[CONFIG_KWARGS_STR] = json.dumps(config_kwargs)
    return config, sources


def main():
    """Command-line entry point: parse arguments, load the experiment config
    and start training (default) or evaluation (`--eval`)."""
    args = get_args()

    init_logging(args.log_level)

    get_logger().info("Running with args {}".format(args))

    # `args.eval` is a boolean flag (never `None`), so test its truthiness.
    ptitle("Master: {}".format("Evaluation" if args.eval else "Training"))

    cfg, srcs = load_config(args)

    # The runner is configured identically for both modes apart from `mode`.
    runner = OnPolicyRunner(
        config=cfg,
        output_dir=args.output_dir,
        save_dir_fmt=args.save_dir_fmt,
        loaded_config_src_files=srcs,
        seed=args.seed,
        mode="test" if args.eval else "train",
        deterministic_cudnn=args.deterministic_cudnn,
        deterministic_agents=args.deterministic_agents,
        extra_tag=args.extra_tag,
        disable_tensorboard=args.disable_tensorboard,
        disable_config_saving=args.disable_config_saving,
        distributed_ip_and_port=args.distributed_ip_and_port,
        machine_id=args.machine_id,
        callbacks_paths=args.callbacks,
    )

    if not args.eval:
        runner.start_train(
            checkpoint=args.checkpoint,
            restart_pipeline=args.restart_pipeline,
            max_sampler_processes_per_worker=args.max_sampler_processes_per_worker,
            collect_valid_results=args.collect_valid_results,
            valid_on_initial_weights=args.valid_on_initial_weights,
            try_restart_after_task_error=args.enable_crash_recovery,
            # Must come from the parsed arguments; a bare name here would
            # raise `NameError` as soon as training is started.
            save_ckpt_at_every_host=args.save_ckpt_at_every_host,
        )
    else:
        runner.start_test(
            checkpoint_path_dir_or_pattern=args.checkpoint,
            infer_output_dir=args.infer_output_dir,
            approx_ckpt_step_interval=args.approx_ckpt_step_interval,
            max_sampler_processes_per_worker=args.max_sampler_processes_per_worker,
            inference_expert=args.test_expert,
        )


# Run the command-line entry point only when this file is executed as a
# script (not when it is imported).
if __name__ == "__main__":
    main()


================================================
FILE: allenact/setup.py
================================================
import os
from pathlib import Path

from setuptools import find_packages, setup


def parse_req_file(fname, initial=None):
    """Reads requires.txt file generated by setuptools and outputs a
    new/updated dict of extras as keys and corresponding lists of dependencies
    as values.

    The input file's contents are similar to a `ConfigParser` file, e.g.
    pkg_1
    pkg_2
    pkg_3

    [extras1]
    pkg_4
    pkg_5

    [extras2]
    pkg_6
    pkg_7

    Dependencies listed before the first `[section]` header are ignored; only
    those belonging to an extra are kept.

    # Parameters

    fname : Path of the requires.txt file.
    initial : Optional dict to update in place (it is also returned). A new
        dict is created when omitted.

    # Returns

    Dict mapping each extra's name to its list of dependency strings.
    """
    reqs = {} if initial is None else initial
    current_extra = None
    with open(fname, "r") as f:
        for raw_line in f:
            # `strip()` removes the line terminator (if any) together with
            # surrounding whitespace. Blindly dropping the last character
            # would corrupt a final line that has no trailing newline.
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith("["):
                # Add new key for current extras (if missing in dict)
                current_extra = line[1:-1].strip()
                reqs.setdefault(current_extra, [])
            elif current_extra is not None:
                # Only keep dependencies from extras
                reqs[current_extra].append(line)
    return reqs


def get_version(fname):
    """Reads PKG-INFO file generated by setuptools and extracts the Version
    number.

    # Parameters

    fname : Path of the PKG-INFO file.

    # Returns

    The text following the first line that starts with `Version:`.

    # Raises

    ValueError : If no such line exists or the version is empty.
    """
    prefix = "Version:"
    res = "UNK"
    with open(fname, "r") as f:
        for line in f:
            # Only strip the line terminator; unconditionally dropping the
            # last character would truncate a version on a final line that
            # has no trailing newline.
            line = line.rstrip("\r\n")
            if line.startswith(prefix):
                res = line[len(prefix) :].strip()
                break
    if res in ["UNK", ""]:
        raise ValueError(f"Missing Version number in {fname}")
    return res


def _do_setup():
    """Run `setuptools.setup` for the `allenact` package.

    Two modes are distinguished by whether
    `allenact.egg-info/dependency_links.txt` exists next to this file:

    * missing -> building an sdist: change to the parent directory, read the
      version from `.VERSION` and the `dev` extras from
      `dev_requirements.txt`;
    * present -> installing from an sdist: read the version and extras from
      the bundled egg-info metadata.
    """
    base_dir = os.path.abspath(os.path.dirname(Path(__file__)))

    if not os.path.exists(
        os.path.join(base_dir, "allenact.egg-info/dependency_links.txt")
    ):
        # Build mode for sdist
        os.chdir(os.path.join(base_dir, ".."))

        with open(".VERSION", "r") as f:
            __version__ = f.readline().strip()

        # Extra dependencies for development (actually unnecessary)
        # Read inside a `with` block so the file handle is closed promptly.
        with open("dev_requirements.txt", "r") as f:
            dev_requirements = [l.strip() for l in f if l.strip() != ""]
        extras = {"dev": dev_requirements}
    else:
        # Install mode from sdist
        __version__ = get_version(os.path.join(base_dir, "allenact.egg-info/PKG-INFO"))
        extras = parse_req_file(
            os.path.join(base_dir, "allenact.egg-info/requires.txt")
        )

    setup(
        name="allenact",
        version=__version__,
        description="AllenAct framework",
        long_description=(
            "AllenAct is a modular and flexible learning framework designed with"
            " a focus on the unique requirements of Embodied-AI research."
        ),
        classifiers=[
            "Intended Audience :: Science/Research",
            "Development Status :: 3 - Alpha",
            "License :: OSI Approved :: MIT License",
            "Topic :: Scientific/Engineering :: Artificial Intelligence",
            "Programming Language :: Python",
            "Programming Language :: Python :: 3.6",
            "Programming Language :: Python :: 3.7",
            "Programming Language :: Python :: 3.8",
            "Programming Language :: Python :: 3.9",
            "Programming Language :: Python :: 3.10",
        ],
        keywords=["reinforcement learning", "embodied-AI", "AI", "RL", "SLAM"],
        url="https://github.com/allenai/allenact",
        author="Allen Institute for Artificial Intelligence",
        author_email="lucaw@allenai.org",
        license="MIT",
        packages=find_packages(include=["allenact", "allenact.*"]),
        install_requires=[
            "gym==0.17.*",  # Newer versions of gym are now broken with updates to setuptools
            "torch>=1.6.0,!=1.8.0",
            "torchvision>=0.7.0,<=0.16.2",
            "tensorboardx>=2.1",
            "setproctitle",
            "moviepy>=1.0.3",
            "filelock",
            "numpy>=1.19.1",
            "Pillow>=8.2.0,<10.3.0",
            "matplotlib>=3.3.1",
            "networkx",
            "opencv-python",
            "wheel>=0.36.2",
            "attrs>=21.4.0",
            "scipy>=1.5.4",
        ],
        setup_requires=["pytest-runner"],
        tests_require=["pytest", "pytest-cov", "compress_pickle"],
        entry_points={"console_scripts": ["allenact=allenact.main:main"]},
        extras_require=extras,
    )


# Invoke setuptools only when this file is run as a script.
if __name__ == "__main__":
    _do_setup()


================================================
FILE: allenact/utils/__init__.py
================================================


================================================
FILE: allenact/utils/cache_utils.py
================================================
import math
from typing import Dict, Any, Union, Callable, Optional

from allenact.utils.system import get_logger


def pos_to_str_for_cache(pos: Dict[str, float]) -> str:
    """Serialize a position dict as `"<x>_<y>_<z>"` for use as a cache key.

    Only the `"x"`, `"y"` and `"z"` entries are read; each is converted with
    `str`.
    """
    return "_".join(str(pos[axis]) for axis in ("x", "y", "z"))


def str_to_pos_for_cache(s: str) -> Dict[str, float]:
    """Parse a `"<x>_<y>_<z>"` cache key back into a position dict.

    The string is split on underscores and the first three pieces are
    converted to floats; any further pieces are ignored.
    """
    pieces = s.split("_")
    return {axis: float(pieces[i]) for i, axis in enumerate(("x", "y", "z"))}


def get_distance(
    cache: Dict[str, Any], pos: Dict[str, float], target: Dict[str, float]
) -> float:
    """Look up the cached shortest-path distance from `pos` to `target`.

    `pos` is snapped to a 0.25 grid in x and z (y is kept as is). The four
    surrounding grid points are tried in the order (ceil, ceil),
    (floor, ceil), (ceil, floor), (floor, floor). If none is cached, the
    cached point nearest to `pos` is used, and as a last resort `target` is
    also replaced by its nearest cached point.

    # Parameters

    cache : Nested mapping `cache[pos_key][target_key]["distance"]` with keys
        produced by `pos_to_str_for_cache`.
    pos : Query position with `"x"`, `"y"` and `"z"` entries.
    target : Target position with `"x"`, `"y"` and `"z"` entries.

    # Returns

    The cached distance.

    # Raises

    RuntimeError : If no cached distance can be found by any fallback.
    """
    # Each candidate is rounded from the ORIGINAL position. Rounding an
    # already-rounded position is a no-op, so reusing the rounded value would
    # query the very same grid point four times.
    rounding_combinations = (
        (math.ceil, math.ceil),
        (math.floor, math.ceil),
        (math.ceil, math.floor),
        (math.floor, math.floor),
    )
    for round_x, round_z in rounding_combinations:
        rounded_pos = {
            "x": 0.25 * round_x(pos["x"] / 0.25),
            "y": pos["y"],
            "z": 0.25 * round_z(pos["z"] / 0.25),
        }
        sp = _get_shortest_path_distance_from_cache(cache, rounded_pos, target)
        if sp != -1.0:
            return sp

    # Fall back to the nearest cached start position ...
    nearest_pos = find_nearest_point_in_cache(cache, pos)
    sp = _get_shortest_path_distance_from_cache(cache, nearest_pos, target)
    if sp == -1.0:
        # ... and finally also to the nearest cached target.
        nearest_target = find_nearest_point_in_cache(cache, target)
        sp = _get_shortest_path_distance_from_cache(cache, nearest_pos, nearest_target)
    if sp == -1.0:
        # Raise (as `get_distance_to_object` does) instead of printing and
        # calling `exit()`, which would end the process with a success status.
        raise RuntimeError("Your cache is incomplete!")
    return sp


def get_distance_to_object(
    cache: Dict[str, Any], pos: Dict[str, float], target_class: str
) -> float:
    """Estimate the distance from `pos` to an object class by interpolating
    cached values.

    The four points surrounding `pos` on a 0.25 grid (in x and z) are looked
    up in the cache. The cached distances that exist are averaged with
    weights inversely proportional to the planar (x, z) distance between
    `pos` and the respective grid point.

    # Parameters

    cache : Nested mapping `cache[pos_key][target_class]["distance"]`.
    pos : Query position with `"x"`, `"y"` and `"z"` entries.
    target_class : Name of the object class.

    # Returns

    The inverse-distance-weighted average of the available cached distances.

    # Raises

    RuntimeError : If none of the surrounding grid points is cached.
    """
    # Small constant that keeps the weight finite when `pos` lies exactly on
    # a grid point. It has to be tiny: a large constant (e.g. `1e6`) would
    # swamp the actual offsets and make all weights practically equal.
    eps = 1e-6

    dists = []
    weights = []
    for rounder_func_0 in [math.ceil, math.floor]:
        for rounder_func_1 in [math.ceil, math.floor]:
            rounded_pos = {
                "x": 0.25 * rounder_func_0(pos["x"] / 0.25),
                "y": pos["y"],
                "z": 0.25 * rounder_func_1(pos["z"] / 0.25),
            }
            dist = _get_shortest_path_distance_to_object_from_cache(
                cache, rounded_pos, target_class
            )
            if dist >= 0:
                offset = math.sqrt(
                    (pos["x"] - rounded_pos["x"]) ** 2
                    + (pos["z"] - rounded_pos["z"]) ** 2
                )
                dists.append(dist)
                weights.append(1.0 / (offset + eps))

    if len(dists) == 0:
        raise RuntimeError("Your cache is incomplete!")

    total_weight = sum(weights)
    weights = [w / total_weight for w in weights]

    return sum(d * w for d, w in zip(dists, weights))


def _get_shortest_path_distance_from_cache(
    cache: Dict[str, Any], position: Dict[str, float], target: Dict[str, float]
) -> float:
    """Look up the cached distance between two positions.

    Both positions are converted to cache keys with `pos_to_str_for_cache`.

    # Returns

    The value stored under `"distance"` for the (position, target) pair, or
    `-1.0` if any of the lookups raises a `KeyError`.
    """
    try:
        per_source = cache[pos_to_str_for_cache(position)]
        entry = per_source[pos_to_str_for_cache(target)]
        return entry["distance"]
    except KeyError:
        # Sentinel used by callers to detect a cache miss.
        return -1.0


def _get_shortest_path_distance_to_object_from_cache(
    cache: Dict[str, Any], position: Dict[str, float], target_class: str
) -> float:
    """Look up the cached distance from a position to an object class.

    The position is converted to a cache key with `pos_to_str_for_cache`.

    # Returns

    The value stored under `"distance"` for the (position, target_class)
    pair, or `-1.0` if any of the lookups raises a `KeyError`.
    """
    try:
        per_source = cache[pos_to_str_for_cache(position)]
        entry = per_source[target_class]
        return entry["distance"]
    except KeyError:
        # Sentinel used by callers to detect a cache miss.
        return -1.0


def find_nearest_point_in_cache(
    cache: Dict[str, Any], point: Dict[str, float]
) -> Dict[str, float]:
    """Return the cached position closest to `point` in L1 (Manhattan) distance.

    Every key of `cache` is decoded with `str_to_pos_for_cache` and compared
    against `point` on the `"x"`, `"y"`, and `"z"` coordinates. On ties the
    first key encountered wins.

    # Returns

    The decoded closest position, or an empty dictionary if no candidate
    had a finite smaller distance (e.g. when `cache` is empty).
    """
    nearest: Dict[str, float] = {}
    smallest_l1 = float("inf")
    for key in cache:
        candidate = str_to_pos_for_cache(key)
        l1 = sum(abs(point[axis] - candidate[axis]) for axis in ("x", "y", "z"))
        if l1 < smallest_l1:
            nearest, smallest_l1 = candidate, l1
    return nearest


class DynamicDistanceCache(object):
    """Lazily filled cache of distances keyed by scene, position, and target.

    Distances are computed on demand with a user supplied function and
    memoized, optionally after rounding the position coordinates so that
    nearby positions share a cache entry.

    # Attributes

    cache : Nested dictionary `position key -> target key -> distance`.
    rounding : Number of decimals coordinates are rounded to when building
        keys. If `None` (or `0`) no rounding is applied.
    hits : Number of lookups answered from the cache.
    misses : Number of lookups that required calling the native distance function.
    num_accesses : Total number of lookups.
    """

    def __init__(self, rounding: Optional[int] = None):
        self.cache: Dict[str, Any] = {}
        self.rounding = rounding
        self.hits = 0
        self.misses = 0
        self.num_accesses = 0

    def find_distance(
        self,
        scene_name: str,
        position: Dict[str, Any],
        target: Union[Dict[str, Any], str],
        native_distance_function: Callable[
            [Dict[str, Any], Union[Dict[str, Any], str]], float
        ],
    ) -> float:
        """Return the (possibly cached) distance from `position` to `target`.

        # Parameters

        scene_name : Name of the scene, used as a prefix of the position key.
        position : The start position.
        target : Either a position dictionary or a string (used as the key as is).
        native_distance_function : Called as `f(position, target)` with the
            unrounded inputs whenever the distance is not yet cached.

        # Returns

        The cached or freshly computed distance.
        """
        # Convert the position to its rounded string representation
        position_str = scene_name + self._pos_to_str(position)
        # If the target is also a position, convert it to its rounded string representation
        if isinstance(target, str):
            target_str = target
        else:
            target_str = self._pos_to_str(target)

        distances_from_position = self.cache.setdefault(position_str, {})
        if target_str not in distances_from_position:
            distances_from_position[target_str] = native_distance_function(
                position, target
            )
            self.misses += 1
        else:
            self.hits += 1

        self.num_accesses += 1
        if self.num_accesses % 1000 == 0:
            # Guard against a division by zero when nothing has been served
            # from the cache yet.
            ratio = self.misses / self.hits if self.hits > 0 else float("inf")
            get_logger().debug("Cache Miss-Hit Ratio: %.4f" % ratio)
        return distances_from_position[target_str]

    def invalidate(self):
        """Drop all cached distances (hit/miss counters are kept)."""
        # Must stay a dictionary: `find_distance` indexes it by string keys.
        self.cache = {}

    def _pos_to_str(self, pos: Dict[str, Any]) -> str:
        """Build the cache key for `pos`, rounding its values if requested."""
        if self.rounding:
            pos = {k: round(v, self.rounding) for k, v in pos.items()}
        return str(pos)


================================================
FILE: allenact/utils/cacheless_frcnn.py
================================================
from typing import List, Any

import torch
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.faster_rcnn import FasterRCNN

# noinspection PyProtectedMember
from torchvision.models.detection.faster_rcnn import model_urls
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.utils import load_state_dict_from_url


class CachelessAnchorGenerator(AnchorGenerator):
    """Anchor generator whose `forward` recomputes the cell and grid anchors
    from the given feature maps on every call."""

    def forward(self, image_list: Any, feature_maps: Any):
        """Compute the anchors for a batch of images.

        # Parameters

        image_list : Object exposing `tensors` (the batched images) and
            `image_sizes` (one `(height, width)` pair per image).
        feature_maps : Sequence of feature map tensors.

        # Returns

        A list with one tensor per image: the anchors of all feature maps
        concatenated. Every image receives the same anchors.
        """
        grid_sizes = [fmap.shape[-2:] for fmap in feature_maps]
        image_size = image_list.tensors.shape[-2:]
        # Integer stride of each feature map relative to the batched image.
        strides = [
            [int(image_size[0] / grid[0]), int(image_size[1] / grid[1])]
            for grid in grid_sizes
        ]

        reference_map = feature_maps[0]
        self.set_cell_anchors(reference_map.dtype, reference_map.device)
        anchors_over_all_feature_maps = self.grid_anchors(grid_sizes, strides)

        per_image_anchors = torch.jit.annotate(
            List[List[torch.Tensor]], []
        )  # type:ignore
        for _index, (_height, _width) in enumerate(image_list.image_sizes):
            per_image_anchors.append(list(anchors_over_all_feature_maps))

        return [torch.cat(level_anchors) for level_anchors in per_image_anchors]


def fasterrcnn_resnet50_fpn(
    pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, **kwargs
):
    """Build a Faster R-CNN with a ResNet-50 FPN backbone whose region
    proposal network uses a `CachelessAnchorGenerator`.

    # Parameters

    pretrained : If truthy, load the `"fasterrcnn_resnet50_fpn_coco"` weights
        into the model.
    progress : Forwarded to `load_state_dict_from_url`.
    num_classes : Forwarded to `FasterRCNN`.
    pretrained_backbone : Forwarded to `resnet_fpn_backbone`; forced to
        `False` when `pretrained` is truthy.
    kwargs : Additional keyword arguments forwarded to `FasterRCNN`.

    # Returns

    The constructed `FasterRCNN` model.
    """
    # No need to download backbone weights if the full pretrained model is
    # loaded below.
    load_backbone_weights = False if pretrained else pretrained_backbone
    backbone = resnet_fpn_backbone("resnet50", load_backbone_weights)

    # One anchor size per feature map, three aspect ratios for each size.
    # (A disabled alternative used min_size=300, max_size=400 and the anchor
    # sizes 12, 24, 48, 96, 192.)
    sizes_per_level = ((32,), (64,), (128,), (256,), (512,))
    ratios_per_level = ((0.5, 1.0, 2.0),) * len(sizes_per_level)
    anchor_generator = CachelessAnchorGenerator(sizes_per_level, ratios_per_level)

    model = FasterRCNN(
        backbone, num_classes, rpn_anchor_generator=anchor_generator, **kwargs
    )
    if not pretrained:
        return model

    weights = load_state_dict_from_url(
        model_urls["fasterrcnn_resnet50_fpn_coco"], progress=progress
    )
    model.load_state_dict(weights)
    return model


================================================
FILE: allenact/utils/experiment_utils.py
================================================
"""Utility classes and functions for running and designing experiments."""

import abc
import collections.abc
import copy
import numbers
import random
from collections import OrderedDict, defaultdict
from typing import (
    Callable,
    NamedTuple,
    Dict,
    Any,
    Union,
    Iterator,
    Optional,
    List,
    cast,
    Sequence,
    TypeVar,
    Generic,
    Tuple,
)

import attr
import numpy as np
import torch
import torch.optim as optim
import wandb
import shutil

from allenact.algorithms.offpolicy_sync.losses.abstract_offpolicy_loss import Memory
from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
    AbstractActorCriticLoss,
)
from allenact.algorithms.onpolicy_sync.storage import (
    ExperienceStorage,
    RolloutStorage,
    RolloutBlockStorage,
)
from allenact.base_abstractions.misc import Loss, GenericAbstractLoss
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact.utils.system import get_logger

try:
    # noinspection PyProtectedMember,PyUnresolvedReferences
    from torch.optim.lr_scheduler import _LRScheduler
except (ImportError, ModuleNotFoundError):
    raise ImportError("`_LRScheduler` was not found in `torch.optim.lr_scheduler`")

# NOTE(review): presumably the uuid assigned by default to the on-policy
# storage/component; its uses are outside this excerpt -- confirm.
_DEFAULT_ONPOLICY_UUID = "onpolicy"


def evenly_distribute_count_into_bins(count: int, nbins: int) -> List[int]:
    """Distribute a count into a number of bins.

    # Parameters
    count: A positive integer to be distributed, should be `>= nbins`.
    nbins: The number of bins.

    # Returns
    A list of positive integers which sum to `count`. These values will be
    as close to equal as possible (may differ by at most 1), with the larger
    values placed first.

    # Raises
    ValueError: If `count` is positive but `nbins` is not.
    """
    assert count >= nbins, f"count ({count}) < nbins ({nbins})"
    if nbins <= 0:
        if count > 0:
            raise ValueError(
                f"Cannot distribute count ({count}) into nbins ({nbins}) bins."
            )
        # Nothing to distribute and no bins to fill.
        return []

    # Every bin receives `base` items, the first `remainder` bins one more.
    base, remainder = divmod(count, nbins)
    return [base + 1] * remainder + [base] * (nbins - remainder)


def recursive_update(
    original: Union[Dict, collections.abc.MutableMapping],
    update: Union[Dict, collections.abc.MutableMapping],
):
    """Recursively updates original dictionary with entries from update dict.

    Mapping values in `update` are merged into the mapping stored under the
    same key in `original`. If `original` has no mapping under that key
    (the key is missing or holds a non-mapping value), a new dictionary is
    created and filled, so the mappings of `update` are never aliased.
    All other values simply replace the ones in `original`.

    # Parameters

    original : Original dictionary to be updated (modified in place).
    update : Dictionary with additional or replacement entries.

    # Returns

    Updated original dictionary.
    """
    for key, value in update.items():
        if isinstance(value, collections.abc.MutableMapping):
            existing = original.get(key)
            if not isinstance(existing, collections.abc.MutableMapping):
                # A scalar (or missing) entry cannot be merged into, so it is
                # replaced by a fresh dictionary.
                existing = {}
            original[key] = recursive_update(existing, value)
        else:
            original[key] = value
    return original


ToBuildType = TypeVar("ToBuildType")


class Builder(tuple, Generic[ToBuildType]):
    """Used to instantiate a given class with (default) parameters.

    Helper class that stores a class, default parameters for that
    class, and key word arguments that (possibly) overwrite the defaults.
    When calling an object of the Builder class it generates
    an instance of `class_type` with parameters specified by
    the attributes `default` and `kwargs` (and possibly additional, overwriting,
    keyword arguments).

    A `Builder` is also the tuple `(class_type, kwargs, default)`.

    # Attributes

    class_type : The class to be instantiated when calling the object.
    kwargs : Keyword arguments used to instantiate an object of type `class_type`.
    default : Default parameters used when instantiating the class.
    """

    class_type: ToBuildType
    kwargs: Dict[str, Any]
    default: Dict[str, Any]

    # noinspection PyTypeChecker
    def __new__(
        cls,
        class_type: ToBuildType,
        kwargs: Optional[Dict[str, Any]] = None,
        default: Optional[Dict[str, Any]] = None,
    ):
        """Create a new Builder.

        For parameter descriptions see the class documentation. Note
        that `kwargs` and `default` can be None in which case they are
        set to be empty dictionaries.
        """
        self = tuple.__new__(
            cls,
            (
                class_type,
                kwargs if kwargs is not None else {},
                default if default is not None else {},
            ),
        )
        # The attributes reference the very objects stored in the tuple.
        self.class_type = class_type
        self.kwargs = self[1]
        self.default = self[2]
        return self

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(class_type={self.class_type},"
            f" kwargs={self.kwargs},"
            f" default={self.default})"
        )

    def __call__(self, **kwargs) -> ToBuildType:
        """Build and return a new object.

        # Parameters
        kwargs : additional keyword arguments to use when instantiating
            the object. These overwrite all arguments already in the `self.kwargs`
            and `self.default` attributes.

        # Returns

        Object created by calling `self.class_type` with parameters
        taken from `self.default`, `self.kwargs`, and
        any keyword arguments additionally passed to `__call__`
        (in increasing order of precedence).
        """
        # Deep copy so that the stored defaults are never modified by the merge.
        allkwargs = copy.deepcopy(self.default)
        recursive_update(allkwargs, self.kwargs)
        recursive_update(allkwargs, kwargs)
        return cast(Callable, self.class_type)(**allkwargs)


class ScalarMeanTracker(object):
    """Accumulate weighted sums and counts per scalar key so that the
    corresponding means can be read out."""

    def __init__(self) -> None:
        self._sums: Dict[str, float] = OrderedDict()
        self._counts: Dict[str, int] = OrderedDict()

    def add_scalars(
        self, scalars: Dict[str, Union[float, int]], n: Union[int, Dict[str, int]] = 1
    ) -> None:
        """Add additional scalars to track.

        # Parameters

        scalars : A dictionary of `scalar key -> value` pairs.
        n : The weight (count) of the added values: either a single number
            used for every key or a dictionary of `scalar key -> count` pairs.
        """
        if isinstance(n, Dict):
            weights = n
        else:
            # The same count applies to every key.
            weights = defaultdict(lambda: n)
        weights = cast(Dict[str, int], weights)  # type: ignore

        for key, value in scalars.items():
            weight = weights[key]
            if key in self._sums:
                self._sums[key] += weight * value
                self._counts[key] += weight
            else:
                self._sums[key] = weight * value
                self._counts[key] = weight

    def pop_and_reset(self) -> Dict[str, float]:
        """Return tracked means and reset.

        On resetting all previously tracked values are discarded.

        # Returns

        A dictionary of `scalar key -> current mean` pairs corresponding to those
        values added with `add_scalars`.
        """
        current_means = self.means()
        self.reset()
        return current_means

    def reset(self):
        """Discard all tracked sums and counts."""
        self._sums = OrderedDict()
        self._counts = OrderedDict()

    def sums(self):
        """Return a shallow copy of the `scalar key -> weighted sum` pairs."""
        return copy.copy(self._sums)

    def counts(self) -> Dict[str, int]:
        """Return a shallow copy of the `scalar key -> count` pairs."""
        return copy.copy(self._counts)

    def means(self) -> Dict[str, float]:
        """Return the `scalar key -> current mean` pairs without resetting."""
        return OrderedDict(
            (key, float(total / self._counts[key]))
            for key, total in self._sums.items()
        )

    @property
    def empty(self):
        """`True` if no scalars are currently tracked."""
        num_sums, num_counts = len(self._sums), len(self._counts)
        assert (
            num_sums == num_counts
        ), "Mismatched length of _sums {} and _counts {}".format(num_sums, num_counts)
        return num_sums == 0


class LoggingPackage:
    """Container bundling the metrics, info, and auxiliary data that are
    collected for logging."""

    def __init__(
        self,
        mode: str,
        training_steps: Optional[int],
        storage_uuid_to_total_experiences: Dict[str, int],
        pipeline_stage: Optional[int] = None,
        checkpoint_file_name: Optional[str] = None,
    ) -> None:
        # Identification of what is being logged.
        self.mode = mode
        self.training_steps: int = training_steps
        self.storage_uuid_to_total_experiences: Dict[str, int] = (
            storage_uuid_to_total_experiences
        )
        self.pipeline_stage = pipeline_stage
        self.checkpoint_file_name: Optional[str] = checkpoint_file_name

        # Running means of task metrics, and of info values per
        # (stage component uuid, storage uuid) pair.
        self.metrics_tracker = ScalarMeanTracker()
        self.info_trackers: Dict[Tuple[str, str], ScalarMeanTracker] = {}

        # Raw data kept alongside the running means.
        self.metric_dicts: List[Any] = []
        self.viz_data: Optional[Dict[str, List[Dict[str, Any]]]] = None
        self.task_callback_data: List[Any] = []

        self.num_empty_metrics_dicts_added: int = 0

    @property
    def num_non_empty_metrics_dicts_added(self) -> int:
        """Number of metrics dictionaries that were accepted so far."""
        return len(self.metric_dicts)

    @staticmethod
    def _metrics_dict_is_empty(
        single_task_metrics_dict: Dict[str, Union[float, int]]
    ) -> bool:
        """Whether a metrics dictionary carries no metrics.

        This is the case if it is empty, if `"task_info"` is its only key,
        or if it has a `"success"` entry whose value is `None`.
        """
        if len(single_task_metrics_dict) == 0:
            return True
        if len(single_task_metrics_dict) == 1 and "task_info" in single_task_metrics_dict:
            return True
        return (
            "success" in single_task_metrics_dict
            and single_task_metrics_dict["success"] is None
        )

    def add_metrics_dict(
        self, single_task_metrics_dict: Dict[str, Union[float, int]]
    ) -> bool:
        """Record the metrics of a single task.

        # Returns

        `False` if the dictionary was considered empty (in which case only
        the counter of empty dictionaries is incremented), `True` otherwise.
        """
        if self._metrics_dict_is_empty(single_task_metrics_dict):
            self.num_empty_metrics_dicts_added += 1
            return False

        self.metric_dicts.append(single_task_metrics_dict)

        # Everything except the task info is tracked as a scalar.
        scalars = dict(single_task_metrics_dict)
        scalars.pop("task_info", None)
        self.metrics_tracker.add_scalars(scalars)
        return True

    def add_info_dict(
        self,
        info_dict: Dict[str, Union[int, float]],
        n: int,
        stage_component_uuid: str,
        storage_uuid: str,
    ):
        """Add `info_dict` (with count `n`) to the tracker of the given
        (stage component uuid, storage uuid) pair, creating the tracker if
        it does not exist yet."""
        tracker_key = (stage_component_uuid, storage_uuid)
        if tracker_key not in self.info_trackers:
            self.info_trackers[tracker_key] = ScalarMeanTracker()

        assert n >= 0
        tracker = self.info_trackers[tracker_key]
        tracker.add_scalars(scalars=info_dict, n=n)


class LinearDecay(object):
    """Linear interpolation from a start value to an end value over a fixed
    number of steps.

    Calling an instance with the value `i` returns the value
    corresponding to the `i`-th step.

    # Parameters

    steps : The number of steps over which to decay.
    startp : The starting value.
    endp : The ending value.
    """

    def __init__(self, steps: int, startp: float = 1.0, endp: float = 0.0) -> None:
        """Initializer.

        See class documentation for parameter definitions.
        """
        self.steps = steps
        self.startp = startp
        self.endp = endp

    def __call__(self, epoch: int) -> float:
        """Get the decayed value for `epoch` number of steps.

        # Parameters

        epoch : The number of steps; values outside of `[0, steps]` are
            clamped to that range.

        # Returns

        Decayed value for `epoch` number of steps.
        """
        capped = min(epoch, self.steps)
        clamped = max(capped, 0)
        fraction_done = clamped / float(self.steps)
        total_change = self.endp - self.startp
        return self.startp + total_change * fraction_done


class MultiLinearDecay(object):
    """Container for multiple stages of LinearDecay.

    Obtain the value corresponding to the `i`-th step by calling
    an instance of this class with the value `i`.

    The currently active stage is remembered between calls (attributes
    `stage_idx`, `min_steps`, `max_steps`, and `stage`), which makes
    monotonically increasing queries cheap. Queries for an earlier step than
    the active stage covers are supported as well.

    # Parameters

    stages: List of `LinearDecay` objects to be sequentially applied
        for the number of steps in each stage.
    """

    def __init__(self, stages: Sequence["LinearDecay"]) -> None:
        """Initializer.

        See class documentation for parameter definitions.
        """
        self.stages = stages
        # `steps[i]` is the global step at which stage `i` ends.
        self.steps = np.cumsum([stage.steps for stage in self.stages])
        self.total_steps = self.steps[-1]
        self._rewind()

    def _rewind(self) -> None:
        """Reset the stage cursor to the state before the first stage."""
        self.stage_idx = -1
        self.min_steps = 0
        self.max_steps = 0
        self.stage = None

    def __call__(self, epoch: int) -> float:
        """Get the decayed value factor for `epoch` number of steps.

        # Parameters

        epoch : The number of steps; values outside of `[0, total_steps]`
            are clamped to that range.

        # Returns

        Decayed value for `epoch` number of steps.
        """
        epoch = max(min(epoch, self.total_steps), 0)

        if epoch < self.min_steps:
            # The cursor only moves forward, so start over when asked about a
            # step which precedes the active stage. Without this the value
            # would be computed by the wrong (later) stage.
            self._rewind()

        while epoch >= self.max_steps and self.max_steps < self.total_steps:
            self.stage_idx += 1
            assert self.stage_idx < len(self.stages)

            self.min_steps = self.max_steps
            self.max_steps = self.steps[self.stage_idx]
            self.stage = self.stages[self.stage_idx]

        # Stages are queried with the number of steps taken within the stage.
        return self.stage(epoch - self.min_steps)


# noinspection PyTypeHints,PyUnresolvedReferences
def set_deterministic_cudnn() -> None:
    """Switch cudnn to deterministic mode and disable its benchmark mode.

    Does nothing if CUDA is not available. This may slow down
    computations.
    """
    if not torch.cuda.is_available():
        return

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True  # type: ignore
    cudnn.benchmark = False  # type: ignore


def set_seed(seed: Optional[int] = None) -> None:
    """Seed the random number generators of `pytorch`, base `random`, and
    `numpy`.

    # Parameters

    seed : The seed to set. If set to None, nothing is done, i.e. the
        generators keep their current state.
    """
    if seed is not None:
        # `torch.manual_seed` seeds the RNG for all devices (CPU and GPUs)
        torch.manual_seed(seed)
        random.seed(seed)
        np.random.seed(seed)


class EarlyStoppingCriterion(abc.ABC):
    """Abstract base class for callables which determine if training should
    stop early in a particular pipeline stage.

    Subclasses must implement `__call__`.
    """

    @abc.abstractmethod
    def __call__(
        self,
        stage_steps: int,
        total_steps: int,
        training_metrics: ScalarMeanTracker,
    ) -> bool:
        """Returns `True` if training should be stopped early.

        # Parameters

        stage_steps: Total number of steps taken in the current pipeline stage.
        total_steps: Total number of steps taken during training so far (includes steps
            taken in prior pipeline stages).
        training_metrics: Metrics recovered over some fixed number of training steps
            (see the `metric_accumulate_interval` attribute in the `TrainingPipeline` class).

        # Returns

        `True` if training in the current stage should stop, otherwise `False`.
        """
        raise NotImplementedError


class NeverEarlyStoppingCriterion(EarlyStoppingCriterion):
    """Implementation of `EarlyStoppingCriterion` which never stops early."""

    def __call__(
        self,
        stage_steps: int,
        total_steps: int,
        training_metrics: ScalarMeanTracker,
    ) -> bool:
        """Always returns `False`; all arguments are ignored."""
        return False


class OffPolicyPipelineComponent(NamedTuple):
    """An off-policy component for a PipelineStage.

    # Attributes

    data_iterator_builder: A function to instantiate a data iterator (with a `__next__(self)` method).
    loss_names: List of unique names assigned to off-policy losses.
    updates: Number of off-policy updates between on-policy rollout collections.
    loss_weights : A list of floating point numbers describing the relative weights
        applied to the losses referenced by `loss_names`. Should be the same length
        as `loss_names`. If this is `None`, all weights will be assumed to be one.
    data_iterator_kwargs_generator: Optional generator of keyword arguments for
        `data_iterator_builder` (useful for distributed training). It takes
        a `cur_worker` int value,
        a `rollouts_per_worker` list of number of samplers per training worker,
        and an optional random `seed` shared by all workers, which can be None.
        The default returns an empty dictionary.
    """

    data_iterator_builder: Callable[..., Iterator]
    loss_names: List[str]
    updates: int
    loss_weights: Optional[Sequence[float]] = None
    data_iterator_kwargs_generator: Callable[
        [int, Sequence[int], Optional[int]], Dict
    ] = lambda cur_worker, rollouts_per_worker, seed: {}


class TrainingSettings:
    """Class defining parameters used for training (within a stage or the
    entire pipeline).

    Settings are read as attributes. A setting whose value is `None` is
    looked up in the defaults registered with `set_defaults` (if any).

    # Attributes

    num_mini_batch : The number of mini-batches to break a rollout into.
    update_repeats : The number of times we will cycle through the mini-batches corresponding
        to a single rollout doing gradient updates.
    max_grad_norm : The maximum "inf" norm of any gradient step (gradients are clipped to not exceed this).
    num_steps : Total number of steps a single agent takes in a rollout.
    gamma : Discount factor applied to rewards (should be in [0, 1]).
    use_gae : Whether or not to use generalized advantage estimation (GAE).
    gae_lambda : The additional parameter used in GAE.
    advance_scene_rollout_period: Optional number of rollouts before enforcing an advance scene in all samplers.
    save_interval : The frequency with which to save (in total agent steps taken). If `None` then *no*
        checkpoints will be saved. Otherwise, in addition to the checkpoints being saved every
        `save_interval` steps, a checkpoint will *always* be saved at the end of each pipeline stage.
        If `save_interval <= 0` then checkpoints will only be saved at the end of each pipeline stage.
    metric_accumulate_interval : The frequency with which training/validation metrics are accumulated
        (in total agent steps). Metrics accumulated in an interval are logged (if `should_log` is `True`)
        and used by the stage's early stopping criterion (if any).
    """

    num_mini_batch: Optional[int]
    update_repeats: Optional[Union[int, Sequence[int]]]
    max_grad_norm: Optional[float]
    num_steps: Optional[int]
    gamma: Optional[float]
    use_gae: Optional[bool]
    gae_lambda: Optional[float]
    advance_scene_rollout_period: Optional[int]
    save_interval: Optional[int]
    metric_accumulate_interval: Optional[int]

    # noinspection PyUnresolvedReferences
    def __init__(
        self,
        num_mini_batch: Optional[int] = None,
        update_repeats: Optional[Union[int, Sequence[int]]] = None,
        max_grad_norm: Optional[float] = None,
        num_steps: Optional[int] = None,
        gamma: Optional[float] = None,
        use_gae: Optional[bool] = None,
        gae_lambda: Optional[float] = None,
        advance_scene_rollout_period: Optional[int] = None,
        save_interval: Optional[int] = None,
        metric_accumulate_interval: Optional[int] = None,
    ):
        # NOTE: `locals()` must be captured before any other local variable
        # is defined so that it only holds `self` and the settings.
        self._key_to_setting = prepare_locals_for_super(locals(), ignore_kwargs=True)
        self._training_setting_keys = tuple(sorted(self._key_to_setting.keys()))

        self._defaults: Optional["TrainingSettings"] = None

    def keys(self) -> Tuple[str, ...]:
        """Return the sorted names of all settings."""
        return self._training_setting_keys

    def has_key(self, key: str) -> bool:
        """Return `True` if `key` is the name of a setting."""
        return key in self._key_to_setting

    def set_defaults(self, defaults: "TrainingSettings"):
        """Register the settings used as fallback for values that are `None`.

        May only be called once per instance.
        """
        assert self._defaults is None, "Defaults can only be set once."
        self._defaults = defaults

    def __getattr__(self, item: str):
        """Resolve settings as attributes, falling back to the defaults.

        Only invoked when regular attribute lookup fails. Raises an
        `AttributeError` for names which are not settings.
        """
        # Read the instance dictionary directly: going through
        # `self._key_to_setting` would call `__getattr__` again (and recurse
        # without end) on instances that were created without running
        # `__init__`, e.g. while being copied or unpickled.
        instance_dict = self.__dict__
        key_to_setting = instance_dict.get("_key_to_setting")
        if key_to_setting is not None and item in key_to_setting:
            val = key_to_setting[item]
            defaults = instance_dict.get("_defaults")
            if val is None and defaults is not None:
                val = getattr(defaults, item)
            return val

        # Not a setting: let the standard lookup raise the `AttributeError`.
        return super(TrainingSettings, self).__getattribute__(item)


@attr.s(kw_only=True)
class StageComponent:
    """A custom component for a PipelineStage, possibly including overrides to
    the `TrainingSettings` from the `TrainingPipeline` and `PipelineStage`.

    # Attributes

    uuid: the name of this component
    storage_uuid: the name of the `ExperienceStorage` that will be used with this component.
    loss_names: list of unique names assigned to the losses used by this component.
    training_settings: Instance of `TrainingSettings`. Settings which cannot be
        overridden per component (see `_validate_training_settings`) must be `None`.
    """

    uuid: str = attr.ib()
    storage_uuid: str = attr.ib()
    loss_names: Sequence[str] = attr.ib()
    training_settings: TrainingSettings = attr.ib(
        default=attr.Factory(TrainingSettings)
    )

    @training_settings.validator
    def _validate_training_settings(self, attribute, value: TrainingSettings):
        """Ensure settings that a component cannot override are left unset."""
        must_be_none = [
            "num_steps",
            "gamma",
            "use_gae",
            "gae_lambda",
            "advance_scene_rollout_period",
            "save_interval",
            "metric_accumulate_interval",
        ]
        for key in must_be_none:
            # The message must be a plain string (a trailing comma here would
            # turn it into a 1-tuple).
            assert getattr(value, key) is None, (
                f"`{key}` must be `None` in `TrainingSettings` passed to"
                f" `StageComponent` (as such values will be ignored). Pass such"
                f" settings to the `PipelineStage` or `TrainingPipeline` objects instead."
            )


class PipelineStage:
    """A single stage in a training pipeline, possibly including overrides to
    the global `TrainingSettings` in `TrainingPipeline`.

    # Attributes

    loss_names : A collection of unique names assigned to losses. These will
        reference the `Loss` objects in a `TrainingPipeline` instance.
    max_stage_steps : Either the total number of steps agents should take in this stage or
        a Callable object (e.g. a function)
    loss_weights : A list of floating point numbers describing the relative weights
        applied to the losses referenced by `loss_names`. Should be the same length
        as `loss_names`. If this is `None`, all weights will be assumed to be one.
    teacher_forcing : If applicable, defines the probability an agent will take the
        expert action (as opposed to its own sampled action) at a given time point.
    stage_components : Optional `StageComponent` objects that are registered with
        `add_stage_component`.
    early_stopping_criterion: An `EarlyStoppingCriterion` object which determines if
        training in this stage should be stopped early. If `None` then no early stopping
        occurs. If `early_stopping_criterion` is not `None` then we do not guarantee
        reproducibility when restarting a model from a checkpoint (as the
        `EarlyStoppingCriterion` object may store internal state which is not
        saved in the checkpoint). Currently, AllenAct only supports using early stopping
        criterion when **not** using distributed training.
    training_settings: Instance of `TrainingSettings`.
    callback_to_change_engine_attributes: Optional dictionary mapping the name of an engine
        attribute to a dictionary with the keys `"func"` and `"args"`. See
        `change_engine_attributes`.
    training_settings_kwargs: For backwards compatibility: arguments to instantiate TrainingSettings when
     `training_settings` is `None`.
    """

    def __init__(
        self,
        *,  # Disables positional arguments. Please provide arguments as keyword arguments.
        max_stage_steps: Union[int, Callable],
        loss_names: List[str],
        loss_weights: Optional[Sequence[float]] = None,
        teacher_forcing: Optional[Callable[[int], float]] = None,
        stage_components: Optional[Sequence[StageComponent]] = None,
        early_stopping_criterion: Optional[EarlyStoppingCriterion] = None,
        training_settings: Optional[TrainingSettings] = None,
        callback_to_change_engine_attributes: Optional[Dict[str, Any]] = None,
        **training_settings_kwargs,
    ):
        self.callback_to_change_engine_attributes = callback_to_change_engine_attributes

        # Populate TrainingSettings members
        # THIS MUST COME FIRST IN `__init__` as `__setattr__` consults
        # `self.training_settings` for every other attribute.
        assert training_settings is None or len(training_settings_kwargs) == 0
        if training_settings is None:
            training_settings = TrainingSettings(**training_settings_kwargs)
        self.training_settings = training_settings
        assert self.training_settings.update_repeats is None or isinstance(
            self.training_settings.update_repeats, numbers.Integral
        ), (
            "`training_settings` passed to `PipelineStage` must have `training_settings.update_repeats`"
            " equal to `None` or an integer. If you'd like to specify per-loss `update_repeats` then please"
            " do so in the training settings of a `StageComponent`."
        )

        self.loss_names = loss_names
        self.max_stage_steps = max_stage_steps

        self.loss_weights = (
            [1.0] * len(loss_names) if loss_weights is None else loss_weights
        )
        assert len(self.loss_weights) == len(self.loss_names)

        self.teacher_forcing = teacher_forcing

        self.early_stopping_criterion = early_stopping_criterion

        self.steps_taken_in_stage: int = 0
        self.rollout_count = 0
        self.early_stopping_criterion_met = False

        self.uuid_to_loss_weight: Dict[str, float] = {
            loss_uuid: loss_weight
            for loss_uuid, loss_weight in zip(loss_names, self.loss_weights)
        }

        self._stage_components: List[StageComponent] = []
        self.uuid_to_stage_component: Dict[str, StageComponent] = {}
        self.storage_uuid_to_steps_taken_in_stage: Dict[str, int] = {}
        self.stage_component_uuid_to_stream_memory: Dict[str, Memory] = {}

        if stage_components is not None:
            for stage_component in stage_components:
                self.add_stage_component(stage_component)

        # Sanity check
        for key in training_settings.keys():
            assert not hasattr(
                self, key
            ), f"`{key}` should be defined in `TrainingSettings`, not in `PipelineStage`."

    def reset(self):
        """Reset all step/rollout counters and clear the stream memories."""
        self.steps_taken_in_stage: int = 0
        self.rollout_count = 0
        self.early_stopping_criterion_met = False

        for k in self.storage_uuid_to_steps_taken_in_stage:
            self.storage_uuid_to_steps_taken_in_stage[k] = 0

        for memory in self.stage_component_uuid_to_stream_memory.values():
            memory.clear()

    # TODO: Replace Any with the correct type
    def change_engine_attributes(self, engine: Any):
        """Update attributes of `engine` as specified by
        `callback_to_change_engine_attributes`.

        For every `attribute name -> spec` pair, the (already existing) engine
        attribute is set to `spec["func"](engine, **spec["args"])`.
        """
        if self.callback_to_change_engine_attributes is not None:
            for key, value in self.callback_to_change_engine_attributes.items():
                # check if the engine has the attribute
                assert hasattr(engine, key)

                func = value["func"]
                args = value["args"]
                setattr(engine, key, func(engine, **args))

    @property
    def stage_components(self) -> Tuple[StageComponent, ...]:
        """The registered stage components (in order of registration)."""
        return tuple(self._stage_components)

    def add_stage_component(self, stage_component: StageComponent):
        """Register a `StageComponent` with this stage.

        The training settings of this stage become the defaults of the
        component's training settings.

        # Raises

        NotImplementedError : If another registered component already uses
            the same storage uuid. In this case the stage is left unchanged.
        """
        assert stage_component.uuid not in self.uuid_to_stage_component

        # Validate before mutating anything so that a rejected component
        # leaves no partial state behind.
        if stage_component.storage_uuid in self.storage_uuid_to_steps_taken_in_stage:
            raise NotImplementedError(
                "Cannot have multiple stage components which"
                f" use the same storage (reused storage uuid: '{stage_component.storage_uuid}'."
            )

        # Setting default training settings for the `stage_component`
        sc_ts = stage_component.training_settings
        sc_ts.set_defaults(self.training_settings)

        # Handling the case where different losses should be updated different
        # numbers of times
        # NOTE(review): as the defaults were just set above, `sc_ts.update_repeats`
        # presumably already resolves to the stage's value here, which would make
        # this branch unreachable -- confirm.
        stage_update_repeats = self.training_settings.update_repeats
        if stage_update_repeats is not None and sc_ts.update_repeats is None:
            if isinstance(stage_update_repeats, Sequence):
                # Only a sequence can be paired up with the loss names
                # (zipping an integer would raise a `TypeError`).
                loss_to_update_repeats = dict(
                    zip(self.loss_names, stage_update_repeats)
                )
                sc_ts.update_repeats = [
                    loss_to_update_repeats[uuid] for uuid in stage_component.loss_names
                ]
            else:
                sc_ts.update_repeats = stage_update_repeats

        self._stage_components.append(stage_component)
        self.uuid_to_stage_component[stage_component.uuid] = stage_component
        self.storage_uuid_to_steps_taken_in_stage[stage_component.storage_uuid] = 0
        self.stage_component_uuid_to_stream_memory[stage_component.uuid] = Memory()

    def __setattr__(self, key: str, value: Any):
        """Set an attribute, refusing names reserved for training settings.

        # Raises

        NotImplementedError : If `key` is the name of a training setting.
        """
        if key not in [
            "training_settings",
            "callback_to_change_engine_attributes",
        ] and self.training_settings.has_key(key):
            # Instances have no `__name__`; the class name must be used.
            class_name = type(self).__name__
            raise NotImplementedError(
                f"Cannot set {key} in {class_name}, update the"
                f" `training_settings` attribute of {class_name} instead."
            )
        else:
            return super(PipelineStage, self).__setattr__(key, value)

    @property
    def is_complete(self):
        """`True` if the early stopping criterion was met or if at least
        `max_stage_steps` steps were taken in this stage."""
        return (
            self.early_stopping_criterion_met
            or self.steps_taken_in_stage >= self.max_stage_steps
        )


class TrainingPipeline:
    """Class defining the stages (and global training settings) in a training
    pipeline.

    The training pipeline can be used as an iterator to go through the pipeline
    stages in, for instance, a loop.

    # Parameters

    named_losses : Dictionary mapping the name of a loss to either an instantiation
        of that loss or a `Builder` that, when called, will return that loss.
    pipeline_stages : A list of PipelineStages. Each of these define how the agent
        will be trained and are executed sequentially.
    optimizer_builder : Builder object to instantiate the optimizer to use during training.
    named_storages: Map of storage names to corresponding `ExperienceStorage` instances or `Builder` objects.
        If this is `None` (or empty) then a new `Builder[RolloutBlockStorage]` will be created and added
        by default.
    rollout_storage_uuid: Optional name of the `RolloutStorage`. If `None` is given, it will be assigned to
        the `ExperienceStorage` of subclass `RolloutStorage` in `named_storages`. Note that this assumes that
        there is at most a single `RolloutStorage` object in the values of `named_storages`.
    should_log: `True` if metrics accumulated during training should be logged to the console as well
        as to a tensorboard file.
    lr_scheduler_builder : Optional builder object to instantiate the learning rate scheduler used
        through the pipeline.
    training_settings: Instance of `TrainingSettings`
    training_settings_kwargs: For backwards compatibility: arguments to instantiate TrainingSettings when
        `training_settings` is `None`.
    """

    # noinspection PyUnresolvedReferences
    def __init__(
        self,
        *,
        named_losses: Dict[str, Union[Loss, Builder[Loss]]],
        pipeline_stages: List[PipelineStage],
        optimizer_builder: Builder[optim.Optimizer],  # type: ignore
        named_storages: Optional[
            Dict[str, Union[ExperienceStorage, Builder[ExperienceStorage]]]
        ] = None,
        rollout_storage_uuid: Optional[str] = None,
        should_log: bool = True,
        lr_scheduler_builder: Optional[Builder[_LRScheduler]] = None,  # type: ignore
        training_settings: Optional[TrainingSettings] = None,
        valid_pipeline_stage: Optional[PipelineStage] = None,
        test_pipeline_stage: Optional[PipelineStage] = None,
        **training_settings_kwargs,
    ):
        """Initializer.

        See class docstring for parameter definitions.

        Additionally, `valid_pipeline_stage` and `test_pipeline_stage` are
        optional `PipelineStage`s; when either is `None` it is replaced by an
        empty stage (`PipelineStage(max_stage_steps=-1, loss_names=[])`).

        All arguments are keyword-only. `training_settings` and
        `training_settings_kwargs` are mutually exclusive.
        """

        # Populate TrainingSettings members
        assert training_settings is None or len(training_settings_kwargs) == 0
        if training_settings is None:
            training_settings = TrainingSettings(**training_settings_kwargs)
        self.training_settings = training_settings

        # Pipeline-level `update_repeats` must be a single integer (or unset);
        # per-loss values are only supported on stage components.
        assert self.training_settings.update_repeats is None or isinstance(
            self.training_settings.update_repeats, numbers.Integral
        ), (
            "`training_settings` passed to `TrainingPipeline` must have `training_settings.update_repeats`"
            " equal to `None` or an integer. If you'd like to specify per-loss `update_repeats` then please"
            " do so in the training settings of a `StageComponent`."
        )
        # NOTE(review): this repeats the assignment made just above the assert.
        self.training_settings = training_settings

        self.optimizer_builder = optimizer_builder
        self.lr_scheduler_builder = lr_scheduler_builder

        self._named_losses = named_losses
        # `_named_storages` must be set before the rollout storage uuid is
        # resolved, as the resolution inspects it.
        self._named_storages = self._initialize_named_storages(
            named_storages=named_storages
        )
        self.rollout_storage_uuid = self._initialize_rollout_storage_uuid(
            rollout_storage_uuid
        )

        if self.rollout_storage_uuid is None:
            get_logger().warning(
                f"No rollout storage was specified in the TrainingPipeline. This need not be an issue"
                f" if you are performing off-policy training but, otherwise, please ensure you have"
                f" defined a rollout storage in the `named_storages` argument of the TrainingPipeline."
            )

        self.should_log = should_log

        self.pipeline_stages = pipeline_stages

        def if_none_then_empty_stage(stage: Optional[PipelineStage]) -> PipelineStage:
            # Substitute a loss-free placeholder stage when none was given.
            return (
                stage
                if stage is not None
                else PipelineStage(max_stage_steps=-1, loss_names=[])
            )

        self.valid_pipeline_stage = if_none_then_empty_stage(valid_pipeline_stage)
        self.test_pipeline_stage = if_none_then_empty_stage(test_pipeline_stage)

        # Stage objects carry mutable per-stage state, so the same instance
        # must not appear more than once (checked by object identity for the
        # training stages and by membership for the valid/test stages).
        assert (
            len(self.pipeline_stages) == len(set(id(ps) for ps in pipeline_stages))
            and self.valid_pipeline_stage not in self.pipeline_stages
            and self.test_pipeline_stage not in self.pipeline_stages
        ), (
            "Duplicate `PipelineStage` object instances found in the pipeline stages input"
            " to `TrainingPipeline`. `PipelineStage` objects are not immutable, if you'd"
            " like to have multiple pipeline stages of the same type, please instantiate"
            " multiple separate instances."
        )

        self._ensure_pipeline_stages_all_have_at_least_one_stage_component()

        self._current_stage: Optional[PipelineStage] = None
        self.rollout_count = 0
        # Select the first stage that is not already complete.
        self._refresh_current_stage(force_stage_search_from_start=True)

    def _initialize_rollout_storage_uuid(
        self, rollout_storage_uuid: Optional[str]
    ) -> str:
        if rollout_storage_uuid is None:
            rollout_storage_uuids = self._get_uuids_of_rollout_storages(
                self._named_storages
            )
            assert len(rollout_storage_uuids) <= 1, (
                f"`rollout_storage_uuid` cannot be automatically inferred as there are multiple storages defined"
                f" (ids: {rollout_storage_uuids}) of type `RolloutStorage`."
            )
            rollout_storage_uuid = next(iter(rollout_storage_uuids), None)
        assert (
            rollout_storage_uuid is None or rollout_storage_uuid in self._named_storages
        )
        return rollout_storage_uuid

    def _ensure_pipeline_stages_all_have_at_least_one_stage_component(self):
        rollout_storages_uuids = self._get_uuids_of_rollout_storages(
            self._named_storages
        )

        named_pipeline_stages = {
            f"{i}th": ps for i, ps in enumerate(self.pipeline_stages)
        }

        named_pipeline_stages["valid"] = self.valid_pipeline_stage
        named_pipeline_stages["test"] = self.test_pipeline_stage

        for stage_name, stage in named_pipeline_stages.items():
            # Forward default `TrainingSettings` to all `PipelineStage`s settings:
            stage.training_settings.set_defaults(defaults=self.training_settings)

            if len(stage.stage_components) == 0:
                assert len(rollout_storages_uuids) <= 1, (
                    f"In {stage_name} pipeline stage: you have several storages specified ({rollout_storages_uuids}) which"
                    f" are subclasses of `RolloutStorage`. This is only allowed when stage components are explicitly"
                    f" defined in every `PipelineStage` instance. You have `PipelineStage`s for which stage components"
                    f" are not specified."
                )
                if len(rollout_storages_uuids) > 0:
                    stage.add_stage_component(
                        StageComponent(
                            uuid=rollout_storages_uuids[0],
                            storage_uuid=rollout_storages_uuids[0],
                            loss_names=stage.loss_names,
                            training_settings=TrainingSettings(),
                        )
                    )

            for sc in stage.stage_components:
                assert sc.storage_uuid in self._named_storages, (
                    f"In {stage_name} pipeline stage: storage with name '{sc.storage_uuid}' not found in collection of"
                    f" defined storages names: {list(self._named_storages.keys())}"
                )

            if (
                self.rollout_storage_uuid is not None
                and self.rollout_storage_uuid
                not in stage.storage_uuid_to_steps_taken_in_stage
            ):
                stage.storage_uuid_to_steps_taken_in_stage[
                    self.rollout_storage_uuid
                ] = 0

    @classmethod
    def _get_uuids_of_rollout_storages(
        cls,
        named_storages: Dict[str, Union[Builder[ExperienceStorage], ExperienceStorage]],
    ) -> List[str]:
        return [
            uuid
            for uuid, storage in named_storages.items()
            if isinstance(storage, RolloutStorage)
            or (
                isinstance(storage, Builder)
                and issubclass(storage.class_type, RolloutStorage)
            )
        ]

    @classmethod
    def _initialize_named_storages(
        cls,
        named_storages: Optional[
            Dict[str, Union[Builder[ExperienceStorage], ExperienceStorage]]
        ],
    ) -> Dict[str, Union[Builder[ExperienceStorage], ExperienceStorage]]:
        named_storages = {} if named_storages is None else {**named_storages}

        rollout_storages_uuids = cls._get_uuids_of_rollout_storages(named_storages)
        if len(named_storages) == 0:
            assert (
                _DEFAULT_ONPOLICY_UUID not in named_storages
            ), f"Storage uuid '{_DEFAULT_ONPOLICY_UUID}' is reserved, please pick a different uuid."
            named_storages[_DEFAULT_ONPOLICY_UUID] = Builder(RolloutBlockStorage)
            rollout_storages_uuids.append(_DEFAULT_ONPOLICY_UUID)
        return named_storages

    def _refresh_current_stage(
        self, force_stage_search_from_start: bool = False
    ) -> Optional[PipelineStage]:
        if force_stage_search_from_start:
            self._current_stage = None

        if self._current_stage is None or self._current_stage.is_complete:
            if self._current_stage is None:
                start_index = 0
            else:
                start_index = self.pipeline_stages.index(self._current_stage) + 1

            self._current_stage = None
            for ps in self.pipeline_stages[start_index:]:
                if not ps.is_complete:
                    self._current_stage = ps
                    break
        return self._current_stage

    @property
    def total_steps(self) -> int:
        return sum(ps.steps_taken_in_stage for ps in self.pipeline_stages)

    @property
    def storage_uuid_to_total_experiences(self) -> Dict[str, int]:
        totals = {k: 0 for k in self._named_storages}
        for ps in self.pipeline_stages:
            for k in ps.storage_uuid_to_steps_taken_in_stage:
                totals[k] += ps.storage_uuid_to_steps_taken_in_stage[k]

        for k in totals:
            split = k.split("__")
            if len(split) == 2 and split[1] in ["valid", "test"]:
                assert totals[k] == 0, (
                    "Total experiences should be 0 for validation/test storages, i.e."
                    " storages who have `__valid` or `__test` as their suffix. These storages"
                    " will copy their `total_experiences` from the corresponding training"
                    " storage i.e.:\n"
                    " 1. the storage without the above suffix if it exists, else\n"
                    " 2. the total number of steps."
                )
                totals[k] = totals.get(split[0], self.total_steps)

        return totals

    @property
    def current_stage(self) -> Optional[PipelineStage]:
        """The stage currently selected by `_refresh_current_stage`.

        `None` when no incomplete pipeline stage remains.
        """
        return self._current_stage

    @property
    def current_stage_index(self) -> Optional[int]:
        if self.current_stage is None:
            return None
        return self.pipeline_stages.index(self.current_stage)

    def before_rollout(self, train_metrics: Optional[ScalarMeanTracker] = None) -> bool:
        if (
            train_metrics is not None
            and self.current_stage.early_stopping_criterion is not None
        ):
            self.current_stage.early_stopping_criterion_met = (
                self.current_stage.early_stopping_criterion(
                    stage_steps=self.current_stage.steps_taken_in_stage,
                    total_steps=self.total_steps,
                    training_metrics=train_metrics,
                )
            )
        if self.current_stage.early_stopping_criterion_met:
            get_logger().debug(
                f"Early stopping criterion met after {self.total_steps} total steps "
                f"({self.current_stage.steps_taken_in_stage} in current stage, stage index {self.current_stage_index})."
            )
        return self.current_stage is not self._refresh_current_stage(
            force_stage_search_from_start=False
        )

    def restart_pipeline(self):
        for ps in self.pipeline_stages:
            ps.reset()

        if self.valid_pipeline_stage:
            self.valid_pipeline_stage.reset()

        if self.test_pipeline_stage:
            self.test_pipeline_stage.reset()

        self._current_stage = None
        self._refresh_current_stage(force_stage_search_from_start=True)

    def state_dict(self):
        return dict(
            stage_info_list=[
                {
                    "early_stopping_criterion_met": ps.early_stopping_criterion_met,
                    "steps_taken_in_stage": ps.steps_taken_in_stage,
                    "storage_uuid_to_steps_taken_in_stage": ps.storage_uuid_to_steps_taken_in_stage,
                    "rollout_count": ps.rollout_count,
                }
                for ps in self.pipeline_stages
            ],
            rollout_count=self.rollout_count,
        )

    def load_state_dict(self, state_dict: Dict[str, Any]):
        if "off_policy_epochs" in state_dict:
            get_logger().warning(
                "Loaded state dict was saved using an older version of AllenAct."
                " If you are attempting to restart training for a model that had an off-policy component, be aware"
                " that logging for the off-policy component will not behave as it previously did."
                " Additionally, while the total step count will remain accurate, step counts"
                " associated with losses will be reset to step 0."
            )

        for ps, stage_info in zip(self.pipeline_stages, state_dict["stage_info_list"]):
            ps.early_stopping_criterion_met = stage_info["early_stopping_criterion_met"]
            ps.steps_taken_in_stage = stage_info["steps_taken_in_stage"]

            if "storage_uuid_to_steps_taken_in_stage" in stage_info:
                ps.storage_uuid_to_steps_taken_in_stage = stage_info[
                    "storage_uuid_to_steps_taken_in_stage"
                ]
                ps.rollout_count = stage_info["rollout_count"]

        self.rollout_count = state_dict["rollout_count"]

        self._refresh_current_stage(force_stage_search_from_start=True)

    @property
    def rollout_storage(self) -> Optional[RolloutStorage]:
        if self.rollout_storage_uuid is None:
            return None

        rs = self._named_storages[self.rollout_storage_uuid]
        if isinstance(rs, Builder):
            rs = rs()
            self._named_storages[self.rollout_storage_uuid] = rs

        return cast(RolloutStorage, rs)

    def get_stage_storage(
        self, stage: PipelineStage
    ) -> "OrderedDict[str, ExperienceStorage]":
        storage_uuids_for_current_stage_set = set(
            sc.storage_uuid for sc in stage.stage_components
        )

        # Always include self.rollout_storage_uuid in the current stage storage (when the uuid is defined)
        if self.rollout_storage_uuid is not None:
            storage_uuids_for_current_stage_set.add(self.rollout_storage_uuid)

        storage_uuids_for_current_stage = sorted(
            list(storage_uuids_for_current_stage_set)
        )

        for storage_uuid in storage_uuids_for_current_stage:
            if isinstance(self._named_storages[storage_uuid], Builder):
                self._named_storages[storage_uuid] = cast(
                    Builder["ExperienceStorage"],
                    self._named_storages[storage_uuid],
                )()

        return OrderedDict(
            (k, self._named_storages[k]) for k in storage_uuids_for_current_stage
        )

    @property
    def current_stage_storage(self) -> "OrderedDict[str, ExperienceStorage]":
        """Storages of the current stage, see `get_stage_storage`."""
        return self.get_stage_storage(self.current_stage)

    def get_loss(self, uuid: str):
        """Return the loss registered under `uuid`, building it if necessary.

        When the registered value is a `Builder`, it is called once and the
        resulting loss replaces the builder in `self._named_losses`.
        """
        loss = self._named_losses[uuid]
        if isinstance(loss, Builder):
            loss = cast(
                Builder[Union["AbstractActorCriticLoss", "GenericAbstractLoss"]],
                loss,
            )()
            self._named_losses[uuid] = loss
        return loss

    @property
    def current_stage_losses(
        self,
    ) -> Dict[str, Union[AbstractActorCriticLoss, GenericAbstractLoss]]:
        for loss_name in self.current_stage.loss_names:
            if isinstance(self._named_losses[loss_name], Builder):
                self._named_losses[loss_name] = cast(
                    Builder[Union["AbstractActorCriticLoss", "GenericAbstractLoss"]],
                    self._named_losses[loss_name],
                )()

        return {
            loss_name: cast(
                Union[AbstractActorCriticLoss, GenericAbstractLoss],
                self._named_losses[loss_name],
            )
            for loss_name in self.current_stage.loss_names
        }


def download_checkpoint_from_wandb(
    checkpoint_path_dir_or_pattern, all_ckpt_dir, only_allow_one_ckpt=False
):
    """Download checkpoint artifacts from wandb into `all_ckpt_dir`.

    `checkpoint_path_dir_or_pattern` is split on `"//"`: the second piece is
    used as the run token and every following piece as a checkpoint step (a
    single trailing empty piece is ignored). For each step, the artifact
    `"<run token>-step-<step>:latest"` is downloaded into `all_ckpt_dir` and
    the downloaded `ckpt.pt` is renamed to `ckpt-<step>.pt`.

    # Parameters

    checkpoint_path_dir_or_pattern : The `"//"` separated pattern described above.
    all_ckpt_dir : Directory the artifacts are downloaded to.
    only_allow_one_ckpt : When `True`, exactly one step must be given and a
        single path (rather than a list) is returned.

    # Returns

    The list of local checkpoint paths or, if `only_allow_one_ckpt` is
    `True`, the single local checkpoint path.
    """
    api = wandb.Api()

    pattern_parts = checkpoint_path_dir_or_pattern.split("//")
    run_token = pattern_parts[1]
    ckpt_steps = pattern_parts[2:]
    if ckpt_steps[-1] == "":
        ckpt_steps = ckpt_steps[:-1]

    if only_allow_one_ckpt:
        assert len(ckpt_steps) == 1

    local_ckpt_paths = []
    for steps in ckpt_steps:
        artifact = api.artifact("{}-step-{}:latest".format(run_token, steps))
        artifact.download(all_ckpt_dir)
        # Every artifact is saved as `ckpt.pt`, so rename it per step before
        # the next download reuses that name.
        local_path = "{}/ckpt-{}.pt".format(all_ckpt_dir, steps)
        shutil.move("{}/ckpt.pt".format(all_ckpt_dir), local_path)
        local_ckpt_paths.append(local_path)

    return local_ckpt_paths[0] if only_allow_one_ckpt else local_ckpt_paths


================================================
FILE: allenact/utils/inference.py
================================================
from typing import Optional, cast, Tuple, Any, Dict

import attr
import torch

from allenact.algorithms.onpolicy_sync.policy import ActorCriticModel
from allenact.algorithms.onpolicy_sync.storage import RolloutStorage
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.misc import (
    Memory,
    ObservationType,
    ActorCriticOutput,
    DistributionType,
)
from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.utils import spaces_utils as su
from allenact.utils.tensor_utils import batch_observations


@attr.s(kw_only=True)
class InferenceAgent:
    """Runs an `ActorCriticModel` on single observations (batch size 1).

    The agent keeps the model's recurrent memory and the previously taken
    action between calls to `act` by routing them through a
    `RolloutStorage`. Call `reset` between tasks.

    # Attributes

    actor_critic : The model used to choose actions (put in `eval` mode).
    rollout_storage : Storage used to assemble the model input for each step.
    device : Device on which the model, storage, memory and preprocessors are placed.
    sensor_preprocessor_graph : Optional preprocessors applied to the batched
        observations before they are stored.
    steps_before_rollout_refresh : Number of steps between calls to
        `rollout_storage.after_updates()`.
    memory : The model's most recently returned memory (if any).
    steps_taken_in_task : Number of `act` calls since the last `reset`.
    last_action_flat : Flattened form of the most recently sampled action.
    has_initialized : Whether `act` has been called at least once.
    """

    actor_critic: ActorCriticModel = attr.ib()
    rollout_storage: RolloutStorage = attr.ib()
    device: torch.device = attr.ib()
    sensor_preprocessor_graph: Optional[SensorPreprocessorGraph] = attr.ib()
    steps_before_rollout_refresh: int = attr.ib(default=128)
    memory: Optional[Memory] = attr.ib(default=None)
    steps_taken_in_task: int = attr.ib(default=0)
    last_action_flat: Optional[Any] = attr.ib(default=None)
    has_initialized: bool = attr.ib(default=False)

    def __attrs_post_init__(self):
        """Put the model in eval mode and move all components to `device`."""
        self.actor_critic.eval()
        self.actor_critic.to(device=self.device)
        if self.memory is not None:
            self.memory.to(device=self.device)
        if self.sensor_preprocessor_graph is not None:
            self.sensor_preprocessor_graph.to(self.device)

        self.rollout_storage.to(self.device)
        self.rollout_storage.set_partition(index=0, num_parts=1)

    @classmethod
    def from_experiment_config(
        cls,
        exp_config: ExperimentConfig,
        device: torch.device,
        checkpoint_path: Optional[str] = None,
        model_state_dict: Optional[Dict[str, Any]] = None,
        mode: str = "test",
    ):
        """Build an agent from an experiment configuration.

        # Parameters

        exp_config : Experiment configuration providing the training pipeline
            (for its rollout storage), the machine parameters and the model.
        device : Device on which inference is run.
        checkpoint_path : Optional path to a checkpoint loadable with
            `torch.load` that contains a `"model_state_dict"` entry.
        model_state_dict : Optional model weights, either the state dict
            itself or a dictionary holding it under `"model_state_dict"`.
        mode : Mode passed to `exp_config.machine_params`.

        # Returns

        A new `InferenceAgent`. The model keeps its freshly created weights
        when neither `checkpoint_path` nor `model_state_dict` is given.
        """
        assert (
            checkpoint_path is None or model_state_dict is None
        ), "Cannot have `checkpoint_path` and `model_state_dict` both non-None."
        rollout_storage = exp_config.training_pipeline().rollout_storage

        machine_params = exp_config.machine_params(mode)
        if not isinstance(machine_params, MachineParams):
            machine_params = MachineParams(**machine_params)

        sensor_preprocessor_graph = machine_params.sensor_preprocessor_graph

        actor_critic = cast(
            ActorCriticModel,
            exp_config.create_model(
                sensor_preprocessor_graph=sensor_preprocessor_graph
            ),
        )

        if checkpoint_path is not None:
            # Load onto the CPU first; the model is moved to `device` in
            # `__attrs_post_init__`.
            actor_critic.load_state_dict(
                torch.load(checkpoint_path, map_location="cpu")["model_state_dict"]
            )
        elif model_state_dict is not None:
            actor_critic.load_state_dict(
                model_state_dict
                if "model_state_dict" not in model_state_dict
                else model_state_dict["model_state_dict"]
            )

        return cls(
            actor_critic=actor_critic,
            rollout_storage=rollout_storage,
            device=device,
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    def reset(self):
        """Prepare the agent for a new task.

        Resets the per-task step counter and drops the recurrent memory. If
        the storage has been initialized by a previous `act` call,
        `rollout_storage.after_updates()` is called first.
        """
        if self.has_initialized:
            self.rollout_storage.after_updates()
        self.steps_taken_in_task = 0
        self.memory = None

    def act(self, observations: ObservationType):
        """Sample the next action for a single observation.

        # Parameters

        observations : The observations of the current step for one agent.

        # Returns

        The first (only) entry of the action list built from the sampled action.
        """
        # This agent is only used for inference: disabling autograd avoids
        # recording a computation graph that would otherwise be kept alive
        # (and keep growing) through the recurrent memory carried between steps.
        with torch.no_grad():
            # Batch of size 1
            obs_batch = batch_observations([observations], device=self.device)
            if self.sensor_preprocessor_graph is not None:
                obs_batch = self.sensor_preprocessor_graph.get_observations(obs_batch)

            if self.steps_taken_in_task == 0:
                # First step of a task: (re)initialize the storage with the
                # first observation.
                self.has_initialized = True
                self.rollout_storage.initialize(
                    observations=obs_batch,
                    num_samplers=1,
                    recurrent_memory_specification=self.actor_critic.recurrent_memory_specification,
                    action_space=self.actor_critic.action_space,
                )
                self.rollout_storage.after_updates()
            else:
                dummy_val = torch.zeros(
                    (1, 1), device=self.device
                )  # Unused dummy value
                self.rollout_storage.add(
                    observations=obs_batch,
                    memory=self.memory,
                    actions=self.last_action_flat[0],
                    action_log_probs=dummy_val,
                    value_preds=dummy_val,
                    rewards=dummy_val,
                    masks=torch.ones(
                        (1, 1), device=self.device
                    ),  # Always == 1 as we're in a single task until `reset`
                )

            agent_input = self.rollout_storage.agent_input_for_next_step()

            actor_critic_output, self.memory = cast(
                Tuple[ActorCriticOutput[DistributionType], Optional[Memory]],
                self.actor_critic(**agent_input),
            )

            action = actor_critic_output.distributions.sample()
            self.last_action_flat = su.flatten(self.actor_critic.action_space, action)

        self.steps_taken_in_task += 1

        # Periodically refresh the storage so that long tasks do not run past
        # the steps it holds (presumably; confirm against `RolloutStorage`).
        if self.steps_taken_in_task % self.steps_before_rollout_refresh == 0:
            self.rollout_storage.after_updates()

        return su.action_list(self.actor_critic.action_space, self.last_action_flat)[0]


================================================
FILE: allenact/utils/misc_utils.py
================================================
import copy
import functools
import hashlib
import inspect
import json
import math
import os
import pdb
import random
import subprocess
import sys
import urllib
import urllib.request
from collections import Counter
from contextlib import contextmanager
from typing import Sequence, List, Optional, Tuple, Hashable

import filelock
import numpy as np
import torch
from scipy.special import comb

from allenact.utils.system import get_logger

TABLEAU10_RGB = (
    (31, 119, 180),
    (255, 127, 14),
    (44, 160, 44),
    (214, 39, 40),
    (148, 103, 189),
    (140, 86, 75),
    (227, 119, 194),
    (127, 127, 127),
    (188, 189, 34),
    (23, 190, 207),
)


def multiprocessing_safe_download_file_from_url(url: str, save_path: str):
    """Download `url` to `save_path` unless the file already exists.

    The check and the download happen while holding a file lock on
    `save_path + ".lock"`, so concurrent callers do not download the same
    file simultaneously.
    """
    lock_path = save_path + ".lock"
    with filelock.FileLock(lock_path):
        if os.path.isfile(save_path):
            get_logger().debug(f"{save_path} exists - skipping download.")
            return

        get_logger().info(f"Downloading file from {url} to {save_path}.")
        urllib.request.urlretrieve(
            url,
            save_path,
        )


def experimental_api(to_decorate):
    """Decorate a function to note that it is part of the experimental API.

    The returned wrapper logs a warning the first time it is called and
    otherwise behaves exactly like `to_decorate`.
    """
    name = f"{inspect.getmodule(to_decorate).__name__}.{to_decorate.__qualname__}"
    if to_decorate.__name__ == "__init__":
        # Refer to the class rather than to its initializer.
        name = name.replace(".__init__", "")

    already_warned = False

    @functools.wraps(to_decorate)
    def decorated(*args, **kwargs):
        nonlocal already_warned
        if not already_warned:
            get_logger().warning(
                f"'{name}' is a part of AllenAct's experimental API."
                f" This means: (1) there are likely bugs present and (2)"
                f" we may remove/change this functionality without warning."
                f" USE AT YOUR OWN RISK.",
            )
            already_warned = True
        return to_decorate(*args, **kwargs)

    return decorated


def deprecated(to_decorate):
    """Decorate a function to note that it has been deprecated.

    The returned wrapper logs a deprecation warning the first time it is
    called and otherwise behaves exactly like `to_decorate`.
    """
    name = f"{inspect.getmodule(to_decorate).__name__}.{to_decorate.__qualname__}"
    if to_decorate.__name__ == "__init__":
        # Refer to the class rather than to its initializer.
        name = name.replace(".__init__", "")

    already_warned = False

    @functools.wraps(to_decorate)
    def decorated(*args, **kwargs):
        nonlocal already_warned
        if not already_warned:
            get_logger().warning(
                f"'{name}' has been deprecated and will soon be removed from AllenAct's API."
                f" Please discontinue your use of this function.",
            )
            already_warned = True
        return to_decorate(*args, **kwargs)

    return decorated


class NumpyJSONEncoder(json.JSONEncoder):
    """JSON encoder for numpy objects.

    Based off the stackoverflow answer by Jie Yang here: https://stackoverflow.com/a/57915246.
    The license for this code is [BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/).
    """

    def default(self, obj):
        """Convert numpy scalars/arrays to built-in Python objects.

        Anything that is not a numpy void, bool, integer, floating or
        ndarray value is passed on to `json.JSONEncoder.default`.
        """
        # Checked in order, the first matching numpy type decides.
        converters = (
            (np.void, lambda _: None),
            (np.bool_, bool),
            (np.integer, int),
            (np.floating, float),
            (np.ndarray, lambda arr: arr.tolist()),
        )
        for numpy_type, convert in converters:
            if isinstance(obj, numpy_type):
                return convert(obj)
        return super(NumpyJSONEncoder, self).default(obj)


@contextmanager
def tensor_print_options(**print_opts):
    """Temporarily apply `print_opts` to both torch and numpy printing.

    The options are passed to `torch.set_printoptions` and
    `np.set_printoptions`; on exit the previous torch values of the given
    options and the full previous numpy print options are restored.
    """
    saved_torch_opts = copy.deepcopy(torch._tensor_str.PRINT_OPTS)
    saved_np_opts = np.get_printoptions()
    try:
        torch.set_printoptions(**print_opts)
        np.set_printoptions(**print_opts)
        yield None
    finally:
        # Only the torch options that were overridden need to be put back.
        torch_restore = {}
        for opt_name in print_opts:
            torch_restore[opt_name] = getattr(saved_torch_opts, opt_name)
        torch.set_printoptions(**torch_restore)
        np.set_printoptions(**saved_np_opts)


def md5_hash_str_as_int(to_hash: str):
    """Return the MD5 digest of `to_hash` interpreted as a (big-endian) integer."""
    digest = hashlib.md5(to_hash.encode()).digest()
    return int.from_bytes(digest, "big")


def get_git_diff_of_project() -> Tuple[str, str]:
    """Return `git describe --always` and the diff against that revision.

    # Returns

    A tuple `(short_sha, diff)` where `short_sha` is the stripped output of
    `git describe --always` and `diff` is the output of `git diff <short_sha>`.
    """

    def run_git(*git_args: str) -> str:
        return subprocess.check_output(["git", *git_args]).decode("utf-8")

    short_sha = run_git("describe", "--always").strip()
    diff = run_git("diff", short_sha)
    return short_sha, diff


class HashableDict(dict):
    """A dictionary which is hashable so long as all of its values are
    hashable.

    A HashableDict object will allow setting / deleting of items until
    the first time that `__hash__()` is called on it after which
    attempts to set or delete items will throw `RuntimeError`
    exceptions.

    Note that hashing and comparing two `HashableDict`s sorts their keys, so
    the keys must be mutually orderable.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Set to `True` by `__hash__`; from then on the items are frozen.
        self._hash_has_been_called = False

    def __key(self):
        # Sorting makes the key independent of the insertion order.
        return tuple((k, self[k]) for k in sorted(self))

    def __hash__(self):
        self._hash_has_been_called = True
        return hash(self.__key())

    def __eq__(self, other):
        # `__key` is name-mangled and only exists on `HashableDict`s, so
        # calling it on anything else (a plain `dict`, `None`, ...) would
        # raise `AttributeError`. Returning `NotImplemented` lets Python
        # fall back to the other operand's comparison instead.
        if not isinstance(other, HashableDict):
            return NotImplemented
        return self.__key() == other.__key()

    def __setitem__(self, *args, **kwargs):
        if not self._hash_has_been_called:
            return super(HashableDict, self).__setitem__(*args, **kwargs)
        raise RuntimeError("Cannot set item in HashableDict after having called hash.")

    def __delitem__(self, *args, **kwargs):
        if not self._hash_has_been_called:
            return super(HashableDict, self).__delitem__(*args, **kwargs)
        raise RuntimeError(
            "Cannot delete item in HashableDict after having called hash."
        )


def partition_sequence(seq: Sequence, parts: int) -> List:
    """Split `seq` into `parts` contiguous slices of near-equal length.

    The first `len(seq) % parts` slices hold one more element than the
    remaining ones. Requires `0 < parts <= len(seq)`.
    """
    assert 0 < parts, f"parts [{parts}] must be greater > 0"
    assert parts <= len(seq), f"parts [{parts}] > len(seq) [{len(seq)}]"

    base_size, num_larger = divmod(len(seq), parts)

    slices = []
    start = 0
    for part_index in range(parts):
        stop = start + base_size + (1 if part_index < num_larger else 0)
        slices.append(seq[start:stop])
        start = stop
    return slices


def unzip(seq: Sequence[Tuple], n: Optional[int] = None):
    """Undoes a `zip` operation.

    # Parameters

    seq: The sequence of tuples that should be unzipped
    n: The number of items in each tuple. This is an optional value but is necessary if
       `len(seq) == 0` (as there is no other way to infer how many empty lists were zipped together
        in this case) and can otherwise be used to error check.

    # Returns

    A list (of length `n` if `n` is given) of lists where the ith list contains all
    the ith elements from the tuples in the input `seq`.

    # Raises

    AssertionError : If `n` is `None` and `seq` is empty, or if some tuple in
        `seq` does not have exactly `n` items.
    """
    assert (
        n is not None or len(seq) != 0
    ), "`n` must be given when `seq` is empty as it cannot be inferred."
    if n is None:
        n = len(seq[0])
    lists = [[] for _ in range(n)]

    for t in seq:
        assert len(t) == n, f"Expected tuples of length {n} but found one of length {len(t)}."
        for i in range(n):
            lists[i].append(t[i])
    return lists


def uninterleave(seq: Sequence, parts: int) -> List:
    """Split `seq` into `parts` lists by dealing its elements round-robin.

    The ith returned list holds the elements at indices `i`, `i + parts`,
    `i + 2 * parts`, ... of `seq`. Requires `0 < parts <= len(seq)`.
    """
    assert 0 < parts <= len(seq)
    n = len(seq)

    dealt = []
    for offset in range(parts):
        dealt.append([seq[index] for index in range(offset, n, parts)])
    return dealt


@functools.lru_cache(10000)
def cached_comb(n: int, m: int):
    """Memoized version of `scipy.special.comb(n, m)`.

    Results are kept in an LRU cache holding up to 10000 argument pairs.
    """
    return comb(n, m)


def expected_max_of_subset_statistic(vals: List[float], m: int):
    """Expected maximum of a size-`m` subset of `vals` chosen without replacement.

    Values are rounded to 8 decimal places and grouped. For every distinct
    value, the number of size-`m` subsets whose maximum is that value is
    counted and divided by the total number of size-`m` subsets.

    # Parameters

    vals : The values to choose from.
    m : Size of the subsets, must satisfy `m <= len(vals)`.

    # Returns

    The expected maximum as a float.
    """
    n = len(vals)
    assert m <= n

    # Distinct (rounded) values in increasing order with their multiplicities.
    distinct_vals_and_counts = sorted(Counter(round(val, 8) for val in vals).items())

    # Work with logarithms so that the ratio of binomials stays representable.
    log_num_subsets = math.log(comb(n, m))

    expected_max = 0.0
    num_vals_at_most = 0
    for val, multiplicity in distinct_vals_and_counts:
        num_vals_at_most += multiplicity
        if num_vals_at_most < m:
            # Too few values this small to form a subset of size `m`.
            continue

        num_smaller = num_vals_at_most - multiplicity
        # Pick `i >= 1` copies of `val` and the remaining `m - i` elements
        # from the strictly smaller values.
        num_subsets_with_max = sum(
            cached_comb(multiplicity, i) * cached_comb(num_smaller, m - i)
            for i in range(1, min(multiplicity, m) + 1)
        )

        expected_max += val * math.exp(math.log(num_subsets_with_max) - log_num_subsets)

    return expected_max


def bootstrap_max_of_subset_statistic(
    vals: List[float], m: int, reps=1000, seed: Optional[int] = None
):
    """Bootstrap the `expected_max_of_subset_statistic` of `vals`.

    # Parameters

    vals: The observed values.
    m: Subset size forwarded to `expected_max_of_subset_statistic`.
    reps: Number of bootstrap resamples to draw.
    seed: If given, resampling is reproducible and the global `random` state
        is left untouched; if `None`, the global `random` generator is used.

    # Returns

    A list with `reps` entries, one statistic per resample (with replacement,
    same size as `vals`).
    """
    # A private generator seeded with `seed` produces the same draws as
    # `random.seed(seed)` followed by module-level calls, but never mutates the
    # global RNG. The global state therefore cannot be left clobbered when the
    # statistic raises part-way through.
    rng = random.Random(seed) if seed is not None else random

    return [
        expected_max_of_subset_statistic(rng.choices(vals, k=len(vals)), m)
        for _ in range(reps)
    ]


def rand_float(low: float, high: float, shape):
    """Sample uniform random floats from `[low, high)`.

    # Parameters

    low: Lower bound; must not exceed `high`.
    high: Upper bound.
    shape: Either an iterable of dimensions or a single integer length.

    # Returns

    A numpy array of the requested shape drawn with `np.random.rand`.
    """
    assert low <= high
    span = high - low
    try:
        unit_samples = np.random.rand(*shape)
    except TypeError:
        # `shape` could not be unpacked (e.g. a bare int): pass it as is.
        unit_samples = np.random.rand(shape)
    return unit_samples * span + low


def all_unique(seq: Sequence[Hashable]):
    """Return `True` iff no element of `seq` occurs more than once.

    Iteration stops at the first repeated element.
    """
    observed = set()
    for item in seq:
        size_before = len(observed)
        observed.add(item)
        if len(observed) == size_before:
            # Adding did not grow the set, so `item` was seen before.
            return False
    return True


def all_equal(s: Sequence):
    """Return `True` iff every element of `s` equals the first one.

    Sequences with at most one element are trivially all-equal.
    """
    if len(s) <= 1:
        return True
    for other in s[1:]:
        if not (s[0] == other):
            return False
    return True


def prepare_locals_for_super(
    local_vars, args_name="args", kwargs_name="kwargs", ignore_kwargs=False
):
    """Turn a `locals()`-style dict into keyword arguments for a `super()` call.

    # Parameters

    local_vars: Mapping of local variable names to values.
    args_name: Name of a var-positional parameter; it must not be present in
        `local_vars` (not supported).
    kwargs_name: Name under which a var-keyword dict may be stored.
    ignore_kwargs: If `True`, the var-keyword dict is dropped instead of merged.

    # Returns

    A dict without `self` and without any name containing `"__"`. When the
    var-keyword dict is present and not ignored, that same dict object is
    updated in place with the remaining entries and returned.
    """
    assert (
        args_name not in local_vars
    ), "`prepare_locals_for_super` does not support {}.".format(args_name)

    filtered = {}
    for name, value in local_vars.items():
        if name == "self" or "__" in name:
            continue
        filtered[name] = value

    if kwargs_name not in filtered:
        return filtered

    extra_kwargs = filtered.pop(kwargs_name)
    if ignore_kwargs:
        return filtered

    # Explicitly named locals take precedence over entries in the kwargs dict.
    extra_kwargs.update(filtered)
    return extra_kwargs


def partition_limits(num_items: int, num_parts: int):
    """Boundaries that split `num_items` into `num_parts` near-equal chunks.

    # Returns

    A list of `num_parts + 1` ints starting at 0 and ending at `num_items`;
    consecutive entries delimit one part.
    """
    boundaries = np.linspace(0, num_items, num_parts + 1, endpoint=True)
    rounded = np.round(boundaries)
    return rounded.astype(np.int32).tolist()


def str2bool(v: str):
    """Parse a human-written boolean string.

    The input is lower-cased and stripped before matching.

    # Returns

    `True` for "yes"/"true"/"t"/"y"/"1", `False` for "no"/"false"/"f"/"n"/"0".

    # Raises

    ValueError: If the normalized string is none of the above.
    """
    normalized = v.lower().strip()
    truthy = {"yes", "true", "t", "y", "1"}
    falsy = {"no", "false", "f", "n", "0"}
    if normalized in truthy:
        return True
    if normalized in falsy:
        return False
    raise ValueError(f"{normalized} cannot be converted to a bool")


class ForkedPdb(pdb.Pdb):
    """A Pdb subclass that may be used from a forked multiprocessing child."""

    def interaction(self, *args, **kwargs):
        """Run the usual Pdb interaction while reading from `/dev/stdin`.

        `sys.stdin` is temporarily pointed at a freshly opened `/dev/stdin`
        and restored afterwards, even if the interaction raises. All
        arguments are forwarded to `pdb.Pdb.interaction`.
        """
        _stdin = sys.stdin
        try:
            # The context manager closes the handle once the interaction is
            # over, so repeated breakpoints do not leak file descriptors.
            with open("/dev/stdin") as forked_stdin:
                sys.stdin = forked_stdin
                pdb.Pdb.interaction(self, *args, **kwargs)
        finally:
            sys.stdin = _stdin


================================================
FILE: allenact/utils/model_utils.py
================================================
"""Functions used to initialize and manipulate pytorch models."""

import hashlib
from typing import Sequence, Tuple, Union, Optional, Dict, Any, Callable

import numpy as np
import torch
import torch.nn as nn

from allenact.utils.misc_utils import md5_hash_str_as_int


def md5_hash_of_state_dict(state_dict: Dict[str, Any]):
    """Compute an integer MD5-based fingerprint of a state dict.

    Entries are visited in sorted order. Array-like values (numpy arrays,
    tensors, parameters) contribute their key and the MD5 of their raw bytes;
    any other entry contributes the hash of its string representation.

    # Returns

    The result of `md5_hash_str_as_int` applied to the string form of the
    collected per-entry pieces.
    """
    pieces = []
    for entry in sorted(state_dict.items()):
        name, value = entry[0], entry[1]

        if not isinstance(value, (np.ndarray, torch.Tensor, nn.Parameter)):
            pieces.append(md5_hash_str_as_int(str(entry)))
            continue

        pieces.append(name)
        if isinstance(value, np.ndarray):
            array = value
        else:
            # Tensors are moved to host memory before reading their bytes.
            array = value.data.cpu().numpy()
        digest = hashlib.md5(array.tobytes()).hexdigest()
        pieces.append(int(digest, 16))

    return md5_hash_str_as_int(str(pieces))


class Flatten(nn.Module):
    """Module collapsing all non-batch dimensions: (FLATTENED_BATCH x ...) ->
    (FLATTENED_BATCH x -1)."""

    # noinspection PyMethodMayBeStatic
    def forward(self, x):
        """Flatten input tensor.

        # Parameters
        x : Tensor of size (FLATTENED_BATCH x ...).
        # Returns
        The same data reshaped to size (FLATTENED_BATCH x -1).
        """
        batch_size = x.shape[0]
        return x.reshape(batch_size, -1)


def init_linear_layer(
    module: nn.Linear, weight_init: Callable, bias_init: Callable, gain=1
):
    """Initialize a torch.nn.Linear layer.

    # Parameters

    module : A torch linear layer.
    weight_init : Function used to initialize the weight parameters of the linear layer. Should take the weight data
        tensor and gain as input.
    bias_init : Function used to initialize the bias parameters of the linear layer. Is called with the bias data
        tensor only (no gain). Skipped when the layer has no bias.
    gain : The gain passed to `weight_init`.

    # Returns

    The initialized linear layer.
    """
    weight_init(module.weight.data, gain=gain)
    # Layers built with `bias=False` have `module.bias is None`; there is
    # nothing to initialize in that case.
    if module.bias is not None:
        bias_init(module.bias.data)
    return module


def grad_norm(parameters, norm_type=2):
    """Compute the norm of the gradients of `parameters`.

    # Parameters

    parameters : A single tensor or an iterable of tensors; entries whose
        `.grad` is `None` are ignored.
    norm_type : Order of the norm. Anything accepted by `float(...)`,
        including `float("inf")` or the string `"inf"` for the max norm.

    # Returns

    The norm, as a Python float, of all gradients viewed as one vector
    (`0.0` for the infinity norm when no parameter has a gradient).
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    parameters = [p for p in parameters if p.grad is not None]
    norm_type = float(norm_type)

    # `norm_type` is a float at this point, so it must be compared with the
    # float infinity (comparing against the string "inf" can never match).
    if norm_type == float("inf"):
        if not parameters:
            return 0.0
        return max(p.grad.data.abs().max().item() for p in parameters)

    total_norm = 0
    for p in parameters:
        param_norm = p.grad.data.norm(norm_type)
        total_norm += param_norm.item() ** norm_type
    total_norm = total_norm ** (1.0 / norm_type)
    return total_norm


def make_cnn(
    input_channels: int,
    layer_channels: Sequence[int],
    kernel_sizes: Sequence[Union[int, Tuple[int, int]]],
    strides: Sequence[Union[int, Tuple[int, int]]],
    paddings: Sequence[Union[int, Tuple[int, int]]],
    dilations: Sequence[Union[int, Tuple[int, int]]],
    output_height: int,
    output_width: int,
    output_channels: int,
    flatten: bool = True,
    output_relu: bool = True,
) -> nn.Module:
    """Build a stack of `nn.Conv2d` layers with optional flatten + linear head.

    # Parameters

    input_channels : Number of channels of the input.
    layer_channels : Output channels of each convolution.
    kernel_sizes, strides, paddings, dilations : Per-layer convolution
        settings; each must have the same length as `layer_channels`.
    output_height, output_width : Spatial size of the last convolution's
        output, used to size the linear layer when `flatten` is set.
    output_channels : Output features of the linear layer.
    flatten : If `True`, append a `Flatten` module and a linear layer.
    output_relu : If `True`, append a final ReLU.

    # Returns

    An `nn.Sequential` with modules named `conv_i`, `relu_i` (between
    convolutions only), and optionally `flatten`, `fc`, `out_relu`.
    """
    num_layers = len(layer_channels)
    per_layer_settings = (kernel_sizes, strides, paddings, dilations)
    assert all(
        len(setting) == num_layers for setting in per_layer_settings
    ), "Mismatched sizes: layers {} kernels {} strides {} paddings {} dilations {}".format(
        layer_channels, kernel_sizes, strides, paddings, dilations
    )

    net = nn.Sequential()

    in_channels = input_channels
    layer_specs = zip(layer_channels, kernel_sizes, strides, paddings, dilations)
    for idx, (out_channels, kernel, stride, padding, dilation) in enumerate(
        layer_specs
    ):
        conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel,
            stride=stride,
            padding=padding,
            dilation=dilation,
        )
        net.add_module("conv_{}".format(idx), conv)

        # No ReLU directly after the last convolution; see `output_relu`.
        is_last = not (idx < num_layers - 1)
        if not is_last:
            net.add_module("relu_{}".format(idx), nn.ReLU(inplace=True))

        in_channels = out_channels

    if flatten:
        net.add_module("flatten", Flatten())
        num_flat_features = layer_channels[-1] * output_width * output_height
        net.add_module("fc", nn.Linear(num_flat_features, output_channels))
    if output_relu:
        net.add_module("out_relu", nn.ReLU(True))

    return net


def compute_cnn_output(
    cnn: nn.Module,
    cnn_input: torch.Tensor,
    permute_order: Optional[Tuple[int, ...]] = (
        0,  # FLAT_BATCH (flattening steps, samplers and agents)
        3,  # CHANNEL
        1,  # ROW
        2,  # COL
    ),  # from [FLAT_BATCH x ROW x COL x CHANNEL] flattened input
):
    """Apply a CNN to inputs carrying step/sampler(/agent) leading dimensions.

    The leading dimensions are merged into one flat batch dimension, the
    result is optionally permuted, passed through `cnn`, and the leading
    dimensions are restored on the output.

    # Parameters

    cnn : A torch CNN.
    cnn_input: A torch Tensor of shape [STEP, SAMPLER, (AGENT,) dim1, dim2, dim3].
    permute_order: Permutation applied to the flattened input before calling
                   `cnn` (default (0, 3, 1, 2), index 0 being the flat batch),
                   or `None` to skip the permutation.

    # Returns

    CNN output with dimensions [STEP, SAMPLER, (AGENT,) ...], where `...` are
    the non-batch dimensions produced by `cnn`.
    """
    num_dims = len(cnn_input.shape)
    assert num_dims in [
        5,
        6,
    ], "CNN input must have shape [STEP, SAMPLER, (AGENT,) dim1, dim2, dim3]"

    # Six dimensions means an AGENT dimension follows STEP and SAMPLER.
    num_leading = 3 if num_dims == 6 else 2
    leading_shape = tuple(cnn_input.shape[:num_leading])

    # Make FLAT_BATCH = nsteps * nsamplers (* nagents)
    flat_input = cnn_input.view((-1,) + cnn_input.shape[num_leading:])
    if permute_order is not None:
        flat_input = flat_input.permute(*permute_order)

    flat_output = cnn(flat_input)

    # Split the flat batch dimension back into its original components.
    return flat_output.reshape(leading_shape + flat_output.shape[1:])


def simple_conv_and_linear_weights_init(m):
    """Uniformly initialize convolution and linear modules in place.

    Modules whose exact type is one of the (transposed) convolution classes
    get weights drawn from `U(-b, b)` with `b = sqrt(6 / (fan_in + fan_out))`
    and a zeroed bias (if any). `nn.Linear` modules are delegated to
    `simple_linear_weights_init`. Any other module is left untouched.
    """
    module_type = type(m)

    if module_type == nn.Linear:
        simple_linear_weights_init(m)
        return

    conv_types = (
        nn.Conv1d,
        nn.Conv2d,
        nn.Conv3d,
        nn.ConvTranspose1d,
        nn.ConvTranspose2d,
        nn.ConvTranspose3d,
    )
    if module_type not in conv_types:
        return

    shape = list(m.weight.data.size())
    fan_in = np.prod(shape[1:4])
    fan_out = np.prod(shape[2:4]) * shape[0]
    bound = np.sqrt(6.0 / (fan_in + fan_out))
    m.weight.data.uniform_(-bound, bound)
    if m.bias is not None:
        m.bias.data.fill_(0)


def simple_linear_weights_init(m):
    """Uniformly initialize an `nn.Linear` module in place.

    Weights are drawn from `U(-b, b)` with `b = sqrt(6 / (fan_in + fan_out))`
    and the bias (if any) is zeroed. Modules whose exact type is not
    `nn.Linear` are left untouched.
    """
    if type(m) != nn.Linear:
        return

    fan_out, fan_in = m.weight.data.size()
    bound = np.sqrt(6.0 / (fan_in + fan_out))
    m.weight.data.uniform_(-bound, bound)
    if m.bias is not None:
        m.bias.data.fill_(0)


class FeatureEmbedding(nn.Module):
    """An `nn.Embedding` wrapper that also supports an output size of zero.

    Used for extracting features for actions/rewards. With `output_size == 0`
    no embedding table is created and the forward pass returns an empty
    tensor.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size

        if self.output_size == 0:
            # Stored as a non-persistent buffer so that it automatically
            # follows the module across devices without entering state dicts.
            self.null_embedding: torch.Tensor
            self.register_buffer("null_embedding", torch.zeros(0), persistent=False)
        else:
            self.fc = nn.Embedding(input_size, output_size)

    def forward(self, inputs):
        """Embed `inputs`, or return the empty buffer when `output_size` is 0."""
        if self.output_size == 0:
            return self.null_embedding
        return self.fc(inputs)


================================================
FILE: allenact/utils/multi_agent_viz_utils.py
================================================
from typing import Sequence, Any

import numpy as np
from matplotlib import pyplot as plt, markers
from matplotlib.collections import LineCollection

from allenact.utils.viz_utils import TrajectoryViz


class MultiTrajectoryViz(TrajectoryViz):
    """`TrajectoryViz` variant drawing one trajectory per agent on a shared
    figure.

    Each agent's trajectory is looked up under the path obtained by appending
    the agent's suffix to the last element of `path_to_trajectory_prefix`.
    """

    def __init__(
        self,
        path_to_trajectory_prefix: Sequence[str] = ("task_info", "followed_path"),
        agent_suffixes: Sequence[str] = ("1", "2"),
        label: str = "trajectories",
        trajectory_plt_colormaps: Sequence[str] = ("cool", "spring"),
        marker_plt_colors: Sequence[Any] = ("blue", "orange"),
        axes_equal: bool = True,
        **other_base_kwargs,
    ):
        """Store the per-agent plotting configuration.

        # Parameters

        path_to_trajectory_prefix : Access path to the trajectory; the agent
            suffix is appended to its last element.
        agent_suffixes : One suffix per agent.
        label : Forwarded to the base class constructor.
        trajectory_plt_colormaps : Colormap name used for each agent's line.
        marker_plt_colors : Color used for each agent's start/stop markers.
        axes_equal : Whether to force an equal aspect ratio on the axes.
        other_base_kwargs : Remaining keyword arguments forwarded to the base
            class constructor.
        """
        super().__init__(label=label, **other_base_kwargs)

        self.path_to_trajectory_prefix = list(path_to_trajectory_prefix)
        self.agent_suffixes = list(agent_suffixes)
        self.trajectory_plt_colormaps = list(trajectory_plt_colormaps)
        self.marker_plt_colors = marker_plt_colors
        self.axes_equal = axes_equal

    def make_fig(self, episode, episode_id):
        """Create a figure with every agent's trajectory for one episode.

        Agents, colormaps and marker colors are paired with `zip`, so only as
        many agents are drawn as the shortest of the three sequences.

        # Parameters

        episode : Episode data from which trajectories are read via
            `self._access` (NOTE(review): `_access`, `x`, `y`, `figsize`,
            `fontsize`, `start_marker_shape`, `start_marker_scale`,
            `path_to_rot_degrees` and `adapt_rotation` are not defined in this
            class -- presumably provided by `TrajectoryViz`; confirm there).
        episode_id : Used as the axes title.

        # Returns

        The matplotlib figure.
        """

        # From https://nbviewer.jupyter.org/github/dpsanders/matplotlib-examples/blob/master/colorline.ipynb
        def colorline(
            x,
            y,
            z=None,
            cmap=plt.get_cmap("cool"),
            norm=plt.Normalize(0.0, 1.0),
            linewidth=2,
            alpha=1.0,
            zorder=1,
        ):
            """Plot a colored line with coordinates x and y.

            Optionally specify colors in the array z

            Optionally specify a colormap, a norm function and a line width.
            """

            def make_segments(x, y):
                """Create list of line segments from x and y coordinates, in
                the correct format for LineCollection:

                an array of the form  numlines x (points per line) x 2
                (x and y) array
                """
                points = np.array([x, y]).T.reshape(-1, 1, 2)
                # Pair every point with its successor to obtain the segments.
                segments = np.concatenate([points[:-1], points[1:]], axis=1)
                return segments

            # Default colors equally spaced on [0,1]:
            if z is None:
                z = np.linspace(0.0, 1.0, len(x))

            # Special case if a single number:
            if not hasattr(
                z, "__iter__"
            ):  # to check for numerical input -- this is a hack
                z = np.array([z])

            z = np.asarray(z)

            segments = make_segments(x, y)

            lc = LineCollection(
                segments,
                array=z,
                cmap=cmap,
                norm=norm,
                linewidth=linewidth,
                alpha=alpha,
                zorder=zorder,
            )

            # The collection is added to the current axes (`plt.gca()`), not
            # to an axes passed in by the caller.
            ax = plt.gca()
            ax.add_collection(lc)

            return lc

        fig, ax = plt.subplots(figsize=self.figsize)
        for agent, cmap, marker_color in zip(
            self.agent_suffixes, self.trajectory_plt_colormaps, self.marker_plt_colors
        ):
            # Copy the prefix so that appending the suffix does not modify
            # the stored configuration.
            path = self.path_to_trajectory_prefix[:]
            path[-1] = path[-1] + agent
            trajectory = self._access(episode, path)

            x, y = [], []
            for xy in trajectory:
                x.append(float(self._access(xy, self.x)))
                y.append(float(self._access(xy, self.y)))

            colorline(x, y, zorder=1, cmap=cmap)

            start_marker = markers.MarkerStyle(marker=self.start_marker_shape)
            if self.path_to_rot_degrees is not None:
                # Rotate the start marker by the rotation stored with the
                # first trajectory point.
                rot_degrees = float(
                    self._access(trajectory[0], self.path_to_rot_degrees)
                )
                if self.adapt_rotation is not None:
                    rot_degrees = self.adapt_rotation(rot_degrees)
                # NOTE: this writes to the private `_transform` attribute of
                # `MarkerStyle`.
                start_marker._transform = start_marker.get_transform().rotate_deg(
                    rot_degrees
                )

            ax.scatter(
                [x[0]],
                [y[0]],
                marker=start_marker,
                zorder=2,
                s=self.start_marker_scale,
                color=marker_color,
            )
            ax.scatter(
                [x[-1]], [y[-1]], marker="s", color=marker_color
            )  # stop (square)

        if self.axes_equal:
            ax.set_aspect("equal", "box")
        ax.set_title(episode_id, fontsize=self.fontsize)
        ax.tick_params(axis="x", labelsize=self.fontsize)
        ax.tick_params(axis="y", labelsize=self.fontsize)

        return fig


================================================
FILE: allenact/utils/spaces_utils.py
================================================
# Original work Copyright (c) 2016 OpenAI (https://openai.com).
# Modified work Copyright (c) Allen Institute for AI
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from typing import Union, Tuple, List, cast, Iterable, Callable
from collections import OrderedDict

import numpy as np
import torch
from gym import spaces as gym

# Type alias for a single action value; `action_list` returns a list of these.
ActionType = Union[torch.Tensor, OrderedDict, Tuple, int]


def flatdim(space):
    """Number of entries a flattened point of `space` has.

    Discrete spaces count as a single entry (no one-hot expansion) and
    composite spaces (Tuple, Dict) sum the sizes of their sub-spaces.

    Accepts a space and returns an integer. Raises
    ``NotImplementedError`` if the space is not one of the supported
    ``gym.spaces`` types.
    """
    if isinstance(space, gym.Discrete):
        return 1  # we do not expand to one-hot
    if isinstance(space, (gym.Box, gym.MultiDiscrete)):
        return int(np.prod(space.shape))
    if isinstance(space, gym.MultiBinary):
        return int(space.n)

    if isinstance(space, gym.Tuple):
        subspaces = space.spaces
    elif isinstance(space, gym.Dict):
        subspaces = space.spaces.values()
    else:
        raise NotImplementedError

    return int(sum(flatdim(subspace) for subspace in subspaces))


def flatten(space, torch_x):
    """Flatten data points from a space.

    The trailing dimensions that belong to `space` are collapsed into one;
    composite spaces (Tuple, Dict) are flattened part by part and
    concatenated along the last dimension. Raises ``NotImplementedError``
    for unsupported space types.
    """
    if isinstance(space, gym.Box):
        space_rank = len(space.shape)
        if space_rank > 0:
            leading = torch_x.shape[:-space_rank]
        else:
            # Scalar box: keep every dimension and append a new last one.
            leading = torch_x.shape
        return torch_x.view(leading + (-1,))

    if isinstance(space, gym.Discrete):
        # Assume tensor input does NOT contain a dimension for action
        if not isinstance(torch_x, torch.Tensor):
            return torch.tensor(torch_x).view(1)
        return torch_x.unsqueeze(-1)

    if isinstance(space, gym.Tuple):
        flat_parts = [
            flatten(subspace, part) for part, subspace in zip(torch_x, space.spaces)
        ]
        return torch.cat(flat_parts, dim=-1)

    if isinstance(space, gym.Dict):
        flat_parts = [
            flatten(subspace, torch_x[key]) for key, subspace in space.spaces.items()
        ]
        return torch.cat(flat_parts, dim=-1)

    if isinstance(space, (gym.MultiBinary, gym.MultiDiscrete)):
        leading = torch_x.shape[: -len(space.shape)]
        return torch_x.view(leading + (-1,))

    raise NotImplementedError


def unflatten(space, torch_x):
    """Unflatten a concatenated data points tensor from a space.

    The last dimension of `torch_x` is reshaped to the shape of `space` and
    cast to the dtype used for that kind of space. Composite spaces (Tuple,
    Dict) are split along the last dimension according to `flatdim` of each
    sub-space and unflattened recursively. Raises ``NotImplementedError``
    for unsupported space types.
    """

    def with_space_shape():
        # Replace the flat last dimension by the space's own shape.
        return torch_x.view(torch_x.shape[:-1] + space.shape)

    if isinstance(space, gym.Box):
        return with_space_shape().float()

    if isinstance(space, gym.Discrete):
        res = with_space_shape().long()
        # A zero-dimensional result is returned as a plain Python number.
        if len(res.shape) > 0:
            return res
        return res.item()

    if isinstance(space, gym.Tuple):
        sizes = [flatdim(subspace) for subspace in space.spaces]
        chunks = torch.split(torch_x, sizes, dim=-1)
        return tuple(
            unflatten(subspace, chunk) for chunk, subspace in zip(chunks, space.spaces)
        )

    if isinstance(space, gym.Dict):
        sizes = [flatdim(subspace) for subspace in space.spaces.values()]
        chunks = torch.split(torch_x, sizes, dim=-1)
        return OrderedDict(
            (key, unflatten(subspace, chunk))
            for chunk, (key, subspace) in zip(chunks, space.spaces.items())
        )

    if isinstance(space, gym.MultiBinary):
        return with_space_shape().byte()

    if isinstance(space, gym.MultiDiscrete):
        return with_space_shape().long()

    raise NotImplementedError


def torch_point(space, np_x):
    """Convert numpy space point into torch.

    Discrete points are returned unchanged; composite spaces (Tuple, Dict)
    are converted part by part. Raises ``NotImplementedError`` for
    unsupported space types.
    """
    if isinstance(space, gym.Discrete):
        return np_x

    if isinstance(space, (gym.Box, gym.MultiBinary)):
        return torch.from_numpy(np_x)

    if isinstance(space, gym.MultiDiscrete):
        return torch.from_numpy(np.asarray(np_x))

    if isinstance(space, gym.Tuple):
        return tuple(
            torch_point(subspace, part) for part, subspace in zip(np_x, space.spaces)
        )

    if isinstance(space, gym.Dict):
        return OrderedDict(
            (key, torch_point(subspace, np_x[key]))
            for key, subspace in space.spaces.items()
        )

    raise NotImplementedError


def numpy_point(
    space: gym.Space, torch_x: Union[int, torch.Tensor, OrderedDict, Tuple]
):
    """Convert torch space point into numpy.

    Discrete points are returned unchanged; tensors are moved to the CPU
    before conversion; composite spaces (Tuple, Dict) are converted part by
    part. Raises ``NotImplementedError`` for unsupported space types.
    """
    if isinstance(space, gym.Discrete):
        return torch_x

    if isinstance(space, (gym.Box, gym.MultiBinary, gym.MultiDiscrete)):
        tensor = cast(torch.Tensor, torch_x)
        return tensor.cpu().numpy()

    if isinstance(space, gym.Tuple):
        parts = cast(Iterable, torch_x)
        return tuple(
            numpy_point(subspace, part) for part, subspace in zip(parts, space.spaces)
        )

    if isinstance(space, gym.Dict):
        return OrderedDict(
            (key, numpy_point(subspace, torch_x[key]))
            for key, subspace in space.spaces.items()
        )

    raise NotImplementedError


def flatten_space(space: gym.Space):
    """Build the `gym.Box` that flattened points of `space` live in.

    Composite spaces (Tuple, Dict) are flattened recursively and their
    bounds concatenated. Raises ``NotImplementedError`` for unsupported
    space types.
    """
    if isinstance(space, gym.Box):
        return gym.Box(space.low.flatten(), space.high.flatten())

    if isinstance(space, gym.Discrete):
        return gym.Box(low=0, high=space.n, shape=(1,))

    if isinstance(space, (gym.Tuple, gym.Dict)):
        if isinstance(space, gym.Tuple):
            subspaces = space.spaces
        else:
            subspaces = space.spaces.values()
        flat_boxes = [flatten_space(subspace) for subspace in subspaces]
        lows = np.concatenate([box.low for box in flat_boxes])
        highs = np.concatenate([box.high for box in flat_boxes])
        return gym.Box(low=lows, high=highs)

    if isinstance(space, gym.MultiBinary):
        return gym.Box(low=0, high=1, shape=(space.n,))

    if isinstance(space, gym.MultiDiscrete):
        upper = space.nvec
        return gym.Box(low=np.zeros_like(upper), high=upper)

    raise NotImplementedError


def policy_space(
    action_space: gym.Space,
    box_space_to_policy: Callable[[gym.Box], gym.Space] = None,
) -> gym.Space:
    """Build the space of policy outputs for a given action space.

    # Parameters

    action_space : The action space.
    box_space_to_policy : Optional mapping from a Box action space to its
        policy space; when `None`, a Box action space is its own policy space.

    # Returns

    The policy space: probabilities in `[0, 1]` for discrete-like spaces and
    a Tuple / Dict of sub-policy spaces for composite spaces. Raises
    ``NotImplementedError`` for unsupported space types.
    """

    def probabilities(shape):
        return gym.Box(low=np.float32(0.0), high=np.float32(1.0), shape=shape)

    if isinstance(action_space, gym.Box):
        # policy = mean (default), unless a custom mapping is given
        if box_space_to_policy is not None:
            return box_space_to_policy(action_space)
        return action_space

    if isinstance(action_space, gym.Discrete):
        # policy = prob of each option
        return probabilities((action_space.n,))

    if isinstance(action_space, gym.Tuple):
        # policy = tuple of sub-policies
        return gym.Tuple(
            [
                policy_space(subspace, box_space_to_policy)
                for subspace in action_space.spaces
            ]
        )

    if isinstance(action_space, gym.Dict):
        # policy = dict of sub-policies
        named_subpolicies = []
        for name, subspace in action_space.spaces.items():
            named_subpolicies.append(
                (name, policy_space(subspace, box_space_to_policy))
            )
        return gym.Dict(named_subpolicies)

    if isinstance(action_space, gym.MultiBinary):
        # policy = prob of 0, 1 in each entry
        return probabilities((action_space.n, 2))

    if isinstance(action_space, gym.MultiDiscrete):
        # policy = Tuple of prob of each option for each discrete
        return gym.Tuple([probabilities((n,)) for n in action_space.nvec])

    raise NotImplementedError


def action_list(
    action_space: gym.Space, flat_actions: torch.Tensor
) -> List[ActionType]:
    """Convert flattened actions to list.

    Assumes `flat_actions` are of shape `[step, sampler, flatdim]`. Only the
    first step (`flat_actions[0]`) is converted: each of its entries is
    unflattened according to `action_space` and every tensor inside the
    result is turned into plain Python values.
    """

    def to_python(action):
        if isinstance(action, torch.Tensor):
            return action.tolist()
        if isinstance(action, tuple):
            return tuple(to_python(part) for part in action)
        if isinstance(action, OrderedDict):
            return OrderedDict((key, to_python(action[key])) for key in action.keys())
        # else, it's a scalar
        return action

    first_step = flat_actions[0]
    return [
        to_python(unflatten(action_space, flat_action)) for flat_action in first_step
    ]


================================================
FILE: allenact/utils/system.py
================================================
import io
import logging
import os
import socket
import sys
from contextlib import closing
from typing import cast, Optional, Tuple

from torch import multiprocessing as mp

from allenact._constants import ALLENACT_INSTALL_DIR

HUMAN_LOG_LEVELS: Tuple[str, ...] = ("debug", "info", "warning", "error", "none")
"""
Available log levels: "debug", "info", "warning", "error", "none"
"""

# Module-wide logger; stays `None` until `_new_logger` assigns it.
_LOGGER: Optional[logging.Logger] = None


class ColoredFormatter(logging.Formatter):
    """Format a log string with colors.

    This implementation taken (with modifications) from
    https://stackoverflow.com/a/384125.
    """

    BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)

    RESET_SEQ = "\033[0m"
    COLOR_SEQ = "\033[1;%dm"
    BOLD_SEQ = "\033[1m"

    # Color used for each standard level name.
    COLORS = {
        "WARNING": YELLOW,
        "INFO": GREEN,
        "DEBUG": BLUE,
        "ERROR": RED,
        "CRITICAL": MAGENTA,
    }

    def __init__(self, fmt: str, datefmt: Optional[str] = None, use_color=True):
        """Create the formatter.

        # Parameters

        fmt : Format string, as for `logging.Formatter`.
        datefmt : Date format string, as for `logging.Formatter`.
        use_color : Whether the level name should be wrapped in color codes.
        """
        super().__init__(fmt=fmt, datefmt=datefmt)
        self.use_color = use_color

    def format(self, record: logging.LogRecord) -> str:
        """Format `record`, coloring its level name when enabled.

        The record's `levelname` is only modified for the duration of the
        formatting and is always restored afterwards, also when formatting
        raises, as `record` might be used elsewhere (e.g. by other handlers).
        """
        levelname = record.levelname
        if not (self.use_color and levelname in self.COLORS):
            return logging.Formatter.format(self, record)

        record.levelname = (
            self.COLOR_SEQ % (30 + self.COLORS[levelname])
            + levelname
            + self.RESET_SEQ
        )
        try:
            return logging.Formatter.format(self, record)
        finally:
            # Resetting levelname as `record` might be used elsewhere
            record.levelname = levelname


def get_logger() -> logging.Logger:
    """Return the module-wide `logging.Logger`, creating it on first use.

    It can be called whenever we wish to log some message. On first use the
    logger is created, set to DEBUG when running in the main process, and
    given its formatter/handler. Messages from several processes can get
    mixed-up
    (https://docs.python.org/3.6/library/multiprocessing.html#logging), but
    it works well in most cases.

    # Returns

    logger: the `logging.Logger` object
    """
    newly_created = _new_logger()
    if newly_created:
        in_main_process = mp.current_process().name == "MainProcess"
        if in_main_process:
            _new_logger(logging.DEBUG)
        _set_log_formatter()
    return _LOGGER


def _human_log_level_to_int(human_log_level):
    """Map a human-readable level name to its `logging` integer level.

    The name is lower-cased and stripped first and must be one of
    `HUMAN_LOG_LEVELS`; "none" maps to a level above `logging.CRITICAL`.
    """
    human_log_level = human_log_level.lower().strip()
    assert human_log_level in HUMAN_LOG_LEVELS, "unknown human_log_level {}".format(
        human_log_level
    )

    level_by_name = {
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warning": logging.WARNING,
        "error": logging.ERROR,
        "none": logging.CRITICAL + 1,
    }
    if human_log_level not in level_by_name:
        raise NotImplementedError(f"Unknown log level {human_log_level}.")
    return level_by_name[human_log_level]


def init_logging(human_log_level: str = "info") -> None:
    """Init the `logging.Logger`.

    It should be called only once in the app (e.g. in `main`). It sets
    the log_level to one of `HUMAN_LOG_LEVELS` and then installs the log
    formatter/handler. The logging level is propagated to all
    subprocesses.
    """
    log_level = _human_log_level_to_int(human_log_level)
    _new_logger(log_level)
    _set_log_formatter()


def update_log_level(logger, human_log_level: str):
    """Set `logger` to the level named by `human_log_level` (one of
    `HUMAN_LOG_LEVELS`)."""
    log_level = _human_log_level_to_int(human_log_level)
    logger.setLevel(log_level)


def find_free_port(address: str = "127.0.0.1") -> int:
    """Finds a free port for distributed training.

    A TCP socket is bound to port 0 on `address`, the assigned port number
    is read back, and the socket is closed again.

    # Parameters

    address: the address to bind to

    # Returns

    port: port number that can be used to listen
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.bind((address, 0))
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        _, port = sock.getsockname()
    return port


def _new_logger(log_level: Optional[int] = None):
    """Create the module-wide logger if needed and optionally set its level.

    # Parameters

    log_level : If not `None`, the level to set on the logger.

    # Returns

    `True` if the logger was created by this call, `False` if it already
    existed.
    """
    global _LOGGER
    created = _LOGGER is None
    if created:
        _LOGGER = mp.get_logger()
    if log_level is not None:
        get_logger().setLevel(log_level)
    return created


def _set_log_formatter():
    """Attach a formatted stream handler to the module-wide logger.

    Unless the logger's effective level is above `logging.CRITICAL`, this
    adds a `logging.StreamHandler` with the (colored) format and the
    AllenAct message filter, installs `_excepthook` as `sys.excepthook`,
    and replaces `sys.stdout` by a `_StreamToLogger`.

    # Returns

    The module-wide logger.
    """
    assert _LOGGER is not None

    # Effective level above CRITICAL: leave everything untouched.
    if _LOGGER.getEffectiveLevel() > logging.CRITICAL:
        return _LOGGER

    add_style_to_logs = True  # In case someone wants to turn this off manually.
    log_format = "default"
    short_date_format = "%m/%d %H:%M:%S"

    if add_style_to_logs:
        styled_template = "$BOLD[%(asctime)s$RESET %(levelname)s$BOLD:]$RESET %(message)s\t[%(filename)s: %(lineno)d]"
        default_format = styled_template.replace("$BOLD", ColoredFormatter.BOLD_SEQ)
        default_format = default_format.replace("$RESET", ColoredFormatter.RESET_SEQ)
        formatter_cls = ColoredFormatter
    else:
        default_format = (
            "%(asctime)s %(levelname)s: %(message)s\t[%(filename)s: %(lineno)d]"
        )
        formatter_cls = logging.Formatter

    if log_format == "default":
        fmt, datefmt = default_format, short_date_format
    elif log_format == "defaultMilliseconds":
        fmt, datefmt = default_format, None
    else:
        fmt, datefmt = log_format, short_date_format

    handler = logging.StreamHandler()
    handler.setFormatter(formatter_cls(fmt=fmt, datefmt=datefmt))
    handler.addFilter(cast(logging.Filter, _AllenActMessageFilter(os.getcwd())))
    _LOGGER.addHandler(handler)

    # Route uncaught exceptions and everything written to stdout through the
    # logger.
    sys.excepthook = _excepthook
    sys.stdout = cast(io.TextIOWrapper, _StreamToLogger())

    return _LOGGER


class _StreamToLogger:
    """File-like object forwarding complete written lines to the module-wide
    logger at INFO level."""

    def __init__(self):
        # Text received so far that has not been terminated by a newline.
        self.linebuf = ""

    def write(self, buf):
        """Log every newline-terminated line; buffer the unterminated rest."""
        pending = self.linebuf + buf
        self.linebuf = ""
        for chunk in pending.splitlines(True):
            if chunk.endswith("\n"):
                cast(logging.Logger, _LOGGER).info(chunk.rstrip())
            else:
                self.linebuf += chunk

    def flush(self):
        """Log whatever is left in the buffer and clear it."""
        if self.linebuf:
            cast(logging.Logger, _LOGGER).info(self.linebuf.rstrip())
        self.linebuf = ""


def _excepthook(*args):
    """Log an uncaught exception (given as the `sys.excepthook` arguments) at
    ERROR level."""
    logger = get_logger()
    # noinspection PyTypeChecker
    logger.error(msg="Uncaught exception:", exc_info=args)


class _AllenActMessageFilter:
    """Log filter that keeps records based on the path of their source file."""

    def __init__(self, working_directory: str):
        self.working_directory = working_directory

    # noinspection PyMethodMayBeStatic
    def filter(self, record):
        """Return 1 if `record.pathname` contains the working directory, the
        AllenAct install directory, or "main"; otherwise 0."""
        # TODO: Does this work when pip-installing AllenAct?
        pathname = record.pathname
        accepted_fragments = (self.working_directory, ALLENACT_INSTALL_DIR, "main")
        return int(any(fragment in pathname for fragment in accepted_fragments))


class ImportChecker:
    """Context manager appending an extra hint to `ModuleNotFoundError`s.

    If the managed block raises exactly `ModuleNotFoundError` and a `msg` was
    given, `msg` is appended to the exception's message. Exceptions are never
    suppressed.
    """

    def __init__(self, msg=None):
        self.msg = msg

    def __enter__(self):
        return None

    def __exit__(self, exc_type, value, traceback):
        has_hint = self.msg is not None
        if has_hint and exc_type == ModuleNotFoundError:
            value.msg += self.msg
        # Falsy whenever an exception occurred, so it keeps propagating.
        return exc_type is None


================================================
FILE: allenact/utils/tensor_utils.py
================================================
"""Functions used to manipulate pytorch tensors and numpy arrays."""

import numbers
import os
import tempfile
from collections import defaultdict
from typing import List, Dict, Optional, DefaultDict, Union, Any, cast

import PIL
import numpy as np
import torch
from PIL import Image
from moviepy import editor as mpy
from moviepy.editor import concatenate_videoclips
from tensorboardX import SummaryWriter as TBXSummaryWriter, summary as tbxsummary
from tensorboardX.proto.summary_pb2 import Summary as TBXSummary

# noinspection PyProtectedMember
from tensorboardX.utils import _prepare_video as tbx_prepare_video
from tensorboardX.x2num import make_np as tbxmake_np

from allenact.utils.system import get_logger


def to_device_recursively(
    input: Any, device: Union[str, torch.device, int], inplace: bool = True
):
    """Recursively places tensors on the appropriate device.

    # Parameters

    input : `None`, a tensor, a (possibly nested) tuple / list / dict / set of
        supported values, a numpy array, a scalar, a string, or any object
        exposing a `to(device=..., inplace=...)` method.
    device : The target device, passed to each tensor's `to` method.
    inplace : If `True`, lists, dicts and sets are updated in place and the
        very same container object is returned. If `False`, new containers
        are built. Tuples are always rebuilt.

    # Returns

    The input structure with every tensor moved to `device`. Numpy arrays,
    scalars, strings and `None` are returned unchanged.

    # Raises

    NotImplementedError : If a value of an unsupported type is encountered.
    """
    if input is None:
        return input
    elif isinstance(input, torch.Tensor):
        return input.to(device)  # type: ignore
    elif isinstance(input, tuple):
        return tuple(
            to_device_recursively(input=subinput, device=device, inplace=inplace)
            for subinput in input
        )
    elif isinstance(input, list):
        if inplace:
            for i, subinput in enumerate(input):
                input[i] = to_device_recursively(
                    input=subinput, device=device, inplace=inplace
                )
            return input
        else:
            return [
                to_device_recursively(input=subpart, device=device, inplace=inplace)
                for subpart in input
            ]
    elif isinstance(input, dict):
        if inplace:
            # Only values of existing keys are replaced, so iterating the
            # dict while assigning is safe (its size never changes).
            for key in input:
                input[key] = to_device_recursively(
                    input=input[key], device=device, inplace=inplace
                )
            return input
        else:
            return {
                k: to_device_recursively(input=input[k], device=device, inplace=inplace)
                for k in input
            }
    elif isinstance(input, set):
        if inplace:
            moved = [
                to_device_recursively(element, device=device, inplace=inplace)
                for element in input
            ]
            input.clear()
            input.update(moved)
            # Previously this branch fell through and returned None.
            return input
        else:
            return set(
                to_device_recursively(k, device=device, inplace=inplace) for k in input
            )
    elif isinstance(input, np.ndarray) or np.isscalar(input) or isinstance(input, str):
        return input
    elif hasattr(input, "to"):
        # noinspection PyCallingNonCallable
        return input.to(device=device, inplace=inplace)
    else:
        raise NotImplementedError(
            "Sorry, value of type {} is not supported.".format(type(input))
        )


def detach_recursively(input: Any, inplace=True):
    """Recursively detaches tensors in some data structure from their
    computation graph.

    # Parameters

    input : `None`, a tensor, a (possibly nested) tuple / list / dict / set of
        supported values, a numpy array, a scalar, a string, or any object
        exposing a `detach_recursively(inplace=...)` method.
    inplace : If `True`, lists, dicts and sets are updated in place and the
        very same container object is returned. If `False`, new containers
        are built. Tuples are always rebuilt.

    # Returns

    The input structure with every tensor replaced by its detached version.
    Numpy arrays, scalars, strings and `None` are returned unchanged.

    # Raises

    NotImplementedError : If a value of an unsupported type is encountered.
    """
    if input is None:
        return input
    elif isinstance(input, torch.Tensor):
        return input.detach()
    elif isinstance(input, tuple):
        return tuple(
            detach_recursively(input=subinput, inplace=inplace) for subinput in input
        )
    elif isinstance(input, list):
        if inplace:
            for i, subinput in enumerate(input):
                input[i] = detach_recursively(subinput, inplace=inplace)
            return input
        else:
            return [
                detach_recursively(input=subinput, inplace=inplace)
                for subinput in input
            ]
    elif isinstance(input, dict):
        if inplace:
            # Only values of existing keys are replaced, so iterating the
            # dict while assigning is safe (its size never changes).
            for key in input:
                input[key] = detach_recursively(input[key], inplace=inplace)
            return input
        else:
            return {k: detach_recursively(input[k], inplace=inplace) for k in input}
    elif isinstance(input, set):
        if inplace:
            detached = [
                detach_recursively(element, inplace=inplace) for element in input
            ]
            input.clear()
            input.update(detached)
            # Previously this branch fell through and returned None.
            return input
        else:
            return set(detach_recursively(k, inplace=inplace) for k in input)
    elif isinstance(input, np.ndarray) or np.isscalar(input) or isinstance(input, str):
        return input
    elif hasattr(input, "detach_recursively"):
        # noinspection PyCallingNonCallable
        return input.detach_recursively(inplace=inplace)
    else:
        raise NotImplementedError(
            "Sorry, hidden state of type {} is not supported.".format(type(input))
        )


def batch_observations(
    observations: List[Dict], device: Optional[torch.device] = None
) -> Dict[str, Union[Dict, torch.Tensor]]:
    """Turn a list of (possibly nested) observation dicts into a single dict
    whose leaves are tensors stacked along a new leading batch dimension.

    # Arguments

    observations :  List of dicts of observations.
    device : The torch.device to put the resulting tensors on.
        Will not move the tensors if None.

    # Returns

    A dict mirroring the structure of a single observation in which each leaf
    is the stack (dim 0) of the corresponding leaves of all observations. An
    empty `observations` input is returned as is.
    """

    def dict_from_observation(
        observation: Dict[str, Any]
    ) -> Dict[str, Union[Dict, List]]:
        # Seeds the batch structure from the first observation.
        seeded: DefaultDict = defaultdict(list)
        for sensor, value in observation.items():
            if isinstance(value, Dict):
                seeded[sensor] = dict_from_observation(value)
            else:
                seeded[sensor].append(to_tensor(value))
        return seeded

    def fill_dict_from_observations(
        input_batch: Any, observation: Dict[str, Any]
    ) -> None:
        # Appends the leaves of one more observation to the seeded structure.
        for sensor, value in observation.items():
            if isinstance(value, Dict):
                fill_dict_from_observations(input_batch[sensor], value)
            else:
                input_batch[sensor].append(to_tensor(value))

    def dict_to_batch(input_batch: Any) -> None:
        # Replaces every list of per-observation tensors by one stacked tensor.
        for sensor, collected in input_batch.items():
            if isinstance(collected, Dict):
                dict_to_batch(collected)
            else:
                on_device = [item.to(device=device) for item in collected]
                input_batch[sensor] = torch.stack(on_device, dim=0)

    if len(observations) == 0:
        return cast(Dict[str, Union[Dict, torch.Tensor]], observations)

    batched = dict_from_observation(observations[0])
    for later_observation in observations[1:]:
        fill_dict_from_observations(batched, later_observation)
    dict_to_batch(batched)

    return cast(Dict[str, Union[Dict, torch.Tensor]], batched)


def to_tensor(v) -> torch.Tensor:
    """Coerce the input into a torch.Tensor.

    # Parameters

    v : A tensor (returned untouched), a numpy array (wrapped with
        `torch.from_numpy`), or any other value accepted by `torch.tensor`.

    # Returns

    A tensor version of the input. Values that are neither tensors nor numpy
    arrays become `int64` tensors when they are integral numbers and
    `torch.float` tensors otherwise.
    """
    if torch.is_tensor(v):
        return v
    if isinstance(v, np.ndarray):
        return torch.from_numpy(v)

    if isinstance(v, numbers.Integral):
        target_dtype = torch.int64
    else:
        target_dtype = torch.float
    return torch.tensor(v, dtype=target_dtype)


def tile_images(images: List[np.ndarray]) -> np.ndarray:
    """Tile multiple images into single image.

    The images are laid out row by row on a grid with
    `rows = ceil(sqrt(n_images))` and `cols = ceil(n_images / rows)`. Grid
    cells without an image are filled with zeros.

    # Parameters

    images : non-empty sequence (list, tuple or array) of images where each
        image has dimension (height x width x channels). All images must
        share the same shape.

    # Returns

    Tiled image of shape (rows * height x cols * width x channels).
    """
    assert len(images) > 0, "empty list of images"
    np_images = np.asarray(images)
    n_images, height, width, n_channels = np_images.shape
    new_height = int(np.ceil(np.sqrt(n_images)))
    new_width = int(np.ceil(float(n_images) / new_height))

    # pad with empty images to complete the rectangle; padding the stacked
    # array (rather than concatenating python lists) avoids building the
    # stack twice and works for any sequence type.
    n_missing = new_height * new_width - n_images
    if n_missing > 0:
        padding = np.zeros(
            (n_missing, height, width, n_channels), dtype=np_images.dtype
        )
        np_images = np.concatenate([np_images, padding], axis=0)

    # img_HWhwc
    out_image = np_images.reshape((new_height, new_width, height, width, n_channels))
    # img_HhWwc
    out_image = out_image.transpose(0, 2, 1, 3, 4)
    # img_Hh_Ww_c
    out_image = out_image.reshape((new_height * height, new_width * width, n_channels))
    return out_image


class SummaryWriter(TBXSummaryWriter):
    """tensorboardX summary writer extended with a helper for logging
    pre-encoded videos and an `add_image` that goes through this module's
    `image` function."""

    @staticmethod
    def _video(tag, vid):
        """Wrap `vid` into a single-value `Summary` under the cleaned `tag`."""
        # noinspection PyProtectedMember
        clean_tag = tbxsummary._clean_tag(tag)
        entry = TBXSummary.Value(tag=clean_tag, image=vid)
        return TBXSummary(value=[entry])

    def add_vid(self, tag, vid, global_step=None, walltime=None):
        """Write the summary built by `_video(tag, vid)` to the event file."""
        file_writer = self._get_file_writer()
        file_writer.add_summary(self._video(tag, vid), global_step, walltime)

    def add_image(
        self, tag, img_tensor, global_step=None, walltime=None, dataformats="CHW"
    ):
        """Write the summary built by `image(tag, img_tensor, ...)` to the
        event file."""
        file_writer = self._get_file_writer()
        file_writer.add_summary(
            image(tag, img_tensor, dataformats=dataformats), global_step, walltime
        )


def image(tag, tensor, rescale=1, dataformats="CHW"):
    """Build a `Summary` protocol buffer holding a single image.

    The input is converted to a numpy array, rearranged to
    `[height, width, channels]` according to `dataformats` and, unless it is
    already `uint8`, multiplied by 255 and cast to `uint8`.

    # Parameters
    tag: A name for the generated node. Will also serve as a series name in
        TensorBoard.
    tensor: Image data laid out as described by `dataformats`.
        'tensor' can either have values in [0, 1] (non-uint8, e.g. float32) or
        [0, 255] (uint8); the data type is used to decide whether values are
        scaled by 255.
    rescale: The scale, forwarded to tensorboardX's `make_image`.
    dataformats: Input image shape format.


    # Returns
      A `Summary` protocol buffer with one value containing the image.
    """
    # noinspection PyProtectedMember
    clean_tag = tbxsummary._clean_tag(tag)
    pixels = convert_to_HWC(tbxmake_np(tensor), dataformats)
    # Do not assume that user passes in values in [0, 255], use data type to detect
    needs_scaling = pixels.dtype != np.uint8
    if needs_scaling:
        pixels = (pixels * 255.0).astype(np.uint8)

    encoded = tbxsummary.make_image(pixels, rescale=rescale)
    entry = TBXSummary.Value(tag=clean_tag, image=encoded)
    return TBXSummary(value=[entry])


def convert_to_HWC(tensor, input_format):  # tensor: numpy array
    """Rearrange an image array into `[height, width, channels]` layout.

    # Parameters

    tensor : Numpy array holding the image data.
    input_format : String with one letter per dimension of `tensor`
        (case-insensitive), e.g. `"CHW"`, `"HW"` or `"NCHW"`. Supported
        lengths are 2 (`H`, `W`), 3 (`H`, `W`, `C`) and 4 (`N`, `C`, `H`, `W`).

    # Returns

    An array in HWC layout. Two-dimensional inputs and single-channel
    three-dimensional inputs are replicated to three channels. Four-dimensional
    inputs are first tiled into one image with `make_grid`.

    # Raises

    AssertionError : If `input_format` repeats a letter or its length does not
        match the number of dimensions of `tensor`.
    ValueError : If `input_format` does not have 2, 3 or 4 letters (previously
        this case silently returned `None`).
    """
    # Normalise the case first so that e.g. "hHW" is reported as a duplicate.
    input_format = input_format.upper()

    assert len(set(input_format)) == len(input_format), (
        "You can not use the same dimension shorthand twice. "
        "input_format: {}".format(input_format)
    )
    assert len(tensor.shape) == len(input_format), (
        "size of input tensor and input format are different. "
        "tensor shape: {}, input_format: {}".format(tensor.shape, input_format)
    )

    if len(input_format) == 4:
        index = [input_format.find(c) for c in "NCHW"]
        tensor_NCHW = tensor.transpose(index)
        tensor_CHW = make_grid(tensor_NCHW)
        # noinspection PyTypeChecker
        return tensor_CHW.transpose(1, 2, 0)

    if len(input_format) == 3:
        index = [input_format.find(c) for c in "HWC"]
        tensor_HWC = tensor.transpose(index)
        if tensor_HWC.shape[2] == 1:
            # Grayscale: replicate the single channel to get RGB.
            tensor_HWC = np.concatenate([tensor_HWC, tensor_HWC, tensor_HWC], 2)
        return tensor_HWC

    if len(input_format) == 2:
        index = [input_format.find(c) for c in "HW"]
        tensor = tensor.transpose(index)
        tensor = np.stack([tensor, tensor, tensor], 2)
        return tensor

    raise ValueError(
        "input_format must have 2, 3 or 4 dimensions, got: {}".format(input_format)
    )


def make_grid(I, ncols=8):
    """Tile a batch of images into a single image.

    # Parameters

    I : Numpy array of shape `(N, C, H, W)` with `C` equal to 1, 3 or 4.
        Single-channel batches are replicated to three channels.
    ncols : Maximum number of images per row of the grid.

    # Returns

    Array of shape `(C, H * nrows, W * ncols)` with the same dtype as `I`,
    where the images are placed row by row and unused cells are zero.

    # Raises

    AssertionError : If `I` is not a 4-dimensional numpy array or its channel
        dimension is not 1, 3 or 4.
    """
    # I: N1HW or N3HW

    assert isinstance(I, np.ndarray), "plugin error, should pass numpy array here"
    # Check the rank on its own: the previous combined assertion
    # (`ndim == 4 and C == 3 or C == 4`) skipped it whenever shape[1] == 4.
    assert I.ndim == 4, "expected a 4-dimensional (N, C, H, W) array"
    if I.shape[1] == 1:
        I = np.concatenate([I, I, I], 1)
    assert I.shape[1] == 3 or I.shape[1] == 4
    nimg = I.shape[0]
    H = I.shape[2]
    W = I.shape[3]
    ncols = min(nimg, ncols)
    nrows = int(np.ceil(float(nimg) / ncols))
    canvas = np.zeros((I.shape[1], H * nrows, W * ncols), dtype=I.dtype)
    for i in range(nimg):
        y, x = divmod(i, ncols)
        canvas[:, y * H : (y + 1) * H, x * W : (x + 1) * W] = I[i]
    return canvas


def tensor_to_video(tensor, fps=4):
    """Encode `tensor` as a video with tensorboardX's `make_video`.

    The input is converted to a numpy array and passed through tensorboardX's
    video preparation helper. Data that is not `uint8` is multiplied by 255
    and cast to `uint8` before encoding at `fps` frames per second.
    """
    frames = tbx_prepare_video(tbxmake_np(tensor))
    # If user passes in uint8, then we don't need to rescale by 255
    if frames.dtype != np.uint8:
        frames = (frames * 255.0).astype(np.uint8)

    return tbxsummary.make_video(frames, fps)


def tensor_to_clip(tensor, fps=4):
    """Turn `tensor` into a moviepy `ImageSequenceClip`.

    The input is converted to a numpy array and passed through tensorboardX's
    video preparation helper. Data that is not `uint8` is multiplied by 255
    and cast to `uint8`.

    # Returns

    A tuple `(clip, (h, w, c))` with the clip and the last three dimensions of
    the prepared (4-dimensional) frame array.
    """
    frames = tbx_prepare_video(tbxmake_np(tensor))
    # If user passes in uint8, then we don't need to rescale by 255
    if frames.dtype != np.uint8:
        frames = (frames * 255.0).astype(np.uint8)

    _, h, w, c = frames.shape
    frame_list = list(frames)

    return mpy.ImageSequenceClip(frame_list, fps=fps), (h, w, c)


def clips_to_video(clips, h, w, c):
    """Concatenate `clips` and encode the result as a gif image summary.

    The concatenated clip is written to a temporary `.gif` file, read back as
    bytes and wrapped into a `Summary.Image`. The temporary file is removed
    even when writing or reading fails.

    # Parameters

    clips : Sequence of moviepy clips to concatenate.
    h : Height stored in the returned image summary.
    w : Width stored in the returned image summary.
    c : Value stored as `colorspace` in the returned image summary.

    # Returns

    A `Summary.Image` whose `encoded_image_string` holds the gif bytes.
    """
    # encode sequence of images into gif string
    clip = concatenate_videoclips(clips)

    # Only the unique path is needed: close the handle right away instead of
    # leaving it to garbage collection, then let moviepy write to that path.
    with tempfile.NamedTemporaryFile(suffix=".gif", delete=False) as tmp_file:
        filename = tmp_file.name

    try:
        # moviepy >= 1.0.0 use logger=None to suppress output.
        try:
            clip.write_gif(filename, verbose=False, logger=None)
        except TypeError:
            get_logger().warning(
                "Upgrade to moviepy >= 1.0.0 to suppress the progress bar."
            )
            clip.write_gif(filename, verbose=False)

        with open(filename, "rb") as f:
            tensor_string = f.read()
    finally:
        # Clean up on success and on failure alike; a failed removal is only
        # reported, never raised.
        try:
            os.remove(filename)
        except OSError:
            get_logger().warning(
                "The temporary file used by moviepy cannot be deleted."
            )

    return TBXSummary.Image(
        height=h, width=w, colorspace=c, encoded_image_string=tensor_string
    )


def process_video(render, max_clip_len=500, max_video_len=-1, fps=4):
    """Encode a sequence of frames as a gif image summary.

    # Parameters

    render : Sequence of frames, each stacked as (H, W, C).
    max_clip_len : Number of frames converted into a clip at a time.
    max_video_len : If positive, only the first `max_video_len` frames are
        kept (a warning is logged when frames are dropped).
    fps : Frames per second of the generated clips.

    # Returns

    The result of `clips_to_video` on the generated clips, or `None` when
    `render` is empty or a `MemoryError` is raised along the way.
    """
    total_frames = len(render)
    if not total_frames > 0:
        get_logger().warning("Calling process_video with 0 frames")
        return None

    if total_frames > max_video_len > 0:
        get_logger().warning(
            "Clipping video to first {} frames out of {} original frames".format(
                max_video_len, total_frames
            )
        )
        render = render[:max_video_len]

    clips = []
    hwc = None
    for first_frame in range(0, len(render), max_clip_len):
        chunk = render[first_frame : first_frame + max_clip_len]
        try:
            # T, H, W, C -> T, C, H, W -> 1, T, C, H, W
            stacked = np.stack(chunk, axis=0).transpose((0, 3, 1, 2))
            batched = np.expand_dims(stacked, axis=0)
            clip, clip_hwc = tensor_to_clip(batched, fps=fps)

            if hwc is not None:
                assert (
                    hwc == clip_hwc
                ), "Inconsistent clip shape: previous {} current {}".format(
                    hwc, clip_hwc
                )
            else:
                hwc = clip_hwc

            clips.append(clip)
        except MemoryError:
            get_logger().error(
                "Skipping video due to memory error with clip of length {}".format(
                    len(chunk)
                )
            )
            return None

    assert len(clips) > 0, "No clips to concatenate"
    assert hwc is not None, "No tensor dims assigned"

    try:
        return clips_to_video(clips, *hwc)
    except MemoryError:
        get_logger().error("Skipping video due to memory error calling clips_to_video")
        return None


class ScaleBothSides(object):
    """Callable that resizes an image to a fixed `width` and `height`.

    Attributes
        width: target width
        height: target height
        interpolation: resampling filter passed to `resize`.
            Default: PIL.Image.BILINEAR
    """

    def __init__(self, width: int, height: int, interpolation=Image.BILINEAR):
        self.width, self.height = width, height
        self.interpolation = interpolation

    def __call__(self, img: PIL.Image) -> PIL.Image:
        # `resize` is given the size as (width, height).
        target_size = (self.width, self.height)
        return img.resize(target_size, self.interpolation)


================================================
FILE: allenact/utils/viz_utils.py
================================================
import abc
import json
import os
import sys
from collections import defaultdict
from typing import (
    Dict,
    Any,
    Union,
    Optional,
    List,
    Tuple,
    Sequence,
    Callable,
    cast,
    Set,
)

import numpy as np

from allenact.utils.experiment_utils import Builder
from allenact.utils.tensor_utils import SummaryWriter, tile_images, process_video

try:
    # Tensorflow not installed for testing
    from tensorflow.core.util import event_pb2
    from tensorflow.python.lib.io import tf_record

    _TF_AVAILABLE = True
except ImportError as _:
    event_pb2 = None
    tf_record = None

    _TF_AVAILABLE = False

import matplotlib

try:
    # When debugging we don't want to use the interactive version of matplotlib
    # as it causes all sorts of problems.

    # noinspection PyPackageRequirements
    import pydevd

    matplotlib.use("agg")
except ImportError as _:
    pass

import matplotlib.pyplot as plt
import matplotlib.markers as markers
import cv2

from allenact.utils.system import get_logger


class AbstractViz:
    """Base class for visualizations that are logged through a `SummaryWriter`.

    It stores the data sources a visualization needs, the path used to read
    an episode's id out of a task output, and the grouping of episodes into
    pages of at most `max_episodes_in_group` entries.
    """

    def __init__(
        self,
        label: Optional[str] = None,
        vector_task_sources: Sequence[Tuple[str, Dict[str, Any]]] = (),
        rollout_sources: Sequence[Union[str, Sequence[str]]] = (),
        actor_critic_source: bool = False,
        **kwargs,  # accepts `max_episodes_in_group`
    ):
        self.label = label
        self.vector_task_sources = list(vector_task_sources)

        # Every rollout source is stored as a list of keys, even if a single
        # string was given.
        normalized_sources = []
        for entry in rollout_sources:
            if isinstance(entry, str):
                normalized_sources.append([entry])
            else:
                normalized_sources.append(list(entry))
        self.rollout_sources = normalized_sources

        self.actor_critic_source = actor_critic_source

        self.mode: Optional[str] = None
        self.path_to_id: Optional[Sequence[str]] = None
        self.episode_ids: Optional[List[Sequence[str]]] = None

        # Remember whether the group size was given explicitly, `_setup` only
        # overrides it when it was not (or when forced).
        self.assigned_max_eps_in_group = "max_episodes_in_group" in kwargs
        self.max_episodes_in_group = kwargs.get("max_episodes_in_group", 8)

    @staticmethod
    def _source_to_str(source, is_vector_task):
        """Build the string identifier of a data source from its parts."""
        source_type = "vector_task" if is_vector_task else "rollout_or_actor_critic"
        separator = "__{}_sep__".format(source_type)
        joined_parts = separator.join(["{}".format(part) for part in source])
        return "{}__{}".format(source_type, joined_parts)

    @staticmethod
    def _access(dictionary, path):
        """Follow the keys in the list `path` into nested `dictionary`."""
        remaining = path[::-1]  # reversed copy, so keys are popped in order
        while len(remaining) > 0:
            dictionary = dictionary[remaining.pop()]
        return dictionary

    def _auto_viz_order(self, task_outputs):
        """Return `(viz_order, all_episodes)` for the given task outputs.

        `all_episodes` maps episode ids to task outputs. `viz_order` is the
        stored `episode_ids` if set, otherwise the ids of `all_episodes` split
        into consecutive groups of `max_episodes_in_group`. Both are `None`
        when `task_outputs` is `None`.
        """
        if task_outputs is None:
            return None, None

        all_episodes = {}
        for episode in task_outputs:
            all_episodes[self._access(episode, self.path_to_id)] = episode

        if self.episode_ids is not None:
            return self.episode_ids, all_episodes

        ids = list(all_episodes.keys())
        viz_order = [
            ids[start : start + self.max_episodes_in_group]
            for start in range(0, len(ids), self.max_episodes_in_group)
        ]
        get_logger().debug("visualizing with order {}".format(viz_order))

        return viz_order, all_episodes

    def _setup(
        self,
        mode: str,
        path_to_id: Sequence[str],
        episode_ids: Optional[Sequence[Union[Sequence[str], str]]],
        max_episodes_in_group: int,
        force: bool = False,
    ):
        """Store the mode and id path, and fill in episode ids and group size
        unless they were already assigned (`force` overrides both)."""
        self.mode = mode
        self.path_to_id = list(path_to_id)

        may_assign_ids = self.episode_ids is None or force
        if may_assign_ids and episode_ids is not None:
            if isinstance(episode_ids[0], str):
                # A flat sequence of ids forms a single group.
                self.episode_ids = [list(cast(List[str], episode_ids))]
            else:
                self.episode_ids = list(episode_ids)

        if force or not self.assigned_max_eps_in_group:
            self.max_episodes_in_group = max_episodes_in_group

    @abc.abstractmethod
    def log(
        self,
        log_writer: SummaryWriter,
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        """Produce the visualization and write it with `log_writer`."""
        raise NotImplementedError()


class TrajectoryViz(AbstractViz):
    """Visualization that plots the 2D path followed in each episode.

    For every episode a matplotlib figure is created showing the trajectory as
    a colored line, a start marker (optionally rotated), a square marker at
    the last position and, if configured, a star at the target location. The
    figures of each group of episodes are logged together via `add_figure`.
    """

    def __init__(
        self,
        path_to_trajectory: Sequence[str] = ("task_info", "followed_path"),
        path_to_target_location: Optional[Sequence[str]] = (
            "task_info",
            "target_position",
        ),
        path_to_x: Sequence[str] = ("x",),
        path_to_y: Sequence[str] = ("z",),
        path_to_rot_degrees: Optional[Sequence[str]] = ("rotation", "y"),
        adapt_rotation: Optional[Callable[[float], float]] = None,
        label: str = "trajectory",
        figsize: Tuple[float, float] = (2, 2),
        fontsize: float = 5,
        start_marker_shape: str = r"$\spadesuit$",
        start_marker_scale: int = 100,
        **other_base_kwargs,
    ):
        """Store the key paths used to read the plotted data and plot options.

        # Parameters

        path_to_trajectory : Keys leading from an episode's task output to the
            sequence of visited locations.
        path_to_target_location : Keys leading from an episode's task output
            to the target location, or `None` to not plot a target.
        path_to_x : Keys leading from a location to the value plotted on the
            x axis.
        path_to_y : Keys leading from a location to the value plotted on the
            y axis.
        path_to_rot_degrees : Keys leading from a location to a rotation in
            degrees used to rotate the start marker, or `None` to not rotate.
        adapt_rotation : Optional function applied to the rotation in degrees
            before rotating the start marker.
        label : Label used in the tag of the logged figures.
        figsize : Size of each figure, passed to `plt.subplots`.
        fontsize : Font size of the title and tick labels.
        start_marker_shape : Marker used for the first location.
        start_marker_scale : Size (`s`) of the start marker in the scatter.
        other_base_kwargs : Forwarded to `AbstractViz.__init__`.
        """
        super().__init__(label, **other_base_kwargs)
        # All paths are stored as lists because `_access` pops from a copy.
        self.path_to_trajectory = list(path_to_trajectory)
        self.path_to_target_location = (
            list(path_to_target_location)
            if path_to_target_location is not None
            else None
        )
        self.adapt_rotation = adapt_rotation
        self.x = list(path_to_x)
        self.y = list(path_to_y)
        self.path_to_rot_degrees = (
            list(path_to_rot_degrees) if path_to_rot_degrees is not None else None
        )
        self.figsize = figsize
        self.fontsize = fontsize
        self.start_marker_shape = start_marker_shape
        self.start_marker_scale = start_marker_scale

    def log(
        self,
        log_writer: SummaryWriter,
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        """Log one group of trajectory figures per page of episodes.

        Each page is written with tag `"{mode}/{label}_group{page}"` at global
        step `num_steps`. Episode ids without a matching task output are
        skipped with a warning and pages without any figure are not logged.
        `render` is not used by this visualization.
        """
        viz_order, all_episodes = self._auto_viz_order(task_outputs)
        if viz_order is None:
            # `_auto_viz_order` returns None when there are no task outputs.
            get_logger().debug("trajectory viz returning without visualizing")
            return

        for page, current_ids in enumerate(viz_order):
            figs = []
            for episode_id in current_ids:
                # assert episode_id in all_episodes
                if episode_id not in all_episodes:
                    get_logger().warning(
                        "skipping viz for missing episode {}".format(episode_id)
                    )
                    continue
                figs.append(self.make_fig(all_episodes[episode_id], episode_id))
            if len(figs) == 0:
                continue
            log_writer.add_figure(
                "{}/{}_group{}".format(self.mode, self.label, page),
                figs,
                global_step=num_steps,
            )
            plt.close(
                "all"
            )  # close all current figures (SummaryWriter already closes all figures we log)

    def make_fig(self, episode, episode_id):
        """Create the trajectory figure for a single episode.

        # Parameters

        episode : Task output of the episode; the trajectory and (optionally)
            the target location are read from it with the configured paths.
        episode_id : Used as the title of the figure.

        # Returns

        The matplotlib figure.
        """

        # From https://nbviewer.jupyter.org/github/dpsanders/matplotlib-examples/blob/master/colorline.ipynb
        def colorline(
            x,
            y,
            z=None,
            cmap=plt.get_cmap("cool"),
            norm=plt.Normalize(0.0, 1.0),
            linewidth=2,
            alpha=1.0,
            zorder=1,
        ):
            """Plot a colored line with coordinates x and y.

            Optionally specify colors in the array z

            Optionally specify a colormap, a norm function and a line width.
            """

            def make_segments(x, y):
                """Create list of line segments from x and y coordinates, in
                the correct format for LineCollection:

                an array of the form  numlines x (points per line) x 2
                (x and y) array
                """
                points = np.array([x, y]).T.reshape(-1, 1, 2)
                # Pair every point with its successor to get one segment each.
                segments = np.concatenate([points[:-1], points[1:]], axis=1)
                return segments

            # Default colors equally spaced on [0,1]:
            if z is None:
                z = np.linspace(0.0, 1.0, len(x))

            # Special case if a single number:
            if not hasattr(
                z, "__iter__"
            ):  # to check for numerical input -- this is a hack
                z = np.array([z])

            z = np.asarray(z)

            segments = make_segments(x, y)
            lc = matplotlib.collections.LineCollection(
                segments,
                array=z,
                cmap=cmap,
                norm=norm,
                linewidth=linewidth,
                alpha=alpha,
                zorder=zorder,
            )

            # The collection is added to whatever axes are current when this
            # helper is called.
            ax = plt.gca()
            ax.add_collection(lc)

            return lc

        trajectory = self._access(episode, self.path_to_trajectory)

        # Collect the plotted coordinates of every visited location.
        x, y = [], []
        for xy in trajectory:
            x.append(float(self._access(xy, self.x)))
            y.append(float(self._access(xy, self.y)))

        # `colorline` draws on the current axes, so the figure must be created
        # before calling it.
        fig, ax = plt.subplots(figsize=self.figsize)
        colorline(x, y, zorder=1)

        start_marker = markers.MarkerStyle(marker=self.start_marker_shape)
        if self.path_to_rot_degrees is not None:
            # Rotate the start marker by the rotation of the first location.
            rot_degrees = float(self._access(trajectory[0], self.path_to_rot_degrees))
            if self.adapt_rotation is not None:
                rot_degrees = self.adapt_rotation(rot_degrees)
            # NOTE(review): this overwrites a private attribute of MarkerStyle.
            start_marker._transform = start_marker.get_transform().rotate_deg(
                rot_degrees
            )

        ax.scatter(
            [x[0]], [y[0]], marker=start_marker, zorder=2, s=self.start_marker_scale
        )
        ax.scatter([x[-1]], [y[-1]], marker="s")  # stop

        if self.path_to_target_location is not None:
            # Mark the target location with a star.
            target = self._access(episode, self.path_to_target_location)
            ax.scatter(
                [float(self._access(target, self.x))],
                [float(self._access(target, self.y))],
                marker="*",
            )

        ax.set_title(episode_id, fontsize=self.fontsize)
        ax.tick_params(axis="x", labelsize=self.fontsize)
        ax.tick_params(axis="y", labelsize=self.fontsize)

        return fig


class AgentViewViz(AbstractViz):
    """Visualization that logs videos built from per-step rendered frames.

    The frames of all episodes in a group are labelled with their episode id,
    tiled side by side and encoded as a single video per group.
    """

    def __init__(
        self,
        label: str = "agent_view",
        max_clip_length: int = 100,  # control memory used when converting groups of images into clips
        max_video_length: int = -1,  # no limit, if > 0, limit the maximum video length (discard last frames)
        vector_task_source: Tuple[str, Dict[str, Any]] = (
            "render",
            {"mode": "raw_rgb_list"},
        ),
        episode_ids: Optional[Sequence[Union[Sequence[str], str]]] = None,
        fps: int = 4,
        max_render_size: int = 400,
        **other_base_kwargs,
    ):
        """Store the video options and the source of the rendered frames.

        # Parameters

        label : Label used in the tag of the logged videos.
        max_clip_length : Number of frames converted into a clip at a time.
        max_video_length : If positive, maximum number of frames per video.
        vector_task_source : Source producing the frames of each step.
        episode_ids : Optional episode ids to visualize, either a flat
            sequence of ids (one group) or a sequence of groups of ids.
        fps : Frames per second of the generated video.
        max_render_size : Stored on the instance; not used by the methods of
            this class.
        other_base_kwargs : Forwarded to `AbstractViz.__init__`.
        """
        super().__init__(
            label,
            vector_task_sources=[vector_task_source],
            **other_base_kwargs,
        )
        self.max_clip_length = max_clip_length
        self.max_video_length = max_video_length
        self.fps = fps
        self.max_render_size = max_render_size

        # Same normalization as `AbstractViz._setup`: a flat sequence of ids
        # becomes a single group.
        self.episode_ids = (
            (
                list(episode_ids)
                if not isinstance(episode_ids[0], str)
                else [list(cast(List[str], episode_ids))]
            )
            if episode_ids is not None
            else None
        )

    def log(
        self,
        log_writer: SummaryWriter,
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        """Log one video per group of episodes.

        Each video is written with tag `"{mode}/{label}_group{page}"` at
        global step `num_steps`. Episode ids without rendered data are skipped
        with a warning. Nothing is logged when `render` is `None`, when there
        are no task outputs, or when no video could be produced for a group.
        """
        if render is None:
            return

        datum_id = self._source_to_str(self.vector_task_sources[0], is_vector_task=True)

        viz_order, _ = self._auto_viz_order(task_outputs)
        if viz_order is None:
            get_logger().debug("agent view viz returning without visualizing")
            return

        for page, current_ids in enumerate(viz_order):
            images = []  # list of lists of rgb frames
            for episode_id in current_ids:
                # assert episode_id in render
                if episode_id not in render:
                    get_logger().warning(
                        "skipping viz for missing episode {}".format(episode_id)
                    )
                    continue
                images.append(
                    [
                        self._overlay_label(step[datum_id], episode_id)
                        for step in render[episode_id]
                    ]
                )
            if len(images) == 0:
                continue
            vid = self.make_vid(images)
            if vid is not None:
                log_writer.add_vid(
                    f"{self.mode}/{self.label}_group{page}",
                    vid,
                    global_step=num_steps,
                )

    @staticmethod
    def _overlay_label(
        img,
        text,
        pos=(0, 0),
        bg_color=(255, 255, 255),
        fg_color=(0, 0, 0),
        scale=0.4,
        thickness=1,
        margin=2,
        font_face=cv2.FONT_HERSHEY_SIMPLEX,
    ):
        """Draw `text` on a filled background rectangle onto `img`.

        The drawing calls receive `img` itself, so the input image is modified
        in place; the same object is returned.
        """
        txt_size = cv2.getTextSize(text, font_face, scale, thickness)

        end_x = pos[0] + txt_size[0][0] + margin
        end_y = pos[1]

        # Shift the text origin down by the text height plus the margin.
        pos = (pos[0], pos[1] + txt_size[0][1] + margin)

        cv2.rectangle(img, pos, (end_x, end_y), bg_color, cv2.FILLED)
        cv2.putText(
            img=img,
            text=text,
            org=pos,
            fontFace=font_face,
            fontScale=scale,
            color=fg_color,
            thickness=thickness,
            lineType=cv2.LINE_AA,
        )
        return img

    def make_vid(self, images):
        """Tile the frames of several episodes into one video.

        # Parameters

        images : Non-empty list with one list of frames per episode.

        # Returns

        The result of `process_video` on the tiled frames, or `None` if no
        episode has any frame. Episodes shorter than the longest one are
        padded with a gray version of their last frame; episodes without any
        frame are shown as black frames.
        """
        max_length = max([len(ep) for ep in images])

        if max_length == 0:
            return None

        # Any available frame serves as shape/dtype template for black frames.
        valid_im = next(ep[0] for ep in images if len(ep) > 0)

        def padding_frame(ep):
            if len(ep) == 0:
                # No last frame to repeat. The previous check (`it == 0`)
                # missed this case for later frames and crashed on `ep[-1]`.
                return np.zeros_like(valid_im)
            gray = ep[-1].copy()
            gray[:, :, 0] = gray[:, :, 2] = gray[:, :, 1]
            return gray

        # One padding frame per episode that ends early, computed only once.
        padding = [
            padding_frame(ep) if len(ep) < max_length else None for ep in images
        ]

        frames = []
        for it in range(max_length):
            current_images = [
                ep[it] if it < len(ep) else pad for ep, pad in zip(images, padding)
            ]
            frames.append(tile_images(current_images))

        return process_video(
            frames, self.max_clip_length, self.max_video_length, fps=self.fps
        )


class AbstractTensorViz(AbstractViz):
    """Base class for visualizations that plot a single rollout source.

    Subclasses implement `make_fig`, which turns the per-step values collected
    for one episode into a matplotlib figure.
    """

    def __init__(
        self,
        rollout_source: Union[str, Sequence[str]],
        label: Optional[str] = None,
        figsize: Tuple[float, float] = (3, 3),
        **other_base_kwargs,
    ):
        """Register `rollout_source` as the only rollout source.

        When no `label` is given, it defaults to the source itself (if it is a
        string) or to its parts joined with "/".
        """
        if label is None:
            label = (
                rollout_source[:]
                if isinstance(rollout_source, str)
                else "/".join(rollout_source)
            )

        super().__init__(label, rollout_sources=[rollout_source], **other_base_kwargs)

        self.figsize = figsize
        self.datum_id = self._source_to_str(
            self.rollout_sources[0], is_vector_task=False
        )

    def log(
        self,
        log_writer: SummaryWriter,
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        """Log one group of figures per page of episodes.

        Each page is written with tag `"{mode}/{label}_group{page}"` at global
        step `num_steps`. Nothing is logged when `render` is `None` or when
        there are no task outputs; pages without any figure are skipped.
        """
        if render is None:
            return

        viz_order, _ = self._auto_viz_order(task_outputs)
        if viz_order is None:
            get_logger().debug("tensor viz returning without visualizing")
            return

        for page, current_ids in enumerate(viz_order):
            figs = []
            for episode_id in current_ids:
                has_steps = episode_id in render and len(render[episode_id]) != 0
                if not has_steps:
                    get_logger().warning(
                        "skipping viz for missing or 0-length episode {}".format(
                            episode_id
                        )
                    )
                    continue

                episode_src = []
                for step in render[episode_id]:
                    if self.datum_id in step:
                        episode_src.append(step[self.datum_id])

                # If the last episode for an inference worker is of length 1, there's no captured rollout sources
                if len(episode_src) > 0:
                    figs.append(self.make_fig(episode_src, episode_id))

            if len(figs) == 0:
                continue

            tag = "{}/{}_group{}".format(self.mode, self.label, page)
            log_writer.add_figure(tag, figs, global_step=num_steps)
            # close all current figures (SummaryWriter already closes all figures we log)
            plt.close("all")

    @abc.abstractmethod
    def make_fig(
        self, episode_src: Sequence[np.ndarray], episode_id: str
    ) -> matplotlib.figure.Figure:
        """Create the figure for the values collected over one episode."""
        raise NotImplementedError()


class TensorViz1D(AbstractTensorViz):
    """Plots a single-valued rollout source as a curve over episode steps."""

    def __init__(
        self,
        rollout_source: Union[str, Sequence[str]] = "action_log_probs",
        label: Optional[str] = None,
        figsize: Tuple[float, float] = (3, 3),
        **other_base_kwargs,
    ):
        super().__init__(rollout_source, label, figsize, **other_base_kwargs)

    def make_fig(self, episode_src, episode_id):
        """Return a line plot of the per-step values, titled with the episode id."""
        first_step = episode_src[0]
        assert first_step.size == 1

        # Stack the steps along axis 0, then drop every singleton dimension.
        stacked = np.concatenate(episode_src, axis=0)
        seq = stacked.squeeze()

        fig, ax = plt.subplots(figsize=self.figsize)
        ax.plot(seq)
        ax.set_title(episode_id)
        ax.set_aspect("auto")

        plt.tight_layout()
        return fig


class TensorViz2D(AbstractTensorViz):
    """Shows a vector-valued rollout source as a (step x feature) matrix image."""

    def __init__(
        self,
        rollout_source: Union[str, Sequence[str]] = ("memory_first_last", "rnn"),
        label: Optional[str] = None,
        figsize: Tuple[float, float] = (10, 10),
        fontsize: float = 5,
        **other_base_kwargs,
    ):
        super().__init__(rollout_source, label, figsize, **other_base_kwargs)
        self.fontsize = fontsize

    def make_fig(self, episode_src, episode_id):
        """Return a `matshow` figure of the concatenated per-step arrays.

        The per-step arrays are concatenated along axis 0 (the step axis).
        Singleton axes other than the step axis are removed, so that e.g. a
        single layer axis disappears. The step axis itself is always kept,
        which keeps single-step episodes two-dimensional instead of
        collapsing them to 1-D (an unconditional `squeeze()` would do that
        and trip the assertion below).

        Raises:
            AssertionError: if more than one non-singleton axis remains
                besides the step axis.
        """
        # Concatenate along step axis (0)
        seq = np.concatenate(episode_src, axis=0)

        # Remove singleton axes (e.g. num_layers == 1) but never the step axis
        kept_dims = tuple(d for d in seq.shape[1:] if d != 1)
        seq = seq.reshape((seq.shape[0],) + kept_dims)
        if seq.ndim == 1:
            # Every non-step axis was a singleton: show a single column
            seq = seq[:, np.newaxis]
        assert len(seq.shape) == 2, "No support for higher-dimensions"

        fig, ax = plt.subplots(figsize=self.figsize)
        ax.matshow(seq)

        ax.set_xlabel(episode_id, fontsize=self.fontsize)
        ax.tick_params(axis="x", labelsize=self.fontsize)
        ax.tick_params(axis="y", labelsize=self.fontsize)
        ax.tick_params(bottom=False)

        ax.set_aspect("auto")
        plt.tight_layout()

        return fig


class ActorViz(AbstractViz):
    def __init__(
        self,
        label: str = "action_probs",
        action_names_path: Optional[Sequence[str]] = ("task_info", "action_names"),
        figsize: Tuple[float, float] = (1, 5),
        fontsize: float = 5,
        **other_base_kwargs,
    ):
        """Configure the visualizer and register it as an actor-critic consumer.

        `action_names_path` (stored as a list, or None) is the access path
        later used by `log` to read the action names from a task output.
        `figsize` and `fontsize` are stored for `make_fig`.
        """
        super().__init__(label, actor_critic_source=True, **other_base_kwargs)

        self.action_names_path: Optional[Sequence[str]]
        if action_names_path is None:
            self.action_names_path = None
        else:
            self.action_names_path = list(action_names_path)

        self.figsize = figsize
        self.fontsize = fontsize
        # Filled in by `log` the first time task outputs are available.
        self.action_names: Optional[List[str]] = None

    def log(
        self,
        log_writer: SummaryWriter,
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        """Log one group of action-probability figures per page of episodes.

        On the first call with non-empty `task_outputs`, the action names are
        read from the first task output via `self.action_names_path`. For
        every episode id returned by `_auto_viz_order`, the `"actor_probs"`
        entries of its steps in `render` are passed to `make_fig`.

        Episodes that are missing from `render` or have no recorded steps are
        skipped with a warning: an empty episode would otherwise reach
        `np.concatenate` with an empty list and raise a `ValueError`.

        Raises:
            AssertionError: if some step of a rendered episode lacks
                `"actor_probs"`.
        """
        if render is None:
            return

        if (
            self.action_names is None
            and task_outputs is not None
            and len(task_outputs) > 0
            and self.action_names_path is not None
        ):
            self.action_names = list(
                self._access(task_outputs[0], self.action_names_path)
            )

        viz_order, _ = self._auto_viz_order(task_outputs)
        if viz_order is None:
            get_logger().debug("actor viz returning without visualizing")
            return

        for page, current_ids in enumerate(viz_order):
            figs = []
            for episode_id in current_ids:
                if episode_id not in render or len(render[episode_id]) == 0:
                    get_logger().warning(
                        "skipping viz for missing or 0-length episode {}".format(
                            episode_id
                        )
                    )
                    continue
                episode_src = [
                    step["actor_probs"]
                    for step in render[episode_id]
                    if "actor_probs" in step
                ]
                # Every recorded step is expected to carry actor probabilities
                assert len(episode_src) == len(render[episode_id])
                figs.append(self.make_fig(episode_src, episode_id))
            if len(figs) == 0:
                continue
            log_writer.add_figure(
                "{}/{}_group{}".format(self.mode, self.label, page),
                figs,
                global_step=num_steps,
            )
            plt.close(
                "all"
            )  # close all current figures (SummaryWriter already closes all figures we log)

    def make_fig(self, episode_src, episode_id):
        """Return a `matshow` figure of the per-step action probabilities.

        The per-step arrays are concatenated along axis 0, giving one row per
        step. When action names are known they label the columns. The major
        x ticks are pinned to the column indices first, because
        `set_xticklabels` on automatically located ticks can attach names to
        the wrong columns whenever the locator does not tick every integer.

        Raises:
            AssertionError: if the number of action names differs from the
                size of the last axis of the concatenated matrix.
        """
        # Concatenate along step axis (0, reused from kept sampler axis)
        mat = np.concatenate(episode_src, axis=0)

        fig, ax = plt.subplots(figsize=self.figsize)
        ax.matshow(mat)

        if self.action_names is not None:
            assert len(self.action_names) == mat.shape[-1]
            # One major tick per action column, then label exactly those ticks
            ax.set_xticks(np.arange(mat.shape[-1]))
            ax.set_xticklabels(self.action_names, rotation="vertical")

        ax.set_xlabel(episode_id, fontsize=self.fontsize)
        ax.tick_params(axis="x", labelsize=self.fontsize)
        ax.tick_params(axis="y", labelsize=self.fontsize)
        ax.tick_params(bottom=False)

        # Gridlines based on minor ticks
        ax.set_yticks(np.arange(-0.5, mat.shape[0], 1), minor=True)
        ax.set_xticks(np.arange(-0.5, mat.shape[1], 1), minor=True)
        ax.grid(which="minor", color="w", linestyle="-", linewidth=0.05)
        ax.tick_params(
            axis="both", which="minor", left=False, top=False, right=False, bottom=False
        )

        ax.set_aspect("auto")
        plt.tight_layout()
        return fig


class VizSuite(AbstractViz):
    def __init__(
        self,
        episode_ids: Optional[Sequence[Union[Sequence[str], str]]] = None,
        path_to_id: Sequence[str] = ("task_info", "id"),
        mode: str = "valid",
        force_episodes_and_max_episodes_in_group: bool = False,
        max_episodes_in_group: int = 8,
        *viz,
        **kw_viz,
    ):
        """Create a suite that collects data for, and logs, several visualizers.

        Visualizers are taken from both `viz` and the values of `kw_viz`;
        entries that are neither a `Builder` nor an `AbstractViz` are ignored
        and `Builder` entries are instantiated by calling them.
        """
        super().__init__(max_episodes_in_group=max_episodes_in_group)
        self._setup(
            mode=mode,
            path_to_id=path_to_id,
            episode_ids=episode_ids,
            max_episodes_in_group=max_episodes_in_group,
        )
        self.force_episodes_and_max_episodes_in_group = (
            force_episodes_and_max_episodes_in_group
        )

        self.all_episode_ids = self._episodes_set()

        # Positional visualizers first, then keyword ones (keys are unused).
        candidates = [*viz, *kw_viz.values()]
        self.viz = [
            candidate() if isinstance(candidate, Builder) else candidate
            for candidate in candidates
            if isinstance(candidate, (Builder, AbstractViz))
        ]

        # May be overwritten by `_setup_sources` below.
        self.max_render_size: Optional[int] = None

        sources = self._setup_sources()
        self.rollout_sources = sources[0]
        self.vector_task_sources = sources[1]
        self.actor_critic_source = sources[2]

        # Maps each episode id to the list of per-step dicts of collected data.
        self.data: Dict[str, List[Dict]] = {}
        self.last_it2epid: List[str] = []

    def _setup_sources(self):
        """Configure every visualizer and merge the data sources they need.

        For each visualizer this gathers its rollout and vector-task sources,
        ORs its `actor_critic_source` flag into the suite's, and calls its
        `_setup` with the suite's settings. Unless
        `force_episodes_and_max_episodes_in_group` is set, episode ids that a
        visualizer requests but the suite does not yet know are appended to
        `self.episode_ids` as one extra group. Each such id is added once,
        even when several visualizers request it. `self.max_render_size` is
        taken from any `AgentViewViz` (the last one wins).

        Returns:
            A `(rollout_sources, vector_task_sources, actor_critic_source)`
            tuple. Sources are de-duplicated by their JSON representation and
            returned in first-seen order.
        """
        rollout_sources, vector_task_sources = [], []
        labels = []
        actor_critic_source = False
        new_episodes = []
        for v in self.viz:
            labels.append(v.label)
            rollout_sources += v.rollout_sources
            vector_task_sources += v.vector_task_sources
            actor_critic_source |= v.actor_critic_source

            if (
                v.episode_ids is not None
                and not self.force_episodes_and_max_episodes_in_group
            ):
                for ep in self._episodes_set(v.episode_ids):
                    if (
                        self.all_episode_ids is not None
                        and ep not in self.all_episode_ids
                        and ep not in new_episodes  # requested by an earlier viz
                    ):
                        new_episodes.append(ep)
                        get_logger().info(
                            "Added new episode {} from {}".format(ep, v.label)
                        )

            v._setup(
                mode=self.mode,
                path_to_id=self.path_to_id,
                episode_ids=self.episode_ids,
                max_episodes_in_group=self.max_episodes_in_group,
                force=self.force_episodes_and_max_episodes_in_group,
            )

            if isinstance(v, AgentViewViz):
                self.max_render_size = v.max_render_size

        get_logger().info("Logging labels {}".format(labels))

        if len(new_episodes) > 0:
            get_logger().info("Added new episodes {}".format(new_episodes))
            self.episode_ids.append(new_episodes)  # new group with all added episodes
            self.all_episode_ids = self._episodes_set()

        # Dict keys are unique, so keying by the JSON form removes duplicates;
        # dict order keeps the result independent of string hash seeds.
        rol_flat = {json.dumps(src, sort_keys=True): src for src in rollout_sources}
        vt_flat = {json.dumps(src, sort_keys=True): src for src in vector_task_sources}

        return (
            list(rol_flat.values()),
            list(vt_flat.values()),
            actor_critic_source,
        )

    def _episodes_set(self, episode_list=None) -> Optional[Set[str]]:
        """Flatten groups of episode ids into a single set.

        Args:
            episode_list: groups to flatten; when None, `self.episode_ids`
                is used instead.

        Returns:
            The set of all episode ids, or None when there is nothing to
            flatten (the chosen source is None). A group given as a bare
            string counts as one episode id rather than being split into
            its characters.
        """
        source = self.episode_ids if episode_list is None else episode_list
        if source is None:
            return None

        all_episode_ids: Set[str] = set()
        for group in source:
            if isinstance(group, str):
                # A string is itself iterable; treat it as a single id
                all_episode_ids.add(group)
            else:
                all_episode_ids.update(group)
        return all_episode_ids

    def empty(self):
        """Return True when no episode data is currently stored."""
        return not self.data

    def _update(self, collected_data):
        """Merge each episode's new fields into its most recent step entry.

        Every episode id in `collected_data` must already be present in
        `self.data`.
        """
        for epid, fields in collected_data.items():
            assert epid in self.data
            latest_step = self.data[epid][-1]
            latest_step.update(fields)

    def _append(self, vector_task_data):
        """Start a new step entry for every episode in `vector_task_data`.

        Episodes seen for the first time get a fresh list of step entries.
        """
        for epid, step_entry in vector_task_data.items():
            self.data.setdefault(epid, []).append(step_entry)

    def _collect_actor_critic(self, actor_critic):
        """Record actor probabilities and critic values for tracked episodes.

        Entries are created for the episodes in `self.last_it2epid` that are
        tracked (all of them when `self.all_episode_ids` is None) and merged
        into the latest step of each episode via `_update`.
        """
        tracked = self.all_episode_ids
        actor_critic_data = {
            epid: {}
            for epid in self.last_it2epid
            if tracked is None or epid in tracked
        }

        if actor_critic_data and actor_critic is not None and self.actor_critic_source:
            # TODO this code only supports Discrete action spaces!
            probs = actor_critic.distributions.probs  # step (=1) x sampler x agent (=1) x action
            values = actor_critic.values  # step x sampler x agent x 1

            def sampler_slice(tensor, sampler_index):
                # Keep a length-1 slice of the sampler axis (dim 1), squeeze
                # dim 0 and convert to numpy on the CPU; the kept sampler
                # axis is later reused as the step axis.
                picked = tensor.narrow(dim=1, start=sampler_index, length=1)
                return picked.squeeze(0).to("cpu").detach().numpy()

            for it, epid in enumerate(self.last_it2epid):
                if epid not in actor_critic_data:
                    continue
                entry = actor_critic_data[epid]

                prob = sampler_slice(probs, it)
                assert "actor_probs" not in entry
                entry["actor_probs"] = prob

                val = sampler_slice(values, it)
                assert "critic_value" not in entry
                entry["critic_value"] = val

        self._update(actor_critic_data)

    def _collect_rollout(self, rollout, alive):
        """Record the latest rollout-storage values for episodes still alive.

        `alive` lists the indices (into `self.last_it2epid`) of the samplers
        that are still running; it must not contain duplicates. For every
        configured rollout source, the most recent step is selected from the
        storage and one datum per tracked, alive episode is merged into the
        latest step entry of that episode via `_update`.
        """
        alive_set = set(alive)
        assert len(alive_set) == len(alive)
        # Episode ids of the samplers that are still alive, in sampler order
        alive_it2epid = [
            epid for it, epid in enumerate(self.last_it2epid) if it in alive_set
        ]
        rollout_data = {
            epid: dict()
            for epid in alive_it2epid
            if self.all_episode_ids is None or epid in self.all_episode_ids
        }
        if len(rollout_data) > 0 and rollout is not None:
            for source in self.rollout_sources:
                datum_id = self._source_to_str(source, is_vector_task=False)

                # First element names the storage attribute; the rest is a path into it
                storage, path = source[0], source[1:]

                # Access storage
                res = getattr(rollout, storage)
                episode_dim = rollout.dim_names.index("sampler")

                # Access sub-storage if path not empty
                if len(path) > 0:
                    if storage == "memory_first_last":
                        storage = "memory"

                    flattened_name = rollout.unflattened_to_flattened[storage][
                        tuple(path)
                    ]
                    # for path_step in path:
                    #     res = res[path_step]
                    res = res[flattened_name]
                    # Sub-storage entries unpack into (tensor, sampler axis index)
                    res, episode_dim = res

                # Pick the index of the latest step, clamped to the last
                # available index of the storage's first axis
                if rollout.step > 0:
                    if rollout.step > res.shape[0]:
                        # e.g. rnn with only latest memory saved
                        rollout_step = res.shape[0] - 1
                    else:
                        rollout_step = rollout.step - 1
                else:
                    if rollout.num_steps - 1 < res.shape[0]:
                        rollout_step = rollout.num_steps - 1
                    else:
                        # e.g. rnn with only latest memory saved
                        rollout_step = res.shape[0] - 1

                # Select latest step
                res = res.narrow(
                    dim=0,
                    start=rollout_step,
                    length=1,  # step dimension
                )  # 1 x ... x sampler x ...

                # get_logger().debug("basic collect h {}".format(res[..., 0]))

                # NOTE(review): `it` indexes the alive list, so this presumably
                # expects the storage to hold only alive samplers - confirm
                for it, epid in enumerate(alive_it2epid):
                    if epid in rollout_data:
                        # Select current episode and remove episode/sampler axis
                        datum = (
                            res.narrow(dim=episode_dim, start=it, length=1)
                            .squeeze(axis=episode_dim)
                            .to("cpu")
                            .detach()
                            .numpy()
                        )  # 1 x ... (no sampler dim)
                        # get_logger().debug("basic collect ep {} h {}".format(epid, res[..., 0]))
                        assert datum_id not in rollout_data[epid]
                        rollout_data[epid][
                            datum_id
                        ] = datum.copy()  # copy needed when running on CPU!

        self._update(rollout_data)

    def _collect_vector_task(self, vector_task):
        """Query the vector task sources and start a new step entry per episode.

        The current episode id of every task is read from its `task_info`.
        For tracked episodes, each configured vector-task source (a
        `(method, kwargs)` pair) is called on `vector_task` and its results
        are stored per episode. Results of the `"render"` method are
        down-scaled so that neither side exceeds `self.max_render_size`;
        when that attribute is None the helper's default limit of 400 is
        used rather than failing on a comparison with None.

        Returns:
            The list of current episode ids, one per task.
        """
        it2epid = [
            self._access(info, self.path_to_id[1:])
            for info in vector_task.attr("task_info")
        ]

        def limit_spatial_res(data: np.ndarray, max_size=None):
            if max_size is None:
                max_size = 400  # default limit when none was configured
            if data.shape[0] <= max_size and data.shape[1] <= max_size:
                return data
            else:
                # Scale both sides by the same factor so the larger one fits
                f = float(max_size) / max(data.shape[0], data.shape[1])
                size = (int(data.shape[1] * f), int(data.shape[0] * f))
                return cv2.resize(data, size, 0, 0, interpolation=cv2.INTER_AREA)

        vector_task_data = {
            epid: dict()
            for epid in it2epid
            if self.all_episode_ids is None or epid in self.all_episode_ids
        }
        if len(vector_task_data) > 0:
            for (
                source
            ) in self.vector_task_sources:  # these are observations for next step!
                datum_id = self._source_to_str(source, is_vector_task=True)
                method, kwargs = source
                res = getattr(vector_task, method)(**kwargs)
                if not isinstance(res, Sequence):
                    # A non-sequence result is only valid for a single task
                    assert len(it2epid) == 1
                    res = [res]
                if method == "render":
                    res = [limit_spatial_res(r, self.max_render_size) for r in res]
                assert len(res) == len(it2epid)
                for datum, epid in zip(res, it2epid):
                    if epid in vector_task_data:
                        assert datum_id not in vector_task_data[epid]
                        vector_task_data[epid][datum_id] = datum

        self._append(vector_task_data)

        return it2epid

    # to be called by engine
    def collect(self, vector_task=None, alive=None, rollout=None, actor_critic=None):
        """Collect data from whichever of the given objects are provided.

        Actor-critic output is collected first, then rollout storage (only
        when both `alive` and `rollout` are given), and the vector task last,
        since it refreshes `self.last_it2epid`. `AssertionError` and
        `RuntimeError` from a stage are logged and do not stop later stages.
        """

        def report_failure(stage):
            # Must be called from inside an `except` block so that
            # `sys.exc_info()` refers to the failure being handled.
            get_logger().debug(
                msg=f"Failed collect ({stage}) for viz due to exception:",
                exc_info=sys.exc_info(),
            )
            get_logger().error(f"Failed collect ({stage}) for viz")

        # In phase with last_it2epid
        if actor_critic is not None:
            try:
                self._collect_actor_critic(actor_critic)
            except (AssertionError, RuntimeError):
                report_failure("actor_critic")

        # In phase with the entries of last_it2epid that stay alive
        if not (alive is None or rollout is None):
            try:
                self._collect_rollout(rollout=rollout, alive=alive)
            except (AssertionError, RuntimeError):
                report_failure("rollout")

        # Always call this one last! It is in phase with the identifiers of
        # the current episodes from vector_task.
        if vector_task is not None:
            try:
                self.last_it2epid = self._collect_vector_task(vector_task)
            except (AssertionError, RuntimeError):
                report_failure("vector_task")

    def read_and_reset(self) -> Dict[str, List[Dict[str, Any]]]:
        """Return all data collected so far and start over with an empty store."""
        collected, self.data = self.data, {}
        return collected

    # to be called by logger
    def log(
        self,
        log_writer: SummaryWriter,
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        """Forward the logging request to every visualizer in the suite.

        A visualizer raising `AssertionError` or `RuntimeError` is reported
        and skipped; the remaining visualizers are still logged.
        """
        for visualizer in self.viz:
            try:
                visualizer.log(log_writer, task_outputs, render, num_steps)
            except (AssertionError, RuntimeError):
                logger = get_logger()
                logger.debug(
                    msg=f"Dropped {visualizer.label} viz due to exception:",
                    exc_info=sys.exc_info(),
                )
                get_logger().error(f"Dropped {visualizer.label} viz")


class TensorboardSummarizer:
    """Assumption: tensorboard tags/labels include a valid/test/train substr indicating the data modality"""

    def __init__(
        self,
        experiment_to_train_events_paths_map: Dict[str, Sequence[str]],
        experiment_to_test_events_paths_map: Dict[str, Sequence[str]],
        eval_min_mega_steps: Optional[Sequence[float]] = None,
        tensorboard_tags_to_labels_map: Optional[Dict[str, str]] = None,
        tensorboard_output_summary_folder: str = "tensorboard_plotter_output",
    ):
        """Validate the configuration and read all train/test event files.

        Raises:
            ImportError: if tensorflow is not available.
            AssertionError: if the test experiments neither cover all train
                experiments nor are disjoint from them, or if a label does
                not contain exactly one of "valid", "train" and "test".
        """
        if not _TF_AVAILABLE:
            raise ImportError(
                "Please install tensorflow e.g. with `pip install tensorflow` to enable TensorboardSummarizer"
            )

        self.experiment_to_train_events_paths_map = experiment_to_train_events_paths_map
        self.experiment_to_test_events_paths_map = experiment_to_test_events_paths_map

        train_experiments = set(experiment_to_train_events_paths_map.keys())
        test_experiments = set(experiment_to_test_events_paths_map.keys())
        train_only = train_experiments - test_experiments
        assert train_only == set() or train_only == train_experiments, (
            "`experiment_to_test_events_paths_map` must have identical keys (experiment names) to those"
            " in `experiment_to_train_events_paths_map`, or be empty."
            f" Got {train_experiments} train keys and {test_experiments} test keys."
        )

        self.eval_min_mega_steps = eval_min_mega_steps
        self.tensorboard_tags_to_labels_map = tensorboard_tags_to_labels_map
        if self.tensorboard_tags_to_labels_map is not None:
            for tag, label in self.tensorboard_tags_to_labels_map.items():
                num_modes = sum(mode in label for mode in ("valid", "train", "test"))
                assert num_modes == 1, (
                    f"One (and only one) of {'train', 'valid', 'test'} must be part of the label for"
                    f" tag {tag} ({label} given)."
                )
        self.tensorboard_output_summary_folder = tensorboard_output_summary_folder

        read_events = self._read_tensorflow_experiment_events
        self.train_data = read_events(self.experiment_to_train_events_paths_map)
        self.test_data = read_events(self.experiment_to_test_events_paths_map)

    def _read_tensorflow_experiment_events(
        self, experiment_to_events_paths_map, skip_map=False
    ):
        """Read the scalar summaries of every experiment's event files.

        Args:
            experiment_to_events_paths_map: maps each experiment name to the
                event file paths to read for it.
            skip_map: when True, tags are used as labels directly and
                `self.tensorboard_tags_to_labels_map` is ignored.

        Returns:
            A dict mapping experiment name to a `defaultdict(list)` that maps
            each label to a list of `dict(score=..., time=..., steps=...)`
            entries in reading order. When a tag-to-label map is in use,
            values whose tag is not in the map are dropped.
        """

        def my_summary_iterator(path):
            # Yield the events stored in `path`; an IOError ends the
            # iteration after a debug message instead of propagating.
            try:
                for r in tf_record.tf_record_iterator(path):
                    yield event_pb2.Event.FromString(r)
            except IOError:
                get_logger().debug(f"IOError for path {path}")
                return

        collected_data = {}
        for experiment_name, path_list in experiment_to_events_paths_map.items():
            experiment_data = defaultdict(list)
            for filename_path in path_list:
                # The generator never yields None, so no sentinel check is
                # needed: a failed read just ends the loop for this file.
                for event in my_summary_iterator(filename_path):
                    for value in event.summary.value:
                        if self.tensorboard_tags_to_labels_map is None or skip_map:
                            label = value.tag
                        elif value.tag in self.tensorboard_tags_to_labels_map:
                            label = self.tensorboard_tags_to_labels_map[value.tag]
                        else:
                            continue
                        experiment_data[label].append(
                            dict(
                                score=value.simple_value,
                                time=event.wall_time,
                                steps=event.step,
                            )
                        )
            collected_data[experiment_name] = experiment_data

        return collected_data

    def _eval_vs_train_time_steps(self, eval_data, train_data):
        min_mega_steps = self.eval_min_mega_steps
        if min_mega_steps is None:
            min_mega_steps = [(item["steps"] - 1) / 1e6 for item in eval_data]

        scores, times, steps = [], [], []

        i, t, last_i = 0, 0, -1
        while len(times) < len(min_mega_steps):
            while eval_data[i]["steps"] / min_mega_steps[len(times)] / 1e6 < 1:
                i += 1
            while train_data[t]["steps"] / min_mega_steps[len(times)] / 1e6 < 1:
                t += 1

            # step might be missing in valid! (and would duplicate future value at previous steps!)
            # solution: move forward last entry's time if no change in i (instead of new entry)
            if i == last_i:
                times[-1] = train_data[t]["time"]
            else:
                scores.append(eval_data[i]["score"])
                times.append(train_data[t]["time"])
                steps.append(eval_data[i]["steps"])

            last_i = i

        scores.insert(0, train_data[0]["score"])
        times.insert(0, train_data[0]["time"])
        steps.insert(0, 0)

        return scores, times, steps

    def _train_vs_time_steps(self, train_data):
        last_eval_step = (
            self.eval_min_mega_steps[-1] * 1e6
            if self.eval_min_mega_steps is not None
            else float("inf")
        )

        scores = [train_data[0]["score"]]
        times = [train_data[0]["time"]]
        steps = [train_data[0]["steps"]]

        t = 1
        while steps[-1] < last_eval_step and t < len(train_data):
            scores.append(train_data[t]["score"])
            times.append(train_data[t]["time"])
            steps.append(train_data[t]["steps"])
            t += 1

        return scores, times, steps

    def make_tensorboard_summary(self):
        """Write the aligned test, valid and train curves of every experiment.

        One `SummaryWriter` is opened per experiment in a sub-folder of
        `self.tensorboard_output_summary_folder`, and is closed even when
        writing fails. For each experiment:
          * every label of the test data is paired with the train label
            obtained by replacing "valid"/"test" with "train"; labels without
            a matching train label are reported and skipped;
          * every train-data label containing "valid" is paired with its
            "train" counterpart, which must exist;
          * every train-data label containing "train" is written up to the
            last evaluation threshold.
        Experiments without test data simply get no test curves.

        Raises:
            AssertionError: if a valid label has no matching train label.
        """

        def write_series(writer, label, series):
            scores, times, steps = series
            for score, t, step in zip(scores, times, steps):
                writer.add_scalar(label, score, global_step=step, walltime=t)

        for experiment_name in list(self.experiment_to_train_events_paths_map.keys()):
            summary_writer = SummaryWriter(
                os.path.join(self.tensorboard_output_summary_folder, experiment_name)
            )
            try:
                experiment_train = self.train_data[experiment_name]
                # Missing test data for this experiment means no test curves
                experiment_test = self.test_data.get(experiment_name, {})

                for test_label in sorted(experiment_test.keys()):
                    train_label = test_label.replace("valid", "test").replace(
                        "test", "train"
                    )
                    if train_label not in experiment_train:
                        print(
                            f"Missing matching 'train' label {train_label} for eval label {test_label}. Skipping"
                        )
                        continue
                    write_series(
                        summary_writer,
                        test_label,
                        self._eval_vs_train_time_steps(
                            experiment_test[test_label], experiment_train[train_label]
                        ),
                    )

                valid_labels = sorted(
                    key for key in list(experiment_train.keys()) if "valid" in key
                )
                for valid_label in valid_labels:
                    train_label = valid_label.replace("valid", "train")
                    assert (
                        train_label in experiment_train
                    ), f"Missing matching 'train' label {train_label} for valid label {valid_label}"
                    write_series(
                        summary_writer,
                        valid_label,
                        self._eval_vs_train_time_steps(
                            experiment_train[valid_label], experiment_train[train_label]
                        ),
                    )

                train_labels = sorted(
                    key for key in list(experiment_train.keys()) if "train" in key
                )
                for train_label in train_labels:
                    write_series(
                        summary_writer,
                        train_label,
                        self._train_vs_time_steps(experiment_train[train_label]),
                    )
            finally:
                summary_writer.close()


================================================
FILE: allenact_plugins/__init__.py
================================================
# Expose the package version when `allenact_plugins._version` can be imported;
# otherwise `__version__` is defined as None so the attribute always exists.
try:
    # noinspection PyProtectedMember,PyUnresolvedReferences
    from allenact_plugins._version import __version__
except ModuleNotFoundError:
    __version__ = None


================================================
FILE: allenact_plugins/babyai_plugin/__init__.py
================================================
from allenact.utils.system import ImportChecker

# Import babyai inside an `ImportChecker` that is given the installation
# instructions as its message (presumably shown when the import fails;
# confirm against `allenact.utils.system.ImportChecker`).
with ImportChecker(
    "\n\nPlease install babyai with:\n\n"
    "pip install -e git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd#egg=babyai\n",
):
    # noinspection PyUnresolvedReferences
    import babyai


================================================
FILE: allenact_plugins/babyai_plugin/babyai_constants.py
================================================
import os
from pathlib import Path

# Absolute path of the `data/demos` directory located next to this module.
BABYAI_EXPERT_TRAJECTORIES_DIR = os.path.abspath(
    os.path.join(os.path.dirname(Path(__file__)), "data", "demos")
)


================================================
FILE: allenact_plugins/babyai_plugin/babyai_models.py
================================================
from typing import Dict, Optional, List, cast, Tuple, Any

import babyai.model
import babyai.rl
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from gym.spaces.dict import Dict as SpaceDict

from allenact.algorithms.onpolicy_sync.policy import (
    ActorCriticModel,
    ObservationType,
    Memory,
    DistributionType,
)
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput


class BabyAIACModelWrapped(babyai.model.ACModel):
    def __init__(
        self,
        obs_space: Dict[str, int],
        action_space: gym.spaces.Discrete,
        image_dim=128,
        memory_dim=128,
        instr_dim=128,
        use_instr=False,
        lang_model="gru",
        use_memory=False,
        arch="cnn1",
        aux_info=None,
        include_auxiliary_head: bool = False,
    ):
        """Build the BabyAI actor-critic model with AllenAct-specific additions.

        Every argument except `include_auxiliary_head` is forwarded to
        `babyai.model.ACModel.__init__`; the one exception is `arch="cnn2"`,
        which is handed to the parent as `"cnn1"` and then patched below.

        # Parameters

        obs_space : Observation space description passed to the parent model.
        action_space : Discrete action space; `action_space.n` sizes the
            auxiliary head's output.
        image_dim, memory_dim, instr_dim, use_instr, lang_model, use_memory,
        aux_info : Forwarded unchanged to the parent constructor.
        arch : Architecture name. `"cnn2"` is handled here: the parent is built
            with `"cnn1"` and its first convolution is replaced by one taking
            24 input channels (three 8-dimensional embeddings per grid cell).
        include_auxiliary_head : If `True`, add an extra head (`self.aux`)
            producing one logit per action.
        """
        # Plain (non-module) attribute, so it may be set before the parent
        # constructor runs; it is needed to pick the `arch` given to the parent.
        self.use_cnn2 = arch == "cnn2"
        super().__init__(
            obs_space=obs_space,
            action_space=action_space,
            image_dim=image_dim,
            memory_dim=memory_dim,
            instr_dim=instr_dim,
            use_instr=use_instr,
            lang_model=lang_model,
            use_memory=use_memory,
            arch="cnn1" if self.use_cnn2 else arch,
            aux_info=aux_info,
        )

        self.semantic_embedding = None
        if self.use_cnn2:
            # Integer grid codes are embedded (8 dims per channel) and the
            # first convolution is swapped for one with 24 input channels; the
            # remaining layers of the parent's `image_conv` are reused as is.
            self.semantic_embedding = nn.Embedding(33, embedding_dim=8)
            self.image_conv = nn.Sequential(
                nn.Conv2d(in_channels=24, out_channels=16, kernel_size=(2, 2)),
                *self.image_conv[1:]  # type:ignore
            )
            self.image_conv[0].apply(babyai.model.initialize_parameters)

        self.include_auxiliary_head = include_auxiliary_head
        if self.use_memory and self.lang_model == "gru":
            # `forward` feeds whole sub-sequences to the recurrent core, which
            # requires `nn.LSTM` rather than a single-step cell.
            self.memory_rnn = nn.LSTM(self.image_dim, self.memory_dim)

        if self.include_auxiliary_head:
            # The auxiliary head consumes the same embedding as the actor. That
            # embedding is wider than `memory_dim` when the instruction
            # embedding is concatenated to it, so size the head from the
            # parent's `embedding_size` (falling back to `memory_dim` if the
            # parent does not define it).
            aux_input_dim = getattr(self, "embedding_size", self.memory_dim)
            self.aux = nn.Sequential(
                nn.Linear(aux_input_dim, 64),
                nn.Tanh(),
                nn.Linear(64, action_space.n),
            )
            self.aux.apply(babyai.model.initialize_parameters)

        self.train()

    def forward_once(self, obs, memory, instr_embedding=None):
        """Run the model on one time step of observations.

        Adapted, with small changes, from
        `babyai.model.ACModel.forward(...)`.

        # Parameters

        obs : Object exposing `image` and, when instructions are used, `instr`.
        memory : Recurrent state for this step; its two halves along dim 1 are
            used as the hidden and cell state of `self.memory_rnn`.
        instr_embedding : Optional precomputed instruction embedding. When
            instructions are used and this is `None`, it is computed from
            `obs.instr`.

        # Returns

        A dict with keys `"embedding"`, `"memory"` and `"extra_predictions"`.
        """
        if self.use_instr:
            if instr_embedding is None:
                instr_embedding = self._get_instr_embedding(obs.instr)

            if self.lang_model == "attgru":
                # instr_embedding: B x L x D
                # memory: B x M
                token_mask = (obs.instr != 0).float()
                # `token_mask` follows the length of `obs.instr`, which may be
                # longer than the embedding (embedding computed for a
                # sub-batch) or shorter (obs.instr is a sub-batch of what was
                # embedded). Crop both so they agree along dimension 1.
                token_mask = token_mask[:, : instr_embedding.shape[1]]
                instr_embedding = instr_embedding[:, : token_mask.shape[1]]

                attention_keys = self.memory2key(memory)
                scores = (attention_keys[:, None, :] * instr_embedding).sum(
                    2
                ) + 1000 * token_mask
                weights = F.softmax(scores, dim=1)
                instr_embedding = (instr_embedding * weights[:, :, None]).sum(1)

        # Move the last image axis to position 1 (two chained transposes).
        features = obs.image.transpose(1, 3).transpose(2, 3)

        if self.arch.startswith("expert_filmcnn"):
            features = self.image_conv(features)
            for film_controller in self.controllers:
                features = film_controller(features, instr_embedding)
            features = F.relu(self.film_pool(features))
        else:
            features = self.image_conv(features.contiguous())

        features = features.reshape(features.shape[0], -1)

        if not self.use_memory:
            embedding = features
        else:
            rnn_state = (
                memory[:, : self.semi_memory_size],
                memory[:, self.semi_memory_size :],
            )
            rnn_state = self.memory_rnn(features, rnn_state)
            embedding = rnn_state[0]
            memory = torch.cat(rnn_state, dim=1)  # type: ignore

        if self.use_instr and "filmcnn" not in self.arch:
            embedding = torch.cat((embedding, instr_embedding), dim=1)

        extra_predictions = dict()
        if getattr(self, "aux_info", None):
            extra_predictions = {
                head_name: self.extra_heads[head_name](embedding)
                for head_name in self.extra_heads
            }

        return {
            "embedding": embedding,
            "memory": memory,
            "extra_predictions": extra_predictions,
        }

    def forward_loop(
        self,
        observations: ObservationType,
        recurrent_hidden_states: torch.FloatTensor,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ):
        """Run the model step by step over a (flattened) rollout.

        Calls `forward_once` once per time step, carrying the recurrent memory
        from one step to the next and multiplying it by that step's mask.

        # Parameters

        observations : Flattened observations; `"minigrid_ego_image"` is
            required, `"minigrid_mission"` is used when present. Their leading
            dimension is viewed as `rollouts_len * nsamplers`.
        recurrent_hidden_states : Tensor with exactly one entry along dim 0;
            its second dimension gives the number of samplers.
        prev_actions : Unused here.
        masks : Flattened masks; entries different from 1 mark steps at which
            the instruction embedding is recomputed for that sampler.

        # Returns

        A tuple `(ActorCriticOutput, memories)` where `memories` stacks the
        per-step memory tensors along a new leading dimension.
        """
        results = []
        images = cast(torch.FloatTensor, observations["minigrid_ego_image"]).float()
        instrs: Optional[torch.Tensor] = None
        if "minigrid_mission" in observations:
            instrs = cast(torch.Tensor, observations["minigrid_mission"])

        _, nsamplers, _ = recurrent_hidden_states.shape
        rollouts_len = images.shape[0] // nsamplers
        obs = babyai.rl.DictList()

        images = images.view(rollouts_len, nsamplers, *images.shape[1:])
        masks = masks.view(rollouts_len, nsamplers, *masks.shape[1:])  # type:ignore

        if instrs is not None:
            instrs = instrs.view(rollouts_len, nsamplers, instrs.shape[-1])

        # An instruction embedding is (re)computed wherever the mask differs
        # from 1, and always for every sampler at the first time step.
        needs_instr_reset_mask = masks != 1.0
        needs_instr_reset_mask[0] = 1
        needs_instr_reset_mask = needs_instr_reset_mask.squeeze(-1)
        instr_embeddings: Optional[torch.Tensor] = None
        if self.use_instr:
            # (time, sampler) pairs needing a fresh embedding, in the same
            # row-major order as the boolean-mask indexing used below.
            instr_reset_multi_inds = list(
                (int(a), int(b))
                for a, b in zip(*np.where(needs_instr_reset_mask.cpu().numpy()))
            )
            time_ind_to_which_need_instr_reset: List[List] = [
                [] for _ in range(rollouts_len)
            ]
            reset_multi_ind_to_index = {
                mi: i for i, mi in enumerate(instr_reset_multi_inds)
            }
            for a, b in instr_reset_multi_inds:
                time_ind_to_which_need_instr_reset[a].append(b)

            # Embed only the instructions that actually changed.
            unique_instr_embeddings = self._get_instr_embedding(
                instrs[needs_instr_reset_mask]
            )

            # The first `nsamplers` rows belong to time step 0 because every
            # sampler is flagged there.
            instr_embeddings_list = [unique_instr_embeddings[:nsamplers]]
            current_instr_embeddings_list = list(instr_embeddings_list[-1])

            for time_ind in range(1, rollouts_len):
                if len(time_ind_to_which_need_instr_reset[time_ind]) == 0:
                    # Nothing changed at this step: reuse the previous tensor.
                    instr_embeddings_list.append(instr_embeddings_list[-1])
                else:
                    for sampler_needing_reset_ind in time_ind_to_which_need_instr_reset[
                        time_ind
                    ]:
                        current_instr_embeddings_list[sampler_needing_reset_ind] = (
                            unique_instr_embeddings[
                                reset_multi_ind_to_index[
                                    (time_ind, sampler_needing_reset_ind)
                                ]
                            ]
                        )

                    instr_embeddings_list.append(
                        torch.stack(current_instr_embeddings_list, dim=0)
                    )

            instr_embeddings = torch.stack(instr_embeddings_list, dim=0)

        assert recurrent_hidden_states.shape[0] == 1
        memory = recurrent_hidden_states[0]
        for i in range(rollouts_len):
            obs.image = images[i]
            if "minigrid_mission" in observations:
                obs.instr = instrs[i]

            # `instr_embeddings` stays `None` when instructions are not used;
            # indexing it unconditionally would raise a TypeError.
            step_instr_embedding = (
                None if instr_embeddings is None else instr_embeddings[i]
            )
            results.append(
                self.forward_once(
                    obs, memory=memory * masks[i], instr_embedding=step_instr_embedding
                )
            )
            memory = results[-1]["memory"]

        embedding = torch.cat([r["embedding"] for r in results], dim=0)

        extra_predictions_list = [r["extra_predictions"] for r in results]
        extra_predictions = {
            key: torch.cat([ep[key] for ep in extra_predictions_list], dim=0)
            for key in extra_predictions_list[0]
        }
        return (
            ActorCriticOutput(
                distributions=CategoricalDistr(
                    logits=self.actor(embedding),
                ),
                values=self.critic(embedding),
                extras=(
                    extra_predictions
                    if not self.include_auxiliary_head
                    else {
                        **extra_predictions,
                        "auxiliary_distributions": cast(
                            Any, CategoricalDistr(logits=self.aux(embedding))
                        ),
                    }
                ),
            ),
            torch.stack([r["memory"] for r in results], dim=0),
        )

    # noinspection PyMethodOverriding
    def forward(
        self,
        observations: ObservationType,
        recurrent_hidden_states: torch.FloatTensor,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ):
        """Compute actor-critic outputs for a batch of rollouts.

        Inputs are first flattened with `adapt_inputs` and the outputs are
        reshaped back with `adapt_result`. When `lang_model` is not `"gru"`
        the work is delegated to the step-by-step `forward_loop`; otherwise
        images are embedded in one pass and the recurrent core is run on
        chunks of consecutive time steps between mask resets.

        # Parameters

        observations : Observations with `"minigrid_ego_image"` and, if
            instructions are used, `"minigrid_mission"`.
        recurrent_hidden_states : Recurrent state; must have exactly one entry
            along its first dimension.
        prev_actions : Previous actions (only reshaped, not otherwise used on
            the `"gru"` path).
        masks : Masks whose first two dimensions are `[num_steps, num_samplers]`;
            entries different from 1 reset the recurrent state and trigger a
            new instruction embedding.

        # Returns

        The `(ActorCriticOutput, hidden_states)` tuple produced by
        `adapt_result`.

        # Raises

        NotImplementedError : If instructions are used with
            `lang_model == "attgru"` on the `"gru"` code path (unreachable in
            practice since `"attgru"` is routed to `forward_loop`).
        """
        (
            observations,
            recurrent_hidden_states,
            prev_actions,
            masks,
            num_steps,
            num_samplers,
            num_agents,
            num_layers,
        ) = self.adapt_inputs(
            observations, recurrent_hidden_states, prev_actions, masks
        )

        if self.lang_model != "gru":
            ac_output, hidden_states = self.forward_loop(
                observations=observations,
                recurrent_hidden_states=recurrent_hidden_states,
                prev_actions=prev_actions,
                masks=masks,  # type: ignore
            )

            # Only the memory after the final time step is handed back.
            return self.adapt_result(
                ac_output,
                hidden_states[-1:],
                num_steps,
                num_samplers,
                num_agents,
                num_layers,
                observations,
            )

        assert recurrent_hidden_states.shape[0] == 1

        images = cast(torch.FloatTensor, observations["minigrid_ego_image"])
        if self.use_cnn2:
            images_shape = images.shape
            # Shift the three channels into disjoint index ranges of the shared
            # embedding table, embed, and merge the 3 x 8 embedding dims into
            # 24 channels.
            # noinspection PyArgumentList
            images = images + torch.LongTensor([0, 11, 22]).view(  # type:ignore
                1, 1, 1, 3
            ).to(images.device)
            images = self.semantic_embedding(images).view(  # type:ignore
                *images_shape[:3], 24
            )
        # Channels-last -> channels-first for the convolutional encoder.
        images = images.permute(0, 3, 1, 2).float()  # type:ignore

        _, nsamplers, _ = recurrent_hidden_states.shape
        rollouts_len = images.shape[0] // nsamplers

        masks = cast(
            torch.FloatTensor, masks.view(rollouts_len, nsamplers, *masks.shape[1:])
        )
        instrs: Optional[torch.Tensor] = None
        if "minigrid_mission" in observations and self.use_instr:
            instrs = cast(torch.FloatTensor, observations["minigrid_mission"])
            instrs = instrs.view(rollouts_len, nsamplers, instrs.shape[-1])

        # A reset is needed wherever the mask differs from 1, and always for
        # every sampler at the first time step.
        needs_instr_reset_mask = masks != 1.0
        needs_instr_reset_mask[0] = 1
        needs_instr_reset_mask = needs_instr_reset_mask.squeeze(-1)
        # Time indices at which at least one sampler resets; the recurrent
        # core is run separately on each chunk between consecutive indices.
        blocking_inds: List[int] = np.where(
            needs_instr_reset_mask.view(rollouts_len, -1).any(-1).cpu().numpy()
        )[0].tolist()
        blocking_inds.append(rollouts_len)

        instr_embeddings: Optional[torch.Tensor] = None
        if self.use_instr:
            # (time, sampler) pairs needing a fresh embedding, in the same
            # row-major order as the boolean-mask indexing used below.
            instr_reset_multi_inds = list(
                (int(a), int(b))
                for a, b in zip(*np.where(needs_instr_reset_mask.cpu().numpy()))
            )
            time_ind_to_which_need_instr_reset: List[List] = [
                [] for _ in range(rollouts_len)
            ]
            reset_multi_ind_to_index = {
                mi: i for i, mi in enumerate(instr_reset_multi_inds)
            }
            for a, b in instr_reset_multi_inds:
                time_ind_to_which_need_instr_reset[a].append(b)

            # Embed only the instructions that actually changed.
            unique_instr_embeddings = self._get_instr_embedding(
                instrs[needs_instr_reset_mask]
            )

            # The first `nsamplers` rows belong to time step 0 because every
            # sampler is flagged there.
            instr_embeddings_list = [unique_instr_embeddings[:nsamplers]]
            current_instr_embeddings_list = list(instr_embeddings_list[-1])

            for time_ind in range(1, rollouts_len):
                if len(time_ind_to_which_need_instr_reset[time_ind]) == 0:
                    # Nothing changed at this step: reuse the previous tensor.
                    instr_embeddings_list.append(instr_embeddings_list[-1])
                else:
                    for sampler_needing_reset_ind in time_ind_to_which_need_instr_reset[
                        time_ind
                    ]:
                        current_instr_embeddings_list[sampler_needing_reset_ind] = (
                            unique_instr_embeddings[
                                reset_multi_ind_to_index[
                                    (time_ind, sampler_needing_reset_ind)
                                ]
                            ]
                        )

                    instr_embeddings_list.append(
                        torch.stack(current_instr_embeddings_list, dim=0)
                    )

            instr_embeddings = torch.stack(instr_embeddings_list, dim=0)

        # Embed all images of the rollout in a single pass.
        image_embeddings = self.image_conv(images)
        if self.arch.startswith("expert_filmcnn"):
            instr_embeddings_flatter = instr_embeddings.view(
                -1, *instr_embeddings.shape[2:]
            )
            for controller in self.controllers:
                image_embeddings = controller(
                    image_embeddings, instr_embeddings_flatter
                )
            image_embeddings = F.relu(self.film_pool(image_embeddings))

        image_embeddings = image_embeddings.view(rollouts_len, nsamplers, -1)

        if self.use_instr and self.lang_model == "attgru":
            raise NotImplementedError("Currently attgru is not implemented.")

        if self.use_memory:
            hidden = (
                recurrent_hidden_states[:, :, : self.semi_memory_size],
                recurrent_hidden_states[:, :, self.semi_memory_size :],
            )
            embeddings_list = []
            for ind0, ind1 in zip(blocking_inds[:-1], blocking_inds[1:]):
                # Apply the mask of the chunk's first step to the carried
                # state, then run the LSTM over the whole chunk at once.
                hidden = (hidden[0] * masks[ind0], hidden[1] * masks[ind0])
                rnn_out, hidden = self.memory_rnn(image_embeddings[ind0:ind1], hidden)
                embeddings_list.append(rnn_out)

            embedding = torch.cat(embeddings_list, dim=0)
            memory = torch.cat(hidden, dim=-1)
        else:
            embedding = image_embeddings
            # Without a recurrent core there is no new state to report. Hand
            # the incoming state back unchanged so that `adapt_result`, which
            # reshapes the state, receives a tensor instead of `None`.
            memory = recurrent_hidden_states

        if self.use_instr and "filmcnn" not in self.arch:
            embedding = torch.cat((embedding, instr_embeddings), dim=-1)

        if hasattr(self, "aux_info") and self.aux_info:
            extra_predictions = {
                info: self.extra_heads[info](embedding) for info in self.extra_heads
            }
        else:
            extra_predictions = dict()

        embedding = embedding.view(rollouts_len * nsamplers, -1)

        ac_output = ActorCriticOutput(
            distributions=CategoricalDistr(
                logits=self.actor(embedding),
            ),
            values=self.critic(embedding),
            extras=(
                extra_predictions
                if not self.include_auxiliary_head
                else {
                    **extra_predictions,
                    "auxiliary_distributions": CategoricalDistr(
                        logits=self.aux(embedding)
                    ),
                }
            ),
        )
        hidden_states = memory

        return self.adapt_result(
            ac_output,
            hidden_states,
            num_steps,
            num_samplers,
            num_agents,
            num_layers,
            observations,
        )

    @staticmethod
    def adapt_inputs(  # type: ignore
        observations: ObservationType,
        recurrent_hidden_states: torch.FloatTensor,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ):
        """Flatten the leading `[num_steps, num_samplers]` batch dimensions.

        Expected inputs:

        * observations: `[num_steps, num_samplers, ...]`
        * recurrent_hidden_states:
          `[num_layers, num_samplers, (num_agents,) num_dims]`
        * prev_actions: `[num_steps, num_samplers, ...]` (may be `None`)
        * masks: `[num_steps, num_samplers, 1]`

        A single agent is assumed. The `"minigrid_ego_image"` and
        `"minigrid_mission"` entries of `observations` are reshaped **in
        place** (nested dicts are traversed recursively).

        # Returns

        The tuple `(observations, recurrent_hidden_states, prev_actions,
        masks, num_steps, num_samplers, num_agents, num_layers)` where the
        tensors have the flattened shapes:

        * observations: `[num_steps * num_samplers, ...]`
        * recurrent_hidden_states:
          `[num_layers, num_samplers (* num_agents), num_dims]`
        * prev_actions: `[num_steps * num_samplers, -1]`
        * masks: `[num_steps * num_samplers, 1]`
        """
        num_agents = 1
        num_layers = recurrent_hidden_states.shape[0]
        num_steps, num_samplers = masks.shape[:2]
        flat_batch = num_steps * num_samplers

        def flatten_batch_dims(obs_dict):
            # Merge the first two dims of the selected tensors, descending
            # into nested dictionaries.
            for key in obs_dict:
                if isinstance(obs_dict[key], Dict):
                    flatten_batch_dims(obs_dict[key])
                    continue

                assert isinstance(obs_dict[key], torch.Tensor)
                if key in ["minigrid_ego_image", "minigrid_mission"]:
                    trailing_dims = obs_dict[key].shape[2:]
                    obs_dict[key] = obs_dict[key].view(flat_batch, *trailing_dims)

        flatten_batch_dims(observations)

        recurrent_hidden_states = cast(
            torch.FloatTensor,
            recurrent_hidden_states.view(num_layers, num_samplers * num_agents, -1),
        )

        if prev_actions is not None:
            prev_actions = prev_actions.view(flat_batch, -1)  # type:ignore

        masks = masks.view(flat_batch, 1)  # type:ignore

        return (
            observations,
            recurrent_hidden_states,
            prev_actions,
            masks,
            num_steps,
            num_samplers,
            num_agents,
            num_layers,
        )

    @staticmethod
    def adapt_result(  # type: ignore
        ac_output,
        hidden_states,
        num_steps,
        num_samplers,
        num_agents,
        num_layers,
        observations,
    ):
        """Restore the `[num_steps, num_samplers]` batch dimensions.

        Reshapes the actor logits, the values and (when present) the
        `"auxiliary_distributions"` logits of `ac_output`, reshapes
        `hidden_states` to `[num_layers, num_samplers * num_agents, -1]`, and
        un-flattens the `"minigrid_ego_image"` / `"minigrid_mission"` entries
        of `observations` **in place**. The `extras` dict of `ac_output` is
        also updated in place.

        # Returns

        A tuple `(ActorCriticOutput, hidden_states)`.
        """
        step_sampler_shape = (num_steps, num_samplers, -1)

        distributions = CategoricalDistr(
            logits=ac_output.distributions.logits.view(*step_sampler_shape),
        )
        values = ac_output.values.view(num_steps, num_samplers, num_agents)

        # Shapes of the other extras are left untouched.
        extras = ac_output.extras
        # TODO confirm the shape of the auxiliary distribution is the same as the actor's
        if "auxiliary_distributions" in extras:
            aux_logits = extras["auxiliary_distributions"].logits
            # Single-agent assumption, as for the actor logits above.
            extras["auxiliary_distributions"] = CategoricalDistr(
                logits=aux_logits.view(*step_sampler_shape),
            )

        hidden_states = hidden_states.view(num_layers, num_samplers * num_agents, -1)

        def unflatten_batch_dims(obs_dict):
            # Split the leading dim of the selected tensors back into
            # (steps, samplers), descending into nested dictionaries.
            for key in obs_dict:
                if isinstance(obs_dict[key], Dict):
                    unflatten_batch_dims(obs_dict[key])
                    continue

                assert isinstance(obs_dict[key], torch.Tensor)
                if key in ["minigrid_ego_image", "minigrid_mission"]:
                    # Assumes observations carry no agents dimension.
                    trailing_dims = obs_dict[key].shape[1:]
                    obs_dict[key] = obs_dict[key].view(
                        num_steps, num_samplers * num_agents, *trailing_dims
                    )

        unflatten_batch_dims(observations)

        adapted_output = ActorCriticOutput(
            distributions=distributions, values=values, extras=extras
        )
        return adapted_output, hidden_states


class BabyAIRecurrentACModel(ActorCriticModel[CategoricalDistr]):
    """AllenAct actor-critic model that delegates to `BabyAIACModelWrapped`.

    The recurrent state is stored in AllenAct `Memory` under the key `"rnn"`
    and has size `2 * memory_dim` along its last dimension.
    """

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        image_dim=128,
        memory_dim=128,
        instr_dim=128,
        use_instr=False,
        lang_model="gru",
        use_memory=False,
        arch="cnn1",
        aux_info=None,
        include_auxiliary_head: bool = False,
    ):
        """Create the wrapped BabyAI model.

        `observation_space` must contain `"minigrid_ego_image"` and, when
        `use_instr` is set, `"minigrid_mission"`. The remaining arguments are
        forwarded to `BabyAIACModelWrapped`.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        available_sensors = observation_space.spaces
        assert "minigrid_ego_image" in available_sensors
        assert not use_instr or "minigrid_mission" in available_sensors

        self.memory_dim = memory_dim
        self.include_auxiliary_head = include_auxiliary_head

        # Observation-space description handed to the wrapped BabyAI model.
        wrapped_obs_space = {
            "image": 7 * 7 * 3,
            "instr": 100,
        }
        self.baby_ai_model = BabyAIACModelWrapped(
            obs_space=wrapped_obs_space,
            action_space=action_space,
            image_dim=image_dim,
            memory_dim=memory_dim,
            instr_dim=instr_dim,
            use_instr=use_instr,
            lang_model=lang_model,
            use_memory=use_memory,
            arch=arch,
            aux_info=aux_info,
            include_auxiliary_head=self.include_auxiliary_head,
        )
        self.memory_key = "rnn"

    @property
    def recurrent_hidden_state_size(self) -> int:
        """Size of the recurrent state's last dimension (`2 * memory_dim`)."""
        return self.memory_dim * 2

    @property
    def num_recurrent_layers(self):
        """Number of recurrent layers (always 1)."""
        return 1

    def _recurrent_memory_specification(self):
        """Describe the single memory tensor: (layer, sampler, hidden), float32."""
        dims = (
            ("layer", self.num_recurrent_layers),
            ("sampler", None),
            ("hidden", self.recurrent_hidden_state_size),
        )
        return {self.memory_key: (dims, torch.float32)}

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Run the wrapped model and write its new recurrent state to `memory`.

        # Returns

        The wrapped model's `ActorCriticOutput` and the result of
        `memory.set_tensor` for the updated recurrent state.
        """
        current_state = cast(torch.FloatTensor, memory.tensor(self.memory_key))
        out, recurrent_hidden_states = self.baby_ai_model.forward(
            observations=observations,
            recurrent_hidden_states=current_state,
            prev_actions=prev_actions,
            masks=masks,
        )
        updated_memory = memory.set_tensor(self.memory_key, recurrent_hidden_states)
        return out, updated_memory


================================================
FILE: allenact_plugins/babyai_plugin/babyai_tasks.py
================================================
import random
import signal
from typing import Tuple, Any, List, Dict, Optional, Union, Callable

import babyai
import babyai.bot
import gym
import numpy as np
from gym.utils import seeding
from gym_minigrid.minigrid import MiniGridEnv

from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor, SensorSuite
from allenact.base_abstractions.task import Task, TaskSampler
from allenact.utils.system import get_logger


class BabyAITask(Task[MiniGridEnv]):
    """A single episode in a BabyAI (MiniGrid) environment.

    Wraps a `MiniGridEnv`, forwards actions to it, records whether the episode
    ended successfully, and can query a `babyai.bot.Bot` for expert actions.
    """

    def __init__(
        self,
        env: MiniGridEnv,
        sensors: Union[SensorSuite, List[Sensor]],
        task_info: Dict[str, Any],
        expert_view_size: int = 7,
        expert_can_see_through_walls: bool = False,
        **kwargs,
    ):
        """Create the task.

        # Parameters

        env : The (already reset) environment; its `max_steps` is used as the
            task's step limit.
        sensors : Sensors used to produce observations.
        task_info : Arbitrary task metadata passed to the base class.
        expert_view_size : `agent_view_size` set on the environment while the
            expert bot is queried.
        expert_can_see_through_walls : `see_through_walls` value set on the
            environment while the expert bot is queried.
        kwargs : Forwarded to the base `Task` constructor.
        """
        super().__init__(
            env=env,
            sensors=sensors,
            task_info=task_info,
            max_steps=env.max_steps,
            **kwargs,
        )
        self._was_successful: bool = False
        self.bot: Optional[babyai.bot.Bot] = None
        self._bot_died = False
        self.expert_view_size = expert_view_size
        self.expert_can_see_through_walls = expert_can_see_through_walls
        self._last_action: Optional[int] = None

        # NOTE(review): presumably this lets the task's own step limit trigger
        # before the environment's -- confirm against `Task.is_done`.
        env.max_steps = env.max_steps + 1

    @property
    def action_space(self) -> gym.spaces.Discrete:
        """The wrapped environment's action space."""
        return self.env.action_space

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Render the environment using the given `mode`."""
        return self.env.render(mode=mode)

    def _step(self, action: int) -> RLStepResult:
        """Take `action` in the environment and package the step result."""
        assert isinstance(action, int)

        minigrid_obs, reward, done, info = self.env.step(action=action)
        self._last_action = action

        # Success means the episode finished with a positive reward.
        self._was_successful = done and reward > 0

        return RLStepResult(
            observation=self.get_observations(minigrid_output_obs=minigrid_obs),
            reward=reward,
            done=self.is_done(),
            info=info,
        )

    def get_observations(
        self, *args, minigrid_output_obs: Optional[Dict[str, Any]] = None, **kwargs
    ) -> Any:
        """Query the sensor suite, passing along the raw MiniGrid observation."""
        return self.sensor_suite.get_observations(
            env=self.env, task=self, minigrid_output_obs=minigrid_output_obs
        )

    def reached_terminal_state(self) -> bool:
        """Whether the last step ended the episode successfully."""
        return self._was_successful

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """String names of `MiniGridEnv.Actions`, ordered by action value."""
        return tuple(
            x
            for x, _ in sorted(
                [(str(a), a.value) for a in MiniGridEnv.Actions], key=lambda x: x[1]
            )
        )

    def close(self) -> None:
        """No-op: the environment is not closed by the task."""
        pass

    def _expert_timeout_hander(self, signum, frame):
        """SIGALRM handler used to abort a slow expert query."""
        raise TimeoutError

    def query_expert(self, **kwargs) -> Tuple[Any, bool]:
        """Ask the BabyAI bot for the next expert action.

        The environment's `agent_view_size` and `see_through_walls` are
        temporarily replaced by the expert's settings and restored afterwards.
        The bot's planning is bounded by a SIGALRM timeout (`timeout` keyword,
        defaulting to 4 seconds before the first step and 2 afterwards); once
        a timeout occurs the bot is considered dead for the rest of the task.

        # Returns

        `(action, True)` on success, or `(0, False)` if the bot has timed out
        (now or previously).
        """
        see_through_walls = self.env.see_through_walls
        agent_view_size = self.env.agent_view_size

        if self._bot_died:
            return 0, False

        try:
            self.env.agent_view_size = self.expert_view_size
            # Override the attribute that is saved above and restored in
            # `finally`, so that the expert's visibility setting takes effect.
            self.env.see_through_walls = self.expert_can_see_through_walls

            if self.bot is None:
                self.bot = babyai.bot.Bot(self.env)

            signal.signal(signal.SIGALRM, self._expert_timeout_hander)
            signal.alarm(kwargs.get("timeout", 4 if self.num_steps_taken() == 0 else 2))
            return self.bot.replan(self._last_action), True
        except TimeoutError:
            self._bot_died = True
            return 0, False
        finally:
            # Cancel any pending alarm and restore the agent's own settings.
            signal.alarm(0)
            self.env.see_through_walls = see_through_walls
            self.env.agent_view_size = agent_view_size

    def metrics(self) -> Dict[str, Any]:
        """Base-class metrics extended with a `"success"` indicator (0.0/1.0)."""
        metrics = {
            **super(BabyAITask, self).metrics(),
            "success": 1.0 * (self.reached_terminal_state()),
        }
        return metrics


class BabyAITaskSampler(TaskSampler):
    """Samples `BabyAITask`s from a single, repeatedly re-seeded environment."""

    def __init__(
        self,
        env_builder: Union[str, Callable[..., MiniGridEnv]],
        sensors: Union[SensorSuite, List[Sensor]],
        max_tasks: Optional[int] = None,
        num_unique_seeds: Optional[int] = None,
        task_seeds_list: Optional[List[int]] = None,
        deterministic_sampling: bool = False,
        extra_task_kwargs: Optional[Dict] = None,
        **kwargs,
    ):
        """Create the sampler and its environment.

        # Parameters

        env_builder : Either a gym environment id (built with `gym.make`) or a
            zero-argument callable returning the environment.
        sensors : A `SensorSuite` or a list of sensors (wrapped in one).
        max_tasks : Maximum number of tasks to generate; `None` means no limit.
        num_unique_seeds : If given, tasks are drawn from this many seeds.
        task_seeds_list : Explicit list of seeds; if `num_unique_seeds` is also
            given, the two must agree in length.
        deterministic_sampling : If `True`, seeds are cycled through in order
            instead of being sampled at random.
        extra_task_kwargs : Extra keyword arguments passed to every created
            `BabyAITask`.
        kwargs : Ignored.
        """
        super(BabyAITaskSampler, self).__init__()
        self.sensors = (
            SensorSuite(sensors) if not isinstance(sensors, SensorSuite) else sensors
        )
        self.max_tasks = max_tasks
        self.num_unique_seeds = num_unique_seeds
        self.deterministic_sampling = deterministic_sampling
        self.extra_task_kwargs = (
            extra_task_kwargs if extra_task_kwargs is not None else {}
        )

        self._last_env_seed: Optional[int] = None
        self._last_task: Optional[BabyAITask] = None

        assert (self.num_unique_seeds is None) or (
            0 < self.num_unique_seeds
        ), "`num_unique_seeds` must be a positive integer."

        self.task_seeds_list = task_seeds_list
        if self.task_seeds_list is not None:
            if self.num_unique_seeds is not None:
                assert self.num_unique_seeds == len(
                    self.task_seeds_list
                ), "`num_unique_seeds` must equal the length of `task_seeds_list` if both specified."
            self.num_unique_seeds = len(self.task_seeds_list)
        elif self.num_unique_seeds is not None:
            # Only a count was given: use seeds 0, 1, ..., num_unique_seeds - 1.
            self.task_seeds_list = list(range(self.num_unique_seeds))

        if (not deterministic_sampling) and self.max_tasks:
            get_logger().warning(
                "`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,"
                " this might be a mistake when running testing."
            )

        if isinstance(env_builder, str):
            self.env = gym.make(env_builder)
        else:
            self.env = env_builder()

        self.np_seeded_random_gen, _ = seeding.np_random(random.randint(0, 2**31 - 1))
        self.num_tasks_generated = 0

    @property
    def length(self) -> Union[int, float]:
        """Number of tasks still to be generated (`inf` if unbounded)."""
        return (
            float("inf")
            if self.max_tasks is None
            else self.max_tasks - self.num_tasks_generated
        )

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Number of unique seeds, or `None` if seeds are unrestricted."""
        return self.num_unique_seeds

    @property
    def last_sampled_task(self) -> Optional[Task]:
        """The task most recently returned by `next_task` (`None` before that)."""
        return self._last_task

    def next_task(self, force_advance_scene: bool = False) -> Optional[BabyAITask]:
        """Seed and reset the environment, then wrap it in a new task.

        # Parameters

        force_advance_scene : Unused.

        # Returns

        A new `BabyAITask`, or `None` once `max_tasks` tasks were generated.
        """
        if self.length <= 0:
            return None

        if self.num_unique_seeds is not None:
            if self.deterministic_sampling:
                # Cycle through the seed list in order.
                self._last_env_seed = self.task_seeds_list[
                    self.num_tasks_generated % len(self.task_seeds_list)
                ]
            else:
                self._last_env_seed = self.np_seeded_random_gen.choice(
                    self.task_seeds_list
                )
        else:
            self._last_env_seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)

        self.env.seed(self._last_env_seed)
        self.env.saved_seed = self._last_env_seed
        self.env.reset()

        self.num_tasks_generated += 1
        # Forward the user-supplied task options, which would otherwise be
        # stored by the constructor but never used.
        self._last_task = BabyAITask(
            env=self.env,
            sensors=self.sensors,
            task_info={},
            **self.extra_task_kwargs,
        )
        return self._last_task

    def close(self) -> None:
        """Close the underlying environment."""
        self.env.close()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """All tasks share the same observation space."""
        return True

    def reset(self) -> None:
        """Restart task counting and reset the environment."""
        self.num_tasks_generated = 0
        self.env.reset()

    def set_seed(self, seed: int) -> None:
        """Re-create the random generator used for seed sampling."""
        self.np_seeded_random_gen, _ = seeding.np_random(seed)


================================================
FILE: allenact_plugins/babyai_plugin/configs/__init__.py
================================================


================================================
FILE: allenact_plugins/babyai_plugin/data/__init__.py
================================================


================================================
FILE: allenact_plugins/babyai_plugin/extra_environment.yml
================================================
dependencies:
  - networkx
  - pip
  - pip:
      - "--editable=git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd#egg=babyai"


================================================
FILE: allenact_plugins/babyai_plugin/extra_requirements.txt
================================================
babyai @ git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd
networkx

================================================
FILE: allenact_plugins/babyai_plugin/scripts/__init__.py
================================================


================================================
FILE: allenact_plugins/babyai_plugin/scripts/download_babyai_expert_demos.py
================================================
import argparse
import os
import platform

from allenact_plugins.babyai_plugin.babyai_constants import (
    BABYAI_EXPERT_TRAJECTORIES_DIR,
)

# Maps a BabyAI level name to a `(train_id, valid_id)` pair. Each id is
# substituted into the `docs.google.com/uc?...&id=` download URL built by this
# script: the first id is saved as the level's training demos, the second as
# its `_valid` demos.
LEVEL_TO_TRAIN_VALID_IDS = {
    "BossLevel": (
        "1DkVVpIEVtpyo1LxOXQL_bVyjFCTO3cHD",
        "1ccEFA_n5RT4SWD0Wa_qO65z2HACJBace",
    ),
    "GoToObjMaze": (
        "1P1CuMbGDJtZit1f-8hmd-HwweXZMj77T",
        "1MVlVsIpJUZ0vjrYGXY6Ku4m4vBxtWjRZ",
    ),
    "GoTo": ("1ABR1q-TClgjSlbhVdVJjzOBpTmTtlTN1", "13DlEx5woi31MIs_dzyLxfi7dPe1g59l2"),
    "GoToLocal": (
        "1U8YWdd3viN2lxOP5BByNUZRPVDKVvDAN",
        "1Esy-J0t8eJUg6_RT8F4kkegHYDWwqmSl",
    ),
}


def get_args():
    """Build this script's command line parser and return the parsed arguments.

    The single optional positional argument `dataset` selects one level name
    from `LEVEL_TO_TRAIN_VALID_IDS`; it defaults to `"all"`.
    """
    known_levels = ", ".join(LEVEL_TO_TRAIN_VALID_IDS.keys())
    dataset_help = "dataset name (one of {}, or all)".format(known_levels)

    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="download_babyai_expert_demos",
    )
    parser.add_argument("dataset", nargs="?", default="all", help=dataset_help)

    parsed = parser.parse_args()
    return parsed


if __name__ == "__main__":
    # Script entry point: download the train/valid expert demonstration files
    # of the requested BabyAI level(s) into BABYAI_EXPERT_TRAJECTORIES_DIR,
    # skipping any file that already exists.
    args = get_args()

    # The two templates differ only in the sed binary (`sed` vs. `gsed`). The
    # placeholders are, in order: file id, file id, output path.
    if platform.system() == "Linux":
        download_template = """wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id={}' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p')&id={}" -O {}"""
    elif platform.system() == "Darwin":
        download_template = """wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id={}' -O- | gsed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p')&id={}" -O {}"""
    else:
        raise NotImplementedError("{} is not supported".format(platform.system()))

    # Validate the dataset name before the try-block so that a typo is reported
    # as such instead of being disguised as a download failure (and so that the
    # check is not stripped when running with `python -O`).
    if args.dataset == "all":
        id_items = LEVEL_TO_TRAIN_VALID_IDS
    elif args.dataset in LEVEL_TO_TRAIN_VALID_IDS:
        id_items = {args.dataset: LEVEL_TO_TRAIN_VALID_IDS[args.dataset]}
    else:
        raise ValueError(
            "Only {} are valid datasets".format(
                ", ".join(LEVEL_TO_TRAIN_VALID_IDS.keys())
            )
        )

    try:
        os.makedirs(BABYAI_EXPERT_TRAJECTORIES_DIR, exist_ok=True)

        for level_name, (train_id, valid_id) in id_items.items():
            downloads = (
                (train_id, "BabyAI-{}-v0.pkl".format(level_name)),
                (valid_id, "BabyAI-{}-v0_valid.pkl".format(level_name)),
            )
            for file_id, file_name in downloads:
                save_path = os.path.join(BABYAI_EXPERT_TRAJECTORIES_DIR, file_name)
                if os.path.exists(save_path):
                    print("{} already exists, skipping...".format(save_path))
                    continue

                # `os.system` never raises on a failing command; the only
                # failure signal is a non-zero exit status.
                exit_status = os.system(
                    download_template.format(file_id, file_id, save_path)
                )
                if exit_status != 0:
                    # A failed `wget -O` may leave an empty/partial file behind;
                    # remove it so that a later run does not skip this download.
                    if os.path.exists(save_path):
                        os.remove(save_path)
                    raise RuntimeError(
                        "Download command for {} exited with status {}".format(
                            save_path, exit_status
                        )
                    )
                print("Demos saved to {}.".format(save_path))
    except Exception as e:
        # Keep the original exception as the cause so the real error is visible.
        raise Exception(
            "Failed to download babyai demos. Make sure you have the appropriate command line"
            " tools installed for your platform. For MacOS you'll need to install `wget` and `gsed`"
            " (the gnu version of sed) using homebrew or some other method."
        ) from e


================================================
FILE: allenact_plugins/babyai_plugin/scripts/get_instr_length_percentiles.py
================================================
import glob
import os

import babyai
import numpy as np

from allenact_plugins.babyai_plugin.babyai_constants import (
    BABYAI_EXPERT_TRAJECTORIES_DIR,
)

# Boss level
# [(50, 11.0), (90, 22.0), (99, 32.0), (99.9, 38.0), (99.99, 43.0)]

if __name__ == "__main__":
    # Print (percentile, value) pairs of the number of space-separated tokens
    # in the first element of every demo of a single level.
    # level = "BossLevel"
    level = "GoToLocal"

    demo_glob = os.path.join(
        BABYAI_EXPERT_TRAJECTORIES_DIR, "*{}-v0.pkl".format(level)
    )
    files = glob.glob(demo_glob)
    # Exactly one demo file is expected to match the pattern.
    assert len(files) == 1

    demos = babyai.utils.load_demos(files[0])

    percentiles = [50, 90, 99, 99.9, 99.99, 100]
    # presumably `demo[0]` is the instruction string -- confirm against babyai
    token_counts = [len(demo[0].split(" ")) for demo in demos]
    percentile_values = np.percentile(token_counts, percentiles)
    print(list(zip(percentiles, percentile_values)))


================================================
FILE: allenact_plugins/babyai_plugin/scripts/truncate_expert_demos.py
================================================
import glob
import os

import babyai

from allenact_plugins.babyai_plugin.babyai_constants import (
    BABYAI_EXPERT_TRAJECTORIES_DIR,
)


def make_small_demos(dir: str):
    """Write a truncated `-small` copy of every training demo file in `dir`.

    For each `*.pkl` file in `dir` whose file name contains neither "valid"
    nor "small", the first 1000 demos are saved next to it as
    `<name>-small.pkl`. Files whose small version already exists are left
    untouched.

    The "valid"/"small" filters and the new name are derived from the file
    name only, so a directory path that happens to contain "valid", "small"
    or ".pkl" no longer causes files to be skipped or misnamed.

    # Parameters

    dir : Directory that is searched (non-recursively) for `*.pkl` files.
    """
    for file_path in glob.glob(os.path.join(dir, "*.pkl")):
        file_name = os.path.basename(file_path)
        if "valid" in file_name or "small" in file_name:
            continue

        # Only touch the extension; `str.replace` would rewrite every ".pkl"
        # occurring anywhere in the path.
        path_root, extension = os.path.splitext(file_path)
        new_file_path = path_root + "-small" + extension
        if os.path.exists(new_file_path):
            continue

        print("Saving small version of {} to {}...".format(file_name, new_file_path))
        babyai.utils.save_demos(
            babyai.utils.load_demos(file_path)[:1000], new_file_path
        )
        print("Done.")


if __name__ == "__main__":
    # When run as a script, create the `-small` demo files for the default
    # expert trajectories directory.
    make_small_demos(BABYAI_EXPERT_TRAJECTORIES_DIR)


================================================
FILE: allenact_plugins/clip_plugin/__init__.py
================================================
from allenact.utils.system import ImportChecker

# Import `clip` inside an `ImportChecker` context; presumably the message
# below is surfaced to the user when the import fails (confirm against
# `allenact.utils.system.ImportChecker`).
# NOTE(review): the commit hash in this message differs from the one pinned in
# this plugin's extra_requirements.txt / extra_environment.yml -- confirm which
# one is intended.
with ImportChecker(
    "Cannot `import clip`. Please install clip from the openai/CLIP git repository:"
    "\n`pip install git+https://github.com/openai/CLIP.git@b46f5ac7587d2e1862f8b7b1573179d80dcdd620`"
):
    # noinspection PyUnresolvedReferences
    import clip


================================================
FILE: allenact_plugins/clip_plugin/clip_preprocessors.py
================================================
from typing import List, Optional, Any, cast, Dict, Tuple

import clip
import gym
import numpy as np
import torch
import torch.nn as nn
from clip.model import CLIP

from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.misc_utils import prepare_locals_for_super


class ClipResNetEmbedder(nn.Module):
    """Gradient-free wrapper around the visual tower of a CLIP ResNet.

    The wrapped model's `visual.attnpool` is replaced in place depending on
    the pooling options:

    * `pool=False`: replaced by `nn.Identity()`.
    * `pool=True, pooling_type="attn"`: left as provided by the model.
    * `pool=True, pooling_type="avg"`: replaced by adaptive average pooling
      to 1x1 followed by flattening the last three dimensions.

    Any other `pooling_type` (with `pool=True`) raises `NotImplementedError`.
    """

    def __init__(self, resnet: CLIP, pool=True, pooling_type="avg"):
        super().__init__()
        self.model = resnet
        self.pool = pool
        self.pooling_type = pooling_type

        visual = self.model.visual
        if pool:
            if pooling_type == "avg":
                visual.attnpool = nn.Sequential(
                    nn.AdaptiveAvgPool2d((1, 1)),
                    nn.Flatten(start_dim=-3, end_dim=-1),
                )
            elif pooling_type != "attn":
                raise NotImplementedError("`pooling_type` must be 'avg' or 'attn'.")
            # "attn": nothing to do, the model's own pooling layer is kept.
        else:
            visual.attnpool = nn.Identity()

        # Put the wrapper (and hence the wrapped model) into eval mode.
        self.eval()

    def forward(self, x):
        """Run `x` through the visual tower with gradients disabled."""
        visual_tower = self.model.visual
        with torch.no_grad():
            features = visual_tower(x)
        return features


class ClipResNetPreprocessor(Preprocessor):
    """Preprocess RGB or depth image using a ResNet model with CLIP model
    weights.

    The CLIP model is loaded lazily, on first access of `resnet`. The declared
    output space is an unbounded `gym.spaces.Box` whose shape is
    `(channels, height // 32, width // 32)` when `pool` is false and
    `(channels,)` when `pool` is true, with 2048 channels for "RN50" and 3072
    for "RN50x16".
    """

    # Not referenced inside this class; presumably the normalization constants
    # callers should apply to the RGB input -- confirm against the sensors used.
    CLIP_RGB_MEANS = (0.48145466, 0.4578275, 0.40821073)
    CLIP_RGB_STDS = (0.26862954, 0.26130258, 0.27577711)

    def __init__(
        self,
        rgb_input_uuid: str,
        clip_model_type: str,
        pool: bool,
        device: Optional[torch.device] = None,
        device_ids: Optional[List[torch.device]] = None,
        input_img_height_width: Tuple[int, int] = (224, 224),
        chunk_size: Optional[int] = None,
        **kwargs: Any,
    ):
        # NOTE: every local variable defined in this method is forwarded to the
        # parent constructor through `prepare_locals_for_super(locals())` at
        # the end, so renaming/adding locals changes what the parent receives.
        assert clip_model_type in clip.available_models()
        # Pooling is only allowed with the default 224x224 input; both sides
        # must be multiples of 32 since the output size is computed as `// 32`.
        assert pool == False or input_img_height_width == (224, 224)
        assert all(iis % 32 == 0 for iis in input_img_height_width)

        output_height_width = tuple(iis // 32 for iis in input_img_height_width)
        if clip_model_type == "RN50":
            output_shape = (2048,) + output_height_width
        elif clip_model_type == "RN50x16":
            output_shape = (3072,) + output_height_width
        else:
            raise NotImplementedError(
                f"Currently `clip_model_type` must be one of 'RN50' or 'RN50x16'"
            )

        if pool:
            # Pooled output keeps only the channel dimension.
            output_shape = output_shape[:1]

        self.clip_model_type = clip_model_type

        self.pool = pool

        self.device = torch.device("cpu") if device is None else device
        # Defaults to the indices of all visible CUDA devices.
        self.device_ids = device_ids or cast(
            List[torch.device], list(range(torch.cuda.device_count()))
        )
        # Created lazily by the `resnet` property.
        self._resnet: Optional[ClipResNetEmbedder] = None

        # If set, `process` splits larger batches into chunks of this size.
        self.chunk_size = chunk_size

        low = -np.inf
        high = np.inf
        shape = output_shape

        input_uuids = [rgb_input_uuid]
        assert (
            len(input_uuids) == 1
        ), "resnet preprocessor can only consume one observation type"

        observation_space = gym.spaces.Box(low=low, high=high, shape=shape)

        super().__init__(**prepare_locals_for_super(locals()))

    @property
    def resnet(self) -> ClipResNetEmbedder:
        """The embedder, loaded onto `self.device` on first access.

        After loading, the momentum of every module whose type name contains
        "BatchNorm" is set to 0.0 and the embedder is put into eval mode.
        """
        if self._resnet is None:
            self._resnet = ClipResNetEmbedder(
                clip.load(self.clip_model_type, device=self.device)[0], pool=self.pool
            ).to(self.device)
            for module in self._resnet.modules():
                if "BatchNorm" in type(module).__name__:
                    module.momentum = 0.0
            self._resnet.eval()
        return self._resnet

    def to(self, device: torch.device) -> "ClipResNetPreprocessor":
        """Move the embedder to `device` and record it as the current device.

        Accessing `self.resnet` here loads the model (on the previous
        `self.device`) if it has not been loaded yet.
        """
        self._resnet = self.resnet.to(device)
        self.device = device
        return self

    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:
        """Embed the observation stored under the configured input uuid.

        The input is moved to `self.device` and permuted from
        batch-height-width-channel to batch-channel-height-width. When
        `chunk_size` is set and the batch is larger than it, the batch is
        embedded chunk by chunk and the results concatenated along dim 0.
        The result is converted with `.float()`.
        """
        x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2)  # bhwc -> bchw
        # If the input is depth, repeat it across all 3 channels
        if x.shape[1] == 1:
            x = x.repeat(1, 3, 1, 1)

        n = x.shape[0]
        if self.chunk_size is not None and x.shape[0] > self.chunk_size:
            processed_chunks = []
            for idx in range(0, n, self.chunk_size):
                processed_chunks.append(
                    self.resnet(x[idx : min(idx + self.chunk_size, n)]).float()
                )
            x = torch.cat(processed_chunks, dim=0)
        else:
            x = self.resnet(x).float()
        return x


class ClipViTEmbedder(nn.Module):
    """Gradient-free token embedder built on the visual tower of a CLIP ViT.

    On construction the last residual block of the model's visual transformer
    is dropped (the passed model is modified in place). `forward` returns the
    transformer output for all tokens, or only the token at index 0 when
    `class_emb_only` is true.
    """

    def __init__(self, model: CLIP, class_emb_only: bool = False):
        super().__init__()
        self.model = model

        # Keep every residual block except the final one.
        transformer = self.model.visual.transformer
        kept_blocks = list(transformer.resblocks)[:-1]
        transformer.resblocks = nn.Sequential(*kept_blocks)

        self.class_emb_only = class_emb_only

        self.eval()

    def forward(self, x):
        visual = self.model.visual
        with torch.no_grad():
            patches = visual.conv1(x)  # shape = [*, width, grid, grid]
            batch_size, width = patches.shape[0], patches.shape[1]
            # [*, width, grid ** 2] -> [*, grid ** 2, width]
            tokens = patches.reshape(batch_size, width, -1).permute(0, 2, 1)

            # Broadcast the class embedding to one token per batch element.
            class_token = visual.class_embedding.to(tokens.dtype) + torch.zeros(
                batch_size, 1, tokens.shape[-1], dtype=tokens.dtype, device=tokens.device
            )
            # shape = [*, grid ** 2 + 1, width]
            tokens = torch.cat([class_token, tokens], dim=1)
            tokens = tokens + visual.positional_embedding.to(tokens.dtype)
            tokens = visual.ln_pre(tokens)

            # NLD -> LND for the transformer, then back to NLD.
            tokens = visual.transformer(tokens.permute(1, 0, 2)).permute(1, 0, 2)

            if not self.class_emb_only:
                return tokens
            return tokens[:, 0, :]


class ClipViTPreprocessor(Preprocessor):
    """Preprocess RGB or depth image using a ViT model with CLIP model
    weights.

    The CLIP model is loaded lazily, on first access of `vit`. The declared
    output space is an unbounded `gym.spaces.Box` of shape
    `(num_tokens, width)`, or `(width,)` when `class_emb_only` is true.
    Supported `clip_model_type` values are 'ViT-B/32', 'ViT-B/16' and
    'ViT-L/14'.
    """

    # Not referenced inside this class; presumably the normalization constants
    # callers should apply to the RGB input -- confirm against the sensors used.
    CLIP_RGB_MEANS = (0.48145466, 0.4578275, 0.40821073)
    CLIP_RGB_STDS = (0.26862954, 0.26130258, 0.27577711)

    def __init__(
        self,
        rgb_input_uuid: str,
        clip_model_type: str,
        class_emb_only: bool,
        device: Optional[torch.device] = None,
        device_ids: Optional[List[torch.device]] = None,
        **kwargs: Any,
    ):
        # NOTE: every local variable defined in this method is forwarded to the
        # parent constructor through `prepare_locals_for_super(locals())` at
        # the end, so the set of locals below must not be changed casually.
        assert clip_model_type in clip.available_models()

        # (number of tokens including the class token, embedding width)
        if clip_model_type == "ViT-B/32":
            output_shape = (7 * 7 + 1, 768)
        elif clip_model_type == "ViT-B/16":
            output_shape = (14 * 14 + 1, 768)
        elif clip_model_type == "ViT-L/14":
            output_shape = (16 * 16 + 1, 1024)
        else:
            raise NotImplementedError(
                "Currently `clip_model_type` must be one of 'ViT-B/32', 'ViT-B/16', or 'ViT-L/14'"
            )

        if class_emb_only:
            # Only the class token is returned, so drop the token dimension.
            output_shape = output_shape[1:]

        self.clip_model_type = clip_model_type

        self.class_emb_only = class_emb_only

        self.device = torch.device("cpu") if device is None else device
        # Defaults to the indices of all visible CUDA devices.
        self.device_ids = device_ids or cast(
            List[torch.device], list(range(torch.cuda.device_count()))
        )
        # Created lazily by the `vit` property.
        self._vit: Optional[ClipViTEmbedder] = None

        low = -np.inf
        high = np.inf
        shape = output_shape

        input_uuids = [rgb_input_uuid]
        assert (
            len(input_uuids) == 1
        ), "ViT preprocessor can only consume one observation type"

        observation_space = gym.spaces.Box(low=low, high=high, shape=shape)

        super().__init__(**prepare_locals_for_super(locals()))

    @property
    def vit(self) -> ClipViTEmbedder:
        """The embedder, loaded onto `self.device` on first access.

        After loading, the momentum of every module whose type name contains
        "BatchNorm" is set to 0.0 and the embedder is put into eval mode.
        """
        if self._vit is None:
            self._vit = ClipViTEmbedder(
                model=clip.load(self.clip_model_type, device=self.device)[0],
                class_emb_only=self.class_emb_only,
            ).to(self.device)
            for module in self._vit.modules():
                if "BatchNorm" in type(module).__name__:
                    module.momentum = 0.0
            self._vit.eval()
        return self._vit

    def to(self, device: torch.device) -> "ClipViTPreprocessor":
        """Move the embedder to `device` and record it as the current device.

        Accessing `self.vit` here loads the model (on the previous
        `self.device`) if it has not been loaded yet.
        """
        self._vit = self.vit.to(device)
        self.device = device
        return self

    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:
        """Embed the observation stored under the configured input uuid.

        The input is moved to `self.device` and permuted from
        batch-height-width-channel to batch-channel-height-width; the result is
        converted with `.float()`.
        """
        x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2)  # bhwc -> bchw
        # If the input is depth, repeat it across all 3 channels
        if x.shape[1] == 1:
            x = x.repeat(1, 3, 1, 1)
        x = self.vit(x).float()
        return x


================================================
FILE: allenact_plugins/clip_plugin/extra_environment.yml
================================================
channels:
  - pytorch
  - defaults
  - conda-forge
dependencies:
  - pytorch>=1.7.1
  - torchvision
  - pip:
      - ftfy
      - regex
      - tqdm
      - "--editable=git+https://github.com/openai/CLIP.git@e184f608c5d5e58165682f7c332c3a8b4c1545f2#egg=clip"


================================================
FILE: allenact_plugins/clip_plugin/extra_requirements.txt
================================================
torch>=1.7.1
torchvision
ftfy
regex
tqdm
clip @ git+https://github.com/openai/clip@e184f608c5d5e58165682f7c332c3a8b4c1545f2#egg=clip


================================================
FILE: allenact_plugins/gym_plugin/__init__.py
================================================


================================================
FILE: allenact_plugins/gym_plugin/extra_environment.yml
================================================
channels:
  - defaults
  - conda-forge
dependencies:
  - gym-box2d>=0.17.0,<0.20.0


================================================
FILE: allenact_plugins/gym_plugin/extra_requirements.txt
================================================
gym[box2d]>=0.17.0,<0.20.0


================================================
FILE: allenact_plugins/gym_plugin/gym_distributions.py
================================================
import torch

from allenact.base_abstractions.distributions import Distr


class GaussianDistr(torch.distributions.Normal, Distr):
    """PyTorch's Normal distribution with a `mode` method."""

    def mode(self) -> torch.FloatTensor:
        """Return the distribution's mean, which is used as its mode."""
        return super().mean


================================================
FILE: allenact_plugins/gym_plugin/gym_environment.py
================================================
from typing import Optional

import gym
import numpy as np


class GymEnvironment(gym.Wrapper):
    """gym.Wrapper that remembers the observation returned by the last reset.

    The environment is reset once on construction. The stored observation can
    be read exactly once per reset through `initial_observation`.
    """

    def __init__(self, gym_env_name: str):
        wrapped_env = gym.make(gym_env_name)
        super().__init__(wrapped_env)
        self._initial_observation: Optional[np.ndarray] = None
        # Reset right away so that an initial observation is available.
        self.reset()

    def reset(self) -> np.ndarray:
        """Reset the wrapped environment, store and return its observation."""
        first_observation = self.env.reset()
        self._initial_observation = first_observation
        return first_observation

    @property
    def initial_observation(self) -> np.ndarray:
        """The observation stored by the last `reset`; cleared once read."""
        stored = self._initial_observation
        assert (
            stored is not None
        ), "Attempted to read initial_observation without calling reset()"
        self._initial_observation = None
        return stored


================================================
FILE: allenact_plugins/gym_plugin/gym_models.py
================================================
from typing import Dict, Union, Optional, Tuple, Any, Sequence, cast

import gym
import torch
import torch.nn as nn

from allenact.algorithms.onpolicy_sync.policy import (
    ActorCriticModel,
    DistributionType,
)
from allenact.base_abstractions.misc import ActorCriticOutput, Memory
from allenact_plugins.gym_plugin.gym_distributions import GaussianDistr


class MemorylessActorCritic(ActorCriticModel[GaussianDistr]):
    """ActorCriticModel for gym tasks with continuous control in the range [-1,
    1].

    Actor and critic are separate Tanh MLPs over the 1-D observation found
    under `input_uuid`. The actor's output passes through a final Tanh and is
    used as the mean of a Gaussian with a fixed standard deviation.

    # Parameters

    input_uuid : Key of the observation fed to both networks.
    action_space : 1-D `Box` action space; its size is the number of means.
    observation_space : Dict space; `observation_space[input_uuid]` must be 1-D.
    action_std : Standard deviation used for every action dimension.
    mlp_hidden_dims : Sizes of the hidden layers of both MLPs. The output
        layers are sized from the last entry, so any sequence is supported.
    """

    def __init__(
        self,
        input_uuid: str,
        action_space: gym.spaces.Box,
        observation_space: gym.spaces.Dict,
        action_std: float = 0.5,
        mlp_hidden_dims: Sequence[int] = (64, 32),
    ):
        super().__init__(action_space, observation_space)

        self.input_uuid = input_uuid
        assert len(observation_space[self.input_uuid].shape) == 1
        state_dim = observation_space[self.input_uuid].shape[0]
        assert len(action_space.shape) == 1
        action_dim = action_space.shape[0]

        mlp_hidden_dims = (state_dim,) + tuple(mlp_hidden_dims)
        # Size the heads from the last hidden layer instead of hard-coding 32,
        # which only matched the default `mlp_hidden_dims`.
        last_hidden_dim = mlp_hidden_dims[-1]

        # action mean range -1 to 1
        self.actor = nn.Sequential(
            *self.make_mlp_hidden(nn.Tanh, *mlp_hidden_dims),
            nn.Linear(last_hidden_dim, action_dim),
            nn.Tanh(),
        )

        # critic
        self.critic = nn.Sequential(
            *self.make_mlp_hidden(nn.Tanh, *mlp_hidden_dims),
            nn.Linear(last_hidden_dim, 1),
        )

        # fixed standard deviation, shaped (1, 1, action_dim); not saved in
        # the state dict (`persistent=False`)
        self.register_buffer(
            "action_std",
            torch.tensor([action_std] * action_dim).view(1, 1, -1),
            persistent=False,
        )

    @staticmethod
    def make_mlp_hidden(nl, *dims):
        """Return a flat list of `Linear(dims[i], dims[i + 1])` layers, each
        followed by an instance of the non-linearity class `nl`."""
        res = []
        for in_dim, out_dim in zip(dims[:-1], dims[1:]):
            res.append(nn.Linear(in_dim, out_dim))
            res.append(nl())
        return res

    def _recurrent_memory_specification(self):
        # The model keeps no recurrent state.
        return None

    def forward(  # type:ignore
        self,
        observations: Dict[str, Union[torch.FloatTensor, Dict[str, Any]]],
        memory: Memory,
        prev_actions: Any,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Compute the action distribution and value for `observations`.

        `memory`, `prev_actions` and `masks` are not used. Returns the
        actor-critic output together with `None` in place of a memory.
        """
        means = self.actor(observations[self.input_uuid])
        values = self.critic(observations[self.input_uuid])

        return (
            ActorCriticOutput(
                cast(DistributionType, GaussianDistr(loc=means, scale=self.action_std)),
                values,
                {},
            ),
            None,  # no Memory
        )


================================================
FILE: allenact_plugins/gym_plugin/gym_sensors.py
================================================
from typing import Optional, Any

import gym
import numpy as np

from allenact.base_abstractions.sensor import Sensor, prepare_locals_for_super
from allenact.base_abstractions.task import Task, SubTaskType
from allenact_plugins.gym_plugin.gym_environment import GymEnvironment


class GymBox2DSensor(Sensor[gym.Env, Task[gym.Env]]):
    """Sensor exposing the raw observations of gym Box2D environments."""

    def __init__(
        self,
        gym_env_name: str = "LunarLanderContinuous-v2",
        uuid: str = "gym_box2d_sensor",
        **kwargs: Any
    ):
        self.gym_env_name = gym_env_name

        observation_space = self._get_observation_space()

        # All locals of this method are forwarded to the parent constructor.
        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self) -> gym.Space:
        """Return the observation space matching `self.gym_env_name`.

        Raises `NotImplementedError` for unsupported environment names.
        """
        env_name = self.gym_env_name

        if env_name in ("LunarLanderContinuous-v2", "LunarLander-v2"):
            return gym.spaces.Box(-np.inf, np.inf, shape=(8,), dtype=np.float32)

        if env_name in ("BipedalWalker-v2", "BipedalWalkerHardcore-v2"):
            bound = np.full((24,), np.inf)
            return gym.spaces.Box(-bound, bound, dtype=np.float32)

        if env_name == "CarRacing-v0":
            frame_w, frame_h = 96, 96
            return gym.spaces.Box(
                low=0, high=255, shape=(frame_h, frame_w, 3), dtype=np.uint8
            )

        raise NotImplementedError()

    def get_observation(
        self,
        env: GymEnvironment,
        task: Optional[SubTaskType],
        *args,
        gym_obs: Optional[np.ndarray] = None,
        **kwargs: Any
    ) -> np.ndarray:
        """Return `gym_obs` when given, else the env's stored initial
        observation."""
        return env.initial_observation if gym_obs is None else gym_obs


class GymMuJoCoSensor(Sensor[gym.Env, Task[gym.Env]]):
    """Sensor exposing observations of gym MuJoCo and Robotics environments,
    coerced to float32."""

    def __init__(self, gym_env_name: str, uuid: str, **kwargs: Any):
        self.gym_env_name = gym_env_name

        observation_space = self._get_observation_space()

        # All locals of this method are forwarded to the parent constructor.
        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self) -> gym.Space:
        """Return the observation space matching `self.gym_env_name`.

        Raises `NotImplementedError` for unsupported environment names.
        """
        env_name = self.gym_env_name

        # observation sizes for gym MuJoCo (flat, unbounded float32 vectors)
        mujoco_obs_sizes = {
            "InvertedPendulum-v2": 4,
            "Ant-v2": 111,
            "Reacher-v2": 11,
            "Hopper-v2": 11,
            "InvertedDoublePendulum-v2": 11,
            "HumanoidStandup-v2": 376,
            "Humanoid-v2": 376,
            "HalfCheetah-v2": 17,
            "Walker2d-v2": 17,
            "Swimmer-v2": 8,
        }
        if env_name in mujoco_obs_sizes:
            return gym.spaces.Box(
                -np.inf, np.inf, shape=(mujoco_obs_sizes[env_name],), dtype="float32"
            )

        # TODO observation space for gym Robotics
        if env_name == "HandManipulateBlock-v0":

            def unbounded(size: int) -> gym.spaces.Box:
                return gym.spaces.Box(-np.inf, np.inf, shape=(size,), dtype="float32")

            return gym.spaces.Dict(
                dict(
                    desired_goal=unbounded(7),
                    achieved_goal=unbounded(7),
                    observation=unbounded(61),
                )
            )

        raise NotImplementedError

    def get_observation(
        self,
        env: GymEnvironment,
        task: Optional[SubTaskType],
        *args,
        gym_obs: Optional[np.ndarray] = None,
        **kwargs: Any
    ) -> np.ndarray:
        """Return `gym_obs` (or, when it is `None`, the env's stored initial
        observation) as a float32 array."""
        raw_obs = gym_obs if gym_obs is not None else env.initial_observation
        return np.array(raw_obs, dtype=np.float32)  # coerce to be float32


================================================
FILE: allenact_plugins/gym_plugin/gym_tasks.py
================================================
import random
from typing import Any, List, Dict, Optional, Union, Callable, Sequence, Tuple

import gym
import numpy as np
from gym.utils import seeding

from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor, SensorSuite
from allenact.base_abstractions.task import Task, TaskSampler
from allenact.utils.experiment_utils import set_seed
from allenact.utils.system import get_logger
from allenact_plugins.gym_plugin.gym_environment import GymEnvironment
from allenact_plugins.gym_plugin.gym_sensors import GymBox2DSensor, GymMuJoCoSensor


class GymTask(Task[gym.Env]):
    """Base class for tasks backed by a gym environment.

    Concrete subclasses must provide `class_action_names` and `_step`; `_step`
    is expected to store the environment's done flag in `self._gym_done`.
    """

    def __init__(
        self,
        env: GymEnvironment,
        sensors: Union[SensorSuite, List[Sensor]],
        task_info: Dict[str, Any],
        **kwargs,
    ):
        # The step limit is taken from the environment's spec.
        super().__init__(
            env=env,
            sensors=sensors,
            task_info=task_info,
            max_steps=env.spec.max_episode_steps,
            **kwargs,
        )
        self._gym_done = False
        self.task_name: str = self.env.spec.id

    @property
    def action_space(self) -> gym.spaces.Space:
        """The action space of the underlying environment."""
        return self.env.action_space

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Render the environment; mode "rgb" is passed on as "rgb_array"."""
        gym_mode = "rgb_array" if mode == "rgb" else mode
        return self.env.render(mode=gym_mode)

    def get_observations(
        self, *args, gym_obs: Optional[Dict[str, Any]] = None, **kwargs
    ) -> Any:
        """Query the sensor suite, handing `gym_obs` through to the sensors."""
        suite = self.sensor_suite
        return suite.get_observations(env=self.env, task=self, gym_obs=gym_obs)

    def reached_terminal_state(self) -> bool:
        """Whether the last stored gym done flag is set."""
        return self._gym_done

    def close(self) -> None:
        """Nothing to release for this task."""
        pass

    def metrics(self) -> Dict[str, Any]:
        """Merge the parent's metrics, the env's metrics (as floats) and a
        "success" entry.

        "success" is the env's `was_successful` attribute when it exists,
        otherwise whether the cumulative reward is positive.
        """
        # noinspection PyUnresolvedReferences,PyCallingNonCallable
        env_metrics = self.env.metrics() if hasattr(self.env, "metrics") else {}

        merged = dict(super().metrics())
        merged.update((name, float(value)) for name, value in env_metrics.items())

        if hasattr(self.env, "was_successful"):
            succeeded = self.env.was_successful
        else:
            succeeded = self.cumulative_reward > 0
        merged["success"] = int(succeeded)
        return merged


class GymContinuousTask(GymTask):
    """Continuous-control task wrapping a gym Box2D or MuJoCo environment so
    that it can be used from allenact."""

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Continuous actions have no names: an empty tuple."""
        return ()

    def _step(self, action: Sequence[float]) -> RLStepResult:
        """Step the environment with `action` converted to a numpy array.

        The environment's done flag is stored in `self._gym_done`.
        """
        step_output = self.env.step(action=np.array(action))
        gym_obs, reward, self._gym_done, info = step_output

        # Observations are gathered before `is_done()` is evaluated.
        observation = self.get_observations(gym_obs=gym_obs)
        finished = self.is_done()

        return RLStepResult(
            observation=observation, reward=reward, done=finished, info=info
        )


def default_task_selector(env_name: str) -> type:
    """Helper function for `GymTaskSampler`.

    Returns `GymContinuousTask` for the supported continuous-control
    environment names and raises `NotImplementedError` for any other name.
    """
    box2d_env_names = (
        "CarRacing-v0",
        "LunarLanderContinuous-v2",
        "BipedalWalker-v2",
        "BipedalWalkerHardcore-v2",
    )
    mujoco_env_names = (
        "InvertedPendulum-v2",
        "Ant-v2",
        "InvertedDoublePendulum-v2",
        "Humanoid-v2",
        "Reacher-v2",
        "Hopper-v2",
        "HalfCheetah-v2",
        "Swimmer-v2",
        "Walker2d-v2",
    )

    if env_name not in box2d_env_names + mujoco_env_names:
        raise NotImplementedError()
    return GymContinuousTask


def sensor_selector(env_name: str) -> Sensor:
    """Helper function for `GymTaskSampler`.

    Returns a `GymBox2DSensor` for Box2D environment names, a
    `GymMuJoCoSensor` (uuid "gym_mujoco_data") for MuJoCo environment names,
    and raises `NotImplementedError` for any other name.
    """
    box2d_env_names = (
        "CarRacing-v0",
        "LunarLanderContinuous-v2",
        "BipedalWalker-v2",
        "BipedalWalkerHardcore-v2",
        "LunarLander-v2",
    )
    mujoco_env_names = (
        "InvertedPendulum-v2",
        "Ant-v2",
        "InvertedDoublePendulum-v2",
        "Humanoid-v2",
        "Reacher-v2",
        "Hopper-v2",
        "HalfCheetah-v2",
        "Swimmer-v2",
        "Walker2d-v2",
    )

    if env_name in box2d_env_names:
        return GymBox2DSensor(env_name)
    if env_name in mujoco_env_names:
        return GymMuJoCoSensor(gym_env_name=env_name, uuid="gym_mujoco_data")
    raise NotImplementedError()


class GymTaskSampler(TaskSampler):
    """TaskSampler for gym environments.

    A single `GymEnvironment` is created at construction and re-seeded /
    reset for every sampled task. Seeds are either drawn from a fixed list
    (`task_seeds_list` / `num_unique_seeds`) or sampled at random.
    """

    def __init__(
        self,
        gym_env_type: str = "LunarLanderContinuous-v2",
        sensors: Optional[Union[SensorSuite, List[Sensor]]] = None,
        max_tasks: Optional[int] = None,
        num_unique_seeds: Optional[int] = None,
        task_seeds_list: Optional[List[int]] = None,
        deterministic_sampling: bool = False,
        task_selector: Callable[[str], type] = default_task_selector,
        repeat_failed_task_for_min_steps: int = 0,
        extra_task_kwargs: Optional[Dict] = None,
        seed: Optional[int] = None,
        **kwargs,
    ):
        """Configure the sampler and create the underlying environment.

        # Parameters

        gym_env_type : Name of the gym environment to instantiate.
        sensors : Sensors (or a ready `SensorSuite`) for the tasks; when
            `None` the default from `sensor_selector` is used.
        max_tasks : Maximum number of tasks to sample (`None` = unbounded).
        num_unique_seeds : Number of distinct environment seeds to use.
        task_seeds_list : Explicit list of environment seeds; its length must
            equal `num_unique_seeds` when both are given.
        deterministic_sampling : Cycle through the seed list in order instead
            of sampling from it at random.
        task_selector : Maps the environment name to a task class.
        repeat_failed_task_for_min_steps : When positive (and no seed list is
            used), a seed is reused while the previous task's cumulative
            reward is 0 and fewer than this many steps were taken with it.
        extra_task_kwargs : Extra keyword arguments forwarded to every task.
        seed : Seed for this sampler's random generator.
        kwargs : Accepted and ignored.
        """
        super().__init__()

        self.gym_env_type = gym_env_type

        self.sensors: SensorSuite
        if sensors is None:
            self.sensors = SensorSuite([sensor_selector(self.gym_env_type)])
        else:
            self.sensors = (
                SensorSuite(sensors)
                if not isinstance(sensors, SensorSuite)
                else sensors
            )

        self.max_tasks = max_tasks
        self.num_unique_seeds = num_unique_seeds
        self.deterministic_sampling = deterministic_sampling
        self.repeat_failed_task_for_min_steps = repeat_failed_task_for_min_steps
        self.extra_task_kwargs = (
            extra_task_kwargs if extra_task_kwargs is not None else {}
        )

        self._last_env_seed: Optional[int] = None
        self._last_task: Optional[GymTask] = None
        self._number_of_steps_taken_with_task_seed = 0

        assert (not deterministic_sampling) or repeat_failed_task_for_min_steps <= 0, (
            "If `deterministic_sampling` is True then we require"
            " `repeat_failed_task_for_min_steps <= 0`"
        )
        assert (self.num_unique_seeds is None) or (
            0 < self.num_unique_seeds
        ), "`num_unique_seeds` must be a positive integer."

        # Reconcile `num_unique_seeds` with `task_seeds_list`: an explicit list
        # fixes the count, a bare count expands to the seeds `0..count-1`.
        self.task_seeds_list = task_seeds_list
        if self.task_seeds_list is not None:
            if self.num_unique_seeds is not None:
                assert self.num_unique_seeds == len(
                    self.task_seeds_list
                ), "`num_unique_seeds` must equal the length of `task_seeds_list` if both specified."
            self.num_unique_seeds = len(self.task_seeds_list)
        elif self.num_unique_seeds is not None:
            self.task_seeds_list = list(range(self.num_unique_seeds))
        if num_unique_seeds is not None and repeat_failed_task_for_min_steps > 0:
            raise NotImplementedError(
                "`repeat_failed_task_for_min_steps` must be <=0 if number"
                " of unique seeds is not None."
            )

        assert (not deterministic_sampling) or (
            self.num_unique_seeds is not None
        ), "Cannot use deterministic sampling when `num_unique_seeds` is `None`."

        if (not deterministic_sampling) and self.max_tasks:
            get_logger().warning(
                "`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,"
                " this might be a mistake when running testing."
            )

        if seed is not None:
            self.set_seed(seed)
        else:
            self.np_seeded_random_gen, _ = seeding.np_random(
                random.randint(0, 2**31 - 1)
            )

        self.num_tasks_generated = 0
        self.task_type = task_selector(self.gym_env_type)
        self.env: GymEnvironment = GymEnvironment(self.gym_env_type)

    @property
    def length(self) -> Union[int, float]:
        """Number of tasks that can still be sampled (`inf` if unbounded)."""
        return (
            float("inf")
            if self.max_tasks is None
            else self.max_tasks - self.num_tasks_generated
        )

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Number of unique seeds, or `None` when seeds are not restricted."""
        return self.num_unique_seeds

    @property
    def last_sampled_task(self) -> Optional[Task]:
        """Task most recently returned by `next_task` (`None` before the first)."""
        return self._last_task

    def next_task(self, force_advance_scene: bool = False) -> Optional[GymTask]:
        """Seed and reset the environment, then wrap it in a new task.

        Returns `None` once `max_tasks` tasks have been generated.
        `force_advance_scene` is not used by this sampler.
        """
        if self.length <= 0:
            return None

        repeating = False
        if self.num_unique_seeds is not None:
            if self.deterministic_sampling:
                # Cycle through the seed list in order.
                self._last_env_seed = self.task_seeds_list[
                    self.num_tasks_generated % len(self.task_seeds_list)
                ]
            else:
                self._last_env_seed = self.np_seeded_random_gen.choice(
                    self.task_seeds_list
                )
        else:
            if self._last_task is not None:
                self._number_of_steps_taken_with_task_seed += (
                    self._last_task.num_steps_taken()
                )

            if (
                self._last_env_seed is not None
                and self._number_of_steps_taken_with_task_seed
                < self.repeat_failed_task_for_min_steps
                and self._last_task.cumulative_reward == 0
            ):
                # Keep the previous seed: zero cumulative reward and too few
                # steps spent on it so far.
                repeating = True
            else:
                self._number_of_steps_taken_with_task_seed = 0
                self._last_env_seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)

        task_has_same_seed_reset = hasattr(self.env, "same_seed_reset")

        if repeating and task_has_same_seed_reset:
            # noinspection PyUnresolvedReferences
            self.env.same_seed_reset()
        else:
            self.env.seed(self._last_env_seed)
            self.env.saved_seed = self._last_env_seed
            self.env.reset()

        self.num_tasks_generated += 1

        task_info = {"id": "random%d" % random.randint(0, 2**63 - 1)}

        self._last_task = self.task_type(
            **dict(env=self.env, sensors=self.sensors, task_info=task_info),
            **self.extra_task_kwargs,
        )

        return self._last_task

    def close(self) -> None:
        """Close the wrapped environment."""
        self.env.close()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Always `True` for this sampler."""
        return True

    def reset(self) -> None:
        """Restart task counting and reset the wrapped environment."""
        self.num_tasks_generated = 0
        self.env.reset()

    def set_seed(self, seed: int) -> None:
        """Re-create this sampler's random generator from `seed`.

        When `seed` is not `None` the module-level `set_seed` helper is also
        invoked with it.
        """
        self.np_seeded_random_gen, _ = seeding.np_random(seed)
        if seed is not None:
            set_seed(seed)


================================================
FILE: allenact_plugins/habitat_plugin/__init__.py
================================================
from allenact.utils.system import ImportChecker

# Import the habitat packages inside `ImportChecker`, passing it installation
# instructions for habitat.
with ImportChecker(
    "\n\nPlease install habitat following\n\n"
    "https://allenact.org/installation/installation-framework/#installation-of-habitat\n\n"
):
    import habitat
    import habitat_sim


================================================
FILE: allenact_plugins/habitat_plugin/data/__init__.py
================================================


================================================
FILE: allenact_plugins/habitat_plugin/extra_environment.yml
================================================
channels:
  - defaults
  - conda-forge
  - aihabitat
dependencies:
  - habitat-sim=0.1.5
  - numba
  - pip
  - pip:
      - "--editable=git+https://github.com/Lucaweihs/habitat-lab.git@99124c785bd5ca51e321ea20462f71071cd43ae2#egg=habitat"
      - numpy-quaternion
      - pyquaternion>=0.9.9


================================================
FILE: allenact_plugins/habitat_plugin/extra_environment_headless.yml
================================================
channels:
  - defaults
  - conda-forge
  - aihabitat
dependencies:
  - habitat-sim=0.1.5
  - headless
  - numba
  - pip
  - pip:
      - "--editable=git+https://github.com/Lucaweihs/habitat-lab.git@99124c785bd5ca51e321ea20462f71071cd43ae2#egg=habitat"
      - numpy-quaternion
      - pyquaternion>=0.9.9


================================================
FILE: allenact_plugins/habitat_plugin/extra_requirements.txt
================================================
habitat @ git+https://github.com/facebookresearch/habitat-lab.git@33654923dc733f5fcea23aea6391034c3f694a67
numpy-quaternion
pyquaternion>=0.9.9
numba


================================================
FILE: allenact_plugins/habitat_plugin/habitat_constants.py
================================================
import os

# Location of the habitat-lab checkout: taken from the `HABITAT_BASE_DIR`
# environment variable, falling back to a directory under the current
# working directory.
HABITAT_BASE = os.environ.get(
    "HABITAT_BASE_DIR",
    os.path.join(os.getcwd(), "external_projects", "habitat-lab"),
)
# Habitat data is looked up in `./data` relative to the working directory.
HABITAT_DATA_BASE = os.path.join(os.getcwd(), "data")

# Fail at import time when either directory is missing.
if not (os.path.exists(HABITAT_BASE) and os.path.exists(HABITAT_DATA_BASE)):
    raise ImportError(
        "In order to run properly the Habitat environment makes several assumptions about the file structure of"
        " the local system. The file structure of the current environment does not seem to respect this required"
        " file structure. Please see https://allenact.org/installation/installation-framework/#installation-of-habitat"
        " for details as to how to set up your local environment to make it possible to use the habitat plugin of"
        " AllenAct."
    )

# Sub-directories derived from the two roots above.
HABITAT_DATASETS_DIR = os.path.join(HABITAT_DATA_BASE, "datasets")
HABITAT_SCENE_DATASETS_DIR = os.path.join(HABITAT_DATA_BASE, "scene_datasets")
HABITAT_CONFIGS_DIR = os.path.join(HABITAT_BASE, "configs")

TESTED_HABITAT_COMMIT = "33654923dc733f5fcea23aea6391034c3f694a67"

# Action-name strings used by the habitat tasks.
MOVE_AHEAD = "MOVE_FORWARD"
ROTATE_LEFT = "TURN_LEFT"
ROTATE_RIGHT = "TURN_RIGHT"
LOOK_DOWN = "LOOK_DOWN"
LOOK_UP = "LOOK_UP"
END = "STOP"


================================================
FILE: allenact_plugins/habitat_plugin/habitat_environment.py
================================================
"""A wrapper for interacting with the Habitat environment."""

import os
from typing import Dict, Union, List, Optional

import numpy as np

import habitat
from allenact.utils.cache_utils import DynamicDistanceCache
from allenact.utils.system import get_logger
from habitat.config import Config
from habitat.core.dataset import Dataset
from habitat.core.simulator import Observations, AgentState, ShortestPathPoint
from habitat.tasks.nav.nav import NavigationEpisode as HabitatNavigationEpisode


class HabitatEnvironment:
    """Thin wrapper around a `habitat.Env`.

    Keeps the observations returned by the most recent `reset`/`step` so
    that sensors can read them through `current_frame`.
    """

    def __init__(self, config: Config, dataset: Dataset, verbose: bool = False) -> None:
        """Create the wrapped `habitat.Env`.

        # Parameters

        config : Habitat configuration passed to `habitat.Env`.
        dataset : Habitat dataset passed to `habitat.Env`.
        verbose : When `False`, the `GLOG_minloglevel` and `MAGNUM_LOG`
            environment variables are set to reduce log output.
        """
        # Export the logging variables *before* building the env so they are
        # already present while the simulator is being initialised.
        if not verbose:
            os.environ["GLOG_minloglevel"] = "2"
            os.environ["MAGNUM_LOG"] = "quiet"

        self.env = habitat.Env(config=config, dataset=dataset)

        self.goal_index = 0
        self.last_geodesic_distance = None
        self.distance_cache = DynamicDistanceCache(rounding=1)
        # Observations from the latest `reset`/`step`; `None` until then.
        self._current_frame: Optional[Observations] = None

    @property
    def scene_name(self) -> str:
        """Scene id of the current habitat episode."""
        return self.env.current_episode.scene_id

    @property
    def current_frame(self) -> Observations:
        """Observations from the most recent `reset` or `step`.

        Asserts that at least one `reset`/`step` has happened.
        """
        assert self._current_frame is not None
        return self._current_frame

    def step(self, action_dict: Dict[str, Union[str, int]]) -> Observations:
        """Execute `action_dict["action"]` and cache the resulting observations."""
        obs = self.env.step(action_dict["action"])
        self._current_frame = obs
        return obs

    def get_location(self) -> Optional[np.ndarray]:
        """Position stored in the simulator's agent state."""
        return self.env.sim.get_agent_state().position

    def get_rotation(self) -> Optional[List[float]]:
        """Rotation stored in the simulator's agent state.

        NOTE(review): the rotation appears to be a quaternion object rather
        than a plain list -- confirm against the habitat `AgentState` type.
        """
        return self.env.sim.get_agent_state().rotation

    def get_shortest_path(
        self,
        source_state: AgentState,
        target_state: AgentState,
    ) -> List[ShortestPathPoint]:
        """Query the simulator for a shortest action path between two states."""
        return self.env.sim.action_space_shortest_path(source_state, [target_state])

    def get_current_episode(self) -> HabitatNavigationEpisode:
        """Return the episode currently loaded in the habitat env."""
        return self.env.current_episode  # type: ignore

    # noinspection PyMethodMayBeStatic
    def start(self):
        """No-op apart from a debug log message."""
        get_logger().debug("No need to start a habitat_plugin env")

    def stop(self):
        """Close the wrapped habitat env."""
        self.env.close()

    def reset(self):
        """Reset the habitat env and cache the initial observations."""
        self._current_frame = self.env.reset()

    @property
    def last_action_success(self) -> bool:
        """Always `True`."""
        # For now we can not have failure of actions
        return True

    @property
    def num_episodes(self) -> int:
        """Number of episodes held by the env's episode iterator."""
        ep_iterator = self.env.episode_iterator
        assert isinstance(ep_iterator, habitat.core.dataset.EpisodeIterator)
        return len(ep_iterator.episodes)


================================================
FILE: allenact_plugins/habitat_plugin/habitat_preprocessors.py
================================================


================================================
FILE: allenact_plugins/habitat_plugin/habitat_sensors.py
================================================
from typing import Any, Optional, Tuple, TYPE_CHECKING

import gym
import numpy as np
from pyquaternion import Quaternion

from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact_plugins.habitat_plugin.habitat_environment import HabitatEnvironment

if TYPE_CHECKING:
    from allenact_plugins.habitat_plugin.habitat_tasks import PointNavTask, ObjectNavTask  # type: ignore


class RGBSensorHabitat(RGBSensor[HabitatEnvironment, Task[HabitatEnvironment]]):
    """RGB sensor that reads the `"rgb"` entry of the habitat env's current frame."""

    # For backwards compatibility
    def __init__(
        self,
        use_resnet_normalization: bool = False,
        mean: Optional[np.ndarray] = np.array(
            [[[0.485, 0.456, 0.406]]], dtype=np.float32
        ),
        stdev: Optional[np.ndarray] = np.array(
            [[[0.229, 0.224, 0.225]]], dtype=np.float32
        ),
        height: Optional[int] = None,
        width: Optional[int] = None,
        uuid: str = "rgb",
        output_shape: Optional[Tuple[int, ...]] = None,
        output_channels: int = 3,
        unnormalized_infimum: float = 0.0,
        unnormalized_supremum: float = 1.0,
        scale_first: bool = True,
        **kwargs: Any
    ):
        # All arguments are handed to the parent initializer through
        # `prepare_locals_for_super(locals())`, so no extra local variables
        # should be introduced before this call.
        super().__init__(**prepare_locals_for_super(locals()))

    def frame_from_env(
        self, env: HabitatEnvironment, task: Optional[Task[HabitatEnvironment]]
    ) -> np.ndarray:
        """Return a copy of the `"rgb"` observation of the env's current frame."""
        return env.current_frame["rgb"].copy()


class DepthSensorHabitat(DepthSensor[HabitatEnvironment, Task[HabitatEnvironment]]):
    """Depth sensor that reads the `"depth"` entry of the habitat env's current frame."""

    # For backwards compatibility
    def __init__(
        self,
        use_resnet_normalization: Optional[bool] = None,
        use_normalization: Optional[bool] = None,
        mean: Optional[np.ndarray] = np.array([[0.5]], dtype=np.float32),
        stdev: Optional[np.ndarray] = np.array([[0.25]], dtype=np.float32),
        height: Optional[int] = None,
        width: Optional[int] = None,
        uuid: str = "depth",
        output_shape: Optional[Tuple[int, ...]] = None,
        output_channels: int = 1,
        unnormalized_infimum: float = 0.0,
        unnormalized_supremum: float = 5.0,
        scale_first: bool = False,
        **kwargs: Any
    ):
        # Give priority to use_normalization, but use_resnet_normalization for backward compat. if not set
        if use_resnet_normalization is not None and use_normalization is None:
            use_normalization = use_resnet_normalization
        elif use_normalization is None:
            # Neither flag was given: normalization is off.
            use_normalization = False

        # All locals (including the resolved `use_normalization`) are handed
        # to the parent initializer through `prepare_locals_for_super`, so no
        # extra local variables should be introduced before this call.
        super().__init__(**prepare_locals_for_super(locals()))

    def frame_from_env(
        self, env: HabitatEnvironment, task: Optional[Task[HabitatEnvironment]]
    ) -> np.ndarray:
        """Return a copy of the `"depth"` observation of the env's current frame."""
        return env.current_frame["depth"].copy()


class TargetCoordinatesSensorHabitat(Sensor[HabitatEnvironment, "PointNavTask"]):
    """Sensor exposing the `"pointgoal_with_gps_compass"` habitat observation."""

    def __init__(
        self, coordinate_dims: int, uuid: str = "target_coordinates_ind", **kwargs: Any
    ):
        self.coordinate_dims = coordinate_dims

        # The local name matters: the locals snapshot below is what gets
        # forwarded to the parent initializer.
        observation_space = self._get_observation_space()

        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self):
        """Box space with `coordinate_dims` entries bounded by [-3.15, 1000]."""
        # Distance is a non-negative real and angle is normalized to the range (-Pi, Pi] or [-Pi, Pi)
        lower_bound = np.float32(-3.15)
        upper_bound = np.float32(1000)
        return gym.spaces.Box(lower_bound, upper_bound, shape=(self.coordinate_dims,))

    def get_observation(
        self,
        env: HabitatEnvironment,
        task: Optional["PointNavTask"],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Return the point-goal entry of the env's current frame."""
        return env.current_frame["pointgoal_with_gps_compass"]


class TargetObjectSensorHabitat(Sensor[HabitatEnvironment, "ObjectNavTask"]):
    """Sensor exposing the first element of the `"objectgoal"` habitat observation."""

    def __init__(self, num_objects: int, uuid: str = "target_object_id", **kwargs: Any):
        # The local name matters: the locals snapshot below is what gets
        # forwarded to the parent initializer.
        observation_space = self._get_observation_space(num_objects)
        super().__init__(**prepare_locals_for_super(locals()))

    @staticmethod
    def _get_observation_space(num_objects: int):
        """Discrete space with `num_objects` values."""
        return gym.spaces.Discrete(num_objects)

    def get_observation(
        self,
        env: HabitatEnvironment,
        task: Optional["ObjectNavTask"],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Return element 0 of the `"objectgoal"` entry of the current frame."""
        return env.current_frame["objectgoal"][0]


class AgentCoordinatesSensorHabitat(Sensor[HabitatEnvironment, "PointNavTask"]):
    """Sensor returning the agent's position and a rotation angle.

    The observation is a length-4 array `[x, y, z, angle]`, where `angle` is
    `Quaternion.radians` of the agent's rotation.
    """

    def __init__(self, uuid: str = "agent_position_and_rotation", **kwargs: Any):
        # The local name matters: the locals snapshot below is what gets
        # forwarded to the parent initializer.
        observation_space = self._get_observation_space()

        super().__init__(**prepare_locals_for_super(locals()))

    @staticmethod
    def _get_observation_space():
        """Box space with 4 entries bounded by [-1000, 1000]."""
        return gym.spaces.Box(np.float32(-1000), np.float32(1000), shape=(4,))

    @staticmethod
    def get_observation(
        env: HabitatEnvironment,
        task: Optional["PointNavTask"],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Read the agent state from the simulator and pack it into an array.

        This is a static method so it can be called without a sensor
        instance, as `AgentCoordinatesSensorHabitat.get_observation(env, task)`.
        """
        # Query the simulator once and reuse the state for both fields.
        agent_state = env.env.sim.get_agent_state()
        position = agent_state.position
        quaternion = Quaternion(agent_state.rotation.components)
        return np.array([position[0], position[1], position[2], quaternion.radians])


================================================
FILE: allenact_plugins/habitat_plugin/habitat_task_samplers.py
================================================
from typing import List, Optional, Union, Callable, Any, Dict, Type

import gym

import habitat
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import Builder
from allenact_plugins.habitat_plugin.habitat_environment import HabitatEnvironment
from allenact_plugins.habitat_plugin.habitat_tasks import PointNavTask, ObjectNavTask  # type: ignore
from habitat.config import Config


class PointNavTaskSampler(TaskSampler):
    """Samples `PointNavTask`s from the episodes of a habitat dataset.

    The habitat environment is created lazily on the first `next_task` call.
    """

    def __init__(
        self,
        env_config: Config,
        sensors: List[Sensor],
        max_steps: int,
        action_space: gym.Space,
        distance_to_goal: float,
        filter_dataset_func: Optional[
            Callable[[habitat.Dataset], habitat.Dataset]
        ] = None,
        **task_init_kwargs,
    ) -> None:
        """Store the configuration; no environment is created here.

        # Parameters

        env_config : Habitat configuration used to build the dataset and env.
        sensors : Sensors handed to every sampled task.
        max_steps : Maximum number of steps per task.
        action_space : Action space handed to every sampled task.
        distance_to_goal : Stored in each task's `task_info` under
            `"distance_to_goal"`.
        filter_dataset_func : Optional function applied to the dataset before
            the environment is created.
        task_init_kwargs : Extra keyword arguments for the `PointNavTask`
            constructor.
        """
        self.grid_size = 0.25
        self.env: Optional[HabitatEnvironment] = None
        self.max_tasks: Optional[int] = None
        self.reset_tasks: Optional[int] = None
        self.sensors = sensors
        self.max_steps = max_steps
        self._action_space = action_space
        self.env_config = env_config
        self.distance_to_goal = distance_to_goal
        self.seed: Optional[int] = None
        self.filter_dataset_func = filter_dataset_func

        self._last_sampled_task: Optional[PointNavTask] = None

        self.task_init_kwargs = task_init_kwargs

    def _create_environment(self) -> HabitatEnvironment:
        """Build the dataset and environment and initialise the task budget.

        Raises `RuntimeError` if the dataset is empty, either as loaded or
        after `filter_dataset_func` has been applied.
        """
        dataset = habitat.make_dataset(
            self.env_config.DATASET.TYPE, config=self.env_config.DATASET
        )
        if len(dataset.episodes) == 0:
            raise RuntimeError("Empty input dataset.")

        if self.filter_dataset_func is not None:
            dataset = self.filter_dataset_func(dataset)
            if len(dataset.episodes) == 0:
                raise RuntimeError("Empty dataset after filtering.")

        env = HabitatEnvironment(config=self.env_config, dataset=dataset)
        # Apply a seed that was requested before the env existed.
        if self.seed is not None:
            env.env.seed(self.seed)
        # Unbounded when `MODE` is "train", otherwise one pass over the episodes.
        self.max_tasks = None if self.env_config.MODE == "train" else env.num_episodes
        self.reset_tasks = self.max_tasks
        return env

    @property
    def length(self) -> Union[int, float]:
        """
        @return: Number of total tasks remaining that can be sampled. Can be float('inf').
        """
        return float("inf") if self.max_tasks is None else self.max_tasks

    @property
    def total_unique(self) -> Union[int, float, None]:
        """Number of episodes in the env, or `None` if no env exists yet."""
        if self.env is None:
            return None
        return self.env.num_episodes

    @property
    def last_sampled_task(self) -> Optional[PointNavTask]:
        """Task most recently returned by `next_task`."""
        return self._last_sampled_task

    def close(self) -> None:
        """Stop the environment if one has been created."""
        if self.env is not None:
            self.env.stop()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """
        @return: True if all Tasks that can be sampled by this sampler have the
            same observation space. Otherwise False.
        """
        return True

    def next_task(self, force_advance_scene=False) -> Optional[PointNavTask]:
        """Reset the environment and wrap its current episode in a task.

        Returns `None` once the task budget is exhausted.
        `force_advance_scene` is not used by this sampler.
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        if self.env is None:
            self.env = self._create_environment()
        self.env.reset()

        ep_info = self.env.get_current_episode()
        assert len(ep_info.goals) == 1
        target = ep_info.goals[0].position

        task_info = {
            "target": target,
            "distance_to_goal": self.distance_to_goal,
            "episode_id": ep_info.episode_id,
            "scene_id": ep_info.scene_id.split("/")[-1],
            **ep_info.info,
        }

        self._last_sampled_task = PointNavTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            **self.task_init_kwargs,
        )

        if self.max_tasks is not None:
            self.max_tasks -= 1

        return self._last_sampled_task

    def reset(self):
        """Restore the task budget to its initial value."""
        self.max_tasks = self.reset_tasks

    def set_seed(self, seed: int):
        """Remember `seed` and seed the habitat env.

        If the environment has not been created yet, the seed is applied
        when it is created.
        """
        self.seed = seed
        if seed is not None and self.env is not None:
            self.env.env.seed(seed)


class ObjectNavTaskSampler(TaskSampler):
    """Samples object-navigation tasks from the episodes of a habitat dataset.

    The habitat environment is created lazily on the first `next_task` call.
    """

    def __init__(
        self,
        env_config: Config,
        sensors: List[Sensor],
        max_steps: int,
        action_space: gym.Space,
        filter_dataset_func: Optional[
            Callable[[habitat.Dataset], habitat.Dataset]
        ] = None,
        task_kwargs: Optional[Dict[str, Any]] = None,
        objectnav_task_type: Union[
            Type[ObjectNavTask], Builder[ObjectNavTask]
        ] = ObjectNavTask,
        **kwargs,
    ) -> None:
        """Store the configuration; no environment is created here.

        # Parameters

        env_config : Habitat configuration used to build the dataset and env.
        sensors : Sensors handed to every sampled task.
        max_steps : Maximum number of steps per task.
        action_space : Action space handed to every sampled task.
        filter_dataset_func : Optional function applied to the dataset before
            the environment is created.
        task_kwargs : Extra keyword arguments for the task constructor.
        objectnav_task_type : Callable used to construct each task.
        kwargs : Accepted and ignored.
        """
        self.grid_size = 0.25
        self.env: Optional[HabitatEnvironment] = None
        self.max_tasks: Optional[int] = None
        self.reset_tasks: Optional[int] = None
        self.sensors = sensors
        self.max_steps = max_steps
        self._action_space = action_space
        self.env_config = env_config
        self.seed: Optional[int] = None
        self.filter_dataset_func = filter_dataset_func
        self.objectnav_task_type = objectnav_task_type

        self.task_kwargs = {} if task_kwargs is None else task_kwargs
        self._last_sampled_task: Optional[ObjectNavTask] = None

    def _create_environment(self) -> HabitatEnvironment:
        """Build the dataset and environment and initialise the task budget.

        Raises `RuntimeError` if the dataset is empty, either as loaded or
        after `filter_dataset_func` has been applied.
        """
        dataset = habitat.make_dataset(
            self.env_config.DATASET.TYPE, config=self.env_config.DATASET
        )
        if len(dataset.episodes) == 0:
            raise RuntimeError("Empty input dataset.")

        if self.filter_dataset_func is not None:
            dataset = self.filter_dataset_func(dataset)
            if len(dataset.episodes) == 0:
                raise RuntimeError("Empty dataset after filtering.")

        env = HabitatEnvironment(config=self.env_config, dataset=dataset)
        # Apply a seed that was requested before the env existed.
        if self.seed is not None:
            env.env.seed(self.seed)
        self.max_tasks = (
            None if self.env_config.MODE == "train" else env.num_episodes
        )  # mp3d objectnav val -> 2184
        self.reset_tasks = self.max_tasks
        return env

    @property
    def length(self) -> Union[int, float]:
        """
        @return: Number of total tasks remaining that can be sampled. Can be float('inf').
        """
        return float("inf") if self.max_tasks is None else self.max_tasks

    @property
    def total_unique(self) -> Union[int, float, None]:
        """Number of episodes in the env, or `None` if no env exists yet."""
        if self.env is None:
            return None
        return self.env.num_episodes

    @property
    def last_sampled_task(self) -> Optional[ObjectNavTask]:
        """Task most recently returned by `next_task`."""
        return self._last_sampled_task

    def close(self) -> None:
        """Stop the environment if one has been created."""
        if self.env is not None:
            self.env.stop()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """
        @return: True if all Tasks that can be sampled by this sampler have the
            same observation space. Otherwise False.
        """
        return True

    def next_task(self, force_advance_scene=False) -> Optional[ObjectNavTask]:
        """Reset the environment and wrap its current episode in a task.

        Returns `None` once the task budget is exhausted. When
        `force_advance_scene` is set and an env already exists, the episode
        iterator is asked to switch scene before the reset.
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        if self.env is not None:
            if force_advance_scene:
                # Relies on private members of habitat's episode iterator.
                self.env.env._episode_iterator._forced_scene_switch()
                self.env.env._episode_iterator._set_shuffle_intervals()
            self.env.reset()
        else:
            self.env = self._create_environment()
            self.env.reset()
        ep_info = self.env.get_current_episode()

        # All goals of an episode must share a single object category.
        target_categories = {g.object_category for g in ep_info.goals}
        assert len(target_categories) == 1

        target_category = list(target_categories)[0]

        task_info = {
            "target_category": target_category,
            "episode_id": ep_info.episode_id,
            "scene_id": ep_info.scene_id.split("/")[-1],
            **ep_info.info,
        }

        self._last_sampled_task = self.objectnav_task_type(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            **self.task_kwargs,
        )

        if self.max_tasks is not None:
            self.max_tasks -= 1

        return self._last_sampled_task

    def reset(self):
        """Restore the task budget to its initial value."""
        self.max_tasks = self.reset_tasks

    def set_seed(self, seed: int):
        """Remember `seed` and seed the habitat env.

        If the environment has not been created yet, the seed is applied
        when it is created.
        """
        self.seed = seed
        if seed is not None and self.env is not None:
            self.env.env.seed(seed)


================================================
FILE: allenact_plugins/habitat_plugin/habitat_tasks.py
================================================
from abc import ABC
from typing import Tuple, List, Dict, Any, Optional, Union, Sequence, cast

import gym
import numpy as np
from habitat.sims.habitat_simulator.actions import HabitatSimActions
from habitat.sims.habitat_simulator.habitat_simulator import HabitatSim
from habitat.tasks.nav.shortest_path_follower import ShortestPathFollower

from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact.utils.system import get_logger
from allenact_plugins.habitat_plugin.habitat_constants import (
    MOVE_AHEAD,
    ROTATE_LEFT,
    ROTATE_RIGHT,
    END,
    LOOK_UP,
    LOOK_DOWN,
)
from allenact_plugins.habitat_plugin.habitat_environment import HabitatEnvironment
from allenact_plugins.habitat_plugin.habitat_sensors import (
    AgentCoordinatesSensorHabitat,
)


class HabitatTask(Task[HabitatEnvironment], ABC):
    """Base class for habitat tasks.

    Tracks the last action and its success flag, records the agent's start
    pose and derives an episode id from the current habitat episode.
    """

    def __init__(
        self,
        env: HabitatEnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        **kwargs,
    ) -> None:
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )

        self._last_action: Optional[str] = None
        self._last_action_ind: Optional[int] = None
        self._last_action_success: Optional[bool] = None
        self._actions_taken: List[str] = []

        # Seed the position history with the agent's starting pose.
        start_pose = self.get_agent_position_and_rotation()
        self._positions = [
            {
                "x": start_pose[0],
                "y": start_pose[1],
                "z": start_pose[2],
                "rotation": start_pose[3],
            }
        ]

        # Build the episode id from the last component of the scene path
        # (minus its final four characters, presumably a file extension)
        # followed by the habitat episode id.
        episode = self.env.get_current_episode()
        scene_file = episode.scene_id.split("/")[-1]
        self._episode_id = scene_file[:-4] + "_" + episode.episode_id

    def get_agent_position_and_rotation(self):
        """Return the observation of `AgentCoordinatesSensorHabitat` for this task."""
        return AgentCoordinatesSensorHabitat.get_observation(self.env, self)

    @property
    def last_action(self):
        """The stored last action (`None` until set)."""
        return self._last_action

    @last_action.setter
    def last_action(self, value: str):
        self._last_action = value

    @property
    def last_action_success(self):
        """The stored success flag of the last action (`None` until set)."""
        return self._last_action_success

    @last_action_success.setter
    def last_action_success(self, value: Optional[bool]):
        self._last_action_success = value

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Return the `"rgb"` or `"depth"` entry of the env's current frame.

        Any other `mode` raises `NotImplementedError`.
        """
        if mode not in ("rgb", "depth"):
            raise NotImplementedError()
        return self.env.current_frame[mode]


class PointNavTask(Task[HabitatEnvironment]):
    """Point-goal navigation task in a habitat environment.

    The agent moves with `MOVE_AHEAD` / `ROTATE_LEFT` / `ROTATE_RIGHT` and
    ends the episode with `END`; the episode counts as a success when `END`
    is taken within `task_info["distance_to_goal"]` of the target.
    """

    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, END)

    def __init__(
        self,
        env: HabitatEnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        failed_end_reward: float = 0.0,
        **kwargs,
    ) -> None:
        """Initialise the task for the env's current episode.

        # Parameters

        env : Habitat environment, already reset to the episode to run.
        sensors : Sensors used to produce observations.
        task_info : Must contain `"distance_to_goal"`; `"target"` is needed
            by `query_expert`.
        max_steps : Maximum number of steps in the task.
        failed_end_reward : Reward added when the task finishes without
            success.
        """
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )
        self._took_end_action: bool = False
        self._success: Optional[bool] = False
        self._subsampled_locations_from_which_obj_visible = None

        # Get the geodesic distance to target from the environment and make sure it is
        # a valid value
        self.last_geodesic_distance = self.current_geodesic_dist_to_target()
        self.start_distance = self.last_geodesic_distance
        assert self.last_geodesic_distance is not None

        # noinspection PyProtectedMember
        self._shortest_path_follower = ShortestPathFollower(
            cast(HabitatSim, env.env.sim), env.env._config.TASK.SUCCESS_DISTANCE, False
        )
        self._shortest_path_follower.mode = "geodesic_path"

        self._rewards: List[float] = []
        self._metrics = None
        self.failed_end_reward = failed_end_reward

    def current_geodesic_dist_to_target(self) -> Optional[float]:
        """Return habitat's `"distance_to_goal"` metric.

        If the metric is still `None`, the task measurements are updated once
        and the metric is read again.
        """
        metrics = self.env.env.get_metrics()
        if metrics["distance_to_goal"] is None:
            habitat_env = self.env.env
            habitat_env.task.measurements.update_measures(
                episode=habitat_env.current_episode, action=None, task=habitat_env.task
            )
            metrics = self.env.env.get_metrics()

        return metrics["distance_to_goal"]

    @property
    def action_space(self):
        """Discrete action space with one entry per action name."""
        return gym.spaces.Discrete(len(self._actions))

    def reached_terminal_state(self) -> bool:
        """Whether the habitat env reports the episode as over."""
        return self.env.env.episode_over

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Names of the actions, indexed by action id."""
        return cls._actions

    def close(self) -> None:
        """Stop the underlying environment."""
        self.env.stop()

    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        """Execute the action with index `action` and return the step result."""
        assert isinstance(action, int)
        action = cast(int, action)

        action_str = self.class_action_names()[action]

        self.env.step({"action": action_str})

        if action_str == END:
            # `END` succeeds only if the goal is within range.
            self._took_end_action = True
            self._success = self._is_goal_in_range()
            self.last_action_success = self._success
        else:
            self.last_action_success = self.env.last_action_success

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )
        return step_result

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Return the `"rgb"` or `"depth"` entry of the env's current frame."""
        assert mode in ["rgb", "depth"], "only rgb and depth rendering is implemented"
        # Honour the requested mode instead of always returning the rgb frame.
        return self.env.current_frame[mode]

    def _is_goal_in_range(self) -> bool:
        """Whether the geodesic distance is within `task_info["distance_to_goal"]`.

        An unavailable (`None`) distance counts as out of range.
        """
        distance = self.current_geodesic_dist_to_target()
        if distance is None:
            return False
        return distance <= self.task_info["distance_to_goal"]

    def judge(self) -> float:
        """Compute the reward for the step that was just taken.

        The reward is a constant -0.01 plus the decrease in geodesic distance
        since the previous step; when the task is done, 10.0 is added on
        success and `failed_end_reward` otherwise.
        """
        reward = -0.01

        new_geodesic_distance = self.current_geodesic_dist_to_target()
        if self.last_geodesic_distance is None:
            self.last_geodesic_distance = new_geodesic_distance

        if self.last_geodesic_distance is not None:
            # Fall back to the previous distance when the new one is unusable.
            if (
                new_geodesic_distance is None
                or new_geodesic_distance in [float("-inf"), float("inf")]
                or np.isnan(new_geodesic_distance)
            ):
                new_geodesic_distance = self.last_geodesic_distance
            delta_distance_reward = self.last_geodesic_distance - new_geodesic_distance
            reward += delta_distance_reward
            self.last_geodesic_distance = new_geodesic_distance

            if self.is_done():
                reward += 10.0 if self._success else self.failed_end_reward
        else:
            get_logger().warning("Could not get geodesic distance from habitat env.")

        self._rewards.append(float(reward))

        return float(reward)

    def metrics(self) -> Dict[str, Any]:
        """Return the episode metrics, or `{}` while the task is not done.

        The accumulated rewards are cleared once the metrics are produced.
        """
        if not self.is_done():
            return {}

        _metrics = self.env.env.get_metrics()
        metrics = {
            **super(PointNavTask, self).metrics(),
            "success": 1 * self._success,
            "ep_length": self.num_steps_taken(),
            "reward": np.sum(self._rewards),
            "spl": _metrics["spl"] if _metrics["spl"] is not None else 0.0,
            "dist_to_target": self.current_geodesic_dist_to_target(),
        }
        self._rewards = []
        return metrics

    def query_expert(self, **kwargs) -> Tuple[int, bool]:
        """Return `(action index, expert action available)`.

        `END` is suggested when the goal is in range; otherwise the shortest
        path follower's next action is translated to an action index. For
        any other follower output `(0, False)` is returned.
        """
        if self._is_goal_in_range():
            return self.class_action_names().index(END), True

        target = self.task_info["target"]
        habitat_action = self._shortest_path_follower.get_next_action(target)
        if habitat_action == HabitatSimActions.MOVE_FORWARD:
            return self.class_action_names().index(MOVE_AHEAD), True
        elif habitat_action == HabitatSimActions.TURN_LEFT:
            return self.class_action_names().index(ROTATE_LEFT), True
        elif habitat_action == HabitatSimActions.TURN_RIGHT:
            return self.class_action_names().index(ROTATE_RIGHT), True
        else:
            return 0, False


class ObjectNavTask(HabitatTask):
    """Object-navigation task backed by a Habitat environment.

    The agent chooses among six discrete actions (move ahead, rotate
    left/right, look up/down and `END`). Every step is rewarded with a small
    constant penalty plus the decrease in geodesic distance to the goal, and
    a bonus is added once `END` has been taken successfully.
    """

    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, END, LOOK_UP, LOOK_DOWN)

    def __init__(
        self,
        env: HabitatEnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        look_constraints: Optional[Tuple[int, int]] = None,
        **kwargs,
    ) -> None:
        """Initializer.

        # Parameters

        env : The Habitat environment wrapper in which the task takes place.
        sensors : Forwarded to the parent class.
        task_info : Forwarded to the parent class; an `"episode_id"` entry
            is added to it here.
        max_steps : Forwarded to the parent class.
        look_constraints : Optional `(max_look_up, max_look_down)` pair
            bounding how far the net number of `LOOK_UP` / `LOOK_DOWN`
            actions may drift from the starting pose. `None` disables the
            bound.
        kwargs : Forwarded to the parent class.
        """
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )
        self.look_constraints = look_constraints
        # Net number of executed look-ups (positive) / look-downs (negative).
        self._look_state = 0

        self._took_end_action: bool = False
        self._success: Optional[bool] = False
        self._subsampled_locations_from_which_obj_visible = None

        # Get the geodesic distance to target from the environment and make sure it is
        # a valid value
        self.last_geodesic_distance = self.current_geodesic_dist_to_target()
        assert not (
            self.last_geodesic_distance is None
            or self.last_geodesic_distance in [float("-inf"), float("inf")]
            or np.isnan(self.last_geodesic_distance)
        ), "Bad geodesic distance"
        self._min_distance_to_goal = self.last_geodesic_distance
        self._num_invalid_actions = 0

        # noinspection PyProtectedMember
        self._shortest_path_follower = ShortestPathFollower(
            env.env.sim, env.env._config.TASK.SUCCESS.SUCCESS_DISTANCE, False
        )
        self._shortest_path_follower.mode = "geodesic_path"

        self._rewards: List[float] = []
        self._metrics = None
        self.task_info["episode_id"] = self._episode_id

    @property
    def action_space(self):
        """Discrete action space with one entry per action in `_actions`."""
        return gym.spaces.Discrete(len(self._actions))

    def reached_terminal_state(self) -> bool:
        """Whether the underlying Habitat episode is over."""
        return self.env.env.episode_over

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Names of the actions available to this task class."""
        return cls._actions

    def action_names(self, **kwargs) -> Tuple[str, ...]:
        """Names of the actions available to this task instance."""
        return self._actions

    def close(self) -> None:
        """Stop the underlying environment."""
        self.env.stop()

    def current_geodesic_dist_to_target(self) -> Optional[float]:
        """Return Habitat's `distance_to_goal` metric.

        If the metric is not yet available (`None`), the task measurements
        are updated once and the metric is read again; the result may still
        be `None`.
        """
        metrics = self.env.env.get_metrics()
        if metrics["distance_to_goal"] is None:
            habitat_env = self.env.env
            habitat_env.task.measurements.update_measures(
                episode=habitat_env.current_episode, action=None, task=habitat_env.task
            )
            metrics = self.env.env.get_metrics()

        return metrics["distance_to_goal"]

    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        """Execute a single action and return the resulting step data.

        # Parameters

        action : Index (into `action_names()`) of the action to take.

        # Returns

        An `RLStepResult` holding the new observation, the reward from
        `judge`, the done flag and the success of the last action.
        """
        assert isinstance(action, int)
        action = cast(int, action)

        old_pos = self.get_agent_position_and_rotation()

        action_str = self.action_names()[action]
        self._actions_taken.append(action_str)

        skip_action = False
        if self.look_constraints is not None:
            max_look_up, max_look_down = self.look_constraints

            if action_str == LOOK_UP:
                num_look_ups = self._look_state
                # assert num_look_ups <= max_look_up
                skip_action = num_look_ups >= max_look_up
                self._look_state += 1

            if action_str == LOOK_DOWN:
                num_look_downs = -self._look_state
                # assert num_look_downs <= max_look_down
                skip_action = num_look_downs >= max_look_down
                self._look_state -= 1

            # The counter was changed even for a skipped action; clamping
            # brings it back inside the permitted range.
            self._look_state = min(max(self._look_state, -max_look_down), max_look_up)

        if not skip_action:
            self.env.step({"action": action_str})

        if action_str == END:
            self._took_end_action = True
            self._success = self._is_goal_in_range()
            self.last_action_success = self._success
        else:
            self.last_action_success = self.env.last_action_success

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )

        # An action that leaves the agent's pose unchanged counts as invalid.
        new_pos = self.get_agent_position_and_rotation()
        if np.all(old_pos == new_pos):
            self._num_invalid_actions += 1

        self._positions.append(
            {
                "x": new_pos[0],
                "y": new_pos[1],
                "z": new_pos[2],
                "rotation": new_pos[3],
            }
        )

        return step_result

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Return the current RGB frame of the environment.

        NOTE(review): `"depth"` passes the mode check below, but the RGB
        frame is returned for every mode -- confirm whether a depth frame
        should be returned instead.
        """
        assert mode in ["rgb", "depth"], "only rgb and depth rendering is implemented"
        return self.env.current_frame["rgb"]

    def _is_goal_in_range(self) -> bool:
        """Whether the goal currently counts as reached (non-zero SPL)."""
        # The habitat simulator will return an SPL value of 0.0 whenever the goal is not in range
        return bool(self.env.env.get_metrics()["spl"])

    def judge(self) -> float:
        """Compute, record and return the reward for the latest step.

        The reward is a constant -0.01 step penalty plus the reduction in
        geodesic distance to the goal since the previous step, plus 10.0
        once `END` has been taken successfully. A missing, infinite or NaN
        distance is replaced by the previous distance, so it contributes no
        distance reward.
        """
        # Set default reward
        reward = -0.01

        # Get geodesic distance reward
        new_geodesic_distance = self.current_geodesic_dist_to_target()
        if (
            new_geodesic_distance is None
            or new_geodesic_distance in [float("-inf"), float("inf")]
            or np.isnan(new_geodesic_distance)
        ):
            new_geodesic_distance = self.last_geodesic_distance

        # Track the minimum only after sanitising the distance: `min` raises
        # on `None` and a NaN would silently replace the stored minimum.
        self._min_distance_to_goal = min(
            new_geodesic_distance, self._min_distance_to_goal
        )

        delta_distance_reward = self.last_geodesic_distance - new_geodesic_distance
        reward += delta_distance_reward

        # Get success reward
        if self._took_end_action:
            reward += 10.0 if self._success else 0.0

        self._rewards.append(float(reward))
        self.last_geodesic_distance = new_geodesic_distance

        return float(reward)

    def metrics(self) -> Dict[str, Any]:
        """Return the end-of-episode metrics, or `{}` while the task is running.

        `task_info` is refreshed with the actions taken, the action names
        and the followed path on every call, finished or not. The stored
        per-step rewards are cleared once the final metrics are produced.
        """
        self.task_info["taken_actions"] = self._actions_taken
        self.task_info["action_names"] = self.action_names()
        self.task_info["followed_path"] = self._positions
        if not self.is_done():
            return {}
        else:
            _metrics = self.env.env.get_metrics()
            metrics = {
                "success": self._success,
                "ep_length": self.num_steps_taken(),
                "total_reward": np.sum(self._rewards),
                "spl": _metrics["spl"] if _metrics["spl"] is not None else 0.0,
                "min_distance_to_target": self._min_distance_to_goal,
                "num_invalid_actions": self._num_invalid_actions,
                "task_info": self.task_info,
            }
            self._rewards = []
            return metrics

    def query_expert(self, **kwargs) -> Tuple[int, bool]:
        """Return the expert's action and whether that action is valid.

        When the goal is already in range the expert ends the episode.
        Otherwise the shortest-path follower's next action towards
        `task_info["target"]` is returned, flagged invalid when it is `None`.

        NOTE(review): the follower's action is returned as-is rather than
        being translated into an index of `class_action_names()` -- confirm
        that the two action numberings agree.
        """
        if self._is_goal_in_range():
            return self.class_action_names().index(END), True

        target = self.task_info["target"]
        action = self._shortest_path_follower.get_next_action(target)
        return action, action is not None


================================================
FILE: allenact_plugins/habitat_plugin/habitat_utils.py
================================================
import os
from typing import List

import habitat
from allenact_plugins.habitat_plugin.habitat_constants import (
    HABITAT_BASE,
    HABITAT_CONFIGS_DIR,
)
from habitat import Config


def construct_env_configs(
    config: Config,
    allow_scene_repeat: bool = False,
) -> List[Config]:
    """Build one Habitat config per process, splitting the scenes among them.

    The scenes of the dataset are dealt out round-robin so that every
    process only has to handle its own subset.

    # Parameters

    config : Config providing `NUM_PROCESSES`, `SIMULATOR_GPU_IDS` and the
        dataset settings needed to create the individual environments. It
        is frozen by this call.
    allow_scene_repeat : If `True` and the dataset has fewer distinct scenes
        than there are processes, scenes are repeated across processes. If
        `False`, a `RuntimeError` is raised in that situation.

    # Returns

    List of configs, one for each process.
    """

    config.freeze()
    num_processes = config.NUM_PROCESSES
    scenes = habitat.make_dataset(config.DATASET.TYPE).get_scenes_to_load(
        config.DATASET
    )

    if 0 < len(scenes) < num_processes:
        if not allow_scene_repeat:
            raise RuntimeError(
                "reduce the number of processes as there aren't enough number of scenes."
            )
        # Repeat the scene list often enough to hand every process a scene.
        repetitions = 1 + (num_processes // len(scenes))
        scenes = (scenes * repetitions)[:num_processes]

    # Deal the scenes out round-robin.
    scene_splits: List[List] = [[] for _ in range(num_processes)]
    for scene_idx, scene in enumerate(scenes):
        scene_splits[scene_idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    restrict_scenes = len(scenes) > 0
    gpu_ids = config.SIMULATOR_GPU_IDS

    configs = []
    for process_idx in range(num_processes):
        task_config = config.clone()
        task_config.defrost()

        if restrict_scenes:
            task_config.DATASET.CONTENT_SCENES = scene_splits[process_idx]

        # Cycle through the available GPUs; -1 is used when none are listed.
        if len(gpu_ids) == 0:
            gpu_device_id = -1
        else:
            gpu_device_id = gpu_ids[process_idx % len(gpu_ids)]
        task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = gpu_device_id

        task_config.freeze()
        configs.append(task_config.clone())

    return configs


def construct_env_configs_mp3d(config: Config) -> List[Config]:
    r"""Create list of Habitat Configs for training on multiple processes
    To allow better performance, dataset are split into small ones for
    each individual env, grouped by scenes.

    The scene ids are hard-coded. With a single process one fixed scene is
    used. Otherwise the "small" and "med" scene lists are laid out with
    `distribute` for 8 GPUs with 3 processes per GPU, which addresses
    process slots up to index 23; a smaller `NUM_PROCESSES` (other than 1)
    therefore fails with an `IndexError`.

    Args:
        config: configs that contain num_processes as well as information
        necessary to create individual environments. It is frozen by this
        call.
    Returns:
        List of Configs, one for each process
    """

    config.freeze()
    num_processes = config.NUM_PROCESSES
    configs = []
    # dataset = habitat.make_dataset(config.DATASET.TYPE)
    # scenes = dataset.get_scenes_to_load(config.DATASET)

    if num_processes == 1:
        scene_splits = [["pRbA3pwrgk9"]]
    else:
        # NOTE(review): "XcA2TqTSSAj", "SN83YJsR3w2", "8WUmhLawc2A",
        # "JeFG25nYj2p", "17DRP5sb8fy" and "Uxmj2M2itWa" each appear twice
        # in this list -- confirm whether the repetition is intentional.
        small = [
            "rPc6DW4iMge",
            "e9zR4mvMWw7",
            "uNb9QFRL6hY",
            "qoiz87JEwZ2",
            "sKLMLpTHeUy",
            "s8pcmisQ38h",
            "759xd9YjKW5",
            "XcA2TqTSSAj",
            "SN83YJsR3w2",
            "8WUmhLawc2A",
            "JeFG25nYj2p",
            "17DRP5sb8fy",
            "Uxmj2M2itWa",
            "XcA2TqTSSAj",
            "SN83YJsR3w2",
            "8WUmhLawc2A",
            "JeFG25nYj2p",
            "17DRP5sb8fy",
            "Uxmj2M2itWa",
            "D7N2EKCX4Sj",
            "b8cTxDM8gDG",
            "sT4fr6TAbpF",
            "S9hNv5qa7GM",
            "82sE5b5pLXE",
            "pRbA3pwrgk9",
            "aayBHfsNo7d",
            "cV4RVeZvu5T",
            "i5noydFURQK",
            "YmJkqBEsHnH",
            "jh4fc5c5qoQ",
            "VVfe2KiqLaN",
            "29hnd4uzFmX",
            "Pm6F8kyY3z2",
            "JF19kD82Mey",
            "GdvgFV5R1Z5",
            "HxpKQynjfin",
            "vyrNrziPKCB",
        ]
        med = [
            "V2XKFyX4ASd",
            "VFuaQ6m2Qom",
            "ZMojNkEp431",
            "5LpN3gDmAk7",
            "r47D5H71a5s",
            "ULsKaCPVFJR",
            "E9uDoFAP3SH",
            "kEZ7cmS4wCh",
            "ac26ZMwG7aT",
            "dhjEzFoUFzH",
            "mJXqzFtmKg4",
            "p5wJjkQkbXX",
            "Vvot9Ly1tCj",
            "EDJbREhghzL",
            "VzqfbhrpDEA",
            "7y3sRwLe3Va",
        ]

        scene_splits = [[] for _ in range(num_processes)]
        # Small scenes share process slots 1 and 2 of every GPU ...
        distribute(
            small,
            scene_splits,
            num_gpus=8,
            procs_per_gpu=3,
            proc_offset=1,
            scenes_per_process=2,
        )
        # ... while medium scenes go to process slot 0 of every GPU.
        distribute(
            med,
            scene_splits,
            num_gpus=8,
            procs_per_gpu=3,
            proc_offset=0,
            scenes_per_process=1,
        )

        # Earlier hand-written split, kept for reference:
        # gpu0 = [['pRbA3pwrgk9', '82sE5b5pLXE', 'S9hNv5qa7GM'],
        #         ['Uxmj2M2itWa', '17DRP5sb8fy', 'JeFG25nYj2p'],
        #         ['5q7pvUzZiYa', '759xd9YjKW5', 's8pcmisQ38h'],
        #         ['e9zR4mvMWw7', 'rPc6DW4iMge', 'vyrNrziPKCB']]
        # gpu1 = [['sT4fr6TAbpF', 'b8cTxDM8gDG', 'D7N2EKCX4Sj'],
        #         ['8WUmhLawc2A', 'SN83YJsR3w2', 'XcA2TqTSSAj'],
        #         ['sKLMLpTHeUy', 'qoiz87JEwZ2', 'uNb9QFRL6hY'],
        #         ['V2XKFyX4ASd', 'VFuaQ6m2Qom', 'ZMojNkEp431']]
        # gpu2 = [['5LpN3gDmAk7', 'r47D5H71a5s', 'ULsKaCPVFJR', 'E9uDoFAP3SH'],
        #         ['VVfe2KiqLaN', 'jh4fc5c5qoQ', 'YmJkqBEsHnH'],  # small
        #         ['i5noydFURQK', 'cV4RVeZvu5T', 'aayBHfsNo7d']]  # small
        # gpu3 = [['kEZ7cmS4wCh', 'ac26ZMwG7aT', 'dhjEzFoUFzH'],
        #         ['mJXqzFtmKg4', 'p5wJjkQkbXX', 'Vvot9Ly1tCj']]
        # gpu4 = [['EDJbREhghzL', 'VzqfbhrpDEA', '7y3sRwLe3Va'],
        #         ['ur6pFq6Qu1A', 'PX4nDJXEHrG', 'PuKPg4mmafe']]
        # gpu5 = [['r1Q1Z4BcV1o', 'gTV8FGcVJC9', '1pXnuDYAj8r'],
        #         ['JF19kD82Mey', 'Pm6F8kyY3z2', '29hnd4uzFmX']]  # small
        # gpu6 = [['VLzqgDo317F', '1LXtFkjw3qL'],
        #         ['HxpKQynjfin', 'gZ6f7yhEvPG', 'GdvgFV5R1Z5']]  # small
        # gpu7 = [['D7G3Y4RVNrH', 'B6ByNegPMKs']]
        #
        # scene_splits = gpu0 + gpu1 + gpu2 + gpu3 + gpu4 + gpu5 + gpu6 + gpu7

    gpu_ids = config.SIMULATOR_GPU_IDS
    for i in range(num_processes):

        task_config = config.clone()
        task_config.defrost()
        task_config.DATASET.CONTENT_SCENES = scene_splits[i]

        # Same convention as `construct_env_configs`: cycle through the
        # listed GPUs and use -1 when none are listed, instead of failing
        # with a modulo-by-zero.
        if len(gpu_ids) == 0:
            task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = -1
        else:
            task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = gpu_ids[
                i % len(gpu_ids)
            ]

        task_config.freeze()

        configs.append(task_config.clone())

    return configs


def distribute(
    data: List[str],
    scene_splits: List[List],
    num_gpus=8,
    procs_per_gpu=4,
    proc_offset=0,
    scenes_per_process=1,
) -> None:
    """Append every scene in `data` to one of the lists in `scene_splits`.

    Consecutive scenes go to consecutive GPUs. After each full pass over
    the GPUs, the target process within a GPU advances by one, wrapping
    around after `scenes_per_process` passes.

    # Parameters

    data : Scene names to hand out.
    scene_splits : Per-process scene lists, modified in place.
    num_gpus : Number of GPUs the scenes are cycled over.
    procs_per_gpu : Number of consecutive `scene_splits` slots per GPU.
    proc_offset : Offset added to the slot index within each GPU's group.
    scenes_per_process : Number of consecutive slots (per GPU) the scenes
        rotate through.
    """
    for position, scene_name in enumerate(data):
        pass_idx, gpu_idx = divmod(position, num_gpus)
        slot = gpu_idx * procs_per_gpu + pass_idx % scenes_per_process + proc_offset
        scene_splits[slot].append(scene_name)


def get_habitat_config(path: str):
    """Load the Habitat config stored at `path`.

    An absolute path is used as given. A relative path is looked up, in
    order, under the current working directory, `HABITAT_BASE` and
    `HABITAT_CONFIGS_DIR`; the first existing candidate is loaded.

    # Parameters

    path : Path of a `.yml` or `.yaml` config file.

    # Returns

    The result of `habitat.get_config` for the resolved path.

    # Raises

    FileExistsError : If no config file can be found.
    """
    has_yaml_suffix = path[-4:].lower() == ".yml" or path[-5:].lower() == ".yaml"
    assert has_yaml_suffix, f"path ({path}) must be a .yml or .yaml file."

    if os.path.isabs(path):
        if not os.path.exists(path):
            raise FileExistsError(f"Could not find config file with given path {path}.")
        return habitat.get_config(path)

    search_dirs = [os.getcwd(), HABITAT_BASE, HABITAT_CONFIGS_DIR]
    candidate_paths = [os.path.join(search_dir, path) for search_dir in search_dirs]
    for candidate_path in candidate_paths:
        if os.path.exists(candidate_path):
            return habitat.get_config(candidate_path)

    raise FileExistsError(
        f"Could not find config file with given relative path {path}. Tried the following possible absolute"
        f" paths {candidate_paths}."
    )


================================================
FILE: allenact_plugins/habitat_plugin/scripts/__init__.py
================================================


================================================
FILE: allenact_plugins/habitat_plugin/scripts/agent_demo.py
================================================
import os

import cv2
import habitat
from pyquaternion import Quaternion

from allenact_plugins.habitat_plugin.habitat_constants import (
    HABITAT_CONFIGS_DIR,
    HABITAT_DATASETS_DIR,
    HABITAT_SCENE_DATASETS_DIR,
)
from allenact_plugins.habitat_plugin.habitat_utils import get_habitat_config

# Keyboard keys read by `agent_demo` to choose the agent's next action.
FORWARD_KEY = "w"
LEFT_KEY = "a"
RIGHT_KEY = "d"
FINISH = "f"


def transform_rgb_bgr(image):
    """Return a copy of `image` with its first three channels reversed.

    The last axis is re-indexed as `[2, 1, 0]`, turning RGB channel order
    into BGR (and vice versa).
    """
    bgr_channel_order = [2, 1, 0]
    return image[:, :, bgr_channel_order]


def agent_demo():
    """Drive an agent through a Habitat point-navigation episode by keyboard.

    A PointNav environment is created for the Gibson scene "Adrian" and the
    RGB observation is shown in an OpenCV window. Each recognized key press
    executes one action; the loop ends when the episode is over, after
    which the outcome is printed.
    """
    config = get_habitat_config(
        os.path.join(HABITAT_CONFIGS_DIR, "tasks/pointnav.yaml")
    )
    config.defrost()
    config.DATASET.DATA_PATH = os.path.join(
        HABITAT_DATASETS_DIR, "pointnav/gibson/v1/train/train.json.gz"
    )
    config.DATASET.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR
    config.DATASET.CONTENT_SCENES = ["Adrian"]
    config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = 0
    config.freeze()

    env = habitat.Env(config=config)
    print("Environment creation successful")

    observations = env.reset()
    cv2.imshow("RGB", transform_rgb_bgr(observations["rgb"]))

    print("Agent stepping around inside environment.")

    # Key code -> (action passed to `env.step`, label printed for the user).
    key_bindings = {
        ord(FORWARD_KEY): (1, "FORWARD"),
        ord(LEFT_KEY): (2, "LEFT"),
        ord(RIGHT_KEY): (3, "RIGHT"),
        ord(FINISH): (0, "FINISH"),
    }

    count_steps = 0
    action = None
    while not env.episode_over:
        binding = key_bindings.get(cv2.waitKey(0))
        if binding is None:
            print("INVALID KEY")
            continue

        action, label = binding
        print(f"action: {label}")

        observations = env.step(action)
        count_steps += 1

        agent_state = env.sim.get_agent_state()
        print("Position:", agent_state.position)
        print("Quaternions:", agent_state.rotation)
        print(Quaternion(agent_state.rotation.components).radians)
        cv2.imshow("RGB", transform_rgb_bgr(observations["rgb"]))

    print(f"Episode finished after {count_steps} steps.")

    stopped = action == habitat.SimulatorActions.STOP
    if stopped and observations["pointgoal"][0] < 0.2:
        print("you successfully navigated to destination point")
    else:
        print("your navigation was unsuccessful")


if __name__ == "__main__":
    agent_demo()


================================================
FILE: allenact_plugins/habitat_plugin/scripts/make_map.py
================================================
import os

import habitat
import numpy as np
from tqdm import tqdm

from allenact_plugins.habitat_plugin.habitat_constants import (
    HABITAT_CONFIGS_DIR,
    HABITAT_DATA_BASE,
    HABITAT_SCENE_DATASETS_DIR,
    HABITAT_DATASETS_DIR,
)
from allenact_plugins.habitat_plugin.habitat_utils import get_habitat_config

# Spacing between neighbouring map cells, in simulator world units
# (presumably meters -- TODO confirm).
map_resolution = 0.05
# Side length, in cells, of the square map built by `make_map`.
map_size = 960


def make_map(env, scene):
    """Sample the navigability of `env` on a grid and save it for `scene`.

    A `map_size` x `map_size` boolean grid centred on the origin is filled
    by querying `env.sim.is_navigable` at height 0.0 for every cell (x
    varies along columns, z along rows). The grid is written with `np.save`
    under `HABITAT_DATA_BASE`, using `scene` as the file name.
    """
    centre = map_size // 2
    vacancy_map = np.zeros((map_size, map_size), dtype=bool)

    for col in tqdm(range(map_size)):
        x = (col - centre) * map_resolution
        for row in range(map_size):
            z = (row - centre) * map_resolution
            vacancy_map[row, col] = env.sim.is_navigable([x, 0.0, z])

    output_path = os.path.join(
        HABITAT_DATA_BASE, "map_data/pointnav/v1/gibson/data/" + scene
    )
    np.save(output_path, vacancy_map)


def generate_maps():
    """Build and save a navigability map for every scene of the dataset.

    The PointNav Gibson training dataset is configured, its scenes are
    listed, and for each scene a Habitat environment restricted to that
    scene is created, mapped with `make_map` and closed again.
    """
    config = get_habitat_config(
        os.path.join(HABITAT_CONFIGS_DIR, "tasks/pointnav.yaml")
    )
    config.defrost()
    config.DATASET.DATA_PATH = os.path.join(
        HABITAT_DATASETS_DIR, "pointnav/gibson/v1/train/train.json.gz"
    )
    config.DATASET.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR
    config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = 0
    config.freeze()

    dataset = habitat.make_dataset(config.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(config.DATASET)

    for scene in scenes:
        print("Making environment for:", scene)
        config.defrost()
        config.DATASET.CONTENT_SCENES = [scene]
        config.freeze()
        env = habitat.Env(config=config)
        try:
            make_map(env, scene)
        finally:
            # Release the simulator even if building the map fails.
            env.close()


if __name__ == "__main__":
    generate_maps()


================================================
FILE: allenact_plugins/ithor_plugin/__init__.py
================================================
from allenact.utils.system import ImportChecker

with ImportChecker(
    "Cannot `import ai2thor`, please install `ai2thor` (`pip install ai2thor`)."
):
    # noinspection PyUnresolvedReferences
    import ai2thor


================================================
FILE: allenact_plugins/ithor_plugin/extra_environment.yml
================================================
channels:
  - defaults
  - conda-forge
dependencies:
  - ai2thor>=2.5.3
  - numba
  - pip
  - colour
  - packaging
  - pip:
      - numpy-quaternion
      - pyquaternion>=0.9.9
      - python-xlib


================================================
FILE: allenact_plugins/ithor_plugin/extra_requirements.txt
================================================
ai2thor>=2.5.3
numpy-quaternion
pyquaternion>=0.9.9
colour
numba
packaging
python-xlib


================================================
FILE: allenact_plugins/ithor_plugin/ithor_constants.py
================================================
"""Common constants used when training agents to complete tasks in iTHOR, the
interactive version of AI2-THOR."""

from collections import OrderedDict
from typing import Set, Dict

# Names of the agent's navigation actions.
MOVE_AHEAD = "MoveAhead"
ROTATE_LEFT = "RotateLeft"
ROTATE_RIGHT = "RotateRight"
LOOK_DOWN = "LookDown"
LOOK_UP = "LookUp"
END = "End"

# Default distance (in meters) at which objects in the agent's viewport are
# considered visible, and default field of view of the agent's camera
# (presumably in degrees -- TODO confirm).
VISIBILITY_DISTANCE = 1.25
FOV = 90.0

# Scene types in the order used to number scenes: the type at index `i` is
# passed to `make_scene_name` as `type_ind = i + 1`.
ORDERED_SCENE_TYPES = ("kitchens", "livingrooms", "bedrooms", "bathrooms")

NUM_SCENE_TYPES = len(ORDERED_SCENE_TYPES)


def make_scene_name(type_ind, scene_num):
    """Return the scene name for the given scene-type index and scene number.

    Scenes of type 1 are named `FloorPlan<scene_num>_physics`. For every
    other type the name is `FloorPlan<type_ind><scene_num>_physics`, with a
    `0` inserted before scene numbers below 10.
    """
    if type_ind == 1:
        number_prefix = ""
    else:
        padding = "0" if scene_num < 10 else ""
        number_prefix = str(type_ind) + padding
    return "FloorPlan" + number_prefix + str(scene_num) + "_physics"


# Scene type -> names of its scenes, numbered 1 to 30.
SCENES_TYPE_TO_SCENE_NAMES = OrderedDict(
    (
        scene_type,
        tuple(
            make_scene_name(type_ind=type_ind, scene_num=scene_num)
            for scene_num in range(1, 31)
        ),
    )
    for type_ind, scene_type in enumerate(ORDERED_SCENE_TYPES, start=1)
)

# Per-type splits: the first 20 scenes are for training, the next 5 for
# validation and the last 5 for testing.
SCENES_TYPE_TO_TRAIN_SCENE_NAMES = OrderedDict(
    (scene_type, names[:20])
    for scene_type, names in SCENES_TYPE_TO_SCENE_NAMES.items()
)

SCENES_TYPE_TO_VALID_SCENE_NAMES = OrderedDict(
    (scene_type, names[20:25])
    for scene_type, names in SCENES_TYPE_TO_SCENE_NAMES.items()
)

SCENES_TYPE_TO_TEST_SCENE_NAMES = OrderedDict(
    (scene_type, names[25:30])
    for scene_type, names in SCENES_TYPE_TO_SCENE_NAMES.items()
)

# Flat tuples of scene names, concatenated in scene-type order.
ALL_SCENE_NAMES = tuple(
    name for names in SCENES_TYPE_TO_SCENE_NAMES.values() for name in names
)

TRAIN_SCENE_NAMES = tuple(
    name for names in SCENES_TYPE_TO_TRAIN_SCENE_NAMES.values() for name in names
)

VALID_SCENE_NAMES = tuple(
    name for names in SCENES_TYPE_TO_VALID_SCENE_NAMES.values() for name in names
)
TEST_SCENE_NAMES = tuple(
    name for names in SCENES_TYPE_TO_TEST_SCENE_NAMES.values() for name in names
)

# Set versions of the flat tuples.
TRAIN_SCENE_NAMES_SET = {*TRAIN_SCENE_NAMES}
VALID_SCENE_NAMES_SET = {*VALID_SCENE_NAMES}
TEST_SCENE_NAMES_SET = {*TEST_SCENE_NAMES}

_object_type_and_location_tsv = """
AlarmClock	bedrooms
Apple	kitchens
ArmChair	livingrooms,bedrooms
BaseballBat	bedrooms
BasketBall	bedrooms
Bathtub	bathrooms
BathtubBasin	bathrooms
Bed	bedrooms
Blinds	kitchens,bedrooms
Book	kitchens,livingrooms,bedrooms
Boots	livingrooms,bedrooms
Bottle	kitchens
Bowl	kitchens,livingrooms,bedrooms
Box	livingrooms,bedrooms
Bread	kitchens
ButterKnife	kitchens
Cabinet	kitchens,livingrooms,bedrooms,bathrooms
Candle	livingrooms,bathrooms
Cart	bathrooms
CD	bedrooms
CellPhone	kitchens,livingrooms,bedrooms
Chair	kitchens,livingrooms,bedrooms
Cloth	bedrooms,bathrooms
CoffeeMachine	kitchens
CoffeeTable	livingrooms,bedrooms
CounterTop	kitchens,livingrooms,bedrooms,bathrooms
CreditCard	kitchens,livingrooms,bedrooms
Cup	kitchens
Curtains	kitchens,livingrooms,bedrooms
Desk	bedrooms
DeskLamp	livingrooms,bedrooms
DiningTable	kitchens,livingrooms,bedrooms
DishSponge	kitchens,bathrooms
Drawer	kitchens,livingrooms,bedrooms,bathrooms
Dresser	livingrooms,bedrooms,bathrooms
Egg	kitchens
Faucet	kitchens,bathrooms
FloorLamp	livingrooms,bedrooms
Footstool	bedrooms
Fork	kitchens
Fridge	kitchens
GarbageCan	kitchens,livingrooms,bedrooms,bathrooms
HandTowel	bathrooms
HandTowelHolder	bathrooms
HousePlant	kitchens,livingrooms,bedrooms,bathrooms
Kettle	kitchens
KeyChain	livingrooms,bedrooms
Knife	kitchens
Ladle	kitchens
Laptop	kitchens,livingrooms,bedrooms
LaundryHamper	bedrooms
LaundryHamperLid	bedrooms
Lettuce	kitchens
LightSwitch	kitchens,livingrooms,bedrooms,bathrooms
Microwave	kitchens
Mirror	kitchens,livingrooms,bedrooms,bathrooms
Mug	kitchens,bedrooms
Newspaper	livingrooms
Ottoman	livingrooms,bedrooms
Painting	kitchens,livingrooms,bedrooms,bathrooms
Pan	kitchens
PaperTowel	kitchens,bathrooms
Pen	kitchens,livingrooms,bedrooms
Pencil	kitchens,livingrooms,bedrooms
PepperShaker	kitchens
Pillow	livingrooms,bedrooms
Plate	kitchens,livingrooms
Plunger	bathrooms
Poster	bedrooms
Pot	kitchens
Potato	kitchens
RemoteControl	livingrooms,bedrooms
Safe	kitchens,livingrooms,bedrooms
SaltShaker	kitchens
ScrubBrush	bathrooms
Shelf	kitchens,livingrooms,bedrooms,bathrooms
ShowerCurtain	bathrooms
ShowerDoor	bathrooms
ShowerGlass	bathrooms
ShowerHead	bathrooms
SideTable	livingrooms,bedrooms
Sink	kitchens,bathrooms
SinkBasin	kitchens,bathrooms
SoapBar	bathrooms
SoapBottle	kitchens,bathrooms
Sofa	livingrooms,bedrooms
Spatula	kitchens
Spoon	kitchens
SprayBottle	bathrooms
Statue	kitchens,livingrooms,bedrooms
StoveBurner	kitchens
StoveKnob	kitchens
TeddyBear	bedrooms
Television	livingrooms,bedrooms
TennisRacket	bedrooms
TissueBox	livingrooms,bedrooms,bathrooms
Toaster	kitchens
Toilet	bathrooms
ToiletPaper	bathrooms
ToiletPaperHanger	bathrooms
Tomato	kitchens
Towel	bathrooms
TowelHolder	bathrooms
TVStand	livingrooms
Vase	kitchens,livingrooms,bedrooms
Watch	livingrooms,bedrooms
WateringCan	livingrooms
Window	kitchens,livingrooms,bedrooms,bathrooms
WineBottle	kitchens
"""

# Object type -> sorted tuple of the scene types it is listed under.
OBJECT_TYPE_TO_SCENE_TYPES = OrderedDict()

# Scene type -> set of the object types listed under it.
SCENE_TYPE_TO_OBJECT_TYPES: Dict[str, Set[str]] = OrderedDict(
    (k, set()) for k in ORDERED_SCENE_TYPES
)

# Every non-empty line of the table is "<object type>\t<scene types csv>";
# both lookup tables are filled in a single pass over it.
for ot_tab_scene_types in _object_type_and_location_tsv.split("\n"):
    if ot_tab_scene_types == "":
        continue

    ot, scene_types_csv = ot_tab_scene_types.split("\t")
    OBJECT_TYPE_TO_SCENE_TYPES[ot] = tuple(sorted(scene_types_csv.split(",")))
    for scene_type in scene_types_csv.split(","):
        SCENE_TYPE_TO_OBJECT_TYPES[scene_type].add(ot)


================================================
FILE: allenact_plugins/ithor_plugin/ithor_environment.py
================================================
"""A wrapper for engaging with the THOR environment."""

import copy
import functools
import math
import random
from typing import Tuple, Dict, List, Set, Union, Any, Optional, Mapping, cast

import ai2thor.server
import networkx as nx
import numpy as np
from ai2thor.controller import Controller
from scipy.spatial.transform import Rotation

from allenact.utils.system import get_logger
from allenact_plugins.ithor_plugin.ithor_constants import VISIBILITY_DISTANCE, FOV
from allenact_plugins.ithor_plugin.ithor_util import round_to_factor


class IThorEnvironment(object):
    """Wrapper for the ai2thor controller providing additional functionality
    and bookkeeping.

    See [here](https://ai2thor.allenai.org/documentation/installation) for comprehensive
     documentation on AI2-THOR.

    # Attributes

    controller : The ai2thor controller.
    """

    def __init__(
        self,
        x_display: Optional[str] = None,
        docker_enabled: bool = False,
        local_thor_build: Optional[str] = None,
        visibility_distance: float = VISIBILITY_DISTANCE,
        fov: float = FOV,
        player_screen_width: int = 300,
        player_screen_height: int = 300,
        quality: str = "Very Low",
        restrict_to_initially_reachable_points: bool = False,
        make_agents_visible: bool = True,
        object_open_speed: float = 1.0,
        simplify_physics: bool = False,
    ) -> None:
        """Store the configuration and immediately start the controller.

        # Parameters

        x_display : The x display into which to launch ai2thor (possibly necessary if you are running on a
            server without an attached display).
        docker_enabled : Whether or not to run thor in a docker container (useful on a server without an
            attached display so that you don't have to start an x display). The value is written onto the
            controller after it has been started.
        local_thor_build : The path to a local build of ai2thor. This is probably not necessary for your use
            case and can be safely ignored.
        visibility_distance : The distance (in meters) at which objects, in the viewport of the agent,
            are considered visible by ai2thor and will have their "visible" flag be set to `True` in the
            metadata.
        fov : The agent's camera's field of view.
        player_screen_width : The width resolution (in pixels) of the images returned by ai2thor.
        player_screen_height : The height resolution (in pixels) of the images returned by ai2thor.
        quality : The quality at which to render. Possible quality settings can be found in
            `ai2thor._quality_settings.QUALITY_SETTINGS`.
        restrict_to_initially_reachable_points : Whether or not to restrict the agent to locations in ai2thor
            that were found to be (initially) reachable by the agent (i.e. reachable by the agent after
            resetting the scene). This can be useful if you want to ensure there is only a fixed set of
            locations where the agent can go.
        make_agents_visible : Whether or not the agent should be visible. Most noticeable when there are
            multiple agents or when quality settings are high so that the agent casts a shadow.
        object_open_speed : How quickly objects should be opened. High speeds mean faster simulation but also
            mean that opening objects have a lot of kinetic energy and can, possibly, knock other objects
            away.
        simplify_physics : Whether or not to simplify physics when applicable. Currently this only simplifies
            object interactions when opening drawers (when simplified, objects within a drawer do not slide
            around on their own when the drawer is opened or closed, instead they are effectively glued
            down).
        """

        # Settings used when launching the controller (see `start`).
        self.x_display = x_display
        self._local_thor_build = local_thor_build
        self._quality = quality
        self._start_player_screen_width = player_screen_width
        self._start_player_screen_height = player_screen_height

        # Settings sent with the "Initialize" action (see `reset`).
        self._visibility_distance = visibility_distance
        self._fov = fov
        self.make_agents_visible = make_agents_visible
        self._always_return_visible_range = False

        # Behavioural switches consulted while stepping.
        self.restrict_to_initially_reachable_points = (
            restrict_to_initially_reachable_points
        )
        self.object_open_speed = object_open_speed
        self.simplify_physics = simplify_physics

        # Bookkeeping that is filled in by `start` / `reset`.
        self.controller: Optional[Controller] = None
        self._started = False
        self._move_mag: Optional[float] = None
        self._grid_size: Optional[float] = None
        self._initially_reachable_points: Optional[List[Dict]] = None
        self._initially_reachable_points_set: Optional[Set[Tuple[float, float]]] = None

        self.start(None)
        # noinspection PyTypeHints
        self.controller.docker_enabled = docker_enabled  # type: ignore

    @property
    def scene_name(self) -> str:
        """Current ai2thor scene."""
        return self.controller.last_event.metadata["sceneName"]

    @property
    def current_frame(self) -> np.ndarray:
        """Returns rgb image corresponding to the agent's egocentric view."""
        return self.controller.last_event.frame

    @property
    def last_event(self) -> ai2thor.server.Event:
        """Last event returned by the controller."""
        return self.controller.last_event

    @property
    def started(self) -> bool:
        """Has the ai2thor controller been started."""
        return self._started

    @property
    def last_action(self) -> str:
        """Last action, as a string, taken by the agent."""
        return self.controller.last_event.metadata["lastAction"]

    @last_action.setter
    def last_action(self, value: str) -> None:
        """Set the last action taken by the agent.

        Doing this is rewriting history, be careful.
        """
        self.controller.last_event.metadata["lastAction"] = value

    @property
    def last_action_success(self) -> bool:
        """Was the last action taken by the agent a success?"""
        return self.controller.last_event.metadata["lastActionSuccess"]

    @last_action_success.setter
    def last_action_success(self, value: bool) -> None:
        """Set whether or not the last action taken by the agent was a success.

        Doing this is rewriting history, be careful.
        """
        self.controller.last_event.metadata["lastActionSuccess"] = value

    @property
    def last_action_return(self) -> Any:
        """Get the value returned by the last action (if applicable).

        For an example of an action that returns a value, see
        `"GetReachablePositions"`.
        """
        return self.controller.last_event.metadata["actionReturn"]

    @last_action_return.setter
    def last_action_return(self, value: Any) -> None:
        """Set the value returned by the last action.

        Doing this is rewriting history, be careful.
        """
        self.controller.last_event.metadata["actionReturn"] = value

    def start(
        self,
        scene_name: Optional[str],
        move_mag: float = 0.25,
        **kwargs,
    ) -> None:
        """Launch a new ai2thor controller and reset it into a scene.

        A fresh `Controller` is created from the launch settings stored on this
        object, the resolution is corrected if the first frame does not have the
        requested size, and finally `reset` is called.

        # Parameters

        scene_name : The scene to load.
        move_mag : The amount of distance the agent moves in a single `MoveAhead` step.
        kwargs : additional kwargs, passed to reset.

        # Raises

        RuntimeError : If the environment is already marked as started.
        """
        if self._started:
            raise RuntimeError(
                "Trying to start the environment but it is already started."
            )

        width = self._start_player_screen_width
        height = self._start_player_screen_height

        # noinspection PyUnresolvedReferences
        self.controller = Controller(
            x_display=self.x_display,
            width=width,
            height=height,
            local_executable_path=self._local_thor_build,
            quality=self._quality,
            server_class=ai2thor.fifo_server.FifoServer,
        )

        # The first two frame dimensions are compared against (height, width);
        # on a mismatch the controller is asked to change its resolution.
        if self.current_frame.shape[:2] != (height, width):
            self.controller.step(
                {"action": "ChangeResolution", "x": width, "y": height}
            )

        self._started = True
        self.reset(scene_name=scene_name, move_mag=move_mag, **kwargs)

    def stop(self) -> None:
        """Stops the ai2thor controller."""
        try:
            self.controller.stop()
        except Exception as e:
            get_logger().warning(str(e))
        finally:
            self._started = False

    def reset(
        self,
        scene_name: Optional[str],
        move_mag: float = 0.25,
        **kwargs,
    ):
        """Resets the ai2thor in a new scene.

        Resets ai2thor into a new scene and initializes the scene/agents with
        prespecified settings (e.g. move magnitude).

        # Parameters

        scene_name : The scene to load.
        move_mag : The amount of distance the agent moves in a single `MoveAhead` step.
        kwargs : additional kwargs, passed to the controller "Initialize" action.
        """
        self._move_mag = move_mag
        self._grid_size = self._move_mag

        if scene_name is None:
            scene_name = self.controller.last_event.metadata["sceneName"]
        self.controller.reset(scene_name)

        self.controller.step(
            {
                "action": "Initialize",
                "gridSize": self._grid_size,
                "visibilityDistance": self._visibility_distance,
                "fieldOfView": self._fov,
                "makeAgentsVisible": self.make_agents_visible,
                "alwaysReturnVisibleRange": self._always_return_visible_range,
                **kwargs,
            }
        )

        if self.object_open_speed != 1.0:
            self.controller.step(
                {"action": "ChangeOpenSpeed", "x": self.object_open_speed}
            )

        self._initially_reachable_points = None
        self._initially_reachable_points_set = None
        self.controller.step({"action": "GetReachablePositions"})
        if not self.controller.last_event.metadata["lastActionSuccess"]:
            get_logger().warning(
                "Error when getting reachable points: {}".format(
                    self.controller.last_event.metadata["errorMessage"]
                )
            )
        self._initially_reachable_points = self.last_action_return

    def teleport_agent_to(
        self,
        x: float,
        y: float,
        z: float,
        rotation: float,
        horizon: float,
        standing: Optional[bool] = None,
        force_action: bool = False,
        only_initially_reachable: Optional[bool] = None,
        verbose: bool = True,
        ignore_y_diffs: bool = False,
    ) -> None:
        """Helper function teleporting the agent to a given location.

        Nothing is returned; the outcome is reported through
        `last_action_success` (and, on some failures, the `"errorMessage"`
        entry of the last event's metadata).

        # Parameters

        x : Target x coordinate, sent with the "TeleportFull" action.
        y : Target y coordinate, sent with the "TeleportFull" action.
        z : Target z coordinate, sent with the "TeleportFull" action.
        rotation : Sent as the `y` component of the action's rotation (the `x` and `z` components are 0).
        horizon : Sent as the action's `horizon`.
        standing : Sent as the action's `standing`. If `None`, the `"isStanding"` value found in the last
            event's metadata is used.
        force_action : Sent as the action's `forceAction`. When set and the teleport succeeds, the final
            position of the agent is not verified.
        only_initially_reachable : If true, the target must lie within 0.01 of an initially reachable point,
            otherwise the action is marked as failed without contacting the controller. Defaults to
            `self.restrict_to_initially_reachable_points` when `None`.
        verbose : Whether to log a warning when the agent does not end up at the requested pose.
        ignore_y_diffs : Whether the y coordinate is ignored in all position comparisons made here.
        """
        if standing is None:
            # The flag is looked up at the top level of the metadata first and
            # under "agent" otherwise.
            standing = self.last_event.metadata.get(
                "isStanding", self.last_event.metadata["agent"].get("isStanding")
            )
        original_location = self.get_agent_location()
        target = {"x": x, "y": y, "z": z}
        if only_initially_reachable is None:
            only_initially_reachable = self.restrict_to_initially_reachable_points
        if only_initially_reachable:
            reachable_points = self.initially_reachable_points
            reachable = False
            for p in reachable_points:
                if self.position_dist(target, p, ignore_y=ignore_y_diffs) < 0.01:
                    reachable = True
                    break
            if not reachable:
                # Report a failed "TeleportFull" without ever stepping the
                # controller.
                self.last_action = "TeleportFull"
                self.last_event.metadata["errorMessage"] = (
                    "Target position was not initially reachable."
                )
                self.last_action_success = False
                return
        self.controller.step(
            dict(
                action="TeleportFull",
                x=x,
                y=y,
                z=z,
                rotation={"x": 0.0, "y": rotation, "z": 0.0},
                horizon=horizon,
                standing=standing,
                forceAction=force_action,
            )
        )
        if not self.last_action_success:
            # The teleport failed; warn if the agent nevertheless moved (by
            # more than 1e-2) or rotated (by more than 1 degree).
            agent_location = self.get_agent_location()
            rot_diff = (
                agent_location["rotation"] - original_location["rotation"]
            ) % 360
            new_old_dist = self.position_dist(
                original_location, agent_location, ignore_y=ignore_y_diffs
            )
            if (
                self.position_dist(
                    original_location, agent_location, ignore_y=ignore_y_diffs
                )
                > 1e-2
                or min(rot_diff, 360 - rot_diff) > 1
            ):
                get_logger().warning(
                    "Teleportation FAILED but agent still moved (position_dist {}, rot diff {})"
                    " (\nprevious location\n{}\ncurrent_location\n{}\n)".format(
                        new_old_dist, rot_diff, original_location, agent_location
                    )
                )
            return

        if force_action:
            # A forced, successful teleport is accepted without checking where
            # the agent ended up.
            assert self.last_action_success
            return

        # The controller reported success; verify that the agent is actually
        # at the requested pose and mark the action as failed if it is not.
        agent_location = self.get_agent_location()
        rot_diff = (agent_location["rotation"] - rotation) % 360
        if (
            self.position_dist(agent_location, target, ignore_y=ignore_y_diffs) > 1e-2
            or min(rot_diff, 360 - rot_diff) > 1
        ):
            if only_initially_reachable:
                self._snap_agent_to_initially_reachable(verbose=False)
            if verbose:
                get_logger().warning(
                    "Teleportation did not place agent"
                    " precisely where desired in scene {}"
                    " (\ndesired\n{}\nactual\n{}\n)"
                    " perhaps due to grid snapping."
                    " Action is considered failed but agent may have moved.".format(
                        self.scene_name,
                        {
                            "x": x,
                            "y": y,
                            "z": z,
                            "rotation": rotation,
                            "standing": standing,
                            "horizon": horizon,
                        },
                        agent_location,
                    )
                )
            self.last_action_success = False
        return

    def random_reachable_state(self, seed: int = None) -> Dict:
        """Returns a random reachable location in the scene."""
        if seed is not None:
            random.seed(seed)
        xyz = random.choice(self.currently_reachable_points)
        rotation = random.choice([0, 90, 180, 270])
        horizon = random.choice([0, 30, 60, 330])
        state = copy.copy(xyz)
        state["rotation"] = rotation
        state["horizon"] = horizon
        return state

    def randomize_agent_location(
        self, seed: int = None, partial_position: Optional[Dict[str, float]] = None
    ) -> Dict:
        """Teleports the agent to a random reachable location in the scene."""
        if partial_position is None:
            partial_position = {}
        k = 0
        state: Optional[Dict] = None

        while k == 0 or (not self.last_action_success and k < 10):
            state = self.random_reachable_state(seed=seed)
            self.teleport_agent_to(**{**state, **partial_position})
            k += 1

        if not self.last_action_success:
            get_logger().warning(
                (
                    "Randomize agent location in scene {}"
                    " with seed {} and partial position {} failed in "
                    "10 attempts. Forcing the action."
                ).format(self.scene_name, seed, partial_position)
            )
            self.teleport_agent_to(**{**state, **partial_position}, force_action=True)  # type: ignore
            assert self.last_action_success

        assert state is not None
        return state

    def object_pixels_in_frame(
        self, object_id: str, hide_all: bool = True, hide_transparent: bool = False
    ) -> np.ndarray:
        """Return an mask for a given object in the agent's current view.

        # Parameters

        object_id : The id of the object.
        hide_all : Whether or not to hide all other objects in the scene before getting the mask.
        hide_transparent : Whether or not partially transparent objects are considered to occlude the object.

        # Returns

        A numpy array of the mask.
        """

        # Emphasizing an object turns it magenta and hides all other objects
        # from view, we can find where the hand object is on the screen by
        # emphasizing it and then scanning across the image for the magenta pixels.
        if hide_all:
            self.step({"action": "EmphasizeObject", "objectId": object_id})
        else:
            self.step({"action": "MaskObject", "objectId": object_id})
            if hide_transparent:
                self.step({"action": "HideTranslucentObjects"})
        # noinspection PyShadowingBuiltins
        filter = np.array([[[255, 0, 255]]])
        object_pixels = 1 * np.all(self.current_frame == filter, axis=2)
        if hide_all:
            self.step({"action": "UnemphasizeAll"})
        else:
            self.step({"action": "UnmaskObject", "objectId": object_id})
            if hide_transparent:
                self.step({"action": "UnhideAllObjects"})
        return object_pixels

    def object_pixels_on_grid(
        self,
        object_id: str,
        grid_shape: Tuple[int, int],
        hide_all: bool = True,
        hide_transparent: bool = False,
    ) -> np.ndarray:
        """Like `object_pixels_in_frame` but counts object pixels in a
        partitioning of the image."""

        def partition(n, num_parts):
            m = n // num_parts
            parts = [m] * num_parts
            num_extra = n % num_parts
            for k in range(num_extra):
                parts[k] += 1
            return parts

        object_pixels = self.object_pixels_in_frame(
            object_id=object_id, hide_all=hide_all, hide_transparent=hide_transparent
        )

        # Divide the current frame into a grid and count the number
        # of hand object pixels in each of the grid squares
        sums_in_blocks: List[List] = []
        frame_shape = self.current_frame.shape[:2]
        row_inds = np.cumsum([0] + partition(frame_shape[0], grid_shape[0]))
        col_inds = np.cumsum([0] + partition(frame_shape[1], grid_shape[1]))
        for i in range(len(row_inds) - 1):
            sums_in_blocks.append([])
            for j in range(len(col_inds) - 1):
                sums_in_blocks[i].append(
                    np.sum(
                        object_pixels[
                            row_inds[i] : row_inds[i + 1], col_inds[j] : col_inds[j + 1]
                        ]
                    )
                )
        return np.array(sums_in_blocks, dtype=np.float32)

    def object_in_hand(self):
        """Object metadata for the object in the agent's hand."""
        inv_objs = self.last_event.metadata["inventoryObjects"]
        if len(inv_objs) == 0:
            return None
        elif len(inv_objs) == 1:
            return self.get_object_by_id(
                self.last_event.metadata["inventoryObjects"][0]["objectId"]
            )
        else:
            raise AttributeError("Must be <= 1 inventory objects.")

    @property
    def initially_reachable_points(self) -> List[Dict[str, float]]:
        """List of {"x": x, "y": y, "z": z} locations in the scene that were
        reachable after initially resetting."""
        assert self._initially_reachable_points is not None
        return copy.deepcopy(self._initially_reachable_points)  # type:ignore

    @property
    def initially_reachable_points_set(self) -> Set[Tuple[float, float]]:
        """Set of (x,z) locations in the scene that were reachable after
        initially resetting."""
        if self._initially_reachable_points_set is None:
            self._initially_reachable_points_set = set()
            for p in self.initially_reachable_points:
                self._initially_reachable_points_set.add(
                    self._agent_location_to_tuple(p)
                )

        return self._initially_reachable_points_set

    @property
    def currently_reachable_points(self) -> List[Dict[str, float]]:
        """List of {"x": x, "y": y, "z": z} locations in the scene that are
        currently reachable."""
        self.step({"action": "GetReachablePositions"})
        return self.last_event.metadata["actionReturn"]  # type:ignore

    def get_agent_location(self) -> Dict[str, Union[float, bool]]:
        """Gets agent's location."""
        metadata = self.controller.last_event.metadata
        location = {
            "x": metadata["agent"]["position"]["x"],
            "y": metadata["agent"]["position"]["y"],
            "z": metadata["agent"]["position"]["z"],
            "rotation": metadata["agent"]["rotation"]["y"],
            "horizon": metadata["agent"]["cameraHorizon"],
            "standing": metadata.get("isStanding", metadata["agent"].get("isStanding")),
        }
        return location

    @staticmethod
    def _agent_location_to_tuple(p: Dict[str, float]) -> Tuple[float, float]:
        return round(p["x"], 2), round(p["z"], 2)

    def _snap_agent_to_initially_reachable(self, verbose: bool = True) -> None:
        """Move the agent onto an initially reachable point if it is off them.

        Nothing happens if the agent's rounded (x, z) location already is one
        of the initially reachable points. Otherwise the agent is teleported to
        the closest initially reachable point that accepts it (forcing the
        teleport onto the very closest one if none does). The last action, its
        success flag, its return value and the error message of the last event
        are restored afterwards so that the correction is not visible there.

        # Parameters

        verbose : Whether to log a warning describing the correction.
        """
        agent_location = self.get_agent_location()

        end_location_tuple = self._agent_location_to_tuple(agent_location)
        if end_location_tuple in self.initially_reachable_points_set:
            return

        agent_x = agent_location["x"]
        agent_z = agent_location["z"]

        # Candidate (x, z) points ordered by L1 distance from the agent.
        closest_reachable_points = list(self.initially_reachable_points_set)
        closest_reachable_points = sorted(
            closest_reachable_points,
            key=lambda xz: abs(xz[0] - agent_x) + abs(xz[1] - agent_z),
        )

        # In rare cases end_location_tuple might be not considered to be in self.initially_reachable_points_set
        # even when it is, here we check for such cases.
        if (
            math.sqrt(
                (
                    (
                        np.array(closest_reachable_points[0])
                        - np.array(end_location_tuple)
                    )
                    ** 2
                ).sum()
            )
            < 1e-6
        ):
            return

        # Remember what the last event reported so it can be restored after
        # the corrective teleports below.
        saved_last_action = self.last_action
        saved_last_action_success = self.last_action_success
        saved_last_action_return = self.last_action_return
        saved_error_message = self.last_event.metadata["errorMessage"]

        # Thor behaves weirdly when the agent gets off of the grid and you
        # try to teleport the agent back to the closest grid location. To
        # get around this we first teleport the agent to random location
        # and then back to where it should be.
        for point in self.initially_reachable_points:
            # Only points more than 0.1 away along x or z are used for this
            # intermediate teleport; the first successful one ends the loop.
            if abs(agent_x - point["x"]) > 0.1 or abs(agent_z - point["z"]) > 0.1:
                self.teleport_agent_to(
                    rotation=0,
                    horizon=30,
                    **point,
                    only_initially_reachable=False,
                    verbose=False,
                )
                if self.last_action_success:
                    break

        # Try the candidates from closest to farthest, keeping the agent's
        # original y, rotation, horizon and standing values.
        for p in closest_reachable_points:
            self.teleport_agent_to(
                **{**agent_location, "x": p[0], "z": p[1]},
                only_initially_reachable=False,
                verbose=False,
            )
            if self.last_action_success:
                break

        teleport_forced = False
        if not self.last_action_success:
            # No candidate accepted the agent: force it onto the closest one.
            self.teleport_agent_to(
                **{
                    **agent_location,
                    "x": closest_reachable_points[0][0],
                    "z": closest_reachable_points[0][1],
                },
                force_action=True,
                only_initially_reachable=False,
                verbose=False,
            )
            teleport_forced = True

        # Restore the saved values on the (now newer) last event.
        self.last_action = saved_last_action
        self.last_action_success = saved_last_action_success
        self.last_action_return = saved_last_action_return
        self.last_event.metadata["errorMessage"] = saved_error_message
        new_agent_location = self.get_agent_location()
        if verbose:
            get_logger().warning(
                (
                    "In {}, at location (x,z)=({},{}) which is not in the set "
                    "of initially reachable points;"
                    " attempting to correct this: agent teleported to (x,z)=({},{}).\n"
                    "Teleportation {} forced."
                ).format(
                    self.scene_name,
                    agent_x,
                    agent_z,
                    new_agent_location["x"],
                    new_agent_location["z"],
                    "was" if teleport_forced else "wasn't",
                )
            )

    def step(
        self,
        action_dict: Optional[Dict[str, Union[str, int, float, Dict]]] = None,
        **kwargs: Union[str, int, float, Dict],
    ) -> ai2thor.server.Event:
        """Take a step in the ai2thor environment."""
        if action_dict is None:
            action_dict = dict()
        action_dict.update(kwargs)

        action = cast(str, action_dict["action"])

        skip_render = "renderImage" in action_dict and not action_dict["renderImage"]
        last_frame: Optional[np.ndarray] = None
        if skip_render:
            last_frame = self.current_frame

        if self.simplify_physics:
            action_dict["simplifyPhysics"] = True

        if "Move" in action and "Hand" not in action:  # type: ignore
            action_dict = {
                **action_dict,
                "moveMagnitude": self._move_mag,
            }  # type: ignore
            start_location = self.get_agent_location()
            sr = self.controller.step(action_dict)

            if self.restrict_to_initially_reachable_points:
                end_location_tuple = self._agent_location_to_tuple(
                    self.get_agent_location()
                )
                if end_location_tuple not in self.initially_reachable_points_set:
                    self.teleport_agent_to(**start_location, force_action=True)  # type: ignore
                    self.last_action = action
                    self.last_action_success = False
                    self.last_event.metadata["errorMessage"] = (
                        "Moved to location outside of initially reachable points."
                    )
        elif "RandomizeHideSeekObjects" in action:
            last_position = self.get_agent_location()
            self.controller.step(action_dict)
            metadata = self.last_event.metadata
            if self.position_dist(last_position, self.get_agent_location()) > 0.001:
                self.teleport_agent_to(**last_position, force_action=True)  # type: ignore
                get_logger().warning(
                    "In scene {}, after randomization of hide and seek objects, agent moved.".format(
                        self.scene_name
                    )
                )

            sr = self.controller.step({"action": "GetReachablePositions"})
            self._initially_reachable_points = self.controller.last_event.metadata[
                "actionReturn"
            ]
            self._initially_reachable_points_set = None
            self.last_action = action
            self.last_action_success = metadata["lastActionSuccess"]
            self.controller.last_event.metadata["actionReturn"] = []
        elif "RotateUniverse" in action:
            sr = self.controller.step(action_dict)
            metadata = self.last_event.metadata

            if metadata["lastActionSuccess"]:
                sr = self.controller.step({"action": "GetReachablePositions"})
                self._initially_reachable_points = self.controller.last_event.metadata[
                    "actionReturn"
                ]
                self._initially_reachable_points_set = None
                self.last_action = action
                self.last_action_success = metadata["lastActionSuccess"]
                self.controller.last_event.metadata["actionReturn"] = []
        else:
            sr = self.controller.step(action_dict)

        if self.restrict_to_initially_reachable_points:
            self._snap_agent_to_initially_reachable()

        if skip_render:
            assert last_frame is not None
            self.last_event.frame = last_frame

        return sr

    @staticmethod
    def position_dist(
        p0: Mapping[str, Any],
        p1: Mapping[str, Any],
        ignore_y: bool = False,
        l1_dist: bool = False,
    ) -> float:
        """Distance between two points of the form {"x": x, "y":y, "z":z"}."""
        if l1_dist:
            return (
                abs(p0["x"] - p1["x"])
                + (0 if ignore_y else abs(p0["y"] - p1["y"]))
                + abs(p0["z"] - p1["z"])
            )
        else:
            return math.sqrt(
                (p0["x"] - p1["x"]) ** 2
                + (0 if ignore_y else (p0["y"] - p1["y"]) ** 2)
                + (p0["z"] - p1["z"]) ** 2
            )

    @staticmethod
    def rotation_dist(a: Dict[str, float], b: Dict[str, float]):
        """Distance between rotations."""

        def deg_dist(d0: float, d1: float):
            dist = (d0 - d1) % 360
            return min(dist, 360 - dist)

        return sum(deg_dist(a[k], b[k]) for k in ["x", "y", "z"])

    @staticmethod
    def angle_between_rotations(a: Dict[str, float], b: Dict[str, float]):
        return np.abs(
            (180 / (2 * math.pi))
            * (
                Rotation.from_euler("xyz", [a[k] for k in "xyz"], degrees=True)
                * Rotation.from_euler("xyz", [b[k] for k in "xyz"], degrees=True).inv()
            ).as_rotvec()
        ).sum()

    def closest_object_with_properties(
        self, properties: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """Find the object closest to the agent that has the given
        properties."""
        agent_pos = self.controller.last_event.metadata["agent"]["position"]
        min_dist = float("inf")
        closest = None
        for o in self.all_objects():
            satisfies_all = True
            for k, v in properties.items():
                if o[k] != v:
                    satisfies_all = False
                    break
            if satisfies_all:
                d = self.position_dist(agent_pos, o["position"])
                if d < min_dist:
                    min_dist = d
                    closest = o
        return closest

    def closest_visible_object_of_type(
        self, object_type: str
    ) -> Optional[Dict[str, Any]]:
        """Find the object closest to the agent that is visible and has the
        given type."""
        properties = {"visible": True, "objectType": object_type}
        return self.closest_object_with_properties(properties)

    def closest_object_of_type(self, object_type: str) -> Optional[Dict[str, Any]]:
        """Find the object closest to the agent that has the given type."""
        properties = {"objectType": object_type}
        return self.closest_object_with_properties(properties)

    def closest_reachable_point_to_position(
        self, position: Dict[str, float]
    ) -> Tuple[Dict[str, float], float]:
        """Of all reachable positions, find the one that is closest to the
        given location."""
        target = np.array([position["x"], position["z"]])
        min_dist = float("inf")
        closest_point = None
        for pt in self.initially_reachable_points:
            dist = np.linalg.norm(target - np.array([pt["x"], pt["z"]]))
            if dist < min_dist:
                closest_point = pt
                min_dist = dist
                if min_dist < 1e-3:
                    break
        assert closest_point is not None
        return closest_point, min_dist

    @staticmethod
    def _angle_from_to(a_from: float, a_to: float) -> float:
        a_from = a_from % 360
        a_to = a_to % 360
        min_rot = min(a_from, a_to)
        max_rot = max(a_from, a_to)
        rot_across_0 = (360 - max_rot) + min_rot
        rot_not_across_0 = max_rot - min_rot
        rot_err = min(rot_across_0, rot_not_across_0)
        if rot_across_0 == rot_err:
            rot_err *= -1 if a_to > a_from else 1
        else:
            rot_err *= 1 if a_to > a_from else -1
        return rot_err

    def agent_xz_to_scene_xz(self, agent_xz: Dict[str, float]) -> Dict[str, float]:
        """Convert an x/z offset expressed relative to the agent into scene
        coordinates.

        The agent's location (as returned by `get_agent_location`) must have a
        rotation within 1e-5 of 0, 90, 180 or 270; otherwise an `Exception` is
        raised.
        """
        location = self.get_agent_location()

        rel_x = agent_xz["x"]
        rel_z = agent_xz["z"]
        base_x = location["x"]
        base_z = location["z"]
        rotation = location["rotation"]

        # (rotation, (scene x offset, scene z offset)) for each allowed heading.
        offsets_by_rotation = (
            (0, (rel_x, rel_z)),
            (90, (rel_z, -rel_x)),
            (180, (-rel_x, -rel_z)),
            (270, (-rel_z, rel_x)),
        )
        for heading, (offset_x, offset_z) in offsets_by_rotation:
            if abs(rotation - heading) < 1e-5:
                return {"x": base_x + offset_x, "z": base_z + offset_z}

        raise Exception("Rotation must be one of 0, 90, 180, or 270.")

    def scene_xz_to_agent_xz(self, scene_xz: Dict[str, float]) -> Dict[str, float]:
        """Convert a scene x/z position into an offset expressed relative to
        the agent.

        The agent's location (as returned by `get_agent_location`) must have a
        rotation within 1e-5 of 0, 90, 180 or 270; otherwise an `Exception` is
        raised.
        """
        location = self.get_agent_location()
        delta_x = scene_xz["x"] - location["x"]
        delta_z = scene_xz["z"] - location["z"]
        rotation = location["rotation"]

        # (rotation, (agent-relative x, agent-relative z)) for each heading.
        relative_by_rotation = (
            (0, (delta_x, delta_z)),
            (90, (-delta_z, delta_x)),
            (180, (-delta_x, -delta_z)),
            (270, (delta_z, -delta_x)),
        )
        for heading, (rel_x, rel_z) in relative_by_rotation:
            if abs(rotation - heading) < 1e-5:
                return {"x": rel_x, "z": rel_z}

        raise Exception("Rotation must be one of 0, 90, 180, or 270.")

    def all_objects(self) -> List[Dict[str, Any]]:
        """Return the `"objects"` entry of the controller's last event
        metadata."""
        event_metadata = self.controller.last_event.metadata
        return event_metadata["objects"]

    def all_objects_with_properties(
        self, properties: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """Return every object whose metadata matches all of `properties`.

        An object matches when, for each key/value pair in `properties`, the
        object's metadata holds an equal value under that key. Objects are
        returned in the order given by `all_objects`.
        """
        return [
            candidate
            for candidate in self.all_objects()
            if not any(
                candidate[name] != wanted for name, wanted in properties.items()
            )
        ]

    def visible_objects(self) -> List[Dict[str, Any]]:
        """Return the metadata of every object whose `visible` flag is
        `True`."""
        only_visible = {"visible": True}
        return self.all_objects_with_properties(only_visible)

    def get_object_by_id(self, object_id: str) -> Optional[Dict[str, Any]]:
        """Return the metadata of the first object in the last event whose
        `objectId` equals `object_id`, or `None` if there is no such object."""
        matching = (
            candidate
            for candidate in self.last_event.metadata["objects"]
            if candidate["objectId"] == object_id
        )
        return next(matching, None)

    ###
    # The following is used for computing shortest paths between agent states,
    # where a state key is an (x, z, rotation, horizon) tuple (see `get_key`).
    ###
    # Maps a scene name to its navigation graph. This is a class attribute, so
    # the same dictionary is shared by every instance of the class.
    _CACHED_GRAPHS: Dict[str, nx.DiGraph] = {}

    # The only actions that may label an edge of the navigation graph; failed
    # actions outside this set never modify the graph.
    GRAPH_ACTIONS_SET = {"LookUp", "LookDown", "RotateLeft", "RotateRight", "MoveAhead"}

    def reachable_points_with_rotations_and_horizons(self):
        """Enumerate every reachable position at every rotation and horizon.

        Issues a `GetReachablePositions` step to the controller (asserting that
        it succeeded) and returns one shallow copy of each returned position
        for each rotation in (0, 90, 180, 270) and each horizon in
        (-30, 0, 30, 60), with `"rotation"` and `"horizon"` entries added.
        """
        self.controller.step({"action": "GetReachablePositions"})
        assert self.last_action_success

        base_points = self.last_event.metadata["actionReturn"]

        def with_pose(point, rotation, horizon):
            # Shallow copy so the dictionaries returned by the controller are
            # left untouched.
            posed = copy.copy(point)
            posed["rotation"] = rotation
            posed["horizon"] = horizon
            return posed

        return [
            with_pose(point, rotation, horizon)
            for rotation in (0, 90, 180, 270)
            for horizon in (-30, 0, 30, 60)
            for point in base_points
        ]

    @staticmethod
    def location_for_key(key, y_value=0.0):
        x, z, rot, hor = key
        loc = dict(x=x, y=y_value, z=z, rotation=rot, horizon=hor)
        return loc

    @staticmethod
    def get_key(input_dict: Dict[str, Any]) -> Tuple[float, float, int, int]:
        """Build the `(x, z, rotation, horizon)` graph key for a location.

        Two layouts are accepted: a flat dictionary with `"x"`, `"z"`,
        `"rotation"` and `"horizon"` entries, or (when `"x"` is absent) a
        nested one with `"position"`, `"rotation"]["y"` and `"cameraHorizon"`.
        Positions are rounded to 2 decimals; rotation and horizon are snapped
        to multiples of 90 and 30 respectively and reduced modulo 360.
        """
        is_flat = "x" in input_dict
        position = input_dict if is_flat else input_dict["position"]
        x, z = position["x"], position["z"]
        rotation = input_dict["rotation"] if is_flat else input_dict["rotation"]["y"]
        horizon = input_dict["horizon" if is_flat else "cameraHorizon"]

        snapped_rotation = round_to_factor(rotation, 90) % 360
        snapped_horizon = round_to_factor(horizon, 30) % 360
        return round(x, 2), round(z, 2), snapped_rotation, snapped_horizon

    def update_graph_with_failed_action(self, failed_action: str):
        """Remove from the graph the edge for an action that just failed.

        Nothing happens unless a graph is already cached for the current scene
        and `failed_action` is one of `GRAPH_ACTIONS_SET`. Otherwise the first
        outgoing edge of the agent's current state labelled with
        `failed_action` (if any) is removed.
        """
        graph_is_cached = self.scene_name in self._CACHED_GRAPHS
        if not graph_is_cached or failed_action not in self.GRAPH_ACTIONS_SET:
            return

        agent_key = self.get_key(self.last_event.metadata["agent"])
        self._check_contains_key(agent_key)

        outgoing = self.graph[agent_key]
        blocked_target = next(
            (
                target
                for target, attributes in outgoing.items()
                if attributes["action"] == failed_action
            ),
            None,
        )
        if blocked_target is not None:
            self.graph.remove_edge(agent_key, blocked_target)

    def _add_from_to_edge(
        self,
        g: nx.DiGraph,
        s: Tuple[float, float, int, int],
        t: Tuple[float, float, int, int],
    ):
        """Add the directed edge `s -> t` to `g` if one graph action connects
        the two `(x, z, rotation, horizon)` keys.

        An edge is only considered when exactly one of planar distance,
        rotation and horizon differs between `s` and `t`. The edge's `action`
        attribute is `RotateRight`/`RotateLeft` for a quarter turn either way,
        `LookDown`/`LookUp` for a 30 degree horizon step, or `MoveAhead` when
        `t` lies one grid step from `s` along the axis selected by `s`'s
        rotation. Any other difference adds nothing.
        """
        s_x, s_z, s_rot, s_hor = s
        t_x, t_z, t_rot, t_hor = t

        def nearly_equal(a, b):
            return abs(a - b) < 0.001

        planar_dist = round(math.sqrt((s_x - t_x) ** 2 + (s_z - t_z) ** 2), 2)
        # Differences expressed as counts of 90 / 30 degree steps (mod 360).
        quarter_turns = (round_to_factor(t_rot - s_rot, 90) % 360) // 90
        horizon_steps = (round_to_factor(t_hor - s_hor, 30) % 360) // 30

        num_changed = sum(
            delta != 0 for delta in (planar_dist, quarter_turns, horizon_steps)
        )
        if num_changed != 1:
            return

        step = self._grid_size
        action = None
        if quarter_turns != 0:
            action = {1: "RotateRight", 3: "RotateLeft"}.get(quarter_turns)
        elif horizon_steps != 0:
            # 11 steps of +30 degrees (mod 360) is a single step of -30.
            action = {11: "LookUp", 1: "LookDown"}.get(horizon_steps)
        elif nearly_equal(planar_dist, step):
            delta_x = t_x - s_x
            delta_z = t_z - s_z
            if (
                (s_rot == 0 and nearly_equal(delta_z, step))
                or (s_rot == 90 and nearly_equal(delta_x, step))
                or (s_rot == 180 and nearly_equal(delta_z, -step))
                or (s_rot == 270 and nearly_equal(delta_x, -step))
            ):
                action = "MoveAhead"

        if action is not None:
            g.add_edge(s, t, action=action)

    @functools.lru_cache(1)
    def possible_neighbor_offsets(self) -> Tuple[Tuple[float, float, int, int], ...]:
        grid_size = round(self._grid_size, 2)
        offsets = []
        for rot_diff in [-90, 0, 90]:
            for horz_diff in [-30, 0, 30, 60]:
                for x_diff in [-grid_size, 0, grid_size]:
                    for z_diff in [-grid_size, 0, grid_size]:
                        if (rot_diff != 0) + (horz_diff != 0) + (x_diff != 0) + (
                            z_diff != 0
                        ) == 1:
                            offsets.append((x_diff, z_diff, rot_diff, horz_diff))
        return tuple(offsets)

    def _add_node_to_graph(self, graph: nx.DiGraph, s: Tuple[float, float, int, int]):
        if s in graph:
            return

        existing_nodes = set(graph.nodes())
        graph.add_node(s)

        for o in self.possible_neighbor_offsets():
            t = (s[0] + o[0], s[1] + o[1], s[2] + o[2], s[3] + o[3])
            if t in existing_nodes:
                self._add_from_to_edge(graph, s, t)
                self._add_from_to_edge(graph, t, s)

    @property
    def graph(self):
        if self.scene_name not in self._CACHED_GRAPHS:
            g = nx.DiGraph()
            points = self.reachable_points_with_rotations_and_horizons()
            for p in points:
                self._add_node_to_graph(g, self.get_key(p))

            self._CACHED_GRAPHS[self.scene_name] = g
        return self._CACHED_GRAPHS[self.scene_name]

    @graph.setter
    def graph(self, g):
        self._CACHED_GRAPHS[self.scene_name] = g

    def _check_contains_key(self, key: Tuple[float, float, int, int], add_if_not=True):
        if key not in self.graph:
            get_logger().warning(
                "{} was not in the graph for scene {}.".format(key, self.scene_name)
            )
            if add_if_not:
                self._add_node_to_graph(self.graph, key)

    def shortest_state_path(self, source_state_key, goal_state_key):
        """Return the shortest sequence of state keys leading from the source
        to the goal, or `None` if computing it raises any exception.

        Both keys are first passed through `_check_contains_key`.
        """
        for state_key in (source_state_key, goal_state_key):
            self._check_contains_key(state_key)
        # noinspection PyBroadException
        try:
            return nx.shortest_path(self.graph, source_state_key, goal_state_key)
        except Exception as _:
            return None

    def action_transitioning_between_keys(self, s, t):
        """Return the action labelling the graph edge `s -> t`, or `None` when
        the graph has no such edge.

        Both keys are first passed through `_check_contains_key`.
        """
        for state_key in (s, t):
            self._check_contains_key(state_key)
        if not self.graph.has_edge(s, t):
            return None
        return self.graph.get_edge_data(s, t)["action"]

    def shortest_path_next_state(self, source_state_key, goal_state_key):
        """Return the state key that follows the source on the shortest path
        to the goal.

        Raises a `RuntimeError` when the source and goal keys are equal. Both
        keys are first passed through `_check_contains_key`.
        """
        for state_key in (source_state_key, goal_state_key):
            self._check_contains_key(state_key)
        if source_state_key == goal_state_key:
            raise RuntimeError("called next state on the same source and goal state")
        # Element 0 of the path is the source itself; element 1 is the next state.
        return self.shortest_state_path(source_state_key, goal_state_key)[1]

    def shortest_path_next_action(self, source_state_key, goal_state_key):
        """Return the action labelling the first edge of the shortest path
        from the source state to the goal state.

        Both keys are first passed through `_check_contains_key`.
        """
        for state_key in (source_state_key, goal_state_key):
            self._check_contains_key(state_key)

        following_key = self.shortest_path_next_state(source_state_key, goal_state_key)
        first_edge = self.graph.get_edge_data(source_state_key, following_key)
        return first_edge["action"]

    def shortest_path_length(self, source_state_key, goal_state_key):
        """Return the length of the shortest path between the two state keys,
        or `float("inf")` when networkx reports that no path exists.

        Both keys are first passed through `_check_contains_key`.
        """
        for state_key in (source_state_key, goal_state_key):
            self._check_contains_key(state_key)
        try:
            length = nx.shortest_path_length(
                self.graph, source_state_key, goal_state_key
            )
        except nx.NetworkXNoPath as _:
            return float("inf")
        return length


================================================
FILE: allenact_plugins/ithor_plugin/ithor_sensors.py
================================================
import copy
from functools import reduce
from typing import Any, Dict, Optional, Union, Sequence

import ai2thor.controller
import gym
import gym.spaces
import numpy as np
import torch

from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact.embodiedai.mapping.mapping_utils.map_builders import (
    BinnedPointCloudMapBuilder,
    SemanticMapBuilder,
    ObjectHull2d,
)
from allenact.embodiedai.sensors.vision_sensors import RGBSensor
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask
from allenact_plugins.ithor_plugin.ithor_util import include_object_data
from allenact_plugins.robothor_plugin.robothor_environment import RoboThorEnvironment
from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask, ObjectNavTask

# Environment types accepted by the THOR sensors in this module: a raw AI2-THOR
# controller or one of the iTHOR / RoboTHOR environment classes.
THOR_ENV_TYPE = Union[
    ai2thor.controller.Controller, IThorEnvironment, RoboThorEnvironment
]
# Task types parameterized by each of the environment types above.
THOR_TASK_TYPE = Union[
    Task[ai2thor.controller.Controller],
    Task[IThorEnvironment],
    Task[RoboThorEnvironment],
]


class RGBSensorThor(RGBSensor[THOR_ENV_TYPE, THOR_TASK_TYPE]):
    """RGB image sensor for THOR environments.

    Produces a copy of the current RGB frame, taken either from a raw
    AI2-THOR controller's last event or from an environment's
    `current_frame`.
    """

    def frame_from_env(
        self,
        env: THOR_ENV_TYPE,
        task: Optional[THOR_TASK_TYPE],
    ) -> np.ndarray:  # type:ignore
        """Return a copy of the current RGB frame of `env` (`task` is
        unused)."""
        if not isinstance(env, ai2thor.controller.Controller):
            return env.current_frame.copy()
        # A raw controller exposes the frame on its last event.
        return env.last_event.frame.copy()


class GoalObjectTypeThorSensor(Sensor):
    """Sensor reporting the integer index of the task's goal object type.

    Without a `target_to_detector_map` the index is the position of the goal
    type within the (sorted) `object_types`. With such a map, each object type
    is first mapped to a detector type and the index is that detector type's
    position within `detector_types`.
    """

    def __init__(
        self,
        object_types: Sequence[str],
        target_to_detector_map: Optional[Dict[str, str]] = None,
        detector_types: Optional[Sequence[str]] = None,
        uuid: str = "goal_object_type_ind",
        **kwargs: Any,
    ):
        """Build the object-type-to-index lookup.

        # Parameters

        object_types : The goal object types; must already be sorted.
        target_to_detector_map : Optional mapping from object type to detector type.
        detector_types : Detector types, required when `target_to_detector_map` is given.
        uuid : Identifier of this sensor.
        kwargs : Forwarded (together with the local variables) to the parent constructor.
        """
        self.ordered_object_types = list(object_types)
        assert self.ordered_object_types == sorted(
            self.ordered_object_types
        ), "object types input to goal object type sensor must be ordered"

        self.target_to_detector_map = target_to_detector_map

        if target_to_detector_map is None:
            # Index directly into the sorted list of object types.
            self.object_type_to_ind = {
                ot: i for i, ot in enumerate(self.ordered_object_types)
            }
        else:
            assert (
                detector_types is not None
            ), "Missing detector_types for map {}".format(target_to_detector_map)
            self.target_to_detector = target_to_detector_map
            self.detector_types = detector_types

            # Index of each object type's detector type within `detector_types`.
            detector_index = {ot: i for i, ot in enumerate(self.detector_types)}
            self.object_type_to_ind = {
                ot: detector_index[self.target_to_detector[ot]]
                for ot in self.ordered_object_types
            }

        observation_space = self._get_observation_space()

        # NOTE: every local variable defined above is forwarded to the parent
        # constructor, so adding or renaming locals changes that call.
        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self):
        """Return a discrete space with one value per object type, or one per
        detector type when a target-to-detector map is in use."""
        if self.target_to_detector_map is None:
            return gym.spaces.Discrete(len(self.ordered_object_types))
        else:
            return gym.spaces.Discrete(len(self.detector_types))

    def get_observation(
        self,
        env: IThorEnvironment,
        task: Optional[ObjectNaviThorGridTask],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Return the index associated with `task.task_info["object_type"]`."""
        return self.object_type_to_ind[task.task_info["object_type"]]


class TakeEndActionThorNavSensor(
    Sensor[
        Union[RoboThorEnvironment, IThorEnvironment],
        Union[ObjectNaviThorGridTask, ObjectNavTask, PointNavTask],
    ]
):
    """Binary sensor telling whether the agent should now take the `End`
    action in a THOR navigation task."""

    def __init__(self, nactions: int, uuid: str, **kwargs: Any) -> None:
        """Store `nactions` and register the binary observation space."""
        self.nactions = nactions

        observation_space = self._get_observation_space()

        # The local variables above are forwarded to the parent constructor.
        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self) -> gym.spaces.Discrete:
        """The observation space.

        Equals `gym.spaces.Discrete(2)`: 0 means the agent **should not**
        take the `End` action, 1 means that it **should**.
        """
        return gym.spaces.Discrete(2)

    def get_observation(  # type:ignore
        self,
        env: IThorEnvironment,
        task: Union[ObjectNaviThorGridTask, ObjectNavTask, PointNavTask],
        *args,
        **kwargs,
    ) -> np.ndarray:
        """Return a length-1 `int64` array holding 1 when the task reports
        that its goal is visible / in range and 0 otherwise.

        Raises `NotImplementedError` for unsupported task types.
        """
        if isinstance(task, ObjectNaviThorGridTask):
            goal_reached = task.is_goal_object_visible()
        elif isinstance(task, (ObjectNavTask, PointNavTask)):
            goal_reached = task._is_goal_in_range()
        else:
            raise NotImplementedError

        # An undetermined answer (`None`) is treated as "do not end".
        flag = False if goal_reached is None else goal_reached
        return np.array([1 * flag], dtype=np.int64)


class RelativePositionChangeTHORSensor(
    Sensor[RoboThorEnvironment, Task[RoboThorEnvironment]]
):
    """Sensor reporting how the agent's pose changed since the last
    observation.

    Each observation is a dictionary with the previously observed pose
    (`"last_allocentric_position"`, as `[x, z, rotation]`) and the change
    relative to that pose (`"dx_dz_dr"`), see `get_relative_position_change`.
    """

    def __init__(self, uuid: str = "rel_position_change", **kwargs: Any):
        """Register the observation space and clear the remembered pose."""
        observation_space = gym.spaces.Dict(
            {
                "last_allocentric_position": gym.spaces.Box(
                    low=np.array([-np.inf, -np.inf, 0], dtype=np.float32),
                    high=np.array([np.inf, np.inf, 360], dtype=np.float32),
                    shape=(3,),
                    dtype=np.float32,
                ),
                "dx_dz_dr": gym.spaces.Box(
                    low=np.array([-np.inf, -np.inf, -360], dtype=np.float32),
                    # The upper bounds of dx / dz must be +inf (they were
                    # previously -inf, making the box empty in x and z).
                    high=np.array([np.inf, np.inf, 360], dtype=np.float32),
                    shape=(3,),
                    dtype=np.float32,
                ),
            }
        )
        # The local variables above are forwarded to the parent constructor.
        super().__init__(**prepare_locals_for_super(locals()))

        # Pose `[x, z, rotation]` seen at the previous observation.
        self.last_xzr: Optional[np.ndarray] = None

    @staticmethod
    def get_relative_position_change(from_xzr: np.ndarray, to_xzr: np.ndarray):
        """Express the pose change `to_xzr - from_xzr` in the frame of
        `from_xzr`.

        # Parameters

        from_xzr : Starting pose as `[x, z, rotation in degrees]`.
        to_xzr : Final pose in the same layout.

        # Returns

        A length-3 array `[dx, dz, dr]` where `dx, dz` are rotated by the
        starting rotation and `dr` is reduced modulo 360.
        """
        dx_dz_dr = to_xzr - from_xzr

        # Transform dx, dz (in global coordinates) into the relative coordinates
        # given by rotation r0=from_xzr[-1]. This requires rotating everything so that
        # r0 is facing in the positive z direction. Since thor rotations are negative
        # the usual rotation direction this means we want to rotate by r0 degrees.
        theta = np.pi * from_xzr[-1] / 180
        cos_theta = np.cos(theta)
        sin_theta = np.sin(theta)

        dx_dz_dr = (
            np.array(
                [
                    [cos_theta, -sin_theta, 0],
                    [sin_theta, cos_theta, 0],
                    [0, 0, 1],  # Don't change dr
                ]
            )
            @ dx_dz_dr.reshape(-1, 1)
        ).reshape(-1)

        dx_dz_dr[-1] = dx_dz_dr[-1] % 360
        return dx_dz_dr

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Optional[Task[RoboThorEnvironment]],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Return the previous pose and the pose change since it was recorded.

        On the first step of a task (`task.num_steps_taken() == 0`) the
        previous pose is reset to the current one, so the reported change is
        zero.
        """
        agent = env.controller.last_event.metadata["agent"]
        p = agent["position"]
        r = agent["rotation"]["y"]
        current_xzr = np.array([p["x"], p["z"], r % 360])

        if task.num_steps_taken() == 0:
            # Start of an episode: measure changes from the current pose.
            self.last_xzr = current_xzr.copy()

        dx_dz_dr = self.get_relative_position_change(
            from_xzr=self.last_xzr, to_xzr=current_xzr
        )

        to_return = {"last_allocentric_position": self.last_xzr, "dx_dz_dr": dx_dz_dr}

        self.last_xzr = current_xzr

        return to_return


class ReachableBoundsTHORSensor(Sensor[RoboThorEnvironment, Task[RoboThorEnvironment]]):
    """Sensor reporting the x and z extents of a scene's reachable positions,
    widened by a fixed margin.

    Bounds are computed once per scene name and cached on the sensor.
    """

    def __init__(self, margin: float, uuid: str = "scene_bounds", **kwargs: Any):
        """Register the observation space and store `margin`.

        # Parameters

        margin : Amount subtracted from the minima and added to the maxima.
        uuid : Identifier of this sensor.
        """
        observation_space = gym.spaces.Dict(
            {
                "x_range": gym.spaces.Box(
                    low=np.array([-np.inf, -np.inf], dtype=np.float32),
                    high=np.array([np.inf, np.inf], dtype=np.float32),
                    shape=(2,),
                    dtype=np.float32,
                ),
                "z_range": gym.spaces.Box(
                    low=np.array([-np.inf, -np.inf], dtype=np.float32),
                    high=np.array([np.inf, np.inf], dtype=np.float32),
                    shape=(2,),
                    dtype=np.float32,
                ),
            }
        )
        # The local variables above are forwarded to the parent constructor.
        super().__init__(**prepare_locals_for_super(locals()))

        self.margin = margin
        # Maps scene name -> bounds dictionary produced by `get_bounds`.
        self._bounds_cache = {}

    @staticmethod
    def get_bounds(
        controller: ai2thor.controller.Controller,
        margin: float,
    ) -> Dict[str, np.ndarray]:
        """Query the controller for reachable positions and return their
        `[min - margin, max + margin]` ranges along x and z.

        NOTE: this issues a `GetReachablePositions` step on the controller.
        """
        positions = controller.step("GetReachablePositions").metadata["actionReturn"]
        min_x = min(p["x"] for p in positions)
        max_x = max(p["x"] for p in positions)
        min_z = min(p["z"] for p in positions)
        max_z = max(p["z"] for p in positions)

        return {
            "x_range": np.array([min_x - margin, max_x + margin]),
            "z_range": np.array([min_z - margin, max_z + margin]),
        }

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Optional[Task[RoboThorEnvironment]],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Return a deep copy of the (cached) bounds of the current scene.

        `env` may be either a raw AI2-THOR controller or an object exposing a
        `controller` attribute.
        """
        if isinstance(env, ai2thor.controller.Controller):
            controller = env
        else:
            controller = env.controller

        scene_name = controller.last_event.metadata["sceneName"]
        if scene_name not in self._bounds_cache:
            self._bounds_cache[scene_name] = self.get_bounds(
                controller=controller, margin=self.margin
            )

        # Deep copy so callers cannot modify the cached arrays.
        return copy.deepcopy(self._bounds_cache[scene_name])


class SceneBoundsTHORSensor(Sensor[RoboThorEnvironment, Task[RoboThorEnvironment]]):
    """Sensor reporting the x and z extents of the scene, derived from the
    `sceneBounds` entry of the controller's last event metadata."""

    def __init__(self, uuid: str = "scene_bounds", **kwargs: Any):
        """Register the observation space (two unbounded length-2 boxes)."""
        observation_space = gym.spaces.Dict(
            {
                "x_range": gym.spaces.Box(
                    low=np.array([-np.inf, -np.inf]),
                    high=np.array([np.inf, np.inf]),
                    shape=(2,),
                    dtype=np.float32,
                ),
                "z_range": gym.spaces.Box(
                    low=np.array([-np.inf, -np.inf]),
                    high=np.array([np.inf, np.inf]),
                    shape=(2,),
                    dtype=np.float32,
                ),
            }
        )
        # The local variables above are forwarded to the parent constructor.
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Optional[Task[RoboThorEnvironment]],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Return `{"x_range": [min, max], "z_range": [min, max]}` computed as
        center minus / plus half the size along each axis."""
        bounds = env.controller.last_event.metadata["sceneBounds"]
        center = bounds["center"]
        size = bounds["size"]

        def axis_range(axis: str) -> np.ndarray:
            return np.array(
                [center[axis] - size[axis] / 2, center[axis] + size[axis] / 2]
            )

        return {"x_range": axis_range("x"), "z_range": axis_range("z")}


class BinnedPointCloudMapTHORSensor(
    Sensor[RoboThorEnvironment, Task[RoboThorEnvironment]]
):
    """Sensor that feeds THOR depth frames into a `BinnedPointCloudMapBuilder`
    and returns the builder's outputs selected by the observation space.

    The map builder is reset at the first step of each task using the x/z
    ranges provided by `map_range_sensor`.
    """

    observation_space = gym.spaces.Dict

    def __init__(
        self,
        fov: Optional[float],
        vision_range_in_cm: int,
        map_size_in_cm: int,
        resolution_in_cm: int,
        map_range_sensor: Sensor,
        return_egocentric_local_context: bool = False,
        height_bins: Sequence[float] = (0.02, 2),
        ego_only: bool = True,
        exclude_agent: bool = False,
        uuid: str = "binned_pc_map",
        device: torch.device = torch.device("cpu"),
        **kwargs: Any,
    ):
        """Create the map builder and the observation space.

        # Parameters

        fov : Field of view passed to the builder; if `None` it is read from
            the event metadata at the first step of a task.
        vision_range_in_cm : Passed to the map builder.
        map_size_in_cm : Passed to the map builder.
        resolution_in_cm : Passed to the map builder.
        map_range_sensor : Sensor whose observation provides `"x_range"` and
            `"z_range"`, used when resetting the builder.
        return_egocentric_local_context : Passed to the map builder; also
            selects which egocentric key the observation space contains.
        height_bins : Passed to the map builder.
        ego_only : If `False`, the `"allocentric_update"` and `"map"` entries
            are added to the observation space.
        exclude_agent : If `True`, depth pixels not covered by any instance
            mask are set to NaN before updating the map.
        uuid : Identifier of this sensor.
        device : Device assigned to the map builder.
        """
        self.fov = fov
        self.vision_range_in_cm = vision_range_in_cm
        self.map_size_in_cm = map_size_in_cm
        self.resolution_in_cm = resolution_in_cm
        self.height_bins = height_bins
        self.ego_only = ego_only
        self.return_egocentric_local_context = return_egocentric_local_context
        self.exclude_agent = exclude_agent

        self.binned_pc_map_builder = BinnedPointCloudMapBuilder(
            fov=fov,
            vision_range_in_cm=vision_range_in_cm,
            map_size_in_cm=map_size_in_cm,
            resolution_in_cm=resolution_in_cm,
            height_bins=height_bins,
            return_egocentric_local_context=return_egocentric_local_context,
        )
        # Goes through the `device` property setter, i.e. sets the builder's device.
        self.device = device

        big_map_space = gym.spaces.Box(
            low=0,
            high=np.inf,
            shape=self.binned_pc_map_builder.binned_point_cloud_map.shape,
            dtype=np.float32,
        )
        # Square window of side `vision_range_in_map_units` with the same
        # trailing dimension as the full map.
        local_map_space = gym.spaces.Box(
            low=0,
            high=np.inf,
            shape=(self.binned_pc_map_builder.vision_range_in_map_units,) * 2
            + self.binned_pc_map_builder.binned_point_cloud_map.shape[-1:],
            dtype=np.float32,
        )

        space_dict = {
            "egocentric_update": local_map_space,
        }
        # NOTE(review): this replaces the dictionary, so "egocentric_update" is
        # not part of the space when the local context is requested -- confirm
        # that this is intended rather than a missing key assignment.
        if self.return_egocentric_local_context:
            space_dict = {
                "egocentric_local_context": copy.deepcopy(local_map_space),
            }
        if not ego_only:
            space_dict["allocentric_update"] = copy.deepcopy(big_map_space)
            space_dict["map"] = copy.deepcopy(big_map_space)

        observation_space = gym.spaces.Dict(space_dict)
        # NOTE: every local variable defined above is forwarded to the parent
        # constructor, so adding or renaming locals changes that call.
        super().__init__(**prepare_locals_for_super(locals()))

        self.map_range_sensor = map_range_sensor

    @property
    def device(self):
        """The device used by the underlying map builder."""
        return self.binned_pc_map_builder.device

    @device.setter
    def device(self, val: torch.device):
        self.binned_pc_map_builder.device = torch.device(val)

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Optional[Task[RoboThorEnvironment]],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Update the map with the current depth frame and return the entries
        of the builder's output named in the observation space.

        `env` may be either a raw AI2-THOR controller or an object exposing a
        `controller` attribute.
        """
        if isinstance(env, ai2thor.controller.Controller):
            controller = env
        else:
            controller = env.controller

        e = controller.last_event
        metadata = e.metadata

        # First step of a task: reset the builder for the (possibly new) scene.
        if task.num_steps_taken() == 0:
            xz_ranges_dict = self.map_range_sensor.get_observation(env=env, task=task)
            if self.fov is None:
                self.binned_pc_map_builder.fov = e.metadata["fov"]
            self.binned_pc_map_builder.reset(
                min_xyz=np.array(
                    [
                        xz_ranges_dict["x_range"][0],
                        0,  # TODO: Should y be different per scene?
                        xz_ranges_dict["z_range"][0],
                    ]
                )
            )

        depth_frame = e.depth_frame

        if self.exclude_agent:
            # Work on a copy so the event's depth frame is left untouched.
            depth_frame = depth_frame.copy()
            assert len(e.instance_masks) > 0
            # Blank out every pixel that belongs to no instance mask.
            depth_frame[~reduce(np.logical_or, e.instance_masks.values())] = np.nan

        map_dict = self.binned_pc_map_builder.update(
            depth_frame=depth_frame,
            camera_xyz=np.array(
                [metadata["cameraPosition"][k] for k in ["x", "y", "z"]]
            ),
            camera_rotation=metadata["agent"]["rotation"]["y"],
            camera_horizon=metadata["agent"]["cameraHorizon"],
        )
        return {k: map_dict[k] for k in self.observation_space.spaces.keys()}


class SemanticMapTHORSensor(Sensor[RoboThorEnvironment, Task[RoboThorEnvironment]]):
    """Sensor that feeds THOR depth frames into a `SemanticMapBuilder` and
    returns boolean versions of the builder's outputs selected by the
    observation space.

    The map builder is reset at the first step of each task using 2D object
    hulls obtained from the controller and the x/z ranges provided by
    `map_range_sensor`.
    """

    observation_space = gym.spaces.Dict

    def __init__(
        self,
        fov: float,
        vision_range_in_cm: int,
        map_size_in_cm: int,
        resolution_in_cm: int,
        ordered_object_types: Sequence[str],
        map_range_sensor: Sensor,
        ego_only: bool = True,
        uuid: str = "semantic_map",
        device: torch.device = torch.device("cpu"),
        **kwargs: Any,
    ):
        """Create the map builder and the observation space.

        # Parameters

        fov : Passed to the map builder.
        vision_range_in_cm : Passed to the map builder.
        map_size_in_cm : Passed to the map builder.
        resolution_in_cm : Passed to the map builder.
        ordered_object_types : Object types tracked by the map; one channel
            per type in the map spaces.
        map_range_sensor : Sensor whose observation provides `"x_range"` and
            `"z_range"`, used when resetting the builder.
        ego_only : If `False`, the `"explored_mask"` and `"map"` entries are
            added to the observation space.
        uuid : Identifier of this sensor.
        device : Passed to the map builder.
        """
        self.fov = fov
        self.vision_range_in_cm = vision_range_in_cm
        self.map_size_in_cm = map_size_in_cm
        self.resolution_in_cm = resolution_in_cm
        self.ordered_object_types = ordered_object_types
        self.map_range_sensor = map_range_sensor
        self.ego_only = ego_only

        self.semantic_map_builder = SemanticMapBuilder(
            fov=fov,
            vision_range_in_cm=vision_range_in_cm,
            map_size_in_cm=map_size_in_cm,
            resolution_in_cm=resolution_in_cm,
            ordered_object_types=ordered_object_types,
            device=device,
        )

        def get_map_space(nchannels: int, size: int):
            return gym.spaces.Box(
                low=0,
                high=1,
                shape=(size, size, nchannels),
                dtype=np.bool_,
            )

        n = len(self.ordered_object_types)
        small = self.vision_range_in_cm // self.resolution_in_cm
        big = self.semantic_map_builder.ground_truth_semantic_map.shape[0]

        space_dict = {
            "egocentric_update": get_map_space(
                nchannels=n,
                size=small,
            ),
            "egocentric_mask": get_map_space(
                nchannels=1,
                size=small,
            ),
        }
        if not ego_only:
            space_dict["explored_mask"] = get_map_space(
                nchannels=1,
                size=big,
            )
            space_dict["map"] = get_map_space(
                nchannels=n,
                size=big,
            )

        observation_space = gym.spaces.Dict(space_dict)
        # NOTE: every local variable defined above is forwarded to the parent
        # constructor, so adding or renaming locals changes that call.
        super().__init__(**prepare_locals_for_super(locals()))

    @property
    def device(self):
        """The device used by the underlying map builder."""
        return self.semantic_map_builder.device

    @device.setter
    def device(self, val: torch.device):
        self.semantic_map_builder.device = torch.device(val)

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Optional[Task[RoboThorEnvironment]],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Update the semantic map with the current depth frame and return the
        entries named in the observation space as boolean arrays.

        Non-boolean builder outputs are thresholded at 0.001. An
        `AssertionError` carrying the controller's error message is raised if
        the `Get2DSemanticHulls` action fails.
        """
        with include_object_data(env.controller):
            # Captured before the hull query below so that the depth frame and
            # agent pose come from the agent's actual last step.
            last_event = env.controller.last_event
            metadata = last_event.metadata

            if task.num_steps_taken() == 0:
                hulls_metadata = env.controller.step(
                    "Get2DSemanticHulls", objectTypes=self.ordered_object_types
                ).metadata
                # Report the controller's error message; interpolating
                # `lastActionSuccess` here would always just print `False`.
                assert hulls_metadata[
                    "lastActionSuccess"
                ], f"Get2DSemanticHulls failed with error '{hulls_metadata['errorMessage']}'"

                object_id_to_hull = hulls_metadata["actionReturn"]

                xz_ranges_dict = self.map_range_sensor.get_observation(
                    env=env, task=task
                )

                self.semantic_map_builder.reset(
                    min_xyz=np.array(
                        [
                            xz_ranges_dict["x_range"][0],
                            0,  # TODO: Should y be different per scene?
                            xz_ranges_dict["z_range"][0],
                        ]
                    ),
                    object_hulls=[
                        ObjectHull2d(
                            object_id=o["objectId"],
                            object_type=o["objectType"],
                            hull_points=object_id_to_hull[o["objectId"]],
                        )
                        for o in metadata["objects"]
                        if o["objectId"] in object_id_to_hull
                    ],
                )

            map_dict = self.semantic_map_builder.update(
                depth_frame=last_event.depth_frame,
                camera_xyz=np.array(
                    [metadata["cameraPosition"][k] for k in ["x", "y", "z"]]
                ),
                camera_rotation=metadata["agent"]["rotation"]["y"],
                camera_horizon=metadata["agent"]["cameraHorizon"],
            )
            return {
                k: map_dict[k] > 0.001 if map_dict[k].dtype != np.bool_ else map_dict[k]
                for k in self.observation_space.spaces.keys()
            }


================================================
FILE: allenact_plugins/ithor_plugin/ithor_task_samplers.py
================================================
import copy
import random
from typing import List, Dict, Optional, Any, Union, cast

import gym

from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import set_deterministic_cudnn, set_seed
from allenact.utils.system import get_logger
from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask


class ObjectNavTaskSampler(TaskSampler):
    def __init__(
        self,
        scenes: List[str],
        object_types: str,
        sensors: List[Sensor],
        max_steps: int,
        env_args: Dict[str, Any],
        action_space: gym.Space,
        scene_period: Optional[Union[int, str]] = None,
        max_tasks: Optional[int] = None,
        seed: Optional[int] = None,
        deterministic_cudnn: bool = False,
        **kwargs,
    ) -> None:
        self.env_args = env_args
        self.scenes = scenes
        self.object_types = object_types
        self.grid_size = 0.25
        self.env: Optional[IThorEnvironment] = None
        self.sensors = sensors
        self.max_steps = max_steps
        self._action_space = action_space

        self.scene_counter: Optional[int] = None
        self.scene_order: Optional[List[str]] = None
        self.scene_id: Optional[int] = None
        self.scene_period: Optional[Union[str, int]] = (
            scene_period  # default makes a random choice
        )
        self.max_tasks: Optional[int] = None
        self.reset_tasks = max_tasks

        self._last_sampled_task: Optional[ObjectNaviThorGridTask] = None

        self.seed: Optional[int] = None
        self.set_seed(seed)

        if deterministic_cudnn:
            set_deterministic_cudnn()

        self.reset()

    def _create_environment(self) -> IThorEnvironment:
        """Build a new `IThorEnvironment` from `self.env_args`.

        Agents are made invisible, the object open speed is fixed to 0.05 and
        the environment is restricted to initially reachable points; every
        other setting comes from `self.env_args`.
        """
        return IThorEnvironment(
            make_agents_visible=False,
            object_open_speed=0.05,
            restrict_to_initially_reachable_points=True,
            **self.env_args,
        )

    @property
    def length(self) -> Union[int, float]:
        """Length.

        # Returns

        Number of total tasks remaining that can be sampled. Can be float('inf').
        """
        return float("inf") if self.max_tasks is None else self.max_tasks

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        return None

    @property
    def last_sampled_task(self) -> Optional[ObjectNaviThorGridTask]:
        return self._last_sampled_task

    def close(self) -> None:
        if self.env is not None:
            self.env.stop()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Check if observation spaces equal.

        # Returns

        True if all Tasks that can be sampled by this sampler have the
            same observation space. Otherwise False.
        """
        return True

    def sample_scene(self, force_advance_scene: bool):
        if force_advance_scene:
            if self.scene_period != "manual":
                get_logger().warning(
                    "When sampling scene, have `force_advance_scene == True`"
                    "but `self.scene_period` is not equal to 'manual',"
                    "this may cause unexpected behavior."
                )
            self.scene_id = (1 + self.scene_id) % len(self.scenes)
            if self.scene_id == 0:
                random.shuffle(self.scene_order)

        if self.scene_period is None:
            # Random scene
            self.scene_id = random.randint(0, len(self.scenes) - 1)
        elif self.scene_period == "manual":
            pass
        elif self.scene_counter >= cast(int, self.scene_period):
            if self.scene_id == len(self.scene_order) - 1:
                # Randomize scene order for next iteration
                random.shuffle(self.scene_order)
                # Move to next scene
                self.scene_id = 0
            else:
                # Move to next scene
                self.scene_id += 1
            # Reset scene counter
            self.scene_counter = 1
        elif isinstance(self.scene_period, int):
            # Stay in current scene
            self.scene_counter += 1
        else:
            raise NotImplementedError(
                "Invalid scene_period {}".format(self.scene_period)
            )

        if self.max_tasks is not None:
            self.max_tasks -= 1

        return self.scenes[int(self.scene_order[self.scene_id])]

    def next_task(
        self, force_advance_scene: bool = False
    ) -> Optional[ObjectNaviThorGridTask]:
        """Sample the next object navigation task.

        The scene is chosen by `sample_scene`, the environment is created on
        first use and only reset when the scene changes (ignoring a
        `_physics` suffix), the agent is moved to a random location, and one
        of `self.object_types` present in the scene is chosen as the goal.

        # Parameters

        force_advance_scene : Forwarded to `sample_scene`.

        # Returns

        The new task, or `None` when the task budget `self.max_tasks` is
        exhausted.

        # Raises

        ValueError : If the scene contains none of `self.object_types`.
            Previously this case logged a warning and then failed with an
            incidental `KeyError` while building the task id.
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        scene = self.sample_scene(force_advance_scene)

        if self.env is None:
            self.env = self._create_environment()
            self.env.reset(scene_name=scene)
        elif scene.replace("_physics", "") != self.env.scene_name.replace(
            "_physics", ""
        ):
            self.env.reset(scene)

        pose = self.env.randomize_agent_location()

        object_types_in_scene = {
            o["objectType"] for o in self.env.last_event.metadata["objects"]
        }

        # Visit the candidate types in random order and keep the first one
        # that exists in the scene.
        shuffled_types = random.sample(self.object_types, len(self.object_types))
        goal_type = next(
            (ot for ot in shuffled_types if ot in object_types_in_scene), None
        )

        if goal_type is None:
            message = (
                "Scene {} does not contain any"
                " objects of any of the types {}.".format(scene, self.object_types)
            )
            get_logger().warning(message)
            raise ValueError(message)

        task_info: Dict[str, Any] = {
            "object_type": goal_type,
            "start_pose": copy.copy(pose),
            "id": (
                f"{scene}__{'_'.join(list(map(str, self.env.get_key(pose))))}__{goal_type}"
            ),
        }

        self._last_sampled_task = ObjectNaviThorGridTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
        )
        return self._last_sampled_task

    def reset(self):
        self.scene_counter = 0
        self.scene_order = list(range(len(self.scenes)))
        random.shuffle(self.scene_order)
        self.scene_id = 0
        self.max_tasks = self.reset_tasks

    def set_seed(self, seed: int):
        self.seed = seed
        if seed is not None:
            set_seed(seed)


================================================
FILE: allenact_plugins/ithor_plugin/ithor_tasks.py
================================================
import random
from typing import Dict, Tuple, List, Any, Optional, Union, Sequence, cast

import gym
import numpy as np

from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact.utils.system import get_logger
from allenact_plugins.ithor_plugin.ithor_constants import (
    MOVE_AHEAD,
    ROTATE_LEFT,
    ROTATE_RIGHT,
    LOOK_DOWN,
    LOOK_UP,
    END,
)

from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
from allenact_plugins.ithor_plugin.ithor_util import round_to_factor


class ObjectNaviThorGridTask(Task[IThorEnvironment]):
    """Defines the object navigation task in AI2-THOR.

    In object navigation an agent is randomly initialized into an AI2-THOR scene and must
    find an object of a given type (e.g. tomato, television, etc). An object is considered
    found if the agent takes an `End` action and the object is visible to the agent (see
    [here](https://ai2thor.allenai.org/documentation/concepts) for a definition of visibility
    in AI2-THOR).

    The actions available to an agent in this task are:

    1. Move ahead
        * Moves agent ahead by 0.25 meters.
    1. Rotate left / rotate right
        * Rotates the agent by 90 degrees counter-clockwise / clockwise.
    1. Look down / look up
        * Changes agent view angle by 30 degrees up or down. An agent cannot look more than 30
          degrees above horizontal or less than 60 degrees below horizontal.
    1. End
        * Ends the task and the agent receives a positive reward if the object type is visible to the agent,
        otherwise it receives a negative reward.

    # Attributes

    env : The ai2thor environment.
    sensor_suite: Collection of sensors formed from the `sensors` argument in the initializer.
    task_info : The task info. Must contain a field "object_type" that specifies, as a string,
        the goal object type.
    max_steps : The maximum number of steps an agent can take in the task before it is considered failed.
    observation_space: The observation space returned on each step from the sensors.
    """

    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, LOOK_DOWN, LOOK_UP, END)

    _CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE: Dict[
        Tuple[str, str], List[Tuple[float, float, int, int]]
    ] = {}

    def __init__(
        self,
        env: IThorEnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        **kwargs,
    ) -> None:
        """Initializer.

        See class documentation for parameter definitions. Besides forwarding
        everything to the base class, this records the agent's starting
        location as the first entry of `task_info["followed_path"]` and stores
        the action names in `task_info["action_names"]`.
        """
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )

        # Episode outcome bookkeeping, updated when the `End` action is taken.
        self._success: Optional[bool] = False
        self._took_end_action: bool = False

        # Per-task subsample of goal locations used by `query_expert`.
        self._subsampled_locations_from_which_obj_visible: Optional[
            List[Tuple[float, float, int, int]]
        ] = None

        self.task_info["action_names"] = self.class_action_names()
        self.task_info["followed_path"] = [self.env.get_agent_location()]

    @property
    def action_space(self):
        """Discrete action space with one entry per name in `_actions`."""
        num_actions = len(self._actions)
        return gym.spaces.Discrete(num_actions)

    def reached_terminal_state(self) -> bool:
        return self._took_end_action

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        return cls._actions

    def close(self) -> None:
        self.env.stop()

    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        """Execute one action and return the resulting step.

        `End` terminates the episode and succeeds iff the goal object is
        visible; every other action is forwarded to the environment and the
        new agent location is appended to `task_info["followed_path"]`.

        # Parameters

        action : Index into `class_action_names()`; must be an `int`.

        # Returns

        An `RLStepResult` with the new observations, the reward from
        `judge()`, the done flag and `last_action_success` in `info`.
        """
        assert isinstance(action, int)
        action_name = self.class_action_names()[cast(int, action)]

        if action_name != END:
            self.env.step({"action": action_name})
            self.last_action_success = self.env.last_action_success

            # The class-level cache starts out as a dict, so this flag is only
            # False if the attribute has been replaced by `None`.
            graph_updates_enabled = (
                self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE is not None
            )
            if graph_updates_enabled and not self.last_action_success:
                self.env.update_graph_with_failed_action(failed_action=action_name)

            self.task_info["followed_path"].append(self.env.get_agent_location())
        else:
            self._took_end_action = True
            self._success = self.is_goal_object_visible()
            self.last_action_success = self._success

        return RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        assert mode == "rgb", "only rgb rendering is implemented"
        return self.env.current_frame

    def is_goal_object_visible(self) -> bool:
        """Is the goal object currently visible?"""
        return any(
            o["objectType"] == self.task_info["object_type"]
            for o in self.env.visible_objects()
        )

    def judge(self) -> float:
        """Compute the reward after having taken a step."""
        reward = -0.01

        if not self.last_action_success:
            reward += -0.03

        if self._took_end_action:
            reward += 1.0 if self._success else -1.0

        return float(reward)

    def metrics(self) -> Dict[str, Any]:
        if not self.is_done():
            return {}
        else:
            return {
                "success": self._success,
                **super(ObjectNaviThorGridTask, self).metrics(),
            }

    def query_expert(self, **kwargs) -> Tuple[int, bool]:
        """Return the action an expert would take from the current state.

        If the goal object is already visible the expert ends the episode.
        Otherwise it follows the shortest path (as computed by the
        environment) to one of at most five locations from which an object of
        the goal type is interactable.

        # Returns

        A tuple `(action_index, expert_action_available)`. The index refers to
        `class_action_names()`. When no path to any goal location is found, or
        the shortest path has a single state, `(0, False)` is returned and the
        index carries no meaning.
        """
        target = self.task_info["object_type"]

        if self.is_goal_object_visible():
            return self.class_action_names().index(END), True
        else:
            # Goal locations are cached at class level per (scene, goal type).
            key = (self.env.scene_name, target)
            if self._subsampled_locations_from_which_obj_visible is None:
                if key not in self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE:
                    obj_ids: List[str] = []
                    obj_ids.extend(
                        o["objectId"]
                        for o in self.env.last_event.metadata["objects"]
                        if o["objectType"] == target
                    )

                    assert len(obj_ids) != 0, "No objects to get an expert path to."

                    locations_from_which_object_is_visible: List[
                        Tuple[float, float, int, int]
                    ] = []
                    # Candidate positions: every distinct (x, z) in the
                    # environment graph, at the agent's current height.
                    y = self.env.last_event.metadata["agent"]["position"]["y"]
                    positions_to_check_interactionable_from = [
                        {"x": x, "y": y, "z": z}
                        for x, z in set((x, z) for x, z, _, _ in self.env.graph.nodes)
                    ]
                    for obj_id in set(obj_ids):
                        # Ask the simulator from which candidate positions
                        # this object is interactable.
                        self.env.controller.step(
                            {
                                "action": "PositionsFromWhichItemIsInteractable",
                                "objectId": obj_id,
                                "positions": positions_to_check_interactionable_from,
                            }
                        )
                        assert (
                            self.env.last_action_success
                        ), "Could not get positions from which item was interactable."

                        returned = self.env.last_event.metadata["actionReturn"]
                        # Keep only entries with `standing == 1`, with position
                        # rounded to 2 decimals, rotation snapped to a multiple
                        # of 90 and horizon to a multiple of 30 (both mod 360).
                        locations_from_which_object_is_visible.extend(
                            (
                                round(x, 2),
                                round(z, 2),
                                round_to_factor(rot, 90) % 360,
                                round_to_factor(hor, 30) % 360,
                            )
                            for x, z, rot, hor, standing in zip(
                                returned["x"],
                                returned["z"],
                                returned["rotation"],
                                returned["horizon"],
                                returned["standing"],
                            )
                            if standing == 1
                        )

                    self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE[key] = (
                        locations_from_which_object_is_visible
                    )

                # Limit this task to at most 5 randomly chosen goal locations
                # so that only a few shortest paths are computed per query.
                self._subsampled_locations_from_which_obj_visible = (
                    self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE[key]
                )
                if len(self._subsampled_locations_from_which_obj_visible) > 5:
                    self._subsampled_locations_from_which_obj_visible = random.sample(
                        self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE[key], 5
                    )

            current_loc_key = self.env.get_key(self.env.last_event.metadata["agent"])
            paths = []

            # Collect the shortest path to every goal location for which the
            # environment returns one.
            for goal_key in self._subsampled_locations_from_which_obj_visible:
                path = self.env.shortest_state_path(
                    source_state_key=current_loc_key, goal_state_key=goal_key
                )
                if path is not None:
                    paths.append(path)
            if len(paths) == 0:
                # No goal location is reachable: no expert action available.
                return 0, False

            shortest_path_ind = int(np.argmin([len(p) for p in paths]))

            if len(paths[shortest_path_ind]) == 1:
                # The path consists of the current state only, yet the goal
                # object was not visible above.
                get_logger().warning(
                    "Shortest path computations suggest we are at the target but episode does not think so."
                )
                return 0, False

            # Take the action that moves the agent to the second state on the
            # shortest path.
            next_key_on_shortest_path = paths[shortest_path_ind][1]
            return (
                self.class_action_names().index(
                    self.env.action_transitioning_between_keys(
                        current_loc_key, next_key_on_shortest_path
                    )
                ),
                True,
            )


================================================
FILE: allenact_plugins/ithor_plugin/ithor_util.py
================================================
import glob
import math
import os
import platform
import traceback
import warnings
from contextlib import contextmanager
from typing import Sequence

import Xlib
import Xlib.display
import ai2thor.controller


@contextmanager
def include_object_data(controller: ai2thor.controller.Controller):
    """Context manager that makes object metadata available inside its body.

    If the controller's last event lists no objects, the object filter is
    reset on entry (`ResetObjectFilter`) and set back to an empty filter
    (`SetObjectFilter` with no object ids) on exit. If objects are already
    listed, the controller is left untouched.
    """
    objects_missing = len(controller.last_event.metadata["objects"]) == 0

    if not objects_missing:
        yield None
        return

    try:
        controller.step("ResetObjectFilter")
        assert controller.last_event.metadata["lastActionSuccess"]
        yield None
    finally:
        # Runs even if the body (or the reset above) failed.
        controller.step("SetObjectFilter", objectIds=[])
        assert controller.last_event.metadata["lastActionSuccess"]


def vertical_to_horizontal_fov(
    vertical_fov_in_degrees: float, height: float, width: float
):
    """Convert a vertical field of view to the matching horizontal one.

    # Parameters

    vertical_fov_in_degrees : Vertical field of view, strictly between 0 and
        180 degrees.
    height : Frame height.
    width : Frame width (same unit as `height`).

    # Returns

    The horizontal field of view in degrees.
    """
    assert 0 < vertical_fov_in_degrees < 180

    vertical_fov_in_rads = (math.pi / 180) * vertical_fov_in_degrees
    aspect_ratio = width / height

    # tan(h_fov / 2) = tan(v_fov / 2) * (width / height)
    half_horizontal_tangent = math.tan(vertical_fov_in_rads * 0.5) * aspect_ratio
    return (180 / math.pi) * math.atan(half_horizontal_tangent) * 2


def horizontal_to_vertical_fov(
    horizontal_fov_in_degrees: float, height: float, width: float
):
    """Convert a horizontal field of view to the matching vertical one.

    This is the vertical-to-horizontal conversion with the roles of `height`
    and `width` exchanged.

    # Parameters

    horizontal_fov_in_degrees : Horizontal field of view, strictly between 0
        and 180 degrees.
    height : Frame height.
    width : Frame width (same unit as `height`).

    # Returns

    The vertical field of view in degrees.
    """
    swapped_height, swapped_width = width, height
    return vertical_to_horizontal_fov(
        vertical_fov_in_degrees=horizontal_fov_in_degrees,
        height=swapped_height,
        width=swapped_width,
    )


def round_to_factor(num: float, base: int) -> int:
    """Round a number to the nearest integer multiple of `base`.

    For example, 90.1 rounded with base 45 gives 90.

    # Parameters

    num : Floating point number to be rounded.
    base : Integer base whose multiples are the possible results.

    # Returns

    The multiple of `base` nearest to `num`.
    """
    nearest_multiple_count = round(num / base)
    return nearest_multiple_count * base


def get_open_x_displays(throw_error_if_empty: bool = False) -> Sequence[str]:
    """List the screens of all X displays that can currently be opened.

    Display numbers are taken from the sockets in `/tmp/.X11-unix`. Displays
    that cannot be opened are skipped with a warning.

    # Parameters

    throw_error_if_empty : If `True`, raise instead of returning an empty list.

    # Returns

    Strings of the form `"<display>.<screen>"`, one per screen of every
    display that could be opened.

    # Raises

    AssertionError : If not running on Linux.
    IOError : If `throw_error_if_empty` is `True` and no display was found.
    """
    assert platform.system() == "Linux", "Can only get X-displays for Linux systems."

    displays = []

    # Socket files are named "X<display number>"; drop the leading "X".
    open_display_strs = [
        os.path.basename(s)[1:] for s in glob.glob("/tmp/.X11-unix/X*")
    ]

    for open_display_str in sorted(open_display_strs):
        try:
            open_display_str = str(int(open_display_str))
            display = Xlib.display.Display(f":{open_display_str}")
        except Exception:
            warnings.warn(
                f"Encountered error when attempting to open display :{open_display_str},"
                f" error message:\n{traceback.format_exc()}"
            )
            continue

        try:
            displays.extend(
                [f"{open_display_str}.{i}" for i in range(display.screen_count())]
            )
        finally:
            # The connection was only needed to count screens; close it so
            # repeated calls do not accumulate open X connections.
            display.close()

    if throw_error_if_empty and len(displays) == 0:
        raise IOError(
            "Could not find any open X-displays on which to run AI2-THOR processes. "
            " Please see the AI2-THOR installation instructions at"
            " https://allenact.org/installation/installation-framework/#installation-of-ithor-ithor-plugin"
            " for information as to how to start such displays."
        )

    return displays


================================================
FILE: allenact_plugins/ithor_plugin/ithor_viz.py
================================================
import copy
import json
import math
import os
from typing import Tuple, Sequence, Union, Dict, Optional, Any, cast, Generator, List

import colour as col
import cv2
import numpy as np
from PIL import Image, ImageDraw
from ai2thor.controller import Controller
from matplotlib import pyplot as plt
from matplotlib.figure import Figure

from allenact.utils.system import get_logger
from allenact.utils.viz_utils import TrajectoryViz

# Directory under the user's home in which top-down images and map metadata
# are cached between runs.
_ITHOR_VIZ_CACHE_PARTS = (".allenact", "ithor", "top_down_viz_cache")
ITHOR_VIZ_CACHED_TOPDOWN_VIEWS_DIR = os.path.join(
    os.path.expanduser("~"), *_ITHOR_VIZ_CACHE_PARTS
)


class ThorPositionTo2DFrameTranslator(object):
    """Map (x, z) world positions to (row, column) indices of a top-down frame.

    The frame is assumed to show the square of side `2 * orth_size` centered
    on the camera's (x, z) position.
    """

    def __init__(
        self,
        frame_shape_rows_cols: Tuple[int, int],
        cam_position: Sequence[float],
        orth_size: float,
    ):
        """Initializer.

        # Parameters

        frame_shape_rows_cols : Frame size as (rows, columns).
        cam_position : Camera position as (x, y, z); only x and z are used.
        orth_size : Half of the side length of the square covered by the frame.
        """
        camera_xz = np.array((cam_position[0], cam_position[2]))

        self.frame_shape = frame_shape_rows_cols
        self.span = 2 * orth_size
        self.lower_left = camera_xz - orth_size

    def __call__(self, position: Sequence[float]):
        """Translate an (x, y, z) or (x, z) position into integer (row, column)."""
        if len(position) == 3:
            x, _, z = position
        else:
            x, z = position

        # Fractions of the covered square, measured from its lower-left corner.
        frac_x, frac_z = (np.array((x, z)) - self.lower_left) / self.span

        # Rows grow downwards in the frame while z grows upwards, hence 1 - z.
        row_index = round(self.frame_shape[0] * (1.0 - frac_z))
        col_index = round(self.frame_shape[1] * frac_x)
        return np.array((row_index, col_index), dtype=int)


class ThorViz(TrajectoryViz):
    def __init__(
        self,
        path_to_trajectory: Sequence[str] = ("task_info", "followed_path"),
        label: str = "thor_trajectory",
        figsize: Tuple[float, float] = (8, 8),  # width, height
        fontsize: float = 10,
        scenes: Union[Tuple[str, int, int], Sequence[Tuple[str, int, int]]] = (
            ("FloorPlan{}_physics", 1, 30),
            ("FloorPlan{}_physics", 201, 230),
            ("FloorPlan{}_physics", 301, 330),
            ("FloorPlan{}_physics", 401, 430),
        ),
        viz_rows_cols: Tuple[int, int] = (448, 448),
        single_color: bool = False,
        view_triangle_only_on_last: bool = True,
        disable_view_triangle: bool = False,
        line_opacity: float = 1.0,
        path_to_rot_degrees: Sequence[str] = ("rotation",),
        **kwargs,
    ):
        """Initializer.

        # Parameters

        path_to_trajectory, label, figsize, fontsize, path_to_rot_degrees,
            kwargs : Forwarded to the `TrajectoryViz` initializer.
        scenes : Either one `(name_template, first_index, last_index)` triple
            or a sequence of such triples describing the scenes to visualize.
        viz_rows_cols : Size (rows, columns) of the top-down images.
        single_color : Draw the whole trajectory in one color instead of a
            color gradient.
        view_triangle_only_on_last : Draw the agent's view triangle only for
            the final position.
        disable_view_triangle : Do not draw view triangles at all.
        line_opacity : Opacity of the trajectory lines, in [0, 1].
        """
        super().__init__(
            path_to_trajectory=path_to_trajectory,
            label=label,
            figsize=figsize,
            fontsize=fontsize,
            path_to_rot_degrees=path_to_rot_degrees,
            **kwargs,
        )

        # A lone triple starts with its template string; wrap it so that
        # `self.scenes` is always a list of triples.
        scene_specs = scenes
        if isinstance(scene_specs[0], str):
            scene_specs = [cast(Tuple[str, int, int], scene_specs)]
        self.scenes = cast(List[Tuple[str, int, int]], scene_specs)

        # Drawing options.
        self.viz_rows_cols = viz_rows_cols
        self.single_color = single_color
        self.view_triangle_only_on_last = view_triangle_only_on_last
        self.disable_view_triangle = disable_view_triangle
        self.line_opacity = line_opacity

        # Cache directory for top-down images and map metadata.
        self.room_path = ITHOR_VIZ_CACHED_TOPDOWN_VIEWS_DIR
        os.makedirs(self.room_path, exist_ok=True)

        # Only needed for rendering; filled in by `init_top_down_render`.
        self.map_data: Optional[Dict[str, Any]] = None
        self.thor_top_downs: Optional[Dict[str, np.ndarray]] = None

        # Created on demand by `make_controller`.
        self.controller: Optional[Controller] = None

    def init_top_down_render(self):
        self.map_data = self.get_translator()
        self.thor_top_downs = self.make_top_down_views()

        # No controller needed after this point
        if self.controller is not None:
            self.controller.stop()
            self.controller = None

    @staticmethod
    def iterate_scenes(
        all_scenes: Sequence[Tuple[str, int, int]]
    ) -> Generator[str, None, None]:
        for scenes in all_scenes:
            for wall in range(scenes[1], scenes[2] + 1):
                roomname = scenes[0].format(wall)
                yield roomname

    def cached_map_data_path(self, roomname: str) -> str:
        return os.path.join(self.room_path, "map_data__{}.json".format(roomname))

    def get_translator(self) -> Dict[str, Any]:
        """Collect the map data, including a position translator, of every scene.

        Map data (top-down camera position and orthographic size) is read from
        the JSON cache when present; otherwise it is queried from a controller
        and written to the cache.

        # Returns

        A dict from scene name to that scene's map data, extended with a
        `"pos_translator"` entry.
        """
        all_map_data = {}
        for roomname in ThorViz.iterate_scenes(self.scenes):
            json_file = self.cached_map_data_path(roomname)

            if os.path.exists(json_file):
                with open(json_file, "r") as f:
                    map_data = json.load(f)
            else:
                # Cache miss: query the simulator and store the result.
                self.make_controller()
                self.controller.reset(roomname)
                map_data = self.get_agent_map_data()
                get_logger().info("Dumping {}".format(json_file))
                with open(json_file, "w") as f:
                    json.dump(map_data, f, indent=4, sort_keys=True)

            # The translator is added after dumping, so it is never serialized.
            map_data["pos_translator"] = ThorPositionTo2DFrameTranslator(
                self.viz_rows_cols,
                self.position_to_tuple(map_data["cam_position"]),
                map_data["cam_orth_size"],
            )
            all_map_data[roomname] = map_data

        get_logger().debug("Using map_data {}".format(all_map_data))
        return all_map_data

    def cached_image_path(self, roomname: str) -> str:
        return os.path.join(
            self.room_path, "{}__r{}_c{}.png".format(roomname, *self.viz_rows_cols)
        )

    def make_top_down_views(self) -> Dict[str, np.ndarray]:
        """Load the top-down image of every scene, rendering missing ones.

        # Returns

        A dict from scene name to the image read with `cv2.imread`.
        """
        images_by_room = {}
        for roomname in self.iterate_scenes(self.scenes):
            image_path = self.cached_image_path(roomname)
            image_is_cached = os.path.exists(image_path)
            if not image_is_cached:
                self.make_controller()
                self.dump_top_down_view(roomname, image_path)
            images_by_room[roomname] = cv2.imread(image_path)

        return images_by_room

    def crop_viz_image(self, viz_image: np.ndarray) -> np.ndarray:
        y_min = int(self.viz_rows_cols[0] * 0)
        y_max = int(self.viz_rows_cols[0] * 1)
        # But it covers approximately the entire width:
        x_min = 0
        x_max = self.viz_rows_cols[1]
        cropped_viz_image = viz_image[y_min:y_max, x_min:x_max, :]
        return cropped_viz_image

    def make_controller(self):
        if self.controller is None:
            self.controller = Controller()

            self.controller.step({"action": "ChangeQuality", "quality": "Very High"})
            self.controller.step(
                {
                    "action": "ChangeResolution",
                    "x": self.viz_rows_cols[1],
                    "y": self.viz_rows_cols[0],
                }
            )

    def get_agent_map_data(self):
        self.controller.step({"action": "ToggleMapView"})
        cam_position = self.controller.last_event.metadata["cameraPosition"]
        cam_orth_size = self.controller.last_event.metadata["cameraOrthSize"]
        to_return = {
            "cam_position": cam_position,
            "cam_orth_size": cam_orth_size,
        }
        self.controller.step({"action": "ToggleMapView"})
        return to_return

    @staticmethod
    def position_to_tuple(position: Dict[str, float]) -> Tuple[float, float, float]:
        return position["x"], position["y"], position["z"]

    @staticmethod
    def add_lines_to_map(
        ps: Sequence[Any],
        frame: np.ndarray,
        pos_translator: ThorPositionTo2DFrameTranslator,
        opacity: float,
        color: Optional[Tuple[int, ...]] = None,
    ) -> np.ndarray:
        if len(ps) <= 1:
            return frame
        if color is None:
            color = (255, 0, 0)

        img1 = Image.fromarray(frame.astype("uint8"), "RGB").convert("RGBA")
        img2 = Image.new("RGBA", frame.shape[:-1])  # Use RGBA

        opacity = int(round(255 * opacity))  # Define transparency for the triangle.
        draw = ImageDraw.Draw(img2)
        for i in range(len(ps) - 1):
            draw.line(
                tuple(reversed(pos_translator(ps[i])))
                + tuple(reversed(pos_translator(ps[i + 1]))),
                fill=color + (opacity,),
                width=int(frame.shape[0] / 100),
            )

        img = Image.alpha_composite(img1, img2)
        return np.array(img.convert("RGB"))

    @staticmethod
    def add_line_to_map(
        p0: Any,
        p1: Any,
        frame: np.ndarray,
        pos_translator: ThorPositionTo2DFrameTranslator,
        opacity: float,
        color: Optional[Tuple[int, ...]] = None,
    ) -> np.ndarray:
        if p0 == p1:
            return frame
        if color is None:
            color = (255, 0, 0)

        img1 = Image.fromarray(frame.astype("uint8"), "RGB").convert("RGBA")
        img2 = Image.new("RGBA", frame.shape[:-1])  # Use RGBA

        opacity = int(round(255 * opacity))  # Define transparency for the triangle.
        draw = ImageDraw.Draw(img2)
        draw.line(
            tuple(reversed(pos_translator(p0))) + tuple(reversed(pos_translator(p1))),
            fill=color + (opacity,),
            width=int(frame.shape[0] / 100),
        )

        img = Image.alpha_composite(img1, img2)
        return np.array(img.convert("RGB"))

    @staticmethod
    def add_agent_view_triangle(
        position: Any,
        rotation: float,
        frame: np.ndarray,
        pos_translator: ThorPositionTo2DFrameTranslator,
        scale: float = 1.0,
        opacity: float = 0.1,
    ) -> np.ndarray:
        """Draw a translucent white triangle indicating the agent's view.

        The triangle has its apex at the agent and its other two corners
        `scale` ahead and `scale / 2` to either side, rotated by the agent's
        rotation.

        # Parameters

        position : Agent position as (x, y, z); only x and z are used.
        rotation : Agent rotation in degrees.
        frame : Image of shape (rows, columns, 3).
        pos_translator : Maps a position to (row, column) in `frame`.
        scale : Size of the triangle in world units.
        opacity : Triangle opacity in [0, 1].

        # Returns

        A new image with the triangle composited onto it.
        """
        # Force a float array: integer coordinates would otherwise produce an
        # integer array that cannot take the fractional offsets added below.
        p0 = np.array((position[0], position[2]), dtype=float)

        theta = -2 * math.pi * (rotation / 360.0)
        rotation_mat = np.array(
            [[math.cos(theta), -math.sin(theta)], [math.sin(theta), math.cos(theta)]]
        )
        offset1 = scale * np.array([-1 / 2.0, 1])
        offset2 = scale * np.array([1 / 2.0, 1])

        p1 = p0 + np.matmul(rotation_mat, offset1)
        p2 = p0 + np.matmul(rotation_mat, offset2)

        img1 = Image.fromarray(frame.astype("uint8"), "RGB").convert("RGBA")
        # PIL sizes are (width, height) whereas `frame.shape[:-1]` is
        # (rows, columns); taking the size from `img1` keeps the overlay
        # compatible with `alpha_composite` for non-square frames too.
        img2 = Image.new("RGBA", img1.size)  # Use RGBA

        alpha = int(round(255 * opacity))  # Define transparency for the triangle.
        # PIL expects (x, y) = (column, row), i.e. the reversed translation.
        points = [tuple(reversed(pos_translator(p))) for p in [p0, p1, p2]]
        draw = ImageDraw.Draw(img2)
        draw.polygon(points, fill=(255, 255, 255, alpha))

        img = Image.alpha_composite(img1, img2)
        return np.array(img.convert("RGB"))

    @staticmethod
    def visualize_agent_path(
        positions: Sequence[Any],
        frame: np.ndarray,
        pos_translator: ThorPositionTo2DFrameTranslator,
        single_color: bool = False,
        view_triangle_only_on_last: bool = False,
        disable_view_triangle: bool = False,
        line_opacity: float = 1.0,
        trajectory_start_end_color_str: Tuple[str, str] = ("red", "green"),
    ) -> np.ndarray:
        if single_color:
            frame = ThorViz.add_lines_to_map(
                list(map(ThorViz.position_to_tuple, positions)),
                frame,
                pos_translator,
                line_opacity,
                tuple(
                    map(
                        lambda x: int(round(255 * x)),
                        col.Color(trajectory_start_end_color_str[0]).rgb,
                    )
                ),
            )
        else:
            if len(positions) > 1:
                colors = list(
                    col.Color(trajectory_start_end_color_str[0]).range_to(
                        col.Color(trajectory_start_end_color_str[1]), len(positions) - 1
                    )
                )
                for i in range(len(positions) - 1):
                    frame = ThorViz.add_line_to_map(
                        ThorViz.position_to_tuple(positions[i]),
                        ThorViz.position_to_tuple(positions[i + 1]),
                        frame,
                        pos_translator,
                        opacity=line_opacity,
                        color=tuple(map(lambda x: int(round(255 * x)), colors[i].rgb)),
                    )

        if view_triangle_only_on_last:
            positions = [positions[-1]]
        if disable_view_triangle:
            positions = []
        for position in positions:
            frame = ThorViz.add_agent_view_triangle(
                ThorViz.position_to_tuple(position),
                rotation=position["rotation"],
                frame=frame,
                pos_translator=pos_translator,
                opacity=0.05 + view_triangle_only_on_last * 0.2,
            )
        return frame

    def dump_top_down_view(self, room_name: str, image_path: str):
        """Render the top-down view of `room_name` and write it to `image_path`.

        Requires `self.controller` to be running (see `make_controller`).
        """
        get_logger().debug("Dumping {}".format(image_path))

        controller = self.controller
        controller.reset(room_name)
        controller.step(
            {"action": "Initialize", "gridSize": 0.1, "makeAgentsVisible": False}
        )
        controller.step({"action": "ToggleMapView"})

        cv2.imwrite(image_path, controller.last_event.cv2img)

    def make_fig(self, episode: Any, episode_id: str) -> Figure:
        """Create a figure showing the episode's trajectory on its top-down map.

        # Parameters

        episode : Episode data from which the trajectory is read via
            `self.path_to_trajectory`.
        episode_id : Episode identifier. Its first two "_"-separated parts
            form the scene name; the full id is used as the figure title.

        # Returns

        The matplotlib figure.
        """
        trajectory: Sequence[Dict[str, Any]] = self._access(
            episode, self.path_to_trajectory
        )

        # Top-down images and translators are loaded on first use.
        if self.thor_top_downs is None:
            self.init_top_down_render()

        roomname = "_".join(episode_id.split("_")[:2])
        top_down = self.thor_top_downs[roomname]
        translator = self.map_data[roomname]["pos_translator"]

        im = self.visualize_agent_path(
            trajectory,
            top_down,
            translator,
            single_color=self.single_color,
            view_triangle_only_on_last=self.view_triangle_only_on_last,
            disable_view_triangle=self.disable_view_triangle,
            line_opacity=self.line_opacity,
        )

        fig, ax = plt.subplots(figsize=self.figsize)
        ax.set_title(episode_id, fontsize=self.fontsize)
        # The channel axis is reversed for display (the top-down images are
        # read with `cv2.imread` -- presumably BGR to RGB).
        ax.imshow(self.crop_viz_image(im)[:, :, ::-1])
        ax.axis("off")

        return fig


class ThorMultiViz(ThorViz):
    """`ThorViz` variant that overlays the trajectories of several agents on one image.

    # Attributes

    path_to_trajectory_prefix : Access path to a trajectory; each agent's suffix is
        appended to the last element of this path.
    agent_suffixes : One suffix per agent.
    trajectory_start_end_color_strs : One `(start, end)` color-name pair per agent,
        paired with `agent_suffixes` by position.
    """

    def __init__(
        self,
        path_to_trajectory_prefix: Sequence[str] = ("task_info", "followed_path"),
        agent_suffixes: Sequence[str] = ("1", "2"),
        label: str = "thor_trajectories",
        trajectory_start_end_color_strs: Sequence[Tuple[str, str]] = (
            ("red", "green"),
            ("cyan", "purple"),
        ),
        **kwargs,
    ):
        """Initializer; remaining keyword arguments are forwarded to `ThorViz`."""
        super().__init__(label=label, **kwargs)

        # Stored as lists so they are independent of the (possibly shared) defaults.
        self.path_to_trajectory_prefix = [*path_to_trajectory_prefix]
        self.agent_suffixes = [*agent_suffixes]
        self.trajectory_start_end_color_strs = [*trajectory_start_end_color_strs]

    def make_fig(self, episode: Any, episode_id: str) -> Figure:
        """Build a figure with every agent's trajectory drawn over the scene's top-down view.

        # Parameters

        episode : Episode data from which each agent's trajectory is accessed.
        episode_id : Episode identifier. Its first two `_`-separated tokens are used as the
            room name, and the full id is used as the figure title.

        # Returns

        The matplotlib figure containing the rendered image.
        """
        if self.thor_top_downs is None:
            self.init_top_down_render()

        roomname = "_".join(episode_id.split("_")[:2])
        canvas = self.thor_top_downs[roomname]
        agents_and_colors = zip(
            self.agent_suffixes, self.trajectory_start_end_color_strs
        )

        for suffix, color_pair in agents_and_colors:
            # Per-agent access path: the prefix with the suffix appended to its last key.
            prefix = self.path_to_trajectory_prefix
            agent_path = [*prefix[:-1], prefix[-1] + suffix]
            agent_trajectory = self._access(episode, agent_path)

            # Each agent is drawn on top of the result of the previous one.
            canvas = self.visualize_agent_path(
                agent_trajectory,
                canvas,
                self.map_data[roomname]["pos_translator"],
                single_color=self.single_color,
                view_triangle_only_on_last=self.view_triangle_only_on_last,
                disable_view_triangle=self.disable_view_triangle,
                line_opacity=self.line_opacity,
                trajectory_start_end_color_str=color_pair,
            )

        figure, axes = plt.subplots(figsize=self.figsize)
        axes.set_title(episode_id, fontsize=self.fontsize)
        axes.imshow(self.crop_viz_image(canvas)[:, :, ::-1])
        axes.axis("off")

        return figure


================================================
FILE: allenact_plugins/ithor_plugin/scripts/__init__.py
================================================


================================================
FILE: allenact_plugins/ithor_plugin/scripts/make_objectnav_debug_dataset.py
================================================
import os

from allenact_plugins.robothor_plugin.scripts.make_objectnav_debug_dataset import (
    create_debug_dataset_from_train_dataset,
)

if __name__ == "__main__":
    # All paths are resolved relative to the directory the script is launched from.
    DATASET_ROOT = os.path.join(os.getcwd(), "datasets", "ithor-objectnav")

    # Build the debug split from a fixed subset of train episodes for a single
    # scene / target object.
    create_debug_dataset_from_train_dataset(
        scene="FloorPlan1",
        target_object_type="Apple",
        episodes_subset=[0, 7, 11, 12],
        train_dataset_path=os.path.join(DATASET_ROOT, "train"),
        base_debug_output_path=os.path.join(DATASET_ROOT, "debug"),
    )


================================================
FILE: allenact_plugins/ithor_plugin/scripts/make_pointnav_debug_dataset.py
================================================
import os

from allenact_plugins.robothor_plugin.scripts.make_objectnav_debug_dataset import (
    create_debug_dataset_from_train_dataset,
)

if __name__ == "__main__":
    # All paths are resolved relative to the directory the script is launched from.
    DATASET_ROOT = os.path.join(os.getcwd(), "datasets", "ithor-pointnav")

    # Build the debug split from a fixed subset of train episodes for a single
    # scene; no target object type is given for this dataset.
    create_debug_dataset_from_train_dataset(
        scene="FloorPlan1",
        target_object_type=None,
        episodes_subset=[0, 7, 11, 12],
        train_dataset_path=os.path.join(DATASET_ROOT, "train"),
        base_debug_output_path=os.path.join(DATASET_ROOT, "debug"),
    )


================================================
FILE: allenact_plugins/lighthouse_plugin/__init__.py
================================================


================================================
FILE: allenact_plugins/lighthouse_plugin/configs/__init__.py
================================================


================================================
FILE: allenact_plugins/lighthouse_plugin/data/__init__.py
================================================


================================================
FILE: allenact_plugins/lighthouse_plugin/extra_environment.yml
================================================
dependencies:
  - patsy>=0.5.1
  - pip
  - pip:
      - gym-minigrid>=1.0.1


================================================
FILE: allenact_plugins/lighthouse_plugin/extra_requirements.txt
================================================
patsy>=0.5.1
gym-minigrid>=1.0.1


================================================
FILE: allenact_plugins/lighthouse_plugin/lighthouse_environment.py
================================================
import copy
import curses
import itertools
import time
from functools import lru_cache
from typing import Optional, Tuple, Any, List, Union, cast

import numpy as np
from gym.utils import seeding
from gym_minigrid import minigrid

# Integer codes stored in the cells of the world tensor.
EMPTY = 0  # fill value for cells that are not otherwise marked
GOAL = 1  # written at the goal position when the environment is reset
WRONG_CORNER = 2  # written at every world corner of the base world tensor
WALL = 3  # written at the first and last index along each axis


@lru_cache(1000)
def _get_world_corners(world_dim: int, world_radius: int):
    if world_radius == 0:
        return ((0,) * world_dim,)

    def combination_to_vec(comb) -> Tuple[int, ...]:
        vec = [world_radius] * world_dim
        for k in comb:
            vec[k] *= -1
        return tuple(vec)

    return tuple(
        sorted(
            combination_to_vec(comb)
            for i in range(world_dim + 1)
            for comb in itertools.combinations(list(range(world_dim)), i)
        )
    )


@lru_cache(1000)
def _base_world_tensor(world_dim: int, world_radius: int):
    """Build the goal-free world tensor: empty interior, walls, and marked corners.

    The tensor has `2 * world_radius + 1` cells along each of `world_dim` axes.
    The result is cached by `lru_cache`, so the same array object is returned
    for repeated arguments.
    """
    side = 2 * world_radius + 1
    world = np.full((side,) * world_dim, fill_value=EMPTY)

    # Mark the two boundary hyperplanes of every axis as walls.
    whole_axis = slice(0, side)
    for axis in range(world_dim):
        for boundary in (0, 2 * world_radius):
            index: List[Union[slice, int]] = [whole_axis] * world_dim
            index[axis] = boundary
            world[tuple(index)] = WALL

    # Corners are written after the walls, so they overwrite the wall code there.
    for corner in _get_world_corners(world_dim=world_dim, world_radius=world_radius):
        shifted = tuple([coord + world_radius for coord in corner])
        world[shifted] = WRONG_CORNER

    return world


class LightHouseEnvironment(object):
    """Grid world in which an agent starting at the origin moves towards a goal corner.

    Positions are integer vectors with entries in `[-world_radius, world_radius]`.
    One of the world's corners is chosen as the goal on reset.

    # Attributes

    world_dim : Number of axes of the world.
    world_radius : Maximum absolute coordinate along each axis.
    world_corners : Integer array with one row per world corner.
    world_tensor : Array of cell codes (`EMPTY`, `GOAL`, `WRONG_CORNER`, `WALL`).
    current_position : The agent's current position.
    closest_distance_to_corners : Per corner, the smallest max-norm distance between
        the agent and that corner since the last reset.
    positions : The position recorded after every step (and the start position).
    goal_position : The corner currently marked as the goal.
    last_action : The last action passed to `step` (`None` right after a reset).
    curses_screen : The curses screen used by `render(mode="curses")`, if open.
    """

    EMPTY = 0
    GOAL = 1
    WRONG_CORNER = 2
    WALL = 3
    SPACE_LEVELS = [EMPTY, GOAL, WRONG_CORNER, WALL]

    def __init__(self, world_dim: int, world_radius: int, **kwargs):
        """Initializer.

        # Parameters

        world_dim : Number of axes of the world.
        world_radius : Maximum absolute coordinate along each axis.
        kwargs : May contain `seed`; if absent a random seed is drawn from `np.random`.
        """
        self.world_dim = world_dim
        self.world_radius = world_radius

        self.world_corners = np.array(
            _get_world_corners(world_dim=world_dim, world_radius=world_radius),
            dtype=int,
        )

        self.curses_screen: Optional[Any] = None

        # The base tensor is cached and shared, so always work on a copy.
        self.world_tensor: np.ndarray = copy.deepcopy(
            _base_world_tensor(world_radius=world_radius, world_dim=world_dim)
        )
        self.current_position = np.zeros(world_dim, dtype=int)
        self.closest_distance_to_corners = np.full(
            2**world_dim, fill_value=world_radius, dtype=int
        )
        self.positions: List[Tuple[int, ...]] = [tuple(self.current_position)]
        self.goal_position: Optional[np.ndarray] = None
        self.last_action: Optional[int] = None

        self.seed: Optional[int] = None
        self.np_seeded_random_gen: Optional[np.random.RandomState] = None
        self.set_seed(seed=int(kwargs.get("seed", np.random.randint(0, 2**31 - 1))))

        self.random_reset()

    def _teardown_curses(self) -> None:
        """Restore the terminal if a curses screen is open, then forget the screen."""
        if self.curses_screen is not None:
            curses.nocbreak()
            self.curses_screen.keypad(False)
            curses.echo()
            curses.endwin()

        # Dropping the handle keeps later resets / closes from repeating the teardown.
        self.curses_screen = None

    def set_seed(self, seed: int):
        """Store `seed` and create the environment's own seeded random generator."""
        # More information about why `np_seeded_random_gen` is used rather than just `np.random.seed`
        # can be found at gym/utils/seeding.py
        # There's literature indicating that having linear correlations between seeds of multiple
        # PRNG's can correlate the outputs
        self.seed = seed
        self.np_seeded_random_gen, _ = cast(
            Tuple[np.random.RandomState, Any], seeding.np_random(self.seed)
        )

    def random_reset(self, goal_position: Optional[bool] = None):
        """Reset the world, the agent's position, and (by default) the goal corner.

        # Parameters

        goal_position : When `None`, a new goal corner is sampled with the seeded
            generator. Otherwise the previously stored goal is kept.
        """
        # NOTE(review): a non-None `goal_position` is never stored, it only suppresses
        # re-sampling. Confirm whether it was meant to set the goal explicitly.
        self.last_action = None
        self.world_tensor = copy.deepcopy(
            _base_world_tensor(world_radius=self.world_radius, world_dim=self.world_dim)
        )
        if goal_position is None:
            self.goal_position = self.world_corners[
                self.np_seeded_random_gen.randint(low=0, high=len(self.world_corners))
            ]
        self.world_tensor[
            tuple(cast(np.ndarray, self.world_radius + self.goal_position))
        ] = GOAL

        self._teardown_curses()

        self.current_position = np.zeros(self.world_dim, dtype=int)
        self.closest_distance_to_corners = np.abs(
            (self.world_corners - self.current_position.reshape(1, -1))
        ).max(1)

        self.positions = [tuple(self.current_position)]

    def step(self, action: int) -> bool:
        """Move the agent one cell along one axis.

        Actions `0 .. world_dim - 1` add 1 to the corresponding axis, actions
        `world_dim .. 2 * world_dim - 1` subtract 1 from axis `action % world_dim`.

        # Returns

        `True` if the position changed, `False` if the move was clipped at the
        world boundary (the unchanged position is still appended to `positions`).
        """
        assert 0 <= action < 2 * self.world_dim
        self.last_action = action

        delta = -1 if action >= self.world_dim else 1
        ind = action % self.world_dim
        old = self.current_position[ind]
        new = min(max(delta + old, -self.world_radius), self.world_radius)
        if new == old:
            self.positions.append(self.positions[-1])
            return False
        else:
            self.current_position[ind] = new
            # Track the closest (max-norm) approach to every corner so far.
            self.closest_distance_to_corners = np.minimum(
                np.abs((self.world_corners - self.current_position.reshape(1, -1))).max(
                    1
                ),
                self.closest_distance_to_corners,
            )
            self.positions.append(tuple(self.current_position))
            return True

    def render(self, mode="array", **kwargs):
        """Render the environment.

        # Parameters

        mode : One of
            * `"array"`: returns a copy of the world tensor with the agent's cell set to 9.
            * `"curses"`: draws a text view on a curses screen (1 or 2 dimensional worlds
              only), then sleeps for `kwargs["sleep_time"]` seconds if given. Returns `None`.
            * `"minigrid"`: returns the image produced by a `gym_minigrid` grid render.
        kwargs : `extra_text` (extra line printed in curses mode) and `sleep_time`.

        # Raises

        NotImplementedError : For unknown modes, or curses mode with more than 2 dimensions.
        """
        if mode == "array":
            arr = copy.deepcopy(self.world_tensor)
            arr[tuple(self.world_radius + self.current_position)] = 9
            return arr

        elif mode == "curses":
            if self.world_dim == 1:
                space_list = ["_"] * (1 + 2 * self.world_radius)

                goal_ind = self.goal_position[0] + self.world_radius
                space_list[goal_ind] = "G"
                # The opposite end of the line is the wrong corner.
                space_list[2 * self.world_radius - goal_ind] = "W"
                space_list[self.current_position[0] + self.world_radius] = "X"

                to_print = " ".join(space_list)

                if self.curses_screen is None:
                    self.curses_screen = curses.initscr()

                self.curses_screen.addstr(0, 0, to_print)
                if "extra_text" in kwargs:
                    self.curses_screen.addstr(1, 0, kwargs["extra_text"])
                self.curses_screen.refresh()
            elif self.world_dim == 2:
                space_list = [
                    ["_"] * (1 + 2 * self.world_radius)
                    for _ in range(1 + 2 * self.world_radius)
                ]

                for row_ind in range(1 + 2 * self.world_radius):
                    for col_ind in range(1 + 2 * self.world_radius):
                        if self.world_tensor[row_ind][col_ind] == self.GOAL:
                            space_list[row_ind][col_ind] = "G"

                        if self.world_tensor[row_ind][col_ind] == self.WRONG_CORNER:
                            space_list[row_ind][col_ind] = "C"

                        if self.world_tensor[row_ind][col_ind] == self.WALL:
                            space_list[row_ind][col_ind] = "W"

                        # The agent marker is written last so it wins over the cell code.
                        if (
                            (row_ind, col_ind)
                            == self.world_radius + self.current_position
                        ).all():
                            space_list[row_ind][col_ind] = "X"

                if self.curses_screen is None:
                    self.curses_screen = curses.initscr()

                for i, sl in enumerate(space_list):
                    self.curses_screen.addstr(i, 0, " ".join(sl))

                # NOTE(review): no `state` method is defined in this class; confirm
                # where it is expected to come from.
                self.curses_screen.addstr(len(space_list), 0, str(self.state()))
                if "extra_text" in kwargs:
                    self.curses_screen.addstr(
                        len(space_list) + 1, 0, kwargs["extra_text"]
                    )

                self.curses_screen.refresh()
            else:
                raise NotImplementedError("Cannot render worlds of > 2 dimensions.")
        elif mode == "minigrid":
            height = width = 2 * self.world_radius + 2
            grid = minigrid.Grid(width, height)

            # Generate the surrounding walls
            grid.horz_wall(0, 0)
            grid.horz_wall(0, height - 1)
            grid.vert_wall(0, 0)
            grid.vert_wall(width - 1, 0)

            # Draw the agent as a red `Goal` object at its most recent position.
            agent_pos = np.array(self.positions[-1]) + 1 + self.world_radius
            agent = minigrid.Goal()
            agent.color = "red"
            grid.set(agent_pos[0], agent_pos[1], agent)
            agent.init_pos = tuple(agent_pos)
            agent.cur_pos = tuple(agent_pos)

            goal_pos = self.goal_position + self.world_radius

            goal = minigrid.Goal()
            grid.set(goal_pos[0], goal_pos[1], goal)
            goal.init_pos = tuple(goal_pos)
            goal.cur_pos = tuple(goal_pos)

            highlight_mask = np.zeros((height, width), dtype=bool)

            # Highlight a window of up to 5 cells around the agent in each direction.
            minx, maxx = max(1, agent_pos[0] - 5), min(height - 1, agent_pos[0] + 5)
            miny, maxy = max(1, agent_pos[1] - 5), min(height - 1, agent_pos[1] + 5)
            highlight_mask[minx : (maxx + 1), miny : (maxy + 1)] = True

            img = grid.render(
                minigrid.TILE_PIXELS, agent_pos, None, highlight_mask=highlight_mask
            )

            return img

        else:
            raise NotImplementedError("Unknown render mode {}.".format(mode))

        # Only reached in curses mode; the other modes return above.
        time.sleep(0.0 if "sleep_time" not in kwargs else kwargs["sleep_time"])

    def close(self):
        """Release the curses screen, if one is open."""
        self._teardown_curses()

    @staticmethod
    def optimal_ave_ep_length(world_dim: int, world_radius: int, view_radius: int):
        """Return the closed-form episode length this class uses as the optimal average.

        # Parameters

        world_dim : Number of axes of the world (only 1 and 2 are supported).
        world_radius : Maximum absolute coordinate along each axis.
        view_radius : View radius used in the formula.

        # Raises

        NotImplementedError : If `world_dim` is neither 1 nor 2.
        """
        if world_dim == 1:
            max_steps_wrong_dir = max(world_radius - view_radius, 0)

            return max_steps_wrong_dir + world_radius

        elif world_dim == 2:
            tau = 2 * (world_radius - view_radius)

            average_steps_needed = 0.25 * (4 * 2 * view_radius + 10 * tau)

            return average_steps_needed
        else:
            raise NotImplementedError(
                "`optimal_average_ep_length` is only implemented"
                " for when the `world_dim` is 1 or 2 ({} given).".format(world_dim)
            )


================================================
FILE: allenact_plugins/lighthouse_plugin/lighthouse_models.py
================================================
from typing import Optional, Tuple, cast

import gym
import torch
import torch.nn as nn
from gym.spaces.dict import Dict as SpaceDict

from allenact.algorithms.onpolicy_sync.policy import (
    ActorCriticModel,
    Memory,
    ObservationType,
)
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput, DistributionType


class LinearAdvisorActorCritic(ActorCriticModel[CategoricalDistr]):
    """Actor-critic made of one linear layer with a main and an auxiliary policy head.

    The layer maps the selected 1-dimensional observation to
    `2 * num_actions + 1` outputs: logits of the main policy, logits of the
    auxiliary policy, and a single value estimate.

    # Attributes

    input_uuid : Key of the observation fed to the linear layer.
    in_dim : Size of that observation.
    num_actions : Number of discrete actions.
    linear : The linear layer producing all outputs.
    """

    def __init__(
        self,
        input_uuid: str,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        ensure_same_init_aux_weights: bool = True,
    ):
        """Initializer.

        # Parameters

        input_uuid : Key of the observation to use; must be in `observation_space`.
        action_space : The discrete action space.
        observation_space : Observation space; the entry for `input_uuid` must be a
            1-dimensional `Box`.
        ensure_same_init_aux_weights : If `True`, the auxiliary head's initial weights
            are copied from the main head's.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        assert input_uuid in observation_space.spaces, (
            "LinearAdvisorActorCritic expects `input_uuid` to be a key of the"
            " observation space."
        )
        self.input_uuid = input_uuid

        box_space: gym.spaces.Box = observation_space[self.input_uuid]
        assert isinstance(box_space, gym.spaces.Box), (
            "LinearAdvisorActorCritic requires that the"
            " observation space corresponding to the input key is a Box space."
        )
        assert len(box_space.shape) == 1
        self.in_dim = box_space.shape[0]

        self.num_actions = action_space.n
        self.linear = nn.Linear(self.in_dim, 2 * self.num_actions + 1)

        nn.init.orthogonal_(self.linear.weight)
        if ensure_same_init_aux_weights:
            # Ensure main actor / auxiliary actor start with the same weights
            self.linear.weight.data[self.num_actions : -1, :] = self.linear.weight[
                : self.num_actions, :
            ]
        nn.init.constant_(self.linear.bias, 0)

    # noinspection PyMethodMayBeStatic
    def _recurrent_memory_specification(self):
        """Return `None`: this model keeps no recurrent memory."""
        return None

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Compute the main distribution, values, and the auxiliary distribution.

        Only `observations[self.input_uuid]` is used; `memory`, `prev_actions`, and
        `masks` are ignored. The auxiliary distribution is returned in the output's
        `extras` under the key `"auxiliary_distributions"`, and the returned memory
        is always `None`.
        """
        out = self.linear(cast(torch.Tensor, observations[self.input_uuid]))

        # Output layout along the last axis: [main logits | auxiliary logits | value].
        main_logits = out[..., : self.num_actions]
        aux_logits = out[..., self.num_actions : -1]
        values = out[..., -1:]

        # noinspection PyArgumentList
        return (
            ActorCriticOutput(
                distributions=cast(
                    DistributionType, CategoricalDistr(logits=main_logits)
                ),  # step x sampler x ...
                values=cast(
                    torch.FloatTensor, values.view(values.shape[:2] + (-1,))
                ),  # step x sampler x flattened
                extras={"auxiliary_distributions": CategoricalDistr(logits=aux_logits)},
            ),
            None,
        )


================================================
FILE: allenact_plugins/lighthouse_plugin/lighthouse_sensors.py
================================================
import itertools
from typing import Any, Dict, Optional, Tuple, Sequence

import gym
import numpy as np
import pandas as pd
import patsy

from allenact.base_abstractions.sensor import Sensor, prepare_locals_for_super
from allenact.base_abstractions.task import Task
from allenact_plugins.lighthouse_plugin.lighthouse_environment import (
    LightHouseEnvironment,
)


def get_corner_observation(
    env: LightHouseEnvironment,
    view_radius: int,
    view_corner_offsets: Optional[np.array],
):
    """Compute the corner-based observation vector for the environment's current state.

    The result is a float32 vector with one entry per world corner followed by
    two extra entries:

    * per corner: the world tensor value at that corner if the agent has ever
      been within `view_radius` (max-norm) of it, otherwise the tensor value at
      the matching "view corner" (current position plus that corner's offset,
      clipped to the tensor bounds),
    * an "on border" code: an action index whose direction is blocked by the
      border, or `2 * world_dim` if there is none,
    * the last action, or `2 * world_dim` if no action was taken yet.

    # Parameters

    env : The environment to observe.
    view_radius : The agent's view radius.
    view_corner_offsets : Per-corner offsets of the view corners relative to the
        agent; computed from `view_radius` and the sign of `env.world_corners`
        when `None`.
    """
    radius = env.world_radius
    no_action_code = 2 * env.world_dim
    tensor_shape = env.world_tensor.shape

    if view_corner_offsets is None:
        view_corner_offsets = view_radius * (2 * (env.world_corners > 0) - 1)

    # World tensor values at the (clipped) corners of the agent's view.
    view_corner_indices = np.clip(
        np.reshape(env.current_position, (1, -1)) + view_corner_offsets + radius,
        a_min=0,
        a_max=2 * radius,
    )
    flat_view_indices = np.ravel_multi_index(
        np.transpose(view_corner_indices), tensor_shape
    )
    view_values = env.world_tensor.reshape(-1)[flat_view_indices]

    last_action = no_action_code if env.last_action is None else env.last_action

    # Entry `a` is True when the agent sits on the border that action `a` moves towards.
    at_upper_border = env.current_position == radius
    at_lower_border = env.current_position == -radius
    on_border_bools = np.concatenate((at_upper_border, at_lower_border), axis=0)

    if last_action == no_action_code or on_border_bools[last_action]:
        on_border_value = last_action
    elif on_border_bools.any():
        on_border_value = np.flatnonzero(on_border_bools)[0]
    else:
        on_border_value = no_action_code

    # Corners that have been within view report their true value from then on.
    seen_mask = (env.closest_distance_to_corners <= view_radius).astype(int)
    flat_corner_indices = np.ravel_multi_index(
        np.transpose(env.world_corners + radius), tensor_shape
    )
    seen_corner_values = env.world_tensor.reshape(-1)[flat_corner_indices] * seen_mask

    observation = np.zeros((seen_corner_values.shape[0] + 2,), dtype=np.float32)
    return np.concatenate(
        (
            seen_corner_values + view_values * (1 - seen_mask),
            [on_border_value, last_action],
        ),
        axis=0,
        out=observation,
    )


class CornerSensor(Sensor[LightHouseEnvironment, Any]):
    """Sensor returning the corner observation produced by `get_corner_observation`.

    # Attributes

    view_radius : The agent's view radius.
    world_dim : Number of axes of the world.
    view_corner_offsets : Per-corner view offsets, computed on the first observation.
    """

    def __init__(
        self,
        view_radius: int,
        world_dim: int,
        uuid: str = "corner_fixed_radius",
        **kwargs: Any
    ):
        """Initializer.

        # Parameters

        view_radius : The agent's view radius.
        world_dim : Number of axes of the world.
        uuid : Identifier of this sensor.
        """
        self.view_radius = view_radius
        self.world_dim = world_dim
        self.view_corner_offsets: Optional[np.ndarray] = None

        observation_space = self._get_observation_space()

        # The super-class arguments are built from this method's local variables,
        # so the local names above (e.g. `uuid`, `observation_space`) matter.
        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self):
        """Return a `Box` with one entry per world corner plus two extra entries."""
        # NOTE(review): the bounds come from `SPACE_LEVELS` and the dtype is `int`,
        # while `get_corner_observation` returns float32 values whose last two
        # entries can be as large as `2 * world_dim` -- confirm this is intended.
        return gym.spaces.Box(
            low=min(LightHouseEnvironment.SPACE_LEVELS),
            high=max(LightHouseEnvironment.SPACE_LEVELS),
            shape=(2**self.world_dim + 2,),
            dtype=int,
        )

    def get_observation(
        self,
        env: LightHouseEnvironment,
        task: Optional[Task],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Return the corner observation of `env`; `task` and extra arguments are unused."""
        # Computed once from the first environment seen and reused afterwards.
        if self.view_corner_offsets is None:
            self.view_corner_offsets = self.view_radius * (
                2 * (env.world_corners > 0) - 1
            )

        return get_corner_observation(
            env=env,
            view_radius=self.view_radius,
            view_corner_offsets=self.view_corner_offsets,
        )


class FactorialDesignCornerSensor(Sensor[LightHouseEnvironment, Any]):
    """Sensor that re-encodes the `CornerSensor` observation as a design-matrix row.

    A design matrix over every combination of variable levels is built once
    with `patsy`; each observation is the row belonging to the observed
    combination.

    # Attributes

    view_radius : The agent's view radius.
    world_dim : Number of axes of the world (must be <= 2).
    degree : Interaction degree used in the patsy formula; `-1` selects the
        full interaction of all variables.
    corner_sensor : The wrapped `CornerSensor`.
    variables_and_levels : List of `(variable name, levels)` pairs.
    design_matrix : The full design matrix (boolean array).
    tuple_to_ind : Maps a tuple of variable values to its row in `design_matrix`.
    """

    # Class-level cache of (design matrix, tuple -> row index) pairs, keyed by
    # `(variables_and_levels, degree)`.
    _DESIGN_MAT_CACHE: Dict[Tuple, Any] = {}

    def __init__(
        self,
        view_radius: int,
        world_dim: int,
        degree: int,
        uuid: str = "corner_fixed_radius_categorical",
        **kwargs: Any
    ):
        """Initializer.

        # Parameters

        view_radius : The agent's view radius.
        world_dim : Number of axes of the world.
        degree : Interaction degree used in the patsy formula (`-1` for the full
            interaction of all variables).
        uuid : Identifier of this sensor.

        # Raises

        NotImplementedError : If `world_dim > 2`.
        """
        self.view_radius = view_radius
        self.world_dim = world_dim
        self.degree = degree

        if self.world_dim > 2:
            raise NotImplementedError(
                "When using the `FactorialDesignCornerSensor`,"
                "`world_dim` must be <= 2 due to memory constraints."
                "In the current implementation, creating the design"
                "matrix in the `world_dim == 3` case would require"
                "instantiating a matrix of size ~ 3Mx3M (9 trillion entries)."
            )

        self.view_corner_offsets: Optional[np.ndarray] = None
        # self.world_corners_offset: Optional[List[typing.Tuple[int, ...]]] = None

        self.corner_sensor = CornerSensor(self.view_radius, self.world_dim)

        self.variables_and_levels = self._get_variables_and_levels(
            world_dim=self.world_dim
        )
        self._design_mat_formula = self._create_formula(
            variables_and_levels=self._get_variables_and_levels(
                world_dim=self.world_dim
            ),
            degree=self.degree,
        )
        # NOTE(review): `single_row_df` and `_view_tuple_to_design_array` are not
        # read anywhere else in this class.
        self.single_row_df = pd.DataFrame(
            data=[[0] * len(self.variables_and_levels)],
            columns=[x[0] for x in self.variables_and_levels],
        )
        self._view_tuple_to_design_array: Dict[Tuple[int, ...], np.ndarray] = {}

        (
            design_matrix,
            tuple_to_ind,
        ) = self._create_full_design_matrix_and_tuple_to_ind_dict(
            variables_and_levels=tuple(self.variables_and_levels), degree=self.degree
        )

        self.design_matrix = design_matrix
        self.tuple_to_ind = tuple_to_ind

        # Must run after `design_matrix` / `tuple_to_ind` are set: the observation
        # space's shape is read from a design-matrix row.
        observation_space = self._get_observation_space()

        # The super-class arguments are built from this method's local variables,
        # so the local names above matter.
        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self):
        """Return a `Box` whose length equals the length of one design-matrix row."""
        return gym.spaces.Box(
            low=min(LightHouseEnvironment.SPACE_LEVELS),
            high=max(LightHouseEnvironment.SPACE_LEVELS),
            shape=(
                len(
                    self.view_tuple_to_design_array(
                        (0,) * len(self.variables_and_levels)
                    )
                ),
            ),
            dtype=int,
        )

    def view_tuple_to_design_array(self, view_tuple: Tuple):
        """Return the design-matrix row for `view_tuple` as a float32 array."""
        return np.array(
            self.design_matrix[self.tuple_to_ind[view_tuple], :], dtype=np.float32
        )

    @classmethod
    def output_dim(cls, world_dim: int):
        """Return the number of distinct level combinations of all variables.

        This is the product of the level counts from `_get_variables_and_levels`:
        `2**world_dim` corner variables with 3 (1-D) or 4 levels each, and two
        variables with `2 * world_dim + 1` levels each.
        """
        return ((3 if world_dim == 1 else 4) ** (2**world_dim)) * (
            2 * world_dim + 1
        ) ** 2

    @classmethod
    def _create_full_design_matrix_and_tuple_to_ind_dict(
        cls, variables_and_levels: Sequence[Tuple[str, Sequence[int]]], degree: int
    ):
        """Build (or fetch from the class cache) the full design matrix.

        # Returns

        A pair `(design_matrix, tuple_to_ind)`: the boolean design matrix with one
        row per combination of levels, and a dict mapping each combination (as a
        tuple) to its row index.
        """
        # Levels are converted to tuples so the cache key is hashable.
        variables_and_levels = tuple((x, tuple(y)) for x, y in variables_and_levels)
        key = (variables_and_levels, degree)
        if key not in cls._DESIGN_MAT_CACHE:
            # Every combination of levels, in `itertools.product` order.
            all_tuples = [
                tuple(x)
                for x in itertools.product(
                    *[levels for _, levels in variables_and_levels]
                )
            ]

            tuple_to_ind = {}
            for i, t in enumerate(all_tuples):
                tuple_to_ind[t] = i

            df = pd.DataFrame(
                data=all_tuples,
                columns=[var_name for var_name, _ in variables_and_levels],
            )

            cls._DESIGN_MAT_CACHE[key] = (
                np.array(
                    1.0
                    * patsy.dmatrix(
                        cls._create_formula(
                            variables_and_levels=variables_and_levels, degree=degree
                        ),
                        data=df,
                    ),
                    dtype=bool,
                ),
                tuple_to_ind,
            )
        return cls._DESIGN_MAT_CACHE[key]

    @staticmethod
    def _get_variables_and_levels(world_dim: int):
        """Return the `(name, levels)` pairs of the design's categorical variables.

        There is one `s{i}` variable per world corner, followed by `b0` and `a0`,
        each with levels `0 .. 2 * world_dim`.
        """
        return (
            [
                ("s{}".format(i), list(range(3 if world_dim == 1 else 4)))
                for i in range(2**world_dim)
            ]
            + [("b{}".format(i), list(range(2 * world_dim + 1))) for i in range(1)]
            + [("a{}".format(i), list(range(2 * world_dim + 1))) for i in range(1)]
        )

    @classmethod
    def _create_formula(
        cls, variables_and_levels: Sequence[Tuple[str, Sequence[int]]], degree: int
    ):
        """Create the patsy formula string over the categorical variables.

        With `degree == -1` all variables are joined with `:`; otherwise the
        formula is the sum of all variables raised to the power `degree`.
        """

        def make_categorial(var_name, levels):
            return "C({}, levels={})".format(var_name, levels)

        if degree == -1:
            return ":".join(
                make_categorial(var_name, levels)
                for var_name, levels in variables_and_levels
            )
        else:
            return "({})**{}".format(
                "+".join(
                    make_categorial(var_name, levels)
                    for var_name, levels in variables_and_levels
                ),
                degree,
            )

    def get_observation(
        self,
        env: LightHouseEnvironment,
        task: Optional[Task],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Return the design-matrix row matching the wrapped corner sensor's observation."""
        # NOTE(review): `CornerSensor.get_observation` does not read `as_tuple`.
        kwargs["as_tuple"] = True
        view_array = self.corner_sensor.get_observation(env, task, *args, **kwargs)
        return self.view_tuple_to_design_array(tuple(view_array))


================================================
FILE: allenact_plugins/lighthouse_plugin/lighthouse_tasks.py
================================================
import abc
import string
from typing import List, Dict, Any, Optional, Tuple, Union, Sequence, cast

import gym
import numpy as np
from gym.utils import seeding

from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor, SensorSuite
from allenact.base_abstractions.task import Task, TaskSampler
from allenact.utils.experiment_utils import set_seed
from allenact.utils.system import get_logger
from allenact_plugins.lighthouse_plugin.lighthouse_environment import (
    LightHouseEnvironment,
)
from allenact_plugins.lighthouse_plugin.lighthouse_sensors import get_corner_observation

# Reward constants used by `FindGoalLightHouseTask._step`.
DISCOUNT_FACTOR = 0.99  # scales the penalty given on the last allowed step
STEP_PENALTY = -0.01  # base reward of every step
FOUND_TARGET_REWARD = 1.0  # added to the step reward when the goal is reached


class LightHouseTask(Task[LightHouseEnvironment], abc.ABC):
    """Abstract base class for embodied tasks in the light house gridworld.

    # Attributes

    env : The light house environment.
    sensor_suite: Collection of sensors formed from the `sensors` argument in the initializer.
    task_info : Dictionary of (k, v) pairs defining task goals and other task information.
    max_steps : The maximum number of steps an agent can take in the task before it is considered failed.
    observation_space: The observation space returned on each step from the sensors.
    """

    def __init__(
        self,
        env: LightHouseEnvironment,
        sensors: Union[SensorSuite, List[Sensor]],
        task_info: Dict[str, Any],
        max_steps: int,
        **kwargs,
    ) -> None:
        """Initializer.

        Parameters are described in the class documentation.
        """
        super().__init__(
            env=env,
            sensors=sensors,
            task_info=task_info,
            max_steps=max_steps,
            **kwargs,
        )

        # No action has been taken yet.
        self._last_action: Optional[int] = None

    @property
    def last_action(self) -> int:
        """The most recent action passed to `step` (`None` before the first step)."""
        return self._last_action

    @last_action.setter
    def last_action(self, value: int):
        self._last_action = value

    def step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        """Record `action` as the last action and delegate to the parent's `step`."""
        assert isinstance(action, int)
        chosen_action = cast(int, action)

        self.last_action = chosen_action
        return super().step(action=chosen_action)

    def render(self, mode: str = "array", *args, **kwargs) -> np.ndarray:
        """Render the environment.

        # Parameters

        mode : `"array"` returns the environment's array rendering; `"rgb"`,
            `"rgb_array"`, and `"human"` return that array with every cell code
            replaced by an RGB color.

        # Raises

        NotImplementedError : For any other mode.
        """
        if mode == "array":
            return self.env.render(mode, **kwargs)

        if mode not in ("rgb", "rgb_array", "human"):
            raise NotImplementedError("Render mode '{}' is not supported.".format(mode))

        cell_codes = self.env.render("array", **kwargs)

        # One RGB triple per cell code; indexing with the code array yields the image.
        palette = np.array(
            (
                (31, 119, 180),
                (255, 127, 14),
                (44, 160, 44),
                (214, 39, 40),
                (148, 103, 189),
                (140, 86, 75),
                (227, 119, 194),
                (127, 127, 127),
                (188, 189, 34),
                (23, 190, 207),
            ),
            dtype=np.uint8,
        )
        return palette[cell_codes]


class FindGoalLightHouseTask(LightHouseTask):
    """Lighthouse task that ends once the agent's position equals the goal
    position of the environment.

    The action space is `Discrete(2 * world_dim)`; action names are
    `"<axis>(+1)"` for every axis followed by `"<axis>(-1)"` for every axis,
    where axes are named with lowercase ASCII letters.
    """

    # Cache of action-name tuples, keyed by world dimension.
    _CACHED_ACTION_NAMES: Dict[int, Tuple[str, ...]] = {}

    def __init__(
        self,
        env: LightHouseEnvironment,
        sensors: Union[SensorSuite, List[Sensor]],
        task_info: Dict[str, Any],
        max_steps: int,
        **kwargs,
    ):
        """Initializer; see `LightHouseTask` for parameter definitions."""
        super().__init__(env, sensors, task_info, max_steps, **kwargs)

        self._found_target = False

    @property
    def action_space(self) -> gym.spaces.Discrete:
        """Discrete space with two actions per world dimension."""
        return gym.spaces.Discrete(2 * self.env.world_dim)

    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        """Apply `action` in the environment and compute the step reward.

        Every step costs `STEP_PENALTY`. Reaching the goal adds
        `FOUND_TARGET_REWARD` and marks the target as found. If the goal has
        not been reached and this is the final permitted step, the reward is
        instead `STEP_PENALTY / (1 - DISCOUNT_FACTOR)`.
        """
        assert isinstance(action, int)
        action = cast(int, action)

        self.env.step(action)
        reward = STEP_PENALTY

        if np.all(self.env.current_position == self.env.goal_position):
            self._found_target = True
            reward += FOUND_TARGET_REWARD
        elif self.num_steps_taken() == self.max_steps - 1:
            reward = STEP_PENALTY / (1 - DISCOUNT_FACTOR)

        return RLStepResult(
            observation=self.get_observations(),
            reward=reward,
            done=self.is_done(),
            info=None,
        )

    def reached_terminal_state(self) -> bool:
        """Whether the goal position has been reached."""
        return self._found_target

    @classmethod
    def class_action_names(cls, world_dim: int = 2, **kwargs) -> Tuple[str, ...]:
        """Names of the `2 * world_dim` actions for a world of `world_dim`
        dimensions.

        The result is `("a(+1)", "b(+1)", ..., "a(-1)", "b(-1)", ...)`, one
        entry per action, and is cached per `world_dim`.
        """
        assert 1 <= world_dim <= 26, "Too many dimensions."
        if world_dim not in cls._CACHED_ACTION_NAMES:
            axis_names = string.ascii_lowercase[:world_dim]
            # One name per action: all positive moves first, then all
            # negative moves. (Formatting must be applied per axis, not to a
            # generator object.)
            action_names = ["{}(+1)".format(axis) for axis in axis_names]
            action_names.extend("{}(-1)".format(axis) for axis in axis_names)
            cls._CACHED_ACTION_NAMES[world_dim] = tuple(action_names)

        return cls._CACHED_ACTION_NAMES[world_dim]

    def action_names(self) -> Tuple[str, ...]:
        """Action names for this task's environment dimensionality."""
        return self.class_action_names(world_dim=self.env.world_dim)

    def close(self) -> None:
        """Nothing to release."""
        pass

    def query_expert(
        self,
        expert_view_radius: int,
        return_policy: bool = False,
        deterministic: bool = False,
        **kwargs,
    ) -> Tuple[Any, bool]:
        """Query a hand-written expert that only sees the world through a
        corner observation of radius `expert_view_radius`.

        # Parameters

        expert_view_radius : View radius passed to `get_corner_observation`.
        return_policy : If `True`, return the expert's policy vector (one
            entry per action) rather than an action index.
        deterministic : If `True` (and `return_policy` is `False`), return the
            argmax of the policy; otherwise sample an action from it.

        # Returns

        A tuple `(action_or_policy, True)`.

        # Raises

        NotImplementedError : If the world dimension is not 1 or 2.
        """
        view_tuple = get_corner_observation(
            env=self.env,
            view_radius=expert_view_radius,
            view_corner_offsets=None,
        )

        goal = self.env.GOAL
        wrong = self.env.WRONG_CORNER

        if self.env.world_dim == 1:
            left_view, right_view, hitting, last_action = view_tuple

            left = 1
            right = 0

            expert_action: Optional[int] = None
            policy: Optional[np.ndarray] = None

            # NOTE(review): the value `2 * world_dim` appears to encode "no
            # wall hit" / "no previous action" -- confirm against
            # `get_corner_observation`.
            if left_view == goal:
                expert_action = left
            elif right_view == goal:
                expert_action = right
            elif hitting != 2 * self.env.world_dim:
                expert_action = left if last_action == right else right
            elif left_view == wrong:
                expert_action = right
            elif right_view == wrong:
                expert_action = left
            elif last_action == 2 * self.env.world_dim:
                policy = np.array([0.5, 0.5])
            else:
                expert_action = last_action

            if policy is None:
                policy = np.array([expert_action == right, expert_action == left])

        elif self.env.world_dim == 2:

            tl, tr, bl, br, hitting, last_action = view_tuple

            wall = self.env.WALL

            d, r, u, l, none = 0, 1, 2, 3, 4

            # A visible goal corner: head towards it, sliding along the wall
            # if already pressed against it.
            if tr == goal:
                if hitting != r:
                    expert_action = r
                else:
                    expert_action = u
            elif br == goal:
                if hitting != d:
                    expert_action = d
                else:
                    expert_action = r
            elif bl == goal:
                if hitting != l:
                    expert_action = l
                else:
                    expert_action = d
            elif tl == goal:
                if hitting != u:
                    expert_action = u
                else:
                    expert_action = l

            # Exactly one wrong corner visible.
            elif tr == wrong and not any(x == wrong for x in [br, bl, tl]):
                expert_action = l
            elif br == wrong and not any(x == wrong for x in [bl, tl, tr]):
                expert_action = u
            elif bl == wrong and not any(x == wrong for x in [tl, tr, br]):
                expert_action = r
            elif tl == wrong and not any(x == wrong for x in [tr, br, bl]):
                expert_action = d

            # Exactly two adjacent wrong corners visible.
            elif all(x == wrong for x in [tr, br]) and not any(
                x == wrong for x in [bl, tl]
            ):
                expert_action = l
            elif all(x == wrong for x in [br, bl]) and not any(
                x == wrong for x in [tl, tr]
            ):
                expert_action = u

            elif all(x == wrong for x in [bl, tl]) and not any(
                x == wrong for x in [tr, br]
            ):
                expert_action = r
            elif all(x == wrong for x in [tl, tr]) and not any(
                x == wrong for x in [br, bl]
            ):
                expert_action = d

            elif hitting != none and tr == br == bl == tl:
                # Only possible if in 0 vis setting
                if tr == self.env.WRONG_CORNER or last_action == hitting:
                    if last_action == r:
                        expert_action = u
                    elif last_action == u:
                        expert_action = l
                    elif last_action == l:
                        expert_action = d
                    elif last_action == d:
                        expert_action = r
                    else:
                        raise NotImplementedError()
                else:
                    expert_action = last_action

            elif last_action == r and tr == wall:
                expert_action = u

            elif last_action == u and tl == wall:
                expert_action = l

            elif last_action == l and bl == wall:
                expert_action = d

            elif last_action == d and br == wall:
                expert_action = r

            elif last_action == none:
                expert_action = r

            else:
                expert_action = last_action

            policy = np.array(
                [
                    expert_action == d,
                    expert_action == r,
                    expert_action == u,
                    expert_action == l,
                ]
            )
        else:
            raise NotImplementedError("Can only query expert for world dims of 1 or 2.")

        if return_policy:
            return policy, True
        elif deterministic:
            return int(np.argmax(policy)), True
        else:
            return (
                int(np.argmax(np.random.multinomial(1, policy / (1.0 * policy.sum())))),
                True,
            )


class FindGoalLightHouseTaskSampler(TaskSampler):
    """Sampler producing `FindGoalLightHouseTask`s on a single, reused
    `LightHouseEnvironment`.

    Each sampled task re-seeds and randomly resets the environment. Seeds are
    either drawn freely from the sampler's random generator or, when
    `num_unique_seeds` / `task_seeds_list` is given, restricted to a fixed
    list (cycled in order when `deterministic_sampling` is `True`).
    """

    def __init__(
        self,
        world_dim: int,
        world_radius: int,
        sensors: Union[SensorSuite, List[Sensor]],
        max_steps: int,
        max_tasks: Optional[int] = None,
        num_unique_seeds: Optional[int] = None,
        task_seeds_list: Optional[List[int]] = None,
        deterministic_sampling: bool = False,
        seed: Optional[int] = None,
        **kwargs,
    ):
        """Initializer.

        # Parameters

        world_dim : Dimensionality of the lighthouse world.
        world_radius : Radius of the lighthouse world.
        sensors : Sensors (or a `SensorSuite`) given to every sampled task.
        max_steps : Maximum number of steps per task.
        max_tasks : Total number of tasks to sample; `None` means unbounded.
        num_unique_seeds : If given, tasks are generated from this many seeds.
        task_seeds_list : Explicit list of task seeds; if `num_unique_seeds`
            is also given the two must agree in length.
        deterministic_sampling : Cycle through the seed list in order rather
            than sampling from it. Requires a seed list.
        seed : Seed of the sampler's own random generator; drawn at random if
            `None`.
        kwargs : Ignored.
        """
        self.env = LightHouseEnvironment(world_dim=world_dim, world_radius=world_radius)

        self._last_sampled_task: Optional[FindGoalLightHouseTask] = None
        self.sensors = (
            SensorSuite(sensors) if not isinstance(sensors, SensorSuite) else sensors
        )
        self.max_steps = max_steps
        self.max_tasks = max_tasks
        self.num_tasks_generated = 0
        self.deterministic_sampling = deterministic_sampling

        self.num_unique_seeds = num_unique_seeds
        self.task_seeds_list = task_seeds_list
        assert (self.num_unique_seeds is None) or (
            0 < self.num_unique_seeds
        ), "`num_unique_seeds` must be a positive integer."

        # Reconcile the two ways of specifying the seed set so that, whenever
        # either is given, both attributes are populated and consistent.
        if self.task_seeds_list is not None:
            if self.num_unique_seeds is not None:
                assert self.num_unique_seeds == len(
                    self.task_seeds_list
                ), "`num_unique_seeds` must equal the length of `task_seeds_list` if both specified."
            self.num_unique_seeds = len(self.task_seeds_list)
        elif self.num_unique_seeds is not None:
            self.task_seeds_list = list(range(self.num_unique_seeds))

        assert (not deterministic_sampling) or (
            self.num_unique_seeds is not None
        ), "Cannot use deterministic sampling when `num_unique_seeds` is `None`."

        if (not deterministic_sampling) and self.max_tasks:
            get_logger().warning(
                "`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,"
                " this might be a mistake when running testing."
            )

        self.seed: int = int(
            seed if seed is not None else np.random.randint(0, 2**31 - 1)
        )
        self.np_seeded_random_gen: Optional[np.random.RandomState] = None
        self.set_seed(self.seed)

    @property
    def world_dim(self):
        """Dimensionality of the underlying environment."""
        return self.env.world_dim

    @property
    def world_radius(self):
        """Radius of the underlying environment."""
        return self.env.world_radius

    @property
    def length(self) -> Union[int, float]:
        """Number of tasks still to be sampled (`inf` if unbounded)."""
        return (
            float("inf")
            if self.max_tasks is None
            else self.max_tasks - self.num_tasks_generated
        )

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Upper bound on distinct tasks: `2 ** world_dim`, capped by
        `num_unique_seeds` when that is set."""
        n = 2**self.world_dim
        return n if self.num_unique_seeds is None else min(n, self.num_unique_seeds)

    @property
    def last_sampled_task(self) -> Optional[Task]:
        """The task most recently returned by `next_task` (or `None`)."""
        return self._last_sampled_task

    def next_task(self, force_advance_scene: bool = False) -> Optional[Task]:
        """Sample the next task, or return `None` once `max_tasks` tasks have
        been generated.

        `force_advance_scene` is accepted for interface compatibility and is
        not used.
        """
        if self.length <= 0:
            return None

        if self.num_unique_seeds is not None:
            if self.deterministic_sampling:
                seed = self.task_seeds_list[
                    self.num_tasks_generated % len(self.task_seeds_list)
                ]
            else:
                seed = self.np_seeded_random_gen.choice(self.task_seeds_list)
        else:
            seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)

        self.num_tasks_generated += 1

        self.env.set_seed(seed)
        self.env.random_reset()

        # Record the task so that `last_sampled_task` reflects it.
        self._last_sampled_task = FindGoalLightHouseTask(
            env=self.env, sensors=self.sensors, task_info={}, max_steps=self.max_steps
        )
        return self._last_sampled_task

    def close(self) -> None:
        """Nothing to release."""
        pass

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Always `True`: every task shares the same sensors."""
        return True

    def reset(self) -> None:
        """Restart sampling from the beginning with the original seed."""
        self.num_tasks_generated = 0
        self.set_seed(seed=self.seed)

    def set_seed(self, seed: int) -> None:
        """Seed global randomness (via `set_seed`) and recreate the sampler's
        own random generator from `seed`."""
        set_seed(seed)
        self.np_seeded_random_gen, _ = seeding.np_random(seed)
        self.seed = seed


================================================
FILE: allenact_plugins/lighthouse_plugin/lighthouse_util.py
================================================
import numpy as np

from allenact.utils.experiment_utils import EarlyStoppingCriterion, ScalarMeanTracker


class StopIfNearOptimal(EarlyStoppingCriterion):
    """Early stopping criterion that triggers once the recent mean episode
    length is within `deviation` of `optimal`.

    Recent episode lengths are approximated with a circular buffer: on every
    call, `count` slots (the number of episodes reported by the tracker) are
    overwritten with the tracker's mean `"ep_length"`. No decision is made
    until the buffer has been completely filled at least once.
    """

    def __init__(self, optimal: float, deviation: float, min_memory_size: int = 100):
        """Initializer.

        # Parameters

        optimal : The optimal (target) mean episode length.
        deviation : Allowed slack above `optimal`.
        min_memory_size : Initial number of slots in the circular buffer; the
            buffer grows if a single call reports more episodes than this.
        """
        self.optimal = optimal
        self.deviation = deviation

        self.current_pos = 0
        self.has_filled = False
        self.memory: np.ndarray = np.zeros(min_memory_size)

    def __call__(
        self,
        stage_steps: int,
        total_steps: int,
        training_metrics: ScalarMeanTracker,
    ) -> bool:
        """Update the buffer from `training_metrics` and report whether
        training should stop.

        # Returns

        `True` iff the buffer has been filled and its mean is strictly below
        `optimal + deviation`.
        """
        sums = training_metrics.sums()
        counts = training_metrics.counts()

        k = "ep_length"
        if k in sums:
            count = counts[k]
            ep_length_ave = sums[k] / count

            n = self.memory.shape[0]
            if count >= n:
                if count > n:
                    # Increase memory size to fit all of the new values
                    self.memory = np.full(count, fill_value=ep_length_ave)
                else:
                    # We have exactly as many values as the memory size,
                    # simply set the whole memory to be equal to the new
                    # average ep length.
                    self.memory[:] = ep_length_ave
                self.current_pos = 0
                self.has_filled = True
            else:
                # Write `count` slots starting at the cursor; numpy clips the
                # slice at the end of the buffer, the remainder wraps around.
                end = self.current_pos + count
                self.memory[self.current_pos : end] = ep_length_ave

                if end >= n:
                    self.has_filled = True
                    end = end % n
                    self.memory[:end] = ep_length_ave

                # Always advance the cursor so that successive calls fill the
                # buffer instead of repeatedly overwriting the same slots.
                self.current_pos = end

        if not self.has_filled:
            return False

        return bool(self.memory.mean() < self.optimal + self.deviation)


================================================
FILE: allenact_plugins/lighthouse_plugin/scripts/__init__.py
================================================


================================================
FILE: allenact_plugins/manipulathor_plugin/__init__.py
================================================
from allenact.utils.system import ImportChecker

with ImportChecker(
    "Cannot `import ai2thor`, please install `ai2thor` (`pip install ai2thor`)."
):
    # noinspection PyUnresolvedReferences
    import ai2thor


================================================
FILE: allenact_plugins/manipulathor_plugin/arm_calculation_utils.py
================================================
"""Utility classes and functions for calculating the arm relative and absolute
position."""

from typing import Dict

import numpy as np
import torch
from scipy.spatial.transform import Rotation as R

from allenact.utils.system import get_logger


def state_dict_to_tensor(state: Dict):
    """Flatten a state dict into a 1-D tensor.

    The `x`, `y`, `z` values of `state["position"]` (if present) come first,
    followed by those of `state["rotation"]` (if present).
    """
    values = []
    for field in ("position", "rotation"):
        if field in state:
            values += [state[field]["x"], state[field]["y"], state[field]["z"]]
    return torch.Tensor(values)


def diff_position(state_goal, state_curr, absolute: bool = True):
    """Per-axis difference between the positions of two states.

    Returns a dict with the keys of `state_goal["position"]`, mapping each to
    `goal - current` (or its absolute value when `absolute` is `True`).
    """
    goal_pos = state_goal["position"]
    curr_pos = state_curr["position"]

    deltas = {}
    for axis in goal_pos.keys():
        delta = goal_pos[axis] - curr_pos[axis]
        deltas[axis] = abs(delta) if absolute else delta
    return deltas


def coord_system_transform(position: Dict, coord_system: str):
    """Express an `x`/`y`/`z` position in the requested coordinate system.

    # Parameters

    position : Dict with `"x"`, `"y"` and `"z"` entries.
    coord_system : One of
        * `"xyz_signed"` -- `[x, y, z]`
        * `"xyz_unsigned"` -- `[|x|, |y|, |z|]`
        * `"polar_radian"` -- `[r, elevation / (pi / 2), azimuth / pi]`
        * `"polar_trigo"` -- `[r, cos(el), sin(el), cos(az), sin(az)]`

    # Returns

    A 1-D `torch.Tensor` with the components listed above.
    """
    assert coord_system in [
        "xyz_unsigned",
        "xyz_signed",
        "polar_radian",
        "polar_trigo",
    ]

    x, y, z = position["x"], position["y"], position["z"]

    if "xyz" in coord_system:
        cartesian = torch.Tensor([x, y, z])
        return torch.abs(cartesian) if coord_system == "xyz_unsigned" else cartesian

    planar_norm = np.hypot(x, y)
    radius = np.hypot(planar_norm, z)
    elevation = np.arctan2(z, planar_norm)  # elevation angle: [-pi/2, pi/2]
    azimuth = np.arctan2(y, x)  # azimuthal angle: [-pi, pi]

    if coord_system == "polar_radian":
        # Angles are normalized to [-1, 1].
        components = [radius, elevation / (0.5 * np.pi), azimuth / np.pi]
    else:  # polar_trigo
        components = [
            radius,
            np.cos(elevation),
            np.sin(elevation),
            np.cos(azimuth),
            np.sin(azimuth),
        ]
    return torch.Tensor(components)


def position_rotation_to_matrix(position, rotation):
    """Build a 4x4 homogeneous transform from a position and a rotation.

    `rotation` holds `"xyz"` Euler angles in degrees; `position` holds the
    translation. Both are dicts keyed by `"x"`, `"y"`, `"z"`.
    """
    euler_degrees = [rotation["x"], rotation["y"], rotation["z"]]
    rotation_block = R.from_euler("xyz", euler_degrees, degrees=True).as_matrix()

    transform = np.zeros((4, 4))
    transform[:3, :3] = rotation_block
    transform[3, 3] = 1
    transform[:3, 3] = [position["x"], position["y"], position["z"]]
    return transform


def inverse_rot_trans_matrix(mat):
    """Return the matrix inverse of `mat` (computed with `np.linalg.inv`)."""
    return np.linalg.inv(mat)


def matrix_to_position_rotation(matrix):
    """Split a homogeneous transform into position and rotation dicts.

    The upper-left 3x3 block is converted to `"xyz"` Euler angles in degrees
    and the first three entries of the last column are the translation.

    # Returns

    `{"position": {"x", "y", "z"}, "rotation": {"x", "y", "z"}}`.
    """
    euler_degrees = R.from_matrix(matrix[:3, :3]).as_euler("xyz", degrees=True)
    translation = matrix[:3, 3]

    axes = ("x", "y", "z")
    return {
        "position": {axis: translation[i] for i, axis in enumerate(axes)},
        "rotation": {axis: euler_degrees[i] for i, axis in enumerate(axes)},
    }


def find_closest_inverse(deg, use_cache):
    """Inverse of the rotation by `deg` degrees about the y axis.

    With `use_cache`, the first precomputed inverse whose angle is within 5
    degrees of `deg` is returned; if none matches, the inverse is computed
    and a warning is logged. Without `use_cache` the inverse is always
    computed directly.
    """
    if use_cache:
        for cached_deg, cached_inverse in _saved_inverse_rotation_mats.items():
            if abs(cached_deg - deg) < 5:
                return cached_inverse

    # No cached entry was usable: compute the inverse for this exact angle.
    yaw_matrix = R.from_euler("xyz", [0, deg, 0], degrees=True).as_matrix()
    computed_inverse = inverse_rot_trans_matrix(yaw_matrix)
    if use_cache:
        get_logger().warning(f"Had to calculate the matrix for {deg}")
    return computed_inverse


def calc_inverse(deg):
    """Inverse of the rotation matrix for `deg` degrees about the y axis."""
    yaw_matrix = R.from_euler("xyz", [0, deg, 0], degrees=True).as_matrix()
    return inverse_rot_trans_matrix(yaw_matrix)


# Precomputed inverse y-axis rotation matrices for every multiple of 45
# degrees in [0, 360), keyed by angle in degrees. The entry for 360 aliases
# the one for 0 so that angles just below 360 also find a nearby cached key.
_saved_inverse_rotation_mats = {i: calc_inverse(i) for i in range(0, 360, 45)}
_saved_inverse_rotation_mats[360] = _saved_inverse_rotation_mats[0]


def world_coords_to_agent_coords(world_obj, agent_state, use_cache=True):
    """Express the position of `world_obj` relative to the agent.

    The agent's translation is subtracted from the object's position and the
    result is rotated by the inverse of the agent's rotation about the y axis
    (the agent's x and z rotations must be approximately zero). The object's
    rotation is passed through unchanged.

    # Returns

    A `{"position": ..., "rotation": ...}` dict as produced by
    `matrix_to_position_rotation`.
    """
    agent_position = agent_state["position"]
    agent_rotation = agent_state["rotation"]
    agent_translation = [
        agent_position["x"],
        agent_position["y"],
        agent_position["z"],
    ]
    assert abs(agent_rotation["x"]) < 0.01 and abs(agent_rotation["z"]) < 0.01

    inverse_yaw = find_closest_inverse(agent_rotation["y"], use_cache=use_cache)
    transform = position_rotation_to_matrix(
        world_obj["position"], world_obj["rotation"]
    )

    relative_translation = transform[:3, 3] - agent_translation
    # add rotation later
    transform[:3, 3] = np.matmul(inverse_yaw, relative_translation)
    return matrix_to_position_rotation(transform)


================================================
FILE: allenact_plugins/manipulathor_plugin/armpointnav_constants.py
================================================
import json
import os
from typing import Dict, Optional, Any

from constants import ABS_PATH_OF_TOP_LEVEL_DIR

# Object types, split into two disjoint lists.
# NOTE(review): presumably the objects used for training vs. testing -- confirm with callers.
TRAIN_OBJECTS = ["Apple", "Bread", "Tomato", "Lettuce", "Pot", "Mug"]
TEST_OBJECTS = ["Potato", "SoapBottle", "Pan", "Egg", "Spatula", "Cup"]
# Arm movement magnitudes; the height magnitude equals the general one.
MOVE_ARM_CONSTANT = 0.05
MOVE_ARM_HEIGHT_CONSTANT = MOVE_ARM_CONSTANT
# Default per-axis threshold used when deciding whether an object has moved.
UNWANTED_MOVE_THR = 0.01
DISTANCE_EPS = 1e-9
# Cap applied to distances that are NaN or larger than this value.
DISTANCE_MAX = 10.0

# Path of the JSON file read by `get_agent_start_positions`.
dataset_json_file = os.path.join(
    ABS_PATH_OF_TOP_LEVEL_DIR, "datasets", "apnd-dataset", "starting_pose.json"
)

# Lazily populated cache of the parsed contents of `dataset_json_file`.
_ARM_START_POSITIONS: Optional[Dict[str, Any]] = None


def get_agent_start_positions():
    """Return the parsed contents of `dataset_json_file`, loading it once.

    The file is read on the first call and cached in the module-level
    `_ARM_START_POSITIONS`; later calls return the cached object.

    # Raises

    Exception : If the dataset file cannot be opened or parsed (the
        underlying error is attached as the cause).
    """
    global _ARM_START_POSITIONS
    # Load only when the cache is still empty.
    if _ARM_START_POSITIONS is None:
        try:
            with open(dataset_json_file) as f:
                _ARM_START_POSITIONS = json.load(f)
        except Exception as e:
            raise Exception(f"Dataset not found in {dataset_json_file}") from e

    return _ARM_START_POSITIONS


================================================
FILE: allenact_plugins/manipulathor_plugin/manipulathor_constants.py
================================================
"""Constant values and hyperparameters that are used by the environment."""

import ai2thor.fifo_server


# Lower and upper bounds of the arm height, used to normalize the arm height.
# NOTE(review): presumably world-space y values for an agent at its base height -- confirm.
ARM_MIN_HEIGHT = 0.450998873
ARM_MAX_HEIGHT = 1.8009994


# Extra keyword arguments for arm actions.
# NOTE(review): assumed to be merged into arm-movement controller steps -- verify against callers.
ADDITIONAL_ARM_ARGS = {
    "disableRendering": True,
    "returnToStart": True,
    "speed": 1,
}

# String identifiers of the available agent actions. The `_P` / `_M` suffixes
# pair up a positive and a negative variant of the same movement.
MOVE_AHEAD = "MoveAheadContinuous"
MOVE_BACK = "MoveBackContinuous"
ROTATE_LEFT = "RotateLeftContinuous"
ROTATE_RIGHT = "RotateRightContinuous"
MOVE_ARM_HEIGHT_P = "MoveArmHeightP"
MOVE_ARM_HEIGHT_M = "MoveArmHeightM"
MOVE_ARM_X_P = "MoveArmXP"
MOVE_ARM_X_M = "MoveArmXM"
MOVE_ARM_Y_P = "MoveArmYP"
MOVE_ARM_Y_M = "MoveArmYM"
MOVE_ARM_Z_P = "MoveArmZP"
MOVE_ARM_Z_M = "MoveArmZM"
ROTATE_WRIST_PITCH_P = "RotateArmWristPitchP"
ROTATE_WRIST_PITCH_M = "RotateArmWristPitchM"
ROTATE_WRIST_YAW_P = "RotateArmWristYawP"
ROTATE_WRIST_YAW_M = "RotateArmWristYawM"
ROTATE_WRIST_ROLL_P = "RotateArmWristRollP"
ROTATE_WRIST_ROLL_M = "RotateArmWristRollM"
ROTATE_ELBOW_P = "RotateArmElbowP"
ROTATE_ELBOW_M = "RotateArmElbowM"
LOOK_UP = "LookUp"
LOOK_DOWN = "LookDown"
PICKUP = "PickUpMidLevel"
DROP = "DropMidLevel"
DONE = "DoneMidLevel"


# Default keyword arguments for an ai2thor controller in arm mode.
# NOTE(review): presumably passed as `env_args` when constructing the environment -- verify against callers.
ENV_ARGS = dict(
    gridSize=0.25,
    width=224,
    height=224,
    visibilityDistance=1.0,
    agentMode="arm",
    fieldOfView=100,
    agentControllerType="mid-level",
    server_class=ai2thor.fifo_server.FifoServer,
    useMassThreshold=True,
    massThreshold=10,
    autoSimulation=False,
    autoSyncTransforms=True,
)

# Object type names considered valid.
# NOTE(review): presumably the objects the arm can interact with -- confirm with callers.
VALID_OBJECT_LIST = [
    "Knife",
    "Bread",
    "Fork",
    "Potato",
    "SoapBottle",
    "Pan",
    "Plate",
    "Tomato",
    "Egg",
    "Pot",
    "Spatula",
    "Cup",
    "Bowl",
    "SaltShaker",
    "PepperShaker",
    "Lettuce",
    "ButterKnife",
    "Apple",
    "DishSponge",
    "Spoon",
    "Mug",
]


================================================
FILE: allenact_plugins/manipulathor_plugin/manipulathor_environment.py
================================================
"""A wrapper for engaging with the ManipulaTHOR environment."""

import copy
import math
import warnings
from typing import Dict, Union, Any, Optional, cast

import ai2thor.server
import numpy as np
from ai2thor.controller import Controller

from allenact.utils.misc_utils import prepare_locals_for_super
from allenact_plugins.ithor_plugin.ithor_constants import VISIBILITY_DISTANCE, FOV
from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
from allenact_plugins.manipulathor_plugin.armpointnav_constants import (
    MOVE_ARM_HEIGHT_CONSTANT,
    MOVE_ARM_CONSTANT,
    UNWANTED_MOVE_THR,
    DISTANCE_MAX,
)
from allenact_plugins.manipulathor_plugin.manipulathor_constants import (
    ADDITIONAL_ARM_ARGS,
    ARM_MIN_HEIGHT,
    ARM_MAX_HEIGHT,
)
from allenact_plugins.manipulathor_plugin.manipulathor_constants import (
    ROTATE_WRIST_PITCH_P,
    ROTATE_WRIST_PITCH_M,
    ROTATE_WRIST_YAW_P,
    ROTATE_WRIST_YAW_M,
    ROTATE_ELBOW_P,
    ROTATE_ELBOW_M,
    LOOK_UP,
    LOOK_DOWN,
    MOVE_AHEAD,
    ROTATE_RIGHT,
    ROTATE_LEFT,
    PICKUP,
    DONE,
)
from allenact_plugins.manipulathor_plugin.manipulathor_utils import (
    reset_environment_and_additional_commands,
)


def position_distance(s1, s2, filter_nan: bool = False):
    """Euclidean distance between the `"position"` entries of two states.

    When `filter_nan` is `True`, a distance that is NaN or greater than
    `DISTANCE_MAX` is replaced by `DISTANCE_MAX`.
    """
    first = s1["position"]
    second = s2["position"]

    dx = first["x"] - second["x"]
    dy = first["y"] - second["y"]
    dz = first["z"] - second["z"]
    dist = (dx**2 + dy**2 + dz**2) ** 0.5

    if not filter_nan:
        return dist
    if math.isnan(dist) or dist > DISTANCE_MAX:
        return DISTANCE_MAX
    return dist


def rotation_distance(s1: Dict[str, Dict[str, float]], s2: Dict[str, Dict[str, float]]):
    """Sum over the x, y and z axes of the smallest angular difference (in
    degrees) between the `"rotation"` entries of two states."""
    first = s1["rotation"]
    second = s2["rotation"]

    def shortest_arc(angle_a: float, angle_b: float):
        # Difference wrapped into [0, 360), then folded to the shorter way round.
        wrapped = (angle_a - angle_b) % 360
        return min(wrapped, 360 - wrapped)

    return sum(shortest_arc(first[axis], second[axis]) for axis in ["x", "y", "z"])


class ManipulaTHOREnvironment(IThorEnvironment):
    """Wrapper for the manipulathor controller providing arm functionality and
    bookkeeping.

    See [here](https://ai2thor.allenai.org/documentation/installation) for comprehensive
     documentation on AI2-THOR.

    # Attributes

    controller : The ai2thor controller.
    """

    def __init__(
        self,
        x_display: Optional[str] = None,
        docker_enabled: bool = False,
        local_thor_build: Optional[str] = None,
        visibility_distance: float = VISIBILITY_DISTANCE,
        fov: float = FOV,
        player_screen_width: int = 224,
        player_screen_height: int = 224,
        quality: str = "Very Low",
        restrict_to_initially_reachable_points: bool = False,
        make_agents_visible: bool = True,
        object_open_speed: float = 1.0,
        simplify_physics: bool = False,
        verbose: bool = False,
        env_args=None,
    ) -> None:
        """Initializer.

        # Parameters

        x_display : The x display into which to launch ai2thor (possibly necessary if you are running on a server
            without an attached display).
        docker_enabled : Whether or not to run thor in a docker container (useful on a server without an attached
            display so that you don't have to start an x display).
        local_thor_build : The path to a local build of ai2thor. This is probably not necessary for your use case
            and can be safely ignored.
        visibility_distance : The distance (in meters) at which objects, in the viewport of the agent,
            are considered visible by ai2thor and will have their "visible" flag be set to `True` in the metadata.
        fov : The agent's camera's field of view.
        player_screen_width : The width resolution (in pixels) of the images returned by ai2thor.
        player_screen_height : The height resolution (in pixels) of the images returned by ai2thor.
        quality : The quality at which to render. Possible quality settings can be found in
            `ai2thor._quality_settings.QUALITY_SETTINGS`.
        restrict_to_initially_reachable_points : Whether or not to restrict the agent to locations in ai2thor
            that were found to be (initially) reachable by the agent (i.e. reachable by the agent after resetting
            the scene). This can be useful if you want to ensure there are only a fixed set of locations where the
            agent can go.
        make_agents_visible : Whether or not the agent should be visible. Most noticeable when there are multiple agents
            or when quality settings are high so that the agent casts a shadow.
        object_open_speed : How quickly objects should be opened. High speeds mean faster simulation but also mean
            that opening objects have a lot of kinetic energy and can, possibly, knock other objects away.
        simplify_physics : Whether or not to simplify physics when applicable. Currently this only simplifies object
            interactions when opening drawers (when simplified, objects within a drawer do not slide around on
            their own when the drawer is opened or closed, instead they are effectively glued down).
        verbose : Stored on the instance as `_verbose`.
        env_args : Keyword arguments stored on the instance as `env_args` and used to construct the
            ai2thor `Controller` in `create_controller`.
        """
        self._verbose = verbose
        self.env_args = env_args
        # Remove the arguments handled here from the local namespace before it
        # is forwarded to the parent initializer.
        # NOTE(review): presumably `prepare_locals_for_super` passes every remaining local as a keyword -- confirm.
        del verbose
        del env_args
        super(ManipulaTHOREnvironment, self).__init__(
            **prepare_locals_for_super(locals())
        )

    def create_controller(self):
        """Construct a new ai2thor `Controller` from `self.env_args`."""
        return Controller(**self.env_args)

    def start(
        self,
        scene_name: Optional[str],
        move_mag: float = 0.25,
        **kwargs,
    ) -> None:
        """Starts the ai2thor controller if it was previously stopped.

        After starting, `reset` will be called with the scene name and move magnitude.

        # Parameters

        scene_name : The scene to load.
        move_mag : The amount of distance the agent moves in a single `MoveAhead` step.
        kwargs : additional kwargs, passed to reset.

        # Raises

        RuntimeError : If the environment has already been started.
        """
        if self._started:
            raise RuntimeError(
                "Trying to start the environment but it is already started."
            )

        # The controller must exist before `reset` is called, as `reset` uses it.
        self.controller = self.create_controller()

        self._started = True
        self.reset(scene_name=scene_name, move_mag=move_mag, **kwargs)

    def reset(
        self,
        scene_name: Optional[str],
        move_mag: float = 0.25,
        **kwargs,
    ):
        """Reset the environment to `scene_name`.

        If resetting the current controller raises, a fresh controller is
        created with `create_controller` and the reset is retried once.
        Afterwards the object open speed is applied (if not `1.0`), the
        initially reachable positions are queried and cached, and the action
        history is cleared.

        # Parameters

        scene_name : The scene to load; if `None`, the controller's current
            scene is reloaded.
        move_mag : The move magnitude, also used as the grid size.
        kwargs : Accepted for interface compatibility; not used.
        """
        self._move_mag = move_mag
        self._grid_size = self._move_mag

        if scene_name is None:
            scene_name = self.controller.last_event.metadata["sceneName"]
        # self.reset_init_params()#**kwargs) removing this fixes one of the crashing problem

        # to solve the crash issue
        # TODO why do we still have this crashing problem?
        try:
            reset_environment_and_additional_commands(self.controller, scene_name)
        except Exception as e:
            print("RESETTING THE SCENE,", scene_name, "because of", str(e))
            # Build the replacement through `create_controller` so that
            # recovery uses the same construction path as `start` (and
            # respects subclass overrides).
            self.controller = self.create_controller()
            reset_environment_and_additional_commands(self.controller, scene_name)

        if self.object_open_speed != 1.0:
            self.controller.step(
                {"action": "ChangeOpenSpeed", "x": self.object_open_speed}
            )

        self._initially_reachable_points = None
        self._initially_reachable_points_set = None
        self.controller.step({"action": "GetReachablePositions"})
        if not self.controller.last_event.metadata["lastActionSuccess"]:
            warnings.warn(
                "Error when getting reachable points: {}".format(
                    self.controller.last_event.metadata["errorMessage"]
                )
            )
        self._initially_reachable_points = self.last_action_return

        self.list_of_actions_so_far = []

    def randomize_agent_location(
        self, seed: Optional[int] = None, partial_position: Optional[Dict[str, float]] = None
    ) -> Dict:
        """Not supported by this environment; always raises `NotImplementedError`."""
        raise NotImplementedError

    def is_object_at_low_level_hand(self, object_id):
        """Whether `object_id` is among the arm's `"heldObjects"` in the
        controller's last event metadata."""
        arm_metadata = self.controller.last_event.metadata["arm"]
        return object_id in arm_metadata["heldObjects"]

    def object_in_hand(self):
        """Object metadata for the object in the agent's hand.

        Returns `None` when the inventory is empty.

        # Raises

        AttributeError : If more than one object is in the inventory.
        """
        inventory = self.last_event.metadata["inventoryObjects"]
        if len(inventory) == 0:
            return None
        if len(inventory) != 1:
            raise AttributeError("Must be <= 1 inventory objects.")

        held_id = self.last_event.metadata["inventoryObjects"][0]["objectId"]
        return self.get_object_by_id(held_id)

    @classmethod
    def correct_nan_inf(cls, flawed_dict, extra_tag=""):
        corrected_dict = copy.deepcopy(flawed_dict)
        for k, v in corrected_dict.items():
            if math.isnan(v) or math.isinf(v):
                corrected_dict[k] = 0
        return corrected_dict

    def get_object_by_id(self, object_id: str) -> Optional[Dict[str, Any]]:
        """Metadata of the object with id `object_id` from the last event, or
        `None` if there is no such object.

        The returned object's `"position"` is replaced (in the metadata
        itself) by a copy with NaN/inf values set to `0`.
        """
        for candidate in self.last_event.metadata["objects"]:
            if candidate["objectId"] != object_id:
                continue
            candidate["position"] = self.correct_nan_inf(
                candidate["position"], "obj id"
            )
            return candidate
        return None

    def get_current_arm_state(self):
        """Root-relative position of the last arm joint plus a normalized arm
        height.

        # Returns

        A copy of the last joint's `"rootRelativePosition"` with an extra
        `"h"` entry: the y position of the first joint rescaled so that the
        (agent-height adjusted) `ARM_MIN_HEIGHT` maps to 0 and
        `ARM_MAX_HEIGHT` maps to 1. NaN/inf values are replaced by `0`.
        """
        agent_base_location = 0.9009995460510254
        event = self.controller.last_event

        # Shift the height bounds by how far the agent is from its base height.
        offset = event.metadata["agent"]["position"]["y"] - agent_base_location
        highest = ARM_MAX_HEIGHT + offset
        lowest = ARM_MIN_HEIGHT + offset

        joints = event.metadata["arm"]["joints"]
        last_joint = joints[-1]
        assert last_joint["name"] == "robot_arm_4_jnt"

        arm_state = copy.deepcopy(last_joint["rootRelativePosition"])
        first_joint_height = joints[0]["position"]["y"]
        arm_state["h"] = (first_joint_height - lowest) / (highest - lowest)
        return self.correct_nan_inf(arm_state, "realtive hand")

    def get_absolute_hand_state(self):
        """Position of the last arm joint, paired with a zero rotation.

        # Returns

        `{"position": ..., "rotation": {"x": 0, "y": 0, "z": 0}}` where the
        position is a copy of the joint's `"position"` with NaN/inf values
        replaced by `0`.
        """
        joints = self.controller.last_event.metadata["arm"]["joints"]
        last_joint = copy.deepcopy(joints[-1])
        assert last_joint["name"] == "robot_arm_4_jnt"

        hand_position = self.correct_nan_inf(last_joint["position"], "absolute hand")
        return dict(position=hand_position, rotation={"x": 0, "y": 0, "z": 0})

    def get_pickupable_objects(self):
        """Return the `pickupableObjects` entry of the last event's arm metadata."""
        return self.controller.last_event.metadata["arm"]["pickupableObjects"]

    def get_current_object_locations(self):
        """Snapshot position, rotation and visibility of every object.

        # Returns

        A dict mapping each `objectId` in the last event's object metadata to
        `dict(position=..., rotation=..., visible=...)`. The result is a deep
        copy, so it does not alias the event metadata.
        """
        snapshot = {
            obj["objectId"]: {
                "position": obj["position"],
                "rotation": obj["rotation"],
                "visible": obj["visible"],
            }
            for obj in self.controller.last_event.metadata["objects"]
        }
        return copy.deepcopy(snapshot)

    def close_enough(self, current_obj_pose, init_obj_pose, threshold):
        """Check whether two poses differ by at most `threshold` on every axis.

        # Parameters

        current_obj_pose : Dict with a `"position"` dict holding `x`, `y`, `z`.
        init_obj_pose : Dict of the same layout to compare against.
        threshold : Maximum allowed absolute difference per axis.

        # Returns

        `True` only if the absolute difference is `<= threshold` for all of
        `x`, `y` and `z`.
        """
        current_pos = current_obj_pose["position"]
        reference_pos = init_obj_pose["position"]
        # Build the full list first so every axis is always evaluated.
        per_axis_ok = [
            abs(current_pos[axis] - reference_pos[axis]) <= threshold
            for axis in ("x", "y", "z")
        ]
        return all(per_axis_ok)

    def get_objects_moved(
        self,
        previous_object_locations,
        current_object_locations,
        target_object_id,
        thres_dict: Optional[Dict] = None,
    ):
        """List the non-target objects whose position changed beyond a threshold.

        # Parameters

        previous_object_locations : Mapping from object id to an earlier pose.
        current_object_locations : Mapping from object id to the current pose.
        target_object_id : Object id that is never reported as moved.
        thres_dict : Optional per-object thresholds keyed by
            `"<scene id>-<object id>"`; the larger of this value and
            `UNWANTED_MOVE_THR` is used.

        # Returns

        Ids of objects present in both mappings (excluding the target) for
        which `close_enough` is false.
        """
        scene_id = self.scene_name.split("_")[0]
        moved_objects = []

        for object_id, current_pose in current_object_locations.items():
            is_target = object_id == target_object_id
            if is_target or object_id not in previous_object_locations:
                continue

            threshold = UNWANTED_MOVE_THR
            if thres_dict is not None:
                threshold = max(threshold, thres_dict[scene_id + "-" + object_id])

            if self.close_enough(
                current_pose,
                previous_object_locations[object_id],
                threshold=threshold,
            ):
                continue
            moved_objects.append(object_id)

        return moved_objects

    def get_objects_move_distance(
        self,
        initial_object_locations,
        previous_object_locations,
        current_object_locations,
        target_object_id,
        only_visible: bool = False,
        thres_dict: Optional[Dict] = None,
    ):
        """Sum, over non-target objects, the change in displacement from the start.

        For each object the distance from its initial location is computed for
        both the current and the previous location (via `position_distance`),
        each is reduced by the object's threshold and clipped at zero, and the
        difference (current minus previous) is accumulated.

        # Parameters

        initial_object_locations : Mapping from object id to the initial pose.
        previous_object_locations : Mapping from object id to the previous pose.
        current_object_locations : Mapping from object id to the current pose.
        target_object_id : Object id that is excluded.
        only_visible : If true, skip objects whose current `"visible"` is false.
        thres_dict : Optional per-object thresholds keyed by
            `"<scene id>-<object id>"`.

        # Returns

        The sum of the per-object differences (0 when no object qualifies).
        """
        scene_id = self.scene_name.split("_")[0]
        per_object_change = []

        for object_id, current_pose in current_object_locations.items():
            if object_id == target_object_id:
                continue
            if object_id not in previous_object_locations:
                continue
            if only_visible and not current_pose["visible"]:
                continue

            dist_now = position_distance(
                current_pose,
                initial_object_locations[object_id],
                filter_nan=True,
            )
            dist_before = position_distance(
                previous_object_locations[object_id],
                initial_object_locations[object_id],
                filter_nan=True,
            )

            threshold = 0.0
            if thres_dict is not None:
                threshold = max(threshold, thres_dict[scene_id + "-" + object_id])

            # Displacements below the threshold count as no displacement.
            clipped_now = max(0.0, dist_now - threshold)
            clipped_before = max(0.0, dist_before - threshold)
            per_object_change.append(clipped_now - clipped_before)

        return sum(per_object_change)

    def step(
        self, action_dict: Dict[str, Union[str, int, float]]
    ) -> ai2thor.server.Event:
        """Take a step in the ai2thor environment.

        The mid-level action named in `action_dict["action"]` is translated
        into the action dict that is finally sent to `self.controller.step`:

        * `PICKUP` / `DONE` are sent as `"Pass"`; for `PICKUP` a nested
          `"PickupObject"` (and possibly `"ReleaseObject"`) step may be issued
          first.
        * `MOVE_AHEAD` / `ROTATE_LEFT` / `ROTATE_RIGHT` become `"MoveAgent"` /
          `"RotateAgent"`.
        * Actions containing `"MoveArm"` become `"MoveArmBase"` or `"MoveArm"`
          with a target computed from `get_current_arm_state`.
        * Actions containing `"RotateArm"` become wrist / elbow relative
          rotations.
        * `LOOK_UP` / `LOOK_DOWN` only get the additional arm arguments.
        * Any other action is forwarded unchanged.

        # Parameters

        action_dict : Dict with at least the key `"action"`. `PICKUP`
            additionally reads `"object_id"`. If `"renderImage"` is present
            and falsy, the frame from before the step is written back onto
            the resulting event. Note that when `self.simplify_physics` is set
            the passed dict itself gets a `"simplifyPhysics"` entry.

        # Returns

        The value returned by `self.controller.step` for the final action.

        # Raises

        ValueError: for an action containing `"RotateArm"` that is not one of
            the known wrist / elbow rotation actions.
        """
        action = cast(str, action_dict["action"])

        # Remember the current frame so it can be restored when rendering is skipped.
        skip_render = "renderImage" in action_dict and not action_dict["renderImage"]
        last_frame: Optional[np.ndarray] = None
        if skip_render:
            last_frame = self.current_frame

        # NOTE: this writes into the caller's dict (it happens before any copy is made).
        if self.simplify_physics:
            action_dict["simplifyPhysics"] = True
        if action in [PICKUP, DONE]:
            if action == PICKUP:
                object_id = action_dict["object_id"]
                if not self.is_object_at_low_level_hand(object_id):
                    pickupable_objects = self.get_pickupable_objects()
                    # Only attempt the pickup when the requested object is reported as pickupable.
                    if object_id in pickupable_objects:
                        # This version of the task is actually harder # consider making it easier, are we penalizing failed pickup? yes
                        self.step(dict(action="PickupObject"))
                        #  we are doing an additional pass here, label is not right and if we fail we will do it twice
                        object_inventory = self.controller.last_event.metadata["arm"][
                            "heldObjects"
                        ]
                        # Something was grabbed, but not the requested object: let go of it again.
                        if (
                            len(object_inventory) > 0
                            and object_id not in object_inventory
                        ):
                            self.step(dict(action="ReleaseObject"))
            # Both PICKUP and DONE end up as a plain "Pass" for the controller.
            action_dict = {"action": "Pass"}

        elif action in [MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT]:
            copy_additions = copy.deepcopy(ADDITIONAL_ARM_ARGS)

            # Entries of ADDITIONAL_ARM_ARGS override same-named entries of the incoming dict.
            action_dict = {**action_dict, **copy_additions}
            if action in [MOVE_AHEAD]:
                action_dict["action"] = "MoveAgent"
                action_dict["ahead"] = 0.2

            elif action in [ROTATE_RIGHT]:
                action_dict["action"] = "RotateAgent"
                action_dict["degrees"] = 45

            elif action in [ROTATE_LEFT]:
                action_dict["action"] = "RotateAgent"
                action_dict["degrees"] = -45

        elif "MoveArm" in action:
            copy_additions = copy.deepcopy(ADDITIONAL_ARM_ARGS)
            action_dict = {**action_dict, **copy_additions}
            # Arm targets are expressed relative to the current arm state.
            base_position = self.get_current_arm_state()
            if "MoveArmHeight" in action:
                action_dict["action"] = "MoveArmBase"

                if action == "MoveArmHeightP":
                    base_position["h"] += MOVE_ARM_HEIGHT_CONSTANT
                if action == "MoveArmHeightM":
                    base_position[
                        "h"
                    ] -= MOVE_ARM_HEIGHT_CONSTANT  # height is pretty big!
                action_dict["y"] = base_position["h"]
            else:
                action_dict["action"] = "MoveArm"
                if action == "MoveArmXP":
                    base_position["x"] += MOVE_ARM_CONSTANT
                elif action == "MoveArmXM":
                    base_position["x"] -= MOVE_ARM_CONSTANT
                elif action == "MoveArmYP":
                    base_position["y"] += MOVE_ARM_CONSTANT
                elif action == "MoveArmYM":
                    base_position["y"] -= MOVE_ARM_CONSTANT
                elif action == "MoveArmZP":
                    base_position["z"] += MOVE_ARM_CONSTANT
                elif action == "MoveArmZM":
                    base_position["z"] -= MOVE_ARM_CONSTANT
                # Only x/y/z are sent as the target position; the "h" entry is dropped.
                action_dict["position"] = {
                    k: v for (k, v) in base_position.items() if k in ["x", "y", "z"]
                }

        elif "RotateArm" in action:
            copy_additions = copy.deepcopy(ADDITIONAL_ARM_ARGS)
            action_dict = {**action_dict, **copy_additions}

            if action == ROTATE_WRIST_PITCH_P:
                action_dict["action"] = "RotateWristRelative"
                action_dict["pitch"] = 15
            elif action == ROTATE_WRIST_PITCH_M:
                action_dict["action"] = "RotateWristRelative"
                action_dict["pitch"] = -15
            elif action == ROTATE_WRIST_YAW_P:
                action_dict["action"] = "RotateWristRelative"
                action_dict["yaw"] = 15
            elif action == ROTATE_WRIST_YAW_M:
                action_dict["action"] = "RotateWristRelative"
                action_dict["yaw"] = -15
            elif action == ROTATE_ELBOW_P:
                action_dict["action"] = "RotateElbowRelative"
                action_dict["degrees"] = 15
            elif action == ROTATE_ELBOW_M:
                action_dict["action"] = "RotateElbowRelative"
                action_dict["degrees"] = -15
            else:
                raise ValueError("invalid action " + str(action))

        elif action in [LOOK_UP, LOOK_DOWN]:
            copy_additions = copy.deepcopy(ADDITIONAL_ARM_ARGS)
            action_dict = {**action_dict, **copy_additions}
            # The action name is kept as is; only the additional arguments are merged in.
            if action == LOOK_UP:
                action_dict["action"] = LOOK_UP
            elif action == LOOK_DOWN:
                action_dict["action"] = LOOK_DOWN

        # there exists other actions e.g. "PlaceObjectAtPoint"
        # Such actions match none of the branches above and are forwarded as given.

        sr = self.controller.step(action_dict)
        # Record the action dict that was actually sent to the controller.
        self.list_of_actions_so_far.append(action_dict)

        if self._verbose:
            print(self.controller.last_event)

        if self.restrict_to_initially_reachable_points:
            self._snap_agent_to_initially_reachable()

        # Rendering was skipped: put the pre-step frame back on the last event.
        if skip_render:
            assert last_frame is not None
            self.last_event.frame = last_frame

        return sr


================================================
FILE: allenact_plugins/manipulathor_plugin/manipulathor_sensors.py
================================================
"""Utility classes and functions for sensory inputs used by the models."""

from typing import Any, Union, Optional

import gym
import numpy as np
from allenact.base_abstractions.sensor import Sensor
from allenact.embodiedai.sensors.vision_sensors import DepthSensor, RGBSensor
from allenact.base_abstractions.task import Task
from allenact.utils.misc_utils import prepare_locals_for_super

from allenact_plugins.manipulathor_plugin.arm_calculation_utils import (
    world_coords_to_agent_coords,
    state_dict_to_tensor,
    diff_position,
    coord_system_transform,
)
from allenact_plugins.manipulathor_plugin.manipulathor_environment import (
    ManipulaTHOREnvironment,
)


class DepthSensorThor(
    DepthSensor[
        ManipulaTHOREnvironment,
        Task[ManipulaTHOREnvironment],
    ]
):
    """Sensor for depth images in THOR.

    Returns, from a running ManipulaTHOREnvironment instance, a copy of the
    depth frame of the controller's last event.
    """

    def frame_from_env(
        self, env: ManipulaTHOREnvironment, task: Optional[Task]
    ) -> np.ndarray:
        """Return a copy of `depth_frame` from the environment's last event."""
        depth_frame = env.controller.last_event.depth_frame
        return depth_frame.copy()


class NoVisionSensorThor(
    RGBSensor[
        ManipulaTHOREnvironment,
        Task[ManipulaTHOREnvironment],
    ]
):
    """RGB-style sensor that hides the image content.

    Instead of the agent's current frame, it returns an all-zero array with
    the same shape and dtype as `env.current_frame`.
    """

    def frame_from_env(
        self, env: ManipulaTHOREnvironment, task: Optional[Task]
    ) -> np.ndarray:
        """Return zeros shaped like the environment's current frame."""
        reference_frame = env.current_frame
        return np.zeros_like(reference_frame)


class AgentRelativeCurrentObjectStateThorSensor(Sensor):
    """Sensor giving the task object's position and rotation after conversion
    with `world_coords_to_agent_coords` against the agent's current metadata."""

    def __init__(self, uuid: str = "relative_current_obj_state", **kwargs: Any):
        # Local names matter here: `locals()` is forwarded to the parent constructor.
        observation_space = gym.spaces.Box(
            low=-100, high=100, shape=(6,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return `state_dict_to_tensor` of the object's agent-relative position
        and rotation; the object is `task.task_info["objectId"]`."""
        object_state = env.get_object_by_id(task.task_info["objectId"])
        agent_metadata = env.controller.last_event.metadata["agent"]
        relative_state = world_coords_to_agent_coords(object_state, agent_metadata)

        pose = dict(
            position=relative_state["position"],
            rotation=relative_state["rotation"],
        )
        return state_dict_to_tensor(pose)


class RelativeObjectToGoalSensor(Sensor):
    """Sensor giving the signed offset between the task object and the goal
    location, both converted with `world_coords_to_agent_coords`, expressed in
    the configured coordinate system."""

    def __init__(
        self,
        uuid: str = "relative_obj_to_goal",
        coord_system: str = "xyz_unsigned",
        **kwargs: Any
    ):
        assert coord_system in [
            "xyz_unsigned",
            "xyz_signed",
            "polar_radian",
            "polar_trigo",
        ]
        self.coord_system = coord_system
        # "polar_trigo" uses a 5-dimensional observation, all others use 3.
        obs_dim = 5 if coord_system == "polar_trigo" else 3
        # Local names matter here: `locals()` is forwarded to the parent constructor.
        observation_space = gym.spaces.Box(
            low=-100, high=100, shape=(obs_dim,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return `coord_system_transform` of the object-to-goal difference.

        The object is `task.task_info["objectId"]` and the goal is
        `task.task_info["target_location"]`.
        """
        object_info = env.get_object_by_id(task.task_info["objectId"])
        target_state = task.task_info["target_location"]
        agent_state = env.controller.last_event.metadata["agent"]

        object_rel = world_coords_to_agent_coords(object_info, agent_state)
        goal_rel = world_coords_to_agent_coords(target_state, agent_state)
        offset = diff_position(object_rel, goal_rel, absolute=False)

        return coord_system_transform(offset, self.coord_system)


class InitialObjectToGoalSensor(Sensor):
    """Sensor giving the difference between the object's initial location and
    the goal location, both converted with `world_coords_to_agent_coords`
    against the agent's initial state stored in the task info."""

    def __init__(self, uuid: str = "initial_obj_to_goal", **kwargs: Any):
        # Local names matter here: `locals()` is forwarded to the parent constructor.
        observation_space = gym.spaces.Box(
            low=-100, high=100, shape=(3,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return `state_dict_to_tensor` of the initial-object-to-goal difference.

        Reads `initial_object_location`, `target_location` and
        `agent_initial_state` from `task.task_info`; `env` is not used.
        """
        info = task.task_info
        object_source_location = info["initial_object_location"]
        target_state = info["target_location"]
        agent_state = info["agent_initial_state"]

        source_rel = world_coords_to_agent_coords(object_source_location, agent_state)
        goal_rel = world_coords_to_agent_coords(target_state, agent_state)
        offset = diff_position(source_rel, goal_rel)

        return state_dict_to_tensor(dict(position=offset))


class DistanceObjectToGoalSensor(Sensor):
    """Sensor giving the Euclidean norm of the object-to-goal difference.

    The task object and the goal location are both converted with
    `world_coords_to_agent_coords` against the agent's current metadata, their
    `diff_position` is turned into a tensor, and its L2 norm is returned as a
    single-element result.
    """

    def __init__(self, uuid: str = "distance_obj_to_goal", **kwargs: Any):
        # `get_observation` reduces the offset to one value (`.view(1)`), so the
        # declared space has shape (1,) to match what is actually returned.
        # Local names matter here: `locals()` is forwarded to the parent constructor.
        observation_space = gym.spaces.Box(
            low=-100, high=100, shape=(1,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return the distance between the task object and the goal location.

        The object is `task.task_info["objectId"]` and the goal is
        `task.task_info["target_location"]`.

        # Returns

        The square root of the summed squared difference, viewed with shape (1,).
        """
        goal_obj_id = task.task_info["objectId"]
        object_info = env.get_object_by_id(goal_obj_id)
        target_state = task.task_info["target_location"]

        agent_state = env.controller.last_event.metadata["agent"]

        relative_current_obj = world_coords_to_agent_coords(object_info, agent_state)
        relative_goal_state = world_coords_to_agent_coords(target_state, agent_state)
        relative_distance = diff_position(relative_current_obj, relative_goal_state)
        result = state_dict_to_tensor(dict(position=relative_distance))

        # Collapse the per-axis offsets into a single L2 distance.
        result = ((result**2).sum() ** 0.5).view(1)
        return result


class RelativeAgentArmToObjectSensor(Sensor):
    """Sensor giving the signed offset between the task object and the arm's
    hand, both converted with `world_coords_to_agent_coords`, expressed in the
    configured coordinate system."""

    def __init__(
        self,
        uuid: str = "relative_agent_arm_to_obj",
        coord_system: str = "xyz_unsigned",
        **kwargs: Any
    ):
        assert coord_system in [
            "xyz_unsigned",
            "xyz_signed",
            "polar_radian",
            "polar_trigo",
        ]
        self.coord_system = coord_system
        # "polar_trigo" uses a 5-dimensional observation, all others use 3.
        obs_dim = 5 if coord_system == "polar_trigo" else 3
        # Local names matter here: `locals()` is forwarded to the parent constructor.
        observation_space = gym.spaces.Box(
            low=-100, high=100, shape=(obs_dim,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return `coord_system_transform` of the object-to-hand difference.

        The object is `task.task_info["objectId"]`; the hand state comes from
        `env.get_absolute_hand_state()`.
        """
        object_info = env.get_object_by_id(task.task_info["objectId"])
        hand_state = env.get_absolute_hand_state()

        object_rel = world_coords_to_agent_coords(
            object_info, env.controller.last_event.metadata["agent"]
        )
        hand_rel = world_coords_to_agent_coords(
            hand_state, env.controller.last_event.metadata["agent"]
        )
        offset = diff_position(object_rel, hand_rel, absolute=False)

        return coord_system_transform(offset, self.coord_system)


class InitialAgentArmToObjectSensor(Sensor):
    """Sensor giving the difference between the object's initial location and
    the initial hand state, both converted with `world_coords_to_agent_coords`
    against the agent metadata of the environment's last event."""

    def __init__(self, uuid: str = "initial_agent_arm_to_obj", **kwargs: Any):
        # Local names matter here: `locals()` is forwarded to the parent constructor.
        observation_space = gym.spaces.Box(
            low=-100, high=100, shape=(3,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return `state_dict_to_tensor` of the initial-object-to-initial-hand
        difference.

        Reads `initial_object_location` and `initial_hand_state` from
        `task.task_info`.
        """
        info = task.task_info
        object_source_location = info["initial_object_location"]
        initial_hand_state = info["initial_hand_state"]

        object_rel = world_coords_to_agent_coords(
            object_source_location, env.controller.last_event.metadata["agent"]
        )
        hand_rel = world_coords_to_agent_coords(
            initial_hand_state, env.controller.last_event.metadata["agent"]
        )
        offset = diff_position(object_rel, hand_rel)

        return state_dict_to_tensor(dict(position=offset))


class DistanceAgentArmToObjectSensor(Sensor):
    """Sensor giving the Euclidean norm of the object-to-hand difference.

    The task object and the arm's hand state are both converted with
    `world_coords_to_agent_coords` against the agent's current metadata, their
    `diff_position` is turned into a tensor, and its L2 norm is returned as a
    single-element result.
    """

    def __init__(self, uuid: str = "distance_agent_arm_to_obj", **kwargs: Any):
        # `get_observation` reduces the offset to one value (`.view(1)`), so the
        # declared space has shape (1,) to match what is actually returned.
        # Local names matter here: `locals()` is forwarded to the parent constructor.
        observation_space = gym.spaces.Box(
            low=-100, high=100, shape=(1,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return the distance between the task object and the arm's hand.

        The object is `task.task_info["objectId"]`; the hand state comes from
        `env.get_absolute_hand_state()`.

        # Returns

        The square root of the summed squared difference, viewed with shape (1,).
        """
        goal_obj_id = task.task_info["objectId"]
        object_info = env.get_object_by_id(goal_obj_id)
        hand_state = env.get_absolute_hand_state()

        relative_goal_obj = world_coords_to_agent_coords(
            object_info, env.controller.last_event.metadata["agent"]
        )
        relative_hand_state = world_coords_to_agent_coords(
            hand_state, env.controller.last_event.metadata["agent"]
        )
        relative_distance = diff_position(relative_goal_obj, relative_hand_state)
        result = state_dict_to_tensor(dict(position=relative_distance))

        # Collapse the per-axis offsets into a single L2 distance.
        result = ((result**2).sum() ** 0.5).view(1)
        return result


class PickedUpObjSensor(Sensor):
    """Sensor exposing the task's `object_picked_up` attribute."""

    def __init__(self, uuid: str = "pickedup_object", **kwargs: Any):
        # Local names matter here: `locals()` is forwarded to the parent constructor.
        observation_space = gym.spaces.Box(
            low=0, high=1, shape=(1,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return `task.object_picked_up` unchanged; `env` is not used."""
        picked_up = task.object_picked_up
        return picked_up


================================================
FILE: allenact_plugins/manipulathor_plugin/manipulathor_task_samplers.py
================================================
"""Task Samplers for the task of ArmPointNav."""

import json
import random
from typing import List, Dict, Optional, Any, Union

import gym

from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import set_deterministic_cudnn, set_seed
from allenact_plugins.manipulathor_plugin.manipulathor_environment import (
    ManipulaTHOREnvironment,
)
from allenact_plugins.manipulathor_plugin.manipulathor_tasks import (
    AbstractPickUpDropOffTask,
    ArmPointNavTask,
    RotateArmPointNavTask,
    CamRotateArmPointNavTask,
    EasyArmPointNavTask,
)
from allenact_plugins.manipulathor_plugin.manipulathor_utils import (
    transport_wrapper,
    initialize_arm,
)
from allenact_plugins.manipulathor_plugin.manipulathor_viz import (
    ImageVisualizer,
    LoggerVisualizer,
)


class AbstractMidLevelArmTaskSampler(TaskSampler):
    """Base task sampler storing the configuration shared by arm task samplers.

    It keeps the scene / object lists, sensors, reward configuration and
    bookkeeping counters, lazily owns a `ManipulaTHOREnvironment`, and offers
    `reset` / `set_seed` / `close`. The keyword arguments `sampler_mode` and
    `cap_training` are required (they are read from `**kwargs`).
    """

    _TASK_TYPE = Task

    def __init__(
        self,
        scenes: List[str],
        sensors: List[Sensor],
        max_steps: int,
        env_args: Dict[str, Any],
        action_space: gym.Space,
        rewards_config: Dict,
        objects: List[str],
        scene_period: Optional[Union[int, str]] = None,
        max_tasks: Optional[int] = None,
        num_task_per_scene: Optional[int] = None,
        seed: Optional[int] = None,
        deterministic_cudnn: bool = False,
        fixed_tasks: Optional[List[Dict[str, Any]]] = None,
        visualizers: Optional[List[LoggerVisualizer]] = None,
        *args,
        **kwargs
    ) -> None:
        # Static configuration handed in by the caller.
        self.scenes = scenes
        self.objects = objects
        self.sensors = sensors
        self.env_args = env_args
        self.rewards_config = rewards_config
        self.max_steps = max_steps
        self._action_space = action_space
        self.num_task_per_scene = num_task_per_scene
        self.grid_size = 0.25

        # The environment is created by whoever samples the first task.
        self.env: Optional[ManipulaTHOREnvironment] = None
        self._last_sampled_task: Optional[Task] = None

        # Scene bookkeeping; the actual values are filled in by `reset`.
        self.scene_counter: Optional[int] = None
        self.scene_order: Optional[List[str]] = None
        self.scene_id: Optional[int] = None
        self.scene_period: Optional[Union[str, int]] = (
            scene_period  # default makes a random choice
        )

        # `reset_tasks` remembers the budget so `reset` can restore `max_tasks`.
        self.max_tasks: Optional[int] = None
        self.reset_tasks = max_tasks

        # Seed before `reset`, which shuffles the scene order.
        self.seed: Optional[int] = None
        self.set_seed(seed)

        if deterministic_cudnn:
            set_deterministic_cudnn()

        self.reset()

        self.visualizers = [] if visualizers is None else visualizers
        self.sampler_mode = kwargs["sampler_mode"]
        self.cap_training = kwargs["cap_training"]

    def _create_environment(self, **kwargs) -> ManipulaTHOREnvironment:
        """Build a new `ManipulaTHOREnvironment` from `self.env_args`.

        `**kwargs` is accepted but not used.
        """
        return ManipulaTHOREnvironment(
            make_agents_visible=False,
            object_open_speed=0.05,
            env_args=self.env_args,
        )

    @property
    def last_sampled_task(self) -> Optional[Task]:
        """The task most recently stored in `_last_sampled_task`, if any."""
        return self._last_sampled_task

    def close(self) -> None:
        """Stop the environment if one has been created."""
        if self.env is None:
            return
        self.env.stop()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Check if observation spaces equal.

        # Returns

        True if all Tasks that can be sampled by this sampler have the
            same observation space. Otherwise False.
        """
        return True

    def reset(self):
        """Reset counters, reshuffle the scene order and restore the task budget."""
        self.scene_counter = 0
        scene_indices = list(range(len(self.scenes)))
        random.shuffle(scene_indices)
        self.scene_order = scene_indices
        self.scene_id = 0
        self.sampler_index = 0

        self.max_tasks = self.reset_tasks

    def set_seed(self, seed: int):
        """Store `seed` and, unless it is `None`, apply it via `set_seed`."""
        self.seed = seed
        if seed is None:
            return
        set_seed(seed)


class SimpleArmPointNavGeneralSampler(AbstractMidLevelArmTaskSampler):
    """Task sampler that builds pick-up / drop-off tasks from dataset files.

    At construction it loads, for every (scene, object) pair, the valid object
    positions from `datasets/apnd-dataset/valid_object_positions/` and groups
    the visible ones by `object_id`. In any mode other than `"train"` it also
    loads the pre-generated tasks from `datasets/apnd-dataset/deterministic_tasks/`
    and then serves those in order; in `"train"` mode source and target are
    drawn at random from the grouped positions.
    """

    _TASK_TYPE = AbstractPickUpDropOffTask

    def __init__(self, **kwargs) -> None:

        super(SimpleArmPointNavGeneralSampler, self).__init__(**kwargs)
        self.all_possible_points = []
        for scene in self.scenes:
            for object_type in self.objects:
                valid_position_adr = "datasets/apnd-dataset/valid_object_positions/valid_{}_positions_in_{}.json".format(
                    object_type, scene
                )
                # Missing or unreadable files are reported and skipped (best effort).
                try:
                    with open(valid_position_adr) as f:
                        data_points = json.load(f)
                except (OSError, ValueError):
                    print("Failed to load", valid_position_adr)
                    continue
                visible_data = [
                    data for data in data_points[scene] if data["visibility"]
                ]
                self.all_possible_points += visible_data

        # NOTE: despite the attribute name, the grouping key is `object_id`.
        self.countertop_object_to_data_id = self.calc_possible_trajectories(
            self.all_possible_points
        )

        # Scenes that have at least one object with two or more usable positions.
        scene_names = {
            self.all_possible_points[counter[0]]["scene_name"]
            for counter in self.countertop_object_to_data_id.values()
            if len(counter) > 1
        }

        if len(scene_names) < len(self.scenes):
            print("Not all scenes appear")

        print(
            "Len dataset",
            len(self.all_possible_points),
            "total_remained",
            sum([len(v) for v in self.countertop_object_to_data_id.values()]),
        )

        if (
            self.sampler_mode != "train"
        ):  # Be aware that this totally overrides some stuff
            self.deterministic_data_list = []
            for scene in self.scenes:
                for object_type in self.objects:
                    valid_position_adr = "datasets/apnd-dataset/deterministic_tasks/tasks_{}_positions_in_{}.json".format(
                        object_type, scene
                    )
                    try:
                        with open(valid_position_adr) as f:
                            data_points = json.load(f)
                    except (OSError, ValueError):
                        print("Failed to load", valid_position_adr)
                        continue
                    visible_data = [
                        dict(scene=scene, index=i, datapoint=data)
                        for (i, data) in enumerate(data_points[scene])
                    ]
                    if self.num_task_per_scene is None:
                        self.deterministic_data_list += visible_data
                    else:  # select a small number of data points for fast evaluation
                        self.deterministic_data_list += visible_data[
                            : min(self.num_task_per_scene, len(visible_data))
                        ]

        if self.sampler_mode == "test":
            random.shuffle(self.deterministic_data_list)
            self.max_tasks = self.reset_tasks = len(self.deterministic_data_list)

    def next_task(
        self, force_advance_scene: bool = False
    ) -> Optional[AbstractPickUpDropOffTask]:
        """Sample the next task, or return `None` when the sampler is exhausted.

        The environment is (created if needed and) reset to the source data
        point's scene, the arm is initialized, the object is moved to the
        source location with `transport_wrapper`, and the agent is teleported
        to the stored `agent_pose`.

        # Parameters

        force_advance_scene : Accepted for interface compatibility; not used here.

        # Returns

        A new instance of `_TASK_TYPE`, also stored as the last sampled task,
        or `None` if no task budget / deterministic tasks remain.
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        if self.sampler_mode != "train" and self.length <= 0:
            return None

        source_data_point, target_data_point = self.get_source_target_indices()

        scene = source_data_point["scene_name"]

        assert source_data_point["object_id"] == target_data_point["object_id"]
        assert source_data_point["scene_name"] == target_data_point["scene_name"]

        if self.env is None:
            self.env = self._create_environment()

        self.env.reset(
            scene_name=scene, agentMode="arm", agentControllerType="mid-level"
        )

        initialize_arm(self.env.controller)

        source_location = source_data_point
        target_location = dict(
            position=target_data_point["object_location"],
            rotation={"x": 0, "y": 0, "z": 0},
        )

        task_info = {
            "objectId": source_location["object_id"],
            "countertop_id": source_location["countertop_id"],
            "source_location": source_location,
            "target_location": target_location,
        }

        this_controller = self.env

        # Place the object at its source location before positioning the agent.
        transport_wrapper(
            this_controller,
            source_location["object_id"],
            source_location["object_location"],
        )
        agent_state = source_location["agent_pose"]

        this_controller.step(
            dict(
                action="TeleportFull",
                standing=True,
                x=agent_state["position"]["x"],
                y=agent_state["position"]["y"],
                z=agent_state["position"]["z"],
                rotation=dict(
                    x=agent_state["rotation"]["x"],
                    y=agent_state["rotation"]["y"],
                    z=agent_state["rotation"]["z"],
                ),
                horizon=agent_state["cameraHorizon"],
            )
        )

        # Only attach the raw data points when an image visualizer will use them.
        should_visualize_goal_start = [
            x for x in self.visualizers if issubclass(type(x), ImageVisualizer)
        ]
        if len(should_visualize_goal_start) > 0:
            task_info["visualization_source"] = source_data_point
            task_info["visualization_target"] = target_data_point

        self._last_sampled_task = self._TASK_TYPE(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            visualizers=self.visualizers,
            reward_configs=self.rewards_config,
        )

        return self._last_sampled_task

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Number of distinct deterministic tasks, or `None` in `"train"` mode.

        Outside `"train"` mode this is the size of the deterministic task
        list, capped by `max_tasks` when a budget is set. With no budget
        (`max_tasks is None`) the full list size is returned instead of
        failing on a `None` comparison.
        """
        if self.sampler_mode == "train":
            return None

        num_available = len(self.deterministic_data_list)
        if self.max_tasks is None:
            return num_available
        return min(self.max_tasks, num_available)

    @property
    def length(self) -> Union[int, float]:
        """Length.

        # Returns

        Number of total tasks remaining that can be sampled. Can be float('inf').
        """
        if self.sampler_mode != "train":
            return self.total_unique - self.sampler_index
        return float("inf") if self.max_tasks is None else self.max_tasks

    def get_source_target_indices(self):
        """Pick the (source, target) data points for the next task.

        In `"train"` mode an object id with at least two positions is chosen at
        random and two distinct positions of it are sampled. Otherwise the
        `"datapoint"` entry at `sampler_index` of the deterministic list is
        returned (the caller unpacks it into source and target) and the index
        is advanced.
        """
        if self.sampler_mode == "train":
            valid_countertops = [
                k for (k, v) in self.countertop_object_to_data_id.items() if len(v) > 1
            ]
            countertop_id = random.choice(valid_countertops)
            indices = random.sample(self.countertop_object_to_data_id[countertop_id], 2)
            result = (
                self.all_possible_points[indices[0]],
                self.all_possible_points[indices[1]],
            )
        else:
            result = self.deterministic_data_list[self.sampler_index]["datapoint"]
            self.sampler_index += 1

        return result

    def calc_possible_trajectories(self, all_possible_points):
        """Group the indices of `all_possible_points` by each point's `object_id`.

        # Returns

        A dict mapping `object_id` to the list of indices (in input order) of
        the points carrying that id.
        """
        object_to_data_id = {}

        for index, point in enumerate(all_possible_points):
            object_to_data_id.setdefault(point["object_id"], []).append(index)

        return object_to_data_id


class ArmPointNavTaskSampler(SimpleArmPointNavGeneralSampler):
    """Task sampler that also chooses the agent's initial pose.

    Agent poses are loaded from a JSON file keyed by scene name. In
    `"train"` mode a pose is drawn at random for the sampled scene; in the
    other modes the pose stored at the datapoint's original index is used so
    that evaluation episodes are reproducible.
    """

    _TASK_TYPE = ArmPointNavTask

    def __init__(self, **kwargs) -> None:
        """Initialize the parent sampler and load the agent start poses.

        `"test"` mode reads the deterministic pose file; every other mode
        reads the general pose file.
        """
        super(ArmPointNavTaskSampler, self).__init__(**kwargs)
        possible_initial_locations = (
            "datasets/apnd-dataset/valid_agent_initial_locations.json"
        )
        if self.sampler_mode == "test":
            possible_initial_locations = (
                "datasets/apnd-dataset/deterministic_valid_agent_initial_locations.json"
            )
        with open(possible_initial_locations) as f:
            self.possible_agent_reachable_poses = json.load(f)

    @staticmethod
    def _build_initial_agent_pose(selected_agent_init_loc):
        """Convert a flat pose record from the dataset into an agent pose dict."""
        return {
            "name": "agent",
            "position": {
                "x": selected_agent_init_loc["x"],
                "y": selected_agent_init_loc["y"],
                "z": selected_agent_init_loc["z"],
            },
            "rotation": {
                "x": -0.0,
                "y": selected_agent_init_loc["rotation"],
                "z": 0.0,
            },
            "cameraHorizon": selected_agent_init_loc["horizon"],
            "isStanding": True,
        }

    def next_task(
        self, force_advance_scene: bool = False
    ) -> Optional[AbstractPickUpDropOffTask]:
        """Sample the next task, resetting the environment for its scene.

        # Returns

        A new `_TASK_TYPE` instance, or `None` when `max_tasks` is exhausted
        or (outside `"train"` mode) no deterministic datapoints remain.
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        if self.sampler_mode != "train" and self.length <= 0:
            return None

        source_data_point, target_data_point = self.get_source_target_indices()

        scene = source_data_point["scene_name"]

        assert source_data_point["object_id"] == target_data_point["object_id"]
        assert source_data_point["scene_name"] == target_data_point["scene_name"]

        if self.env is None:
            self.env = self._create_environment()

        self.env.reset(
            scene_name=scene, agentMode="arm", agentControllerType="mid-level"
        )

        initialize_arm(self.env.controller)

        source_location = source_data_point
        target_location = dict(
            position=target_data_point["object_location"],
            rotation={"x": 0, "y": 0, "z": 0},
            countertop_id=target_data_point["countertop_id"],
        )

        # Place the object at its source location before moving the agent.
        transport_wrapper(
            self.env,
            source_location["object_id"],
            source_location["object_location"],
        )

        # The agent pose comes from the datapoint itself; it is attached by
        # `get_source_target_indices`.
        agent_state = source_location["initial_agent_pose"]

        self.env.step(
            dict(
                action="TeleportFull",
                standing=True,
                x=agent_state["position"]["x"],
                y=agent_state["position"]["y"],
                z=agent_state["position"]["z"],
                rotation=dict(
                    x=agent_state["rotation"]["x"],
                    y=agent_state["rotation"]["y"],
                    z=agent_state["rotation"]["z"],
                ),
                horizon=agent_state["cameraHorizon"],
            )
        )

        should_visualize_goal_start = [
            x for x in self.visualizers if isinstance(x, ImageVisualizer)
        ]

        initial_object_info = self.env.get_object_by_id(source_location["object_id"])
        initial_agent_location = self.env.controller.last_event.metadata["agent"]
        initial_hand_state = self.env.get_absolute_hand_state()

        task_info = {
            "objectId": source_location["object_id"],
            "source_location": source_location,  # used in analysis
            "target_location": target_location,  # used in analysis
            "agent_initial_state": initial_agent_location,  # not used
            "initial_object_location": initial_object_info,  # not used
            "initial_hand_state": initial_hand_state,
        }

        if len(should_visualize_goal_start) > 0:
            task_info["visualization_source"] = source_data_point
            task_info["visualization_target"] = target_data_point

        self._last_sampled_task = self._TASK_TYPE(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            visualizers=self.visualizers,
            reward_configs=self.rewards_config,
        )

        return self._last_sampled_task

    def get_source_target_indices(self):
        """Choose source/target datapoints and attach the agent's start pose.

        # Returns

        A `(source, target)` tuple. `source` is a shallow copy of the dataset
        entry with an extra `"initial_agent_pose"` key, so the shared dataset
        dicts are never modified and the `task_info` of earlier tasks cannot
        be altered by later samples.
        """
        if self.sampler_mode == "train":
            valid_countertops = [
                k for (k, v) in self.countertop_object_to_data_id.items() if len(v) > 1
            ]
            countertop_id = random.choice(valid_countertops)
            indices = random.sample(self.countertop_object_to_data_id[countertop_id], 2)
            source_data_point = self.all_possible_points[indices[0]]
            target_data_point = self.all_possible_points[indices[1]]
            selected_agent_init_loc = random.choice(
                self.possible_agent_reachable_poses[source_data_point["scene_name"]]
            )
            initial_agent_pose = self._build_initial_agent_pose(
                selected_agent_init_loc
            )
        else:
            # The agent start pose must be fixed for evaluation, so the pose
            # that was randomized ahead of time for this datapoint is loaded.
            entry = self.deterministic_data_list[self.sampler_index]
            source_data_point, target_data_point = entry["datapoint"]
            selected_agent_init_loc = self.possible_agent_reachable_poses[
                entry["scene"]
            ][entry["index"]]
            initial_agent_pose = self._build_initial_agent_pose(
                selected_agent_init_loc
            )
            self.sampler_index += 1

        source_data_point = {
            **source_data_point,
            "initial_agent_pose": initial_agent_pose,
        }
        return source_data_point, target_data_point


class RotateArmPointNavTaskSampler(ArmPointNavTaskSampler):
    """`ArmPointNavTaskSampler` whose sampled tasks are `RotateArmPointNavTask`s."""

    _TASK_TYPE = RotateArmPointNavTask


class CamRotateArmPointNavTaskSampler(ArmPointNavTaskSampler):
    """`ArmPointNavTaskSampler` whose sampled tasks are `CamRotateArmPointNavTask`s."""

    _TASK_TYPE = CamRotateArmPointNavTask


class EasyArmPointNavTaskSampler(ArmPointNavTaskSampler):
    """`ArmPointNavTaskSampler` whose sampled tasks are `EasyArmPointNavTask`s."""

    _TASK_TYPE = EasyArmPointNavTask


def get_all_tuples_from_list(list):
    """Return every unordered pair of elements of `list`, in index order.

    Each pair is a two-element list `[list[i], list[j]]` with `i < j`. Inputs
    with fewer than two elements yield an empty list.
    """
    size = len(list)
    return [
        [list[left], list[right]]
        for left in range(size - 1)
        for right in range(left + 1, size)
    ]


================================================
FILE: allenact_plugins/manipulathor_plugin/manipulathor_tasks.py
================================================
"""Task Definitions for the task of ArmPointNav."""

import copy
from typing import Dict, Tuple, List, Any, Optional

import gym
import numpy as np

from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact_plugins.manipulathor_plugin.armpointnav_constants import (
    MOVE_ARM_CONSTANT,
    DISTANCE_EPS,
)
from allenact_plugins.manipulathor_plugin.manipulathor_constants import (
    MOVE_ARM_HEIGHT_P,
    MOVE_ARM_HEIGHT_M,
    MOVE_ARM_X_P,
    MOVE_ARM_X_M,
    MOVE_ARM_Y_P,
    MOVE_ARM_Y_M,
    MOVE_ARM_Z_P,
    MOVE_ARM_Z_M,
    ROTATE_WRIST_PITCH_P,
    ROTATE_WRIST_PITCH_M,
    ROTATE_WRIST_YAW_P,
    ROTATE_WRIST_YAW_M,
    ROTATE_ELBOW_P,
    ROTATE_ELBOW_M,
    LOOK_UP,
    LOOK_DOWN,
    MOVE_AHEAD,
    ROTATE_RIGHT,
    ROTATE_LEFT,
    PICKUP,
    DONE,
)
from allenact_plugins.manipulathor_plugin.manipulathor_environment import (
    ManipulaTHOREnvironment,
    position_distance,
)
from allenact_plugins.manipulathor_plugin.manipulathor_viz import LoggerVisualizer


class AbstractPickUpDropOffTask(Task[ManipulaTHOREnvironment]):
    """Shared bookkeeping, visualization hooks and metrics for arm tasks.

    The task concerns the object `task_info["objectId"]`, which starts at
    `task_info["source_location"]` and should end at
    `task_info["target_location"]`. Subclasses must implement `_step` and
    `judge`.
    """

    _actions = (
        MOVE_ARM_HEIGHT_P,
        MOVE_ARM_HEIGHT_M,
        MOVE_ARM_X_P,
        MOVE_ARM_X_M,
        MOVE_ARM_Y_P,
        MOVE_ARM_Y_M,
        MOVE_ARM_Z_P,
        MOVE_ARM_Z_M,
        MOVE_AHEAD,
        ROTATE_RIGHT,
        ROTATE_LEFT,
    )

    # New commit of AI2THOR has some issue that the objects will vibrate a bit
    # without any external force. To eliminate the vibration effect, we have to
    # introduce _vibration_dist_dict when checking the disturbance, from an external csv file.
    # By default it is None, i.e. we assume there is no vibration.

    _vibration_dist_dict: Optional[Dict] = None

    def __init__(
        self,
        env: ManipulaTHOREnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        visualizers: Optional[List[LoggerVisualizer]] = None,
        **kwargs
    ) -> None:
        """Initializer.

        # Parameters

        env : The environment the task acts in.
        sensors : Sensors used to produce observations.
        task_info : Task description; the methods of this class read the
            `"objectId"`, `"source_location"` and `"target_location"` keys.
        max_steps : Forwarded to the base `Task`.
        visualizers : Optional loggers notified at every step and at the end
            of the episode.
        kwargs : Forwarded to the base `Task`; must contain `"reward_configs"`.
        """
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )
        self._took_end_action: bool = False
        self._success: Optional[bool] = False
        self._subsampled_locations_from_which_obj_visible: Optional[
            List[Tuple[float, float, int, int]]
        ] = None
        self.visualizers = visualizers if visualizers is not None else []
        self.start_visualize()
        self.action_sequence_and_success = []

        # in allenact initialization is with 0.2
        self.last_obj_to_goal_distance = None
        self.last_arm_to_obj_distance = None
        self.object_picked_up = False
        self.got_reward_for_pickup = False
        self.reward_configs = kwargs["reward_configs"]
        self.initial_object_locations = self.env.get_current_object_locations()

    @property
    def action_space(self):
        """Discrete action space with one entry per name in `_actions`."""
        return gym.spaces.Discrete(len(self._actions))

    def reached_terminal_state(self) -> bool:
        """Whether the episode-ending action has been taken."""
        return self._took_end_action

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Names of the actions, indexed by action id."""
        return cls._actions

    def close(self) -> None:
        """Stop the underlying environment."""
        self.env.stop()

    def obj_state_aproximity(self, s1, s2):
        """Whether two states are within `2 * MOVE_ARM_CONSTANT` on every axis.

        Only the `"position"` entries are compared.
        """
        # KIANA ignore rotation for now
        position1 = s1["position"]
        position2 = s2["position"]
        eps = MOVE_ARM_CONSTANT * 2
        return all(
            abs(position1[axis] - position2[axis]) < eps for axis in ("x", "y", "z")
        )

    def start_visualize(self):
        """Log the initial state, first flushing any leftover episode data."""
        for visualizer in self.visualizers:
            if not visualizer.is_empty():
                # A previous episode was never finalized; close it out so its
                # frames are not mixed into this episode.
                print("OH NO VISUALIZER WAS NOT EMPTY")
                visualizer.finish_episode(self.env, self, self.task_info)
                visualizer.finish_episode_metrics(self, self.task_info, None)
            visualizer.log(self.env)

    def visualize(self, action_str):
        """Pass the current environment and action to every visualizer."""
        for visualizer in self.visualizers:
            visualizer.log(self.env, action_str)

    def finish_visualizer(self):
        """Tell every visualizer that the episode is over."""
        for visualizer in self.visualizers:
            visualizer.finish_episode(self.env, self, self.task_info)

    def finish_visualizer_metrics(self, metric_results):
        """Hand the final episode metrics to every visualizer."""
        for visualizer in self.visualizers:
            visualizer.finish_episode_metrics(self, self.task_info, metric_results)

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Return the environment's current frame (only `"rgb"` is supported)."""
        assert mode == "rgb", "only rgb rendering is implemented"
        return self.env.current_frame

    def calc_action_stat_metrics(self) -> Dict[str, Any]:
        """Per-action usage and success ratios over the recorded episode.

        # Returns

        A dict with, for every action name, `"action_stat/<name>"` (share of
        steps that used the action) and `"action_success/<name>"` (share of
        its uses that succeeded), plus `"action_success/total"`.
        """
        action_stat = {"action_stat/" + action_str: 0.0 for action_str in self._actions}
        action_success_stat = {
            "action_success/" + action_str: 0.0 for action_str in self._actions
        }
        action_success_stat["action_success/total"] = 0.0

        for action_name, action_success in self.action_sequence_and_success:
            action_stat["action_stat/" + action_name] += 1.0
            action_success_stat[
                "action_success/{}".format(action_name)
            ] += action_success
            action_success_stat["action_success/total"] += action_success

        # The recorded sequence is empty when no step was taken or when
        # `metrics()` has already reset it; avoid dividing by zero then.
        seq_len = max(len(self.action_sequence_and_success), 1)
        action_success_stat["action_success/total"] /= seq_len

        for action_name in self._actions:
            action_success_stat["action_success/{}".format(action_name)] /= max(
                action_stat["action_stat/" + action_name], 1.0
            )
            action_stat["action_stat/" + action_name] /= seq_len

        result = {**action_stat, **action_success_stat}

        return result

    def metrics(self) -> Dict[str, Any]:
        """Episode metrics; task-specific entries are added once the task is done.

        When the task is done this also notifies the visualizers and clears
        the recorded action sequence.
        """
        result = super(AbstractPickUpDropOffTask, self).metrics()

        if self.is_done():
            result = {**result, **self.calc_action_stat_metrics()}

            # 1. goal object metrics
            final_obj_distance_from_goal = self.obj_distance_from_goal()
            result["average/final_obj_distance_from_goal"] = (
                final_obj_distance_from_goal
            )
            final_arm_distance_from_obj = self.arm_distance_from_obj()
            result["average/final_arm_distance_from_obj"] = final_arm_distance_from_obj

            final_obj_pickup = 1 if self.object_picked_up else 0
            result["average/final_obj_pickup"] = final_obj_pickup

            original_distance = self.get_original_object_distance() + DISTANCE_EPS
            result["average/original_distance"] = original_distance

            # this ratio can be more than 1
            if self.object_picked_up:
                ratio_distance_left = final_obj_distance_from_goal / original_distance
                result["average/ratio_distance_left"] = ratio_distance_left
                result["average/eplen_pickup"] = self.eplen_pickup

            # 2. disturbance with other objects
            current_object_locations = self.env.get_current_object_locations()
            objects_moved = self.env.get_objects_moved(
                self.initial_object_locations,
                current_object_locations,
                self.task_info["objectId"],
                self._vibration_dist_dict,
            )
            result["disturbance/objects_moved_num"] = len(objects_moved)

            # 3. conditioned on success
            if self._success:
                result["average/eplen_success"] = result["ep_length"]
                result["average/success_wo_disturb"] = len(objects_moved) == 0

            else:
                result["average/success_wo_disturb"] = 0.0

            result["success"] = self._success

            self.finish_visualizer_metrics(result)
            self.finish_visualizer()
            self.action_sequence_and_success = []

        return result

    def _step(self, action: int) -> RLStepResult:
        """Take one step; must be implemented by subclasses."""
        raise NotImplementedError("Not implemented")

    def arm_distance_from_obj(self):
        """Distance between the target object and the absolute hand state."""
        goal_obj_id = self.task_info["objectId"]
        object_info = self.env.get_object_by_id(goal_obj_id)
        hand_state = self.env.get_absolute_hand_state()
        return position_distance(object_info, hand_state)

    def obj_distance_from_goal(self):
        """Distance between the target object and the target location."""
        goal_obj_id = self.task_info["objectId"]
        object_info = self.env.get_object_by_id(goal_obj_id)
        goal_state = self.task_info["target_location"]
        return position_distance(object_info, goal_state)

    def get_original_object_distance(self):
        """Distance between the object's source location and where it is now."""
        goal_obj_id = self.task_info["objectId"]
        s_init = dict(position=self.task_info["source_location"]["object_location"])
        current_location = self.env.get_object_by_id(goal_obj_id)

        original_object_distance = position_distance(s_init, current_location)
        return original_object_distance

    def judge(self) -> float:
        """Compute the reward after having taken a step.

        Must be implemented by subclasses.
        """
        raise NotImplementedError("Not implemented")


class ArmPointNavTask(AbstractPickUpDropOffTask):
    """Arm task with explicit `PICKUP` and `DONE` actions.

    On top of the parent's bookkeeping this class accumulates how far other
    objects were moved during the episode and penalizes that disturbance in
    the reward.
    """

    _actions = (
        MOVE_ARM_HEIGHT_P,
        MOVE_ARM_HEIGHT_M,
        MOVE_ARM_X_P,
        MOVE_ARM_X_M,
        MOVE_ARM_Y_P,
        MOVE_ARM_Y_M,
        MOVE_ARM_Z_P,
        MOVE_ARM_Z_M,
        MOVE_AHEAD,
        ROTATE_RIGHT,
        ROTATE_LEFT,
        PICKUP,
        DONE,
    )

    def __init__(
        self,
        env: ManipulaTHOREnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        visualizers: Optional[List[LoggerVisualizer]] = None,
        **kwargs
    ) -> None:
        """Initialize the parent task and the disturbance accumulators."""
        super().__init__(
            env=env,
            sensors=sensors,
            task_info=task_info,
            max_steps=max_steps,
            visualizers=visualizers,
            **kwargs
        )
        self.cumulated_disturb_distance_all = 0.0
        self.cumulated_disturb_distance_visible = 0.0
        # NOTE: visible distance can be negative, no determinitic relation with
        #   all distance
        self.previous_object_locations = copy.deepcopy(self.initial_object_locations)
        self.current_penalized_distance = 0.0  # used in Sensor for auxiliary task

    def metrics(self) -> Dict[str, Any]:
        """Parent metrics plus the cumulated disturbance distances when done."""
        result = super(ArmPointNavTask, self).metrics()

        if self.is_done():
            # add disturbance distance metrics
            result["disturbance/objects_moved_distance"] = (
                self.cumulated_disturb_distance_all
            )
            result["disturbance/objects_moved_distance_vis"] = (
                self.cumulated_disturb_distance_visible
            )

        return result

    def visualize(self, *args, **kwargs):
        """Forward the environment and all arguments to every visualizer.

        Positional arguments are accepted as well as keywords so that calls
        written against the parent's `visualize(action_str)` signature keep
        working on this class and its subclasses.
        """
        for visualizer in self.visualizers:
            visualizer.log(self.env, *args, **kwargs)

    def _step(self, action: int) -> RLStepResult:
        """Execute the action with index `action` and return the step result."""
        action_str = self.class_action_names()[action]

        self._last_action_str = action_str
        action_dict = {"action": action_str}
        object_id = self.task_info["objectId"]
        if action_str == PICKUP:
            action_dict = {**action_dict, "object_id": object_id}
        self.env.step(action_dict)
        self.last_action_success = self.env.last_action_success

        last_action_name = self._last_action_str
        last_action_success = float(self.last_action_success)
        self.action_sequence_and_success.append((last_action_name, last_action_success))

        # If the object has not been picked up yet and it was picked up in the previous step update parameters to integrate it into reward
        if not self.object_picked_up:

            if self.env.is_object_at_low_level_hand(object_id):
                self.object_picked_up = True
                self.eplen_pickup = (
                    self._num_steps_taken + 1
                )  # plus one because this step has not been counted yet

        if action_str == DONE:
            self._took_end_action = True
            object_state = self.env.get_object_by_id(object_id)
            goal_state = self.task_info["target_location"]
            # Success requires both holding the object and being at the goal.
            goal_achieved = self.object_picked_up and self.obj_state_aproximity(
                object_state, goal_state
            )
            self.last_action_success = goal_achieved
            self._success = goal_achieved

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )
        return step_result

    def judge(self) -> float:
        """Compute the reward after having taken a step.

        The reward sums the step penalty, a failed-action penalty, the
        end-of-episode reward, a one-time pickup reward, the decrease of the
        arm-to-object and object-to-goal distances, and the disturbance
        penalty. The step is also logged to the visualizers.
        """
        reward = self.reward_configs["step_penalty"]

        if not self.last_action_success or (
            self._last_action_str == PICKUP and not self.object_picked_up
        ):
            reward += self.reward_configs["failed_action_penalty"]

        if self._took_end_action:
            reward += (
                self.reward_configs["goal_success_reward"]
                if self._success
                else self.reward_configs["failed_stop_reward"]
            )

        # increase reward if object pickup and only do it once
        if not self.got_reward_for_pickup and self.object_picked_up:
            reward += self.reward_configs["pickup_success_reward"]
            self.got_reward_for_pickup = True

        current_obj_to_arm_distance = self.arm_distance_from_obj()
        if self.last_arm_to_obj_distance is None:
            delta_arm_to_obj_distance_reward = 0
        else:
            delta_arm_to_obj_distance_reward = (
                self.last_arm_to_obj_distance - current_obj_to_arm_distance
            )
        self.last_arm_to_obj_distance = current_obj_to_arm_distance
        reward += delta_arm_to_obj_distance_reward

        current_obj_to_goal_distance = self.obj_distance_from_goal()
        if self.last_obj_to_goal_distance is None:
            delta_obj_to_goal_distance_reward = 0
        else:
            delta_obj_to_goal_distance_reward = (
                self.last_obj_to_goal_distance - current_obj_to_goal_distance
            )
        self.last_obj_to_goal_distance = current_obj_to_goal_distance
        reward += delta_obj_to_goal_distance_reward

        # add disturbance cost
        ## here we measure disturbance by the sum of moving distance of all objects
        ## note that collided object may move for a while wo external force due to inertia
        ## and we may also consider mass
        current_object_locations = self.env.get_current_object_locations()

        disturb_distance_visible = self.env.get_objects_move_distance(
            initial_object_locations=self.initial_object_locations,
            previous_object_locations=self.previous_object_locations,
            current_object_locations=current_object_locations,
            target_object_id=self.task_info["objectId"],
            only_visible=True,
            thres_dict=self._vibration_dist_dict,
        )
        disturb_distance_all = self.env.get_objects_move_distance(
            initial_object_locations=self.initial_object_locations,
            previous_object_locations=self.previous_object_locations,
            current_object_locations=current_object_locations,
            target_object_id=self.task_info["objectId"],
            only_visible=False,
            thres_dict=self._vibration_dist_dict,
        )

        self.cumulated_disturb_distance_all += disturb_distance_all
        self.cumulated_disturb_distance_visible += disturb_distance_visible

        penalized_distance = (
            disturb_distance_visible
            if self.reward_configs["disturb_visible"]
            else disturb_distance_all
        )
        reward += self.reward_configs["disturb_penalty"] * penalized_distance
        self.current_penalized_distance = penalized_distance

        self.previous_object_locations = current_object_locations

        self.visualize(
            action_str=self._last_action_str,
            disturbance_str=str(round(penalized_distance, 4)),
        )

        return float(reward)


class RotateArmPointNavTask(ArmPointNavTask):
    """`ArmPointNavTask` whose action set additionally contains the
    `ROTATE_WRIST_*` and `ROTATE_ELBOW_*` actions."""

    _actions = (
        MOVE_ARM_HEIGHT_P,
        MOVE_ARM_HEIGHT_M,
        MOVE_ARM_X_P,
        MOVE_ARM_X_M,
        MOVE_ARM_Y_P,
        MOVE_ARM_Y_M,
        MOVE_ARM_Z_P,
        MOVE_ARM_Z_M,
        ROTATE_WRIST_PITCH_P,
        ROTATE_WRIST_PITCH_M,
        ROTATE_WRIST_YAW_P,
        ROTATE_WRIST_YAW_M,
        ROTATE_ELBOW_P,
        ROTATE_ELBOW_M,
        MOVE_AHEAD,
        ROTATE_RIGHT,
        ROTATE_LEFT,
        PICKUP,
        DONE,
    )


class CamRotateArmPointNavTask(ArmPointNavTask):
    """`ArmPointNavTask` whose action set additionally contains the
    `ROTATE_WRIST_*`, `ROTATE_ELBOW_*`, `LOOK_UP` and `LOOK_DOWN` actions."""

    _actions = (
        MOVE_ARM_HEIGHT_P,
        MOVE_ARM_HEIGHT_M,
        MOVE_ARM_X_P,
        MOVE_ARM_X_M,
        MOVE_ARM_Y_P,
        MOVE_ARM_Y_M,
        MOVE_ARM_Z_P,
        MOVE_ARM_Z_M,
        ROTATE_WRIST_PITCH_P,
        ROTATE_WRIST_PITCH_M,
        ROTATE_WRIST_YAW_P,
        ROTATE_WRIST_YAW_M,
        ROTATE_ELBOW_P,
        ROTATE_ELBOW_M,
        LOOK_UP,
        LOOK_DOWN,
        MOVE_AHEAD,
        ROTATE_RIGHT,
        ROTATE_LEFT,
        PICKUP,
        DONE,
    )


class EasyArmPointNavTask(ArmPointNavTask):
    """`ArmPointNavTask` variant without explicit `PICKUP` and `DONE` actions.

    The object is picked up automatically as soon as the arm metadata lists
    it as pickupable, and the episode ends as soon as the held object is
    close enough to the target location.
    """

    _actions = (
        MOVE_ARM_HEIGHT_P,
        MOVE_ARM_HEIGHT_M,
        MOVE_ARM_X_P,
        MOVE_ARM_X_M,
        MOVE_ARM_Y_P,
        MOVE_ARM_Y_M,
        MOVE_ARM_Z_P,
        MOVE_ARM_Z_M,
        MOVE_AHEAD,
        ROTATE_RIGHT,
        ROTATE_LEFT,
        # PICKUP,
        # DONE,
    )

    def _step(self, action: int) -> RLStepResult:
        """Execute the action, then auto-pickup and auto-finish when possible."""
        action_str = self.class_action_names()[action]

        self._last_action_str = action_str
        action_dict = {"action": action_str}
        object_id = self.task_info["objectId"]
        if action_str == PICKUP:
            action_dict = {**action_dict, "object_id": object_id}
        self.env.step(action_dict)
        self.last_action_success = self.env.last_action_success

        last_action_name = self._last_action_str
        last_action_success = float(self.last_action_success)
        self.action_sequence_and_success.append((last_action_name, last_action_success))
        # NOTE: no explicit `self.visualize(...)` call here. The inherited
        # `visualize` only accepts keyword arguments, so the former positional
        # call raised a TypeError on every step; the inherited `judge()` below
        # already logs this step to the visualizers.

        # If the object has not been picked up yet and it was picked up in the previous step update parameters to integrate it into reward
        if not self.object_picked_up:
            if (
                object_id
                in self.env.controller.last_event.metadata["arm"]["pickupableObjects"]
            ):
                self.env.step(dict(action="PickupObject"))
                #  we are doing an additional pass here, label is not right and if we fail we will do it twice
                object_inventory = self.env.controller.last_event.metadata["arm"][
                    "heldObjects"
                ]
                # Something other than the target was grabbed: let go of it.
                if len(object_inventory) > 0 and object_id not in object_inventory:
                    self.env.step(dict(action="ReleaseObject"))

            if self.env.is_object_at_low_level_hand(object_id):
                self.object_picked_up = True
                self.eplen_pickup = (
                    self._num_steps_taken + 1
                )  # plus one because this step has not been counted yet

        if self.object_picked_up:

            object_state = self.env.get_object_by_id(object_id)
            goal_state = self.task_info["target_location"]
            goal_achieved = self.obj_state_aproximity(object_state, goal_state)
            if goal_achieved:
                # Reaching the goal while holding the object ends the episode.
                self._took_end_action = True
                self.last_action_success = goal_achieved
                self._success = goal_achieved

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )
        return step_result

    # def judge(self) -> float: Seems like we are fine on this


================================================
FILE: allenact_plugins/manipulathor_plugin/manipulathor_utils.py
================================================
import ai2thor

from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
from allenact_plugins.manipulathor_plugin.armpointnav_constants import (
    get_agent_start_positions,
)
from allenact_plugins.manipulathor_plugin.manipulathor_constants import (
    ADDITIONAL_ARM_ARGS,
)


def make_all_objects_unbreakable(controller):
    """Issue `MakeObjectsOfTypeUnbreakable` once per breakable object type.

    The object types are read from `controller.last_event.metadata["objects"]`;
    only objects whose `"breakable"` field is exactly `True` are considered.
    """
    breakable_types = {
        obj["objectType"]
        for obj in controller.last_event.metadata["objects"]
        if obj["breakable"] is True
    }
    for breakable_type in breakable_types:
        controller.step(
            action="MakeObjectsOfTypeUnbreakable", objectType=breakable_type
        )


def reset_environment_and_additional_commands(controller, scene_name):
    """Reset `controller` to `scene_name` and apply the standard scene setup.

    After the reset, the `MakeAllObjectsMoveable` and
    `MakeObjectsStaticKinematicMassThreshold` actions are issued and every
    breakable object type is made unbreakable.
    """
    controller.reset(scene_name)
    for setup_action in (
        "MakeAllObjectsMoveable",
        "MakeObjectsStaticKinematicMassThreshold",
    ):
        controller.step(action=setup_action)
    make_all_objects_unbreakable(controller)


def transport_wrapper(controller, target_object, target_location):
    """Place an object at a location and advance physics by one second.

    # Parameters

    controller : Either an `IThorEnvironment` (or subclass), whose `step`
        takes the action as a dict, or an `ai2thor.controller.Controller`
        (or subclass), whose `step` takes the action as keyword arguments.
    target_object : Id of the object to move.
    target_location : Position passed to `PlaceObjectAtPoint`.

    # Returns

    The event returned by the `PlaceObjectAtPoint` step (not by the
    subsequent physics step).

    # Raises

    NotImplementedError : If `controller` is of neither supported type.
    """
    transport_detail = dict(
        action="PlaceObjectAtPoint",
        objectId=target_object,
        position=target_location,
        forceKinematic=True,
    )
    advance_detail = dict(action="AdvancePhysicsStep", simSeconds=1.0)

    if isinstance(controller, IThorEnvironment):
        event = controller.step(transport_detail)
        controller.step(advance_detail)
    elif isinstance(controller, ai2thor.controller.Controller):
        event = controller.step(**transport_detail)
        controller.step(**advance_detail)
    else:
        raise NotImplementedError(
            "Unsupported controller type: {}".format(type(controller).__name__)
        )
    return event


def initialize_arm(controller):
    """Teleport the agent to its scene start pose and raise the arm.

    The start pose is looked up by the controller's current scene name in
    `get_agent_start_positions()`.

    # Returns

    A tuple with the events of the `TeleportFull`, `MoveArm` and
    `MoveArmBase` steps, in that order.
    """
    # for start arm from high up,
    scene_name = controller.last_event.metadata["sceneName"]
    start_pose = get_agent_start_positions()[scene_name]

    teleport_action = dict(
        action="TeleportFull",
        standing=True,
        x=start_pose["x"],
        y=start_pose["y"],
        z=start_pose["z"],
        rotation=dict(x=0, y=start_pose["rotation"], z=0),
        horizon=start_pose["horizon"],
    )
    teleport_event = controller.step(teleport_action)

    move_arm_action = dict(
        action="MoveArm", position=dict(x=0.0, y=0, z=0.35), **ADDITIONAL_ARM_ARGS
    )
    move_arm_event = controller.step(move_arm_action)

    move_base_action = dict(action="MoveArmBase", y=0.8, **ADDITIONAL_ARM_ARGS)
    move_base_event = controller.step(move_base_action)

    return teleport_event, move_arm_event, move_base_event


================================================
FILE: allenact_plugins/manipulathor_plugin/manipulathor_viz.py
================================================
"""Utility functions and classes for visualization and logging."""

import os
from datetime import datetime

import cv2
import imageio
import matplotlib
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np

from allenact_plugins.manipulathor_plugin.manipulathor_utils import initialize_arm
from allenact_plugins.manipulathor_plugin.manipulathor_utils import (
    reset_environment_and_additional_commands,
    transport_wrapper,
)


class LoggerVisualizer:
    """Base class for per-episode loggers and visualizers.

    Creates the output directory `<exp_name>/<log_dir>` and holds the queues
    and episode counter that subclasses fill. Subclasses must override `log`;
    the `finish_episode*` hooks do nothing by default.
    """

    def __init__(self, exp_name="", log_dir=""):
        """Create the log directory and the empty queues.

        # Parameters

        exp_name : Experiment directory name; `"NoNameExp"` when empty.
        log_dir : Sub-directory name; the class name when empty.
        """
        if log_dir == "":
            log_dir = self.__class__.__name__
        if exp_name == "":
            exp_name = "NoNameExp"
        self.exp_name = exp_name
        self.log_dir = os.path.join(exp_name, log_dir)
        os.makedirs(self.log_dir, exist_ok=True)
        self.log_queue = []
        self.action_queue = []
        self.logger_index = 0

    def log(self, environment, action_str="", **kwargs):
        """Record one step; must be overridden by subclasses.

        `action_str` is optional and extra keyword arguments are accepted so
        that the signature matches how tasks call the subclasses' `log`.
        """
        raise NotImplementedError("Not Implemented")

    def is_empty(self):
        """Whether `log_queue` holds no entries."""
        return len(self.log_queue) == 0

    def finish_episode_metrics(self, episode_info, task_info, metric_results):
        """Hook called with the final episode metrics; no-op by default."""
        pass

    def finish_episode(self, environment, episode_info, task_info):
        """Hook called when an episode ends; no-op by default."""
        pass


class TestMetricLogger(LoggerVisualizer):
    """Accumulates per-episode metrics and appends them to a text log file.

    One line (the `str` of a dict) is written to ``<log_dir>/test_metric.txt``
    per finished episode, containing the queued actions, the queued
    disturbance strings and the episode's metric results.
    """

    def __init__(self, exp_name="", log_dir="", **kwargs):
        """Create the log directory and open the metric log file for writing.

        Extra keyword arguments are accepted and ignored.
        """
        super().__init__(exp_name=exp_name, log_dir=log_dir)
        # Maps metric name -> list of values, one entry per logged episode.
        self.total_metric_dict = {}
        log_file_name = os.path.join(self.log_dir, "test_metric.txt")
        # Kept open for the logger's lifetime; call `close()` when done.
        self.metric_log_file = open(log_file_name, "w")
        self.disturbance_distance_queue = []

    def close(self):
        """Close the metric log file (safe to call more than once)."""
        if not self.metric_log_file.closed:
            self.metric_log_file.close()

    def average_dict(self):
        """Return the mean of every accumulated metric, keyed by metric name."""
        return {k: sum(v) / len(v) for k, v in self.total_metric_dict.items()}

    def finish_episode_metrics(self, episode_info, task_info, metric_results=None):
        """Record the metrics of a finished episode and flush them to disk.

        # Parameters

        episode_info : Unused here.
        task_info : Unused here.
        metric_results : Mapping of metric name to value. When `None`, the
            per-episode queues are discarded and nothing is written.
        """
        if metric_results is None:
            print("had to reset")
            self.action_queue = []
            self.disturbance_distance_queue = []
            return

        for k in metric_results.keys():
            if "metric" in k or k in ["ep_length", "reward", "success"]:
                self.total_metric_dict.setdefault(k, []).append(metric_results[k])

        print(
            "total",
            # `.get` avoids a KeyError when the results carry no "success" entry.
            len(self.total_metric_dict.get("success", [])),
            "average test metric",
            self.average_dict(),
        )

        # save the task info and all the action queue and results
        log_dict = {
            "logger_number": self.logger_index,
            "action_sequence": self.action_queue,
            "disturbance_sequence": self.disturbance_distance_queue,
            "task_info_metrics": metric_results,
        }
        self.logger_index += 1
        self.metric_log_file.write(str(log_dict))
        self.metric_log_file.write("\n")
        # Flush after each episode so partial runs still leave usable logs.
        self.metric_log_file.flush()
        print("Logging to", self.metric_log_file.name)

        self.action_queue = []
        self.disturbance_distance_queue = []

    def log(self, environment, action_str="", disturbance_str=""):
        """Queue the action and disturbance strings of one step.

        `environment` is not used.
        """
        # We can add agent arm and state location if needed
        self.action_queue.append(action_str)
        self.disturbance_distance_queue.append(disturbance_str)


class BringObjImageVisualizer(LoggerVisualizer):
    """Writes a GIF of each episode plus start/goal snapshots (image + mask)."""

    def finish_episode(self, environment, episode_info, task_info):
        """Dump the queued frames as a GIF, then save start and goal images.

        The file prefix is a timestamp followed by the running logger index.
        The scene is reset once, after the GIF is written and before the two
        `log_start_goal` snapshots are taken. The queues are cleared at the end.
        """
        stamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")
        time_to_write = "{}log_ind_{}".format(stamp, self.logger_index)
        self.logger_index += 1
        print("Loggigng", time_to_write, "len", len(self.log_queue))

        source_object_id = task_info["source_object_id"]
        goal_object_id = task_info["goal_object_id"]
        pickup_success_offset = "succ" if episode_info.object_picked_up else "fail"
        episode_success_offset = "succ" if episode_info._success else "fail"

        # Put back if you want the images
        # for i, img in enumerate(self.log_queue):
        #     image_dir = os.path.join(self.log_dir, time_to_write + '_seq{}.png'.format(str(i)))
        #     cv2.imwrite(image_dir, img[:,:,[2,1,0]])

        gif_name = "".join(
            [
                time_to_write,
                "_from_",
                source_object_id.split("|")[0],
                "_to_",
                goal_object_id.split("|")[0],
                "_pickup_",
                pickup_success_offset,
                "_episode_",
                episode_success_offset,
                ".gif",
            ]
        )

        # Stack to (T, H, W, C), then add a singleton "column" axis -> (T, 1, H, W, C).
        stacked_frames = np.stack(self.log_queue, axis=0)
        save_image_list_to_gif(
            np.expand_dims(stacked_frames, axis=1), gif_name, self.log_dir
        )

        controller = environment.controller
        reset_environment_and_additional_commands(
            controller, controller.last_event.metadata["sceneName"]
        )

        for tag, info_key in (
            ("start", "visualization_source"),
            ("goal", "visualization_target"),
        ):
            self.log_start_goal(
                environment,
                task_info[info_key],
                tag=tag,
                img_adr=os.path.join(self.log_dir, time_to_write),
            )

        self.log_queue = []
        self.action_queue = []

    def log(self, environment, action_str):
        """Queue the environment's current frame and the action string."""
        frame = environment.current_frame
        self.action_queue.append(action_str)
        self.log_queue.append(frame)

    def log_start_goal(self, env, task_info, tag, img_adr):
        """Re-stage the object and agent, then save an RGB image and object mask.

        # Parameters

        env : Environment exposing a `controller`.
        task_info : Dict with `"object_location"`, `"object_id"` and `"agent_pose"`.
        tag : Suffix inserted in the file names (e.g. `"start"` / `"goal"`).
        img_adr : Path prefix for the written files.
        """
        object_location = task_info["object_location"]
        object_id = task_info["object_id"]
        agent_state = task_info["agent_pose"]
        controller = env.controller

        # No reset here (the caller resets once). The arm initialization below
        # must stay: the arm is started from high up on purpose.
        event1, event2, event3 = initialize_arm(controller)
        arm_ok = all(
            ev.metadata["lastActionSuccess"] for ev in (event1, event2, event3)
        )
        if not arm_ok:
            print("ERROR: ARM MOVEMENT FAILED in logging! SHOULD NEVER HAPPEN")

        transport_event = transport_wrapper(controller, object_id, object_location)
        if not transport_event.metadata["lastActionSuccess"]:
            print("ERROR: oh no could not transport in logging")

        position = agent_state["position"]
        rotation = agent_state["rotation"]
        teleport_event = controller.step(
            {
                "action": "TeleportFull",
                "standing": True,
                "x": position["x"],
                "y": position["y"],
                "z": position["z"],
                "rotation": {
                    "x": rotation["x"],
                    "y": rotation["y"],
                    "z": rotation["z"],
                },
                "horizon": agent_state["cameraHorizon"],
            }
        )
        if not teleport_event.metadata["lastActionSuccess"]:
            print("ERROR: oh no could not teleport in logging")

        frame = controller.last_event.frame
        file_stem = img_adr + "_obj_" + object_id.split("|")[0] + "_pickup_" + tag
        # Channel order is reversed before handing the frame to OpenCV.
        cv2.imwrite(file_stem + ".png", frame[:, :, [2, 1, 0]])

        # Saving the mask; an all-zero mask is written when the object has none.
        target_object_id = task_info["object_id"]
        visible_masks = controller.last_event.instance_masks
        mask_frame = (
            visible_masks[target_object_id]
            if target_object_id in visible_masks
            else np.zeros(env.controller.last_event.frame[:, :, 0].shape)
        )
        cv2.imwrite(file_stem + "_mask.png", mask_frame.astype(float) * 255.0)


class ImageVisualizer(LoggerVisualizer):
    """Writes an annotated GIF per episode plus start/goal snapshots.

    Optionally places a top-down camera view next to the agent's frames in
    the GIF (`add_top_down_view`) and saves a colorized depth map alongside
    the start/goal images (`add_depth_map`).
    """

    def __init__(
        self,
        exp_name="",
        log_dir="",
        add_top_down_view: bool = False,
        add_depth_map: bool = False,
    ):
        """Set up the output directory and the per-episode queues.

        # Parameters

        exp_name : Forwarded to `LoggerVisualizer`.
        log_dir : Forwarded to `LoggerVisualizer`.
        add_top_down_view : Also record a third-party camera frame at each step.
        add_depth_map : Also save a depth image in `log_start_goal`.
        """
        super().__init__(exp_name=exp_name, log_dir=log_dir)
        self.add_top_down_view = add_top_down_view
        self.add_depth_map = add_depth_map
        if self.add_top_down_view:
            # Only exists when the top-down view is enabled.
            self.top_down_queue = []
        self.disturbance_distance_queue = []

    def finish_episode(self, environment, episode_info, task_info):
        """Write the episode GIF and the start/goal images, then clear queues.

        Reads `"objectId"`, `"source_location"`, `"target_location"`,
        `"visualization_source"` and `"visualization_target"` from `task_info`
        and `object_picked_up` / `_success` from `episode_info`.
        """
        time_to_write = "log_ind_{:03d}".format(self.logger_index)
        self.logger_index += 1
        print("Logging", time_to_write, "len", len(self.log_queue))
        object_id = task_info["objectId"]
        scene_name = task_info["source_location"]["scene_name"]
        source_countertop = task_info["source_location"]["countertop_id"]
        target_countertop = task_info["target_location"]["countertop_id"]

        pickup_success = episode_info.object_picked_up
        episode_success = episode_info._success

        # Put back if you want the images
        # for i, img in enumerate(self.log_queue):
        #     image_dir = os.path.join(self.log_dir, time_to_write + '_seq{}.png'.format(str(i)))
        #     cv2.imwrite(image_dir, img[:,:,[2,1,0]])

        episode_success_offset = "succ" if episode_success else "fail"
        pickup_success_offset = "succ" if pickup_success else "fail"
        gif_name = (
            time_to_write
            + "_pickup_"
            + pickup_success_offset
            + "_episode_"
            + episode_success_offset
            + "_"
            + scene_name.split("_")[0]
            + "_obj_"
            + object_id.split("|")[0]
            + "_from_"
            + source_countertop.split("|")[0]
            + "_to_"
            + target_countertop.split("|")[0]
            + ".gif"
        )

        # Draw each step's disturbance string onto the corresponding frame.
        self.log_queue = put_annotation_on_image(
            self.log_queue, self.disturbance_distance_queue
        )

        concat_all_images = np.expand_dims(np.stack(self.log_queue, axis=0), axis=1)
        if self.add_top_down_view:
            topdown_all_images = np.expand_dims(
                np.stack(self.top_down_queue, axis=0), axis=1
            )  # (T, 1, H, W, 3)
            # Concatenating requires both views to share the same frame shape.
            concat_all_images = np.concatenate(
                [concat_all_images, topdown_all_images], axis=1
            )  # (T, 2, H, W, 3)

        save_image_list_to_gif(concat_all_images, gif_name, self.log_dir)

        self.log_start_goal(
            environment,
            task_info["visualization_source"],
            tag="start",
            img_adr=os.path.join(self.log_dir, time_to_write),
        )
        self.log_start_goal(
            environment,
            task_info["visualization_target"],
            tag="goal",
            img_adr=os.path.join(self.log_dir, time_to_write),
        )

        self.log_queue = []
        self.action_queue = []
        self.disturbance_distance_queue = []
        if self.add_top_down_view:
            self.top_down_queue = []

    def log(self, environment, action_str="", disturbance_str=""):
        """Queue the current frame, action and disturbance string of one step."""
        self.action_queue.append(action_str)
        self.disturbance_distance_queue.append(disturbance_str)

        image_tensor = environment.current_frame
        self.log_queue.append(image_tensor)

        if self.add_top_down_view:
            # Reference: https://github.com/allenai/ai2thor/pull/814
            # NOTE(review): this issues `AddThirdPartyCamera` on every step while
            # always reading frame index 0 — confirm whether repeatedly adding
            # cameras is intended.
            event = environment.controller.step(action="GetMapViewCameraProperties")
            event = environment.controller.step(
                action="AddThirdPartyCamera", **event.metadata["actionReturn"]
            )
            self.top_down_queue.append(event.third_party_camera_frames[0])

    def log_start_goal(self, env, task_info, tag, img_adr):
        """Reset the scene, re-stage object and agent, and save snapshot images.

        # Parameters

        env : Environment exposing a `controller`.
        task_info : Dict with `"object_location"`, `"object_id"` and `"agent_pose"`.
        tag : Suffix inserted in the file names (e.g. `"start"` / `"goal"`).
        img_adr : Path prefix for the written files.
        """
        object_location = task_info["object_location"]
        object_id = task_info["object_id"]
        agent_state = task_info["agent_pose"]
        this_controller = env.controller
        scene = this_controller.last_event.metadata[
            "sceneName"
        ]  # maybe we need to reset env actually]
        reset_environment_and_additional_commands(this_controller, scene)
        # for start arm from high up as a cheating, this block is very important. never remove
        event1, event2, event3 = initialize_arm(this_controller)
        if not (
            event1.metadata["lastActionSuccess"]
            and event2.metadata["lastActionSuccess"]
            and event3.metadata["lastActionSuccess"]
        ):
            print("ERROR: ARM MOVEMENT FAILED in logging! SHOULD NEVER HAPPEN")

        event = transport_wrapper(this_controller, object_id, object_location)
        if not event.metadata["lastActionSuccess"]:
            print("ERROR: oh no could not transport in logging")

        event = this_controller.step(
            dict(
                action="TeleportFull",
                standing=True,
                x=agent_state["position"]["x"],
                y=agent_state["position"]["y"],
                z=agent_state["position"]["z"],
                rotation=dict(
                    x=agent_state["rotation"]["x"],
                    y=agent_state["rotation"]["y"],
                    z=agent_state["rotation"]["z"],
                ),
                horizon=agent_state["cameraHorizon"],
            )
        )
        if not event.metadata["lastActionSuccess"]:
            print("ERROR: oh no could not teleport in logging")

        image_tensor = this_controller.last_event.frame
        image_dir = img_adr + "_" + tag + ".png"
        # Channel order is reversed before handing the frame to OpenCV.
        cv2.imwrite(image_dir, image_tensor[:, :, [2, 1, 0]])

        if self.add_depth_map:
            depth = this_controller.last_event.depth_frame.copy()  # (H, W)
            depth[depth > 5.0] = 5.0  # clip far values so near structure stays visible
            norm = matplotlib.colors.Normalize(vmin=depth.min(), vmax=depth.max())
            # `plt.get_cmap` already returns the colormap object; the former
            # `cm.get_cmap(...)` wrapper was redundant and is no longer available
            # in recent matplotlib releases.
            rgb = plt.get_cmap("viridis")(norm(depth))[:, :, :3]  # [0,1]
            rgb = (rgb * 255).astype(np.uint8)

            depth_dir = img_adr + "_" + tag + "_depth.png"
            cv2.imwrite(depth_dir, rgb[:, :, [2, 1, 0]])


def save_image_list_to_gif(image_list, gif_name, gif_dir):
    """Write a sequence of (possibly multi-view) frames as an animated GIF.

    # Parameters

    image_list : Array of shape `(seq_len, cols, height, width, channels)`.
        The `cols` views of each time step are placed side by side.
    gif_name : File name of the GIF.
    gif_dir : Directory to write into; created if missing.
    """
    gif_adr = os.path.join(gif_dir, gif_name)

    seq_len, cols, height, width, channels = image_list.shape

    pallet = np.zeros(
        (seq_len, height, width * cols, channels)
    )  # to support multiple animations in one gif

    for col_ind in range(cols):
        pallet[:, :, col_ind * width : (col_ind + 1) * width, :] = image_list[
            :, col_ind
        ]

    # `exist_ok=True` avoids the check-then-create race of a separate
    # `os.path.exists` test.
    os.makedirs(gif_dir, exist_ok=True)
    imageio.mimsave(gif_adr, pallet.astype(np.uint8), format="GIF", duration=1 / 5)
    print("Saved result in ", gif_adr)


def put_annotation_on_image(images, annotations):
    """Draw each annotation string onto its paired image.

    Images and annotations are paired with `zip`, so surplus entries of the
    longer input are ignored. The text is drawn in black at pixel (10, 10).

    # Returns

    A list of new numpy arrays, one per annotated image.
    """
    text_origin = (10, 10)
    text_color = (0, 0, 0)
    annotated = []
    for frame, caption in zip(images, annotations):
        # Imported lazily so PIL is only required when there is work to do.
        from PIL import Image, ImageDraw

        canvas = Image.fromarray(frame)
        ImageDraw.Draw(canvas).text(text_origin, caption, text_color)
        annotated.append(np.array(canvas))

    return annotated


================================================
FILE: allenact_plugins/minigrid_plugin/__init__.py
================================================
from allenact.utils.system import ImportChecker

# Import `babyai` inside an `ImportChecker` context that carries the
# installation hint below.
# NOTE(review): presumably `ImportChecker` surfaces this message when the
# import fails — confirm in `allenact.utils.system`.
with ImportChecker(
    "\n\nPlease install babyai with:\n\n"
    "pip install -e git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd#egg=babyai\n",
):
    import babyai


================================================
FILE: allenact_plugins/minigrid_plugin/configs/__init__.py
================================================


================================================
FILE: allenact_plugins/minigrid_plugin/configs/minigrid_nomemory.py
================================================
"""Experiment Config for MiniGrid tutorial."""

import gym
import torch.nn as nn

from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.minigrid_plugin.minigrid_models import MiniGridSimpleConv
from allenact_plugins.minigrid_plugin.minigrid_tasks import MiniGridTask
from projects.tutorials.minigrid_tutorial import MiniGridTutorialExperimentConfig


class MiniGridNoMemoryExperimentConfig(MiniGridTutorialExperimentConfig):
    """MiniGrid tutorial experiment using the `MiniGridSimpleConv` model."""

    @classmethod
    def tag(cls) -> str:
        """Return the identifier of this experiment."""
        return "MiniGridNoMemory"

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build a `MiniGridSimpleConv` sized from the task actions and sensors.

        The embedding vocabulary sizes are read from the first entry of
        `cls.SENSORS`.
        """
        num_actions = len(MiniGridTask.class_action_names())
        action_space = gym.spaces.Discrete(num_actions)
        observation_space = SensorSuite(cls.SENSORS).observation_spaces
        ego_sensor = cls.SENSORS[0]
        return MiniGridSimpleConv(
            action_space=action_space,
            observation_space=observation_space,
            num_objects=ego_sensor.num_objects,
            num_colors=ego_sensor.num_colors,
            num_states=ego_sensor.num_states,
        )


================================================
FILE: allenact_plugins/minigrid_plugin/data/__init__.py
================================================


================================================
FILE: allenact_plugins/minigrid_plugin/extra_environment.yml
================================================
dependencies:
  - patsy>=0.5.1
  - pip
  - pip:
      - gym-minigrid>=1.0.1
      - pickle5


================================================
FILE: allenact_plugins/minigrid_plugin/extra_requirements.txt
================================================
patsy>=0.5.1
gym-minigrid>=1.0.1
pickle5


================================================
FILE: allenact_plugins/minigrid_plugin/minigrid_environments.py
================================================
import copy
from typing import Optional, Set

import numpy as np
from gym import register
from gym_minigrid.envs import CrossingEnv
from gym_minigrid.minigrid import Lava, Wall


class FastCrossing(CrossingEnv):
    """Similar to `CrossingEnv`, but to support faster task sampling as per
    `repeat_failed_task_for_min_steps` flag in MiniGridTaskSampler.

    The agent's pose after each full `reset` is remembered so that
    `same_seed_reset` can restart the episode on the existing grid without
    regenerating it.
    """

    def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, seed=None):
        """Initialize bookkeeping attributes and the underlying `CrossingEnv`."""
        # Set before `super().__init__` so they exist should the parent
        # constructor trigger `reset`.
        self.init_agent_pos: Optional[np.ndarray] = None
        self.init_agent_dir: Optional[int] = None
        self.step_count: Optional[int] = None
        super(FastCrossing, self).__init__(
            size=size,
            num_crossings=num_crossings,
            obstacle_type=obstacle_type,
            seed=seed,
        )

    def same_seed_reset(self):
        """Restart the episode on the current grid from the recorded start pose.

        # Returns

        The first observation of the restarted episode.
        """
        assert self.init_agent_pos is not None

        # Current position and direction of the agent
        self.agent_pos = self.init_agent_pos
        self.agent_dir = self.init_agent_dir

        # Check that the agent doesn't overlap with an object
        start_cell = self.grid.get(*self.agent_pos)
        assert start_cell is None or start_cell.can_overlap()

        assert self.carrying is None

        # Step count since episode start
        self.step_count = 0

        # Return first observation
        obs = self.gen_obs()
        return obs

    def reset(self, partial_reset: bool = False):
        """Fully reset the environment and record the agent's start pose.

        # Parameters

        partial_reset : Accepted for interface compatibility; unused.

        # Returns

        Whatever the parent `reset` returns (the initial observation in the
        gym API). Previously this value was dropped and `None` was returned.
        """
        obs = super(FastCrossing, self).reset()
        self.init_agent_pos = copy.deepcopy(self.agent_pos)
        self.init_agent_dir = self.agent_dir
        return obs


class AskForHelpSimpleCrossing(CrossingEnv):
    """Corresponds to WC FAULTY SWITCH environment.

    The image observation is zeroed out unless the agent has executed the
    `toggle` action (see `step` and `gen_obs`). Optional exploration rewards
    and death penalties are added on top of the parent's reward.
    """

    def __init__(
        self,
        size=9,
        num_crossings=1,
        obstacle_type=Wall,
        seed=None,
        exploration_reward: Optional[float] = None,
        death_penalty: Optional[float] = None,
        toggle_is_permenant: bool = False,
    ):
        """Initialize bookkeeping attributes and the underlying `CrossingEnv`.

        # Parameters

        exploration_reward : If not `None`, added to the reward the first time
            a cell is visited in an episode.
        death_penalty : If not `None`, added to the reward when the episode
            ends unsuccessfully with steps still remaining.
        toggle_is_permenant : If `True`, the image stays revealed after the
            first `toggle`; otherwise it is revealed only on `toggle` steps.
        """
        # All attributes are set before `super().__init__` so they exist
        # should the parent constructor trigger `reset`/`gen_obs`.
        self.init_agent_pos: Optional[np.ndarray] = None
        self.init_agent_dir: Optional[int] = None
        self.should_reveal_image: bool = False
        self.exploration_reward = exploration_reward
        self.death_penalty = death_penalty

        self.explored_points: Set = set()
        self._was_successful = False
        self.toggle_is_permanent = toggle_is_permenant

        self.step_count: Optional[int] = None

        super(AskForHelpSimpleCrossing, self).__init__(
            size=size,
            num_crossings=num_crossings,
            obstacle_type=obstacle_type,
            seed=seed,
        )

    @property
    def was_successful(self) -> bool:
        """Whether a positive reward has been received in the current episode."""
        return self._was_successful

    def gen_obs(self):
        """Return the parent's observation, with the image zeroed when hidden."""
        obs = super(AskForHelpSimpleCrossing, self).gen_obs()
        if not self.should_reveal_image:
            obs["image"] *= 0
        return obs

    def metrics(self):
        """Return the explored-cell count and the closest approach to the corner.

        `final_distance` is the smallest Manhattan distance from any explored
        cell to `(width - 2, height - 2)`.
        """
        return {
            "explored_count": len(self.explored_points),
            "final_distance": float(
                min(
                    abs(x - (self.width - 2)) + abs(y - (self.height - 2))
                    for x, y in self.explored_points
                )
            ),
        }

    def step(self, action: int):
        """Reveal the observation only if the `toggle` action is executed."""
        if action == self.actions.toggle:
            self.should_reveal_image = True
        else:
            # Stay revealed only when toggling is permanent.
            self.should_reveal_image = (
                self.should_reveal_image and self.toggle_is_permanent
            )

        minigrid_obs, reward, done, info = super(AskForHelpSimpleCrossing, self).step(
            action=action
        )

        assert not self._was_successful, "Called step after done."
        self._was_successful = self._was_successful or (reward > 0)

        # Unsuccessful termination before the step budget ran out.
        if (
            done
            and self.steps_remaining != 0
            and (not self._was_successful)
            and self.death_penalty is not None
        ):
            reward += self.death_penalty

        t = tuple(self.agent_pos)
        if self.exploration_reward is not None:
            if t not in self.explored_points:
                reward += self.exploration_reward
        self.explored_points.add(t)

        return minigrid_obs, reward, done, info

    def same_seed_reset(self):
        """Restart the episode on the current grid from the recorded start pose.

        # Returns

        The first observation of the restarted episode.
        """
        assert self.init_agent_pos is not None
        self._was_successful = False

        # Current position and direction of the agent
        self.agent_pos = self.init_agent_pos
        self.agent_dir = self.init_agent_dir

        self.explored_points.clear()
        self.explored_points.add(tuple(self.agent_pos))
        self.should_reveal_image = False

        # Check that the agent doesn't overlap with an object
        start_cell = self.grid.get(*self.agent_pos)
        assert start_cell is None or start_cell.can_overlap()

        assert self.carrying is None

        # Step count since episode start
        self.step_count = 0

        # Return first observation
        obs = self.gen_obs()
        return obs

    def reset(self, partial_reset: bool = False):
        """Fully reset the environment and the per-episode bookkeeping.

        # Parameters

        partial_reset : Accepted for interface compatibility; unused.

        # Returns

        Whatever the parent `reset` returns (the initial observation in the
        gym API). Previously this value was dropped and `None` was returned.
        """
        # Clear the episode flags first so that an observation generated by
        # the parent reset (through `gen_obs`) is hidden, like any fresh
        # episode's.
        self._was_successful = False
        self.should_reveal_image = False

        obs = super(AskForHelpSimpleCrossing, self).reset()

        self.explored_points.clear()
        self.explored_points.add(tuple(self.agent_pos))
        self.init_agent_pos = copy.deepcopy(self.agent_pos)
        self.init_agent_dir = self.agent_dir
        return obs


class LavaCrossingS25N10(CrossingEnv):
    """`CrossingEnv` preset with `size=25` and `num_crossings=10`."""

    def __init__(self):
        super().__init__(size=25, num_crossings=10)


class LavaCrossingS15N7(CrossingEnv):
    """`CrossingEnv` preset with `size=15` and `num_crossings=7`."""

    def __init__(self):
        super().__init__(size=15, num_crossings=7)


class LavaCrossingS11N7(CrossingEnv):
    """`CrossingEnv` preset with `size=9` and `num_crossings=4`."""

    # NOTE(review): the class name suggests size=11 / num_crossings=7, but the
    # values passed are size=9 / num_crossings=4 — confirm which is intended.
    def __init__(self):
        super().__init__(size=9, num_crossings=4)


# Register the preset crossing environments defined above with gym, each
# under its own id and pointing at the class in this module.
register(
    id="MiniGrid-LavaCrossingS25N10-v0",
    entry_point="allenact_plugins.minigrid_plugin.minigrid_environments:LavaCrossingS25N10",
)

register(
    id="MiniGrid-LavaCrossingS15N7-v0",
    entry_point="allenact_plugins.minigrid_plugin.minigrid_environments:LavaCrossingS15N7",
)

register(
    id="MiniGrid-LavaCrossingS11N7-v0",
    entry_point="allenact_plugins.minigrid_plugin.minigrid_environments:LavaCrossingS11N7",
)


================================================
FILE: allenact_plugins/minigrid_plugin/minigrid_models.py
================================================
import abc
from typing import Callable, Dict, Optional, Tuple, cast

import gym
import numpy as np
import torch
from gym.spaces.dict import Dict as SpaceDict
import torch.nn as nn

from allenact.algorithms.onpolicy_sync.policy import (
    ActorCriticModel,
    Memory,
    DistributionType,
    ActorCriticOutput,
    ObservationType,
)
from allenact.base_abstractions.distributions import Distr, CategoricalDistr
from allenact.embodiedai.models.basic_models import LinearActorCritic, RNNActorCritic
from allenact.utils.misc_utils import prepare_locals_for_super


class MiniGridSimpleConvBase(ActorCriticModel[Distr], abc.ABC):
    """Shared encoder for MiniGrid egocentric-image actor-critic models.

    Every channel of the `"minigrid_ego_image"` observation (object, color,
    state — whichever have a positive vocabulary size) is looked up in its
    own embedding table. The embeddings are concatenated, flattened and
    passed to `self.actor_critic`, which subclasses must assign.
    """

    # Head that consumes the flattened embeddings; set by subclasses.
    actor_critic: ActorCriticModel

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        num_objects: int,
        num_colors: int,
        num_states: int,
        object_embedding_dim: int = 8,
        **kwargs,
    ):
        """Create one embedding table per enabled image channel.

        # Parameters

        action_space : Action space of the agent.
        observation_space : Must contain `"minigrid_ego_image"` with a square
            `(view, view, channels)` shape of at least 3x3.
        num_objects : Object vocabulary size; `0` disables the channel.
        num_colors : Color vocabulary size; `0` disables the channel.
        num_states : State vocabulary size; `0` disables the channel.
        object_embedding_dim : Embedding size shared by all channels.
        kwargs : Accepted and ignored.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        self.num_objects = num_objects
        self.object_embedding_dim = object_embedding_dim

        vis_input_shape = observation_space["minigrid_ego_image"].shape
        agent_view_x, agent_view_y, view_channels = vis_input_shape
        assert agent_view_x == agent_view_y
        self.agent_view = agent_view_x
        self.view_channels = view_channels

        assert (np.array(vis_input_shape[:2]) >= 3).all(), (
            "MiniGridSimpleConvRNN requires that the input size be at least 3x3."
        )

        # Counts the enabled channels; each enabled channel records the index
        # it occupies in the image's last dimension.
        self.num_channels = 0

        if self.num_objects > 0:
            # Object embedding
            self.object_embedding = nn.Embedding(
                num_embeddings=num_objects, embedding_dim=self.object_embedding_dim
            )
            self.object_channel = self.num_channels
            self.num_channels += 1

        self.num_colors = num_colors
        if self.num_colors > 0:
            # Same dimensionality used for colors and states
            self.color_embedding = nn.Embedding(
                num_embeddings=num_colors, embedding_dim=self.object_embedding_dim
            )
            self.color_channel = self.num_channels
            self.num_channels += 1

        self.num_states = num_states
        if self.num_states > 0:
            self.state_embedding = nn.Embedding(
                num_embeddings=num_states, embedding_dim=self.object_embedding_dim
            )
            self.state_channel = self.num_channels
            self.num_channels += 1

        # The enabled channels must account for every image channel.
        assert self.num_channels == self.view_channels > 0

        self.ac_key = "enc"
        # Reusable single-entry observation dict handed to `actor_critic`.
        self.observations_for_ac: Dict[str, Optional[torch.Tensor]] = {
            self.ac_key: None
        }

        self.num_agents = 1

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Embed the egocentric image and run the actor-critic head.

        # Parameters

        observations : Must contain `"minigrid_ego_image"`, whose last three
            dimensions are `(row, col, channel)`.
        memory : Passed through to `self.actor_critic`.
        prev_actions : Passed through to `self.actor_critic`.
        masks : Its first three dimensions are read as
            `(nsteps, nsamplers, nagents)`.

        # Returns

        The output and memory returned by `self.actor_critic`.
        """
        minigrid_ego_image = cast(torch.Tensor, observations["minigrid_ego_image"])
        # A 6-dimensional image carries an explicit agent dimension. The
        # previous check compared the shape itself to 6 and so was never true.
        use_agent = len(minigrid_ego_image.shape) == 6
        nrow, ncol, nchannels = minigrid_ego_image.shape[-3:]
        nsteps, nsamplers, nagents = masks.shape[:3]

        assert nrow == ncol == self.agent_view
        assert nchannels == self.view_channels == self.num_channels

        embed_list = []
        if self.num_objects > 0:
            ego_object_embeds = self.object_embedding(
                minigrid_ego_image[..., self.object_channel].long()
            )
            embed_list.append(ego_object_embeds)
        if self.num_colors > 0:
            ego_color_embeds = self.color_embedding(
                minigrid_ego_image[..., self.color_channel].long()
            )
            embed_list.append(ego_color_embeds)
        if self.num_states > 0:
            ego_state_embeds = self.state_embedding(
                minigrid_ego_image[..., self.state_channel].long()
            )
            embed_list.append(ego_state_embeds)
        ego_embeds = torch.cat(embed_list, dim=-1)

        # Flatten the spatial and embedding dimensions into one feature vector.
        if use_agent:
            self.observations_for_ac[self.ac_key] = ego_embeds.view(
                nsteps, nsamplers, nagents, -1
            )
        else:
            self.observations_for_ac[self.ac_key] = ego_embeds.view(
                nsteps, nsamplers * nagents, -1
            )

        # noinspection PyCallingNonCallable
        out, mem_return = self.actor_critic(
            observations=self.observations_for_ac,
            memory=memory,
            prev_actions=prev_actions,
            masks=masks,
        )

        # Drop the reference so the tensor is not kept alive between calls.
        self.observations_for_ac[self.ac_key] = None

        return out, mem_return


class MiniGridSimpleConvRNN(MiniGridSimpleConvBase):
    """MiniGrid embedding encoder followed by an `RNNActorCritic` head."""

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        num_objects: int,
        num_colors: int,
        num_states: int,
        object_embedding_dim: int = 8,
        hidden_size=512,
        num_layers=1,
        rnn_type="GRU",
        head_type: Callable[
            ..., ActorCriticModel[CategoricalDistr]
        ] = LinearActorCritic,
        **kwargs,
    ):
        """Build the shared encoder and the recurrent actor-critic head.

        The head's input space is a flat `Box` whose length is
        `object_embedding_dim * view_x * view_y * view_channels`.
        """
        # Must stay the first statement: `locals()` is forwarded to the base
        # class, so no extra local variables may exist at this point.
        super().__init__(**prepare_locals_for_super(locals()))

        self._hidden_size = hidden_size

        view_rows, view_cols, n_channels = observation_space["minigrid_ego_image"].shape
        flat_embedding_dim = (
            self.object_embedding_dim * view_rows * view_cols * n_channels
        )
        encoded_space = SpaceDict(
            {
                self.ac_key: gym.spaces.Box(
                    low=np.float32(-1.0),
                    high=np.float32(1.0),
                    shape=(flat_embedding_dim,),
                )
            }
        )
        self.actor_critic = RNNActorCritic(
            input_uuid=self.ac_key,
            action_space=action_space,
            observation_space=encoded_space,
            hidden_size=hidden_size,
            num_layers=num_layers,
            rnn_type=rnn_type,
            head_type=head_type,
        )
        self.memory_key = "rnn"

        self.train()

    @property
    def num_recurrent_layers(self):
        """Number of recurrent layers, as reported by the head."""
        return self.actor_critic.num_recurrent_layers

    @property
    def recurrent_hidden_state_size(self):
        """Hidden size passed at construction."""
        return self._hidden_size

    def _recurrent_memory_specification(self):
        """Describe the single recurrent memory tensor stored under `memory_key`."""
        dims = (
            ("layer", self.num_recurrent_layers),
            ("sampler", None),
            ("hidden", self.recurrent_hidden_state_size),
        )
        return {self.memory_key: (dims, torch.float32)}


class MiniGridSimpleConv(MiniGridSimpleConvBase):
    """MiniGrid embedding encoder followed by a memory-free `LinearActorCritic`."""

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        num_objects: int,
        num_colors: int,
        num_states: int,
        object_embedding_dim: int = 8,
        **kwargs,
    ):
        """Build the shared encoder and the linear actor-critic head.

        The head's input space is a flat `Box` whose length is
        `object_embedding_dim * view_x * view_y * view_channels`.
        """
        # Must stay the first statement: `locals()` is forwarded to the base
        # class, so no extra local variables may exist at this point.
        super().__init__(**prepare_locals_for_super(locals()))

        view_rows, view_cols, n_channels = observation_space["minigrid_ego_image"].shape
        flat_embedding_dim = (
            self.object_embedding_dim * view_rows * view_cols * n_channels
        )
        encoded_space = SpaceDict(
            {
                self.ac_key: gym.spaces.Box(
                    low=np.float32(-1.0),
                    high=np.float32(1.0),
                    shape=(flat_embedding_dim,),
                )
            }
        )
        self.actor_critic = LinearActorCritic(
            self.ac_key,
            action_space=action_space,
            observation_space=encoded_space,
        )
        self.memory_key = None

        self.train()

    @property
    def num_recurrent_layers(self):
        """This model has no recurrent layers."""
        return 0

    @property
    def recurrent_hidden_state_size(self):
        """This model has no recurrent state."""
        return 0

    # noinspection PyMethodMayBeStatic
    def _recurrent_memory_specification(self):
        """Return `None`: no recurrent memory is used."""
        return None


================================================
FILE: allenact_plugins/minigrid_plugin/minigrid_offpolicy.py
================================================
import math
import queue
import random
from collections import defaultdict
from typing import Dict, Tuple, Any, cast, List, Union, Optional

import babyai
import blosc
import numpy as np
import pickle5 as pickle
import torch
from gym_minigrid.minigrid import MiniGridEnv

from allenact.algorithms.offpolicy_sync.losses.abstract_offpolicy_loss import Memory
from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.algorithms.onpolicy_sync.storage import (
    ExperienceStorage,
    StreamingStorageMixin,
)
from allenact.base_abstractions.misc import GenericAbstractLoss, LossOutput, ModelType
from allenact.utils.misc_utils import partition_limits
from allenact.utils.system import get_logger
from allenact_plugins.minigrid_plugin.minigrid_sensors import MiniGridMissionSensor

_DATASET_CACHE: Dict[str, Any] = {}


class MiniGridOffPolicyExpertCELoss(GenericAbstractLoss):
    """Cross-entropy imitation loss evaluated on batches of expert data.

    The loss is the negative mean log-probability that the model's action
    distribution assigns to the expert actions stored in the batch.
    """

    def __init__(self, total_episodes_in_epoch: Optional[int] = None):
        super().__init__()
        # Only stored; nothing in this class reads it back.
        self.total_episodes_in_epoch = total_episodes_in_epoch

    def loss(  # type: ignore
        self,
        *,  # No positional arguments
        model: ModelType,
        batch: ObservationType,
        batch_memory: Memory,
        stream_memory: Memory,
    ) -> LossOutput:
        """Compute the expert cross-entropy loss for one batch.

        # Parameters

        model : Model whose `forward` yields the action distributions.
        batch : Must contain the keys `"minigrid_ego_image"`, `"masks"` and
            `"expert_action"`; the first two dimensions of the image tensor
            are read as (rollout length, number of rollouts).
        batch_memory : Passed through unchanged to the returned `LossOutput`.
        stream_memory : Recurrent memory carried between batches. When empty
            it is filled with zeros according to the model's
            `recurrent_memory_specification`.

        # Returns

        A `LossOutput` holding the loss value, an `"expert_ce"` info entry,
        both memories and the batch size (`rollout_len * nrollouts`).
        """
        ego_images = cast(torch.Tensor, batch["minigrid_ego_image"])
        rollout_len, nrollouts = ego_images.shape[:2]

        # Lazily create zeroed recurrent memory on the first call.
        if len(stream_memory) == 0:
            memory_spec = model.recurrent_memory_specification
            for key in memory_spec:
                dims_template, dtype = memory_spec[key]

                # Each template entry is indexed as (dimension name, size); the
                # "sampler" dimension gets its size from the current batch.
                sampler_dim = [entry[0] for entry in dims_template].index("sampler")
                sizes = [entry[1] for entry in dims_template]
                sizes[sampler_dim] = nrollouts

                zero_memory = torch.zeros(
                    *sizes,
                    dtype=dtype,
                    device=ego_images.device,
                )
                stream_memory.check_append(
                    key=key,
                    tensor=zero_memory,
                    sampler_dim=sampler_dim,
                )

        # Forward data (through the actor and critic)
        ac_out, stream_memory = model.forward(
            observations=batch,
            memory=stream_memory,
            prev_actions=None,  # type:ignore
            masks=cast(torch.FloatTensor, batch["masks"]),
        )

        # Negative mean log-likelihood of the expert's actions.
        expert_log_probs = ac_out.distributions.log_prob(batch["expert_action"])
        expert_ce_loss = -expert_log_probs.mean()

        return LossOutput(
            value=expert_ce_loss,
            info={"expert_ce": expert_ce_loss.item()},
            per_epoch_info={},
            batch_memory=batch_memory,
            stream_memory=stream_memory,
            bsize=rollout_len * nrollouts,
        )


def transform_demos(demos):
    """Expand compressed demos into per-step `(obs, action, done)` tuples.

    A modified version of `babyai.utils.demos.transform_demos` where we use
    pickle 5 instead of standard pickle.

    Each demo is indexed as `(mission, compressed_images, directions,
    actions)`. The images are blosc-decompressed and unpickled, and one
    tuple per observation is produced, with `done` set only on the last
    observation of the demo.
    """
    expanded_demos = []
    for demo in demos:
        mission = demo[0]
        compressed_images = demo[1]
        directions = demo[2]
        actions = demo[3]

        # Decompress first, then unpickle the resulting bytes.
        all_images = pickle.loads(
            blosc.blosc_extension.decompress(compressed_images, False)
        )

        n_observations = all_images.shape[0]
        assert (
            len(directions) == len(actions) == n_observations
        ), "error transforming demos"

        final_step = n_observations - 1
        expanded_demos.append(
            [
                (
                    {
                        "image": all_images[step],
                        "direction": directions[step],
                        "mission": mission,
                    },
                    actions[step],
                    step == final_step,
                )
                for step in range(n_observations)
            ]
        )
    return expanded_demos


class MiniGridExpertTrajectoryStorage(ExperienceStorage, StreamingStorageMixin):
    """Streaming storage serving fixed-length rollouts cut from expert demos.

    Demonstrations are loaded lazily (see `data`), optionally truncated to a
    maximum number of steps, partitioned across workers, and then streamed
    through one FIFO queue per sampler. Every call to `next_batch` pops
    `rollout_len` steps from each sampler queue, refilling a queue with
    randomly ordered demonstrations whenever it runs dry.
    """

    def __init__(
        self,
        data_path: str,
        num_samplers: int,
        rollout_len: int,
        instr_len: Optional[int],
        restrict_max_steps_in_dataset: Optional[int] = None,
        device: torch.device = torch.device("cpu"),
    ):
        """Create the storage without loading any data yet.

        # Parameters

        data_path : Path handed to `babyai.utils.load_demos`.
        num_samplers : Total number of rollout streams across all workers.
        rollout_len : Number of steps per sampler in every batch.
        instr_len : If not `None`, a `MiniGridMissionSensor` with this
            instruction length is used to add `"minigrid_mission"` to batches.
        restrict_max_steps_in_dataset : If not `None`, demonstrations are kept
            (in order) only until their cumulative length reaches this value.
        device : Device on which batch tensors are created.
        """
        super(MiniGridExpertTrajectoryStorage, self).__init__()
        self.data_path = data_path
        self._data: Optional[
            List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]
        ] = None
        self.restrict_max_steps_in_dataset = restrict_max_steps_in_dataset

        self.original_num_samplers = num_samplers
        self.num_samplers = num_samplers

        self.rollout_len = rollout_len
        self.instr_len = instr_len

        self.current_worker = 0
        self.num_workers = 1

        self.minigrid_mission_sensor: Optional[MiniGridMissionSensor] = None
        if instr_len is not None:
            self.minigrid_mission_sensor = MiniGridMissionSensor(instr_len)

        self.rollout_queues = []
        self._remaining_inds = []
        self.sampler_to_num_steps_in_queue = []
        self._total_experiences = 0

        self.device = device

    @property
    def data(self) -> List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]:
        """This worker's share of the demonstrations, loaded on first access.

        Loading also (re)creates the per-sampler queues and pre-fills each of
        them with at least `rollout_len` steps.
        """
        if self._data is None:
            if self.data_path not in _DATASET_CACHE:
                get_logger().info(
                    f"Loading minigrid dataset from {self.data_path} for first time..."
                )
                _DATASET_CACHE[self.data_path] = babyai.utils.load_demos(self.data_path)
                assert (
                    _DATASET_CACHE[self.data_path] is not None
                    and len(_DATASET_CACHE[self.data_path]) != 0
                )
                get_logger().info(
                    "Loading minigrid dataset complete, it contains {} trajectories".format(
                        len(_DATASET_CACHE[self.data_path])
                    )
                )
            self._data = _DATASET_CACHE[self.data_path]

            if self.restrict_max_steps_in_dataset is not None:
                # Keep whole demonstrations until the step budget is reached;
                # the last kept demonstration may overshoot the budget.
                restricted_data = []
                cur_len = 0
                for d in self._data:
                    if cur_len >= self.restrict_max_steps_in_dataset:
                        break
                    restricted_data.append(d)
                    cur_len += len(d[2])
                self._data = restricted_data

            # Slicing creates a new list, so the cached dataset is not mutated.
            parts = partition_limits(len(self._data), self.num_workers)
            self._data = self._data[
                parts[self.current_worker] : parts[self.current_worker + 1]
            ]

            self.rollout_queues = [queue.Queue() for _ in range(self.num_samplers)]
            self.sampler_to_num_steps_in_queue = [0 for _ in range(self.num_samplers)]
            # `_data` is already set here, so the `self.data` accesses made
            # while filling the queues do not re-enter this branch.
            for it, q in enumerate(self.rollout_queues):
                self._fill_rollout_queue(q, it)

        return self._data

    def set_partition(self, index: int, num_parts: int):
        """Assign this storage shard `index` out of `num_parts` and drop all state.

        The data and queues are rebuilt lazily on the next access to `data`.
        """
        self.current_worker = index
        self.num_workers = num_parts

        self.num_samplers = int(math.ceil(self.original_num_samplers / num_parts))

        self._data = None

        # Indices left over from the previous partition refer to a different
        # (possibly longer) list of demonstrations; discard them so they can
        # neither go out of range nor select unintended trajectories.
        self._remaining_inds = []

        for q in self.rollout_queues:
            try:
                while True:
                    q.get_nowait()
            except queue.Empty:
                pass
        self.rollout_queues = []

    def initialize(self, *, observations: ObservationType, **kwargs):
        """Reset the stream and force the data to load; `observations` is unused."""
        self.reset_stream()
        assert len(self.data) != 0

    def add(
        self,
        observations: ObservationType,
        memory: Optional[Memory],
        actions: torch.Tensor,
        action_log_probs: torch.Tensor,
        value_preds: torch.Tensor,
        rewards: torch.Tensor,
        masks: torch.Tensor,
    ):
        """No-op: this storage only serves pre-recorded expert data."""
        pass

    def to(self, device: torch.device):
        """Set the device on which subsequently built batches are created."""
        self.device = device

    @property
    def total_experiences(self) -> int:
        """Number of steps returned so far by `next_batch`."""
        return self._total_experiences

    def reset_stream(self):
        """Restart streaming by re-applying the current partition."""
        self.set_partition(index=self.current_worker, num_parts=self.num_workers)

    def empty(self) -> bool:
        """Always `False`."""
        return False

    def _get_next_ind(self):
        """Pop the next demonstration index, reshuffling all indices when exhausted."""
        if len(self._remaining_inds) == 0:
            self._remaining_inds = list(range(len(self.data)))
            random.shuffle(self._remaining_inds)
        return self._remaining_inds.pop()

    def _fill_rollout_queue(self, q: queue.Queue, sampler: int):
        """Append whole demonstrations to the empty queue `q` of `sampler`.

        Demonstrations are added until the sampler has at least `rollout_len`
        queued steps. Each queued item is `(obs, action, done, is_first_obs)`.
        """
        assert q.empty()

        while self.sampler_to_num_steps_in_queue[sampler] < self.rollout_len:
            next_ind = self._get_next_ind()

            for i, step in enumerate(transform_demos([self.data[next_ind]])[0]):
                q.put((*step, i == 0))
                self.sampler_to_num_steps_in_queue[sampler] += 1

        return True

    def get_data_for_rollout_ind(self, sampler_ind: int) -> Dict[str, torch.Tensor]:
        """Pop `rollout_len` steps for one sampler and pack them into tensors.

        # Returns

        A dict with `"masks"` (steps x 1, float32, 0 at the first observation
        of a demonstration and 1 elsewhere), `"minigrid_ego_image"` (stacked
        step images), `"expert_action"` (steps, int64) and, when a mission
        sensor is configured, `"minigrid_mission"`.
        """
        masks: List[bool] = []
        minigrid_ego_image = []
        minigrid_mission = []
        expert_actions = []
        q = self.rollout_queues[sampler_ind]
        while len(masks) != self.rollout_len:
            if q.empty():
                assert self.sampler_to_num_steps_in_queue[sampler_ind] == 0
                self._fill_rollout_queue(q, sampler_ind)

            # Queue items are (obs dict, expert action, done flag, is-first flag);
            # the done flag is not needed here.
            obs, expert_action, _, is_first_obs = q.get_nowait()
            self.sampler_to_num_steps_in_queue[sampler_ind] -= 1

            masks.append(not is_first_obs)
            minigrid_ego_image.append(obs["image"])
            if self.minigrid_mission_sensor is not None:
                # noinspection PyTypeChecker
                minigrid_mission.append(
                    self.minigrid_mission_sensor.get_observation(
                        env=None, task=None, minigrid_output_obs=obs
                    )
                )
            expert_actions.append([expert_action])

        to_return = {
            "masks": torch.tensor(masks, device=self.device, dtype=torch.float32).view(
                self.rollout_len, 1  # steps x mask
            ),
            "minigrid_ego_image": torch.stack(
                [torch.tensor(img, device=self.device) for img in minigrid_ego_image],
                dim=0,
            ),  # steps x height x width x channels
            "expert_action": torch.tensor(
                expert_actions, device=self.device, dtype=torch.int64
            ).view(
                self.rollout_len  # steps
            ),
        }
        if self.minigrid_mission_sensor is not None:
            to_return["minigrid_mission"] = torch.stack(
                [torch.tensor(m, device=self.device) for m in minigrid_mission], dim=0
            )  # steps x mission_dims
        return to_return

    def next_batch(self) -> Dict[str, torch.Tensor]:
        """Build the next batch by stacking one rollout per sampler along dim 1."""
        all_data = defaultdict(list)
        for rollout_ind in range(self.num_samplers):
            data_for_ind = self.get_data_for_rollout_ind(sampler_ind=rollout_ind)
            for key in data_for_ind:
                all_data[key].append(data_for_ind[key])

        self._total_experiences += self.num_samplers * self.rollout_len
        return {
            key: torch.stack(
                all_data[key],
                dim=1,
            )  # new sampler dim
            for key in all_data
        }


================================================
FILE: allenact_plugins/minigrid_plugin/minigrid_sensors.py
================================================
from typing import Optional, Any, cast

import gym
import gym_minigrid.minigrid
import numpy as np
import torch
from babyai.utils.format import InstructionsPreprocessor
from gym_minigrid.minigrid import MiniGridEnv

from allenact.base_abstractions.sensor import Sensor, prepare_locals_for_super
from allenact.base_abstractions.task import Task, SubTaskType

# fmt: off
# Fixed list of instruction tokens. `MiniGridMissionSensor` looks each of
# these up in its preprocessor's vocabulary, in this order, and then caps the
# vocabulary size at the length of this list.
ALL_VOCAB_TOKENS = [
    "a", "after", "and", "ball", "behind", "blue", "box",
    "door", "front", "go", "green", "grey", "in", "key",
    "left", "next", "of", "on", "open", "pick", "purple",
    "put", "red", "right", "the", "then", "to", "up", "yellow",
    "you", "your",
]
# fmt: on


class EgocentricMiniGridSensor(Sensor[MiniGridEnv, Task[MiniGridEnv]]):
    """Sensor returning the agent's egocentric MiniGrid image observation.

    The image is cropped to the first `view_channels` channels and has spatial
    size `agent_view_size` x `agent_view_size`.
    """

    def __init__(
        self,
        agent_view_size: int,
        view_channels: int = 1,
        uuid: str = "minigrid_ego_image",
        **kwargs: Any
    ):
        """Record the view configuration and build the observation space.

        # Parameters

        agent_view_size : Side length of the square egocentric view.
        view_channels : Number of leading image channels to keep.
        uuid : Identifier of this sensor.
        kwargs : Forwarded to the parent constructor.
        """
        self.agent_view_size = agent_view_size
        self.view_channels = view_channels
        # Each count is (largest absolute index in the corresponding
        # gym_minigrid table) + 1.
        self.num_objects = (
            cast(
                int, max(map(abs, gym_minigrid.minigrid.OBJECT_TO_IDX.values()))  # type: ignore
            )
            + 1
        )
        self.num_colors = (
            cast(int, max(map(abs, gym_minigrid.minigrid.COLOR_TO_IDX.values())))  # type: ignore
            + 1
        )
        self.num_states = (
            cast(int, max(map(abs, gym_minigrid.minigrid.STATE_TO_IDX.values())))  # type: ignore
            + 1
        )

        observation_space = self._get_observation_space()

        # NOTE(review): `locals()` is forwarded to the parent constructor, so
        # the names of local variables in this method presumably matter —
        # confirm against `prepare_locals_for_super` before renaming or adding
        # locals here.
        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self) -> gym.Space:
        """Integer box of shape (view, view, channels) bounded by the largest index."""
        return gym.spaces.Box(
            low=0,
            high=max(self.num_objects, self.num_colors, self.num_states) - 1,
            shape=(self.agent_view_size, self.agent_view_size, self.view_channels),
            dtype=int,
        )

    def get_observation(
        self,
        env: MiniGridEnv,
        task: Optional[SubTaskType],
        *args,
        minigrid_output_obs: Optional[np.ndarray] = None,
        **kwargs: Any
    ) -> Any:
        """Return the egocentric image, reusing a supplied observation if possible.

        # Parameters

        env : Environment used to regenerate the observation when
            `minigrid_output_obs` is missing or has the wrong view size.
        task : Unused.
        minigrid_output_obs : Optional observation already produced by the
            environment; it is indexed with the key `"image"`.

        # Returns

        The image restricted to the first `view_channels` channels.
        """
        # The image is indexed with three axes below, so only its two spatial
        # dimensions can be compared against the view size. Comparing the
        # full shape to a 2-tuple would never match and the supplied
        # observation would always be discarded.
        if minigrid_output_obs is not None and minigrid_output_obs["image"].shape[
            :2
        ] == (
            self.agent_view_size,
            self.agent_view_size,
        ):
            img = minigrid_output_obs["image"][:, :, : self.view_channels]
        else:
            env.agent_view_size = self.agent_view_size
            img = env.gen_obs()["image"][:, :, : self.view_channels]

        assert img.dtype == np.uint8
        return img


class MiniGridMissionSensor(Sensor[MiniGridEnv, Task[MiniGridEnv]]):
    """Sensor encoding the mission of an observation as `instr_len` token ids."""

    def __init__(self, instr_len: int, uuid: str = "minigrid_mission", **kwargs: Any):
        """Build the instruction preprocessor with a fixed vocabulary.

        # Parameters

        instr_len : Length that every encoded instruction is cut or padded to.
        uuid : Identifier of this sensor.
        kwargs : Forwarded to the parent constructor.
        """
        # NOTE(review): the statements below are deliberately left untouched
        # because `locals()` is forwarded to the parent constructor, so local
        # variable names presumably matter — confirm before changing them.

        self.instr_preprocessor = InstructionsPreprocessor(
            model_name="TMP_SENSOR", load_vocab_from=None
        )

        # We initialize the vocabulary with a fixed collection of tokens
        # and then ensure that the size cannot exceed this number. This
        # guarantees that sensors on all processes will produce the same
        # values.
        for token in ALL_VOCAB_TOKENS:
            _ = self.instr_preprocessor.vocab[token]
        self.instr_preprocessor.vocab.max_size = len(ALL_VOCAB_TOKENS)

        self.instr_len = instr_len

        observation_space = self._get_observation_space()

        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self) -> gym.Space:
        """Integer box of shape `(instr_len,)` bounded above by the vocabulary size."""
        max_token_id = self.instr_preprocessor.vocab.max_size
        instruction_shape = (self.instr_len,)
        return gym.spaces.Box(
            low=0,
            high=max_token_id,
            shape=instruction_shape,
            dtype=int,
        )

    def get_observation(
        self,
        env: MiniGridEnv,
        task: Optional[SubTaskType],
        *args,
        minigrid_output_obs: Optional[np.ndarray] = None,
        **kwargs: Any
    ) -> Any:
        """Encode the observation's mission as exactly `instr_len` token ids.

        When `minigrid_output_obs` is not given, a fresh observation is
        generated from `env`. Longer encodings are cut off and shorter ones
        are right-padded with zeros. `task` is unused.
        """
        if minigrid_output_obs is None:
            minigrid_output_obs = env.gen_obs()

        tokens = self.instr_preprocessor([minigrid_output_obs]).view(-1)

        # Negative: too many tokens; positive: that many zeros are missing.
        num_missing = self.instr_len - tokens.shape[0]
        if num_missing < 0:
            tokens = tokens[: self.instr_len]
        elif num_missing > 0:
            tokens = torch.nn.functional.pad(
                input=tokens,
                pad=[0, num_missing],
                value=0,
            )

        return tokens.long().numpy()


================================================
FILE: allenact_plugins/minigrid_plugin/minigrid_tasks.py
================================================
import random
from typing import Tuple, Any, List, Dict, Optional, Union, Callable, Sequence, cast

import gym
import networkx as nx
import numpy as np
from gym.utils import seeding
from gym_minigrid.envs import CrossingEnv
from gym_minigrid.minigrid import (
    DIR_TO_VEC,
    IDX_TO_OBJECT,
    MiniGridEnv,
    OBJECT_TO_IDX,
)

from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor, SensorSuite
from allenact.base_abstractions.task import Task, TaskSampler
from allenact.utils.system import get_logger
from allenact_plugins.minigrid_plugin.minigrid_environments import (
    AskForHelpSimpleCrossing,
)


class MiniGridTask(Task[CrossingEnv]):
    """Task around a MiniGrid Env, allows interfacing allenact with MiniGrid
    tasks (currently focused towards LavaCrossing).

    Besides stepping the environment, the task can build a directed graph
    over `(x, y, rotation)` agent states and use shortest paths in that graph
    to answer expert-action queries.
    """

    _ACTION_NAMES: Tuple[str, ...] = ("left", "right", "forward")
    _ACTION_IND_TO_MINIGRID_IND = tuple(
        MiniGridEnv.Actions.__members__[name].value for name in _ACTION_NAMES
    )
    # Graphs shared between tasks that were given the same `task_cache_uid`.
    _CACHED_GRAPHS: Dict[str, nx.DiGraph] = {}
    # (X translation, Y translation, rotation by 90 degrees)
    _NEIGHBOR_OFFSETS = tuple(
        [
            (-1, 0, 0),
            (0, -1, 0),
            (0, 0, -1),
            (1, 0, 0),
            (0, 1, 0),
            (0, 0, 1),
        ]
    )

    # Maps a unit (dx, dy) translation to the agent direction index facing it.
    _XY_DIFF_TO_AGENT_DIR = {
        tuple(vec): dir_ind for dir_ind, vec in enumerate(DIR_TO_VEC)
    }

    def __init__(
        self,
        env: Union[CrossingEnv],
        sensors: Union[SensorSuite, List[Sensor]],
        task_info: Dict[str, Any],
        max_steps: int,
        task_cache_uid: Optional[str] = None,
        corrupt_expert_within_actions_of_goal: Optional[int] = None,
        **kwargs,
    ):
        """Create the task.

        # Parameters

        env : The MiniGrid environment to act in.
        sensors : Sensors used to produce observations.
        task_info : Forwarded to the parent constructor.
        max_steps : Forwarded to the parent constructor.
        task_cache_uid : If given, the navigation graph is shared through
            `_CACHED_GRAPHS` under this key.
        corrupt_expert_within_actions_of_goal : If given, the expert returns
            random actions once the shortest path found so far is at most
            this long (see `query_expert`).
        """
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )
        self._graph: Optional[nx.DiGraph] = None
        self._minigrid_done = False
        self._task_cache_uid = task_cache_uid
        self.corrupt_expert_within_actions_of_goal = (
            corrupt_expert_within_actions_of_goal
        )
        self.closest_agent_has_been_to_goal: Optional[float] = None

    @property
    def action_space(self) -> gym.spaces.Discrete:
        """Discrete space with one entry per name in `_ACTION_NAMES`."""
        return gym.spaces.Discrete(len(self._ACTION_NAMES))

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Render the underlying environment with the given `mode`."""
        return self.env.render(mode=mode)

    def _step(self, action: int) -> RLStepResult:
        """Apply the action with index `action` (into `_ACTION_NAMES`) to the env."""
        assert isinstance(action, int)
        action = cast(int, action)

        minigrid_obs, reward, self._minigrid_done, info = self.env.step(
            action=self._ACTION_IND_TO_MINIGRID_IND[action]
        )

        return RLStepResult(
            observation=self.get_observations(minigrid_output_obs=minigrid_obs),
            reward=reward,
            done=self.is_done(),
            info=info,
        )

    def get_observations(
        self, *args, minigrid_output_obs: Optional[Dict[str, Any]] = None, **kwargs
    ) -> Any:
        """Query the sensor suite, passing along an already computed env observation."""
        return self.sensor_suite.get_observations(
            env=self.env, task=self, minigrid_output_obs=minigrid_output_obs
        )

    def reached_terminal_state(self) -> bool:
        """Whether the environment reported `done` on the last step."""
        return self._minigrid_done

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Names of the actions available to the agent."""
        return cls._ACTION_NAMES

    def close(self) -> None:
        """No-op."""
        pass

    def metrics(self) -> Dict[str, Any]:
        """Parent metrics, plus env metrics (as floats) and a `"success"` flag.

        Success is read from `env.was_successful` when the environment has
        that attribute and otherwise means a positive cumulative reward.
        """
        # noinspection PyUnresolvedReferences,PyCallingNonCallable
        env_metrics = self.env.metrics() if hasattr(self.env, "metrics") else {}
        return {
            **super(MiniGridTask, self).metrics(),
            **{k: float(v) for k, v in env_metrics.items()},
            "success": int(
                self.env.was_successful
                if hasattr(self.env, "was_successful")
                else self.cumulative_reward > 0
            ),
        }

    @property
    def graph_created(self):
        """Whether this task already holds a navigation graph."""
        return self._graph is not None

    @property
    def graph(self):
        """The navigation graph, generated (or fetched from the cache) on first use."""
        if self._graph is None:
            if self._task_cache_uid is not None:
                if self._task_cache_uid not in self._CACHED_GRAPHS:
                    self._CACHED_GRAPHS[self._task_cache_uid] = self.generate_graph()
                self._graph = self._CACHED_GRAPHS[self._task_cache_uid]
            else:
                self._graph = self.generate_graph()
        return self._graph

    @graph.setter
    def graph(self, graph: nx.DiGraph):
        self._graph = graph

    @classmethod
    def possible_neighbor_offsets(cls) -> Tuple[Tuple[int, int, int], ...]:
        """Offsets to the states that may be one action away.

        Tuples have the format (X translation, Y translation, rotation by 90
        degrees) and exactly one entry of each tuple is non-zero. A constant
        is returned; this method can be overridden if anything more complex
        needs to be done.
        """
        return cls._NEIGHBOR_OFFSETS

    @classmethod
    def _add_from_to_edge(
        cls,
        g: nx.DiGraph,
        s: Tuple[int, int, int],
        t: Tuple[int, int, int],
    ):
        """Adds nodes and corresponding edges to existing nodes.
        This approach avoids adding the same edge multiple times.
        Pre-requisite knowledge about MiniGrid:
        DIR_TO_VEC = [
            # Pointing right (positive X)
            np.array((1, 0)),
            # Down (positive Y)
            np.array((0, 1)),
            # Pointing left (negative X)
            np.array((-1, 0)),
            # Up (negative Y)
            np.array((0, -1)),
        ]
        or
        AGENT_DIR_TO_STR = {
            0: '>',
            1: 'V',
            2: '<',
            3: '^'
        }
        This also implies turning right (clockwise) means:
            agent_dir += 1
        """

        s_x, s_y, s_rot = s
        t_x, t_y, t_rot = t

        x_diff = t_x - s_x
        y_diff = t_y - s_y
        angle_diff = (t_rot - s_rot) % 4

        # If source and target differ by more than one action, continue
        if (x_diff != 0) + (y_diff != 0) + (angle_diff != 0) != 1 or angle_diff == 2:
            return

        action = None
        if angle_diff == 1:
            action = "right"
        elif angle_diff == 3:
            action = "left"
        elif cls._XY_DIFF_TO_AGENT_DIR.get((x_diff, y_diff)) == s_rot:
            # if translation is the same direction as source
            # orientation, then it's a valid forward action.
            # `.get` makes translations by more than one cell simply
            # yield no edge instead of raising a `KeyError`.
            action = "forward"
        # Otherwise the source and target aren't one action apart,
        # despite having dx=1 or dy=1, and no edge is added.

        if action is not None:
            g.add_edge(s, t, action=action)

    def _add_node_to_graph(
        self,
        graph: nx.DiGraph,
        s: Tuple[int, int, int],
        valid_node_types: Tuple[str, ...],
        attr_dict: Optional[Dict[Any, Any]] = None,
        include_rotation_free_leaves: bool = False,
    ):
        """Add state `s` to `graph` and connect it to already present neighbors.

        # Parameters

        graph : Graph to extend; nothing happens if `s` is already in it.
        s : The `(x, y, rotation)` state to add.
        valid_node_types : Values of the `"type"` attribute that the agent may
            occupy; edges are only added between two nodes of such types.
        attr_dict : Node attributes. When `None` a warning is logged and the
            node is added without attributes (and hence without edges).
        include_rotation_free_leaves : If `True`, also link `s` to a node
            `(x, y, None)` through an edge with action `"NA"`.
        """
        if s in graph:
            return
        if attr_dict is None:
            get_logger().warning("adding a node with neighbor checks and no attributes")
            # Continue with an attribute-less node, as the warning announces,
            # instead of failing on `**None` below.
            attr_dict = {}
        graph.add_node(s, **attr_dict)

        if include_rotation_free_leaves:
            rot_free_leaf = (*s[:-1], None)
            if rot_free_leaf not in graph:
                graph.add_node(rot_free_leaf)
            graph.add_edge(s, rot_free_leaf, action="NA")

        if attr_dict.get("type") in valid_node_types:
            for o in self.possible_neighbor_offsets():
                t = (s[0] + o[0], s[1] + o[1], (s[2] + o[2]) % 4)
                if t in graph and graph.nodes[t].get("type") in valid_node_types:
                    self._add_from_to_edge(graph, s, t)
                    self._add_from_to_edge(graph, t, s)

    def generate_graph(
        self,
    ) -> nx.DiGraph:
        """The generated graph is based on the fully observable grid (as the
        expert sees it all).

        Every grid cell yields four nodes `(x, y, rotation)`. Cells of type
        `"empty"` or `"goal"` are connected by action-labelled edges, and all
        goal nodes additionally point to a single `"unified_goal"` node.
        """

        image = self.env.grid.encode()
        width, height, _ = image.shape
        graph = nx.DiGraph()

        # In fully observable grid, there shouldn't be any "unseen"
        # Currently dealing with "empty", "wall", "goal", "lava"
        valid_object_ids = [
            OBJECT_TO_IDX[o] for o in ["empty", "wall", "lava", "goal"]
        ]

        # Membership test instead of comparing a union against the valid ids:
        # the latter compares arrays of different lengths whenever an
        # unexpected object is present.
        assert np.all(
            np.isin(image[:, :, 0], valid_object_ids)
        ), "Grid contains objects other than empty, wall, lava and goal."

        # Grid to nodes
        for x in range(width):
            for y in range(height):
                obj_idx, color, state = image[x, y]
                obj_type = IDX_TO_OBJECT[obj_idx]
                for rotation in range(4):
                    self._add_node_to_graph(
                        graph,
                        (x, y, rotation),
                        attr_dict={
                            "type": obj_type,
                            "color": color,
                            "state": state,
                        },
                        valid_node_types=("empty", "goal"),
                    )
                    if obj_type == "goal":
                        if not graph.has_node("unified_goal"):
                            graph.add_node("unified_goal")
                        graph.add_edge((x, y, rotation), "unified_goal")

        return graph

    def query_expert(self, **kwargs) -> Tuple[int, bool]:
        """Return the expert's action for the current state.

        # Returns

        A tuple `(action index, success flag)`. `(-1, False)` is returned
        when the episode is already done, when no path to a goal exists, or
        when the agent's state is already adjacent to `"unified_goal"`. If
        expert corruption is enabled and has been triggered, a random action
        index is returned together with `True`.
        """
        if self._minigrid_done:
            get_logger().warning("Episode is completed, but expert is still queried.")
            return -1, False

        agent_x, agent_y = self.env.agent_pos
        agent_rot = self.env.agent_dir
        source_state_key = (agent_x, agent_y, agent_rot)
        assert source_state_key in self.graph

        try:
            path = nx.shortest_path(self.graph, source_state_key, "unified_goal")
        except (nx.NetworkXNoPath, nx.NodeNotFound):
            # No goal is reachable (or none exists in the graph): the expert
            # has no advice to give.
            return -1, False

        # Number of edges on the path, including the final edge into the
        # virtual "unified_goal" node.
        path_edges = len(path) - 1

        if self.closest_agent_has_been_to_goal is None:
            self.closest_agent_has_been_to_goal = path_edges
        else:
            self.closest_agent_has_been_to_goal = min(
                path_edges, self.closest_agent_has_been_to_goal
            )

        if (
            self.corrupt_expert_within_actions_of_goal is not None
            and self.corrupt_expert_within_actions_of_goal
            >= self.closest_agent_has_been_to_goal
        ):
            return (
                int(self.env.np_random.randint(0, len(self.class_action_names()))),
                True,
            )

        if len(path) == 2:
            # Since "unified_goal" is 1 step away from actual goals
            # if a path like [actual_goal, unified_goal] exists, then
            # you are already at a goal.
            get_logger().warning(
                "Shortest path computations suggest we are at"
                " the target but episode does not think so."
            )
            return -1, False

        next_key_on_shortest_path = path[1]
        return (
            self.class_action_names().index(
                self.graph.get_edge_data(source_state_key, next_key_on_shortest_path)[
                    "action"
                ]
            ),
            True,
        )


class AskForHelpSimpleCrossingTask(MiniGridTask):
    """`MiniGridTask` with an extra `"toggle"` action whose usage is tracked."""

    _ACTION_NAMES = ("left", "right", "forward", "toggle")
    _ACTION_IND_TO_MINIGRID_IND = tuple(
        MiniGridEnv.Actions.__members__[name].value for name in _ACTION_NAMES
    )
    # Separate cache from the parent class's `_CACHED_GRAPHS`.
    _CACHED_GRAPHS: Dict[str, nx.DiGraph] = {}

    def __init__(
        self,
        env: AskForHelpSimpleCrossing,
        sensors: Union[SensorSuite, List[Sensor]],
        task_info: Dict[str, Any],
        max_steps: int,
        **kwargs,
    ):
        """Create the task; all arguments are forwarded to `MiniGridTask`."""
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )

        # One entry per step taken: True when that step's action was "toggle".
        self.did_toggle: List[bool] = []

    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        """Record whether `action` is the toggle action, then step as the parent does."""
        assert isinstance(action, int)

        was_toggle = self._ACTION_NAMES[action] == "toggle"
        self.did_toggle.append(was_toggle)
        return super()._step(action=action)

    def metrics(self) -> Dict[str, Any]:
        """Parent metrics plus `"toggle_percent"`, the fraction of toggle steps.

        The fraction is 0 when no step has been taken yet.
        """
        num_steps = max(len(self.did_toggle), 1)
        all_metrics = dict(super().metrics())
        all_metrics["toggle_percent"] = float(sum(self.did_toggle) / num_steps)
        return all_metrics


class MiniGridTaskSampler(TaskSampler):
    """Task sampler that creates MiniGrid tasks from one re-seeded environment.

    A single environment instance is built at construction time. Every call
    to `next_task` chooses an environment seed, re-seeds and resets that
    environment and wraps it in a new `task_class` instance. Seeds are either
    taken from a fixed list (`num_unique_seeds` / `task_seeds_list`) or drawn
    at random; in the latter case a task whose cumulative reward was `0` can
    be repeated with the same seed until `repeat_failed_task_for_min_steps`
    steps have been taken with that seed.
    """

    def __init__(
        self,
        env_class: Callable[..., Union[MiniGridEnv]],
        sensors: Union[SensorSuite, List[Sensor]],
        env_info: Optional[Dict[str, Any]] = None,
        max_tasks: Optional[int] = None,
        num_unique_seeds: Optional[int] = None,
        task_seeds_list: Optional[List[int]] = None,
        deterministic_sampling: bool = False,
        cache_graphs: Optional[bool] = False,
        task_class: Callable[..., MiniGridTask] = MiniGridTask,
        repeat_failed_task_for_min_steps: int = 0,
        extra_task_kwargs: Optional[Dict] = None,
        **kwargs,
    ):
        """Initializer.

        # Parameters
        env_class : Callable building the environment, invoked as
            `env_class(**env_info)`.
        sensors : A `SensorSuite`, or a list of sensors that is wrapped in one.
        env_info : Keyword arguments for `env_class`. `None` means "no
            arguments".
        max_tasks : Number of tasks after which `next_task` returns `None`.
            `None` means no limit.
        num_unique_seeds : If given, seeds are restricted to
            `range(num_unique_seeds)` (or to `task_seeds_list` if that is
            also given, in which case the lengths must agree).
        task_seeds_list : Explicit list of seeds to sample tasks from.
        deterministic_sampling : If `True`, cycle through the seed list in
            order instead of sampling from it at random.
        cache_graphs : If `True`, tasks are given the stringified seed as
            `task_cache_uid`.
        task_class : Callable building the task.
        repeat_failed_task_for_min_steps : Minimum number of steps to spend on
            a seed whose tasks end with zero cumulative reward before moving
            on to a new seed. Only usable with unrestricted random seeds.
        extra_task_kwargs : Extra keyword arguments passed to `task_class`.
        kwargs : Accepted but unused.

        # Raises
        AssertionError : If the combination of arguments is inconsistent.
        NotImplementedError : If `num_unique_seeds` is given together with
            `repeat_failed_task_for_min_steps > 0`.
        """
        super(MiniGridTaskSampler, self).__init__()
        self.sensors = (
            SensorSuite(sensors) if not isinstance(sensors, SensorSuite) else sensors
        )
        self.max_tasks = max_tasks
        self.num_unique_seeds = num_unique_seeds
        self.cache_graphs = cache_graphs
        self.deterministic_sampling = deterministic_sampling
        self.repeat_failed_task_for_min_steps = repeat_failed_task_for_min_steps
        self.extra_task_kwargs = (
            extra_task_kwargs if extra_task_kwargs is not None else {}
        )

        # State used to decide whether the previous seed should be repeated.
        self._last_env_seed: Optional[int] = None
        self._last_task: Optional[MiniGridTask] = None
        self._number_of_steps_taken_with_task_seed = 0

        assert (not deterministic_sampling) or repeat_failed_task_for_min_steps <= 0, (
            "If `deterministic_sampling` is True then we require"
            " `repeat_failed_task_for_min_steps <= 0`"
        )
        assert (not self.cache_graphs) or self.num_unique_seeds is not None, (
            "When caching graphs you must specify"
            " a number of unique tasks to sample from."
        )
        assert (self.num_unique_seeds is None) or (
            0 < self.num_unique_seeds
        ), "`num_unique_seeds` must be a positive integer."

        # Reconcile `num_unique_seeds` with `task_seeds_list`: whichever is
        # given determines the other.
        self.task_seeds_list = task_seeds_list
        if self.task_seeds_list is not None:
            if self.num_unique_seeds is not None:
                assert self.num_unique_seeds == len(
                    self.task_seeds_list
                ), "`num_unique_seeds` must equal the length of `task_seeds_list` if both specified."
            self.num_unique_seeds = len(self.task_seeds_list)
        elif self.num_unique_seeds is not None:
            self.task_seeds_list = list(range(self.num_unique_seeds))
        # NOTE(review): this checks the `num_unique_seeds` argument, not the
        # value possibly derived from `task_seeds_list` above.
        if num_unique_seeds is not None and repeat_failed_task_for_min_steps > 0:
            raise NotImplementedError(
                "`repeat_failed_task_for_min_steps` must be <=0 if number"
                " of unique seeds is not None."
            )

        assert (
            not self.cache_graphs
        ) or self.num_unique_seeds <= 1000, "Too many tasks (graphs) to cache"
        assert (not deterministic_sampling) or (
            self.num_unique_seeds is not None
        ), "Cannot use deterministic sampling when `num_unique_seeds` is `None`."

        if (not deterministic_sampling) and self.max_tasks:
            get_logger().warning(
                "`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,"
                " this might be a mistake when running testing."
            )

        # `env_info` defaults to `None`; unpacking `None` with `**` would raise
        # a `TypeError`, so fall back to an empty mapping.
        self.env = env_class(**(env_info if env_info is not None else {}))
        self.task_class = task_class

        self.np_seeded_random_gen, _ = seeding.np_random(random.randint(0, 2**31 - 1))

        self.num_tasks_generated = 0

    @property
    def length(self) -> Union[int, float]:
        """Number of tasks left to sample (`inf` when `max_tasks` is `None`)."""
        return (
            float("inf")
            if self.max_tasks is None
            else self.max_tasks - self.num_tasks_generated
        )

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Number of unique seeds, or `None` if seeds are unrestricted."""
        return None if self.num_unique_seeds is None else self.num_unique_seeds

    @property
    def last_sampled_task(self) -> Optional[Task]:
        """Not supported by this sampler."""
        raise NotImplementedError

    def next_task(self, force_advance_scene: bool = False) -> Optional[MiniGridTask]:
        """Sample the next task, or return `None` when no tasks remain.

        # Parameters
        force_advance_scene : Unused by this sampler.
        """
        if self.length <= 0:
            return None

        task_cache_uid = None
        repeating = False
        if self.num_unique_seeds is not None:
            # Fixed seed list: walk through it in order or sample from it.
            if self.deterministic_sampling:
                self._last_env_seed = self.task_seeds_list[
                    self.num_tasks_generated % len(self.task_seeds_list)
                ]
            else:
                self._last_env_seed = self.np_seeded_random_gen.choice(
                    self.task_seeds_list
                )
        else:
            if self._last_task is not None:
                self._number_of_steps_taken_with_task_seed += (
                    self._last_task.num_steps_taken()
                )

            # Repeat the previous seed while the step budget for it has not
            # been used up and the previous task earned no reward.
            if (
                self._last_env_seed is not None
                and self._number_of_steps_taken_with_task_seed
                < self.repeat_failed_task_for_min_steps
                and self._last_task.cumulative_reward == 0
            ):
                repeating = True
            else:
                self._number_of_steps_taken_with_task_seed = 0
                self._last_env_seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)

        task_has_same_seed_reset = hasattr(self.env, "same_seed_reset")

        if self.cache_graphs:
            task_cache_uid = str(self._last_env_seed)

        if repeating and task_has_same_seed_reset:
            # noinspection PyUnresolvedReferences
            self.env.same_seed_reset()
        else:
            self.env.seed(self._last_env_seed)
            self.env.saved_seed = self._last_env_seed
            self.env.reset()

        self.num_tasks_generated += 1
        task = self.task_class(
            env=self.env,
            sensors=self.sensors,
            task_info={},
            max_steps=self.env.max_steps,
            task_cache_uid=task_cache_uid,
            **self.extra_task_kwargs,
        )

        # Same seed as before: reuse the graph the previous task already built.
        if repeating and self._last_task.graph_created:
            task.graph = self._last_task.graph

        self._last_task = task
        return task

    def close(self) -> None:
        """Close the underlying environment."""
        self.env.close()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Always `True` for this sampler."""
        return True

    def reset(self) -> None:
        """Reset the generated-task counter and the environment."""
        self.num_tasks_generated = 0
        self.env.reset()

    def set_seed(self, seed: int) -> None:
        """Re-create the internal random generator from `seed`."""
        self.np_seeded_random_gen, _ = seeding.np_random(seed)


================================================
FILE: allenact_plugins/minigrid_plugin/scripts/__init__.py
================================================


================================================
FILE: allenact_plugins/navigation_plugin/__init__.py
================================================


================================================
FILE: allenact_plugins/navigation_plugin/objectnav/__init__.py
================================================


================================================
FILE: allenact_plugins/navigation_plugin/objectnav/models.py
================================================
"""Baseline models for use in the object navigation task.

Object navigation is currently available as a Task in AI2-THOR and
Facebook's Habitat.
"""

from typing import Optional, List, Dict, cast, Tuple, Sequence

import gym
import torch
import torch.nn as nn
from gym.spaces import Dict as SpaceDict

from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.embodiedai.models import resnet as resnet
from allenact.embodiedai.models.basic_models import SimpleCNN
from allenact.embodiedai.models.visual_nav_models import (
    VisualNavActorCritic,
    FusionType,
)


class CatObservations(nn.Module):
    """Concatenate a fixed, ordered selection of observations along one axis.

    # Attributes
    ordered_uuids : Keys of the observations to concatenate, in order. Must
        not be empty.
    dim : The dimension along which the selected tensors are concatenated.
    """

    def __init__(self, ordered_uuids: Sequence[str], dim: int):
        super().__init__()
        assert len(ordered_uuids) != 0

        self.dim = dim
        self.ordered_uuids = ordered_uuids

    def forward(self, observations: ObservationType):
        """Return the selected observations joined along `self.dim`.

        With a single uuid the corresponding observation is returned as-is
        (no concatenation, no copy).
        """
        selected = [observations[key] for key in self.ordered_uuids]
        if len(selected) == 1:
            return selected[0]
        return torch.cat(selected, dim=self.dim)


class ObjectNavActorCritic(VisualNavActorCritic):
    """Recurrent actor-critic baseline for the object-navigation task.

    The observation embedding fed to the state encoders is the concatenation
    of a visual embedding (omitted when the model is blind) and a learned
    embedding of the goal object type.

    # Attributes
    action_space : The space of actions available to the agent. Currently only discrete
        actions are allowed (so this space will always be of type `gym.spaces.Discrete`).
    observation_space : The observation space expected by the agent. This observation space
        should include (optionally) 'rgb' images and 'depth' images and is required to
        have a component corresponding to the goal `goal_sensor_uuid`.
    goal_sensor_uuid : The uuid of the sensor of the goal object. See `GoalObjectTypeThorSensor`
        as an example of such a sensor.
    hidden_size : The hidden size of the GRU RNN.
    object_type_embedding_dim: The dimensionality of the embedding corresponding to the goal
        object type.
    """

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        goal_sensor_uuid: str,
        # RNN
        hidden_size=512,
        num_rnn_layers=1,
        rnn_type="GRU",
        add_prev_actions=False,
        add_prev_action_null_token=False,
        action_embed_size=6,
        # Aux loss
        multiple_beliefs=False,
        beliefs_fusion: Optional[FusionType] = None,
        auxiliary_uuids: Optional[Sequence[str]] = None,
        # below are custom params
        rgb_uuid: Optional[str] = None,
        depth_uuid: Optional[str] = None,
        object_type_embedding_dim=8,
        trainable_masked_hidden_state: bool = False,
        # perception backbone params,
        backbone="gnresnet18",
        resnet_baseplanes=32,
    ):
        """Initializer.

        `backbone` selects the visual encoder and must be one of
        `"simple_cnn"`, `"gnresnet18"`, `"identity"` or `"projection"`;
        any other value raises `NotImplementedError`. See the class
        documentation for the remaining parameters.
        """
        super().__init__(
            action_space=action_space,
            observation_space=observation_space,
            hidden_size=hidden_size,
            multiple_beliefs=multiple_beliefs,
            beliefs_fusion=beliefs_fusion,
            auxiliary_uuids=auxiliary_uuids,
        )

        self.rgb_uuid = rgb_uuid
        self.depth_uuid = depth_uuid

        self.goal_sensor_uuid = goal_sensor_uuid
        self._n_object_types = self.observation_space.spaces[self.goal_sensor_uuid].n
        self.object_type_embedding_size = object_type_embedding_dim

        self.backbone = backbone
        if backbone in ("simple_cnn", "gnresnet18"):
            # Both learned image encoders share these arguments and both
            # produce `hidden_size` features.
            shared_encoder_kwargs = dict(
                observation_space=observation_space,
                output_size=hidden_size,
                rgb_uuid=rgb_uuid,
                depth_uuid=depth_uuid,
            )
            if backbone == "simple_cnn":
                self.visual_encoder = SimpleCNN(**shared_encoder_kwargs)
            else:  # resnet family
                self.visual_encoder = resnet.GroupNormResNetEncoder(
                    **shared_encoder_kwargs,
                    baseplanes=resnet_baseplanes,
                    ngroups=resnet_baseplanes // 2,
                    make_backbone=getattr(resnet, backbone),
                )
            self.visual_encoder_output_size = hidden_size
            assert self.is_blind == self.visual_encoder.is_blind
        elif backbone in ("identity", "projection"):
            # Observations are used directly: concatenate whichever of the
            # rgb / depth observations were requested along the last axis.
            available_uuids = [
                sensor_uuid
                for sensor_uuid in (self.rgb_uuid, self.depth_uuid)
                if sensor_uuid is not None
            ]
            concatenator = CatObservations(ordered_uuids=available_uuids, dim=-1)
            concatenated_size = sum(
                observation_space[sensor_uuid].shape[-1]
                for sensor_uuid in available_uuids
            )
            if backbone == "projection":
                self.visual_encoder = nn.Sequential(
                    concatenator,
                    nn.Linear(concatenated_size, hidden_size),
                    nn.ReLU(True),
                )
                self.visual_encoder_output_size = hidden_size
            else:
                self.visual_encoder = concatenator
                self.visual_encoder_output_size = concatenated_size
        else:
            raise NotImplementedError

        self.create_state_encoders(
            obs_embed_size=self.goal_visual_encoder_output_dims,
            num_rnn_layers=num_rnn_layers,
            rnn_type=rnn_type,
            add_prev_actions=add_prev_actions,
            add_prev_action_null_token=add_prev_action_null_token,
            prev_action_embed_size=action_embed_size,
            trainable_masked_hidden_state=trainable_masked_hidden_state,
        )

        self.create_actorcritic_head()

        self.create_aux_models(
            obs_embed_size=self.goal_visual_encoder_output_dims,
            action_embed_size=action_embed_size,
        )

        self.object_type_embedding = nn.Embedding(
            num_embeddings=self._n_object_types,
            embedding_dim=object_type_embedding_dim,
        )

        self.train()

    @property
    def is_blind(self) -> bool:
        """Whether the model receives no visual input, i.e. both `rgb_uuid`
        and `depth_uuid` are `None`."""
        return self.rgb_uuid is None and self.depth_uuid is None

    @property
    def goal_visual_encoder_output_dims(self):
        """Size of the embedding returned by `forward_encoder`."""
        total_dims = self.object_type_embedding_size
        if not self.is_blind:
            total_dims = total_dims + self.visual_encoder_output_size
        return total_dims

    def get_object_type_encoding(
        self, observations: Dict[str, torch.Tensor]
    ) -> torch.Tensor:
        """Embed the goal object type found in the batched observations."""
        goal_indices = observations[self.goal_sensor_uuid].to(torch.int64)
        # noinspection PyTypeChecker
        return self.object_type_embedding(goal_indices)  # type:ignore

    def forward_encoder(self, observations: ObservationType) -> torch.Tensor:
        """Return `[visual embedding, goal embedding]` joined on the last
        axis; the visual part is left out when the model is blind."""
        goal_embedding = self.get_object_type_encoding(
            cast(Dict[str, torch.Tensor], observations)
        )

        parts = []
        if not self.is_blind:
            parts.append(self.visual_encoder(observations))
        parts.append(goal_embedding)

        return torch.cat(parts, dim=-1)


class ResnetTensorNavActorCritic(VisualNavActorCritic):
    """Actor-critic that consumes pre-computed ResNet tensors and a goal.

    Depending on how many ResNet preprocessor uuids are supplied, the
    observation embedding is produced by a `ResnetTensorGoalEncoder` (at most
    one of rgb / depth given) or a `ResnetDualTensorGoalEncoder` (both given).
    """

    def __init__(
        # base params
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        goal_sensor_uuid: str,
        hidden_size=512,
        num_rnn_layers=1,
        rnn_type="GRU",
        add_prev_actions=False,
        add_prev_action_null_token=False,
        action_embed_size=6,
        multiple_beliefs=False,
        beliefs_fusion: Optional[FusionType] = None,
        auxiliary_uuids: Optional[List[str]] = None,
        # custom params
        rgb_resnet_preprocessor_uuid: Optional[str] = None,
        depth_resnet_preprocessor_uuid: Optional[str] = None,
        goal_dims: int = 32,
        resnet_compressor_hidden_out_dims: Tuple[int, int] = (128, 32),
        combiner_hidden_out_dims: Tuple[int, int] = (128, 32),
        **kwargs,
    ):
        """Initializer.

        Extra `kwargs` are forwarded to the parent class initializer.
        """
        super().__init__(
            action_space=action_space,
            observation_space=observation_space,
            hidden_size=hidden_size,
            multiple_beliefs=multiple_beliefs,
            beliefs_fusion=beliefs_fusion,
            auxiliary_uuids=auxiliary_uuids,
            **kwargs,
        )

        has_both_modalities = (
            rgb_resnet_preprocessor_uuid is not None
            and depth_resnet_preprocessor_uuid is not None
        )
        if has_both_modalities:
            self.goal_visual_encoder = ResnetDualTensorGoalEncoder(  # type:ignore
                self.observation_space,
                goal_sensor_uuid,
                rgb_resnet_preprocessor_uuid,
                depth_resnet_preprocessor_uuid,
                goal_dims,
                resnet_compressor_hidden_out_dims,
                combiner_hidden_out_dims,
            )
        else:
            # Use whichever uuid was given (rgb wins); it stays `None` when
            # neither was supplied.
            if rgb_resnet_preprocessor_uuid is None:
                resnet_preprocessor_uuid = depth_resnet_preprocessor_uuid
            else:
                resnet_preprocessor_uuid = rgb_resnet_preprocessor_uuid
            self.goal_visual_encoder = ResnetTensorGoalEncoder(
                self.observation_space,
                goal_sensor_uuid,
                resnet_preprocessor_uuid,
                goal_dims,
                resnet_compressor_hidden_out_dims,
                combiner_hidden_out_dims,
            )

        encoder_output_dims = self.goal_visual_encoder.output_dims
        self.create_state_encoders(
            obs_embed_size=encoder_output_dims,
            num_rnn_layers=num_rnn_layers,
            rnn_type=rnn_type,
            add_prev_actions=add_prev_actions,
            add_prev_action_null_token=add_prev_action_null_token,
            prev_action_embed_size=action_embed_size,
        )

        self.create_actorcritic_head()

        self.create_aux_models(
            obs_embed_size=self.goal_visual_encoder.output_dims,
            action_embed_size=action_embed_size,
        )

        self.train()

    @property
    def is_blind(self) -> bool:
        """Whether the goal/visual encoder reports itself as blind."""
        return self.goal_visual_encoder.is_blind

    def forward_encoder(self, observations: ObservationType) -> torch.FloatTensor:
        """Embed the observations with the goal/visual encoder."""
        return self.goal_visual_encoder(observations)


class ResnetTensorGoalEncoder(nn.Module):
    """Combine a goal embedding with one pre-computed ResNet feature map.

    The goal observation is embedded (`nn.Embedding` for a `Discrete` goal
    space, `nn.Linear` for a `Box` goal space), broadcast over the spatial
    extent of the ResNet tensor, concatenated channel-wise with a 1x1-conv
    compressed version of that tensor and passed through a second stack of
    1x1 convolutions. The result is flattened per sample.

    If `resnet_preprocessor_uuid` is not part of the observation space the
    encoder is "blind": no convolutional layers are created and `forward`
    returns only the goal embedding.
    """

    def __init__(
        self,
        observation_spaces: SpaceDict,
        goal_sensor_uuid: str,
        resnet_preprocessor_uuid: str,
        goal_embed_dims: int = 32,
        resnet_compressor_hidden_out_dims: Tuple[int, int] = (128, 32),
        combiner_hidden_out_dims: Tuple[int, int] = (128, 32),
    ) -> None:
        """Initializer.

        # Parameters
        observation_spaces : Observation space containing the goal sensor and
            (unless blind) the ResNet tensor.
        goal_sensor_uuid : Key of the goal observation.
        resnet_preprocessor_uuid : Key of the ResNet tensor observation.
        goal_embed_dims : Size of the goal embedding.
        resnet_compressor_hidden_out_dims : (hidden, output) channel counts of
            the ResNet compressor.
        combiner_hidden_out_dims : (hidden, output) channel counts of the
            goal/ResNet combiner.

        # Raises
        NotImplementedError : If the goal space is neither `Discrete` nor `Box`.
        """
        super().__init__()
        self.goal_uuid = goal_sensor_uuid
        self.resnet_uuid = resnet_preprocessor_uuid
        self.goal_embed_dims = goal_embed_dims
        self.resnet_hid_out_dims = resnet_compressor_hidden_out_dims
        self.combine_hid_out_dims = combiner_hidden_out_dims

        self.goal_space = observation_spaces.spaces[self.goal_uuid]
        if isinstance(self.goal_space, gym.spaces.Discrete):
            self.embed_goal = nn.Embedding(
                num_embeddings=self.goal_space.n,
                embedding_dim=self.goal_embed_dims,
            )
        elif isinstance(self.goal_space, gym.spaces.Box):
            self.embed_goal = nn.Linear(self.goal_space.shape[-1], self.goal_embed_dims)
        else:
            raise NotImplementedError

        self.blind = self.resnet_uuid not in observation_spaces.spaces
        if not self.blind:
            # The ResNet tensor space is indexed as (channels, height, width).
            self.resnet_tensor_shape = observation_spaces.spaces[self.resnet_uuid].shape
            self.resnet_compressor = nn.Sequential(
                nn.Conv2d(self.resnet_tensor_shape[0], self.resnet_hid_out_dims[0], 1),
                nn.ReLU(),
                nn.Conv2d(*self.resnet_hid_out_dims[0:2], 1),
                nn.ReLU(),
            )
            self.target_obs_combiner = nn.Sequential(
                nn.Conv2d(
                    self.resnet_hid_out_dims[1] + self.goal_embed_dims,
                    self.combine_hid_out_dims[0],
                    1,
                ),
                nn.ReLU(),
                nn.Conv2d(*self.combine_hid_out_dims[0:2], 1),
            )

    @property
    def is_blind(self):
        """`True` if the ResNet tensor is not part of the observation space."""
        return self.blind

    @property
    def output_dims(self):
        """Size of the last dimension of the tensor returned by `forward`."""
        if self.blind:
            return self.goal_embed_dims
        else:
            return (
                self.combine_hid_out_dims[-1]
                * self.resnet_tensor_shape[1]
                * self.resnet_tensor_shape[2]
            )

    def get_object_type_encoding(
        self, observations: Dict[str, torch.FloatTensor]
    ) -> torch.FloatTensor:
        """Get the object type encoding from input batched observations."""
        return cast(
            torch.FloatTensor,
            self.embed_goal(observations[self.goal_uuid].to(torch.int64)),
        )

    def compress_resnet(self, observations):
        """Apply the 1x1-conv compressor to the (flattened) ResNet tensor."""
        return self.resnet_compressor(observations[self.resnet_uuid])

    def distribute_target(self, observations):
        """Embed the goal and broadcast it over the ResNet spatial grid."""
        target_emb = self.embed_goal(observations[self.goal_uuid])
        return target_emb.view(-1, self.goal_embed_dims, 1, 1).expand(
            -1, -1, self.resnet_tensor_shape[-2], self.resnet_tensor_shape[-1]
        )

    def adapt_input(self, observations):
        """Flatten the leading dimensions of the ResNet and goal tensors.

        A 6-dimensional ResNet tensor is interpreted as
        `(step, sampler, agent, C, H, W)`, anything else as
        `(step, sampler, C, H, W)`.

        # Returns
        A shallow copy of `observations` with reshaped ResNet / goal entries
        (the caller's dict is not modified), followed by `use_agent`,
        `nstep`, `nsampler` and `nagent`, which `adapt_output` needs to
        restore the leading dimensions.
        """
        observations = {**observations}
        resnet = observations[self.resnet_uuid]
        goal = observations[self.goal_uuid]

        use_agent = False
        nagent = 1

        if len(resnet.shape) == 6:
            use_agent = True
            nstep, nsampler, nagent = resnet.shape[:3]
        else:
            nstep, nsampler = resnet.shape[:2]

        observations[self.resnet_uuid] = resnet.view(-1, *resnet.shape[-3:])
        observations[self.goal_uuid] = goal.view(-1, goal.shape[-1])

        return observations, use_agent, nstep, nsampler, nagent

    @staticmethod
    def adapt_output(x, use_agent, nstep, nsampler, nagent):
        """Restore the leading dimensions removed by `adapt_input`."""
        if use_agent:
            return x.view(nstep, nsampler, nagent, -1)
        return x.view(nstep, nsampler * nagent, -1)

    def forward(self, observations):
        """Encode the observations.

        # Returns
        When blind, the goal embedding with the goal observation's leading
        dimensions untouched. Otherwise a tensor of shape
        `(nstep, nsampler[, nagent], output_dims)`.
        """
        if self.blind:
            # A blind encoder has no ResNet observation, so `adapt_input`
            # (which indexes it) must not run; embed the goal directly.
            return self.embed_goal(observations[self.goal_uuid])

        observations, use_agent, nstep, nsampler, nagent = self.adapt_input(
            observations
        )

        embs = [
            self.compress_resnet(observations),
            self.distribute_target(observations),
        ]
        x = self.target_obs_combiner(
            torch.cat(
                embs,
                dim=1,
            )
        )
        x = x.reshape(x.size(0), -1)  # flatten

        return self.adapt_output(x, use_agent, nstep, nsampler, nagent)


class ResnetDualTensorGoalEncoder(nn.Module):
    """Combine a goal embedding with RGB and depth ResNet feature maps.

    Each modality has its own 1x1-conv compressor and its own goal/feature
    combiner; the goal embedding (`nn.Embedding` for a `Discrete` goal space,
    `nn.Linear` for a `Box` goal space) is broadcast over the spatial grid
    and concatenated channel-wise with each compressed feature map. The two
    combined maps are concatenated and flattened per sample.

    If either ResNet uuid is missing from the observation space the encoder
    is "blind": no convolutional layers are created and `forward` returns
    only the goal embedding.
    """

    def __init__(
        self,
        observation_spaces: SpaceDict,
        goal_sensor_uuid: str,
        rgb_resnet_preprocessor_uuid: str,
        depth_resnet_preprocessor_uuid: str,
        goal_embed_dims: int = 32,
        resnet_compressor_hidden_out_dims: Tuple[int, int] = (128, 32),
        combiner_hidden_out_dims: Tuple[int, int] = (128, 32),
    ) -> None:
        """Initializer.

        # Parameters
        observation_spaces : Observation space containing the goal sensor and
            (unless blind) both ResNet tensors.
        goal_sensor_uuid : Key of the goal observation.
        rgb_resnet_preprocessor_uuid : Key of the RGB ResNet tensor.
        depth_resnet_preprocessor_uuid : Key of the depth ResNet tensor.
        goal_embed_dims : Size of the goal embedding.
        resnet_compressor_hidden_out_dims : (hidden, output) channel counts of
            each ResNet compressor.
        combiner_hidden_out_dims : (hidden, output) channel counts of each
            goal/ResNet combiner.

        # Raises
        NotImplementedError : If the goal space is neither `Discrete` nor `Box`.
        """
        super().__init__()
        self.goal_uuid = goal_sensor_uuid
        self.rgb_resnet_uuid = rgb_resnet_preprocessor_uuid
        self.depth_resnet_uuid = depth_resnet_preprocessor_uuid
        self.goal_embed_dims = goal_embed_dims
        self.resnet_hid_out_dims = resnet_compressor_hidden_out_dims
        self.combine_hid_out_dims = combiner_hidden_out_dims

        self.goal_space = observation_spaces.spaces[self.goal_uuid]
        if isinstance(self.goal_space, gym.spaces.Discrete):
            self.embed_goal = nn.Embedding(
                num_embeddings=self.goal_space.n,
                embedding_dim=self.goal_embed_dims,
            )
        elif isinstance(self.goal_space, gym.spaces.Box):
            self.embed_goal = nn.Linear(self.goal_space.shape[-1], self.goal_embed_dims)
        else:
            raise NotImplementedError

        self.blind = (
            self.rgb_resnet_uuid not in observation_spaces.spaces
            or self.depth_resnet_uuid not in observation_spaces.spaces
        )
        if not self.blind:
            # The RGB tensor's shape, indexed as (channels, height, width), is
            # used for both modalities.
            self.resnet_tensor_shape = observation_spaces.spaces[
                self.rgb_resnet_uuid
            ].shape
            self.rgb_resnet_compressor = nn.Sequential(
                nn.Conv2d(self.resnet_tensor_shape[0], self.resnet_hid_out_dims[0], 1),
                nn.ReLU(),
                nn.Conv2d(*self.resnet_hid_out_dims[0:2], 1),
                nn.ReLU(),
            )
            self.depth_resnet_compressor = nn.Sequential(
                nn.Conv2d(self.resnet_tensor_shape[0], self.resnet_hid_out_dims[0], 1),
                nn.ReLU(),
                nn.Conv2d(*self.resnet_hid_out_dims[0:2], 1),
                nn.ReLU(),
            )
            self.rgb_target_obs_combiner = nn.Sequential(
                nn.Conv2d(
                    self.resnet_hid_out_dims[1] + self.goal_embed_dims,
                    self.combine_hid_out_dims[0],
                    1,
                ),
                nn.ReLU(),
                nn.Conv2d(*self.combine_hid_out_dims[0:2], 1),
            )
            self.depth_target_obs_combiner = nn.Sequential(
                nn.Conv2d(
                    self.resnet_hid_out_dims[1] + self.goal_embed_dims,
                    self.combine_hid_out_dims[0],
                    1,
                ),
                nn.ReLU(),
                nn.Conv2d(*self.combine_hid_out_dims[0:2], 1),
            )

    @property
    def is_blind(self):
        """`True` if either ResNet tensor is missing from the observation space."""
        return self.blind

    @property
    def output_dims(self):
        """Size of the last dimension of the tensor returned by `forward`."""
        if self.blind:
            return self.goal_embed_dims
        else:
            return (
                2
                * self.combine_hid_out_dims[-1]
                * self.resnet_tensor_shape[1]
                * self.resnet_tensor_shape[2]
            )

    def get_object_type_encoding(
        self, observations: Dict[str, torch.FloatTensor]
    ) -> torch.FloatTensor:
        """Get the object type encoding from input batched observations."""
        return cast(
            torch.FloatTensor,
            self.embed_goal(observations[self.goal_uuid].to(torch.int64)),
        )

    def compress_rgb_resnet(self, observations):
        """Apply the RGB compressor to the (flattened) RGB ResNet tensor."""
        return self.rgb_resnet_compressor(observations[self.rgb_resnet_uuid])

    def compress_depth_resnet(self, observations):
        """Apply the depth compressor to the (flattened) depth ResNet tensor."""
        return self.depth_resnet_compressor(observations[self.depth_resnet_uuid])

    def distribute_target(self, observations):
        """Embed the goal and broadcast it over the ResNet spatial grid."""
        target_emb = self.embed_goal(observations[self.goal_uuid])
        return target_emb.view(-1, self.goal_embed_dims, 1, 1).expand(
            -1, -1, self.resnet_tensor_shape[-2], self.resnet_tensor_shape[-1]
        )

    def adapt_input(self, observations):
        """Flatten the leading dimensions of the ResNet and goal tensors.

        A 6-dimensional RGB tensor is interpreted as
        `(step, sampler, agent, C, H, W)`, anything else as
        `(step, sampler, C, H, W)`.

        # Returns
        A shallow copy of `observations` with reshaped ResNet / goal entries
        (the caller's dict is not modified), followed by `use_agent`,
        `nstep`, `nsampler` and `nagent`, which `adapt_output` needs to
        restore the leading dimensions.
        """
        # Work on a shallow copy so the reshaped tensors do not leak into the
        # caller's observations.
        observations = {**observations}
        rgb = observations[self.rgb_resnet_uuid]
        depth = observations[self.depth_resnet_uuid]
        goal = observations[self.goal_uuid]

        use_agent = False
        nagent = 1

        if len(rgb.shape) == 6:
            use_agent = True
            nstep, nsampler, nagent = rgb.shape[:3]
        else:
            nstep, nsampler = rgb.shape[:2]

        observations[self.rgb_resnet_uuid] = rgb.view(-1, *rgb.shape[-3:])
        observations[self.depth_resnet_uuid] = depth.view(-1, *depth.shape[-3:])
        # Keep the last goal dimension intact: a `Box` goal feeds an
        # `nn.Linear` expecting that many features, while for a `Discrete`
        # goal the embedding is reshaped to the same result either way.
        observations[self.goal_uuid] = goal.view(-1, goal.shape[-1])

        return observations, use_agent, nstep, nsampler, nagent

    @staticmethod
    def adapt_output(x, use_agent, nstep, nsampler, nagent):
        """Restore the leading dimensions removed by `adapt_input`."""
        if use_agent:
            return x.view(nstep, nsampler, nagent, -1)
        return x.view(nstep, nsampler * nagent, -1)

    def forward(self, observations):
        """Encode the observations.

        # Returns
        When blind, the goal embedding with the goal observation's leading
        dimensions untouched. Otherwise a tensor of shape
        `(nstep, nsampler[, nagent], output_dims)`.
        """
        if self.blind:
            # A blind encoder may have no ResNet observations, so
            # `adapt_input` (which indexes them) must not run.
            return self.embed_goal(observations[self.goal_uuid])

        observations, use_agent, nstep, nsampler, nagent = self.adapt_input(
            observations
        )

        rgb_embs = [
            self.compress_rgb_resnet(observations),
            self.distribute_target(observations),
        ]
        rgb_x = self.rgb_target_obs_combiner(
            torch.cat(
                rgb_embs,
                dim=1,
            )
        )
        depth_embs = [
            self.compress_depth_resnet(observations),
            self.distribute_target(observations),
        ]
        depth_x = self.depth_target_obs_combiner(
            torch.cat(
                depth_embs,
                dim=1,
            )
        )
        x = torch.cat([rgb_x, depth_x], dim=1)
        x = x.reshape(x.shape[0], -1)  # flatten

        return self.adapt_output(x, use_agent, nstep, nsampler, nagent)


================================================
FILE: allenact_plugins/navigation_plugin/pointnav/__init__.py
================================================


================================================
FILE: allenact_plugins/navigation_plugin/pointnav/models.py
================================================
"""Baseline models for use in the point navigation task.

Point navigation is currently available as a Task in AI2-THOR and
Facebook's Habitat.
"""

from typing import Optional, List, Union, Sequence

import gym
import torch
import torch.nn as nn
from gym.spaces import Dict as SpaceDict

from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.embodiedai.models import resnet as resnet
from allenact.embodiedai.models.basic_models import SimpleCNN
from allenact.embodiedai.models.visual_nav_models import (
    VisualNavActorCritic,
    FusionType,
)


class PointNavActorCritic(VisualNavActorCritic):
    """Point-navigation actor-critic operating on raw image observations.

    The observation embedding fed to the state encoders is the concatenation
    of a visual embedding (omitted when the visual encoder is blind) and the
    goal coordinates, which are optionally passed through a linear embedding.
    """

    def __init__(
        # base params
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        goal_sensor_uuid: str,
        hidden_size=512,
        num_rnn_layers=1,
        rnn_type="GRU",
        add_prev_actions=False,
        add_prev_action_null_token=False,
        action_embed_size=4,
        multiple_beliefs=False,
        beliefs_fusion: Optional[FusionType] = None,
        auxiliary_uuids: Optional[Sequence[str]] = None,
        # custom params
        rgb_uuid: Optional[str] = None,
        depth_uuid: Optional[str] = None,
        embed_coordinates=False,
        coordinate_embedding_dim=8,
        coordinate_dims=2,
        # perception backbone params,
        backbone="gnresnet18",
        resnet_baseplanes=32,
    ):
        """Initializer.

        `backbone == "simple_cnn"` selects a `SimpleCNN` visual encoder; any
        other value is looked up by name in the `resnet` module and used to
        build a `GroupNormResNetEncoder`.
        """
        super().__init__(
            action_space=action_space,
            observation_space=observation_space,
            hidden_size=hidden_size,
            multiple_beliefs=multiple_beliefs,
            beliefs_fusion=beliefs_fusion,
            auxiliary_uuids=auxiliary_uuids,
        )

        self.goal_sensor_uuid = goal_sensor_uuid
        self.embed_coordinates = embed_coordinates
        # Raw coordinates are used unchanged unless an embedding is requested.
        self.coordinate_embedding_size = (
            coordinate_embedding_dim if self.embed_coordinates else coordinate_dims
        )

        # With both rgb and depth inputs a linear layer maps
        # `2 * hidden_size` perception features down to `hidden_size`.
        uses_both_sensors = rgb_uuid is not None and depth_uuid is not None
        self.sensor_fusion = uses_both_sensors
        if uses_both_sensors:
            self.sensor_fuser = nn.Linear(hidden_size * 2, hidden_size)

        self.backbone = backbone
        shared_encoder_kwargs = dict(
            observation_space=observation_space,
            output_size=hidden_size,
            rgb_uuid=rgb_uuid,
            depth_uuid=depth_uuid,
        )
        if backbone == "simple_cnn":
            self.visual_encoder = SimpleCNN(**shared_encoder_kwargs)
        else:  # resnet family
            self.visual_encoder = resnet.GroupNormResNetEncoder(
                **shared_encoder_kwargs,
                baseplanes=resnet_baseplanes,
                ngroups=resnet_baseplanes // 2,
                make_backbone=getattr(resnet, backbone),
            )

        if self.embed_coordinates:
            self.coordinate_embedding = nn.Linear(
                coordinate_dims, coordinate_embedding_dim
            )

        self.create_state_encoders(
            obs_embed_size=self.goal_visual_encoder_output_dims,
            num_rnn_layers=num_rnn_layers,
            rnn_type=rnn_type,
            add_prev_actions=add_prev_actions,
            add_prev_action_null_token=add_prev_action_null_token,
            prev_action_embed_size=action_embed_size,
        )

        self.create_actorcritic_head()

        self.create_aux_models(
            obs_embed_size=self.goal_visual_encoder_output_dims,
            action_embed_size=action_embed_size,
        )

        self.train()

    @property
    def is_blind(self):
        """Whether the visual encoder reports itself as blind."""
        return self.visual_encoder.is_blind

    @property
    def goal_visual_encoder_output_dims(self):
        """Size of the embedding returned by `forward_encoder`."""
        total_dims = self.coordinate_embedding_size
        if not self.is_blind:
            total_dims = total_dims + self.recurrent_hidden_state_size
        return total_dims

    def get_target_coordinates_encoding(self, observations):
        """Return the goal coordinates as `float32`, linearly embedded when
        `embed_coordinates` is set."""
        coordinates = observations[self.goal_sensor_uuid].to(torch.float32)
        if self.embed_coordinates:
            return self.coordinate_embedding(coordinates)
        return coordinates

    def forward_encoder(self, observations: ObservationType) -> torch.FloatTensor:
        """Return `[visual embedding, goal encoding]` joined on the last
        axis; the visual part is left out when the model is blind."""
        goal_encoding = self.get_target_coordinates_encoding(observations)

        parts: List[torch.Tensor] = []
        if not self.is_blind:
            visual_embedding = self.visual_encoder(observations)
            if self.sensor_fusion:
                visual_embedding = self.sensor_fuser(visual_embedding)
            parts.append(visual_embedding)
        parts.append(goal_encoding)

        return torch.cat(parts, dim=-1)


================================================
FILE: allenact_plugins/robothor_plugin/__init__.py
================================================
from allenact.utils.system import ImportChecker

# Import `ai2thor` when this plugin package is imported. The import is wrapped
# in `ImportChecker`, which is given the installation hint below (presumably
# reported if the import fails -- confirm against `allenact.utils.system`).
with ImportChecker(
    "Cannot `import ai2thor`, please install `ai2thor` (`pip install ai2thor`)."
):
    # noinspection PyUnresolvedReferences
    import ai2thor


================================================
FILE: allenact_plugins/robothor_plugin/configs/__init__.py
================================================


================================================
FILE: allenact_plugins/robothor_plugin/extra_environment.yml
================================================
channels:
  - defaults
  - conda-forge
dependencies:
  - ai2thor>=2.5.3
  - numba
  - pip
  - colour
  - packaging
  - pip:
      - numpy-quaternion
      - pyquaternion>=0.9.9
      - python-xlib


================================================
FILE: allenact_plugins/robothor_plugin/extra_requirements.txt
================================================
ai2thor>=2.5.3
numpy-quaternion
pyquaternion>=0.9.9
colour
numba
packaging
python-xlib


================================================
FILE: allenact_plugins/robothor_plugin/robothor_constants.py
================================================
# Action-name string constants for the RoboTHOR plugin (presumably the action
# strings exchanged with the AI2-THOR controller / RoboTHOR tasks -- confirm
# against their usages).
MOVE_AHEAD = "MoveAhead"
ROTATE_LEFT = "RotateLeft"
ROTATE_RIGHT = "RotateRight"
LOOK_DOWN = "LookDown"
LOOK_UP = "LookUp"
END = "End"
PASS = "Pass"


================================================
FILE: allenact_plugins/robothor_plugin/robothor_distributions.py
================================================
from typing import Tuple

import torch

from allenact.base_abstractions.distributions import CategoricalDistr, Distr


class TupleCategoricalDistr(Distr):
    """Distribution over tuples of categorical actions.

    Wraps a single `CategoricalDistr` (stored in `self.dists`); actions are
    exchanged with callers as tuples of tensors, one entry per slice of the
    last dimension.
    """

    def __init__(self, probs=None, logits=None, validate_args=None):
        self.dists = CategoricalDistr(
            probs=probs, logits=logits, validate_args=validate_args
        )

    @staticmethod
    def _split_last_dim(joint: torch.Tensor) -> Tuple[torch.LongTensor, ...]:
        """Split `joint` into size-1 chunks along its last dimension and view
        each chunk with only its first two dimensions (dropping the trailing
        singleton dimension)."""
        return tuple(
            chunk.view(chunk.shape[:2]) for chunk in joint.split(1, dim=-1)
        )

    def log_prob(self, actions: Tuple[torch.LongTensor, ...]) -> torch.FloatTensor:
        # The tuple entries are stacked along a new last dimension.
        # flattened output [steps, samplers, num_agents]
        stacked_actions = torch.stack(actions, dim=-1)
        return self.dists.log_prob(stacked_actions)

    def entropy(self) -> torch.FloatTensor:
        # flattened output [steps, samplers, num_agents]
        return self.dists.entropy()

    def sample(self, sample_shape=torch.Size()) -> Tuple[torch.LongTensor, ...]:
        joint_sample = self.dists.sample(sample_shape)
        return self._split_last_dim(joint_sample)

    def mode(self) -> Tuple[torch.LongTensor, ...]:
        joint_mode = self.dists.mode()
        return self._split_last_dim(joint_mode)


================================================
FILE: allenact_plugins/robothor_plugin/robothor_environment.py
================================================
import copy
import math
import random
import warnings
from typing import Any, Optional, Dict, List, Union, Tuple, Collection

import ai2thor.server
import numpy as np
from ai2thor.controller import Controller
from ai2thor.fifo_server import FifoServer
from ai2thor.util import metrics

from allenact.utils.cache_utils import DynamicDistanceCache
from allenact.utils.experiment_utils import recursive_update
from allenact.utils.system import get_logger


class RoboThorEnvironment:
    """Wrapper for the robo2thor controller providing additional functionality
    and bookkeeping.

    See [here](https://ai2thor.allenai.org/robothor/documentation) for comprehensive
     documentation on RoboTHOR.

    # Attributes

    controller : The AI2-THOR controller.
    config : The AI2-THOR controller configuration
    """

    def __init__(self, all_metadata_available: bool = True, **kwargs):
        """Create the AI2-THOR controller and initialize bookkeeping.

        # Parameters

        all_metadata_available : If `True`, the reachable positions of the
            initial scene are cached and a `DynamicDistanceCache` is created.
        kwargs : Controller options merged (via `recursive_update`) over the
            default configuration defined below.
        """
        self.config = {
            "rotateStepDegrees": 30.0,
            "visibilityDistance": 1.0,
            "gridSize": 0.25,
            "continuousMode": True,
            "snapToGrid": False,
            "agentMode": "locobot",
            "width": 640,
            "height": 480,
            "agentCount": 1,
            "server_class": FifoServer,
        }

        if "agentCount" in kwargs:
            assert kwargs["agentCount"] > 0

        # Default the agent mode and warn about modes other than the two listed.
        agent_mode = kwargs.setdefault("agentMode", "locobot")
        if agent_mode not in ("bot", "locobot"):
            warnings.warn(
                f"The RoboTHOR environment has not been tested using"
                f" an agent of mode '{agent_mode}'."
            )

        recursive_update(self.config, kwargs)
        self.controller = Controller(**self.config)

        self.all_metadata_available = all_metadata_available

        self.scene_to_reachable_positions: Optional[Dict[str, Any]] = None
        self.distance_cache: Optional[DynamicDistanceCache] = None

        if self.all_metadata_available:
            # Snapshot the reachable positions of the scene the controller started in.
            self.scene_to_reachable_positions = {
                self.scene_name: copy.deepcopy(self.currently_reachable_points)
            }
            assert len(self.scene_to_reachable_positions[self.scene_name]) > 10

            self.distance_cache = DynamicDistanceCache(rounding=1)

        self.agent_count = self.config["agentCount"]

        # Used for backwards compatability with the teleport action
        self._extra_teleport_kwargs: Dict[str, Any] = {}

    def initialize_grid_dimensions(
        self, reachable_points: Collection[Dict[str, float]]
    ) -> Tuple[int, int, int, int]:
        """Computes bounding box for reachable points quantized with the
        current gridSize."""
        points = {
            (
                round(p["x"] / self.config["gridSize"]),
                round(p["z"] / self.config["gridSize"]),
            ): p
            for p in reachable_points
        }

        assert len(reachable_points) == len(points)

        xmin, xmax = min([p[0] for p in points]), max([p[0] for p in points])
        zmin, zmax = min([p[1] for p in points]), max([p[1] for p in points])

        return xmin, xmax, zmin, zmax

    def set_object_filter(self, object_ids: List[str]):
        self.controller.step("SetObjectFilter", objectIds=object_ids, renderImage=False)

    def reset_object_filter(self):
        self.controller.step("ResetObjectFilter", renderImage=False)

    def path_from_point_to_object_type(
        self, point: Dict[str, float], object_type: str, allowed_error: float
    ) -> Optional[List[Dict[str, float]]]:
        event = self.controller.step(
            action="GetShortestPath",
            objectType=object_type,
            position=point,
            allowedError=allowed_error,
        )
        if event.metadata["lastActionSuccess"]:
            return event.metadata["actionReturn"]["corners"]
        else:
            get_logger().debug(
                "Failed to find path for {} in {}. Start point {}, agent state {}.".format(
                    object_type,
                    self.controller.last_event.metadata["sceneName"],
                    point,
                    self.agent_state(),
                )
            )
            return None

    def distance_from_point_to_object_type(
        self, point: Dict[str, float], object_type: str, allowed_error: float
    ) -> float:
        """Minimal geodesic distance from a point to an object of the given
        type.

        It might return -1.0 for unreachable targets.
        """
        path = self.path_from_point_to_object_type(point, object_type, allowed_error)
        if path:
            # Because `allowed_error != 0` means that the path returned above might not start
            # at `point`, we explicitly add any offset there is.
            s_dist = math.sqrt(
                (point["x"] - path[0]["x"]) ** 2 + (point["z"] - path[0]["z"]) ** 2
            )
            return metrics.path_distance(path) + s_dist
        return -1.0

    def distance_to_object_type(self, object_type: str, agent_id: int = 0) -> float:
        """Minimal geodesic distance to object of given type from agent's
        current location.

        It might return -1.0 for unreachable targets.
        """
        assert 0 <= agent_id < self.agent_count
        assert (
            self.all_metadata_available
        ), "`distance_to_object_type` cannot be called when `self.all_metadata_available` is `False`."

        def retry_dist(position: Dict[str, float], object_type: str):
            allowed_error = 0.05
            debug_log = ""
            d = -1.0
            while allowed_error < 2.5:
                d = self.distance_from_point_to_object_type(
                    position, object_type, allowed_error
                )
                if d < 0:
                    debug_log = (
                        f"In scene {self.scene_name}, could not find a path from {position} to {object_type} with"
                        f" {allowed_error} error tolerance. Increasing this tolerance to"
                        f" {2 * allowed_error} any trying again."
                    )
                    allowed_error *= 2
                else:
                    break
            if d < 0:
                get_logger().debug(
                    f"In scene {self.scene_name}, could not find a path from {position} to {object_type}"
                    f" with {allowed_error} error tolerance. Returning a distance of -1."
                )
            elif debug_log != "":
                get_logger().debug(debug_log)
            return d

        return self.distance_cache.find_distance(
            self.scene_name,
            self.controller.last_event.events[agent_id].metadata["agent"]["position"],
            object_type,
            retry_dist,
        )

    def path_from_point_to_point(
        self, position: Dict[str, float], target: Dict[str, float], allowedError: float
    ) -> Optional[List[Dict[str, float]]]:
        try:
            return self.controller.step(
                action="GetShortestPathToPoint",
                position=position,
                target=target,
                allowedError=allowedError,
            ).metadata["actionReturn"]["corners"]
        except ValueError:
            raise
        except Exception:
            get_logger().debug(
                "Failed to find path for {} in {}. Start point {}, agent state {}.".format(
                    target,
                    self.controller.last_event.metadata["sceneName"],
                    position,
                    self.agent_state(),
                )
            )
            return None

    def distance_from_point_to_point(
        self, position: Dict[str, float], target: Dict[str, float], allowed_error: float
    ) -> float:
        path = self.path_from_point_to_point(position, target, allowed_error)
        if path:
            # Because `allowed_error != 0` means that the path returned above might not start
            # or end exactly at the position/target points, we explictly add any offset there is.
            s_dist = math.sqrt(
                (position["x"] - path[0]["x"]) ** 2
                + (position["z"] - path[0]["z"]) ** 2
            )
            t_dist = math.sqrt(
                (target["x"] - path[-1]["x"]) ** 2 + (target["z"] - path[-1]["z"]) ** 2
            )
            return metrics.path_distance(path) + s_dist + t_dist
        return -1.0

    def distance_to_point(self, target: Dict[str, float], agent_id: int = 0) -> float:
        """Minimal geodesic distance to end point from agent's current
        location.

        It might return -1.0 for unreachable targets.
        """
        assert 0 <= agent_id < self.agent_count
        assert (
            self.all_metadata_available
        ), "`distance_to_object_type` cannot be called when `self.all_metadata_available` is `False`."

        def retry_dist(position: Dict[str, float], target: Dict[str, float]):
            allowed_error = 0.05
            debug_log = ""
            d = -1.0
            while allowed_error < 2.5:
                d = self.distance_from_point_to_point(position, target, allowed_error)
                if d < 0:
                    debug_log = (
                        f"In scene {self.scene_name}, could not find a path from {position} to {target} with"
                        f" {allowed_error} error tolerance. Increasing this tolerance to"
                        f" {2 * allowed_error} any trying again."
                    )
                    allowed_error *= 2
                else:
                    break
            if d < 0:
                get_logger().debug(
                    f"In scene {self.scene_name}, could not find a path from {position} to {target}"
                    f" with {allowed_error} error tolerance. Returning a distance of -1."
                )
            elif debug_log != "":
                get_logger().debug(debug_log)
            return d

        return self.distance_cache.find_distance(
            self.scene_name,
            self.controller.last_event.events[agent_id].metadata["agent"]["position"],
            target,
            retry_dist,
        )

    def agent_state(self, agent_id: int = 0) -> Dict:
        """Return agent position, rotation and horizon."""
        assert 0 <= agent_id < self.agent_count

        agent_meta = self.last_event.events[agent_id].metadata["agent"]
        return {
            **{k: float(v) for k, v in agent_meta["position"].items()},
            "rotation": {k: float(v) for k, v in agent_meta["rotation"].items()},
            "horizon": round(float(agent_meta["cameraHorizon"]), 1),
        }

    def teleport(
        self,
        pose: Dict[str, float],
        rotation: Dict[str, float],
        horizon: float = 0.0,
        agent_id: int = 0,
    ):
        assert 0 <= agent_id < self.agent_count
        try:
            e = self.controller.step(
                action="TeleportFull",
                x=pose["x"],
                y=pose["y"],
                z=pose["z"],
                rotation=rotation,
                horizon=horizon,
                agentId=agent_id,
                **self._extra_teleport_kwargs,
            )
        except ValueError as e:
            if len(self._extra_teleport_kwargs) == 0:
                self._extra_teleport_kwargs["standing"] = True
            else:
                raise e
            return self.teleport(
                pose=pose, rotation=rotation, horizon=horizon, agent_id=agent_id
            )
        return e.metadata["lastActionSuccess"]

    def reset(
        self, scene_name: str = None, filtered_objects: Optional[List[str]] = None
    ) -> None:
        """Resets scene to a known initial state."""
        if scene_name is not None and scene_name != self.scene_name:
            self.controller.reset(scene_name)
            assert self.last_action_success, "Could not reset to new scene"

            if (
                self.all_metadata_available
                and scene_name not in self.scene_to_reachable_positions
            ):
                self.scene_to_reachable_positions[scene_name] = copy.deepcopy(
                    self.currently_reachable_points
                )
                assert len(self.scene_to_reachable_positions[scene_name]) > 10
        if filtered_objects:
            self.set_object_filter(filtered_objects)
        else:
            self.reset_object_filter()

    def random_reachable_state(
        self, seed: Optional[int] = None
    ) -> Dict[str, Union[Dict[str, float], float]]:
        """Returns a random reachable location in the scene."""
        assert (
            self.all_metadata_available
        ), "`random_reachable_state` cannot be called when `self.all_metadata_available` is `False`."

        if seed is not None:
            random.seed(seed)
        # xyz = random.choice(self.currently_reachable_points)
        assert len(self.scene_to_reachable_positions[self.scene_name]) > 10
        xyz = copy.deepcopy(
            random.choice(self.scene_to_reachable_positions[self.scene_name])
        )
        rotation = random.choice(
            np.arange(0.0, 360.0, self.config["rotateStepDegrees"])
        )
        horizon = 0.0  # random.choice([0.0, 30.0, 330.0])
        return {
            **{k: float(v) for k, v in xyz.items()},
            "rotation": {"x": 0.0, "y": float(rotation), "z": 0.0},
            "horizon": float(horizon),
        }

    def randomize_agent_location(
        self,
        seed: int = None,
        partial_position: Optional[Dict[str, float]] = None,
        agent_id: int = 0,
    ) -> Dict[str, Union[Dict[str, float], float]]:
        """Teleports the agent to a random reachable location in the scene."""
        assert 0 <= agent_id < self.agent_count

        if partial_position is None:
            partial_position = {}
        k = 0
        state: Optional[Dict] = None

        while k == 0 or (not self.last_action_success and k < 10):
            # self.reset()
            state = {**self.random_reachable_state(seed=seed), **partial_position}
            # get_logger().debug("picked target location {}".format(state))
            self.controller.step("TeleportFull", **state, agentId=agent_id)
            k += 1

        if not self.last_action_success:
            get_logger().warning(
                (
                    "Randomize agent location in scene {} and current random state {}"
                    " with seed {} and partial position {} failed in "
                    "10 attempts. Forcing the action."
                ).format(self.scene_name, state, seed, partial_position)
            )
            self.controller.step("TeleportFull", **state, force_action=True, agentId=agent_id)  # type: ignore
            assert self.last_action_success, "Force action failed with {}".format(state)

        # get_logger().debug("location after teleport full {}".format(self.agent_state()))
        # self.controller.step("TeleportFull", **self.agent_state())  # TODO only for debug
        # get_logger().debug("location after re-teleport full {}".format(self.agent_state()))

        return self.agent_state(agent_id=agent_id)

    def known_good_locations_list(self):
        assert (
            self.all_metadata_available
        ), "`known_good_locations_list` cannot be called when `self.all_metadata_available` is `False`."
        return self.scene_to_reachable_positions[self.scene_name]

    @property
    def currently_reachable_points(self) -> List[Dict[str, float]]:
        """List of {"x": x, "y": y, "z": z} locations in the scene that are
        currently reachable."""
        self.controller.step(action="GetReachablePositions")
        assert (
            self.last_action_success
        ), f"Could not get reachable positions for reason {self.last_event.metadata['errorMessage']}."
        return self.last_action_return

    @property
    def scene_name(self) -> str:
        """Current ai2thor scene."""
        return self.controller.last_event.metadata["sceneName"].replace("_physics", "")

    @property
    def current_frame(self) -> np.ndarray:
        """Returns rgb image corresponding to the agent's egocentric view."""
        return self.controller.last_event.frame

    @property
    def current_depth(self) -> np.ndarray:
        """Returns depth image corresponding to the agent's egocentric view."""
        return self.controller.last_event.depth_frame

    @property
    def current_frames(self) -> List[np.ndarray]:
        """Returns rgb images corresponding to the agents' egocentric views."""
        return [
            self.controller.last_event.events[agent_id].frame
            for agent_id in range(self.agent_count)
        ]

    @property
    def current_depths(self) -> List[np.ndarray]:
        """Returns depth images corresponding to the agents' egocentric
        views."""
        return [
            self.controller.last_event.events[agent_id].depth_frame
            for agent_id in range(self.agent_count)
        ]

    @property
    def last_event(self) -> ai2thor.server.Event:
        """Last event returned by the controller."""
        return self.controller.last_event

    @property
    def last_action(self) -> str:
        """Last action, as a string, taken by the agent."""
        return self.controller.last_event.metadata["lastAction"]

    @property
    def last_action_success(self) -> bool:
        """Was the last action taken by the agent a success?"""
        return self.controller.last_event.metadata["lastActionSuccess"]

    @property
    def last_action_return(self) -> Any:
        """Get the value returned by the last action (if applicable).

        For an example of an action that returns a value, see
        `"GetReachablePositions"`.
        """
        return self.controller.last_event.metadata["actionReturn"]

    def step(
        self,
        action_dict: Optional[Dict[str, Union[str, int, float, Dict]]] = None,
        **kwargs: Union[str, int, float, Dict],
    ) -> ai2thor.server.Event:
        """Take a step in the ai2thor environment."""
        if action_dict is None:
            action_dict = dict()
        action_dict.update(kwargs)

        return self.controller.step(**action_dict)

    def stop(self):
        """Stops the ai2thor controller."""
        try:
            self.controller.stop()
        except Exception as e:
            get_logger().warning(str(e))

    def all_objects(self) -> List[Dict[str, Any]]:
        """Return all object metadata."""
        return self.controller.last_event.metadata["objects"]

    def all_objects_with_properties(
        self, properties: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """Find all objects with the given properties."""
        objects = []
        for o in self.all_objects():
            satisfies_all = True
            for k, v in properties.items():
                if o[k] != v:
                    satisfies_all = False
                    break
            if satisfies_all:
                objects.append(o)
        return objects

    def visible_objects(self) -> List[Dict[str, Any]]:
        """Return all visible objects."""
        return self.all_objects_with_properties({"visible": True})


================================================
FILE: allenact_plugins/robothor_plugin/robothor_models.py
================================================
from typing import Tuple, Optional

import gym
import torch
from gym.spaces import Dict as SpaceDict

from allenact.algorithms.onpolicy_sync.policy import (
    ActorCriticModel,
    LinearActorCriticHead,
    DistributionType,
    Memory,
    ObservationType,
)
from allenact.base_abstractions.misc import ActorCriticOutput
from allenact.embodiedai.models.basic_models import RNNStateEncoder, SimpleCNN
from allenact_plugins.robothor_plugin.robothor_distributions import (
    TupleCategoricalDistr,
)


class TupleLinearActorCriticHead(LinearActorCriticHead):
    """Linear actor-critic head that returns its policy as a
    `TupleCategoricalDistr`."""

    def forward(self, x):
        """Split the joint actor/critic output into a distribution and values.

        The last feature of `self.actor_and_critic(x)` is taken as the value
        estimate; all preceding features are used as action logits.
        """
        actor_critic_out = self.actor_and_critic(x)

        action_logits = actor_critic_out[..., :-1]
        value_estimates = actor_critic_out[..., -1:]

        leading_dims = value_estimates.shape[:2]
        # noinspection PyArgumentList
        distribution = TupleCategoricalDistr(
            logits=action_logits
        )  # [steps, samplers, ...]
        flat_values = value_estimates.view(
            *leading_dims, -1
        )  # [steps, samplers, flattened]
        return distribution, flat_values


class NavToPartnerActorCriticSimpleConvRNN(ActorCriticModel[TupleCategoricalDistr]):
    """Actor-critic model for a tuple action space (one entry per agent).

    RGB observations are embedded by a `SimpleCNN`, passed through an
    `RNNStateEncoder`, and fed to a `TupleLinearActorCriticHead`.
    """

    action_space: gym.spaces.Tuple

    def __init__(
        self,
        action_space: gym.spaces.Tuple,
        observation_space: SpaceDict,
        rgb_uuid: Optional[str] = "rgb",
        hidden_size=512,
        num_rnn_layers=1,
        rnn_type="GRU",
    ):
        """Build the visual encoder, recurrent state encoder and head.

        # Parameters

        action_space : Tuple action space; the number of actions of its first
            entry sizes the actor head.
        observation_space : Observation space handed to the visual encoder.
        rgb_uuid : Uuid of the RGB observation consumed by the visual encoder.
        hidden_size : Size of the visual embedding and of the recurrent state.
        num_rnn_layers : Number of recurrent layers.
        rnn_type : Recurrent cell type passed to `RNNStateEncoder`.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        self._hidden_size = hidden_size
        self.rgb_uuid = rgb_uuid

        self.visual_encoder = SimpleCNN(
            observation_space=observation_space,
            output_size=hidden_size,
            rgb_uuid=self.rgb_uuid,
            depth_uuid=None,
        )

        # A blind model has no visual embedding to feed into the RNN.
        rnn_input_size = 0 if self.is_blind else self.recurrent_hidden_state_size
        self.state_encoder = RNNStateEncoder(
            rnn_input_size,
            self._hidden_size,
            num_layers=num_rnn_layers,
            rnn_type=rnn_type,
        )

        num_actions = action_space[0].n
        self.actor_critic = TupleLinearActorCriticHead(self._hidden_size, num_actions)

        self.train()

    @property
    def output_size(self):
        """The hidden size of the model."""
        return self._hidden_size

    @property
    def is_blind(self):
        """Whether the visual encoder reports itself as blind."""
        return self.visual_encoder.is_blind

    @property
    def num_recurrent_layers(self):
        """Number of recurrent layers of the state encoder."""
        return self.state_encoder.num_recurrent_layers

    @property
    def recurrent_hidden_state_size(self):
        """Size of the recurrent hidden state (the hidden size)."""
        return self._hidden_size

    @property
    def num_agents(self):
        """Number of agents, i.e. the number of entries of the action space."""
        return len(self.action_space)

    def _recurrent_memory_specification(self):
        """Describe the `rnn` memory: named dimensions and dtype."""
        rnn_dims = (
            ("layer", self.num_recurrent_layers),
            ("sampler", None),
            ("agent", self.num_agents),
            ("hidden", self.recurrent_hidden_state_size),
        )
        return {"rnn": (rnn_dims, torch.float32)}

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Compute action distributions and values and update the `rnn`
        memory.

        `prev_actions` is not used. Raises `NotImplementedError` for blind
        models.
        """
        if self.is_blind:
            # TODO manage blindness for all agents simultaneously or separate?
            raise NotImplementedError()

        perception_embed = self.visual_encoder(observations)

        # TODO alternative where all agents consume all observations
        encoded_state, rnn_hidden_states = self.state_encoder(
            perception_embed, memory.tensor("rnn"), masks
        )

        dists, vals = self.actor_critic(encoded_state)

        actor_critic_output = ActorCriticOutput(
            distributions=dists,
            values=vals,
            extras={},
        )
        return actor_critic_output, memory.set_tensor("rnn", rnn_hidden_states)


================================================
FILE: allenact_plugins/robothor_plugin/robothor_preprocessors.py
================================================
from collections import OrderedDict
from typing import Dict, Any, Optional, List, cast

import gym
import numpy as np
import torch
from gym.spaces.dict import Dict as SpaceDict

from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.cacheless_frcnn import fasterrcnn_resnet50_fpn
from allenact.utils.misc_utils import prepare_locals_for_super


class BatchedFasterRCNN(torch.nn.Module):
    """Pretrained Faster R-CNN whose detections are rasterized onto a
    `res x res` grid holding up to `maxdets` detections per cell."""

    # fmt: off
    COCO_INSTANCE_CATEGORY_NAMES = [
        '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
        'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
        'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
        'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
        'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
        'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
        'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
        'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
        'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
        'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]
    # fmt: on

    def __init__(self, thres=0.12, maxdets=3, res=7):
        """Load the pretrained detector and put the module in eval mode.

        # Parameters

        thres : Detections must score strictly above this value to be kept.
        maxdets : Maximum number of detections stored per grid cell.
        res : Number of grid cells along each image dimension.
        """
        super().__init__()
        self.model = fasterrcnn_resnet50_fpn(pretrained=True)
        self.eval()

        self.min_score = thres
        self.maxdets = maxdets
        self.res = res

    def detector_tensor(self, boxes, classes, scores, aspect_ratio=1.0):
        """Rasterize the detections of one image onto the grid.

        Every detection is assigned to the cell containing its box center.
        Within a cell, detections are ranked by descending feature tuple
        (score first) and the top `maxdets` are kept.

        # Parameters

        boxes : Per-detection `(x0, y0, x1, y1)` boxes; rows are expected in
            `[0, 1]` and columns in `[0, aspect_ratio]`.
        classes : Per-detection class indices.
        scores : Per-detection score rows, indexed by class.
        aspect_ratio : Image width divided by image height.

        # Returns

        A pair `(classes, boxes)` of shapes `[1, maxdets, res, res]` and
        `[1, 5 * maxdets, res, res]`. Empty slots hold class 0 (background)
        and box features of -1; filled slots hold
        `(prob, width, height, x, y)`.
        """
        grid, per_cell = self.res, self.maxdets

        # Interior cell boundaries 1/res, ..., (res - 1)/res.
        row_edges = np.array(list(range(grid + 1)))[1:-1] / grid
        col_edges = aspect_ratio * row_edges

        class_grid = torch.zeros(
            grid, grid, per_cell, dtype=torch.int64
        )  # 0 is background
        box_grid = -1 * torch.ones(
            grid, grid, per_cell, 5
        )  # regular range is [0, 1] (vert) or [0, aspect_ratio] (horiz)

        # One list of detections per grid cell.
        cells = [[[] for _ in range(grid)] for _ in range(grid)]

        for det in range(classes.shape[0]):
            left, top = boxes[det, 0], boxes[det, 1]
            right, bottom = boxes[det, 2], boxes[det, 3]

            center_x = (left.item() + right.item()) / 2
            center_y = (top.item() + bottom.item()) / 2

            col = np.digitize(center_x, bins=col_edges).item()
            row = np.digitize(center_y, bins=row_edges).item()

            cells[row][col].append(
                (
                    scores[det][classes[det]].item(),  # prob
                    (right - left).item() / aspect_ratio,  # width
                    (bottom - top).item(),  # height
                    left.item() / aspect_ratio,  # x
                    top.item(),  # y
                    classes[det].item(),  # class
                )
            )

        for row in range(grid):
            for col in range(grid):
                ranked = sorted(cells[row][col], reverse=True)[:per_cell]
                for slot, (*box_features, label) in enumerate(ranked):
                    class_grid[row, col, slot] = label
                    # prob, size, top left
                    box_grid[row, col, slot, :] = torch.tensor(box_features)

        # Move the per-cell slots to the channel dimension and add a batch dimension.
        class_grid = class_grid.permute(2, 0, 1).unsqueeze(0).contiguous()
        box_grid = (
            box_grid.view(grid, grid, -1).permute(2, 0, 1).unsqueeze(0).contiguous()
        )

        return class_grid, box_grid

    def forward(self, imbatch):
        """Detect objects in a batch of same-sized images.

        # Parameters

        imbatch : Image batch; the last two dimensions are height and width.

        # Returns

        The per-image outputs of `detector_tensor`, concatenated along the
        batch dimension and moved to the device of `imbatch`.
        """
        with torch.no_grad():
            imglist = [im_in.squeeze(0) for im_in in imbatch.split(split_size=1, dim=0)]

            preds = self.model(imglist)

            height, width = imbatch.shape[-2], imbatch.shape[-1]

            class_grids, box_grids = [], []
            for pred in preds:
                keep = pred["scores"] > self.min_score  # already  after nms

                # [0, 1] for rows, [0, aspect_ratio] for cols (im_in is C x H x W),
                # with all images of same size (batch)
                kept_boxes = pred["boxes"][keep] / height
                kept_labels = pred["labels"][keep]
                kept_scores = pred["scores"][keep]

                # hack: fill in a full prob score (all classes, 0 score if undetected)
                # for each box, for backwards compatibility
                dense_scores = torch.where(
                    torch.arange(91, device=kept_scores.device).unsqueeze(0)
                    == kept_labels.unsqueeze(1),
                    kept_scores.unsqueeze(1),
                    torch.zeros(kept_scores.shape[0], 91, device=kept_scores.device),
                )

                image_classes, image_boxes = self.detector_tensor(
                    kept_boxes,
                    kept_labels,
                    dense_scores,
                    aspect_ratio=width / height,
                )
                class_grids.append(image_classes)
                box_grids.append(image_boxes)

            classes = torch.cat(class_grids, dim=0).to(imbatch.device)
            boxes = torch.cat(box_grids, dim=0).to(imbatch.device)

        return classes, boxes


class FasterRCNNPreProcessorRoboThor(Preprocessor):
    """Preprocess RGB frames with a `BatchedFasterRCNN` object detector.

    The single input observation is passed through the detector and two
    outputs are returned under the keys `"frcnn_classes"` and
    `"frcnn_boxes"`. Their declared observation spaces have shapes
    `(max_dets, res, res)` and `(5 * max_dets, res, res)` respectively, where
    `res` is `detector_spatial_res`.
    """

    # Alias of the detector's category-name list; its length bounds the class
    # indices declared in the `"frcnn_classes"` space below.
    COCO_INSTANCE_CATEGORY_NAMES = BatchedFasterRCNN.COCO_INSTANCE_CATEGORY_NAMES

    def __init__(
        self,
        input_uuids: List[str],
        output_uuid: str,
        input_height: int,
        input_width: int,
        max_dets: int,
        detector_spatial_res: int,
        detector_thres: float,
        device: Optional[torch.device] = None,
        device_ids: Optional[List[torch.device]] = None,
        **kwargs: Any,
    ):
        """Build the detector and declare the output observation space.

        # Parameters

        input_uuids : Must contain exactly one uuid (asserted below); it names
            the observation consumed by `process`.
        output_uuid : Forwarded to the parent class.
        input_height : Stored on the instance; not otherwise used in this class.
        input_width : Stored on the instance; not otherwise used in this class.
        max_dets : Passed to the detector as `maxdets`; also sizes the first
            dimension of both output spaces.
        detector_spatial_res : Passed to the detector as `res`; also sizes the
            two trailing dimensions of both output spaces.
        detector_thres : Passed to the detector as `thres`.
        device : Device recorded on the instance, CPU when `None`.
        device_ids : Defaults to the indices of all CUDA devices reported by
            `torch.cuda.device_count()` when `None` or empty.
        kwargs : Extra keyword arguments, handed to the parent class through
            `prepare_locals_for_super`.
        """
        self.input_height = input_height
        self.input_width = input_width
        self.max_dets = max_dets
        self.detector_spatial_res = detector_spatial_res
        self.detector_thres = detector_thres
        self.device = torch.device("cpu") if device is None else device
        self.device_ids = device_ids or cast(
            List[torch.device], list(range(torch.cuda.device_count()))
        )

        self.frcnn: BatchedFasterRCNN = BatchedFasterRCNN(
            thres=self.detector_thres,
            maxdets=self.max_dets,
            res=self.detector_spatial_res,
        )

        spaces: OrderedDict[str, gym.Space] = OrderedDict()
        # One integer class map per detection slot.
        shape = (self.max_dets, self.detector_spatial_res, self.detector_spatial_res)
        spaces["frcnn_classes"] = gym.spaces.Box(
            low=0,  # 0 is bg
            high=len(self.COCO_INSTANCE_CATEGORY_NAMES) - 1,
            shape=shape,
            dtype=np.int64,
        )
        # Five channels per detection slot for the box output.
        shape = (
            self.max_dets * 5,
            self.detector_spatial_res,
            self.detector_spatial_res,
        )
        spaces["frcnn_boxes"] = gym.spaces.Box(low=-np.inf, high=np.inf, shape=shape)

        assert (
            len(input_uuids) == 1
        ), "fasterrcnn preprocessor can only consume one observation type"

        observation_space = SpaceDict(spaces=spaces)

        # NOTE(review): `locals()` is captured here, so every local defined
        # above (e.g. `observation_space`, `spaces`, `shape`) is handed to
        # `prepare_locals_for_super`. Renaming or adding locals may change what
        # the parent constructor receives -- confirm against that helper.
        super().__init__(**prepare_locals_for_super(locals()))

    def to(self, device: torch.device) -> "FasterRCNNPreProcessorRoboThor":
        """Move the detector to `device`, record the device, and return `self`."""
        self.frcnn = self.frcnn.to(device)
        self.device = device
        return self

    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:
        """Run the detector on the single input observation.

        # Parameters

        obs : Mapping that must contain `self.input_uuids[0]` (presumably set
            by the parent constructor); the value must support `.to(device)`
            and `.permute(0, 3, 1, 2)`.

        # Returns

        A dict with keys `"frcnn_classes"` and `"frcnn_boxes"` holding the two
        values returned by the detector.
        """
        frames_tensor = (
            obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2)
        )  # bhwc -> bchw (unnormalized)
        classes, boxes = self.frcnn(frames_tensor)

        return {"frcnn_classes": classes, "frcnn_boxes": boxes}


================================================
FILE: allenact_plugins/robothor_plugin/robothor_sensors.py
================================================
from typing import Any, Tuple, Optional

import ai2thor.controller
import gym
import numpy as np
import quaternion  # noqa # pylint: disable=unused-import

from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact.utils.system import get_logger
from allenact_plugins.ithor_plugin.ithor_sensors import (
    RGBSensorThor,
    THOR_ENV_TYPE,
    THOR_TASK_TYPE,
)
from allenact_plugins.robothor_plugin.robothor_environment import RoboThorEnvironment
from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask


class RGBSensorRoboThor(RGBSensorThor):
    """Deprecated alias of `RGBSensorThor` kept for backwards compatibility.

    Behaves exactly like `RGBSensorThor`; constructing it only adds a
    deprecation warning to the log.
    """

    def __init__(self, *args: Any, **kwargs: Any):
        """Log a deprecation warning, then defer to `RGBSensorThor.__init__`."""
        deprecation_message = (
            "`RGBSensorRoboThor` is deprecated, use `RGBSensorThor` instead."
        )
        get_logger().warning(deprecation_message)
        super().__init__(*args, **kwargs)


class RGBSensorMultiRoboThor(RGBSensor[RoboThorEnvironment, Task[RoboThorEnvironment]]):
    """RGB sensor for RoboTHOR environments with several agents.

    For each of `agent_count` agents the current frame is read from a running
    `RoboThorEnvironment`, and the per-agent observations are stacked into a
    single array.
    """

    def __init__(self, agent_count: int = 2, **kwargs):
        """Create the sensor.

        # Parameters

        agent_count : Number of agents whose frames are collected.
        kwargs : Forwarded unchanged to the parent sensor constructor.
        """
        # TODO take all named args from superclass and pass with super().__init__(**prepare_locals_for_super(locals()))
        super().__init__(**kwargs)
        # Index of the agent whose frame `frame_from_env` currently returns.
        self.agent_id = 0
        self.agent_count = agent_count

    def frame_from_env(
        self, env: RoboThorEnvironment, task: Optional[Task[RoboThorEnvironment]]
    ) -> np.ndarray:
        """Return a copy of the frame of the agent selected by `self.agent_id`."""
        selected_frame = env.current_frames[self.agent_id]
        return selected_frame.copy()

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Task[RoboThorEnvironment],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Return the parent observation of every agent, stacked on axis 0.

        `self.agent_id` is left at the index of the last agent processed.
        """
        per_agent_observations = []
        for agent_index in range(self.agent_count):
            # The parent implementation presumably obtains its frame through
            # `frame_from_env`, which reads `self.agent_id`, so the attribute
            # is updated before each call.
            self.agent_id = agent_index
            per_agent_observations.append(
                super().get_observation(env, task, *args, **kwargs)
            )
        # New leading axis indexes the agents.
        return np.stack(per_agent_observations, axis=0)


class GPSCompassSensorRoboThor(Sensor[RoboThorEnvironment, PointNavTask]):
    """Sensor reporting the navigation target in polar coordinates.

    The observation is a float32 array `[rho, phi]` computed from the offset
    between the agent position and the task target, after rotating that offset
    by the inverse of the agent's rotation about the y axis.
    """

    def __init__(self, uuid: str = "target_coordinates_ind", **kwargs: Any):
        # NOTE(review): `locals()` is forwarded through
        # `prepare_locals_for_super`, so the local names here (`uuid`,
        # `observation_space`, `kwargs`) presumably become the parent's
        # constructor arguments -- keep them unchanged.
        observation_space = self._get_observation_space()

        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self):
        """Return an unbounded (within float32 range) 2-element box space."""
        float32_info = np.finfo(np.float32)
        return gym.spaces.Box(
            low=float32_info.min,
            high=float32_info.max,
            shape=(2,),
            dtype=np.float32,
        )

    @staticmethod
    def _compute_pointgoal(
        source_position: np.ndarray,
        source_rotation: np.quaternion,
        goal_position: np.ndarray,
    ):
        """Express `goal_position - source_position` as `[rho, phi]`.

        The offset is rotated by the inverse of `source_rotation`; the polar
        conversion then uses component 2 as the first coordinate and the
        negated component 0 as the second.
        """
        offset = goal_position - source_position
        rotated_offset = GPSCompassSensorRoboThor.quaternion_rotate_vector(
            source_rotation.inverse(), offset
        )

        polar_x = rotated_offset[2]
        polar_y = -rotated_offset[0]
        rho, phi = GPSCompassSensorRoboThor.cartesian_to_polar(polar_x, polar_y)
        return np.array([rho, phi], dtype=np.float32)

    @staticmethod
    def quaternion_from_y_angle(angle: float) -> np.quaternion:
        r"""Creates a quaternion from a rotation angle (in degrees) around the y axis"""
        # Half of the angle, converted from degrees to radians.
        half_angle = np.pi * angle / 360.0
        xyzw = np.array([0.0, np.sin(half_angle), 0.0, np.cos(half_angle)])
        return GPSCompassSensorRoboThor.quaternion_from_coeff(xyzw)

    @staticmethod
    def quaternion_from_coeff(coeffs: np.ndarray) -> np.quaternion:
        r"""Creates a quaternion from coefficients given in [x, y, z, w] format"""
        result = np.quaternion(0, 0, 0, 0)
        result.imag = coeffs[:3]
        result.real = coeffs[3]
        return result

    @staticmethod
    def cartesian_to_polar(x, y):
        """Return `(rho, phi)`: the Euclidean norm of `(x, y)` and `arctan2(y, x)`."""
        return np.sqrt(x**2 + y**2), np.arctan2(y, x)

    @staticmethod
    def quaternion_rotate_vector(quat: np.quaternion, v: np.array) -> np.array:
        r"""Rotates a vector by a quaternion
        Args:
            quat: The quaternion to rotate by
            v: The vector to rotate
        Returns:
            np.array: The rotated vector
        """
        # Embed the vector as a quaternion with zero real part, conjugate it by
        # `quat`, and read the vector part back out.
        vector_as_quat = np.quaternion(0, 0, 0, 0)
        vector_as_quat.imag = v
        rotated = quat * vector_as_quat * quat.inverse()
        return rotated.imag

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Optional[PointNavTask],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Return `[rho, phi]` from the agent's current pose to the task target.

        The agent pose comes from `env.agent_state()` and the goal from
        `task.task_info["target"]`; `task` is dereferenced unconditionally.
        """
        axes = ("x", "y", "z")

        state = env.agent_state()
        agent_position = np.array([state[axis] for axis in axes])
        rotation_world_agent = self.quaternion_from_y_angle(state["rotation"]["y"])

        target = task.task_info["target"]
        goal_position = np.array([target[axis] for axis in axes])

        return self._compute_pointgoal(
            agent_position, rotation_world_agent, goal_position
        )


class DepthSensorThor(
    DepthSensor[
        THOR_ENV_TYPE,
        THOR_TASK_TYPE,
    ],
):
    """Depth sensor reading the depth frame of the last AI2-THOR event.

    Works both with a raw `ai2thor.controller.Controller` and with an
    environment wrapper exposing such a controller as `env.controller`.
    """

    def __init__(
        self,
        use_resnet_normalization: Optional[bool] = None,
        use_normalization: Optional[bool] = None,
        mean: Optional[np.ndarray] = np.array([[0.5]], dtype=np.float32),
        stdev: Optional[np.ndarray] = np.array([[0.25]], dtype=np.float32),
        height: Optional[int] = None,
        width: Optional[int] = None,
        uuid: str = "depth",
        output_shape: Optional[Tuple[int, ...]] = None,
        output_channels: int = 1,
        unnormalized_infimum: float = 0.0,
        unnormalized_supremum: float = 5.0,
        scale_first: bool = False,
        **kwargs: Any
    ):
        """Resolve the normalization flag and defer to the parent constructor.

        `use_normalization` takes priority; when it is `None` the legacy
        `use_resnet_normalization` value is used if given, otherwise
        normalization defaults to `False`. All parameters are then handed to
        the parent through `prepare_locals_for_super(locals())`; their meaning
        is defined by the parent `DepthSensor`.

        NOTE(review): the default `mean` / `stdev` arrays are created once at
        function definition time and are therefore shared by every instance
        relying on the defaults.
        """
        # Give priority to use_normalization, but use_resnet_normalization for backward compat. if not set
        if use_resnet_normalization is not None and use_normalization is None:
            use_normalization = use_resnet_normalization
        elif use_normalization is None:
            use_normalization = False

        # `locals()` is captured here: the parameter names above (and the
        # possibly rewritten `use_normalization`) are what the parent receives.
        super().__init__(**prepare_locals_for_super(locals()))

    def frame_from_env(
        self, env: THOR_ENV_TYPE, task: Optional[THOR_TASK_TYPE]
    ) -> np.ndarray:
        """Return the depth frame of the most recent event; `task` is unused."""
        # Anything that is not a raw controller is assumed to wrap one.
        if not isinstance(env, ai2thor.controller.Controller):
            return env.controller.last_event.depth_frame

        return env.last_event.depth_frame


class DepthSensorRoboThor(DepthSensorThor):
    """Deprecated alias of `DepthSensorThor` kept for backwards compatibility."""

    def __init__(self, *args: Any, **kwargs: Any):
        """Log a deprecation warning, then defer to `DepthSensorThor.__init__`."""
        deprecation_message = (
            "`DepthSensorRoboThor` is deprecated, use `DepthSensorThor` instead."
        )
        get_logger().warning(deprecation_message)
        super().__init__(*args, **kwargs)


================================================
FILE: allenact_plugins/robothor_plugin/robothor_task_samplers.py
================================================
import copy
import gzip
import json
import random
from typing import List, Optional, Union, Dict, Any, cast, Tuple

import gym

from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.cache_utils import str_to_pos_for_cache
from allenact.utils.experiment_utils import set_seed, set_deterministic_cudnn
from allenact.utils.system import get_logger
from allenact_plugins.robothor_plugin.robothor_environment import RoboThorEnvironment
from allenact_plugins.robothor_plugin.robothor_tasks import (
    ObjectNavTask,
    PointNavTask,
    NavToPartnerTask,
)


class ObjectNavTaskSampler(TaskSampler):
    """Sampler producing `ObjectNavTask`s in a `RoboThorEnvironment`.

    Two modes exist, selected by `dataset_first` / `dataset_last`:

    * scene mode (both negative): `scenes` is a list of scene names. A scene
      is chosen according to `scene_period`, the agent location is randomized
      and a target object type present in the scene is picked at random.
    * dataset mode (at least one of them non-negative): `scenes` is the path
      of a JSON file holding a list of episodes. Episodes with indices in
      `[dataset_first, dataset_last]` are replayed, starting from the last
      index and moving down to the first.
    """

    def __init__(
        self,
        scenes: Union[List[str], str],
        object_types: List[str],
        sensors: List[Sensor],
        max_steps: int,
        env_args: Dict[str, Any],
        action_space: gym.Space,
        rewards_config: Dict,
        scene_period: Optional[Union[int, str]] = None,
        max_tasks: Optional[int] = None,
        seed: Optional[int] = None,
        deterministic_cudnn: bool = False,
        allow_flipping: bool = False,
        dataset_first: int = -1,
        dataset_last: int = -1,
        **kwargs,
    ) -> None:
        """Create the sampler.

        # Parameters

        scenes : List of scene names (scene mode) or path to a JSON episode
            file (dataset mode).
        object_types : Candidate target object types (scene mode only).
        sensors : Sensors handed to every created task.
        max_steps : Step limit handed to every created task.
        env_args : Keyword arguments used to build the `RoboThorEnvironment`.
        action_space : Action space handed to every created task.
        rewards_config : Reward configuration handed to every created task.
        scene_period : `None` (random scene for every task), `"manual"` (scene
            only changes when `force_advance_scene` is set) or an integer
            number of tasks to sample before moving to the next scene.
        max_tasks : Number of tasks available after each `reset` in scene
            mode (`None` for unlimited). Ignored in dataset mode, where the
            count is derived from the episode index range.
        seed : Optional seed applied through `set_seed`.
        deterministic_cudnn : If `True`, `set_deterministic_cudnn` is called.
        allow_flipping : If `True`, each task is marked as mirrored with
            probability one half.
        dataset_first : Index of the first dataset episode (negative: 0).
        dataset_last : Index of the last dataset episode (negative: last one).
        """
        self.rewards_config = rewards_config
        self.env_args = env_args
        self.scenes = scenes
        self.object_types = object_types
        self.env: Optional[RoboThorEnvironment] = None
        self.sensors = sensors
        self.max_steps = max_steps
        self._action_space = action_space
        self.allow_flipping = allow_flipping

        self.scenes_is_dataset = (dataset_first >= 0) or (dataset_last >= 0)

        if not self.scenes_is_dataset:
            assert isinstance(
                self.scenes, List
            ), "When not using a dataset, scenes ({}) must be a list".format(
                self.scenes
            )
            self.scene_counter: Optional[int] = None
            # Permutation of indices into `self.scenes`, see `reset`.
            self.scene_order: Optional[List[int]] = None
            self.scene_id: Optional[int] = None
            self.scene_period: Optional[Union[str, int]] = (
                scene_period  # default makes a random choice
            )
            self.max_tasks: Optional[int] = None
            self.reset_tasks = max_tasks
        else:
            assert isinstance(
                self.scenes, str
            ), "When using a dataset, scenes ({}) must be a json file name string".format(
                self.scenes
            )
            with open(self.scenes, "r") as f:
                self.dataset_episodes = json.load(f)
            self.dataset_first = dataset_first if dataset_first >= 0 else 0
            self.dataset_last = (
                dataset_last if dataset_last >= 0 else len(self.dataset_episodes) - 1
            )
            assert (
                0 <= self.dataset_first <= self.dataset_last
            ), "dataset_last {} must be >= dataset_first {} >= 0".format(
                dataset_last, dataset_first
            )
            # One task per episode in the (inclusive) index range.
            self.reset_tasks = self.dataset_last - self.dataset_first + 1

        self._last_sampled_task: Optional[ObjectNavTask] = None

        self.seed: Optional[int] = None
        self.set_seed(seed)

        if deterministic_cudnn:
            set_deterministic_cudnn()

        # Initializes `max_tasks` (and the scene bookkeeping in scene mode).
        self.reset()

    def _create_environment(self) -> RoboThorEnvironment:
        """Build a new `RoboThorEnvironment` from `self.env_args`."""
        env = RoboThorEnvironment(**self.env_args)
        return env

    @property
    def length(self) -> Union[int, float]:
        """Length.

        # Returns

        Number of total tasks remaining that can be sampled. Can be float('inf').
        """
        return float("inf") if self.max_tasks is None else self.max_tasks

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Number of tasks available after a `reset` (`None` if unlimited)."""
        return self.reset_tasks

    @property
    def last_sampled_task(self) -> Optional[ObjectNavTask]:
        """The task most recently returned by `next_task`, if any."""
        return self._last_sampled_task

    def close(self) -> None:
        """Stop the environment if one has been created."""
        if self.env is not None:
            self.env.stop()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Check if observation spaces equal.

        # Returns

        True if all Tasks that can be sampled by this sampler have the
        same observation space. Otherwise False.
        """
        return True

    def sample_scene(self, force_advance_scene: bool):
        """Pick the scene for the next task (scene mode only).

        Also decrements `self.max_tasks` when it is not `None`.

        # Parameters

        force_advance_scene : If `True`, first step to the next entry of the
            scene order (reshuffling when wrapping around). Intended for
            `scene_period == "manual"`; a warning is logged otherwise.

        # Returns

        The name of the selected scene.

        # Raises

        NotImplementedError : If `scene_period` is neither `None`,
            `"manual"` nor an integer.
        """
        if force_advance_scene:
            if self.scene_period != "manual":
                get_logger().warning(
                    "When sampling scene, have `force_advance_scene == True`"
                    " but `self.scene_period` is not equal to 'manual',"
                    " this may cause unexpected behavior."
                )
            self.scene_id = (1 + self.scene_id) % len(self.scenes)
            if self.scene_id == 0:
                random.shuffle(self.scene_order)

        if self.scene_period is None:
            # Random scene
            self.scene_id = random.randint(0, len(self.scenes) - 1)
        elif self.scene_period == "manual":
            pass
        elif isinstance(self.scene_period, int):
            # The type is checked before comparing so that an invalid period
            # reaches the `NotImplementedError` below instead of failing on
            # an `int >= str` comparison.
            if self.scene_counter >= self.scene_period:
                if self.scene_id == len(self.scene_order) - 1:
                    # Randomize scene order for next iteration
                    random.shuffle(self.scene_order)
                    # Move to next scene
                    self.scene_id = 0
                else:
                    # Move to next scene
                    self.scene_id += 1
                # Reset scene counter
                self.scene_counter = 1
            else:
                # Stay in current scene
                self.scene_counter += 1
        else:
            raise NotImplementedError(
                "Invalid scene_period {}".format(self.scene_period)
            )

        if self.max_tasks is not None:
            self.max_tasks -= 1

        return self.scenes[int(self.scene_order[self.scene_id])]

    def next_task(self, force_advance_scene: bool = False) -> Optional[ObjectNavTask]:
        """Create the next task, or return `None` when no tasks remain.

        In scene mode a scene is sampled, the agent location is randomized
        and the first candidate object type (in random order) present in the
        scene becomes the target. In dataset mode the next episode is copied
        from the dataset and the agent is teleported to its initial pose.

        # Parameters

        force_advance_scene : Forwarded to `sample_scene` (scene mode only).

        # Returns

        The new `ObjectNavTask`, also available as `last_sampled_task`.
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        if not self.scenes_is_dataset:
            scene = self.sample_scene(force_advance_scene)

            if self.env is not None:
                # Only reload when the scene actually changes.
                if scene.replace("_physics", "") != self.env.scene_name.replace(
                    "_physics", ""
                ):
                    self.env.reset(scene)
            else:
                self.env = self._create_environment()
                self.env.reset(scene_name=scene)

            pose = self.env.randomize_agent_location()

            object_types_in_scene = {
                o["objectType"] for o in self.env.last_event.metadata["objects"]
            }

            task_info = {"scene": scene}
            # Visit the candidate types in random order, keep the first match.
            for ot in random.sample(self.object_types, len(self.object_types)):
                if ot in object_types_in_scene:
                    task_info["object_type"] = ot
                    break

            # `task_info` always contains "scene", so the absence of a target
            # has to be detected through the missing "object_type" key.
            if "object_type" not in task_info:
                get_logger().warning(
                    "Scene {} does not contain any"
                    " objects of any of the types {}.".format(scene, self.object_types)
                )

            task_info["initial_position"] = {k: pose[k] for k in ["x", "y", "z"]}
            task_info["initial_orientation"] = cast(Dict[str, float], pose["rotation"])[
                "y"
            ]
        else:
            assert self.max_tasks is not None
            # `max_tasks` counts down, so episodes are visited from
            # `dataset_last` towards `dataset_first`.
            next_task_id = self.dataset_first + self.max_tasks - 1
            assert (
                self.dataset_first <= next_task_id <= self.dataset_last
            ), "wrong task_id {} for min {} max {}".format(
                next_task_id, self.dataset_first, self.dataset_last
            )
            # Copy so that the stored dataset entry is never mutated.
            task_info = copy.deepcopy(self.dataset_episodes[next_task_id])

            scene = task_info["scene"]
            if self.env is not None:
                if scene.replace("_physics", "") != self.env.scene_name.replace(
                    "_physics", ""
                ):
                    self.env.reset(scene_name=scene)
            else:
                self.env = self._create_environment()
                self.env.reset(scene_name=scene)

            self.env.step(
                {
                    "action": "TeleportFull",
                    **{k: float(v) for k, v in task_info["initial_position"].items()},
                    "rotation": {
                        "x": 0.0,
                        "y": float(task_info["initial_orientation"]),
                        "z": 0.0,
                    },
                    "horizon": 0.0,
                    "standing": True,
                }
            )
            assert self.env.last_action_success, "Failed to reset agent for {}".format(
                task_info
            )

            self.max_tasks -= 1

        # The random draw only happens when flipping is enabled.
        if self.allow_flipping and random.random() > 0.5:
            task_info["mirrored"] = True
        else:
            task_info["mirrored"] = False

        self._last_sampled_task = ObjectNavTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            reward_configs=self.rewards_config,
        )
        return self._last_sampled_task

    def reset(self):
        """Restore the task budget and, in scene mode, reshuffle the scene order."""
        if not self.scenes_is_dataset:
            self.scene_counter = 0
            self.scene_order = list(range(len(self.scenes)))
            random.shuffle(self.scene_order)
            self.scene_id = 0
        self.max_tasks = self.reset_tasks

    def set_seed(self, seed: Optional[int]):
        """Record `seed` and, unless it is `None`, apply it through `set_seed`."""
        self.seed = seed
        if seed is not None:
            set_seed(seed)


class ObjectNavDatasetTaskSampler(TaskSampler):
    """Sampler producing `ObjectNavTask`s from per-scene episode files.

    For every scene a gzipped JSON list of episodes is loaded from
    `<scene_directory>/episodes/<scene>.json.gz`. Scenes are visited in the
    order given; within a scene, episodes are served in shuffled order, and
    the episode list is reshuffled each time the sampler moves on to a scene.
    """

    def __init__(
        self,
        scenes: List[str],
        scene_directory: str,
        sensors: List[Sensor],
        max_steps: int,
        env_args: Dict[str, Any],
        action_space: gym.Space,
        rewards_config: Dict,
        seed: Optional[int] = None,
        deterministic_cudnn: bool = False,
        loop_dataset: bool = True,
        allow_flipping=False,
        env_class=RoboThorEnvironment,
        randomize_materials_in_training: bool = False,
        **kwargs,
    ) -> None:
        """Load the episodes and initialize the sampler.

        # Parameters

        scenes : Names of the scenes whose episode files are loaded.
        scene_directory : Directory containing the `episodes` sub-directory.
        sensors : Sensors handed to every created task.
        max_steps : Step limit handed to every created task.
        env_args : Keyword arguments used to build the environment.
        action_space : Action space handed to every created task.
        rewards_config : Reward configuration handed to every created task.
        seed : Optional seed applied through `set_seed`.
        deterministic_cudnn : If `True`, `set_deterministic_cudnn` is called.
        loop_dataset : If `True` the number of tasks is unlimited, otherwise
            it equals the number of (retained) episodes.
        allow_flipping : If `True`, each task is marked as mirrored with
            probability one half.
        env_class : Class used to create the environment.
        randomize_materials_in_training : If `True`, the
            `RandomizeMaterials` action is issued for training scenes.
        kwargs : If it contains `"object_types"`, only episodes whose
            `"object_type"` is among them are kept, and scenes left without
            episodes are dropped.
        """
        self.rewards_config = rewards_config
        self.env_args = env_args
        self.scenes = scenes
        # NOTE(review): `load_dataset` shuffles with the global `random` state
        # before `set_seed(seed)` is applied further down.
        self.episodes = {
            scene: ObjectNavDatasetTaskSampler.load_dataset(
                scene, scene_directory + "/episodes"
            )
            for scene in scenes
        }

        # Only keep episodes containing desired objects
        if "object_types" in kwargs:
            self.episodes = {
                scene: [
                    ep for ep in episodes if ep["object_type"] in kwargs["object_types"]
                ]
                for scene, episodes in self.episodes.items()
            }
            self.episodes = {
                scene: episodes
                for scene, episodes in self.episodes.items()
                if len(episodes) > 0
            }
            self.scenes = [scene for scene in self.scenes if scene in self.episodes]

        self.env_class = env_class
        # One entry per retained episode (duplicates are kept).
        self.object_types = [
            ep["object_type"] for scene in self.episodes for ep in self.episodes[scene]
        ]
        self.env: Optional[RoboThorEnvironment] = None
        self.sensors = sensors
        self.max_steps = max_steps
        self._action_space = action_space
        self.allow_flipping = allow_flipping
        self.scene_counter: Optional[int] = None
        self.scene_order: Optional[List[str]] = None
        self.scene_id: Optional[int] = None
        # get the total number of tasks assigned to this process
        if loop_dataset:
            self.max_tasks = None
        else:
            self.max_tasks = sum(len(self.episodes[scene]) for scene in self.episodes)
        self.reset_tasks = self.max_tasks
        self.scene_index = 0
        self.episode_index = 0
        self.randomize_materials_in_training = randomize_materials_in_training

        self._last_sampled_task: Optional[ObjectNavTask] = None

        self.seed: Optional[int] = None
        self.set_seed(seed)

        if deterministic_cudnn:
            set_deterministic_cudnn()

        self.reset()

    def _create_environment(self) -> RoboThorEnvironment:
        """Build a new environment of type `self.env_class` from `self.env_args`."""
        env = self.env_class(**self.env_args)
        return env

    @staticmethod
    def _read_gzipped_json(scene: str, base_directory: str) -> Any:
        """Parse `<base_directory>/<scene>.json.gz` and return its JSON content.

        The file is opened in a `with` block so the handle is released even
        when reading or decoding fails.
        """
        separator = "" if base_directory.endswith("/") else "/"
        filename = base_directory + separator + scene + ".json.gz"
        with gzip.GzipFile(filename, "r") as fin:
            json_bytes = fin.read()
        return json.loads(json_bytes.decode("utf-8"))

    @staticmethod
    def load_dataset(scene: str, base_directory: str) -> List[Dict]:
        """Load the episodes of `scene` and return them in shuffled order.

        # Parameters

        scene : Scene name, used as the file stem.
        base_directory : Directory holding `<scene>.json.gz`.
        """
        data = ObjectNavDatasetTaskSampler._read_gzipped_json(scene, base_directory)
        random.shuffle(data)
        return data

    @staticmethod
    def load_distance_cache_from_file(scene: str, base_directory: str) -> Dict:
        """Load the JSON content of `<base_directory>/<scene>.json.gz` unchanged."""
        return ObjectNavDatasetTaskSampler._read_gzipped_json(scene, base_directory)

    # NOTE(review): `__len__` is declared as a property, so `sampler.__len__`
    # evaluates to a number rather than a method and the built-in `len()`
    # cannot use it. Kept as is for interface compatibility; prefer `length`.
    @property
    def __len__(self) -> Union[int, float]:
        """Length.

        # Returns

        Number of total tasks remaining that can be sampled. Can be float('inf').
        """
        return float("inf") if self.max_tasks is None else self.max_tasks

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Number of tasks available after a `reset` (`None` if unlimited)."""
        return self.reset_tasks

    @property
    def last_sampled_task(self) -> Optional[ObjectNavTask]:
        """The task most recently returned by `next_task`, if any."""
        return self._last_sampled_task

    def close(self) -> None:
        """Stop the environment if one has been created."""
        if self.env is not None:
            self.env.stop()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Check if observation spaces equal.

        # Returns

        True if all Tasks that can be sampled by this sampler have the
            same observation space. Otherwise False.
        """
        return True

    @property
    def length(self) -> Union[int, float]:
        """Length.

        # Returns

        Number of total tasks remaining that can be sampled. Can be float('inf').
        """
        return float("inf") if self.max_tasks is None else self.max_tasks

    @staticmethod
    def _is_training_scene(scene: str) -> bool:
        """Tell whether `scene` counts as a training scene.

        A scene qualifies when its name contains `"Train"`, or when the name
        stripped of `"FloorPlan"` and `"_physics"` is an integer whose value
        modulo 100 is below 21. Names that are not numeric after stripping
        are treated as non-training scenes instead of raising `ValueError`.
        """
        if "Train" in scene:
            return True
        try:
            scene_number = int(scene.replace("FloorPlan", "").replace("_physics", ""))
        except ValueError:
            return False
        return scene_number % 100 < 21

    def next_task(self, force_advance_scene: bool = False) -> Optional[ObjectNavTask]:
        """Create the task for the next episode, or return `None` when exhausted.

        The environment is switched to the episode's scene if needed, the
        object filter is restricted to objects of the target type, materials
        are optionally randomized and the agent is teleported to the
        episode's initial pose. If the teleport fails, the episode is skipped
        (it still counts against `max_tasks`) and the following one is tried.

        # Parameters

        force_advance_scene : Accepted for interface compatibility; unused.

        # Returns

        The new `ObjectNavTask`, also available as `last_sampled_task`.
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        if self.episode_index >= len(self.episodes[self.scenes[self.scene_index]]):
            self.scene_index = (self.scene_index + 1) % len(self.scenes)
            # shuffle the new list of episodes to train on
            random.shuffle(self.episodes[self.scenes[self.scene_index]])
            self.episode_index = 0
        scene = self.scenes[self.scene_index]
        episode = self.episodes[scene][self.episode_index]
        if self.env is None:
            self.env = self._create_environment()

        # Only reload when the scene actually changes; otherwise just clear
        # the filter left over from the previous episode.
        if scene.replace("_physics", "") != self.env.scene_name.replace("_physics", ""):
            self.env.reset(scene_name=scene)
        else:
            self.env.reset_object_filter()

        self.env.set_object_filter(
            object_ids=[
                o["objectId"]
                for o in self.env.last_event.metadata["objects"]
                if o["objectType"] == episode["object_type"]
            ]
        )

        # only randomize materials in train scenes
        were_materials_randomized = False
        if self.randomize_materials_in_training and self._is_training_scene(scene):
            were_materials_randomized = True
            self.env.controller.step(action="RandomizeMaterials")

        task_info = {
            "scene": scene,
            "object_type": episode["object_type"],
            "materials_randomized": were_materials_randomized,
            "initial_position": episode["initial_position"],
            "initial_orientation": episode["initial_orientation"],
            "initial_horizon": episode.get("initial_horizon", 0),
            "distance_to_target": episode.get("shortest_path_length"),
            "path_to_target": episode.get("shortest_path"),
            "id": episode["id"],
        }
        # The random draw only happens when flipping is enabled.
        task_info["mirrored"] = bool(self.allow_flipping and random.random() > 0.5)

        # The bookkeeping is advanced before teleporting so that a failed
        # teleport moves on to the following episode.
        self.episode_index += 1
        if self.max_tasks is not None:
            self.max_tasks -= 1
        if not self.env.teleport(
            pose=episode["initial_position"],
            rotation=episode["initial_orientation"],
            horizon=episode.get("initial_horizon", 0),
        ):
            return self.next_task()
        self._last_sampled_task = ObjectNavTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            reward_configs=self.rewards_config,
        )
        return self._last_sampled_task

    def reset(self):
        """Rewind to the first episode of the first scene and restore the task budget."""
        self.episode_index = 0
        self.scene_index = 0
        self.max_tasks = self.reset_tasks

    def set_seed(self, seed: Optional[int]):
        """Record `seed` and, unless it is `None`, apply it through `set_seed`."""
        self.seed = seed
        if seed is not None:
            set_seed(seed)


class PointNavTaskSampler(TaskSampler):
    """Task sampler producing `PointNavTask`s with randomly drawn endpoints.

    A single `RoboThorEnvironment` is created lazily on the first call to
    `next_task` and reused afterwards; it is reset only when the sampled
    scene differs from the one currently loaded.
    """

    def __init__(
        self,
        scenes: List[str],
        sensors: List[Sensor],
        max_steps: int,
        env_args: Dict[str, Any],
        action_space: gym.Space,
        rewards_config: Dict,
        scene_period: Optional[Union[int, str]] = None,
        max_tasks: Optional[int] = None,
        seed: Optional[int] = None,
        deterministic_cudnn: bool = False,
        **kwargs,
    ) -> None:
        """Store the configuration and reset the sampler.

        # Parameters

        scenes : Names of the scenes tasks may be sampled from.
        sensors : Sensors handed to every sampled task.
        max_steps : Step budget handed to every sampled task.
        env_args : Keyword arguments used to build the `RoboThorEnvironment`.
        action_space : Action space handed to every sampled task.
        rewards_config : Reward configuration handed to every sampled task.
        scene_period : `None` picks a random scene for every task, `"manual"`
            changes scene only when `next_task` is called with
            `force_advance_scene=True`, and an integer is the number of
            consecutive tasks sampled from a scene before moving on.
        max_tasks : Number of tasks after which `next_task` returns `None`;
            `None` means no limit.
        seed : Passed to `set_seed` unless it is `None`.
        deterministic_cudnn : If true, `set_deterministic_cudnn()` is called.
        kwargs : Accepted and ignored.
        """
        self.rewards_config = rewards_config
        self.env_args = env_args
        self.scenes = scenes
        self.env: Optional[RoboThorEnvironment] = None
        self.sensors = sensors
        self.max_steps = max_steps
        self._action_space = action_space

        self.scene_counter: Optional[int] = None
        # Permutation of indices into `self.scenes` (filled in by `reset`).
        self.scene_order: Optional[List[int]] = None
        # Position within `self.scene_order` of the scene currently in use.
        self.scene_id: Optional[int] = None
        self.scene_period: Optional[Union[str, int]] = (
            scene_period  # default makes a random choice
        )
        self.max_tasks: Optional[int] = None
        self.reset_tasks = max_tasks

        self._last_sampled_task: Optional[PointNavTask] = None

        self.seed: Optional[int] = None
        self.set_seed(seed)

        if deterministic_cudnn:
            set_deterministic_cudnn()

        self.reset()

    def _create_environment(self) -> RoboThorEnvironment:
        """Build a new environment from `self.env_args`."""
        env = RoboThorEnvironment(**self.env_args)
        return env

    @property
    def length(self) -> Union[int, float]:
        """Length.

        # Returns

        Number of total tasks remaining that can be sampled.
        Can be float('inf').
        """
        return float("inf") if self.max_tasks is None else self.max_tasks

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """The `max_tasks` value given at construction (may be `None`)."""
        return self.reset_tasks

    @property
    def last_sampled_task(self) -> Optional[PointNavTask]:
        """The task most recently returned by `next_task`, if any."""
        return self._last_sampled_task

    def close(self) -> None:
        """Stop the environment if one has been created."""
        if self.env is not None:
            self.env.stop()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Check if observation spaces equal.

        # Returns

        True if all Tasks that can be sampled by this sampler
        have the same observation space. Otherwise False.
        """
        return True

    def sample_scene(self, force_advance_scene: bool):
        """Choose the scene for the next task and consume one unit of budget.

        # Parameters

        force_advance_scene : If true, step to the next entry of the scene
            order before applying the `scene_period` policy. A warning is
            logged when this is used with a `scene_period` other than
            `"manual"`.

        # Returns

        The name of the selected scene.

        # Raises

        NotImplementedError : If `scene_period` is neither `None`,
            `"manual"`, nor an integer.
        """
        if force_advance_scene:
            if self.scene_period != "manual":
                # The literals carry explicit separating spaces: adjacent
                # string literals are concatenated verbatim.
                get_logger().warning(
                    "When sampling scene, have `force_advance_scene == True`"
                    " but `self.scene_period` is not equal to 'manual',"
                    " this may cause unexpected behavior."
                )
            self.scene_id = (1 + self.scene_id) % len(self.scenes)
            if self.scene_id == 0:
                # Wrapped around: use a fresh order for the next pass.
                random.shuffle(self.scene_order)

        if self.scene_period is None:
            # Random scene
            self.scene_id = random.randint(0, len(self.scenes) - 1)
        elif self.scene_period == "manual":
            pass
        elif self.scene_counter >= cast(int, self.scene_period):
            if self.scene_id == len(self.scene_order) - 1:
                # Randomize scene order for next iteration
                random.shuffle(self.scene_order)
                # Move to next scene
                self.scene_id = 0
            else:
                # Move to next scene
                self.scene_id += 1
            # Reset scene counter
            self.scene_counter = 1
        elif isinstance(self.scene_period, int):
            # Stay in current scene
            self.scene_counter += 1
        else:
            raise NotImplementedError(
                "Invalid scene_period {}".format(self.scene_period)
            )

        if self.max_tasks is not None:
            self.max_tasks -= 1

        return self.scenes[int(self.scene_order[self.scene_id])]

    def next_task(self, force_advance_scene: bool = False) -> Optional[PointNavTask]:
        """Sample a new `PointNavTask`.

        The agent is placed at a random location and a target is drawn from
        the environment's known good locations; up to 10 attempts are made
        to find a pair whose distance is strictly positive. If none is
        found a warning is logged and the last sampled pair is used anyway.

        # Parameters

        force_advance_scene : Forwarded to `sample_scene`.

        # Returns

        The new task, or `None` once the task budget is exhausted.
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        scene = self.sample_scene(force_advance_scene)

        if self.env is not None:
            # Scene names are compared with any "_physics" marker removed.
            if scene.replace("_physics", "") != self.env.scene_name.replace(
                "_physics", ""
            ):
                self.env.reset(scene_name=scene)
        else:
            self.env = self._create_environment()
            self.env.reset(scene_name=scene)

        locs = self.env.known_good_locations_list()

        # All candidate locations are required to share the same height.
        ys = [loc["y"] for loc in locs]
        miny = min(ys)
        maxy = max(ys)
        assert maxy - miny < 1e-6, "miny {} maxy {} for scene {}".format(
            miny, maxy, scene
        )

        too_close_to_target = True
        target: Optional[Dict[str, float]] = None
        for _ in range(10):
            self.env.randomize_agent_location()
            target = copy.copy(random.choice(locs))
            too_close_to_target = self.env.distance_to_point(target) <= 0
            if not too_close_to_target:
                break

        pose = self.env.agent_state()

        task_info = {
            "scene": scene,
            "initial_position": {k: pose[k] for k in ["x", "y", "z"]},
            "initial_orientation": pose["rotation"]["y"],
            "target": target,
            "actions": [],
        }

        if too_close_to_target:
            get_logger().warning("No path for sampled episode {}".format(task_info))

        self._last_sampled_task = PointNavTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            reward_configs=self.rewards_config,
        )
        return self._last_sampled_task

    def reset(self):
        """Reshuffle the scene order and restore the task budget."""
        self.scene_counter = 0
        self.scene_order = list(range(len(self.scenes)))
        random.shuffle(self.scene_order)
        self.scene_id = 0
        self.max_tasks = self.reset_tasks

    def set_seed(self, seed: int):
        """Record `seed` and, unless it is `None`, pass it to the module-level `set_seed`."""
        self.seed = seed
        if seed is not None:
            set_seed(seed)


class PointNavDatasetTaskSampler(TaskSampler):
    """Task sampler that replays pre-generated point-navigation episodes.

    Episodes are loaded per scene at construction time and served scene by
    scene, in order; the environment is created lazily on the first call to
    `next_task`.
    """

    def __init__(
        self,
        scenes: List[str],
        scene_directory: str,
        sensors: List[Sensor],
        max_steps: int,
        env_args: Dict[str, Any],
        action_space: gym.Space,
        rewards_config: Dict,
        seed: Optional[int] = None,
        deterministic_cudnn: bool = False,
        loop_dataset: bool = True,
        shuffle_dataset: bool = True,
        allow_flipping=False,
        env_class=RoboThorEnvironment,
        **kwargs,
    ) -> None:
        """Load the episodes and reset the sampler.

        # Parameters

        scenes : Names of the scenes whose episodes should be loaded.
        scene_directory : Dataset directory; episodes are loaded from its
            `episodes` sub-directory via
            `ObjectNavDatasetTaskSampler.load_dataset`.
        sensors : Sensors handed to every sampled task.
        max_steps : Step budget handed to every sampled task.
        env_args : Keyword arguments used to build the environment.
        action_space : Action space handed to every sampled task.
        rewards_config : Reward configuration handed to every sampled task.
        seed : Passed to `set_seed` unless it is `None`.
        deterministic_cudnn : If true, `set_deterministic_cudnn()` is called.
        loop_dataset : If true the sampler never runs out of tasks;
            otherwise the budget is the total number of loaded episodes.
        shuffle_dataset : If true, a scene's episodes are shuffled each time
            the sampler moves on to that scene.
        allow_flipping : If true, each task is flagged as mirrored with
            probability one half.
        env_class : Class instantiated to create the environment.
        kwargs : Accepted and ignored.
        """
        self.rewards_config = rewards_config
        self.env_args = env_args
        self.scenes = scenes
        self.shuffle_dataset: bool = shuffle_dataset
        self.episodes = {
            scene: ObjectNavDatasetTaskSampler.load_dataset(
                scene, scene_directory + "/episodes"
            )
            for scene in scenes
        }
        self.env_class = env_class
        self.env: Optional[RoboThorEnvironment] = None
        self.sensors = sensors
        self.max_steps = max_steps
        self._action_space = action_space
        self.allow_flipping = allow_flipping
        self.scene_counter: Optional[int] = None
        self.scene_order: Optional[List[str]] = None
        self.scene_id: Optional[int] = None
        # get the total number of tasks assigned to this process
        if loop_dataset:
            self.max_tasks = None
        else:
            self.max_tasks = sum(len(self.episodes[scene]) for scene in self.episodes)
        self.reset_tasks = self.max_tasks
        self.scene_index = 0
        self.episode_index = 0

        self._last_sampled_task: Optional[PointNavTask] = None

        self.seed: Optional[int] = None
        self.set_seed(seed)

        if deterministic_cudnn:
            set_deterministic_cudnn()

        self.reset()

    def _create_environment(self) -> RoboThorEnvironment:
        """Build a new environment with `self.env_class` and `self.env_args`."""
        env = self.env_class(**self.env_args)
        return env

    def __len__(self) -> int:
        """Number of tasks remaining, for use with `len()`.

        This is a regular method rather than a property: Python looks
        `__len__` up on the type and *calls* it, so a property here makes
        `len(sampler)` (and truth-testing the sampler) fail with a
        `TypeError`.

        # Returns

        The remaining number of tasks.

        # Raises

        TypeError : If the sampler is unbounded (`max_tasks is None`), as
            `len()` cannot represent infinity. Use `length` in that case.
        """
        remaining = self.length
        if remaining == float("inf"):
            raise TypeError(
                "This sampler has an unbounded number of tasks;"
                " use the `length` property instead of `len()`."
            )
        return int(remaining)

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Total number of loaded episodes, or `None` when looping the dataset."""
        return self.reset_tasks

    @property
    def last_sampled_task(self) -> Optional[PointNavTask]:
        """The task most recently returned by `next_task`, if any."""
        return self._last_sampled_task

    def close(self) -> None:
        """Stop the environment if one has been created."""
        if self.env is not None:
            self.env.stop()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Check if observation spaces equal.

        # Returns

        True if all Tasks that can be sampled by this sampler have the
            same observation space. Otherwise False.
        """
        return True

    def next_task(self, force_advance_scene: bool = False) -> Optional[PointNavTask]:
        """Return a task for the next episode of the dataset.

        If teleporting the agent to the episode's start pose fails, that
        episode is skipped (it still consumes one unit of the task budget)
        and the following one is tried.

        # Parameters

        force_advance_scene : Not used by this sampler; it is only forwarded
            when an episode has to be skipped.

        # Returns

        The new task, or `None` once the task budget is exhausted.
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        if self.episode_index >= len(self.episodes[self.scenes[self.scene_index]]):
            self.scene_index = (self.scene_index + 1) % len(self.scenes)
            # shuffle the new list of episodes to train on
            if self.shuffle_dataset:
                random.shuffle(self.episodes[self.scenes[self.scene_index]])
            self.episode_index = 0

        scene = self.scenes[self.scene_index]
        episode = self.episodes[scene][self.episode_index]
        if self.env is not None:
            # Scene names are compared with any "_physics" marker removed.
            if scene.replace("_physics", "") != self.env.scene_name.replace(
                "_physics", ""
            ):
                self.env.reset(scene_name=scene, filtered_objects=[])
        else:
            self.env = self._create_environment()
            self.env.reset(scene_name=scene, filtered_objects=[])

        def to_pos(s):
            """Normalize a stored pose entry.

            Dicts/tuples pass through, a bare number is taken as the `y`
            component, and anything else is parsed with
            `str_to_pos_for_cache`.
            """
            if isinstance(s, (Dict, Tuple)):
                return s
            # Integers are accepted alongside floats: a yaw stored as e.g.
            # `90` must not be routed to the string parser.
            if isinstance(s, (int, float)):
                return {"x": 0, "y": s, "z": 0}
            return str_to_pos_for_cache(s)

        # The conversion is written back into the episode, so it is done at
        # most once per entry (converted values pass through unchanged).
        for k in ["initial_position", "initial_orientation", "target_position"]:
            episode[k] = to_pos(episode[k])

        task_info = {
            "scene": scene,
            "initial_position": episode["initial_position"],
            "initial_orientation": episode["initial_orientation"],
            "target": episode["target_position"],
            "shortest_path": episode["shortest_path"],
            "distance_to_target": episode["shortest_path_length"],
            "id": episode["id"],
        }

        if self.allow_flipping and random.random() > 0.5:
            task_info["mirrored"] = True
        else:
            task_info["mirrored"] = False

        self.episode_index += 1
        if self.max_tasks is not None:
            self.max_tasks -= 1

        if not self.env.teleport(
            pose=episode["initial_position"], rotation=episode["initial_orientation"]
        ):
            # Could not place the agent: skip this episode.
            return self.next_task(force_advance_scene)

        self._last_sampled_task = PointNavTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            reward_configs=self.rewards_config,
        )

        return self._last_sampled_task

    def reset(self):
        """Rewind to the first scene/episode and restore the task budget."""
        self.episode_index = 0
        self.scene_index = 0
        self.max_tasks = self.reset_tasks

    def set_seed(self, seed: int):
        """Record `seed` and, unless it is `None`, pass it to the module-level `set_seed`."""
        self.seed = seed
        if seed is not None:
            set_seed(seed)

    @property
    def length(self) -> Union[int, float]:
        """Length.

        # Returns

        Number of total tasks remaining that can be sampled.
        Can be float('inf').
        """
        return float("inf") if self.max_tasks is None else self.max_tasks


class NavToPartnerTaskSampler(TaskSampler):
    """Task sampler producing two-agent `NavToPartnerTask`s.

    Both agents are placed at random locations of the sampled scene. A
    single `RoboThorEnvironment` is created lazily on the first call to
    `next_task` and reused afterwards.
    """

    def __init__(
        self,
        scenes: List[str],
        sensors: List[Sensor],
        max_steps: int,
        env_args: Dict[str, Any],
        action_space: gym.Space,
        rewards_config: Dict,
        scene_period: Optional[Union[int, str]] = None,
        max_tasks: Optional[int] = None,
        seed: Optional[int] = None,
        deterministic_cudnn: bool = False,
        **kwargs,
    ) -> None:
        """Store the configuration and reset the sampler.

        # Parameters

        scenes : Names of the scenes tasks may be sampled from.
        sensors : Sensors handed to every sampled task.
        max_steps : Step budget handed to every sampled task.
        env_args : Keyword arguments used to build the `RoboThorEnvironment`;
            `env_args["agentCount"]` must equal 2.
        action_space : Action space handed to every sampled task.
        rewards_config : Reward configuration handed to every sampled task;
            its `"max_success_distance"` entry is read when sampling.
        scene_period : `None` picks a random scene for every task, `"manual"`
            changes scene only when `next_task` is called with
            `force_advance_scene=True`, and an integer is the number of
            consecutive tasks sampled from a scene before moving on.
        max_tasks : Number of tasks after which `next_task` returns `None`;
            `None` means no limit.
        seed : Passed to `set_seed` unless it is `None`.
        deterministic_cudnn : If true, `set_deterministic_cudnn()` is called.
        kwargs : Accepted and ignored.
        """
        self.rewards_config = rewards_config
        self.env_args = env_args
        self.scenes = scenes
        self.env: Optional[RoboThorEnvironment] = None
        self.sensors = sensors
        self.max_steps = max_steps
        self._action_space = action_space

        self.scene_counter: Optional[int] = None
        # Permutation of indices into `self.scenes` (filled in by `reset`).
        self.scene_order: Optional[List[int]] = None
        # Position within `self.scene_order` of the scene currently in use.
        self.scene_id: Optional[int] = None
        self.scene_period: Optional[Union[str, int]] = (
            scene_period  # default makes a random choice
        )
        self.max_tasks: Optional[int] = None
        self.reset_tasks = max_tasks

        self._last_sampled_task: Optional[NavToPartnerTask] = None

        self.seed: Optional[int] = None
        self.set_seed(seed)

        if deterministic_cudnn:
            set_deterministic_cudnn()

        self.reset()

    def _create_environment(self) -> RoboThorEnvironment:
        """Build a new two-agent environment from `self.env_args`."""
        assert (
            self.env_args["agentCount"] == 2
        ), "NavToPartner is only defined for 2 agents!"
        env = RoboThorEnvironment(**self.env_args)
        return env

    @property
    def length(self) -> Union[int, float]:
        """Length.

        # Returns

        Number of total tasks remaining that can be sampled.
        Can be float('inf').
        """
        return float("inf") if self.max_tasks is None else self.max_tasks

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """The `max_tasks` value given at construction (may be `None`)."""
        return self.reset_tasks

    @property
    def last_sampled_task(self) -> Optional[NavToPartnerTask]:
        """The task most recently returned by `next_task`, if any."""
        return self._last_sampled_task

    def close(self) -> None:
        """Stop the environment if one has been created."""
        if self.env is not None:
            self.env.stop()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Check if observation spaces equal.

        # Returns

        True if all Tasks that can be sampled by this sampler
        have the same observation space. Otherwise False.
        """
        return True

    def sample_scene(self, force_advance_scene: bool):
        """Choose the scene for the next task and consume one unit of budget.

        # Parameters

        force_advance_scene : If true, step to the next entry of the scene
            order before applying the `scene_period` policy. A warning is
            logged when this is used with a `scene_period` other than
            `"manual"`.

        # Returns

        The name of the selected scene.

        # Raises

        NotImplementedError : If `scene_period` is neither `None`,
            `"manual"`, nor an integer.
        """
        if force_advance_scene:
            if self.scene_period != "manual":
                # The literals carry explicit separating spaces: adjacent
                # string literals are concatenated verbatim.
                get_logger().warning(
                    "When sampling scene, have `force_advance_scene == True`"
                    " but `self.scene_period` is not equal to 'manual',"
                    " this may cause unexpected behavior."
                )
            self.scene_id = (1 + self.scene_id) % len(self.scenes)
            if self.scene_id == 0:
                # Wrapped around: use a fresh order for the next pass.
                random.shuffle(self.scene_order)

        if self.scene_period is None:
            # Random scene
            self.scene_id = random.randint(0, len(self.scenes) - 1)
        elif self.scene_period == "manual":
            pass
        elif self.scene_counter >= cast(int, self.scene_period):
            if self.scene_id == len(self.scene_order) - 1:
                # Randomize scene order for next iteration
                random.shuffle(self.scene_order)
                # Move to next scene
                self.scene_id = 0
            else:
                # Move to next scene
                self.scene_id += 1
            # Reset scene counter
            self.scene_counter = 1
        elif isinstance(self.scene_period, int):
            # Stay in current scene
            self.scene_counter += 1
        else:
            raise NotImplementedError(
                "Invalid scene_period {}".format(self.scene_period)
            )

        if self.max_tasks is not None:
            self.max_tasks -= 1

        return self.scenes[int(self.scene_order[self.scene_id])]

    def next_task(
        self, force_advance_scene: bool = False
    ) -> Optional[NavToPartnerTask]:
        """Sample a new `NavToPartnerTask`.

        Both agents are placed at random; up to 10 attempts are made to find
        a placement where they are more than
        `1.25 * rewards_config["max_success_distance"]` apart. If none is
        found a warning is logged and the last placement is used anyway.

        # Parameters

        force_advance_scene : Forwarded to `sample_scene`.

        # Returns

        The new task, or `None` once the task budget is exhausted.
        """
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        scene = self.sample_scene(force_advance_scene)

        if self.env is not None:
            # Scene names are compared with any "_physics" marker removed.
            if scene.replace("_physics", "") != self.env.scene_name.replace(
                "_physics", ""
            ):
                self.env.reset(scene_name=scene)
        else:
            self.env = self._create_environment()
            self.env.reset(scene_name=scene)

        too_close_to_target = True
        for _ in range(10):
            self.env.randomize_agent_location(agent_id=0)
            self.env.randomize_agent_location(agent_id=1)

            pose1 = self.env.agent_state(0)
            pose2 = self.env.agent_state(1)
            dist = self.env.distance_cache.find_distance(
                self.env.scene_name,
                {k: pose1[k] for k in ["x", "y", "z"]},
                {k: pose2[k] for k in ["x", "y", "z"]},
                self.env.distance_from_point_to_point,
            )

            too_close_to_target = (
                dist <= 1.25 * self.rewards_config["max_success_distance"]
            )
            if not too_close_to_target:
                break

        task_info = {
            "scene": scene,
            "initial_position1": {k: pose1[k] for k in ["x", "y", "z"]},
            "initial_position2": {k: pose2[k] for k in ["x", "y", "z"]},
            "initial_orientation1": pose1["rotation"]["y"],
            "initial_orientation2": pose2["rotation"]["y"],
            # Scene name plus a random integer suffix.
            "id": "_".join([scene] + ["%d" % random.randint(0, 2**63 - 1)]),
        }

        if too_close_to_target:
            get_logger().warning("Bad sampled episode {}".format(task_info))

        self._last_sampled_task = NavToPartnerTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            reward_configs=self.rewards_config,
        )
        return self._last_sampled_task

    def reset(self):
        """Reshuffle the scene order and restore the task budget."""
        self.scene_counter = 0
        self.scene_order = list(range(len(self.scenes)))
        random.shuffle(self.scene_order)
        self.scene_id = 0
        self.max_tasks = self.reset_tasks

    def set_seed(self, seed: int):
        """Record `seed` and, unless it is `None`, pass it to the module-level `set_seed`."""
        self.seed = seed
        if seed is not None:
            set_seed(seed)


================================================
FILE: allenact_plugins/robothor_plugin/robothor_tasks.py
================================================
import math
from typing import Tuple, List, Dict, Any, Optional, Union, Sequence, cast

import gym
import numpy as np

from allenact.base_abstractions.misc import RLStepResult
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact.utils.system import get_logger
from allenact.utils.tensor_utils import tile_images
from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
from allenact_plugins.robothor_plugin.robothor_constants import (
    MOVE_AHEAD,
    ROTATE_LEFT,
    ROTATE_RIGHT,
    END,
    LOOK_UP,
    LOOK_DOWN,
)
from allenact_plugins.robothor_plugin.robothor_environment import RoboThorEnvironment


def spl_metric(
    success: bool, optimal_distance: float, travelled_distance: float
) -> Optional[float]:
    """Score an episode by success weighted by path length.

    # Parameters

    success : Whether the episode succeeded.
    optimal_distance : Length of the shortest path; a negative value means
        it is unknown.
    travelled_distance : Length of the path actually taken.

    # Returns

    `0.0` for a failed episode, `None` when `optimal_distance` is negative,
    and otherwise `optimal_distance / max(travelled_distance,
    optimal_distance)`. When `optimal_distance` is zero the result is `1.0`
    if nothing was travelled and `0.0` otherwise.
    """
    if not success:
        return 0.0
    if optimal_distance < 0:
        return None
    if optimal_distance == 0:
        return 1.0 if travelled_distance == 0 else 0.0
    # Never credit a path shorter than the optimum.
    effective_length = max(travelled_distance, optimal_distance)
    return optimal_distance / effective_length


class PointNavTask(Task[RoboThorEnvironment]):
    """Point navigation: reach `task_info["target"]` and then issue `END`."""

    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, END)

    def __init__(
        self,
        env: RoboThorEnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        reward_configs: Dict[str, Any],
        **kwargs,
    ) -> None:
        """Set up the episode bookkeeping.

        # Parameters

        env : Environment the task runs in; the agent is expected to already
            be at its starting pose.
        sensors : Sensors used to build observations.
        task_info : Episode description; `task_info["target"]` is the goal
            position. `"followed_path"` and `"action_names"` entries are
            added here.
        max_steps : Maximum number of steps in the episode.
        reward_configs : Reward settings read by `shaping` and `judge`.
        kwargs : Forwarded to the base class.
        """
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )
        self.reward_configs = reward_configs
        self._took_end_action: bool = False
        self._success: Optional[bool] = False
        self.last_geodesic_distance = self.env.distance_to_point(
            self.task_info["target"]
        )

        self.optimal_distance = self.last_geodesic_distance
        self._rewards: List[float] = []
        self._distance_to_goal: List[float] = []
        self._metrics = None
        # Positions ({"x", "y", "z"}) recorded after each non-END action. The
        # starting position is not part of this list; it is the first entry
        # of `task_info["followed_path"]`.
        self.path: List[Any] = []
        self.travelled_distance = 0.0

        self.task_info["followed_path"] = [self.env.agent_state()]
        self.task_info["action_names"] = self.action_names()

    @property
    def action_space(self):
        """Discrete action space with one entry per name in `_actions`."""
        return gym.spaces.Discrete(len(self._actions))

    def reached_terminal_state(self) -> bool:
        """Whether the `END` action has been taken."""
        return self._took_end_action

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Names of the actions, indexed by action id."""
        return cls._actions

    def close(self) -> None:
        """Stop the underlying environment."""
        self.env.stop()

    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        """Execute one action.

        `END` finishes the episode and records whether the goal is in range.
        Any other action is sent to the environment, the resulting pose is
        recorded, and the distance covered by that action (ignoring height)
        is added to `travelled_distance`.

        # Parameters

        action : Index into `class_action_names()`; must be an `int`.

        # Returns

        The observation, reward, done flag and an info dict for this step.
        """
        assert isinstance(action, int)
        action = cast(int, action)

        action_str = self.class_action_names()[action]

        if action_str == END:
            self._took_end_action = True
            self._success = self._is_goal_in_range()
            self.last_action_success = self._success
        else:
            # Pose before acting. `followed_path` is seeded with the initial
            # pose, so the first movement is measured as well.
            previous_pose = self.task_info["followed_path"][-1]
            self.env.step({"action": action_str})
            self.last_action_success = self.env.last_action_success
            pose = self.env.agent_state()
            position = {k: pose[k] for k in ["x", "y", "z"]}
            self.path.append(position)
            self.task_info["followed_path"].append(pose)
            # Accumulate only here: END does not move the agent, so it must
            # not add the previous segment a second time.
            self.travelled_distance += IThorEnvironment.position_dist(
                p0=position,
                p1={k: previous_pose[k] for k in ["x", "y", "z"]},
                ignore_y=True,
            )
        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success, "action": action},
        )
        return step_result

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Return the environment's current RGB (`"rgb"`) or depth (`"depth"`) frame."""
        assert mode in ["rgb", "depth"], "only rgb and depth rendering is implemented"
        if mode == "rgb":
            return self.env.current_frame
        elif mode == "depth":
            return self.env.current_depth

    def _is_goal_in_range(self) -> Optional[bool]:
        """Check whether the agent is within 0.2 of the target.

        # Returns

        `True` if the distance to the target lies in `(-0.5, 0.2]`, `False`
        if it is larger than 0.2, and `None` (after a debug log entry)
        otherwise, i.e. when no valid distance is available.
        """
        tget = self.task_info["target"]
        dist = self.dist_to_target()

        if -0.5 < dist <= 0.2:
            return True
        elif dist > 0.2:
            return False
        else:
            get_logger().debug(
                "No path for {} from {} to {}".format(
                    self.env.scene_name, self.env.agent_state(), tget
                )
            )
            return None

    def shaping(self) -> float:
        """Reward for the change in distance to the target since the last call.

        # Returns

        The decrease in distance scaled by
        `reward_configs["shaping_weight"]`; `0.0` when that weight is zero.
        A distance of `-1.0` is replaced by the previous distance, and no
        reward is given unless both distances are greater than -0.5.
        """
        rew = 0.0

        if self.reward_configs["shaping_weight"] == 0.0:
            return rew

        geodesic_distance = self.dist_to_target()

        if geodesic_distance == -1.0:
            geodesic_distance = self.last_geodesic_distance
        if (
            self.last_geodesic_distance > -0.5 and geodesic_distance > -0.5
        ):  # (robothor limits)
            rew += self.last_geodesic_distance - geodesic_distance
        self.last_geodesic_distance = geodesic_distance

        return rew * self.reward_configs["shaping_weight"]

    def judge(self) -> float:
        """Judge the last event.

        # Returns

        The step penalty plus the shaping reward, plus a terminal reward:
        the success/failure reward after `END` (none when success is
        undetermined), or `reached_max_steps_reward` when this step uses up
        the step budget. The value is also appended to the reward history.
        """
        reward = self.reward_configs["step_penalty"]

        reward += self.shaping()

        if self._took_end_action:
            if self._success is not None:
                reward += (
                    self.reward_configs["goal_success_reward"]
                    if self._success
                    else self.reward_configs["failed_stop_reward"]
                )
        elif self.num_steps_taken() + 1 >= self.max_steps:
            reward += self.reward_configs.get("reached_max_steps_reward", 0.0)

        self._rewards.append(float(reward))
        return float(reward)

    def dist_to_target(self):
        """Distance from the agent to the target, as reported by the environment."""
        return self.env.distance_to_point(self.task_info["target"])

    def metrics(self) -> Dict[str, Any]:
        """Episode metrics.

        # Returns

        An empty dict while the episode is running or when success is
        undetermined (`None`); otherwise the base-class metrics extended
        with `success`, `total_reward`, `dist_to_target` and `spl`.
        Calling this on a finished episode clears the reward history.
        """
        if not self.is_done():
            return {}

        total_reward = float(np.sum(self._rewards))
        self._rewards = []

        if self._success is None:
            return {}

        dist2tget = self.dist_to_target()
        spl = spl_metric(
            success=self._success,
            optimal_distance=self.optimal_distance,
            travelled_distance=self.travelled_distance,
        )

        metrics = {
            **super(PointNavTask, self).metrics(),
            "success": self._success,  # False also if no path to target
            "total_reward": total_reward,
            "dist_to_target": dist2tget,
            "spl": 0 if spl is None else spl,
        }
        return metrics


class ObjectNavTask(Task[RoboThorEnvironment]):
    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, END, LOOK_UP, LOOK_DOWN)

    def __init__(
        self,
        env: RoboThorEnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        reward_configs: Dict[str, Any],
        **kwargs,
    ) -> None:
        """Set up the episode bookkeeping.

        # Parameters

        env : Environment the task runs in.
        sensors : Sensors used to build observations.
        task_info : Episode description; `"mirrored"` and (when metadata is
            available) `"object_type"` are read, and `"followed_path"`,
            `"taken_actions"` and `"action_names"` entries are added.
        max_steps : Maximum number of steps in the episode.
        reward_configs : Reward settings for this task.
        kwargs : Forwarded to the base class.
        """
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )

        # Episode outcome and reward state.
        self.reward_configs = reward_configs
        self._success: Optional[bool] = False
        self._took_end_action: bool = False
        self._rewards: List[float] = []
        self._distance_to_goal: List[float] = []
        self._metrics = None

        self.mirror = task_info["mirrored"]
        self._all_metadata_available = env.all_metadata_available

        # Trajectory tracking; `path` only receives positions recorded
        # after an action has been executed.
        self.travelled_distance = 0.0
        self.path: List = []
        self.task_info.update(
            followed_path=[self.env.agent_state()],
            taken_actions=[],
            action_names=self.class_action_names(),
        )

        # Distance bookkeeping is only set up when the environment reports
        # that all metadata is available.
        if self._all_metadata_available:
            start_distance = self.env.distance_to_object_type(
                self.task_info["object_type"]
            )
            self.last_geodesic_distance = start_distance
            self.optimal_distance = start_distance
            self.closest_geo_distance = start_distance

        self.last_action_success = False
        self.last_expert_action: Optional[int] = None

    @property
    def action_space(self):
        """Discrete action space with one entry per name in `_actions`."""
        num_actions = len(self._actions)
        return gym.spaces.Discrete(num_actions)

    def reached_terminal_state(self) -> bool:
        """Report whether the `END` action has been taken."""
        finished = self._took_end_action
        return finished

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        return cls._actions

    def close(self) -> None:
        """Stop the underlying environment."""
        environment = self.env
        environment.stop()

    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        """Execute one action.

        For a mirrored task, left and right rotations are swapped before
        execution. `END` finishes the episode and records whether the goal
        is in range. Any other action is sent to the environment, the
        resulting pose is recorded, and the distance covered by that action
        (ignoring height) is added to `travelled_distance`.

        # Parameters

        action : Index into `class_action_names()`; must be an `int`.

        # Returns

        The observation, reward, done flag and an info dict for this step.
        """
        assert isinstance(action, int)
        action = cast(int, action)

        action_str = self.class_action_names()[action]

        if self.mirror:
            if action_str == ROTATE_RIGHT:
                action_str = ROTATE_LEFT
            elif action_str == ROTATE_LEFT:
                action_str = ROTATE_RIGHT

        # Recorded after mirroring, i.e. the action actually executed.
        self.task_info["taken_actions"].append(action_str)

        if action_str == END:
            self._took_end_action = True
            self._success = self._is_goal_in_range()
            self.last_action_success = self._success
        else:
            # Pose before acting. `followed_path` is seeded with the initial
            # pose, so the first movement is measured as well.
            previous_pose = self.task_info["followed_path"][-1]
            self.env.step({"action": action_str})
            self.last_action_success = self.env.last_action_success
            pose = self.env.agent_state()
            position = {k: pose[k] for k in ["x", "y", "z"]}
            self.path.append(position)
            self.task_info["followed_path"].append(pose)
            # Accumulate only here: END does not move the agent, so it must
            # not add the previous segment a second time.
            self.travelled_distance += IThorEnvironment.position_dist(
                p0=position,
                p1={k: previous_pose[k] for k in ["x", "y", "z"]},
                ignore_y=True,
            )
        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success, "action": action},
        )
        return step_result

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Return a copy of the current frame, flipped for mirrored tasks.

        # Parameters

        mode : `"rgb"` for the environment's current frame or `"depth"` for
            its current depth frame.

        # Returns

        A copy of the requested frame. If the task is mirrored the frame is
        flipped horizontally (along its second axis); this works both for
        frames with a trailing channel axis and for two-dimensional frames.
        """
        assert mode in ["rgb", "depth"], "only rgb and depth rendering is implemented"
        if mode == "rgb":
            frame = self.env.current_frame.copy()
        elif mode == "depth":
            frame = self.env.current_depth.copy()
        else:
            raise NotImplementedError(f"Mode '{mode}' is not supported.")

        if self.mirror:
            # Index only the first two axes so that a frame without a
            # channel axis does not raise an IndexError.
            frame = frame[:, ::-1].copy()  # horizontal flip
        return frame

    def _is_goal_in_range(self) -> bool:
        return any(
            o["objectType"] == self.task_info["object_type"]
            for o in self.env.visible_objects()
        )

    def shaping(self) -> float:
        """Reward for progress towards the closest object of the target type.

        Distances that are not greater than -0.5 are treated as invalid
        (NOTE(review): presumably the environment's "no path" value —
        confirm): they never produce a reward and are not folded into
        `closest_geo_distance`, so a single failed lookup cannot suppress
        all later positive-only rewards.

        # Returns

        `0.0` when `reward_configs["shaping_weight"]` is zero. Otherwise the
        progress made this step, clamped in magnitude to the planar distance
        between the last two recorded positions and scaled by the shaping
        weight. With `reward_configs["positive_only_reward"]` set, progress
        is the improvement over the closest distance reached so far (never
        negative, and only given while further than 0.5 from the target);
        otherwise it is the decrease in distance since the previous call.
        """
        rew = 0.0

        if self.reward_configs["shaping_weight"] == 0.0:
            return rew

        geodesic_distance = self.env.distance_to_object_type(
            self.task_info["object_type"]
        )

        # Ensuring the reward magnitude is not greater than the total distance moved
        max_reward_mag = 0.0
        if len(self.path) >= 2:
            p0, p1 = self.path[-2:]
            max_reward_mag = math.sqrt(
                (p0["x"] - p1["x"]) ** 2 + (p0["z"] - p1["z"]) ** 2
            )

        if self.reward_configs.get("positive_only_reward", False):
            if geodesic_distance > 0.5:
                rew = max(self.closest_geo_distance - geodesic_distance, 0)
        else:
            if (
                self.last_geodesic_distance > -0.5 and geodesic_distance > -0.5
            ):  # (robothor limits)
                rew += self.last_geodesic_distance - geodesic_distance

        self.last_geodesic_distance = geodesic_distance
        if geodesic_distance > -0.5:
            if self.closest_geo_distance > -0.5:
                self.closest_geo_distance = min(
                    self.closest_geo_distance, geodesic_distance
                )
            else:
                # No valid distance recorded so far: start from this one.
                self.closest_geo_distance = geodesic_distance

        return (
            max(
                min(rew, max_reward_mag),
                -max_reward_mag,
            )
            * self.reward_configs["shaping_weight"]
        )

    def judge(self) -> float:
        """Compute the reward for the last step, record it and return it.

        The reward is the configured step penalty plus the shaping term, plus
        either the success/failure bonus (if the end action was taken) or the
        optional `"reached_max_steps_reward"` when this step reaches the step
        limit. The value is appended to `self._rewards`.
        """
        configs = self.reward_configs
        total = configs["step_penalty"]
        total += self.shaping()

        if not self._took_end_action:
            if self.num_steps_taken() + 1 >= self.max_steps:
                total += configs.get("reached_max_steps_reward", 0.0)
        elif self._success:
            total += configs["goal_success_reward"]
        else:
            total += configs["failed_stop_reward"]

        total = float(total)
        self._rewards.append(total)
        return total

    def get_observations(self, **kwargs) -> Any:
        """Collect sensor observations, mirroring visual ones when required.

        When `self.mirror` is set, every numpy observation whose name contains
        `"rgb"` or `"depth"` and that has two or three axes is replaced by a
        horizontally flipped copy.
        """
        obs = self.sensor_suite.get_observations(env=self.env, task=self)
        if not self.mirror:
            return obs

        for name in obs:
            if "rgb" not in name and "depth" not in name:
                continue
            value = obs[name]
            if not isinstance(value, np.ndarray):
                continue
            n_axes = len(value.shape)
            if n_axes == 3:  # heuristic to determine this is a visual sensor
                obs[name] = value[:, ::-1, :].copy()  # horizontal flip
            elif n_axes == 2:  # perhaps only two axes for depth?
                obs[name] = value[:, ::-1].copy()  # horizontal flip
        return obs

    def metrics(self) -> Dict[str, Any]:
        """Return end-of-episode metrics, or an empty dict while still running.

        On top of the parent class metrics, when all metadata is available
        this adds `"success"`, `"total_reward"`, `"dist_to_target"` and
        `"spl"` (reported as 0 when the SPL metric is `None`).
        """
        if not self.is_done():
            return {}

        base_metrics = super(ObjectNavTask, self).metrics()
        if not self._all_metadata_available:
            return base_metrics

        final_distance = self.env.distance_to_object_type(self.task_info["object_type"])
        spl = spl_metric(
            success=self._success,
            optimal_distance=self.optimal_distance,
            travelled_distance=self.travelled_distance,
        )

        combined = dict(base_metrics)
        combined["success"] = self._success
        combined["total_reward"] = np.sum(self._rewards)
        combined["dist_to_target"] = final_distance
        combined["spl"] = spl if spl is not None else 0
        return combined

    def query_expert(self, end_action_only: bool = False, **kwargs) -> Tuple[int, bool]:
        """Return `(expert_action_index, expert_available)` for the current state.

        If the goal is in range the `END` action is advised. Otherwise, unless
        `end_action_only` is set, the environment is asked for an
        `ObjectNavExpertAction`; left/right rotations are swapped when the
        task is mirrored. `(0, False)` is returned whenever no advice is
        available.

        Raises a `RuntimeError` when the environment rejects the expert
        action with a `ValueError`.
        """
        no_advice = (0, False)

        if self._is_goal_in_range():
            return self.class_action_names().index(END), True
        if end_action_only:
            return no_advice

        try:
            self.env.step(
                {
                    "action": "ObjectNavExpertAction",
                    "objectType": self.task_info["object_type"],
                }
            )
        except ValueError:
            raise RuntimeError(
                "Attempting to use the action `ObjectNavExpertAction` which is not supported by your version of"
                " AI2-THOR. The action `ObjectNavExpertAction` is experimental. In order"
                " to enable this action, please install the (in development) version of AI2-THOR. Through pip"
                " this can be done with the command"
                " `pip install -e git+https://github.com/allenai/ai2thor.git@7d914cec13aae62298f5a6a816adb8ac6946c61f#egg=ai2thor`."
            )

        if not self.env.last_action_success:
            return no_advice

        suggested: Optional[str] = self.env.last_event.metadata["actionReturn"]
        if not isinstance(suggested, str):
            # This should have been caught by self._is_goal_in_range()...
            return no_advice

        if self.mirror:
            # A mirrored view turns left into right and vice versa.
            mirrored = {"RotateLeft": "RotateRight", "RotateRight": "RotateLeft"}
            suggested = mirrored.get(suggested, suggested)

        return self.class_action_names().index(suggested), True


class NavToPartnerTask(Task[RoboThorEnvironment]):
    """Two-agent task whose terminal state is reached once the geodesic
    distance between the agents drops to `reward_configs["max_success_distance"]`
    or below.

    Each step takes one action index per agent; both agents share the same
    reward.
    """

    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT)

    def __init__(
        self,
        env: RoboThorEnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        reward_configs: Dict[str, Any],
        **kwargs,
    ) -> None:
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )
        self.reward_configs = reward_configs

        assert self.env.agent_count == 2, "NavToPartnerTask only defined for 2 agents!"

        first_pose = self.env.agent_state(0)
        second_pose = self.env.agent_state(1)
        self.last_geodesic_distance = self._distance_between(first_pose, second_pose)

        # Per-agent pose histories, starting from the initial poses.
        self.task_info["followed_path1"] = [first_pose]
        self.task_info["followed_path2"] = [second_pose]
        self.task_info["action_names"] = self.class_action_names()

    def _distance_between(self, pose_a: Dict[str, Any], pose_b: Dict[str, Any]):
        """Look up the distance between the x/y/z positions of two poses via
        the environment's distance cache."""
        axes = ["x", "y", "z"]
        return self.env.distance_cache.find_distance(
            self.env.scene_name,
            {axis: pose_a[axis] for axis in axes},
            {axis: pose_b[axis] for axis in axes},
            self.env.distance_from_point_to_point,
        )

    @property
    def action_space(self):
        """One discrete action choice per agent."""
        num_actions = len(self._actions)
        return gym.spaces.Tuple(
            [gym.spaces.Discrete(num_actions), gym.spaces.Discrete(num_actions)]
        )

    def reached_terminal_state(self) -> bool:
        """True once the agents are within the configured success distance."""
        threshold = self.reward_configs["max_success_distance"]
        return self.last_geodesic_distance <= threshold

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Names of the actions available to each agent."""
        return cls._actions

    def close(self) -> None:
        """Stop the underlying environment."""
        self.env.stop()

    def _step(self, action: Tuple[int, int]) -> RLStepResult:
        """Apply one action per agent (agent 0 first), then update the pose
        histories and the distance between the agents."""
        assert isinstance(action, tuple)
        names = self.class_action_names()
        first_action, second_action = names[action[0]], names[action[1]]

        self.env.step({"action": first_action, "agentId": 0})
        self.last_action_success1 = self.env.last_action_success
        self.env.step({"action": second_action, "agentId": 1})
        self.last_action_success2 = self.env.last_action_success

        first_pose = self.env.agent_state(0)
        self.task_info["followed_path1"].append(first_pose)
        second_pose = self.env.agent_state(1)
        self.task_info["followed_path2"].append(second_pose)

        self.last_geodesic_distance = self._distance_between(first_pose, second_pose)

        step_info = {
            "last_action_success": [
                self.last_action_success1,
                self.last_action_success2,
            ],
            "action": action,
        }
        return RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info=step_info,
        )

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Tile the current frames (RGB or depth) of all agents into one image."""
        assert mode in ["rgb", "depth"], "only rgb and depth rendering is implemented"
        if mode == "depth":
            return tile_images(self.env.current_depths)
        if mode == "rgb":
            return tile_images(self.env.current_frames)

    def judge(self) -> float:
        """Judge the last event."""
        # reward shared by both agents (no shaping)
        total = self.reward_configs["step_penalty"]
        if self.reached_terminal_state():
            total += self.reward_configs["success_reward"]
        return total

    def metrics(self) -> Dict[str, Any]:
        """Return the parent metrics plus `"success"` once the task is done,
        and an empty dict before that."""
        if not self.is_done():
            return {}

        result = {**super().metrics()}
        result["success"] = self.reached_terminal_state()
        return result


================================================
FILE: allenact_plugins/robothor_plugin/robothor_viz.py
================================================
import copy
import json
import math
import os
from typing import Tuple, Sequence, Union, Dict, Optional, Any, cast, Generator, List

import cv2
import numpy as np
from PIL import Image, ImageDraw
from ai2thor.controller import Controller
from matplotlib import pyplot as plt
from matplotlib.figure import Figure
import colour as col

from allenact.utils.system import get_logger
from allenact.utils.viz_utils import TrajectoryViz

# Directory (under the user's home) where `ThorViz` caches the top-down scene
# images and camera metadata it needs for trajectory visualization.
ROBOTHOR_VIZ_CACHED_TOPDOWN_VIEWS_DIR = os.path.join(
    os.path.expanduser("~"), ".allenact", "robothor", "top_down_viz_cache"
)


class ThorPositionTo2DFrameTranslator(object):
    """Map an (x, z) or (x, y, z) position to integer (row, col) coordinates
    of a top-down frame.

    The frame is taken to cover the square of side `2 * orth_size` centered
    on the camera's (x, z) position; rows decrease as z increases.
    """

    def __init__(
        self,
        frame_shape_rows_cols: Tuple[int, int],
        cam_position: Sequence[float],
        orth_size: float,
    ):
        cam_x, cam_z = cam_position[0], cam_position[2]
        self.frame_shape = frame_shape_rows_cols
        # (x, z) coordinates of the corner with the smallest x and z values.
        self.lower_left = np.array((cam_x, cam_z)) - orth_size
        self.span = 2 * orth_size

    def __call__(self, position: Sequence[float]):
        if len(position) == 3:
            # Drop the middle (y) coordinate.
            x, z = position[0], position[2]
        else:
            x, z = position

        # Position expressed as a fraction of the frame span.
        relative = (np.array((x, z)) - self.lower_left) / self.span
        row = round(self.frame_shape[0] * (1.0 - relative[1]))
        col = round(self.frame_shape[1] * relative[0])
        return np.array((row, col), dtype=int)


class ThorViz(TrajectoryViz):
    """Trajectory visualizer that draws agent paths on top-down scene views.

    Top-down images and camera metadata are cached on disk under
    `ROBOTHOR_VIZ_CACHED_TOPDOWN_VIEWS_DIR`. An AI2-THOR `Controller` is only
    created when a cached file is missing, and it is stopped again once all
    top-down data has been loaded.
    """

    def __init__(
        self,
        path_to_trajectory: Sequence[str] = ("task_info", "followed_path"),
        label: str = "thor_trajectory",
        figsize: Tuple[float, float] = (8, 4),  # width, height
        fontsize: float = 10,
        scenes: Union[
            Tuple[str, int, int, int, int], Sequence[Tuple[str, int, int, int, int]]
        ] = ("FloorPlan_Val{}_{}", 1, 3, 1, 5),
        viz_rows_cols: Tuple[int, int] = (448, 448),
        single_color: bool = False,
        view_triangle_only_on_last: bool = True,
        disable_view_triangle: bool = False,
        line_opacity: float = 1.0,
        **kwargs
    ):
        """Configure the visualizer.

        `scenes` is either a single tuple or a sequence of tuples of the form
        `(name_template, first_min, first_max, second_min, second_max)`; the
        template is formatted with every pair of indices in the two inclusive
        ranges (see `iterate_scenes`). `viz_rows_cols` is the (rows, cols)
        resolution of the top-down images. The remaining flags control how
        paths and view triangles are drawn (see `visualize_agent_path`).
        """
        super().__init__(
            path_to_trajectory=path_to_trajectory,
            label=label,
            figsize=figsize,
            fontsize=fontsize,
            **kwargs
        )

        if isinstance(scenes[0], str):
            scenes = [
                cast(Tuple[str, int, int, int, int], scenes)
            ]  # make it list of tuples
        self.scenes = cast(List[Tuple[str, int, int, int, int]], scenes)

        self.room_path = ROBOTHOR_VIZ_CACHED_TOPDOWN_VIEWS_DIR
        os.makedirs(self.room_path, exist_ok=True)

        self.viz_rows_cols = viz_rows_cols
        self.single_color = single_color
        self.view_triangle_only_on_last = view_triangle_only_on_last
        self.disable_view_triangle = disable_view_triangle
        self.line_opacity = line_opacity

        # Only needed for rendering
        self.map_data: Optional[Dict[str, Any]] = None
        self.thor_top_downs: Optional[Dict[str, np.ndarray]] = None

        self.controller: Optional[Controller] = None

    def init_top_down_render(self):
        """Load (or generate) the camera metadata and all top-down views."""
        self.map_data = self.get_translator()
        self.thor_top_downs = self.make_top_down_views()

        # No controller needed after this point
        if self.controller is not None:
            self.controller.stop()
            self.controller = None

    @staticmethod
    def iterate_scenes(
        all_scenes: Sequence[Tuple[str, int, int, int, int]]
    ) -> Generator[str, None, None]:
        """Yield every room name described by `all_scenes`.

        For each tuple, the name template is formatted with all index pairs
        from the two inclusive ranges, the second range varying fastest.
        """
        for scenes in all_scenes:
            for wall in range(scenes[1], scenes[2] + 1):
                for furniture in range(scenes[3], scenes[4] + 1):
                    roomname = scenes[0].format(wall, furniture)
                    yield roomname

    def cached_map_data_path(self, roomname: str) -> str:
        """Path of the cached camera metadata JSON file for `roomname`."""
        return os.path.join(self.room_path, "map_data__{}.json".format(roomname))

    def get_translator(self) -> Dict[str, Any]:
        """Return camera metadata for the first configured room, extended with
        a `"pos_translator"` entry mapping positions to frame coordinates.

        The metadata is read from the on-disk cache when available; otherwise
        it is queried from a controller and written to the cache.
        """
        roomname = list(ThorViz.iterate_scenes(self.scenes))[0]
        json_file = self.cached_map_data_path(roomname)
        if not os.path.exists(json_file):
            self.make_controller()
            self.controller.reset(roomname)
            map_data = self.get_agent_map_data()
            get_logger().info("Dumping {}".format(json_file))
            with open(json_file, "w") as f:
                json.dump(map_data, f, indent=4, sort_keys=True)
        else:
            with open(json_file, "r") as f:
                map_data = json.load(f)

        pos_translator = ThorPositionTo2DFrameTranslator(
            self.viz_rows_cols,
            self.position_to_tuple(map_data["cam_position"]),
            map_data["cam_orth_size"],
        )
        map_data["pos_translator"] = pos_translator

        get_logger().debug("Using map_data {}".format(map_data))
        return map_data

    def cached_image_path(self, roomname: str) -> str:
        """Path of the cached top-down image for `roomname` at the configured
        resolution."""
        return os.path.join(
            self.room_path, "{}__r{}_c{}.png".format(roomname, *self.viz_rows_cols)
        )

    def make_top_down_views(self) -> Dict[str, np.ndarray]:
        """Return a dict from room name to top-down image for all configured
        rooms, rendering and caching any image that is missing on disk."""
        top_downs = {}
        for roomname in self.iterate_scenes(self.scenes):
            fname = self.cached_image_path(roomname)
            if not os.path.exists(fname):
                self.make_controller()
                self.dump_top_down_view(roomname, fname)
            top_downs[roomname] = cv2.imread(fname)

        return top_downs

    def crop_viz_image(self, viz_image: np.ndarray) -> np.ndarray:
        """Crop `viz_image` to the band of rows where the room is shown."""
        # Top-down view of room spans vertically near the center of the frame in RoboTHOR:
        y_min = int(self.viz_rows_cols[0] * 0.3)
        y_max = int(self.viz_rows_cols[0] * 0.8)
        # But it covers approximately the entire width:
        x_min = 0
        x_max = self.viz_rows_cols[1]
        cropped_viz_image = viz_image[y_min:y_max, x_min:x_max, :]
        return cropped_viz_image

    def make_controller(self):
        """Create the controller (once) and set its quality and resolution."""
        if self.controller is None:
            self.controller = Controller()

            self.controller.step({"action": "ChangeQuality", "quality": "Very High"})
            self.controller.step(
                {
                    "action": "ChangeResolution",
                    "x": self.viz_rows_cols[1],
                    "y": self.viz_rows_cols[0],
                }
            )

    def get_agent_map_data(self):
        """Read the camera position and orthographic size while in map view,
        then toggle the map view off again."""
        self.controller.step({"action": "ToggleMapView"})
        cam_position = self.controller.last_event.metadata["cameraPosition"]
        cam_orth_size = self.controller.last_event.metadata["cameraOrthSize"]
        to_return = {
            "cam_position": cam_position,
            "cam_orth_size": cam_orth_size,
        }
        self.controller.step({"action": "ToggleMapView"})
        return to_return

    @staticmethod
    def position_to_tuple(position: Dict[str, float]) -> Tuple[float, float, float]:
        """Convert a position dict with `x`, `y`, `z` keys into a tuple."""
        return position["x"], position["y"], position["z"]

    @staticmethod
    def add_lines_to_map(
        ps: Sequence[Any],
        frame: np.ndarray,
        pos_translator: ThorPositionTo2DFrameTranslator,
        opacity: float,
        color: Optional[Tuple[int, ...]] = None,
    ) -> np.ndarray:
        """Draw the polyline through positions `ps` on a copy of `frame`.

        `opacity` is in [0, 1] and `color` is a three-value tuple that
        defaults to `(255, 0, 0)`. `frame` itself is returned unchanged when
        fewer than two positions are given.
        """
        if len(ps) <= 1:
            return frame
        if color is None:
            color = (255, 0, 0)

        img1 = Image.fromarray(frame.astype("uint8"), "RGB").convert("RGBA")
        # PIL sizes are (width, height) whereas `frame.shape` is (rows, cols);
        # reuse the size of `img1` so compositing also works for non-square
        # frames.
        img2 = Image.new("RGBA", img1.size)  # Use RGBA

        opacity = int(round(255 * opacity))  # Define transparency for the lines.
        draw = ImageDraw.Draw(img2)
        for i in range(len(ps) - 1):
            # The translator returns (row, col); PIL expects (x, y).
            draw.line(
                tuple(reversed(pos_translator(ps[i])))
                + tuple(reversed(pos_translator(ps[i + 1]))),
                fill=color + (opacity,),
                width=int(frame.shape[0] / 100),
            )

        img = Image.alpha_composite(img1, img2)
        return np.array(img.convert("RGB"))

    @staticmethod
    def add_line_to_map(
        p0: Any,
        p1: Any,
        frame: np.ndarray,
        pos_translator: ThorPositionTo2DFrameTranslator,
        opacity: float,
        color: Optional[Tuple[int, ...]] = None,
    ) -> np.ndarray:
        """Draw the segment from `p0` to `p1` on a copy of `frame`.

        `frame` itself is returned unchanged when both positions are equal.
        """
        if p0 == p1:
            return frame
        # A single segment is just a two-point polyline.
        return ThorViz.add_lines_to_map(
            [p0, p1], frame, pos_translator, opacity, color
        )

    @staticmethod
    def add_agent_view_triangle(
        position: Any,
        rotation: Dict[str, float],
        frame: np.ndarray,
        pos_translator: ThorPositionTo2DFrameTranslator,
        scale: float = 1.0,
        opacity: float = 0.1,
    ) -> np.ndarray:
        """Overlay a semi-transparent white triangle with one corner at the
        agent's position, rotated according to `rotation["y"]` (degrees)."""
        # Use a float array so the in-place offsets below also work when the
        # position is given with integer coordinates.
        p0 = np.array((position[0], position[2]), dtype=float)
        p1 = copy.copy(p0)
        p2 = copy.copy(p0)

        theta = -2 * math.pi * (rotation["y"] / 360.0)
        rotation_mat = np.array(
            [[math.cos(theta), -math.sin(theta)], [math.sin(theta), math.cos(theta)]]
        )
        offset1 = scale * np.array([-1 / 2.0, 1])
        offset2 = scale * np.array([1 / 2.0, 1])

        p1 += np.matmul(rotation_mat, offset1)
        p2 += np.matmul(rotation_mat, offset2)

        img1 = Image.fromarray(frame.astype("uint8"), "RGB").convert("RGBA")
        # PIL sizes are (width, height); see `add_lines_to_map`.
        img2 = Image.new("RGBA", img1.size)  # Use RGBA

        opacity = int(round(255 * opacity))  # Define transparency for the triangle.
        points = [tuple(reversed(pos_translator(p))) for p in [p0, p1, p2]]
        draw = ImageDraw.Draw(img2)
        draw.polygon(points, fill=(255, 255, 255, opacity))

        img = Image.alpha_composite(img1, img2)
        return np.array(img.convert("RGB"))

    @staticmethod
    def visualize_agent_path(
        positions: Sequence[Any],
        frame: np.ndarray,
        pos_translator: ThorPositionTo2DFrameTranslator,
        single_color: bool = False,
        view_triangle_only_on_last: bool = False,
        disable_view_triangle: bool = False,
        line_opacity: float = 1.0,
        trajectory_start_end_color_str: Tuple[str, str] = ("red", "green"),
    ) -> np.ndarray:
        """Draw a trajectory, and optionally agent view triangles, on `frame`.

        `positions` are dicts with `x`, `y`, `z` and `rotation` entries. With
        `single_color` the whole path uses the start color; otherwise segment
        colors range from the start to the end color. View triangles are
        drawn for every position, only for the last one
        (`view_triangle_only_on_last`), or not at all
        (`disable_view_triangle`). An empty trajectory leaves `frame`
        untouched.
        """
        if single_color:
            frame = ThorViz.add_lines_to_map(
                list(map(ThorViz.position_to_tuple, positions)),
                frame,
                pos_translator,
                line_opacity,
                tuple(
                    map(
                        lambda x: int(round(255 * x)),
                        col.Color(trajectory_start_end_color_str[0]).rgb,
                    )
                ),
            )
        else:
            if len(positions) > 1:
                colors = list(
                    col.Color(trajectory_start_end_color_str[0]).range_to(
                        col.Color(trajectory_start_end_color_str[1]), len(positions) - 1
                    )
                )
            for i in range(len(positions) - 1):
                frame = ThorViz.add_line_to_map(
                    ThorViz.position_to_tuple(positions[i]),
                    ThorViz.position_to_tuple(positions[i + 1]),
                    frame,
                    pos_translator,
                    opacity=line_opacity,
                    color=tuple(map(lambda x: int(round(255 * x)), colors[i].rgb)),
                )

        # Decide which positions get a view triangle. Checking for disabled
        # triangles and empty trajectories first avoids indexing `positions[-1]`
        # on an empty sequence.
        if disable_view_triangle or len(positions) == 0:
            triangle_positions: Sequence[Any] = []
        elif view_triangle_only_on_last:
            triangle_positions = [positions[-1]]
        else:
            triangle_positions = positions

        for position in triangle_positions:
            frame = ThorViz.add_agent_view_triangle(
                ThorViz.position_to_tuple(position),
                rotation=position["rotation"],
                frame=frame,
                pos_translator=pos_translator,
                # A single triangle is drawn more opaque than many overlapping ones.
                opacity=0.05 + view_triangle_only_on_last * 0.2,
            )
        return frame

    def dump_top_down_view(self, room_name: str, image_path: str):
        """Render the top-down view of `room_name` and write it to `image_path`."""
        get_logger().debug("Dumping {}".format(image_path))

        self.controller.reset(room_name)
        self.controller.step(
            {"action": "Initialize", "gridSize": 0.1, "makeAgentsVisible": False}
        )
        self.controller.step({"action": "ToggleMapView"})
        top_down_view = self.controller.last_event.cv2img

        cv2.imwrite(image_path, top_down_view)

    def make_fig(self, episode: Any, episode_id: str) -> Figure:
        """Build a figure showing the episode's trajectory on its room's
        top-down view.

        The room name is taken from the first three `_`-separated fields of
        `episode_id`.
        """
        trajectory: Sequence[Dict[str, Any]] = self._access(
            episode, self.path_to_trajectory
        )

        if self.thor_top_downs is None:
            self.init_top_down_render()

        roomname = "_".join(episode_id.split("_")[:3])

        im = self.visualize_agent_path(
            trajectory,
            self.thor_top_downs[roomname],
            self.map_data["pos_translator"],
            single_color=self.single_color,
            view_triangle_only_on_last=self.view_triangle_only_on_last,
            disable_view_triangle=self.disable_view_triangle,
            line_opacity=self.line_opacity,
        )

        fig, ax = plt.subplots(figsize=self.figsize)
        ax.set_title(episode_id, fontsize=self.fontsize)
        # Reverse the channel order before display.
        # NOTE(review): the top-down frames come from `cv2.imread` while the
        # overlay colors are specified as RGB tuples - confirm that drawn
        # colors appear as intended after this reversal.
        ax.imshow(self.crop_viz_image(im)[:, :, ::-1])
        ax.axis("off")

        return fig


class ThorMultiViz(ThorViz):
    """Variant of `ThorViz` that overlays the trajectories of several agents
    on the same top-down view, each with its own start/end colors.

    The trajectory of each agent is looked up at `path_to_trajectory_prefix`
    with the agent's suffix appended to the last path element.
    """

    def __init__(
        self,
        path_to_trajectory_prefix: Sequence[str] = ("task_info", "followed_path"),
        agent_suffixes: Sequence[str] = ("1", "2"),
        label: str = "thor_trajectories",
        trajectory_start_end_color_strs: Sequence[Tuple[str, str]] = (
            ("red", "green"),
            ("cyan", "purple"),
        ),
        **kwargs
    ):
        super().__init__(label=label, **kwargs)

        # Store list copies of the per-agent configuration.
        self.path_to_trajectory_prefix = [*path_to_trajectory_prefix]
        self.agent_suffixes = [*agent_suffixes]
        self.trajectory_start_end_color_strs = [*trajectory_start_end_color_strs]

    def make_fig(self, episode: Any, episode_id: str) -> Figure:
        """Build a figure with all agents' trajectories drawn on the top-down
        view of the episode's room."""
        if self.thor_top_downs is None:
            self.init_top_down_render()

        room_key = "_".join(episode_id.split("_")[:3])
        canvas = self.thor_top_downs[room_key]

        agents_and_colors = zip(
            self.agent_suffixes, self.trajectory_start_end_color_strs
        )
        for suffix, color_pair in agents_and_colors:
            # Agent-specific access path: the prefix with the suffix appended
            # to its last element.
            agent_path = list(self.path_to_trajectory_prefix)
            agent_path[-1] = agent_path[-1] + suffix
            agent_trajectory = self._access(episode, agent_path)

            # Each agent is drawn on top of the previous agents' output.
            canvas = self.visualize_agent_path(
                agent_trajectory,
                canvas,
                self.map_data["pos_translator"],
                single_color=self.single_color,
                view_triangle_only_on_last=self.view_triangle_only_on_last,
                disable_view_triangle=self.disable_view_triangle,
                line_opacity=self.line_opacity,
                trajectory_start_end_color_str=color_pair,
            )

        fig, ax = plt.subplots(figsize=self.figsize)
        ax.set_title(episode_id, fontsize=self.fontsize)
        cropped = self.crop_viz_image(canvas)
        ax.imshow(cropped[:, :, ::-1])
        ax.axis("off")

        return fig


================================================
FILE: allenact_plugins/robothor_plugin/scripts/__init__.py
================================================


================================================
FILE: allenact_plugins/robothor_plugin/scripts/make_objectnav_debug_dataset.py
================================================
import gzip
import json
import os
from typing import Sequence, Optional

from allenact_plugins.robothor_plugin.robothor_task_samplers import (
    ObjectNavDatasetTaskSampler,
)


def create_debug_dataset_from_train_dataset(
    scene: str,
    target_object_type: Optional[str],
    episodes_subset: Sequence[int],
    train_dataset_path: str,
    base_debug_output_path: str,
):
    """Write a small debug dataset containing selected training episodes.

    The episodes of `scene` are loaded from
    `<train_dataset_path>/episodes/<scene>.json.gz`, filtered down to the ids
    built from `episodes_subset` (`<scene>_<target_object_type>_<n>`, or
    `<scene>_<n>` when `target_object_type` is `None`), sorted by their
    trailing numeric id and written as gzipped JSON to
    `<base_debug_output_path>/episodes/<scene>.json.gz`.

    Raises an `AssertionError` if the training file is missing or if some
    requested episode cannot be found.
    """
    train_episodes_dir = os.path.join(train_dataset_path, "episodes")
    downloaded_episodes = os.path.join(train_episodes_dir, scene + ".json.gz")

    assert os.path.exists(downloaded_episodes), (
        "'{}' doesn't seem to exist or is empty. Make sure you've downloaded the appropriate"
        " training dataset with"
        " datasets/download_navigation_datasets.sh".format(downloaded_episodes)
    )

    # episodes
    episodes = ObjectNavDatasetTaskSampler.load_dataset(
        scene=scene, base_directory=train_episodes_dir
    )

    if target_object_type is not None:
        ids = {
            "{}_{}_{}".format(scene, target_object_type, epit)
            for epit in episodes_subset
        }
    else:
        ids = {"{}_{}".format(scene, epit) for epit in episodes_subset}
    debug_episodes = [ep for ep in episodes if ep["id"] in ids]
    assert len(ids) == len(debug_episodes), (
        f"Number of input ids ({len(ids)}) does not equal"
        f" number of output debug tasks ({len(debug_episodes)})"
    )

    # sort by the numeric suffix of the episode ids
    debug_episodes = sorted(
        debug_episodes, key=lambda ep: int(ep["id"].split("_")[-1])
    )
    assert len(debug_episodes) == len(episodes_subset)

    episodes_dir = os.path.join(base_debug_output_path, "episodes")
    os.makedirs(episodes_dir, exist_ok=True)
    episodes_file = os.path.join(episodes_dir, scene + ".json.gz")

    json_bytes = json.dumps(debug_episodes).encode("utf-8")
    with gzip.GzipFile(episodes_file, "w") as fout:
        fout.write(json_bytes)
    assert os.path.exists(episodes_file)


if __name__ == "__main__":
    # Build the ObjectNav debug split from a handful of training episodes,
    # using dataset directories relative to the current working directory.
    CURRENT_PATH = os.getcwd()
    SCENE = "FloorPlan_Train1_1"
    TARGET = "Television"
    EPISODES = [0, 7, 11, 12]
    BASE_OUT = os.path.join(CURRENT_PATH, "datasets", "robothor-objectnav", "debug")

    TRAIN_DATASET_PATH = os.path.join(
        CURRENT_PATH, "datasets", "robothor-objectnav", "train"
    )
    create_debug_dataset_from_train_dataset(
        scene=SCENE,
        target_object_type=TARGET,
        episodes_subset=EPISODES,
        train_dataset_path=TRAIN_DATASET_PATH,
        base_debug_output_path=BASE_OUT,
    )


================================================
FILE: allenact_plugins/robothor_plugin/scripts/make_pointnav_debug_dataset.py
================================================
import os

from allenact_plugins.robothor_plugin.scripts.make_objectnav_debug_dataset import (
    create_debug_dataset_from_train_dataset,
)

if __name__ == "__main__":
    # Build the PointNav debug split from a handful of training episodes,
    # using dataset directories relative to the current working directory.
    # PointNav episode ids carry no target object type.
    CURRENT_PATH = os.getcwd()
    SCENE = "FloorPlan_Train1_1"
    EPISODES = [3, 4, 5, 6]
    BASE_OUT = os.path.join(CURRENT_PATH, "datasets", "robothor-pointnav", "debug")

    TRAIN_DATASET_PATH = os.path.join(
        CURRENT_PATH, "datasets", "robothor-pointnav", "train"
    )
    create_debug_dataset_from_train_dataset(
        scene=SCENE,
        target_object_type=None,
        episodes_subset=EPISODES,
        train_dataset_path=TRAIN_DATASET_PATH,
        base_debug_output_path=BASE_OUT,
    )


================================================
FILE: allenact_plugins/setup.py
================================================
import glob
import os
from pathlib import Path

from setuptools import find_packages, setup


def parse_req_file(fname, initial=None):
    """Reads requires.txt file generated by setuptools and outputs a
    new/updated dict of extras as keys and corresponding lists of dependencies
    as values.

    The input file's contents are similar to a `ConfigParser` file, e.g.
    pkg_1
    pkg_2
    pkg_3

    [extras1]
    pkg_4
    pkg_5

    [extras2]
    pkg_6
    pkg_7

    Dependencies listed before the first `[section]` header are ignored. When
    `initial` is given, it is updated in place and returned.
    """
    reqs = {} if initial is None else initial
    current_extra = None
    with open(fname, "r") as f:
        for raw_line in f:
            # Strip whitespace (including the line terminator) rather than
            # blindly dropping the last character, which would truncate a
            # final line that has no trailing newline.
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith("["):
                # Add new key for current extras (if missing in dict)
                current_extra = line[1:-1].strip()
                reqs.setdefault(current_extra, [])
            elif current_extra is not None:
                # Only keep dependencies from extras
                reqs[current_extra].append(line)
    return reqs


def get_version(fname):
    """Reads PKG-INFO file generated by setuptools and extracts the Version
    number.

    The version is taken from the first line starting with `Version:`. A
    `ValueError` is raised when no such line exists or its value is empty.
    """
    prefix = "Version:"
    res = "UNK"
    with open(fname, "r") as f:
        for line in f:
            if line.startswith(prefix):
                # Slice off the prefix and strip whitespace/newline; dropping
                # the last character unconditionally would truncate the
                # version when the file does not end with a newline.
                res = line[len(prefix) :].strip()
                break
    if res in ["UNK", ""]:
        raise ValueError(f"Missing Version number in {fname}")
    return res


def run_setup():
    """Configure and invoke `setup` for the `allenact_plugins` package.

    The mode is chosen by whether
    `allenact_plugins.egg-info/dependency_links.txt` exists next to this file:

    * Install mode (file present): the version and extras are recovered from
      the egg-info `PKG-INFO` and `requires.txt` files.
    * Build mode (file missing): extras are gathered from every
      `*_plugin/extra_requirements.txt`, the working directory is changed to
      the parent of this file's directory, and the version is read from the
      `.VERSION` file found there.
    """
    base_dir = os.path.abspath(os.path.dirname(Path(__file__)))
    sdist_marker = os.path.join(
        base_dir, "allenact_plugins.egg-info/dependency_links.txt"
    )

    if os.path.exists(sdist_marker):
        # Install mode from sdist
        pkg_version = get_version(
            os.path.join(base_dir, "allenact_plugins.egg-info/PKG-INFO")
        )
        extras = parse_req_file(
            os.path.join(base_dir, "allenact_plugins.egg-info/requires.txt")
        )
    else:
        # Build mode for sdist

        # Extra dependencies required for various plugins
        extras = {}
        for plugin_dir in glob.glob(os.path.join(base_dir, "*_plugin")):
            reqs_file = os.path.join(plugin_dir, "extra_requirements.txt")
            if not os.path.exists(reqs_file):
                continue

            kept_deps = []
            with open(reqs_file, "r") as f:
                for raw_dep in f.readlines():
                    dep = raw_dep.strip()
                    # Skip blanks, comments and non-PyPI dependencies
                    if (
                        dep == ""
                        or dep.startswith("#")
                        or "@ git+https://github.com/" in dep
                    ):
                        continue
                    kept_deps.append(dep)

            plugin_key = os.path.basename(plugin_dir).replace("_plugin", "")
            extras[plugin_key] = kept_deps

        # The "all" extra is the concatenation of every plugin's extras
        extras["all"] = sum(extras.values(), [])

        # `.VERSION` is opened relative to the parent of this file's
        # directory, and `setup` below also runs from there.
        os.chdir(os.path.join(base_dir, ".."))

        with open(".VERSION", "r") as f:
            pkg_version = f.readline().strip()

    setup(
        name="allenact_plugins",
        version=pkg_version,
        description="Plugins for the AllenAct framework",
        long_description=(
            "A collection of plugins/extensions for use within the AllenAct framework."
        ),
        classifiers=[
            "Intended Audience :: Science/Research",
            "Development Status :: 3 - Alpha",
            "License :: OSI Approved :: MIT License",
            "Topic :: Scientific/Engineering :: Artificial Intelligence",
            "Programming Language :: Python",
            "Programming Language :: Python :: 3.6",
            "Programming Language :: Python :: 3.7",
            "Programming Language :: Python :: 3.8",
            "Programming Language :: Python :: 3.9",
            "Programming Language :: Python :: 3.10",
        ],
        keywords=["reinforcement learning", "embodied-AI", "AI", "RL", "SLAM"],
        url="https://github.com/allenai/allenact",
        author="Allen Institute for Artificial Intelligence",
        author_email="lucaw@allenai.org",
        license="MIT",
        packages=find_packages(include=["allenact_plugins", "allenact_plugins.*"]),
        install_requires=[f"allenact=={pkg_version}"],
        setup_requires=["pytest-runner"],
        tests_require=["pytest", "pytest-cov"],
        extras_require=extras,
    )


# Only run the packaging logic when this file is executed as a script
# (not when it is imported as a module).
if __name__ == "__main__":
    run_setup()


================================================
FILE: conda/environment-10.1.yml
================================================
channels:
  - defaults
  - pytorch
dependencies:
  - cudatoolkit=10.1
  - pytorch>=1.6.0,!=1.8.0
  - torchvision>=0.7.0,<0.10.0


================================================
FILE: conda/environment-10.2.yml
================================================
channels:
  - defaults
  - pytorch
dependencies:
  - cudatoolkit=10.2
  - pytorch>=1.6.0,!=1.8.0
  - torchvision>=0.7.0,<0.10.0


================================================
FILE: conda/environment-11.1.yml
================================================
channels:
  - defaults
  - pytorch
  - nvidia
dependencies:
  - cudatoolkit=11.1
  - pytorch>=1.6.0,!=1.8.0
  - torchvision>=0.7.0


================================================
FILE: conda/environment-9.2.yml
================================================
channels:
  - defaults
  - pytorch
dependencies:
  - cudatoolkit=9.2
  - pytorch>=1.6.0,!=1.8.0
  - torchvision>=0.7.0,<0.10.0


================================================
FILE: conda/environment-base.yml
================================================
channels:
  - defaults
  - pytorch
  - conda-forge
dependencies:
  - python=3.8
  - certifi
  - chardet=4.0.0
  - cloudpickle=1.6.0
  - cycler=0.10.0
  - decorator=4.4.2
  - filelock=3.0.12
  - future=0.18.2
  - gym>=0.17.0,<0.20.0
  - idna>=2.10
  - imageio>=2.9.0
  - imageio-ffmpeg>=0.4.3
  - kiwisolver=1.3.1
  - matplotlib>=3.3.1
  - networkx
  - numpy>=1.19.1
  - opencv
  - conda-forge::pillow>=8.2.0,<9.0.0
  - pip
  - proglog>=0.1.9
  - protobuf>=3.14.0
  - pyglet>=1.5.0
  - pyparsing>=2.4.7
  - python-dateutil>=2.8.1
  - pytorch::pytorch>=1.6.0,!=1.8.0
  - pytorch::torchvision>=0.7.0
  - requests>=2.25.1
  - setproctitle
  - six>=1.15.0
  - tensorboardx>=2.1
  - tqdm
  - urllib3>=1.26.2
  - attrs
  - pip:
    - moviepy>=1.0.3
    - scipy>=1.5.4
    - compress-pickle>=1.2.0


================================================
FILE: conda/environment-cpu.yml
================================================
channels:
  - defaults
  - pytorch
dependencies:
  - cpuonly
  - pytorch>=1.6.0,!=1.8.0
  - torchvision>=0.7.0,<0.10.0


================================================
FILE: conda/environment-dev.yml
================================================
channels:
  - defaults
  - conda-forge
dependencies:
  - black>=24.2.0
  - docformatter>=1.3.1
  - gitpython
  - markdown>=3.3
  - mkdocs>=1.1.2
  - mkdocs-material>=5.5.3
  - mkdocs-material-extensions>=1.0
  - mypy
  - pre-commit
  - pytest>=6.1.1
  - ruamel.yaml
  - pip:
    - pydoc-markdown>=3.4.0


================================================
FILE: constants.py
================================================
import os
from pathlib import Path

# Absolute path of the directory that contains this file.
ABS_PATH_OF_TOP_LEVEL_DIR = os.path.abspath(os.path.dirname(Path(__file__)))
# Absolute path of the `docs` directory located directly inside the directory above.
ABS_PATH_OF_DOCS_DIR = os.path.join(ABS_PATH_OF_TOP_LEVEL_DIR, "docs")


================================================
FILE: datasets/.gitignore
================================================
*
!.gitignore
!*.sh
!.habitat_datasets_download_info.json
!.habitat_downloader_helper.py
!habitat/configs/debug_habitat_pointnav.yaml

================================================
FILE: datasets/.habitat_datasets_download_info.json
================================================
{
    "pointnav-gibson-v1": {
        "link": "https://dl.fbaipublicfiles.com/habitat/data/datasets/pointnav/gibson/v1/pointnav_gibson_v1.zip",
        "rel_path": "data/datasets/pointnav/gibson/v1/",
        "config_url": "configs/datasets/imagenav/gibson.yaml"
    },
    "pointnav-gibson-v2": {
        "link": "https://dl.fbaipublicfiles.com/habitat/data/datasets/pointnav/gibson/v2/pointnav_gibson_v2.zip",
        "rel_path": "data/datasets/pointnav/gibson/v2/",
        "config_url": "configs/datasets/pointnav/gibson.yaml"
    },
    "pointnav-mp3d-v1": {
        "link": "https://dl.fbaipublicfiles.com/habitat/data/datasets/pointnav/mp3d/v1/pointnav_mp3d_v1.zip",
        "rel_path": "data/datasets/pointnav/mp3d/v1/",
        "config_url": "configs/datasets/imagenav/mp3d.yaml"
    },
    "objectnav-mp3d-v1": {
        "link": "https://dl.fbaipublicfiles.com/habitat/data/datasets/objectnav/m3d/v1/objectnav_mp3d_v1.zip",
        "rel_path": "data/datasets/objectnav/mp3d/v1/",
        "config_url": "configs/datasets/objectnav/mp3d.yaml"
    },
    "eqa-mp3d-v1": {
        "link": "https://dl.fbaipublicfiles.com/habitat/data/datasets/eqa/mp3d/v1/eqa_mp3d_v1.zip",
        "rel_path": "data/datasets/eqa/mp3d/v1/",
        "config_url": "configs/datasets/eqa/mp3d.yaml"
    },
    "vln-r2r-mp3d-v1": {
        "link": "https://dl.fbaipublicfiles.com/habitat/data/datasets/vln/mp3d/r2r/v1/vln_r2r_mp3d_v1.zip",
        "rel_path": "data/datasets/vln/mp3d/r2r/v1",
        "config_url": "configs/datasets/vln/mp3d_r2r.yaml"
    }
}


================================================
FILE: datasets/.habitat_downloader_helper.py
================================================
import json
import os
import re
import shutil
import sys
from pathlib import Path
from urllib.request import urlopen

from allenact.utils.misc_utils import all_equal

DATASET_DIR = os.path.abspath(os.path.dirname(Path(__file__)))


def get_habitat_download_info(allow_create: bool = False):
    """Get a dictionary giving a specification of where habitat data lives
    online.

    The specification is read from the `.habitat_datasets_download_info.json`
    file in `DATASET_DIR`. If that file is missing and `allow_create` is
    `True`, it is first regenerated by parsing the dataset table found in the
    habitat-lab README.

    # Parameters

    allow_create: Whether or not we should try to regenerate the json file that represents
        the above dictionary. This is potentially unsafe so please only set this to `True`
        if you're sure it will download what you want.

    # Returns

    The parsed content of the json file. When generated by this function, it
    maps a dataset id (derived from the archive file name) to a dict with the
    keys `"link"`, `"rel_path"` and `"config_url"`.

    # Raises

    ValueError : If the README contains no recognizable dataset table or the
        table lacks one of the expected columns.
    """
    json_save_path = os.path.join(DATASET_DIR, ".habitat_datasets_download_info.json")
    if allow_create and not os.path.exists(json_save_path):
        url = "https://raw.githubusercontent.com/facebookresearch/habitat-lab/master/README.md"
        # Close the HTTP response deterministically once the body is read
        with urlopen(url) as response:
            output = response.read().decode("utf-8")

        lines = [line.strip() for line in output.split("\n")]

        # Collect the first contiguous run of markdown table rows whose
        # header row mentions both "Task" and "Link".
        task_table_started = False
        table_lines = []
        for line in lines:
            if line.count("|") > 3 and line[0] == line[-1] == "|":
                if task_table_started:
                    table_lines.append(line)
                elif "Task" in line and "Link" in line:
                    task_table_started = True
                    table_lines.append(line)
            elif task_table_started:
                break

        # Raw string so that `\[`, `\]`, `\(` and `\)` reach the regex engine
        # unchanged instead of being invalid string escape sequences.
        url_pat = re.compile(r"\[.*\]\((.*)\)")

        def get_url(in_str: str):
            # Return the target of a markdown link `[text](target)`, or the
            # input unchanged when it is not such a link.
            match = url_pat.match(in_str)
            if match:
                return match.group(1)
            else:
                return in_str

        header = None
        rows = []
        for i, line in enumerate(table_lines):
            line = line.strip("|")
            entries = [get_url(e.strip().replace("`", "")) for e in line.split("|")]

            if i == 0:
                header = [e.lower().replace(" ", "_") for e in entries]
            elif not all_equal(entries):
                # Rows whose cells are all equal (e.g. the `---` separator
                # row) carry no data and are skipped.
                rows.append(entries)

        if header is None:
            raise ValueError(f"Could not find a dataset table in the README at {url}")

        # `list.index` raises ValueError if a column is missing
        link_ind = header.index("link")
        extract_ind = header.index("extract_path")
        config_ind = header.index("config_to_use")

        data_info = {}
        for row in rows:
            # E.g. ".../pointnav_gibson_v1.zip" -> "pointnav-gibson-v1"
            dataset_id = (
                row[link_ind].split("/")[-1].replace(".zip", "").replace("_", "-")
            )
            data_info[dataset_id] = {
                "link": row[link_ind],
                "rel_path": row[extract_ind],
                "config_url": row[config_ind],
            }

        with open(json_save_path, "w") as f:
            json.dump(data_info, f)

    with open(json_save_path, "r") as f:
        return json.load(f)


# Command-line entry point: download and unpack the habitat dataset whose key
# (as found in the download-info json) is given as the single argument.
if __name__ == "__main__":
    habitat_dir = os.path.join(DATASET_DIR, "habitat")
    os.makedirs(habitat_dir, exist_ok=True)
    # All relative paths below are resolved inside the `habitat` directory
    os.chdir(habitat_dir)

    download_info = get_habitat_download_info(allow_create=False)

    if len(sys.argv) != 2 or sys.argv[1] not in download_info:
        print(
            "Incorrect input, expects a single input where this input is one of "
            f" {['test-scenes', *sorted(download_info.keys())]}."
        )
        # `sys.exit` rather than `quit`: the latter is injected by the `site`
        # module for interactive use and is not guaranteed to exist.
        sys.exit(1)

    task_key = sys.argv[1]
    task_dl_info = download_info[task_key]

    # Temporary archive / extraction directory, both removed at the end
    output_archive_name = "__TO_OVERWRITE__.zip"
    deletable_dir_name = "__TO_DELETE__"

    cmd = f"wget {task_dl_info['link']} -O {output_archive_name}"
    if os.system(cmd):
        print(f"ERROR: `{cmd}` failed.")
        sys.exit(1)

    cmd = f"unzip {output_archive_name} -d {deletable_dir_name}"
    if os.system(cmd):
        print(f"ERROR: `{cmd}` failed.")
        sys.exit(1)

    # The recorded extraction path is relative to a `data/` directory, here
    # the files are placed directly inside the `habitat` directory instead.
    download_to_path = task_dl_info["rel_path"].replace("data/", "")
    # `endswith` (unlike indexing with [-1]) is safe for an empty path
    if download_to_path.endswith("/"):
        download_to_path = download_to_path[:-1]

    os.makedirs(download_to_path, exist_ok=True)

    cmd = f"rsync -avz {deletable_dir_name}/ {download_to_path}/"
    if os.system(cmd):
        print(f"ERROR: `{cmd}` failed.")
        sys.exit(1)

    os.remove(output_archive_name)
    shutil.rmtree(deletable_dir_name)


================================================
FILE: datasets/download_habitat_datasets.sh
================================================
#!/bin/bash

# Move to the directory containing this file
cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" || exit

# Create the directory layout that downloaded data is placed into
mkdir -p habitat
mkdir -p habitat/scene_datasets
mkdir -p habitat/datasets
mkdir -p habitat/configs

# Everything below runs with `habitat` as the working directory
cd habitat || exit

# Names of the temporary archive and extraction directory used by
# install_test_scenes_and_data (both are removed after a successful install)
output_archive_name=__TO_OVERWRITE__.zip
deletable_dir_name=__TO_DELETE__

# Download the habitat test scenes archive, extract it into a temporary
# directory, copy its `data/datasets` and `data/scene_datasets` folders into
# the current directory, and remove the temporary archive and directory.
# Exits the script with status 1 if the download or extraction fails.
install_test_scenes_and_data() {
    if ! wget http://dl.fbaipublicfiles.com/habitat/habitat-test-scenes.zip -O "$output_archive_name"; then
      echo "Could not download test scenes from http://dl.fbaipublicfiles.com/habitat/habitat-test-scenes.zip"
      exit 1
    fi
    if ! unzip "$output_archive_name" -d "$deletable_dir_name"; then
      echo "Could not unzip $output_archive_name to $deletable_dir_name"
      exit 1
    fi
    # Only clean up the temporary files if both copies succeeded
    rsync -avz "$deletable_dir_name/data/datasets" . && \
    rsync -avz "$deletable_dir_name/data/scene_datasets" . && \
    rm "$output_archive_name" && \
    rm -r "$deletable_dir_name"
}

# Delegate the download of the dataset named by the first argument to the
# Python helper script located one directory above the current one.
install_scene_data() {
  python3 ../.habitat_downloader_helper.py "$1"
}

# Dispatch on the first command-line argument: "test-scenes" is handled
# here, any other value is forwarded to the Python helper.
if [ "$1" = "test-scenes" ]
then
  install_test_scenes_and_data

else
  # Quoted so the argument is forwarded as exactly one word
  install_scene_data "$1"
fi


================================================
FILE: datasets/download_navigation_datasets.sh
================================================
#!/bin/bash

# Move to the directory containing this file
cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" || exit

# Download and extract a navigation dataset archive.
#   $1: dataset name; a directory with this name is created in the current
#       directory and the archive is extracted into it
#   $2: suffix appended to the dataset name to form the archive name
# Exits the script with status 1 if the directory cannot be created or if the
# download or extraction fails.
install_dataset() {
    dataset_name="$1"
    download_suffix="$2"
    if ! mkdir "$dataset_name" ; then
      echo "Could not create directory $(pwd)/$dataset_name. Does it already exist? If so, delete it."
      exit 1
    fi
    url_archive_name="$dataset_name$download_suffix.tar.gz"
    output_archive_name=__TO_OVERWRITE__.tar.gz
    if ! wget "https://prior-datasets.s3.us-east-2.amazonaws.com/embodied-ai/navigation/$url_archive_name" -O "$output_archive_name" ; then
      echo "Could not download $url_archive_name"
      # Remove the partial archive and the still-empty directory so that the
      # script can simply be re-run
      rm -f "$output_archive_name"
      rmdir "$dataset_name"
      exit 1
    fi
    if ! tar -xf "$output_archive_name" -C "$dataset_name" --strip-components=1 ; then
      echo "Could not extract $output_archive_name to $dataset_name"
      exit 1
    fi
    rm "$output_archive_name"
    echo "saved folder: $dataset_name"
}


# Download, Unzip, and Remove zip
# After each download, the matching debug dataset is generated from the
# parent directory of this script's directory.
if [ "$1" = "robothor-pointnav" ]
then
    echo "Downloading RoboTHOR PointNav Dataset ..."
    install_dataset "$1" "-v0"
    cd .. || exit 1
    echo "Generating RoboTHOR PointNav Debug Dataset ..."
    PYTHONPATH=. python ./allenact_plugins/robothor_plugin/scripts/make_pointnav_debug_dataset.py

elif [ "$1" = "robothor-objectnav" ]
then
    echo "Downloading RoboTHOR ObjectNav Dataset ..."
    install_dataset "$1" "-challenge-2021"
    cd .. || exit 1
    echo "Generating RoboTHOR ObjectNav Debug Dataset ..."
    PYTHONPATH=. python ./allenact_plugins/robothor_plugin/scripts/make_objectnav_debug_dataset.py

elif [ "$1" = "ithor-pointnav" ]
then
    echo "Downloading iTHOR PointNav Dataset ..."
    install_dataset "$1" "-v0"
    cd .. || exit 1
    echo "Generating iTHOR PointNav Debug Dataset ..."
    PYTHONPATH=. python ./allenact_plugins/ithor_plugin/scripts/make_pointnav_debug_dataset.py

elif [ "$1" = "ithor-objectnav" ]
then
    echo "Downloading iTHOR ObjectNav Dataset ..."
    install_dataset "$1" "-v0"
    cd .. || exit 1
    echo "Generating iTHOR ObjectNav Debug Dataset ..."
    PYTHONPATH=. python ./allenact_plugins/ithor_plugin/scripts/make_objectnav_debug_dataset.py

elif [ "$1" = "all-thor" ]
then
    # Each dataset is installed by a separate invocation of this script
    bash download_navigation_datasets.sh "robothor-pointnav"
    bash download_navigation_datasets.sh "robothor-objectnav"
    bash download_navigation_datasets.sh "ithor-pointnav"
    bash download_navigation_datasets.sh "ithor-objectnav"

else
    # printf is used because bash's builtin echo does not expand "\n"
    # unless given -e
    printf '\nFailed: Usage download_navigation_datasets.sh robothor-pointnav | robothor-objectnav | ithor-pointnav | ithor-objectnav | all-thor\n'
    exit 1
fi


================================================
FILE: dev_requirements.txt
================================================
black==24.2.0
ruamel.yaml
gitpython
markdown==3.3
mypy
pytest
docformatter==1.3.1
docstr-coverage==1.2.0
mkdocs==1.1.2
mkdocs-material==5.5.3
pre-commit
pydoc-markdown==3.4.0
compress-pickle==1.2.0


================================================
FILE: docs/.gitignore
================================================
api/


================================================
FILE: docs/CNAME
================================================
allenact.org

================================================
FILE: docs/FAQ.md
================================================
# FAQ

## How do I file a bug regarding the code or documentation?

Please file bugs by submitting an [issue](https://github.com/allenai/allenact/issues). We also welcome contributions from the community, including new features and bugfixes on existing functionality. Please refer to our [contribution guidelines](CONTRIBUTING.md).

## How do I generate documentation?

Documentation is generated using [mkdocs](https://www.mkdocs.org/) and
[pydoc-markdown](https://pypi.org/project/pydoc-markdown/). 

### Building documentation locally

The `mkdocs` command used to build our documentation relies on all documentation existing
as subdirectories of the `docs` folder. To ensure that all relevant markdown files are placed into
this directory, you should always run

```bash
bash scripts/build_docs.sh
```

from the top-level project directory before running any of the `mkdocs` commands below. 

If you have made no changes to the documentation and only wish to build documentation on 
your local machine, run the following from within the `allenact` root directory. Note: This will generate HTML documentation within the `site` folder.

```bash
mkdocs build
```

### Serving documentation locally

If you have made no changes to the documentation and only wish to serve documentation on your local
 machine (with live reloading of modified documentation), run the following from within the `allenact` root directory.
 
```bash
mkdocs serve
```

Then navigate to [http://127.0.0.1:8000/](http://127.0.0.1:8000/)

### Modifying and serving documentation locally

If you have made changes to the documentation, you will need to run a documentation builder script 
before you serve it on your local machine.

```bash
bash scripts/build_docs.sh
mkdocs serve
```

Then navigate to [http://127.0.0.1:8000/](http://127.0.0.1:8000/)

Alternatively, the `site` directory (once built) can be served as a static webpage on your local machine 
without installing any dependencies by running `python -m http.server 8000` from within the `site` directory.


================================================
FILE: docs/css/extra.css
================================================
/* Allow word-breaks in headers */
h1 {
  word-wrap: break-word;
}

/* Don't have the edit button as it's broken for us */
.md-content__button {
    display: none;
}

================================================
FILE: docs/getting_started/abstractions.md
================================================
# Primary abstractions

Our package relies on a collection of fundamental abstractions to define how, and in what task, an agent should be
trained and evaluated. A subset of these abstractions are described in plain language below. Each of the below sections
ends with a link to the (formal) documentation of the abstraction as well as a link to an example implementation of the
abstraction (if relevant). The following provides a high-level illustration of how these abstractions interact.


![abstractions-overview](../img/abstractions.png)

## Experiment configuration

In `allenact`, experiments are defined by implementing the abstract `ExperimentConfig` class. The methods
of this implementation are then called during training/inference to properly set up the desired experiment. For example,
the `ExperimentConfig.create_model` method will be called at the beginning of training to create the model
to be trained.
See either the ["designing your first minigrid experiment"](/tutorials/minigrid-tutorial) or the
["designing an experiment for point navigation"](/tutorials/training-a-pointnav-model)
 tutorials to get an in-depth description of how these experiment configurations are defined in practice.

See also the [abstract `ExperimentConfig` class](/api/allenact/base_abstractions/experiment_config#experimentconfig) 
and an [example implementation](/api/allenact_plugins/ithor_plugin/ithor_environment/#ithorenvironment).

## Task sampler

A task sampler is responsible for generating a sequence of tasks for agents to solve. The sequence of tasks can be 
randomly generated (e.g. in training) or extracted from an ordered pool (e.g. in validation or testing).

See the [abstract `TaskSampler` class](/api/allenact/base_abstractions/task/#tasksampler) 
and an [example implementation](/api/allenact_plugins/ithor_plugin/ithor_task_samplers/#objectnavtasksampler).

## Task

Tasks define the scope of the interaction between agents and an environment (including the action types agents are 
allowed to execute), as well as metrics to evaluate the agents' performance. For example, we might define a task 
`ObjectNaviThorGridTask` in which agents receive observations obtained from the environment (e.g. RGB images) or directly from 
the task (e.g. a target object class) and are allowed to execute actions such as `MoveAhead`, `RotateRight`, 
`RotateLeft`, and `End` whenever agents determine they have reached their target. The metrics might include a
success indicator or some quantitative metric on the optimality of the followed path.  

See the [abstract `Task` class](/api/allenact/base_abstractions/task/#task) 
and an [example implementation](/api/allenact_plugins/robothor_plugin/robothor_tasks/#objectnavtask).

## Sensor

Sensors provide observations extracted from an environment (e.g. RGB or depth images) or directly from a task (e.g. the 
end point in point navigation or target object class in semantic navigation) that can be directly consumed by 
agents.

See the [abstract `Sensor` class](/api/allenact/base_abstractions/sensor/#sensor) 
and an [example implementation](/api/allenact_plugins/ithor_plugin/ithor_sensors/#rgbsensorthor).

## Actor critic model

The actor-critic agent is responsible for computing batched action probabilities and state values given the 
observations provided by sensors, internal state representations, previous actions, and potentially 
other inputs.

See the [abstract `ActorCriticModel` class](/api/allenact/algorithms/onpolicy_sync/policy/#ActorCriticModel) 
and an
[example implementation](/api/projects/objectnav_baselines/models/object_nav_models#ObjectNavBaselineActorCritic).

## Training pipeline

The training pipeline, defined in the
[`ExperimentConfig`'s `training_pipeline` method](/api/allenact/base_abstractions/experiment_config/#training_pipeline),
contains one or more training stages where different
[losses can be combined or sequentially applied](/howtos/defining-a-new-training-pipeline).
 
## Losses

Actor-critic losses compute a combination of action loss and value loss out of collected experience that can be used to 
train actor-critic models with back-propagation, e.g. PPO or A2C.

See the
[`AbstractActorCriticLoss` class](/api/allenact/algorithms/onpolicy_sync/losses/abstract_loss#abstractactorcriticloss) 
and an [example implementation](/api/allenact/algorithms/onpolicy_sync/losses/ppo/#ppo).

Off-policy losses implement generic training iterations in which a batch of data is run through a model (that can be a
subgraph of an [`ActorCriticModel`](#actor-critic-model)) and a loss is
computed on the model's output.

See the
[`AbstractOffPolicyLoss` class](/api/allenact/algorithms/offpolicy_sync/losses/abstract_offpolicy_loss#abstractoffpolicyloss) 
and an [example implementation](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy/#MiniGridOffPolicyExpertCELoss).


================================================
FILE: docs/getting_started/running-your-first-experiment.md
================================================
# Running your first experiment

Assuming you have [installed the full library](../installation/installation-allenact.md#full-library), you can run your
first experiment by calling

```bash
PYTHONPATH=. python allenact/main.py minigrid_tutorial -b projects/tutorials -m 8 -o experiment_output/minigrid -s 12345
```

from the `allenact` root directory.

* With `-b projects/tutorials` we tell `allenact` that the `minigrid_tutorial` experiment config file 
will be found in the `projects/tutorials` directory.
* With `-m 8` we limit the number of subprocesses to 8 (each subprocess will run 16 of the 128 training task samplers).
* With `-o experiment_output/minigrid` we set the output folder into which results and logs will be saved.
* With `-s 12345` we set the random seed.

If everything was installed correctly, a simple model will be trained (and validated) in the MiniGrid environment and
a new folder `experiment_output/minigrid` will be created containing:

* a `checkpoints/MiniGridTutorial/LOCAL_TIME_STR/` subfolder with model weight checkpoints,
* a `used_configs/MiniGridTutorial/LOCAL_TIME_STR/` subfolder with all used configuration files,
* and a tensorboard log file under `tb/MiniGridTutorial/LOCAL_TIME_STR/`.

Here `LOCAL_TIME_STR` is a string that records the time when the experiment was started (e.g. the string 
`"2020-08-21_18-19-47"` corresponds to an experiment started on August 21st 2020, 47 seconds past 6:19pm).

If we have Tensorboard installed, we can track training progress with
```bash
tensorboard --logdir experiment_output/minigrid/tb
```
which will default to the URL [http://localhost:6006/](http://localhost:6006/).

After 150,000 steps, the script will terminate and several checkpoints will be saved in the output folder.
The training curves should look similar to:

![training curves](../img/minigrid_train.png)

If everything went well, the `valid` success rate should converge to 1 and the mean episode length to a value below 4.
(For perfectly uniform sampling and complete observation, the expectation for the optimal policy is 3.75 steps.) In the
not-so-unlikely event of the run failing to converge to a near-optimal policy, we can just try to re-run (for example
with a different random seed). The validation curves should look similar to:

![validation curves](../img/minigrid_valid.png)
 
A detailed tutorial describing how the `minigrid_tutorial` experiment configuration was created can be found 
[here](../tutorials/minigrid-tutorial.md). 
 
To run your own custom experiment simply define a new experiment configuration in a file 
`projects/YOUR_PROJECT_NAME/experiments/my_custom_experiment.py` after which you may run it with
`PYTHONPATH=. python allenact/main.py my_custom_experiment -b projects/YOUR_PROJECT_NAME/experiments`.

<!-- ## Experiment configuration

The main entry point for users is a configuration file that defines the experiment we
want to run. More concretely, it includes a single class defining:

* A `tag` to identify the experiment.
* A method to instantiate [actor-critic models](/getting_started/abstractions#actor-critic-model).
* A multi-staged training pipeline with different types of [losses](/getting_started/abstractions#actor-critic-loss), an 
optimizer, and other parameters like learning rates, batch sizes, etc. 
* Machine configuration parameters that will be used e.g. for training or validation.
* A method to instantiate [task samplers](/getting_started/abstractions#task-sampler).
* Methods describing initialization parameters for task samplers used in training, validation, and testing; including
 the assignment of workers to devices for running environments.

A detailed view to an example experiment config file can be found [here](/overview/experiment).
 -->

================================================
FILE: docs/getting_started/structure.md
================================================
# Structure of the codebase

The codebase consists of the following directories: `allenact`, `datasets`, `docs`, `overrides`, `allenact_plugins`,
`pretrained_model_ckpts`, `projects`, `scripts`, and `tests`. Below, we explain the overall structure and how
different components of the codebase are organized. 

## [`allenact` directory](https://github.com/allenai/allenact/tree/master/allenact)

Contains runtime algorithms for on-policy and off-policy training and inference, base abstractions used throughout
the code base and basic models to be used as building blocks in future models.

* `allenact.algorithms` includes on-policy and off-policy training and inference algorithms and abstractions for losses,
policies, rollout storage, etc.

* `allenact.base_abstractions` includes the base `ExperimentConfig`, distributions, base `Sensor`, `TaskSampler`, `Task`,
etc.

* `allenact.embodiedai` includes basic CNN and RNN state encoders, as well as basic `ActorCriticModel` implementations
for embodied AI tasks.

## [`datasets` directory](https://github.com/allenai/allenact/tree/master/datasets)

A directory made to store task-specific datasets. For example, the script `datasets/download_navigation_datasets.sh` can
be used to automatically download task dataset files for Point Navigation within the RoboTHOR environment
and it will place these files into a new `datasets/robothor-pointnav` directory. 

## [`docs` directory](https://github.com/allenai/allenact/tree/master/docs)

Contains documentation for the framework, including guides for installation and first experiments, how-to's for
the definition and usage of different abstractions, tutorials and per-project documentation.

## [`overrides` directory](https://github.com/allenai/allenact/tree/master/overrides)

Files within this directory are used to customize the look and structure of the documentation generated when running `mkdocs`.
See our [FAQ](../FAQ.md) for information on how to generate this documentation for yourself. 

## [`allenact_plugins` directory](https://github.com/allenai/allenact/tree/master/allenact_plugins)

Contains implementations of `ActorCriticModel`s and `Task`s in different environments. Each plugin folder is 
named as `{environment}_plugin` and contains three subfolders:

1. `configs` to host useful configuration for the environment or tasks.
1. `data` to store data to be consumed by the environment or tasks.
1. `scripts` to setup the plugin or gather and process data.

## [`pretrained_model_ckpts` directory](https://github.com/allenai/allenact/tree/master/pretrained_model_ckpts)

Directory into which pretrained model checkpoints will be saved. See also the 
`pretrained_model_ckpts/download_navigation_model_ckpts.sh` which can be used to download such checkpoints.

## [`projects` directory](https://github.com/allenai/allenact/tree/master/projects)

Contains project-specific code like experiment configurations and scripts to process results, generate visualizations
or prepare data.

## [`scripts` directory](https://github.com/allenai/allenact/tree/master/scripts)

Includes framework-wide scripts to build the documentation, format code, run_tests and start an xserver. The latter can
be used for OpenGL-based environments having super-user privileges in Linux, assuming NVIDIA drivers and `xserver-xorg`
are installed.

## [`tests` directory](https://github.com/allenai/allenact/tree/master/tests)

Includes unit tests for `allenact`.

## [`allenact.utils` directory](https://github.com/allenai/allenact/tree/master/allenact/utils)

It includes different types of utilities, mainly divided into:

* `allenact.utils.experiment_utils`, including the `TrainingPipeline`, `PipelineStage` and other utilities to configure an
experiment.
* `allenact.utils.model_utils`, including generic CNN creation, forward-pass helpers and other utilities.
* `allenact.utils.tensor_utils`, including functions to batch observations, convert tensors into video, scale image tensors, etc.
* `allenact.utils.viz_utils`, including a `VizSuite` class that can be instantiated with different visualization plugins during
inference.
* `allenact.utils.system`, including logging and networking helpers.

Other utils files, including `allenact.utils.misc_utils`, contain a number of helper functions for different purposes.


================================================
FILE: docs/howtos/changing-rewards-and-losses.md
================================================
# Changing rewards and losses

In order to train actor-critic agents, we need to specify

* `rewards` at the task level, and
* `losses` at the training pipeline level. 

## Rewards

We will use the [object navigation task in `iTHOR`](/api/allenact_plugins/ithor_plugin/ithor_tasks/#objectnavtask) as a 
running example. We can see how the `ObjectNaviThorGridTask._step(self, action: int) -> RLStepResult` method computes
the reward for the latest action by invoking a function like:

```python
def judge(self) -> float:
    """Compute the reward for the latest action.

    A constant step penalty is always applied, a further penalty is added
    when the last action failed and, once the end action has been taken,
    a terminal bonus (on success) or penalty (otherwise) is added.

    # Returns
    The reward as a float.
    """
    # Constant per-step penalty.
    total = -0.01

    # Additional penalty for an unsuccessful action.
    if not self.last_action_success:
        total -= 0.03

    # Terminal reward, only granted once the end action has been taken.
    if self._took_end_action:
        if self._success:
            total += 1.0
        else:
            total -= 1.0

    return float(total)
```

Any reward shaping can be easily added by e.g. modifying the definition of an existing class:

```python
class NavigationWithShaping(allenact_plugins.ithor_plugin.ithor_tasks.ObjectNaviThorGridTask):
    """Object navigation task whose reward adds a shaping term to the base reward."""

    def judge(self) -> float:
        """Return the parent class reward plus a shaping term.

        The shaping term is only added when `self.previous_state` is not `None`.

        # Returns
        The shaped reward.
        """
        # Start from the reward computed by the parent task.
        reward = super().judge()
        
        # NOTE(review): `previous_state` and `current_state` are not defined in
        # this snippet; presumably the task has to initialize and maintain them
        # (e.g. set `previous_state = None` at construction) - confirm.
        if self.previous_state is not None:
            # `my_reward_shaping_function` is a user-supplied placeholder.
            reward += float(my_reward_shaping_function(
                self.previous_state,
                self.current_state,
            ))
        
        # Remember the current state for the next call.
        self.previous_state = self.current_state
        
        return reward

``` 

## Losses

We support [A2C](/api/allenact/algorithms/onpolicy_sync/losses/a2cacktr#a2c),
[PPO](/api/allenact/algorithms/onpolicy_sync/losses/ppo#ppo), and
[imitation](/api/allenact/algorithms/onpolicy_sync/losses/imitation#imitation) losses amongst others. We can easily
include [DAgger](https://www.cs.cmu.edu/~sross1/publications/Ross-AIStats11-NoRegret.pdf) or variations thereof by
assuming the availability of an expert providing optimal actions to agents and combining imitation and PPO losses in
different ways through multiple stages:

```python
class MyExperimentConfig(allenact.base_abstractions.experiment_config.ExperimentConfig):
    ...
    @classmethod
    def training_pipeline(cls, **kwargs):
        dagger_steps = int(3e4)
        ppo_steps = int(3e4)
        ppo_steps2 = int(1e6)
        ...
        return allenact.utils.experiment_utils.TrainingPipeline(
            named_losses={
                "imitation_loss": allenact.algorithms.onpolicy_sync.losses.imitation.Imitation(),
                "ppo_loss": allenact.algorithms.onpolicy_sync.losses.ppo.PPO(
                    **allenact.algorithms.onpolicy_sync.losses.ppo.PPOConfig,
                ),
            },
            ...
            pipeline_stages=[
                allenact.utils.experiment_utils.PipelineStage(
                    loss_names=["imitation_loss", "ppo_loss"],
                    teacher_forcing=allenact.utils.experiment_utils.LinearDecay(
                        startp=1.0, endp=0.0, steps=dagger_steps,
                    ),
                    max_stage_steps=dagger_steps,
                ),
                allenact.utils.experiment_utils.PipelineStage(
                    loss_names=["ppo_loss", "imitation_loss"],
                    max_stage_steps=ppo_steps
                ),
                allenact.utils.experiment_utils.PipelineStage(
                    loss_names=["ppo_loss"],
                    max_stage_steps=ppo_steps2,
                ),
            ],
        )
```


================================================
FILE: docs/howtos/defining-a-new-model.md
================================================
# Defining a new model

All actor-critic models must implement the interface described by the
[ActorCriticModel class](/api/allenact/algorithms/onpolicy_sync/policy/#actorcriticmodel). This interface includes two methods that need to be 
implemented:

* `_recurrent_memory_specification`, returning a description of the model's recurrent memory; and 
* `forward`, returning an [ActorCriticOutput](/api/allenact/base_abstractions/misc/#actorcriticoutput) given the current observation,
hidden state and previous actions.

For convenience, we provide a [recurrent network module](/api/allenact/embodiedai/models/basic_models/#rnnstateencoder) and
[a simple CNN module](/api/allenact/embodiedai/models/basic_models/#simplecnn) from the Habitat baseline navigation
models, that will be used in this example.

### Actor-critic model interface

As an example, let's build an object navigation agent.

```python
class ObjectNavBaselineActorCritic(ActorCriticModel[CategoricalDistr]):
    """Baseline recurrent actor critic model for object-navigation.

    # Attributes
    action_space : The space of actions available to the agent. Currently only discrete
        actions are allowed (so this space will always be of type `gym.spaces.Discrete`).
    observation_space : The observation space expected by the agent. This observation space
        should include (optionally) 'rgb' images and 'depth' images and is required to
        have a component corresponding to the goal `goal_sensor_uuid`.
    goal_sensor_uuid : The uuid of the sensor of the goal object. See `GoalObjectTypeThorSensor`
        as an example of such a sensor.
    hidden_size : The hidden size of the GRU RNN.
    object_type_embedding_dim: The dimensionality of the embedding corresponding to the goal
        object type.
    """

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        goal_sensor_uuid: str,
        rgb_uuid: Optional[str],
        depth_uuid: Optional[str],
        hidden_size=512,
        object_type_embedding_dim=8,
        trainable_masked_hidden_state: bool = False,
        num_rnn_layers=1,
        rnn_type="GRU",
    ):
        """Initializer.

        See class documentation for parameter definitions.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        self.goal_sensor_uuid = goal_sensor_uuid
        self._n_object_types = self.observation_space.spaces[self.goal_sensor_uuid].n
        self._hidden_size = hidden_size
        self.object_type_embedding_size = object_type_embedding_dim

        self.visual_encoder = SimpleCNN(
            observation_space=self.observation_space,
            output_size=self._hidden_size,
            rgb_uuid=rgb_uuid,
            depth_uuid=depth_uuid,
        )

        self.state_encoder = RNNStateEncoder(
            (0 if self.is_blind else self._hidden_size) + object_type_embedding_dim,
            self._hidden_size,
            trainable_masked_hidden_state=trainable_masked_hidden_state,
            num_layers=num_rnn_layers,
            rnn_type=rnn_type,
        )

        self.actor = LinearActorHead(self._hidden_size, action_space.n)
        self.critic = LinearCriticHead(self._hidden_size)

        self.object_type_embedding = nn.Embedding(
            num_embeddings=self._n_object_types,
            embedding_dim=object_type_embedding_dim,
        )

        self.train()

    @property
    def recurrent_hidden_state_size(self) -> int:
        """The recurrent hidden state size of the model."""
        return self._hidden_size

    @property
    def is_blind(self) -> bool:
        """True if the model is blind (e.g. neither 'depth' or 'rgb' is an
        input observation type)."""
        return self.visual_encoder.is_blind

    @property
    def num_recurrent_layers(self) -> int:
        """Number of recurrent hidden layers."""
        return self.state_encoder.num_recurrent_layers

    def _recurrent_memory_specification(self):
        return dict(
            rnn=(
                (
                    ("layer", self.num_recurrent_layers),
                    ("sampler", None),
                    ("hidden", self.recurrent_hidden_state_size),
                ),
                torch.float32,
            )
        )

    def get_object_type_encoding(
        self, observations: Dict[str, torch.FloatTensor]
    ) -> torch.FloatTensor:
        """Get the object type encoding from input batched observations."""
        # noinspection PyTypeChecker
        return self.object_type_embedding(  # type:ignore
            observations[self.goal_sensor_uuid].to(torch.int64)
        )

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Processes input batched observations to produce new actor and critic
        values. Processes input batched observations (along with prior hidden
        states, previous actions, and masks denoting which recurrent hidden
        states should be masked) and returns an `ActorCriticOutput` object
        containing the model's policy (distribution over actions) and
        evaluation of the current state (value).

        # Parameters
        observations : Batched input observations.
        memory : `Memory` containing the hidden states from initial timepoints.
        prev_actions : Tensor of previous actions taken.
        masks : Masks applied to hidden states. See `RNNStateEncoder`.

        # Returns
        Tuple of the `ActorCriticOutput` and recurrent hidden state.
        """
        target_encoding = self.get_object_type_encoding(
            cast(Dict[str, torch.FloatTensor], observations)
        )
        x = [target_encoding]

        if not self.is_blind:
            perception_embed = self.visual_encoder(observations)
            x = [perception_embed] + x

        x_cat = torch.cat(x, dim=-1)  # type: ignore
        x_out, rnn_hidden_states = self.state_encoder(
            x_cat, memory.tensor("rnn"), masks
        )

        return (
            ActorCriticOutput(
                distributions=self.actor(x_out), values=self.critic(x_out), extras={}
            ),
            memory.set_tensor("rnn", rnn_hidden_states),
        )
```


================================================
FILE: docs/howtos/defining-a-new-task.md
================================================
# Defining a new task

In order to use new tasks in our experiments, we need to define two classes:

* A [Task](/api/allenact/base_abstractions/task#task), including, among others, a `step` implementation providing a
[RLStepResult](/api/allenact/base_abstractions/misc#rlstepresult), a `metrics` method providing quantitative performance measurements 
for agents and, optionally, a `query_expert` method that can be used e.g. with an
[imitation loss](/api/allenact/algorithms/onpolicy_sync/losses/imitation#imitation) during training.
* A [TaskSampler](/api/allenact/base_abstractions/task#tasksampler), that allows instantiating new Tasks for the agents to solve during
training, validation and testing.

## Task

Let's define a semantic navigation task, where agents have to navigate from a starting point in an environment to an
object of a specific class using a minimal number of steps and deciding when the goal has been reached.

We need to define the methods `action_space`, `render`, `_step`, `reached_terminal_state`, `class_action_names`, `close`,
`metrics`, and `query_expert` from the base `Task` definition.


### Initialization, action space and termination
Let's start with the definition of the action space and task initialization:

```python
...
from allenact_plugins.ithor_plugin.ithor_constants import (
    MOVE_AHEAD,
    ROTATE_LEFT,
    ROTATE_RIGHT,
    LOOK_DOWN,
    LOOK_UP,
    END,
)

...


class ObjectNaviThorGridTask(Task[IThorEnvironment]):
    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, LOOK_DOWN, LOOK_UP, END)

    def __init__(
            self,
            env: IThorEnvironment,
            sensors: List[Sensor],
            task_info: Dict[str, Any],
            max_steps: int,
            **kwargs
    ) -> None:
        super().__init__(
            env=env,
            sensors=sensors,
            task_info=task_info,
            max_steps=max_steps, **kwargs
        )
        self._took_end_action: bool = False
        self._success: Optional[bool] = False

    @property
    def action_space(self):
        return gym.spaces.Discrete(len(self._actions))

    @classmethod
    def class_action_names(cls) -> Tuple[str, ...]:
        return cls._actions

    def reached_terminal_state(self) -> bool:
        return self._took_end_action

    def close(self) -> None:
        self.env.stop()

    ...
```

### Step method
Next, we define the main method `_step` that will be called every time the agent produces a new action: 
```python
class ObjectNaviThorGridTask(Task[IThorEnvironment]):
    ...
    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        assert isinstance(action, int)
        action = cast(int, action)

        action_str = self.class_action_names()[action]

        if action_str == END:
            self._took_end_action = True
            self._success = self.is_goal_object_visible()
            self.last_action_success = self._success
        else:
            self.env.step({"action": action_str})
            self.last_action_success = self.env.last_action_success

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )
        return step_result
    
    ...

    def is_goal_object_visible(self) -> bool:
        return any(
            o["objectType"] == self.task_info["object_type"]
            for o in self.env.visible_objects()
        )

    def judge(self) -> float:
        reward = -0.01

        if not self.last_action_success:
            reward += -0.03

        if self._took_end_action:
            reward += 1.0 if self._success else -1.0

        return float(reward)
```

###  Metrics, rendering and expert actions

Finally, we define methods to render and evaluate the current task, and optionally generate expert actions to be used
e.g. for DAgger training.
```python
    def render(self, mode: str = "rgb", *args, **kwargs) -> numpy.ndarray:
        assert mode == "rgb", "only rgb rendering is implemented"
        return self.env.current_frame


    def metrics(self) -> Dict[str, Any]:
        if not self.is_done():
            return {}
        else:
            return {"success": self._success, "ep_length": self.num_steps_taken()}

    def query_expert(self, **kwargs) -> Tuple[int, bool]:
        return my_objnav_expert_implementation(self)
```

## TaskSampler

We also need to define the corresponding TaskSampler, which must contain implementations for methods `__len__`,
`total_unique`, `last_sampled_task`, `next_task`, `close`, `reset`, and `set_seed`. Currently,
an additional method `all_observation_spaces_equal` is used to ensure compatibility with the current
[RolloutBlockStorage](/api/allenact/algorithms/onpolicy_sync/storage#rolloutblockstorage).

Let's define a task sampler able to provide an infinite number of object navigation tasks for AI2-THOR.

### Initialization and termination 

```python
class ObjectNavTaskSampler(TaskSampler):
    """Task sampler for object navigation tasks.

    The environment is not created at construction time: `self.env` starts
    as `None` and `_create_environment` builds a new `IThorEnvironment`
    from `env_args`.
    """

    def __init__(
        self,
        scenes: List[str],
        object_types: List[str],
        sensors: List[Sensor],
        max_steps: int,
        env_args: Dict[str, Any],
        action_space: gym.Space,
        seed: Optional[int] = None,
        deterministic_cudnn: bool = False,
        *args,
        **kwargs
    ) -> None:
        """Initializer.

        # Parameters
        scenes : Names of the scenes tasks can be sampled from.
        object_types : Candidate goal object types.
        sensors : Sensors stored for the sampled tasks.
        max_steps : Maximum number of steps stored for the sampled tasks.
        env_args : Keyword arguments forwarded to `IThorEnvironment`.
        action_space : Action space stored for the sampled tasks.
        seed : Value passed to `set_seed`.
        deterministic_cudnn : Accepted but not used in this snippet.
        """
        # NOTE: `deterministic_cudnn`, `*args` and `**kwargs` are accepted but
        # not used in this snippet.
        self.env_args = env_args
        self.scenes = scenes
        self.object_types = object_types
        self.grid_size = 0.25
        # No environment yet; see `_create_environment`.
        self.env: Optional[IThorEnvironment] = None
        self.sensors = sensors
        self.max_steps = max_steps
        # NOTE: the attribute name is spelled `_action_sapce` (sic); any code
        # reading it must use the same spelling.
        self._action_sapce = action_space

        self.scene_id: Optional[int] = None

        self._last_sampled_task: Optional[ObjectNaviThorGridTask] = None

        # `set_seed` is a helper defined elsewhere; presumably it seeds the
        # random number generators - confirm.
        set_seed(seed)

        self.reset()

    def close(self) -> None:
        """Stop the environment, if one has been created."""
        if self.env is not None:
            self.env.stop()

    def reset(self) -> None:
        """Reset the sampler by setting the current scene index back to 0."""
        self.scene_id = 0
    
    def _create_environment(self) -> IThorEnvironment:
        """Build a new `IThorEnvironment` using the stored `env_args`.

        # Returns
        The newly created environment.
        """
        env = IThorEnvironment(
            make_agents_visible=False,
            object_open_speed=0.05,
            restrict_to_initially_reachable_points=True,
            **self.env_args,
        )
        return env
```

### Task sampling

Finally, we need to define methods to determine the number of available tasks (possibly infinite) and sample tasks:
```python

    @property
    def length(self) -> Union[int, float]:
        """Number of tasks that can still be sampled: always infinite here."""
        return float("inf")

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Total number of unique tasks; this sampler always reports `None`.

        NOTE(review): `None` presumably means the count is unknown or
        unbounded - confirm against the `TaskSampler` base class.
        """
        return None

    @property
    def last_sampled_task(self) -> Optional[ObjectNaviThorGridTask]:
        """The task stored in `self._last_sampled_task` (initially `None`)."""
        return self._last_sampled_task

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Whether all sampled tasks share one observation space: always `True` here."""
        return True

    def next_task(self) -> Optional[ObjectNaviThorGridTask]:
        """Sample a new object navigation task in a randomly chosen scene.

        A scene is drawn uniformly at random from `self.scenes`. The
        environment is created on first use and is only reset when the
        sampled scene differs from the one currently loaded. The agent's
        location is then randomized and a goal object type is drawn
        uniformly at random from `self.object_types`.

        # Returns
        The newly sampled task, also stored as `self._last_sampled_task`.
        """
        self.scene_id = random.randint(0, len(self.scenes) - 1)
        # Bind the sampled scene to a local name: it is used below both to
        # compare against the loaded scene and to reset the environment.
        scene = self.scenes[self.scene_id]
        self.scene = scene

        if self.env is not None:
            # Avoid reloading a scene that is already loaded.
            if scene != self.env.scene_name:
                self.env.reset(scene)
        else:
            self.env = self._create_environment()
            self.env.reset(scene_name=scene)

        self.env.randomize_agent_location()

        # Pick a single object type (not a one-element list), so that it can
        # be compared directly against the `objectType` of visible objects.
        task_info = {"object_type": random.choice(self.object_types)}

        self._last_sampled_task = ObjectNaviThorGridTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_sapce,
        )
        return self._last_sampled_task
```

================================================
FILE: docs/howtos/defining-a-new-training-pipeline.md
================================================
# Defining a new training pipeline

Defining a new training pipeline, or even new learning algorithms, is straightforward with the modular design in
`AllenAct`.

A convenience [Builder](/api/allenact/utils/experiment_utils#builder) object allows us to defer the instantiation
of objects of the class passed as their first argument while allowing passing additional keyword arguments to their
initializers.

## On-policy

We can implement a training pipeline which trains with a single stage using PPO:
```python
class ObjectNavThorPPOExperimentConfig(ExperimentConfig):
    ...
    @classmethod
    def training_pipeline(cls, **kwargs):
        ppo_steps = int(1e6)
        lr = 2.5e-4
        num_mini_batch = 2 if not torch.cuda.is_available() else 6
        update_repeats = 4
        num_steps = 128
        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max length tasks
        save_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 1.0
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps,),
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )
    ...
```

Alternatively, we could use a more complex pipeline that includes dataset aggregation
([DAgger](https://www.cs.cmu.edu/~sross1/publications/Ross-AIStats11-NoRegret.pdf)). This requires the existence of an
expert (implemented in the task definition) that provides optimal actions to agents. We have implemented such a 
pipeline by extending the above configuration as follows:
```python
class ObjectNavThorDaggerThenPPOExperimentConfig(ExperimentConfig):
    ...
    SENSORS = [
        ...
        ExpertActionSensor(nactions=6), # Notice that we have added
                                        # an expert action sensor.
    ]
    ...
    @classmethod
    def training_pipeline(cls, **kwargs):
        dagger_steps = int(1e4) # Much smaller number of steps as we're using imitation learning
        ppo_steps = int(1e6)
        lr = 2.5e-4
        num_mini_batch = 1 if not torch.cuda.is_available() else 6
        update_repeats = 4
        num_steps = 128
        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max length tasks
        save_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 1.0
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),
                "imitation_loss": Imitation(), # We add an imitation loss.
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[ # The pipeline now has two stages, in the first
                              # we use DAgger (imitation loss + teacher forcing).
                              # In the second stage we no longer use teacher
                              # forcing and add in the ppo loss.
                PipelineStage(
                    loss_names=["imitation_loss"],
                    teacher_forcing=LinearDecay(
                        startp=1.0, endp=0.0, steps=dagger_steps,
                    ),
                    max_stage_steps=dagger_steps,
                ),
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps,),
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )
``` 

## Off-policy

We can also define off-policy stages where an external dataset is used, in this case, for Behavior Cloning: 

```python
class BCOffPolicyBabyAIGoToLocalExperimentConfig(ExperimentConfig):
    ...
    @classmethod
    def training_pipeline(cls, **kwargs):
        total_train_steps = int(1e7)
        num_steps=128
        return TrainingPipeline(
            save_interval=10000,  # Save every 10000 steps (approximately)
            metric_accumulate_interval=1,
            optimizer_builder=Builder(optim.Adam, dict(lr=2.5e-4)),
            num_mini_batch=0,  # no on-policy training
            update_repeats=0,  # no on-policy training
            num_steps=num_steps // 4,  # rollouts from environment tasks
            named_losses={
                "offpolicy_expert_ce_loss": MiniGridOffPolicyExpertCELoss(
                    total_episodes_in_epoch=int(1e6)  # dataset contains 1M episodes
                ),
            },
            gamma=0.99,
            use_gae=True,
            gae_lambda=1.0,
            max_grad_norm=0.5,
            advance_scene_rollout_period=None,
            pipeline_stages=[
                PipelineStage(
                    loss_names=[],  # no on-policy losses
                    max_stage_steps=total_train_steps,
                    # We only train from off-policy data:
                    offpolicy_component=OffPolicyPipelineComponent(
                        data_iterator_builder=lambda **kwargs: create_minigrid_offpolicy_data_iterator(
                            path=DATASET_PATH,  # external dataset
                            nrollouts=128,  # per trainer batch size
                            rollout_len=num_steps,  # For truncated-BPTT
                            instr_len=5,
                            **kwargs,
                        ),
                        loss_names=["offpolicy_expert_ce_loss"],  # off-policy losses
                        updates=16,  # 16 batches per rollout
                    ),
                ),
            ],
        )
```

Note that, in this example, `128 / 4 = 32` steps will be sampled from tasks in a MiniGrid environment (which can be
useful to track the agent's performance), while a subgraph of the model (in this case the entire Actor) is
trained from batches of 128-step truncated episodes sampled from an offline dataset stored under `DATASET_PATH`.


================================================
FILE: docs/howtos/defining-an-experiment.md
================================================
# Defining an experiment

Let's look at an example experiment configuration for an object navigation example with an actor-critic agent observing
RGB images from the environment and target object classes from the task. This is a simplified example where the 
agent is confined to a single `iTHOR` scene (`FloorPlan1`) and needs to find a single object (a tomato). To see how one
might run a "full"/"hard" version of navigation within AI2-THOR, see our tutorials
 [PointNav in RoboTHOR](../tutorials/training-a-pointnav-model.md) and 
 [Swapping in a new environment](../tutorials/transfering-to-a-different-environment-framework.md).

The interface to be implemented by the experiment specification is defined in
[allenact.base_abstractions.experiment_config](/api/allenact/base_abstractions/experiment_config#experimentconfig). If you'd
like to skip ahead and see the finished configuration, [see here](https://github.com/allenai/allenact/blob/master/projects/tutorials/object_nav_ithor_ppo_one_object.py).
We begin by making the following imports:

```python
from math import ceil
from typing import Dict, Any, List, Optional

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor, GoalObjectTypeThorSensor
from allenact_plugins.ithor_plugin.ithor_task_samplers import ObjectNavTaskSampler
from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask
from projects.objectnav_baselines.models.object_nav_models import (
 ObjectNavBaselineActorCritic,
)
from allenact.utils.experiment_utils import Builder, PipelineStage, TrainingPipeline, LinearDecay
```

Now, the first method to implement is `tag`, which provides a string identifying the experiment:

```python
class ObjectNavThorPPOExperimentConfig(ExperimentConfig):
    ...
    @classmethod
    def tag(cls):
        return "ObjectNavThorPPO"
    ...
```

## Model creation

Next, `create_model` will be used to instantiate a
[baseline object navigation actor-critic model](/api/projects/objectnav_baselines/models/object_nav_models#ObjectNavBaselineActorCritic):

```python
class ObjectNavThorExperimentConfig(ExperimentConfig):
    ...

    # A simple setting, train/valid/test are all the same single scene
    # and we're looking for a single object
    OBJECT_TYPES = ["Tomato"]
    TRAIN_SCENES = ["FloorPlan1_physics"]
    VALID_SCENES = ["FloorPlan1_physics"]
    TEST_SCENES = ["FloorPlan1_physics"]

    # Setting up sensors and basic environment details
    SCREEN_SIZE = 224
    SENSORS = [
        RGBSensorThor(
            height=SCREEN_SIZE, width=SCREEN_SIZE, use_resnet_normalization=True,
        ),
        GoalObjectTypeThorSensor(object_types=OBJECT_TYPES),
    ]
    
    ...
    
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        return ObjectNavBaselineActorCritic(
            action_space=gym.spaces.Discrete(len(ObjectNaviThorGridTask.class_action_names())),
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            rgb_uuid=cls.SENSORS[0].uuid,
            depth_uuid=None,
            goal_sensor_uuid="goal_object_type_ind",
            hidden_size=512,
            object_type_embedding_dim=8,
        )
    ...
```

## Training pipeline

We now implement a training pipeline which trains with a single stage using PPO.

In the below we use [Builder](/api/allenact/utils/experiment_utils#builder) objects, which allow us to defer the instantiation
of objects of the class passed as their first argument while allowing passing additional keyword arguments to their
initializers. This is necessary when instantiating things like PyTorch optimizers, which take as input the list of
parameters associated with our agent's model (something we can't know until the `create_model` function has been called).
 
```python
class ObjectNavThorPPOExperimentConfig(ExperimentConfig):
    ...
    @classmethod
    def training_pipeline(cls, **kwargs):
        ppo_steps = int(1e6)
        lr = 2.5e-4
        num_mini_batch = 2 if not torch.cuda.is_available() else 6
        update_repeats = 4
        num_steps = 128
        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max length tasks
        save_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 1.0
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps,),
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )
    ...
```

Alternatively, we could use a more sophisticated pipeline that begins training with dataset aggregation
([DAgger](https://www.cs.cmu.edu/~sross1/publications/Ross-AIStats11-NoRegret.pdf)) before moving to training
with PPO. This requires the existence of an
expert (implemented in the task definition) that provides optimal actions to agents. We have implemented such a 
pipeline by extending the above configuration as follows:

```python
class ObjectNavThorDaggerThenPPOExperimentConfig(ObjectNavThorPPOExperimentConfig):
    ...
    SENSORS = [
        RGBSensorThor(
            height=SCREEN_SIZE, width=SCREEN_SIZE, use_resnet_normalization=True,
        ),
        GoalObjectTypeThorSensor(object_types=OBJECT_TYPES),
        ExpertActionSensor(nactions=6), # Notice that we have added an expert action sensor.
    ]
    ...
    @classmethod
    def training_pipeline(cls, **kwargs):
        dagger_steps = int(1e4) # Much smaller number of steps as we're using imitation learning
        ppo_steps = int(1e6)
        lr = 2.5e-4
        num_mini_batch = 1 if not torch.cuda.is_available() else 6
        update_repeats = 4
        num_steps = 128
        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max length tasks
        save_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 1.0
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),
                "imitation_loss": Imitation(), # We add an imitation loss.
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[ # The pipeline now has two stages, in the first
                              # we use DAgger (imitation loss + teacher forcing).
                              # In the second stage we no longer use teacher
                              # forcing and add in the ppo loss.
                PipelineStage(
                    loss_names=["imitation_loss"],
                    teacher_forcing=LinearDecay(
                        startp=1.0, endp=0.0, steps=dagger_steps,
                    ),
                    max_stage_steps=dagger_steps,
                ),
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps,),
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )
``` 

A version of our experiment config file for which we have implemented this two-stage training
can be found [here](https://github.com/allenai/allenact/blob/master/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object.py).
This two-stage configuration `ObjectNavThorDaggerThenPPOExperimentConfig` is actually implemented _as a subclass of `ObjectNavThorPPOExperimentConfig`_.
This is a common pattern used in AllenAct and lets one skip a great deal of boilerplate when defining a new
experiment as a slight modification of an old one. Of course one must then be careful: changes to the superclass
configuration will propagate to all subclassed configurations. 

## Machine configuration

In `machine_params` we define machine configuration parameters that will be used for training, validation and test:
```python
class ObjectNavThorPPOExperimentConfig(allenact.base_abstractions.experiment_config.ExperimentConfig):
    ...
    @classmethod
    def machine_params(cls, mode="train", **kwargs):
        num_gpus = torch.cuda.device_count()
        has_gpu = num_gpus != 0 

        if mode == "train":
            nprocesses = 20 if has_gpu else 4
            gpu_ids = [0] if has_gpu else []
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = [1 % num_gpus] if has_gpu else []
        elif mode == "test":
            nprocesses = 1
            gpu_ids = [0] if has_gpu else []
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        return {"nprocesses": nprocesses, "gpu_ids": gpu_ids}
    ...
```
In the above we use the availability of cuda (`torch.cuda.device_count() !=  0`) to determine whether
we should use parameters appropriate for local machines or for a server. We might optionally add a list of
`sampler_devices` to assign devices (likely those not used for running our agent) to task sampling workers.

## Task sampling

The above has defined the model we'd like to use, the types of losses we wish to use during training,
and the machine specific parameters that should be used during training. Critically we have not yet
defined which task we wish to train our agent to complete. This is done by implementing the 
`ExperimentConfig.make_sampler_fn` function
```python
class ObjectNavThorPPOExperimentConfig(ExperimentConfig):
    ...
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        return ObjectNavTaskSampler(**kwargs)
    ...
```
Now, before training starts, our trainer will know to generate a collection of task
samplers using `make_sampler_fn` for training (and possibly validation or testing).
The `kwargs` parameters used in the above function call can be different for each
training process; we implement such differences using the
`ExperimentConfig.train_task_sampler_args` function
```python
class ObjectNavThorPPOExperimentConfig(ExperimentConfig):
    ...
    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        res = self._get_sampler_args_for_scene_split(
            self.TRAIN_SCENES,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_period"] = "manual"
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        res["env_args"]["x_display"] = (
            ("0.%d" % devices[process_ind % len(devices)])
            if devices is not None and len(devices) > 0
            else None
        )
        return res
    ...
```
Now training process `i` out of `n` total processes will be instantiated with the parameters
`ObjectNavThorPPOExperimentConfig.train_task_sampler_args(i, n, ...)`. Similar functions
 (`valid_task_sampler_args` and `test_task_sampler_args`) exist for generating validation
 and test parameters. Note also that with this function we can assign devices to run
 our environment for each worker. See the documentation of `ExperimentConfig` for more information.
 

## Running the experiment

We are now in the position to run the experiment (with seed 12345) using the command
```bash
python main.py object_nav_ithor_ppo_one_object -b projects/tutorials -s 12345
```


================================================
FILE: docs/howtos/running-a-multi-agent-experiment.md
================================================
# To-do


================================================
FILE: docs/howtos/visualizing-results.md
================================================
# To-do


================================================
FILE: docs/installation/download-datasets.md
================================================
# Downloading datasets 

**Note:** These instructions assume you have
[installed the full library](../installation/installation-allenact.md#full-library) and, generally, [installed
specific plugin requirements](../installation/installation-allenact.md#plugins-extra-requirements).

The below provides instructions on how to download datasets necessary for defining the train, validation, and
test sets used within the ObjectNav/PointNav tasks in the `iTHOR` and `RoboTHOR` environments.

<!--
Note that these datasets **do not include** scene assets for the below datasets. For `iTHOR` and `RoboTHOR`
these scene assets will be downloaded automatically; for `habitat` please follow the instructions
in [this tutorial](installation-framework.md).
-->

## Point Navigation (PointNav)

### RoboTHOR
To get the PointNav dataset for `RoboTHOR` run the following command:
```bash
bash datasets/download_navigation_datasets.sh robothor-pointnav
```
This will download the dataset into `datasets/robothor-pointnav`.

### iTHOR
To get the PointNav dataset for `iTHOR` run the following command:
```bash
bash datasets/download_navigation_datasets.sh ithor-pointnav
```
This will download the dataset into `datasets/ithor-pointnav`.

## Object Navigation (ObjectNav)

### RoboTHOR
To get the ObjectNav dataset for `RoboTHOR` run the following command:

```bash
bash datasets/download_navigation_datasets.sh robothor-objectnav
```
This will download the dataset into `datasets/robothor-objectnav`.

### iTHOR
To get the ObjectNav dataset for `iTHOR` run the following command:
```bash
bash datasets/download_navigation_datasets.sh ithor-objectnav
```
This will download the dataset into `datasets/ithor-objectnav`.


================================================
FILE: docs/installation/installation-allenact.md
================================================
# Installation of AllenAct

**Note 1:** This library has been tested *only* in python `3.6.*`/`3.7.*`. The following assumes you have a working
version of *python 3.6/3.7* installed locally. 

**Note 2:** If you are installing `allenact` intending to use a GPU for training/inference and your
current machine uses an older version of CUDA you may need to manually install the version of 
PyTorch that supports your CUDA version. In such a case, after installing the below requirements, you
should follow the directions for installing PyTorch with older
versions of CUDA available on the [PyTorch homepage](https://pytorch.org/).

In order to install `allenact` and/or its requirements we recommend creating a new
[python virtual environment](https://docs.python.org/3/tutorial/venv.html) and installing all
of the below requirements into this virtual environment.

Alternatively, we also document how to [install a conda environment](#installing-a-conda-environment)
with all the requirements, which is especially useful if you plan to train models in [Habitat](https://aihabitat.org/).

## Different ways to use `allenact`

There are three main installation paths depending on how you wish to use `allenact`.

1. You want to use the `allenact` abstractions and training engine for your own task/environment and don't really 
care about using any of our plugins that offer additional support (in the form of models, sensors, task samplers, etc.)
for select tasks/environments like AI2-THOR, Habitat, and MiniGrid.
    - If this sounds like you, install the [standalone framework](#standalone-framework).
1. You want to use `allenact` as above but would also like to use some of our additional plugins.
    - If this sounds like you, install the [framework and plugins](#framework-and-plugins).
1. You want full access to everything in `allenact` (including all plugins and all of our projects and baselines)
   and want to have the option to edit the internal implementation of `allenact` to suit your desire. 
    - If this sounds like you, install the [full library](#full-library).   


## Standalone framework

You can install `allenact` easily using pip:

```bash
pip install allenact
```

If you'd like to install the latest development version of `allenact` (possibly unstable) directly from GitHub see the
next section.

### Bleeding edge pip install

To install the latest `allenact` framework, you can use

```bash
pip install -e "git+https://github.com/allenai/allenact.git@main#egg=allenact&subdirectory=allenact"
```

and, similarly, you can also use

```bash
pip install -e "git+https://github.com/allenai/allenact.git@main#egg=allenact_plugins[all]&subdirectory=allenact_plugins"
```

to install all plugins.

Depending on your machine configuration, you may need to use `pip3` instead of `pip` in the commands
above.

## Framework and plugins

To install `allenact` and all available plugins, run

```bash
pip install allenact allenact_plugins[all]
```

which will install `allenact` and `allenact_plugins` packages along with the requirements for _all_
of the plugins (when possible). If you only want to install the requirements for some subset of plugins, you can
specify these plugins with the `allenact_plugins[plugin1,plugin2]` notation. For instance, to install requirements
for the `ithor_plugin` and the `minigrid_plugin`, simply run:

```bash
pip install allenact allenact_plugins[ithor,minigrid]
```

A list of all available plugins can be found [here](https://github.com/allenai/allenact/tree/master/allenact_plugins).

## Full library

Clone the `allenact` repository to your local machine and move into the top-level directory

```bash
git clone git@github.com:allenai/allenact.git
cd allenact
```

Below we describe two alternative ways to install all dependencies via `pip` or `conda`.

### Installing requirements with `pip`

All requirements for `allenact` (not including plugin requirements) may be installed by running the following command:

```bash
pip install -r requirements.txt; pip install -r dev_requirements.txt
```

To install plugin requirements, see below.

#### Plugins extra requirements

To install the specific requirements of each plugin, we need to additionally call

```bash
pip install -r allenact_plugins/<PLUGIN_NAME>_plugin/extra_requirements.txt
```

from the top-level directory.

### Installing a `conda` environment

_If you are unfamiliar with Conda, please familiarize yourself with their [introductory documentation](https://docs.conda.io/projects/conda/en/latest/).
If you have not already, you will need to first [install Conda (i.e. Anaconda or Miniconda)](https://docs.conda.io/projects/conda/en/latest/user-guide/install/)
on your machine. We suggest installing [Miniconda](https://docs.conda.io/projects/conda/en/latest/glossary.html#miniconda-glossary)
as it's relatively lightweight._

The `conda` folder contains YAML files specifying [Conda environments](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-from-an-environment-yml-file)
compatible with AllenAct. These environment files include: 

* `environment-base.yml` - A base environment file to be used on all machines (it includes
[PyTorch](https://pytorch.org/) with the latest `cudatoolkit`).
* `environment-dev.yml` - Additional dev dependencies.
* `environment-<CUDA_VERSION>.yml` - Additional dependencies, where `<CUDA_VERSION>` is the CUDA version used on your
machine (if you are using linux, you might find this version by running `/usr/local/cuda/bin/nvcc --version`).
* `environment-cpu.yml` - Additional dependencies to be used on machines where GPU support is not needed (everything
 will be run on the CPU).
 

For the moment let's assume you're using `environment-base.yml` above. To install a conda environment with name `allenact`
 using this file you can simply run the following (*this will take a few minutes*):

```bash
conda env create --file ./conda/environment-base.yml --name allenact
``` 
The above is very simple but has the side effect of creating a new `src` directory where it will
place some of AllenAct's dependencies. To get around this, instead of running the above you can instead
run the commands:

```bash
export MY_ENV_NAME=allenact
export CONDA_BASE="$(dirname $(dirname "${CONDA_EXE}"))"
export PIP_SRC="${CONDA_BASE}/envs/${MY_ENV_NAME}/pipsrc"
conda env create --file ./conda/environment-base.yml --name $MY_ENV_NAME
``` 

These additional commands tell conda to place these dependencies under the `${CONDA_BASE}/envs/${MY_ENV_NAME}/pipsrc` directory rather
than under `src`; this is more in line with where we'd expect dependencies to be placed when running `pip install ...`.

If needed, you can use one of the `environment-<CUDA_VERSION>.yml` environment files to install the proper version of
the `cudatoolkit` by running:

```bash
conda env update --file ./conda/environment-<CUDA_VERSION>.yml --name allenact
```
or the CPU-only version:
```bash
conda env update --file ./conda/environment-cpu.yml --name allenact
```

#### Using the `conda` environment

Now that you've installed the conda environment as above, you can activate it by running:

```bash
conda activate allenact
```

after which you can run everything as you would normally.


#### Installing supported environments with `conda`

Each supported plugin contains a YAML environment file that can be applied upon the existing `allenact` environment. To
install the specific requirements of each plugin, we need to additionally call

```bash
conda env update --file allenact_plugins/<PLUGIN_NAME>_plugin/extra_environment.yml --name $MY_ENV_NAME
```

from the top-level directory.

**Habitat:** Note that, for habitat, we provide two environment types, depending on whether your machine is connected to a
display. More details can be found [here](../installation/installation-framework.md#installation-of-habitat). 


================================================
FILE: docs/installation/installation-framework.md
================================================
# Installation of supported environments

In general, each supported environment can be installed by just following the instructions to
[install the full library and specific requirements of every plugin](../installation/installation-allenact.md#full-library)
either [via pip](../installation/installation-allenact.md#installing-requirements-with-pip) or
[via Conda](../installation/installation-allenact.md#installing-a-conda-environment).

Below we provide additional installation instructions for a number of environments that we support and
provide some guidance for problems commonly experienced when using these environments.

## Installation of iTHOR (`ithor` plugin)

The first time you run an experiment with `iTHOR` (or any script that uses `ai2thor`)
the library will download all of the assets it requires to render the scenes automatically.
However, the datasets must be manually downloaded as described [here](../installation/download-datasets.md).

**Trying to use `iTHOR` on a machine without an attached display?** 

**Note:** These instructions assume you have
[installed the full library](../installation/installation-allenact.md#full-library).

If you wish to run `iTHOR` on a machine without an attached display (for instance, a remote server such as an AWS
 machine) you will also need to run a script that launches `xserver` processes on your GPUs. This can be done
 with the following command:

```bash
sudo python scripts/startx.py &
```

Notice that you need to run the command with `sudo` (i.e. administrator privileges). If you do not have `sudo` 
access (for example if you are running this on a shared university machine) you
can ask your administrator to run it for you. You only need to run it once (as
long as you do not turn off your machine).

## Installation of RoboTHOR (`robothor` plugin)

`RoboTHOR` is installed in the same way as `iTHOR`. For more information see the above section on installing `iTHOR`. 

## Installation of Habitat

Installing habitat requires 

1. Installing the `habitat-lab` and `habitat-sim` packages.
   - This may be done by either following the [directions provided by Habitat themselves](https://github.com/facebookresearch/habitat-lab#installation)
or by using our `conda` installation instructions below. 
1. Downloading the scene assets (i.e. the Gibson or Matterport scene files) relevant to whichever task you're interested in.
   - Unfortunately we cannot legally distribute these files to you directly. Instead you will need to download these
     yourself. See [here](https://github.com/facebookresearch/habitat-lab#Gibson) for how you can download 
     the Gibson files and [here](https://github.com/facebookresearch/habitat-lab#matterport3d) for directions on
     how to download the Matterport files.
1. Downloading the dataset files for the task you're interested in (e.g. PointNav, ObjectNav, etc).
   - See [here](https://github.com/facebookresearch/habitat-lab#task-datasets) for links to these dataset files.
 
<!--
### Using Docker

To run experiments using Habitat please use our docker image using the following command:

```bash
docker pull allenact/allenact:latest
```

This container includes the 0.1.0 release of `allenact`, the 0.1.5 release of `habitat` as well
as the `Gibson` point navigation dataset. This dataset consists of a set of start and goal positions provided by habitat.
You then need to launch the container and attach into it:

```bash
docker run --runtime=nvidia -it allenact/allenact
```
If you are running the container on a machine without an Nvidia GPU, omit the `--runtime=nvidia` flag.

Once inside the container simply `cd` into the `allenact` directory where all the allenact and habitat code should be stored:
 
Unfortunately we cannot legally redistribute the Gibson scenes by including them in the above container.
Instead you will need to download these yourself by filling out 
[this form](https://docs.google.com/forms/d/e/1FAIpQLScWlx5Z1DM1M-wTSXaa6zV8lTFkPmTHW1LqMsoCBDWsTDjBkQ/viewform)
and downloading the `gibson_habitat_trainval` data. Extract the scene assets (`.glb` files) into `habitat-lab/data/scene_datasets/` 
within the above container. You can then proceed to run your experiments using `allenact` as you normally would.
-->

### Using `conda`

Habitat has recently released the option to install their simulator using `conda` which avoids having
to manually build dependencies or use Docker. This does not guarantee that the installation process
is completely painless (it is difficult to avoid all possible build issues) but we've found it
to be a nice alternative to using Docker. To use this installation option please first
install an AllenAct `conda` environment using the instructions available [here](../installation/installation-allenact.md#installing-a-conda-environment).
After installing this environment, you can then install `habitat-sim` and `habitat-lab` by running:

If you are on a machine with an attached display:
```bash
export MY_ENV_NAME=allenact
export CONDA_BASE="$(dirname $(dirname "${CONDA_EXE}"))"
export PIP_SRC="${CONDA_BASE}/envs/${MY_ENV_NAME}/pipsrc"
conda env update --file allenact_plugins/habitat_plugin/extra_environment.yml --name $MY_ENV_NAME
```

If you are on a machine without an attached display (e.g. a server), replace the last command by:
```bash
conda env update --file allenact_plugins/habitat_plugin/extra_environment_headless.yml --name $MY_ENV_NAME
```

After these steps, feel free to proceed to download the required scene assets and task-specific dataset files as
described above.

<!--
#### Installing a Conda environment

_If you are unfamiliar with Conda, please familiarize yourself with their [introductory documentation](https://docs.conda.io/projects/conda/en/latest/).
If you have not already, you will need to first [install Conda (i.e. Anaconda or Miniconda)](https://docs.conda.io/projects/conda/en/latest/user-guide/install/)
on your machine. We suggest installing [Miniconda](https://docs.conda.io/projects/conda/en/latest/glossary.html#miniconda-glossary)
as it's relatively lightweight._

Clone the `allenact` repository to your local machine and move into the top-level directory

```bash
git clone git@github.com:allenai/allenact.git
cd allenact
```

The `conda` folder contains YAML files specifying [Conda environments](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-from-an-environment-yml-file)
compatible with AllenAct. These environment files include: 

* `environment-base.yml` - A base environment file to be used on machines where the version of CUDA on your machine
matches the one of the latest `cudatoolkit` in conda.
* `environment-dev.yml` - Additional dev dependencies.
* `environment-<CUDA_VERSION>.yml` - Additional dependencies, where `<CUDA_VERSION>` is the CUDA version used on your
machine (if you are using linux, you might find this version by running `/usr/local/cuda/bin/nvcc --version`).
* `environment-cpu.yml` - Additional dependencies to be used on machines where GPU support is not needed (everything
 will be run on the CPU).
 

For the moment let's assume you're using `environment-base.yml` above. To install a conda environment with name `allenact`
 using this file you can simply run the following (*this will take a few minutes*):

```bash
conda env create --file ./conda/environment-base.yml --name allenact
``` 
The above is very simple but has the side effect of creating a new `src` directory where it will
place some of AllenAct's dependencies. To get around this, instead of running the above you can instead
run the commands:

```bash
export MY_ENV_NAME=allenact
export CONDA_BASE="$(dirname $(dirname "${CONDA_EXE}"))"
export PIP_SRC="${CONDA_BASE}/envs/${MY_ENV_NAME}/pipsrc"
conda env create --file ./conda/environment-base.yml --name $MY_ENV_NAME
``` 

These additional commands tell conda to place these dependencies under the `${CONDA_BASE}/envs/${MY_ENV_NAME}/pipsrc` directory rather
than under `src`, this is more in line with where we'd expect dependencies to be placed when running `pip install ...`.

If needed, you can use one of the `environment-<CUDA_VERSION>.yml` environment files to install the proper version of
the `cudatoolkit` by running:

```bash
conda env update --file ./conda/environment-<CUDA_VERSION>.yml --name allenact
```
or the CPU-only version:
```bash
conda env update --file ./conda/environment-cpu.yml --name allenact
```

##### Using the Conda environment

Now that you've installed the conda environment as above, you can activate it by running:

```bash
conda activate allenact
```

after which you can run everything as you would normally.
-->

================================================
FILE: docs/javascripts/extra.js
================================================
// The below can be used to open all nav links in the documentation. The code was found at
// https://github.com/squidfunk/mkdocs-material/issues/767#issuecomment-384558269
// and was written by the user Akkadius.
/*
document.addEventListener("DOMContentLoaded", function() {
    load_navpane();
});

function load_navpane() {
    var width = window.innerWidth;
    if (width <= 1200) {
        return;
    }

    var nav = document.getElementsByClassName("md-nav");
    for (var i = 0; i < nav.length; i++) {
        if (typeof nav.item(i).style === "undefined") {
            continue;
        }

        if (nav.item(i).getAttribute("data-md-level") && nav.item(i).getAttribute("data-md-component")) {
            nav.item(i).style.display = 'block';
            nav.item(i).style.overflow = 'visible';
        }
    }

    var nav = document.getElementsByClassName("md-nav__toggle");
    for(var i = 0; i < nav.length; i++) {
       nav.item(i).checked = true;
    }
}
*/

================================================
FILE: docs/notebooks/firstbook.md
================================================
# To-do

================================================
FILE: docs/projects/advisor_2020/README.md
================================================
# Experiments for Advisor

## TODO: 

1. Add details taken from https://unnat.github.io/advisor/. 
2. Cite the arxiv paper.
3. Give a list of things you can run with bash commands.
4. Ideally be able to recreate a large set of experiments.

================================================
FILE: docs/projects/babyai_baselines/README.md
================================================
# Baseline experiments for the BabyAI environment

We perform a collection of baseline experiments within the BabyAI environment
 on the GoToLocal task, see the `projects/babyai_baselines/experiments/go_to_local` directory.
 For instance, to train a model using PPO, run
 
```bash
python main.py go_to_local.ppo --experiment_base projects/babyai_baselines/experiments
```

Note that these experiments will be quite slow when not using a GPU as the BabyAI model architecture is surprisingly 
large. Specifying a GPU (if available) can be done from the command line using hooks we created using 
[gin-config](https://github.com/google/gin-config). E.g. to train using the 0th GPU device, add

```bash
--gp "machine_params.gpu_id = 0"
```  

to the above command.

================================================
FILE: docs/projects/gym_baselines/README.md
================================================
# Baseline models Gym (for MuJoCo environments)

This project contains the code for training baseline models for the tasks under the [MuJoCo](https://gym.openai.com/envs/#mujoco) group of Gym environments, including ["Ant-v2"](https://gym.openai.com/envs/Ant-v2/), ["HalfCheetah-v2"](https://gym.openai.com/envs/HalfCheetah-v2/), ["Hopper-v2"](https://gym.openai.com/envs/Hopper-v2/), ["Humanoid-v2"](https://gym.openai.com/envs/Humanoid-v2/), ["InvertedDoublePendulum-v2"](https://gym.openai.com/envs/InvertedDoublePendulum-v2/), ["InvertedPendulum-v2"](https://gym.openai.com/envs/InvertedPendulum-v2/), ["Reacher-v2"](https://gym.openai.com/envs/Reacher-v2/), ["Swimmer-v2"](https://gym.openai.com/envs/Swimmer-v2/), and ["Walker2d-v2"](https://gym.openai.com/envs/Walker2d-v2/).

Provided are experiment configs for training a lightweight implementation with separate MLPs for actors and critic, [MemorylessActorCritic](https://allenact.org/api/allenact_plugins/gym_plugin/gym_models/#memorylessactorcritic), with a [Gaussian distribution](https://allenact.org/api/allenact_plugins/gym_plugin/gym_distributions/#gaussiandistr) to sample actions for all continuous-control environments under the `MuJoCo` group of `Gym` environments. 

The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)
Reinforcement Learning Algorithm. 

To train an experiment run the following command from the `allenact` root directory:

```bash
python main.py <PATH_TO_EXPERIMENT_CONFIG> -o <PATH_TO_OUTPUT>
```

Where `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights
and logs to be stored and `<PATH_TO_EXPERIMENT_CONFIG>` is the path to the python file containing
the experiment configuration. An example usage of this command would be:

```bash
python main.py projects/gym_baselines/experiments/mujoco/gym_mujoco_ant_ddppo.py -o /YOUR/DESIRED/MUJOCO/OUTPUT/SAVE/PATH/gym_mujoco_ant_ddppo
```

This trains a lightweight implementation with separate MLPs for actors and critic with a Gaussian distribution to sample actions in the "Ant-v2" environment, and stores the model weights and logs
to `/YOUR/DESIRED/MUJOCO/OUTPUT/SAVE/PATH/gym_mujoco_ant_ddppo`.

## Results

In our experiments, the rewards we obtained for the MuJoCo environments after training with PPO are similar to those reported by OpenAI Gym Baselines (1M steps). For the Humanoid environment we instead compare against the original PPO paper, which trained for 50M steps using PPO. Due to time constraints, we have only tested our baselines across two seeds so far.


| Environment           | Gym Baseline Reward | Ours Reward |
| -----------           | ------------------- | ----------- |
|[Ant-v2](https://gym.openai.com/envs/Ant-v2/)| 1083.2 |1098.6(reached 4719 in 25M steps)  | 
| [HalfCheetah-v2](https://gym.openai.com/envs/HalfCheetah-v2/) | 1795.43             |  1741(reached 4019 in 18M steps)           |
|[Hopper-v2](https://gym.openai.com/envs/Hopper-v2/)|2316.16|2266|
|[Humanoid-v2](https://gym.openai.com/envs/Humanoid-v2/)|4000+|4500+(reached 6500 in 70M steps)|
| [InvertedPendulum-v2](https://gym.openai.com/envs/InvertedPendulum-v2/) | 809.43              |  1000       |
|[Reacher-v2](https://gym.openai.com/envs/Reacher-v2/)|-6.71|-7.045|
|[Swimmer-v2](https://gym.openai.com/envs/Swimmer-v2/)|111.19|124.7|
|[Walker2d](https://gym.openai.com/envs/Walker2d-v2/)|3424.95|2723 in 10M steps|


================================================
FILE: docs/projects/objectnav_baselines/README.md
================================================
# Baseline models ObjectNav (for RoboTHOR/iTHOR)

This project contains the code for training baseline models for the ObjectNav task. In ObjectNav, the agent
spawns at a location in an environment and is tasked to explore the environment until it finds an object of a
certain type (such as TV or Basketball). Once the agent is confident that it has the object within sight
it executes the `END` action which terminates the episode. If the agent is within a set
distance to the target (in our case 1.0 meters) and the target is visible within its observation frame
the agent succeeded, otherwise it failed.

Provided are experiment configs for training a simple convolutional model with
a GRU using `RGB`, `Depth` or `RGB-D` (i.e. `RGB+Depth`) as inputs in
[RoboTHOR](https://ai2thor.allenai.org/robothor/) and [iTHOR](https://ai2thor.allenai.org/ithor/).

The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)
Reinforcement Learning Algorithm. For the RoboTHOR environment we also have an experiment
(`objectnav_robothor_rgb_resnetgru_dagger.py`) showing how a model can be trained using DAgger,
a form of imitation learning.

To train an experiment run the following command from the `allenact` root directory:

```bash
python main.py <PATH_TO_EXPERIMENT_CONFIG> -o <PATH_TO_OUTPUT> -c
```

Where `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights
and logs to be stored and `<PATH_TO_EXPERIMENT_CONFIG>` is the path to the python file containing
the experiment configuration. An example usage of this command would be:

```bash
python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet_ddppo.py -o storage/objectnav-robothor-rgb
```

This trains a simple convolutional neural network with a GRU using RGB input 
passed through a pretrained ResNet-18 visual encoder on the
ObjectNav task in the RoboTHOR environment and stores the model weights and logs
to `storage/objectnav-robothor-rgb`.

## RoboTHOR ObjectNav 2021 Challenge

The experiment configs found under the `projects/objectnav_baselines/experiments/robothor` directory are designed
to conform to the requirements of the [RoboTHOR ObjectNav 2021 Challenge](https://ai2thor.allenai.org/robothor/cvpr-2021-challenge).

### Training a baseline
To train a baseline ResNet->GRU model taking RGB-D inputs, run the following command
```bash
python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnet_ddppo.py -o storage/objectnav-robothor-rgbd
```
By default, when using a machine with a GPU, the above experiment will attempt to train using 60 parallel processes
across all available GPUs. See the `TRAIN_GPU_IDS` constant in `experiments/objectnav_thor_base.py` and
the `NUM_PROCESSES` constant in `experiments/robothor/objectnav_robothor_base.py` if you'd like to change which
GPUs are used or how many processes are run respectively.

### Downloading our pretrained model checkpoint
We provide a pretrained model obtained by allowing the above command to run for all 300M training steps and then selecting
the model checkpoint with best validation-set performance (for us occurring at ~170M training steps). You can download 
this model checkpoint by running
```bash
bash pretrained_model_ckpts/download_navigation_model_ckpts.sh robothor-objectnav-challenge-2021
```
from the top-level directory. This will download the pretrained model weights and save them at the path
```bash
pretrained_model_ckpts/robothor-objectnav-challenge-2021/Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO/2021-02-09_22-35-15/exp_Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO_0.2.0a_300M__stage_00__steps_000170207237.pt
```

### Running inference on the pretrained model

You can run inference on the above pretrained model (on the test dataset) by running
```bash
export SAVED_MODEL_PATH=pretrained_model_ckpts/robothor-objectnav-challenge-2021/Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO/2021-02-09_22-35-15/exp_Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO_0.2.0a_300M__stage_00__steps_000170207237.pt
python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnetgru_ddppo.py -c $SAVED_MODEL_PATH --eval
```
To discourage "cheating", the test dataset has been scrubbed of the information needed to actually compute the success rate / SPL
of your model and so running the above will only save the trajectories your models take. To evaluate these
trajectories you will have to submit them to our leaderboard, see [here for more details](https://github.com/allenai/robothor-challenge/).
If you'd like to get a sense of whether your model is doing well before submitting to the leaderboard, you can obtain
its success rate / SPL on our validation dataset. To do this, you can simply comment out the line
```python
    TEST_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-objectnav/test")
```
within the `projects/objectnav_baselines/experiments/robothor/objectnav_robothor_base.py` file and rerun the above
`python main.py ...` command (when the test dataset is not given, the code defaults to using the validation set).

================================================
FILE: docs/projects/pointnav_baselines/README.md
================================================
# Baseline models for the Point Navigation task in the Habitat, RoboTHOR and iTHOR environments

This project contains the code for training baseline models on the PointNav task. In this setting the agent
spawns at a location in an environment and is tasked to move to another location. The agent is given a "compass"
that tells it the distance and bearing to the target position at every frame. Once the agent is confident that
it has reached the end it executes the `END` action which terminates the episode. If the agent is within a set
distance to the target (in our case 0.2 meters) the agent succeeded, else it failed.

Provided are experiment configs for training a simple convolutional model with
a GRU using `RGB`, `Depth` or `RGBD` as inputs in [Habitat](https://github.com/facebookresearch/habitat-sim), 
[RoboTHOR](https://ai2thor.allenai.org/robothor/) and [iTHOR](https://ai2thor.allenai.org/ithor/).

The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)
Reinforcement Learning Algorithm.

To train an experiment run the following command from the `allenact` root directory:

```bash
python main.py -o <PATH_TO_OUTPUT> -c -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> <EXPERIMENT_NAME>
```

Where `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights
and logs to be stored, `<BASE_DIRECTORY_OF_YOUR_EXPERIMENT>` is the directory where our
experiment file is located and `<EXPERIMENT_NAME>` is the name of the python module containing
the experiment. An example usage of this command would be:

```bash
python main.py -o storage/pointnav-robothor-depth -b projects/pointnav_baselines/experiments/robothor/ pointnav_robothor_depth_simpleconvgru_ddppo
```

This trains a simple convolutional neural network with a GRU using Depth input on the
PointNav task in the RoboTHOR environment and stores the model weights and logs
to `storage/pointnav-robothor-depth`.


================================================
FILE: docs/projects/two_body_problem_2019/README.md
================================================
# Experiments for the Two Body Problem paper

## TODO: 

1. Add details taken from https://prior.allenai.org/projects/two-body-problem 
2. Cite the CVPR paper.
3. Give a list of things you can run with bash commands.
4. At least a subset of the experiments.

================================================
FILE: docs/tutorials/distributed-objectnav-tutorial.md
================================================
<!-- DO NOT EDIT THIS FILE. --> 
<!-- THIS FILE WAS AUTOGENERATED FROM 'ALLENACT_BASE_DIR/projects/tutorials/distributed_objectnav_tutorial.py', EDIT IT INSTEAD. -->

# Tutorial: Distributed training across multiple nodes.
**Note** The commands provided in this tutorial include a configuration script to
[clone the full library](../installation/installation-allenact.md#full-library). Setting up headless THOR might
require superuser privileges. We also assume [NCCL](https://developer.nvidia.com/nccl) is available for communication
across computation nodes and all nodes have a running `ssh` server.

The experimental tools and commands for distributed training introduced below assume a Linux OS (tested on Ubuntu
18.04).

In this tutorial, we:

1. Introduce the available API for training across multiple nodes, as well as experimental scripts for distributed
configuration, training start and termination, and remote command execution.
1. Introduce the headless mode for [AI2-THOR](https://ai2thor.allenai.org/) in `AllenAct`. Note that, in contrast with
previous tutorials using AI2-THOR, this time we don't require an xserver (in Linux) to be active.
1. Show a training example for RoboTHOR ObjectNav on a cluster, with each node having sufficient GPUs and GPU memory to
host 60 experience samplers collecting rollout data.

Thanks to the massive parallelization of experience collection and model training enabled by
[DD-PPO](https://arxiv.org/abs/1911.00357), we can greatly speed up training by scaling across multiple nodes:

![training speedup](../img/multinode_training.jpg)

## The task: ObjectNav

In ObjectNav, the goal for the agent is to navigate to an object (possibly unseen during training) of a known given
class and signal task completion when it determines it has reached the goal.


## Implementation

For this tutorial, we'll use the readily available `objectnav_baselines` project, which includes configurations for
a wide variety of object navigation experiments for both iTHOR and RoboTHOR. Since those configuration files are
defined for a single-node setup, we will mainly focus on the changes required in the `machine_params` and
`training_pipeline` methods.

Note that, in order to use the headless version of AI2-THOR, we currently need to install a specific THOR commit,
different from the default one in `robothor_plugin`. Note that this command is included in the configuration script
below, so **we don't need to run this**:

```bash
pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+91139c909576f3bf95a187c5b02c6fd455d06b48
```

The experiment config starts as follows:

```python
import math
from typing import Optional, Sequence

import torch
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.utils.experiment_utils import (
    Builder,
    LinearDecay,
    MultiLinearDecay,
    TrainingPipeline,
    PipelineStage,
)
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_rgb_resnet18gru_ddppo import (
    ObjectNavRoboThorRGBPPOExperimentConfig as BaseConfig,
)


class DistributedObjectNavRoboThorRGBPPOExperimentConfig(BaseConfig):
    def tag(self) -> str:
        return "DistributedObjectNavRoboThorRGBPPO"
```
We override ObjectNavRoboThorBaseConfig's THOR_COMMIT_ID to match the installed headless one:
```python
    THOR_COMMIT_ID = "91139c909576f3bf95a187c5b02c6fd455d06b48"
```
Also indicate that we're using headless THOR (for `task_sampler_args` methods):
```python
    THOR_IS_HEADLESS = True
```
**Temporary hack** Disable the `commit_id` argument passed to the THOR `Controller`'s `init` method:
```python
    def env_args(self):
        res = super().env_args()
        res.pop("commit_id", None)
        return res
```
And, of course, define the number of nodes. This will be used by `machine_params` and `training_pipeline` below.
We override the existing `ExperimentConfig`'s `init` method to include control on the number of nodes:

```python
    def __init__(
        self,
        distributed_nodes: int = 1,
        num_train_processes: Optional[int] = None,
        train_gpu_ids: Optional[Sequence[int]] = None,
        val_gpu_ids: Optional[Sequence[int]] = None,
        test_gpu_ids: Optional[Sequence[int]] = None,
    ):
        super().__init__(
            num_train_processes=num_train_processes,
            train_gpu_ids=train_gpu_ids,
            val_gpu_ids=val_gpu_ids,
            test_gpu_ids=test_gpu_ids,
        )
        self.distributed_nodes = distributed_nodes
```
### Machine parameters

**Note:** We assume that all nodes are identical (same number and model of GPUs and drivers).

The `machine_params` method will be invoked by `runner.py` with different arguments, e.g. to determine the
configuration for validation or training.

When working in distributed settings, `AllenAct` needs to know the total number of trainers across all nodes as well
as the local number of trainers. This is accomplished through the introduction of a `machine_id` keyword argument,
which will be used to define the training parameters as follows:

```python
    def machine_params(self, mode="train", **kwargs):
        params = super().machine_params(mode, **kwargs)

        if mode == "train":
            params.devices = params.devices * self.distributed_nodes
            params.nprocesses = params.nprocesses * self.distributed_nodes
            params.sampler_devices = params.sampler_devices * self.distributed_nodes

            if "machine_id" in kwargs:
                machine_id = kwargs["machine_id"]
                assert (
                    0 <= machine_id < self.distributed_nodes
                ), f"machine_id {machine_id} out of range [0, {self.distributed_nodes - 1}]"

                local_worker_ids = list(
                    range(
                        len(self.train_gpu_ids) * machine_id,
                        len(self.train_gpu_ids) * (machine_id + 1),
                    )
                )

                params.set_local_worker_ids(local_worker_ids)

            # Confirm we're setting up train params nicely:
            print(
                f"devices {params.devices}"
                f"\nnprocesses {params.nprocesses}"
                f"\nsampler_devices {params.sampler_devices}"
                f"\nlocal_worker_ids {params.local_worker_ids}"
            )
        elif mode == "valid":
            # Use all GPUs at their maximum capacity for training
            # (you may run validation in a separate machine)
            params.nprocesses = (0,)

        return params
```
In summary, we need to specify which indices in `devices`, `nprocesses` and `sampler_devices` correspond to the
local `machine_id` node (whenever a `machine_id` is given as a keyword argument), otherwise we specify the global
configuration.

### Training pipeline

In preliminary ObjectNav experiments, we observe that small batches are useful during the initial training steps in
terms of sample efficiency, whereas large batches are preferred during the rest of training.

In order to scale to the larger amount of collected data in multi-node settings, we will proceed with a two-stage
pipeline:

1. In the first stage, we'll enforce a number of updates per amount of collected data similar to the
configuration with a single node by enforcing more batches per rollout (for about 30 million steps).
1. In the second stage we'll switch to a configuration with larger learning rate and batch size to be
used up to the grand total of 300 million experience steps.

We first define a helper method to generate a learning rate curve with decay for each stage:

```python
    @staticmethod
    def lr_scheduler(small_batch_steps, transition_steps, ppo_steps, lr_scaling):
        safe_small_batch_steps = int(small_batch_steps * 1.02)
        large_batch_and_lr_steps = ppo_steps - safe_small_batch_steps - transition_steps

        # Learning rate after small batch steps (assuming decay to 0)
        break1 = 1.0 - safe_small_batch_steps / ppo_steps

        # Initial learning rate for large batch (after transition from initial to large learning rate)
        break2 = lr_scaling * (
            1.0 - (safe_small_batch_steps + transition_steps) / ppo_steps
        )
        return MultiLinearDecay(
            [
                # Base learning rate phase for small batch (with linear decay towards 0)
                LinearDecay(steps=safe_small_batch_steps, startp=1.0, endp=break1,),
                # Allow the optimizer to adapt its statistics to the changes with a larger learning rate
                LinearDecay(steps=transition_steps, startp=break1, endp=break2,),
                # Scaled learning rate phase for large batch (with linear decay towards 0)
                LinearDecay(steps=large_batch_and_lr_steps, startp=break2, endp=0,),
            ]
        )
```
The training pipeline looks like:

```python
    def training_pipeline(self, **kwargs):
        # These params are identical to the baseline configuration for 60 samplers (1 machine)
        ppo_steps = int(300e6)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 4
        num_steps = 128
        save_interval = 5000000
        log_interval = 10000 if torch.cuda.is_available() else 1
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5

        # We add 30 million steps for small batch learning
        small_batch_steps = int(30e6)
        # And a short transition phase towards large learning rate
        # (see comment in the `lr_scheduler` helper method
        transition_steps = int(2 / 3 * self.distributed_nodes * 1e6)

        # Find exact number of samplers per GPU
        assert (
            self.num_train_processes % len(self.train_gpu_ids) == 0
        ), "Expected uniform number of samplers per GPU"
        samplers_per_gpu = self.num_train_processes // len(self.train_gpu_ids)

        # Multiply num_mini_batch by the largest divisor of
        # samplers_per_gpu to keep all batches of same size:
        num_mini_batch_multiplier = [
            i
            for i in reversed(
                range(1, min(samplers_per_gpu // 2, self.distributed_nodes) + 1)
            )
            if samplers_per_gpu % i == 0
        ][0]

        # Multiply update_repeats so that the product of this factor and
        # num_mini_batch_multiplier is >= self.distributed_nodes:
        update_repeats_multiplier = int(
            math.ceil(self.distributed_nodes / num_mini_batch_multiplier)
        )

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={"ppo_loss": PPO(**PPOConfig, show_ratios=False)},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                # We increase the number of batches for the first stage to reach an
                # equivalent number of updates per collected rollout data as in the
                # 1 node/60 samplers setting
                PipelineStage(
                    loss_names=["ppo_loss"],
                    max_stage_steps=small_batch_steps,
                    num_mini_batch=num_mini_batch * num_mini_batch_multiplier,
                    update_repeats=update_repeats * update_repeats_multiplier,
                ),
                # The we proceed with the base configuration (leading to larger
                # batches due to the increased number of samplers)
                PipelineStage(
                    loss_names=["ppo_loss"],
                    max_stage_steps=ppo_steps - small_batch_steps,
                ),
            ],
            # We use the MultiLinearDecay curve defined by the helper function,
            # setting the learning rate scaling as the square root of the number
            # of nodes. Linear scaling might also works, but we leave that
            # check to the reader.
            lr_scheduler_builder=Builder(
                LambdaLR,
                {
                    "lr_lambda": self.lr_scheduler(
                        small_batch_steps=small_batch_steps,
                        transition_steps=transition_steps,
                        ppo_steps=ppo_steps,
                        lr_scaling=math.sqrt(self.distributed_nodes),
                    )
                },
            ),
        )
```
## Multi-node configuration

**Note:** In the following, we'll assume you don't have an available setup for distributed execution, such as
[slurm](https://slurm.schedmd.com/documentation.html). If you do have access to a better alternative to setup and run
distributed processes, we encourage you to use that. The experimental distributed tools included here are intended for
a rather basic usage pattern that might not suit your needs.

If we haven't set up AllenAct with the headless version of AI2-THOR in our nodes, we can define a configuration script
similar to:

```bash
#!/bin/bash

# Prepare a virtualenv for allenact
sudo apt-get install -y python3-venv
python3 -mvenv ~/allenact_venv
source ~/allenact_venv/bin/activate
pip install -U pip wheel

# Install AllenAct
cd ~
git clone https://github.com/allenai/allenact.git
cd allenact

# Install AllenaAct + RoboTHOR plugin dependencies
pip install -r requirements.txt
pip install -r allenact_plugins/robothor_plugin/extra_requirements.txt

# Download + setup datasets
bash datasets/download_navigation_datasets.sh robothor-objectnav

# Install headless AI2-THOR and required libvulkan1
sudo apt-get install -y libvulkan1
pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+91139c909576f3bf95a187c5b02c6fd455d06b48

# Download AI2-THOR binaries
python -c "from ai2thor.controller import Controller; c=Controller(); c.stop()"

echo DONE
```

and save it as `headless_robothor_config.sh`. Note that some of the configuration steps in the script assume you have
superuser privileges.

Then, we can just copy this file to the first node in our cluster and run it with:

```bash
source <PATH/TO/headless_robothor_config.sh>
```

If everything went well, we should be able to

```bash
cd ~/allenact && source ~/allenact_venv/bin/activate
```

Note that we might need to install `libvulkan1` in each node (even if the AllenAct setup is shared across nodes) if it
is not already available.

### Local filesystems

If our cluster does not use a shared filesystem, we'll need to propagate the setup to the rest of nodes. Assuming
we can just `ssh` with the current user to all nodes, we can propagate our config with

```bash
scripts/dconfig.py --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \
--config_script <PATH/TO/headless_robothor_config.sh>
```

and we can check the state of the installation with the `scripts/dcommand.py` tool:

```bash
scripts/dcommand.py --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \
--command 'tail -n 5 ~/log_allenact_distributed_config'
```

If everything went fine, all requirements are ready to start running our experiment.

## Run your experiment

**Note:** In this section, we again assume you don't have an available setup for distributed execution, such as
[slurm](https://slurm.schedmd.com/documentation.html). If you do have access to a better alternative to setup/run
distributed processes, we encourage you to use that. The experimental distributed tools included here are intended for
a rather basic usage pattern that might not suit your needs.

Our experimental extension to AllenAct's `main.py` script allows using practically identical commands to the ones
used in a single-node setup to start our experiments. From the root `allenact` directory, we can simply invoke

```bash
scripts/dmain.py projects/tutorials/distributed_objectnav_tutorial.py \
--config_kwargs '{"distributed_nodes":3}' \
--runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \
--env_activate_path ~/allenact_venv/bin/activate \
--allenact_path ~/allenact \
--distributed_ip_and_port <FIRST_IP_ADDRESS_IN_RUNS_ON_LIST>:<FREE_PORT_NUMBER_FOR_THIS_IP_ADDRESS>
```

This script will do several things for you, including synchronization of the changes in the `allenact` directory
to all machines, enabling virtual environments in each node, sharing the same random seed for all `main.py` instances,
assigning `--machine_id` parameters required for multi-node training, and redirecting the process output to a log file
under the output results folder.

Note that by changing the value associated with the `distributed_nodes` key in the `config_kwargs` map and the `runs_on`
list of IPs, we can easily scale our training to e.g. 1, 3, or 8 nodes as shown in the chart above. Note that for this
call to work unmodified, you should have sufficient GPUs/GPU memory to host 60 samplers per node.

## Track and stop your experiment

You might have noticed that, when your experiment started with the above command, a file was created under
`~/.allenact`. This file includes IP addresses and screen session IDs for all nodes. It can be used
by the already introduced `scripts/dcommand.py` script, if we omit the `--runs_on` argument, to call a command on each
node via ssh; but most importantly it is used by the `scripts/dkill.py` script to terminate all screen sessions hosting
our training processes.

### Experiment tracking

A simple way to check all machines are training, assuming you have `nvidia-smi` installed in all nodes, is to just call

```bash
scripts/dcommand.py
```

from the root `allenact` directory. If everything is working well, the GPU usage stats from `nvidia-smi` should reflect
ongoing activity. You can also add different commands to be executed by each node. It is of course also possible to run
tensorboard on any of the nodes, if that's your preference.

### Experiment termination

Just call

```bash
scripts/dkill.py
```

After killing all involved screen sessions, you will be asked about whether you also want to delete the "killfile"
stored under the `~/.allenact` directory (which might be your preferred option once all processes are terminated).

We hope this tutorial will help you start quickly testing new ideas! Even if we've only explored moderate settings of
up to 480 experience samplers, you might want to consider some additional changes (like the
[choice for the optimizer](https://arxiv.org/abs/2103.07013)) if you plan to run at larger scale.


================================================
FILE: docs/tutorials/gym-mujoco-tutorial.md
================================================
<!-- DO NOT EDIT THIS FILE. --> 
<!-- THIS FILE WAS AUTOGENERATED FROM 'ALLENACT_BASE_DIR/projects/tutorials/gym_mujoco_tutorial.py', EDIT IT INSTEAD. -->

# Tutorial: OpenAI gym MuJoCo environment.
**Note** The provided commands to execute in this tutorial assume you have
[installed the full library](../installation/installation-allenact.md#full-library) and the requirements for the
`gym_plugin`. The latter can be installed by

```bash
pip install -r allenact_plugins/gym_plugin/extra_requirements.txt
```

The environments for this tutorial use the [MuJoCo](http://www.mujoco.org/) (**Mu**lti-**Jo**int dynamics in **Co**ntact)
physics simulator, which must also be installed properly by following the instructions
[here](https://github.com/openai/mujoco-py).

## The task

For this tutorial, we'll focus on one of the continuous-control environments under the `mujoco` group of `gym`
environments: [Ant-v2](https://gym.openai.com/envs/Ant-v2/). In this task, the goal
is to make a four-legged creature, "ant", walk forward as fast as possible. A random agent of "Ant-v2" is shown below.

![The Ant-v2 task](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_random.gif).

To achieve the goal, we need to provide continuous control for the agent moving forward with four legs with the
`x` velocity as high as possible for at most 1000 episode steps. The agent fails (i.e. the episode is done) if the `z` position
is out of the range [0.2, 1.0]. The action space has dimension 8 and the observation space has dimension 111,
mapping to different body parts, including 3D position `(x,y,z)`, orientation (quaternion `x`,`y`,`z`,`w`)
of the torso, and the joint angles, 3D velocity `(x,y,z)`, 3D angular velocity `(x,y,z)`, and joint velocities.
The rewards for the agent "ant" are composed of the forward rewards, healthy rewards, control cost, and contact cost.

## Implementation

For this tutorial, we'll use the readily available `gym_plugin`, which includes a
[wrapper for `gym` environments](../api/allenact_plugins/gym_plugin/gym_environment.md#gymenvironment), a
[task sampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler) and
[task definition](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymcontinuousbox2dtask), a
[sensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to wrap the observations provided by the `gym`
environment, and a simple [model](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic).
The experiment config, similar to the one used for the
[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md), is defined as follows:

```python
from typing import Dict, Optional, List, Any, cast

import gym
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses.ppo import PPO

from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor

from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler
from allenact.utils.experiment_utils import (
    TrainingPipeline,
    Builder,
    PipelineStage,
    LinearDecay,
)
from allenact.utils.viz_utils import VizSuite, AgentViewViz


class HandManipulateTutorialExperimentConfig(ExperimentConfig):
    @classmethod
    def tag(cls) -> str:
        return "GymMuJoCoTutorial"
```
### Sensors and Model

As mentioned above, we'll use a sensor, here a [GymMuJoCoSensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymmujocosensor), to provide
full observations from the state of the `gym` environment to our model.

```python
    SENSORS = [
        GymMuJoCoSensor("Ant-v2", uuid="gym_mujoco_data"),
    ]
```
We define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic,
[MemorylessActorCritic](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). Since
this is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]`
instead of `ActorCriticModel[CategoricalDistr]`, since we'll use a
[Gaussian distribution](../api/allenact_plugins/gym_plugin/gym_distributions.md#gaussiandistr) to sample actions.

```python
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """We define our `ActorCriticModel` agent using a lightweight
        implementation with separate MLPs for actors and critic,
        MemorylessActorCritic.

        Since this is a model for continuous control, note that the
        superclass of our model is `ActorCriticModel[GaussianDistr]`
        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use
        a Gaussian distribution to sample actions.
        """
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=gym.spaces.Box(
                -3.0, 3.0, (8,), "float32"
            ),  # 8 actors, each in the range [-3.0, 3.0]
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )
```
### Task samplers
We use an available `TaskSampler` implementation for `gym` environments that allows sampling
[GymTasks](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtask):
[GymTaskSampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler). Even though it is possible to let the task
sampler instantiate the proper sensor for the chosen task name (by passing `None`), we use the sensors we created
above, which contain a custom identifier for the actual observation space (`gym_mujoco_data`) also used by the model.

```python
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        return GymTaskSampler(gym_env_type="Ant-v2", **kwargs)
```
For convenience, we will use a `_get_sampler_args` method to generate the task sampler arguments for all three
modes, `train, valid, test`:

```python
    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(
            process_ind=process_ind, mode="train", seeds=seeds
        )

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(
            process_ind=process_ind, mode="valid", seeds=seeds
        )

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(process_ind=process_ind, mode="test", seeds=seeds)
```
Similarly to what we do in the Minigrid navigation tutorial, during training the task sampler samples random tasks
forever, while, during testing (or validation), we sample a fixed number of tasks.

```python
    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Generate initialization arguments for train, valid, and test
        TaskSamplers.

        # Parameters
        process_ind : index of the current task sampler
        mode:  one of `train`, `valid`, or `test`
        """
        if mode == "train":
            max_tasks = None  # infinite training tasks
            task_seeds_list = None  # no predefined random seeds for training
            deterministic_sampling = False  # randomly sample tasks in training
        else:
            max_tasks = 4

            # one seed for each task to sample:
            # - ensures different seeds for each sampler, and
            # - ensures a deterministic set of sampled tasks.
            task_seeds_list = list(
                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)
            )

            deterministic_sampling = (
                True  # deterministically sample task in validation/testing
            )

        return dict(
            gym_env_types=["Ant-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,  # see above
            task_seeds_list=task_seeds_list,  # see above
            deterministic_sampling=deterministic_sampling,  # see above
            seed=seeds[process_ind],
        )
```
Note that we just sample 4 tasks for validation and testing in this case, which suffices to illustrate the model's
success.

### Machine parameters

In this tutorial, we just train the model on the CPU. We allocate a larger number of samplers for training (8) than
for validation or testing (just 1), and we default to CPU usage by returning an empty list of `devices`. We also
include a video visualizer (`AgentViewViz`) in test mode.

```python
    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        visualizer = None
        if mode == "test":
            visualizer = VizSuite(
                mode=mode,
                video_viz=AgentViewViz(
                    label="episode_vid",
                    max_clip_length=400,
                    vector_task_source=("render", {"mode": "rgb_array"}),
                    fps=30,
                ),
            )
        return {
            "nprocesses": 8 if mode == "train" else 1,  # rollout
            "devices": [],
            "visualizer": visualizer,
        }
```
### Training pipeline

The last definition is the training pipeline. In this case, we use a PPO stage with linearly decaying learning rate
and 10 update repeats (each split into 4 mini-batches) per rollout. The reward should exceed 4,000
in 20M steps in the test. In order to make the "ant" run at an obviously fast speed, we train the agent using PPO
for 3e7 steps.

```python
    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        lr = 3e-4
        ppo_steps = int(3e7)
        clip_param = 0.2
        value_loss_coef = 0.5
        entropy_coef = 0.0
        num_mini_batch = 4  # optimal 64
        update_repeats = 10
        max_grad_norm = 0.5
        num_steps = 2048
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        advance_scene_rollout_period = None
        save_interval = 200000
        metric_accumulate_interval = 50000
        return TrainingPipeline(
            named_losses=dict(
                ppo_loss=PPO(
                    clip_param=clip_param,
                    value_loss_coef=value_loss_coef,
                    entropy_coef=entropy_coef,
                ),
            ),  # type:ignore
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps),
            ],
            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=advance_scene_rollout_period,
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps, startp=1, endp=0)},
            ),
        )
```
## Training and validation

We have a complete implementation of this experiment's configuration class in `projects/tutorials/gym_mujoco_tutorial.py`.
To start training from scratch, we just need to invoke

```bash
PYTHONPATH=. python allenact/main.py gym_mujoco_tutorial -b projects/tutorials -m 8 -o /PATH/TO/gym_mujoco_output -s 0 -e
```

from the `allenact` root directory. Note that we include `-e` to enforce deterministic evaluation. Please refer to the
[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md) if in doubt about the meaning of the rest of the parameters.

If we have Tensorboard installed, we can track progress with
```bash
tensorboard --logdir /PATH/TO/gym_mujoco_output
```
which will default to the URL [http://localhost:6006/](http://localhost:6006/).

After 30,000,000 steps, the script will terminate. If everything went well, the `valid` success rate should be 1
and the mean reward should rise above 4,000 in 20,000,000 steps, while the average episode length should stay at or a
little below 1,000.

## Testing

The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the
subfolders in the path to the checkpoints, saved under the output folder.
In order to evaluate (i.e. test) a collection of checkpoints, we need to pass the `--eval` flag and specify the
directory containing the checkpoints with the `--checkpoint CHECKPOINT_DIR` option:
```bash
PYTHONPATH=. python allenact/main.py gym_mujoco_tutorial \
-b projects/tutorials \
-m 1 \
-o /PATH/TO/gym_mujoco_output \
-s 0 \
-e \
--eval \
--checkpoint /PATH/TO/gym_mujoco_output/checkpoints/GymMuJoCoTutorial/YOUR_START_DATE
```

If everything went well, the `test` success rate should converge to 1
and the mean reward should rise above 4,000 in 20,000,000 steps, while the average episode length should stay at or a
little below 1,000. The `gif` results can be seen in the image tab of Tensorboard while testing.
The output should be something like this:

![results](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_test.png).

And the `gif` results can be seen in the image tab of Tensorboard while testing.

![mp4 demo](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_test.gif)

If the test command fails with `pyglet.canvas.xlib.NoSuchDisplayException: Cannot connect to "None"`, e.g. when running
remotely, try prepending `DISPLAY=:0.0` to the command above, assuming you have an xserver running with such display
available:

```bash
DISPLAY=:0.0 PYTHONPATH=. python allenact/main.py gym_mujoco_tutorial \
-b projects/tutorials \
-m 1 \
-o /PATH/TO/gym_mujoco_output \
-s 0 \
-e \
--eval \
--checkpoint /PATH/TO/gym_mujoco_output/checkpoints/GymMuJoCoTutorial/YOUR_START_DATE
```


================================================
FILE: docs/tutorials/gym-tutorial.md
================================================
<!-- DO NOT EDIT THIS FILE. --> 
<!-- THIS FILE WAS AUTOGENERATED FROM 'ALLENACT_BASE_DIR/projects/tutorials/gym_tutorial.py', EDIT IT INSTEAD. -->

# Tutorial: OpenAI gym for continuous control.
**Note** The provided commands to execute in this tutorial assume you have
[installed the full library](../installation/installation-allenact.md#full-library) and the requirements for the
`gym_plugin`. The latter can be installed by

```bash
pip install -r allenact_plugins/gym_plugin/extra_requirements.txt
```

In this tutorial, we:

1. Introduce the `gym_plugin`, which enables some of the tasks in [OpenAI's gym](https://gym.openai.com/) for training
and inference within AllenAct.
1. Show an example of continuous control with an arbitrary action space covering 2 policies for one of the `gym` tasks.


## The task

For this tutorial, we'll focus on one of the continuous-control environments under the `Box2D` group of `gym`
environments: [LunarLanderContinuous-v2](https://gym.openai.com/envs/LunarLanderContinuous-v2/). In this task, the goal
is to smoothly land a lunar module in a landing pad, as shown below.

![The LunarLanderContinuous-v2 task](../img/lunar_lander_continuous_demo.png).

To achieve this goal, we need to provide continuous control for a main engine and a directional one (2 real values). In
order to solve the task, the expected reward must be at least 200 points. The controls for main and directional engines
are both in the range [-1.0, 1.0] and the observation space is composed of 8 scalars indicating `x` and `y` positions,
`x` and `y` velocities, lander angle and angular velocity, and left and right ground contact. Note that these 8 scalars
provide a full observation of the state.


## Implementation

For this tutorial, we'll use the readily available `gym_plugin`, which includes a
[wrapper for `gym` environments](../api/allenact_plugins/gym_plugin/gym_environment.md#gymenvironment), a
[task sampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler) and
[task definition](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymcontinuousbox2dtask), a
[sensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to wrap the observations provided by the `gym`
environment, and a simple [model](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic).

The experiment config, similar to the one used for the
[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md), is defined as follows:

```python
from typing import Dict, Optional, List, Any, cast

import gym
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses.ppo import PPO
from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymBox2DSensor
from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler
from allenact.utils.experiment_utils import (
    TrainingPipeline,
    Builder,
    PipelineStage,
    LinearDecay,
)
from allenact.utils.viz_utils import VizSuite, AgentViewViz


class GymTutorialExperimentConfig(ExperimentConfig):
    @classmethod
    def tag(cls) -> str:
        return "GymTutorial"
```
### Sensors and Model

As mentioned above, we'll use a [GymBox2DSensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to provide
full observations from the state of the `gym` environment to our model.

```python
    SENSORS = [
        GymBox2DSensor("LunarLanderContinuous-v2", uuid="gym_box_data"),
    ]
```
We define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic,
[MemorylessActorCritic](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). Since
this is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]`
instead of `ActorCriticModel[CategoricalDistr]`, since we'll use a
[Gaussian distribution](../api/allenact_plugins/gym_plugin/gym_distributions.md#gaussiandistr) to sample actions.

```python
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        return MemorylessActorCritic(
            input_uuid="gym_box_data",
            action_space=gym.spaces.Box(
                -1.0, 1.0, (2,)
            ),  # 2 actors, each in the range [-1.0, 1.0]
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )
```
### Task samplers
We use an available `TaskSampler` implementation for `gym` environments that allows us to sample
[GymTasks](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtask):
[GymTaskSampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler). Even though it is possible to let the task
sampler instantiate the proper sensor for the chosen task name (by passing `None`), we use the sensors we created
above, which contain a custom identifier for the actual observation space (`gym_box_data`) also used by the model.

```python
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        return GymTaskSampler(**kwargs)
```
For convenience, we will use a `_get_sampler_args` method to generate the task sampler arguments for all three
modes, `train, valid, test`:

```python
    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(
            process_ind=process_ind, mode="train", seeds=seeds
        )

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(
            process_ind=process_ind, mode="valid", seeds=seeds
        )

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(process_ind=process_ind, mode="test", seeds=seeds)
```
Similarly to what we do in the MiniGrid navigation tutorial, during training the task sampler samples random tasks
forever, while, during testing (or validation), we sample a fixed number of tasks.

```python
    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Generate initialization arguments for train, valid, and test
        TaskSamplers.

        # Parameters
        process_ind : index of the current task sampler
        mode:  one of `train`, `valid`, or `test`
        """
        if mode == "train":
            max_tasks = None  # infinite training tasks
            task_seeds_list = None  # no predefined random seeds for training
            deterministic_sampling = False  # randomly sample tasks in training
        else:
            max_tasks = 3

            # one seed for each task to sample:
            # - ensures different seeds for each sampler, and
            # - ensures a deterministic set of sampled tasks.
            task_seeds_list = list(
                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)
            )

            deterministic_sampling = (
                True  # deterministically sample task in validation/testing
            )

        return dict(
            gym_env_types=["LunarLanderContinuous-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,  # see above
            task_seeds_list=task_seeds_list,  # see above
            deterministic_sampling=deterministic_sampling,  # see above
            seed=seeds[process_ind],
        )
```
Note that we just sample 3 tasks for validation and testing in this case, which suffice to illustrate the model's
success.

### Machine parameters

Given the simplicity of the task and model, we can just train the model on the CPU. During training, success should
reach 100% in less than 10 minutes, whereas solving the task (evaluation reward > 200) might take about 20 minutes
(on a laptop CPU).

We allocate a larger number of samplers for training (8) than for validation or testing (just 1), and we default to
CPU usage by returning an empty list of `devices`. We also include a video visualizer (`AgentViewViz`) in test mode.

```python
    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        visualizer = None
        if mode == "test":
            visualizer = VizSuite(
                mode=mode,
                video_viz=AgentViewViz(
                    label="episode_vid",
                    max_clip_length=400,
                    vector_task_source=("render", {"mode": "rgb_array"}),
                    fps=30,
                ),
            )
        return {
            "nprocesses": 8 if mode == "train" else 1,
            "devices": [],
            "visualizer": visualizer,
        }
```
### Training pipeline

The last definition is the training pipeline. In this case, we use a PPO stage with linearly decaying learning rate
and 80 single-batch update repeats per rollout:

```python
    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        ppo_steps = int(1.2e6)
        return TrainingPipeline(
            named_losses=dict(
                ppo_loss=PPO(clip_param=0.2, value_loss_coef=0.5, entropy_coef=0.0,),
            ),  # type:ignore
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps),
            ],
            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-3)),
            num_mini_batch=1,
            update_repeats=80,
            max_grad_norm=100,
            num_steps=2000,
            gamma=0.99,
            use_gae=False,
            gae_lambda=0.95,
            advance_scene_rollout_period=None,
            save_interval=200000,
            metric_accumulate_interval=50000,
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)},  # type:ignore
            ),
        )
```
## Training and validation

We have a complete implementation of this experiment's configuration class in `projects/tutorials/gym_tutorial.py`.
To start training from scratch, we just need to invoke

```bash
PYTHONPATH=. python allenact/main.py gym_tutorial -b projects/tutorials -m 8 -o /PATH/TO/gym_output -s 54321 -e
```

from the `allenact` root directory. Note that we include `-e` to enforce deterministic evaluation. Please refer to the
[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md) if in doubt about the meaning of the rest of the parameters.

If we have Tensorboard installed, we can track progress with
```bash
tensorboard --logdir /PATH/TO/gym_output
```
which will default to the URL [http://localhost:6006/](http://localhost:6006/).

After 1,200,000 steps, the script will terminate. If everything went well, the `valid` success rate should quickly
converge to 1 and the mean reward to above 250, while the average episode length should stay below or near 300.

## Testing

The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the
subfolders in the path to the checkpoints, saved under the output folder.
In order to evaluate (i.e. test) a collection of checkpoints, we need to pass the `--eval` flag and specify the
directory containing the checkpoints with the `--checkpoint CHECKPOINT_DIR` option:
```bash
PYTHONPATH=. python allenact/main.py gym_tutorial \
-b projects/tutorials \
-m 1 \
-o /PATH/TO/gym_output \
-s 54321 \
-e \
--eval \
--checkpoint /PATH/TO/gym_output/checkpoints/GymTutorial/YOUR_START_DATE \
--approx_ckpt_step_interval 800000 # Skip some checkpoints
```

The option `--approx_ckpt_step_interval 800000` tells AllenAct that we only want to evaluate checkpoints
which were saved every ~800000 steps; this lets us avoid evaluating every saved checkpoint. If everything went well,
the `test` success rate should converge to 1, the episode length below or near 300 steps, and the mean reward to
above 250. The images tab in Tensorboard will contain videos for the sampled test episodes.

![video_results](../img/lunar_lander_continuous_test.png).

If the test command fails with `pyglet.canvas.xlib.NoSuchDisplayException: Cannot connect to "None"`, e.g. when running
remotely, try prepending `DISPLAY=:0.0` to the command above, assuming you have an xserver running with such display
available:

```bash
DISPLAY=:0.0 PYTHONPATH=. python allenact/main.py gym_tutorial \
-b projects/tutorials \
-m 1 \
-o /PATH/TO/gym_output \
-s 54321 \
-e \
--eval \
--checkpoint /PATH/TO/gym_output/checkpoints/GymTutorial/YOUR_START_DATE \
--approx_ckpt_step_interval 800000
```


================================================
FILE: docs/tutorials/index.md
================================================
# AllenAct Tutorials

**Note** The provided commands to execute these tutorials assume you have
[installed the full library](../installation/installation-allenact.md#full-library)
and the specific requirements for each used plugin.

We provide several tutorials to help ramp up researchers to the field of Embodied-AI as well as to the AllenAct framework.

## [Navigation in MiniGrid](../tutorials/minigrid-tutorial.md)

![MiniGridEmptyRandom5x5 task example](../img/minigrid_environment.png)

We train an agent to complete the `MiniGrid-Empty-Random-5x5-v0` task within the [MiniGrid](https://github.com/maximecb/gym-minigrid) environment. 

This tutorial presents:

* Writing an experiment configuration file with a simple training pipeline from scratch.
* Using one of the supported environments with minimal user effort.
* Training, validation and testing your experiment from the command line.

[Follow the tutorial here.](../tutorials/minigrid-tutorial.md)


## [PointNav in RoboTHOR](../tutorials/training-a-pointnav-model.md)

![RoboTHOR Robot](../img/RoboTHOR_robot.jpg)

We train an agent on the Point Navigation task within the RoboTHOR Embodied-AI environment.

This tutorial presents:

* The basics of the Point Navigation task, a common task in Embodied AI
* Using an external dataset
* Writing an experiment configuration file with a simple training pipeline from scratch.
* Use one of the supported environments with minimal user effort.
* Train, validate and test your experiment from the command line.
* Testing a pre-trained model

[Follow the tutorial here.](../tutorials/training-a-pointnav-model.md)


## [Swapping in a new environment](../tutorials/transfering-to-a-different-environment-framework.md)

![Environment Transfer](../img/env_transfer.jpg)

This tutorial demonstrates how easy it is to modify the experiment config created in the RoboTHOR PointNav tutorial to work with the iTHOR and Habitat environments.

[Follow the tutorial here.](../tutorials/transfering-to-a-different-environment-framework.md)


## [Using a pretrained model](../tutorials/running-inference-on-a-pretrained-model.md)

![Pretrained inference](../img/viz_pretrained_2videos.jpg)

This tutorial shows how to run inference on one or more checkpoints of a pretrained model and generate
visualizations of different types.

[Follow the tutorial here.](../tutorials/running-inference-on-a-pretrained-model.md)


## [Off-policy training](../tutorials/offpolicy-tutorial.md)

This tutorial shows how to train an Actor using an off-policy dataset with expert actions.

[Follow the tutorial here.](../tutorials/offpolicy-tutorial.md)


## [OpenAI gym for continuous control](../tutorials/gym-tutorial.md)

![gym task example](../img/lunar_lander_continuous_demo.png)

We train an agent to complete the `LunarLanderContinuous-v2` task from
[OpenAI gym](https://gym.openai.com/envs/LunarLanderContinuous-v2). 

This tutorial presents:

* A `gym` plugin for `AllenAct`.
* A continuous control example with multiple actors using PPO.

[Follow the tutorial here.](../tutorials/gym-tutorial.md)


<!---
## [OpenAI gym for MuJoCo](../tutorials/gym-mujoco-tutorial.md)

In this tutorial, we train a four-legged creature,
"ant", to walk forward as fast as possible in [MuJoCo](http://www.mujoco.org/).

[Follow the tutorial here.](../tutorials/gym-mujoco-tutorial.md)
--->


## [Multi-node training for RoboTHOR ObjectNav](../tutorials/distributed-objectnav-tutorial.md)

![training speedup](../img/multinode_training.jpg)

We train an agent to navigate to an object in a fraction of the time
required for training in one node by distributing training across multiple
nodes. 

This tutorial presents:

1. The AllenAct API for training across multiple nodes, as well as
experimental scripts for distributed configuration, training start
and termination, and remote command execution.
2. The introduction of the headless mode for [AI2-THOR](https://ai2thor.allenai.org/) in `AllenAct`.

[Follow the tutorial here.](../tutorials/distributed-objectnav-tutorial.md)


================================================
FILE: docs/tutorials/minigrid-tutorial.md
================================================
<!-- DO NOT EDIT THIS FILE. --> 
<!-- THIS FILE WAS AUTOGENERATED FROM 'ALLENACT_BASE_DIR/projects/tutorials/minigrid_tutorial.py', EDIT IT INSTEAD. -->

# Tutorial: Navigation in MiniGrid.
In this tutorial, we will train an agent to complete the `MiniGrid-Empty-Random-5x5-v0` task within the
[MiniGrid](https://github.com/maximecb/gym-minigrid) environment. We will demonstrate how to:

* Write an experiment configuration file with a simple training pipeline from scratch.
* Use one of the supported environments with minimal user effort.
* Train, validate and test your experiment from the command line.

This tutorial assumes the [installation instructions](../installation/installation-allenact.md) have already been
followed and that, to some extent, this framework's [abstractions](../getting_started/abstractions.md) are known.
The `extra_requirements` for `minigrid_plugin` and `babyai_plugin` can be installed with:

```bash
pip install -r allenact_plugins/minigrid_plugin/extra_requirements.txt; pip install -r allenact_plugins/babyai_plugin/extra_requirements.txt
```

## The task
A `MiniGrid-Empty-Random-5x5-v0` task consists of a grid of dimensions 5x5 where an agent spawned at a random
location and orientation has to navigate to the visitable bottom right corner cell of the grid by sequences of three
possible actions (rotate left/right and move forward). A visualization of the environment with expert steps in a random
`MiniGrid-Empty-Random-5x5-v0` task looks like

![MiniGridEmptyRandom5x5 task example](../img/minigrid_environment.png)

The observation for the agent is a subset of the entire grid, simulating a simplified limited field of view, as
depicted by the highlighted rectangle (observed subset of the grid) around the agent (red arrow). Gray cells correspond
to walls.

## Experiment configuration file

Our complete experiment consists of:

* Training a basic actor-critic agent with memory to solve randomly sampled navigation tasks.
* Validation on a fixed set of tasks (running in parallel with training).
* A second stage where we test saved checkpoints with a larger fixed set of tasks.

The entire configuration for the experiment, including training, validation, and testing, is encapsulated in a single
class implementing the `ExperimentConfig` abstraction. For this tutorial, we will follow the config under
`projects/tutorials/minigrid_tutorial.py`.

The `ExperimentConfig` abstraction is used by the
[OnPolicyTrainer](../api/allenact/algorithms/onpolicy_sync/engine.md#onpolicytrainer) class (for training) and the
[OnPolicyInference](../api/allenact/algorithms/onpolicy_sync/engine.md#onpolicyinference) class (for validation and testing)
invoked through the entry script `main.py` that calls an orchestrating
[OnPolicyRunner](../api/allenact/algorithms/onpolicy_sync/runner.md#onpolicyrunner) class. It includes:

* A `tag` method to identify the experiment.
* A `create_model` method to instantiate actor-critic models.
* A `make_sampler_fn` method to instantiate task samplers.
* Three `{train,valid,test}_task_sampler_args` methods describing initialization parameters for task samplers used in
training, validation, and testing; including assignment of workers to devices for simulation.
* A `machine_params` method with configuration parameters that will be used for training, validation, and testing.
* A `training_pipeline` method describing a possibly multi-staged training pipeline with different types of losses,
an optimizer, and other parameters like learning rates, batch sizes, etc.

### Preliminaries

We first import everything we'll need to define our experiment.

```python
from typing import Dict, Optional, List, Any, cast

import gym
from gym_minigrid.envs import EmptyRandomEnv5x5
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact.utils.experiment_utils import (
    TrainingPipeline,
    Builder,
    PipelineStage,
    LinearDecay,
)
from allenact_plugins.minigrid_plugin.minigrid_models import MiniGridSimpleConvRNN
from allenact_plugins.minigrid_plugin.minigrid_sensors import EgocentricMiniGridSensor
from allenact_plugins.minigrid_plugin.minigrid_tasks import (
    MiniGridTaskSampler,
    MiniGridTask,
)
```
We now create the `MiniGridTutorialExperimentConfig` class which we will use to define our experiment.
For pedagogical reasons, we will add methods to this class one at a time below with a description of what
these methods do.

```python
class MiniGridTutorialExperimentConfig(ExperimentConfig):
```
An experiment is identified by a `tag`.
```python
    @classmethod
    def tag(cls) -> str:
        return "MiniGridTutorial"
```
### Sensors and Model

A readily available Sensor type for MiniGrid,
[EgocentricMiniGridSensor](../api/allenact_plugins/minigrid_plugin/minigrid_sensors.md#egocentricminigridsensor),
allows us to extract observations in a format consumable by an `ActorCriticModel` agent:

```python
    SENSORS = [
        EgocentricMiniGridSensor(agent_view_size=5, view_channels=3),
    ]
```
The three `view_channels` include objects, colors and states corresponding to a partial observation of the environment
as an image tensor, equivalent to that from `ImgObsWrapper` in
[MiniGrid](https://github.com/maximecb/gym-minigrid#wrappers). The
relatively large `agent_view_size` means the view will only be clipped by the environment walls in the forward and
lateral directions with respect to the agent's orientation.

We define our `ActorCriticModel` agent using a lightweight implementation with recurrent memory for MiniGrid
environments, [MiniGridSimpleConvRNN](../api/allenact_plugins/minigrid_plugin/minigrid_models.md#minigridsimpleconvrnn):

```python
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        return MiniGridSimpleConvRNN(
            action_space=gym.spaces.Discrete(len(MiniGridTask.class_action_names())),
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            num_objects=cls.SENSORS[0].num_objects,
            num_colors=cls.SENSORS[0].num_colors,
            num_states=cls.SENSORS[0].num_states,
        )
```
### Task samplers

We use an available TaskSampler implementation for MiniGrid environments that allows us to sample both random and
deterministic `MiniGridTasks`,
[MiniGridTaskSampler](../api/allenact_plugins/minigrid_plugin/minigrid_tasks.md#minigridtasksampler):

```python
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        return MiniGridTaskSampler(**kwargs)
```
This task sampler will, during training (or validation/testing), randomly initialize new tasks for the agent to complete.
While it is not quite as important for this task type (as we test our agent in the same setting it is trained on) there
are a lot of good reasons we would like to sample tasks differently during training than during validation or testing.
One good reason, that is applicable in this tutorial, is that, during training, we would like to be able to sample tasks
forever while, during testing, we would like to sample a fixed number of tasks (as otherwise we would never finish
testing!). In `allenact` this is made possible by defining different arguments for the task sampler:

```python
    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(process_ind=process_ind, mode="train")

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(process_ind=process_ind, mode="valid")

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(process_ind=process_ind, mode="test")
```
where, for convenience, we have defined a `_get_sampler_args` method:

```python
    def _get_sampler_args(self, process_ind: int, mode: str) -> Dict[str, Any]:
        """Generate initialization arguments for train, valid, and test
        TaskSamplers.

        # Parameters
        process_ind : index of the current task sampler
        mode:  one of `train`, `valid`, or `test`
        """
        if mode == "train":
            max_tasks = None  # infinite training tasks
            task_seeds_list = None  # no predefined random seeds for training
            deterministic_sampling = False  # randomly sample tasks in training
        else:
            max_tasks = 20 + 20 * (mode == "test")  # 20 tasks for valid, 40 for test

            # one seed for each task to sample:
            # - ensures different seeds for each sampler, and
            # - ensures a deterministic set of sampled tasks.
            task_seeds_list = list(
                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)
            )

            deterministic_sampling = (
                True  # deterministically sample task in validation/testing
            )

        return dict(
            max_tasks=max_tasks,  # see above
            env_class=self.make_env,  # builder for third-party environment (defined below)
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            env_info=dict(),  # parameters for environment builder (none for now)
            task_seeds_list=task_seeds_list,  # see above
            deterministic_sampling=deterministic_sampling,  # see above
        )

    @staticmethod
    def make_env(*args, **kwargs):
        return EmptyRandomEnv5x5()
```
Note that the `env_class` argument to the Task Sampler is the one determining which task type we are going to train the
model for (in this case, `MiniGrid-Empty-Random-5x5-v0` from
[gym-minigrid](https://github.com/maximecb/gym-minigrid#empty-environment)). The sparse reward is
[given by the environment](https://github.com/maximecb/gym-minigrid/blob/6e22a44dc67414b647063692258a4f95ce789161/gym_minigrid/minigrid.py#L819),
and the maximum task length is 100. For training, we opt for a default random sampling, whereas for validation and
test we define fixed sets of randomly sampled tasks without needing to explicitly define a dataset.

In this toy example, the maximum number of different tasks is 32. For validation we sample 320 tasks using 16 samplers,
or 640 for testing, so we can be fairly sure that all possible tasks are visited at least once during evaluation.

### Machine parameters

Given the simplicity of the task and model, we can quickly train the model on the CPU:

```python
    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        return {
            "nprocesses": 128 if mode == "train" else 16,
            "devices": [],
        }
```
We allocate a larger number of samplers for training (128) than for validation or testing (16), and we default to CPU
usage by returning an empty list of `devices`.

### Training pipeline

The last definition required before starting to train is a training pipeline. In this case, we just use a single PPO
stage with linearly decaying learning rate:

```python
    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        ppo_steps = int(150000)
        return TrainingPipeline(
            named_losses=dict(ppo_loss=PPO(**PPOConfig)),  # type:ignore
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps)
            ],
            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4)),
            num_mini_batch=4,
            update_repeats=3,
            max_grad_norm=0.5,
            num_steps=16,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=None,
            save_interval=10000,
            metric_accumulate_interval=1,
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}  # type:ignore
            ),
        )
```
You can see that we use a `Builder` class to postpone the construction of some of the elements, like the optimizer,
for which the model weights need to be known.

## Training and validation

We have a complete implementation of this experiment's configuration class in `projects/tutorials/minigrid_tutorial.py`.
To start training from scratch, we just need to invoke

```bash
PYTHONPATH=. python allenact/main.py minigrid_tutorial -b projects/tutorials -m 8 -o /PATH/TO/minigrid_output -s 12345
```

from the `allenact` root directory.

* With `-b projects/tutorials` we tell `allenact` that the `minigrid_tutorial` experiment config file
will be found in the `projects/tutorials` directory.
* With `-m 8` we limit the number of subprocesses to 8 (each subprocess will run 16 of the 128 training task samplers).
* With `-o /PATH/TO/minigrid_output` we set the output folder into which results and logs will be saved.
* With `-s 12345` we set the random seed.

If we have Tensorboard installed, we can track progress with
```bash
tensorboard --logdir /PATH/TO/minigrid_output
```
which will default to the URL [http://localhost:6006/](http://localhost:6006/).

After 150,000 steps, the script will terminate and several checkpoints will be saved in the output folder.
The training curves should look similar to:

![training curves](../img/minigrid_train.png)

If everything went well, the `valid` success rate should converge to 1 and the mean episode length to a value below 4.
(For perfectly uniform sampling and complete observation, the expectation for the optimal policy is 3.75 steps.) In the
not-so-unlikely event of the run failing to converge to a near-optimal policy, we can just try to re-run (for example
with a different random seed). The validation curves should look similar to:

![validation curves](../img/minigrid_valid.png)

## Testing

The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the
subfolders in the path to the checkpoints, saved under the output folder.
In order to evaluate (i.e. test) a particular checkpoint, we need to pass the `--eval` flag and specify the checkpoint with the
`--checkpoint CHECKPOINT_PATH` option:
```bash
PYTHONPATH=. python allenact/main.py minigrid_tutorial \
-b projects/tutorials \
-m 1 \
-o /PATH/TO/minigrid_output \
-s 12345 \
--eval \
--checkpoint /PATH/TO/minigrid_output/checkpoints/MiniGridTutorial/YOUR_START_DATE/exp_MiniGridTutorial__stage_00__steps_000000151552.pt
```

Again, if everything went well, the `test` success rate should converge to 1 and the mean episode length to a value
below 4. Detailed results are saved under a `metrics` subfolder in the output folder.
The test curves should look similar to:

![test curves](../img/minigrid_test.png)


================================================
FILE: docs/tutorials/offpolicy-tutorial.md
================================================
<!-- DO NOT EDIT THIS FILE. --> 
<!-- THIS FILE WAS AUTOGENERATED FROM 'ALLENACT_BASE_DIR/projects/tutorials/minigrid_offpolicy_tutorial.py', EDIT IT INSTEAD. -->

# Tutorial: Off-policy training.
**Note** The provided commands to execute in this tutorial assume you have
[installed the full library](../installation/installation-allenact.md#full-library) and the `extra_requirements`
for the `babyai_plugin` and `minigrid_plugin`. The latter can be installed with:

```bash
pip install -r allenact_plugins/babyai_plugin/extra_requirements.txt; pip install -r allenact_plugins/minigrid_plugin/extra_requirements.txt
```

In this tutorial we'll learn how to train an agent from an external dataset by imitating expert actions via
Behavior Cloning. We'll use a [BabyAI agent](/api/allenact_plugins/babyai_plugin/babyai_models#BabyAIRecurrentACModel) to solve
`GoToLocal` tasks on [MiniGrid](https://github.com/maximecb/gym-minigrid); see the
`projects/babyai_baselines/experiments/go_to_local` directory for more details.

This tutorial assumes `AllenAct`'s [abstractions](../getting_started/abstractions.md) are known.

## The task

In a `GoToLocal` task, the agent immersed in a grid world has to navigate to a specific object in the presence of
multiple distractors, requiring the agent to understand `go to` instructions like "go to the red ball". For further
details, please consult the [original paper](https://arxiv.org/abs/1810.08272).

## Getting the dataset

We will use a large dataset (**more than 4 GB**) including expert demonstrations for `GoToLocal` tasks. To download
the data we'll run

```bash
PYTHONPATH=. python allenact_plugins/babyai_plugin/scripts/download_babyai_expert_demos.py GoToLocal
```

from the project's root directory, which will download `BabyAI-GoToLocal-v0.pkl` and `BabyAI-GoToLocal-v0_valid.pkl` to
the `allenact_plugins/babyai_plugin/data/demos` directory.

We will also generate small versions of the datasets, which will be useful if running on CPU, by calling

```bash
PYTHONPATH=. python allenact_plugins/babyai_plugin/scripts/truncate_expert_demos.py
```
from the project's root directory, which will generate `BabyAI-GoToLocal-v0-small.pkl` under the same
`allenact_plugins/babyai_plugin/data/demos` directory.

## Data storage

In order to train with an off-policy dataset, we need to define an `ExperienceStorage`. In AllenAct, an
`ExperienceStorage` object has two primary functions:
1. It stores/manages relevant data (e.g. similarly to the `Dataset` class in PyTorch).
2. It loads stored data into batches that will be used for loss computation (e.g. similarly to the `DataLoader`
class in PyTorch).

Unlike a PyTorch `Dataset`, however, an `ExperienceStorage` object can build its dataset **at runtime** by processing
rollouts from the agent. This flexibility allows us to, for example, implement the experience replay data structure
used in deep Q-learning. For this tutorial we won't need this additional functionality as our off-policy dataset
is a fixed collection of expert trajectories.

An example of an `ExperienceStorage` for BabyAI expert demos might look as follows:

```python
class MiniGridExpertTrajectoryStorage(ExperienceStorage, StreamingStorageMixin):
    def __init__(
        self,
        data_path: str,
        num_samplers: int,
        rollout_len: int,
        instr_len: Optional[int],
        restrict_max_steps_in_dataset: Optional[int] = None,
        device: torch.device = torch.device("cpu"),
    ):
        ...

    def data(self) -> List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]:
        ...

    def set_partition(self, index: int, num_parts: int):
        ...

    def initialize(self, *, observations: ObservationType, **kwargs):
        ...

    def add(
        self,
        observations: ObservationType,
        memory: Optional[Memory],
        actions: torch.Tensor,
        action_log_probs: torch.Tensor,
        value_preds: torch.Tensor,
        rewards: torch.Tensor,
        masks: torch.Tensor,
    ):
        ...

    def to(self, device: torch.device):
        ...

    def total_experiences(self) -> int:
        ...

    def reset_stream(self):
        ...

    def empty(self) -> bool:
        ...

    def _get_next_ind(self):
        ...

    def _fill_rollout_queue(self, q: queue.Queue, sampler: int):
        ...

    def get_data_for_rollout_ind(self, sampler_ind: int) -> Dict[str, np.ndarray]:
        ...

    def next_batch(self) -> Dict[str, torch.Tensor]:
        ...
```
A complete example can be found in
[MiniGridExpertTrajectoryStorage](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy#MiniGridExpertTrajectoryStorage).

## Loss function

Off-policy losses must implement the
[`GenericAbstractLoss`](/api/allenact/base_abstractions/misc/#genericabstractloss)
interface. In this case, we minimize the cross-entropy between the actor's policy and the expert action:

```python
class MiniGridOffPolicyExpertCELoss(GenericAbstractLoss):
    def __init__(self, total_episodes_in_epoch: Optional[int] = None):
        super().__init__()
        self.total_episodes_in_epoch = total_episodes_in_epoch

    def loss(  # type: ignore
        self,
        *,  # No positional arguments
        model: ModelType,
        batch: ObservationType,
        batch_memory: Memory,
        stream_memory: Memory,
    ) -> LossOutput:
        rollout_len, nrollouts = cast(torch.Tensor, batch["minigrid_ego_image"]).shape[
            :2
        ]

        # Initialize Memory if empty
        if len(stream_memory) == 0:
            spec = model.recurrent_memory_specification
            for key in spec:
                dims_template, dtype = spec[key]
                # get sampler_dim and all_dims from dims_template (and nrollouts)

                dim_names = [d[0] for d in dims_template]
                sampler_dim = dim_names.index("sampler")

                all_dims = [d[1] for d in dims_template]
                all_dims[sampler_dim] = nrollouts

                stream_memory.check_append(
                    key=key,
                    tensor=torch.zeros(
                        *all_dims,
                        dtype=dtype,
                        device=cast(torch.Tensor, batch["minigrid_ego_image"]).device,
                    ),
                    sampler_dim=sampler_dim,
                )

        # Forward data (through the actor and critic)
        ac_out, stream_memory = model.forward(
            observations=batch,
            memory=stream_memory,
            prev_actions=None,  # type:ignore
            masks=cast(torch.FloatTensor, batch["masks"]),
        )

        # Compute the loss from the actor's output and expert action
        expert_ce_loss = -ac_out.distributions.log_prob(batch["expert_action"]).mean()

        info = {"expert_ce": expert_ce_loss.item()}

        return LossOutput(
            value=expert_ce_loss,
            info=info,
            per_epoch_info={},
            batch_memory=batch_memory,
            stream_memory=stream_memory,
            bsize=rollout_len * nrollouts,
        )

```
A complete example can be found in
[MiniGridOffPolicyExpertCELoss](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy#MiniGridOffPolicyExpertCELoss).
Note that in this case we train the entire actor, but it would also be possible to forward data through a different
subgraph of the ActorCriticModel.

## Experiment configuration

For the experiment configuration, we'll build on top of an existing
[base BabyAI GoToLocal Experiment Config](/api/projects/babyai_baselines/experiments/go_to_local/base/#basebabyaigotolocalexperimentconfig).
The complete `ExperimentConfig` file for off-policy training is
[here](/api/projects/tutorials/minigrid_offpolicy_tutorial/#bcoffpolicybabyaigotolocalexperimentconfig), but let's
focus on the most relevant aspect to enable this type of training:
providing an [OffPolicyPipelineComponent](/api/allenact/utils/experiment_utils/#offpolicypipelinecomponent) object as input to a
`PipelineStage` when instantiating the `TrainingPipeline` in the `training_pipeline` method.

```python
class BCOffPolicyBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):
    """BC Off-policy imitation."""

    DATASET: Optional[List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]] = None

    GPU_ID = 0 if torch.cuda.is_available() else None

    @classmethod
    def tag(cls):
        return "BabyAIGoToLocalBCOffPolicy"

    @classmethod
    def METRIC_ACCUMULATE_INTERVAL(cls):
        # See BaseBabyAIGoToLocalExperimentConfig for how this is used.
        return 1

    @classmethod
    def training_pipeline(cls, **kwargs):
        total_train_steps = cls.TOTAL_IL_TRAIN_STEPS
        ppo_info = cls.rl_loss_default("ppo", steps=-1)

        num_mini_batch = ppo_info["num_mini_batch"]
        update_repeats = ppo_info["update_repeats"]

        # fmt: off
        return cls._training_pipeline(
            named_losses={
                "offpolicy_expert_ce_loss": MiniGridOffPolicyExpertCELoss(
                    total_episodes_in_epoch=int(1e6)
                ),
            },
            named_storages={
                "onpolicy": RolloutBlockStorage(),
                "minigrid_offpolicy_expert": MiniGridExpertTrajectoryStorage(
                    data_path=os.path.join(
                                BABYAI_EXPERT_TRAJECTORIES_DIR,
                                "BabyAI-GoToLocal-v0{}.pkl".format(
                                    "" if torch.cuda.is_available() else "-small"
                                ),
                            ),
                    num_samplers=cls.NUM_TRAIN_SAMPLERS,
                    rollout_len=cls.ROLLOUT_STEPS,
                    instr_len=cls.INSTR_LEN,
                ),
            },
            pipeline_stages=[
                # Single stage, only with off-policy training
                PipelineStage(
                    loss_names=["offpolicy_expert_ce_loss"],                                              # no on-policy losses
                    max_stage_steps=total_train_steps,                          # keep sampling episodes in the stage
                    stage_components=[
                        StageComponent(
                            uuid="offpolicy",
                            storage_uuid="minigrid_offpolicy_expert",
                            loss_names=["offpolicy_expert_ce_loss"],
                            training_settings=TrainingSettings(
                                update_repeats=num_mini_batch * update_repeats,
                                num_mini_batch=1,
                            )
                        )
                    ],
                ),
            ],
            # As we don't have any on-policy losses, we set the next
            # two values to zero to ensure we don't attempt to
            # compute gradients for on-policy rollouts:
            num_mini_batch=0,
            update_repeats=0,
            total_train_steps=total_train_steps,
        )
        # fmt: on
```
You'll have noted that it is possible to combine on-policy and off-policy training in the same stage, even though here
we apply pure off-policy training.

## Training

We recommend using a machine with a CUDA-capable GPU for this experiment. In order to start training, we just need to
invoke

```bash
PYTHONPATH=. python allenact/main.py -b projects/tutorials minigrid_offpolicy_tutorial -m 8 -o <OUTPUT_PATH>
```

Note that with the `-m 8` option we limit to 8 the number of on-policy task sampling processes used between off-policy
updates.

If everything goes well, the training success should quickly reach values around 0.7-0.8 on GPU and converge to values
close to 1 if given sufficient time to train.

If running tensorboard, you'll notice a separate group of scalars named `train-offpolicy-losses` and
`train-offpolicy-misc` with losses, approximate "experiences per second" (i.e. the number of off-policy experiences/steps
being used to update the model per second), and other tracked values in addition to the standard `train-onpolicy-*`
used for on-policy training. In the `train-metrics` and `train-misc` sections you'll find the metrics
quantifying the performance of the agent throughout training and some other plots showing training details.
*Note that the x-axis for these plots is different than for the `train-offpolicy-*` sections*. This
is because these plots use the number of rollout steps as the x-axis (i.e. steps that the trained agent
takes interactively) while the `train-offpolicy-*` plots use the number of off-policy "experiences" that have
been shown to the agent.


A view of the training progress about 5 hours after starting on a CUDA-capable GPU should look similar to the below
(note that training reached >99% success after about 50 minutes).

![off-policy progress](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/minigrid-offpolicy/minigrid-offpolicy-tutorial-tb.png)


================================================
FILE: docs/tutorials/running-inference-on-a-pretrained-model.md
================================================
<!-- DO NOT EDIT THIS FILE. --> 
<!-- THIS FILE WAS AUTOGENERATED FROM 'ALLENACT_BASE_DIR/projects/tutorials/running_inference_tutorial.py', EDIT IT INSTEAD. -->

# Tutorial: Inference with a pre-trained model.
In this tutorial we will run inference on a pre-trained model for the PointNav task
in the RoboTHOR environment. In this task the agent is tasked with going to a specific location
within a realistic 3D environment.

For information on how to train a PointNav Model see [this tutorial](training-a-pointnav-model.md).

We will need to [install the full AllenAct library](../installation/installation-allenact.md#full-library),
the `robothor_plugin` requirements via

```bash
pip install -r allenact_plugins/robothor_plugin/extra_requirements.txt
```

and [download the
RoboTHOR Pointnav dataset](../installation/download-datasets.md) before we get started.

For this tutorial we will download the weights of a model trained on the debug dataset.
This can be done with a handy script in the `pretrained_model_ckpts` directory:
```bash
bash pretrained_model_ckpts/download_navigation_model_ckpts.sh robothor-pointnav-rgb-resnet
```
This will download the weights for an RGB model that has been
trained on the PointNav task in RoboTHOR to `pretrained_model_ckpts/robothor-pointnav-rgb-resnet`.


Next we need to run the inference, using the PointNav experiment config from the
[tutorial on making a PointNav experiment](training-a-pointnav-model.md).
We can do this with the following command:

```bash
PYTHONPATH=. python allenact/main.py <EXPERIMENT_NAME> -o <PATH_TO_OUTPUT> -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> -c <PATH_TO_CHECKPOINT> --eval
```

Where `<EXPERIMENT_NAME>` is the name of the experiment config file (without the `.py` extension),
`<PATH_TO_OUTPUT>` is the location where the results of the test will be dumped, `<PATH_TO_CHECKPOINT>` is the
location of the downloaded model weights, and `<BASE_DIRECTORY_OF_YOUR_EXPERIMENT>` is a path to the directory where
our experiment definition is stored.

For our current setup the following command would work:

```bash
PYTHONPATH=. python allenact/main.py \
training_a_pointnav_model \
-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \
-b projects/tutorials \
-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30/exp_PointNavRobothorRGBPPO__stage_00__steps_000039031200.pt \
--eval
```

For testing on all saved checkpoints we pass a directory to `--checkpoint` rather than just a single file:

```bash
PYTHONPATH=. python allenact/main.py \
training_a_pointnav_model \
-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \
-b projects/tutorials  \
-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30 \
--eval
```
## Visualization

We also show examples of visualizations that can be extracted from the `"valid"` and `"test"` modes. Currently,
visualization is still undergoing design changes and does not support multi-agent tasks, but the available functionality
is sufficient for pointnav in RoboThor.

Following up on the example above, we can make a specialized PointNav `ExperimentConfig` where we instantiate
the base visualization class, `VizSuite`, defined in
[`allenact.utils.viz_utils`](https://github.com/allenai/allenact/tree/master/allenact/utils/viz_utils.py), when in `test` mode.

Each visualization type can be thought of as a plugin to the base `VizSuite`. For example, all `episode_ids` passed to
`VizSuite` will be processed with each of the instantiated visualization types (possibly with the exception of the
`AgentViewViz`). In the example below we show how to instantiate different visualization types from 4 different data
sources.

The data sources available to `VizSuite` are:

* Task output (e.g. 2D trajectories)
* Vector task (e.g. egocentric views)
* Rollout storage (e.g. recurrent memory, taken action logprobs...)
* `ActorCriticOutput` (e.g. action probabilities)

The visualization types included below are:

* `TrajectoryViz`: Generic 2D trajectory view.
* `AgentViewViz`: RGB egocentric view.
* `ActorViz`: Action probabilities from `ActorCriticOutput[CategoricalDistr]`.
* `TensorViz1D`: Evolution of a point from RolloutStorage over time.
* `TensorViz2D`: Evolution of a vector from RolloutStorage over time.
* `ThorViz`: Specialized 2D trajectory view
[for RoboThor](https://github.com/allenai/allenact/tree/master/allenact_plugins/robothor_plugin/robothor_viz.py).

Note that we need to explicitly set the `episode_ids` that we wish to visualize. For `AgentViewViz` we have the option
of using a different (typically shorter) list of episodes or enforcing the ones used for the rest of the visualizations.

```python
class PointNavRoboThorRGBPPOVizExperimentConfig(PointNavRoboThorRGBPPOExperimentConfig):
    """ExperimentConfig used to demonstrate how to set up visualization code.

    # Attributes

    viz_ep_ids : Scene names that will be visualized.
    viz_video_ids : Scene names that will have videos visualizations associated with them.
    """

    viz_ep_ids = [
        "FloorPlan_Train1_1_3",
        "FloorPlan_Train1_1_4",
        "FloorPlan_Train1_1_5",
        "FloorPlan_Train1_1_6",
    ]
    viz_video_ids = [["FloorPlan_Train1_1_3"], ["FloorPlan_Train1_1_4"]]

    viz: Optional[VizSuite] = None

    def get_viz(self, mode):
        if self.viz is not None:
            return self.viz

        self.viz = VizSuite(
            episode_ids=self.viz_ep_ids,
            mode=mode,
            # Basic 2D trajectory visualizer (task output source):
            base_trajectory=TrajectoryViz(
                path_to_target_location=("task_info", "target",),
            ),
            # Egocentric view visualizer (vector task source):
            egeocentric=AgentViewViz(
                max_video_length=100, episode_ids=self.viz_video_ids
            ),
            # Default action probability visualizer (actor critic output source):
            action_probs=ActorViz(figsize=(3.25, 10), fontsize=18),
            # Default taken action logprob visualizer (rollout storage source):
            taken_action_logprobs=TensorViz1D(),
            # Same episode mask visualizer (rollout storage source):
            episode_mask=TensorViz1D(rollout_source=("masks",)),
            # Default recurrent memory visualizer (rollout storage source):
            rnn_memory=TensorViz2D(rollout_source=("memory", "single_belief")),
            # Specialized 2D trajectory visualizer (task output source):
            thor_trajectory=ThorViz(
                figsize=(16, 8),
                viz_rows_cols=(448, 448),
                scenes=("FloorPlan_Train{}_{}", 1, 1, 1, 1),
            ),
        )

        return self.viz

    def machine_params(self, mode="train", **kwargs):
        res = super().machine_params(mode, **kwargs)
        if mode == "test":
            res.set_visualizer(self.get_viz(mode))

        return res
```
Running test on the same downloaded models, but using the visualization-enabled `ExperimentConfig` with

```bash
PYTHONPATH=. python allenact/main.py \
running_inference_tutorial \
-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \
-b projects/tutorials \
-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30/exp_PointNavRobothorRGBPPO__stage_00__steps_000039031200.pt \
--eval
```

generates different types of visualization and logs them in tensorboard. If everything is properly set up and
tensorboard includes the `robothor-pointnav-rgb-resnet` folder, under the `IMAGES` tab, we should see something similar
to

![Visualization example](../img/viz_pretrained_2videos.jpg)


================================================
FILE: docs/tutorials/training-a-pointnav-model.md
================================================
<!-- DO NOT EDIT THIS FILE. --> 
<!-- THIS FILE WAS AUTOGENERATED FROM 'ALLENACT_BASE_DIR/projects/tutorials/training_a_pointnav_model.py', EDIT IT INSTEAD. -->

# Tutorial: PointNav in RoboTHOR.
![RoboTHOR Robot](../img/RoboTHOR_robot.jpg)
## Introduction
One of the most obvious tasks that an embodied agent should master is navigating the world it inhabits.
Before we can teach a robot to cook or clean it first needs to be able to move around. The simplest
way to formulate "moving around" into a task is by making your agent find a beacon somewhere in the environment.
This beacon transmits its location, such that at any time, the agent can get the direction and Euclidean distance
to the beacon. This particular task is often called Point Navigation, or **PointNav** for short.

#### PointNav
At first glance, this task seems trivial. If the agent is given the direction and distance of the target at
all times, can it not simply follow this signal directly? The answer is no, because agents are often trained
on this task in environments that emulate real-world buildings which are not wide-open spaces, but rather
contain many smaller rooms. Because of this, the agent has to learn to navigate human spaces and use doors
and hallways to efficiently navigate from one side of the building to the other. This task becomes particularly
difficult when the agent is tested in an environment that it is not trained in. If the agent does not know
how the floor plan of an environment looks, it has to learn to predict the design of man-made structures,
to efficiently navigate across them, much like how people instinctively know how to move around a building
they have never seen before based on their experience navigating similar buildings.

#### What is an environment anyways?
Environments are worlds in which embodied agents exist. If our embodied agent is simply a neural network that is being
trained in a simulator, then that simulator is its environment. Similarly, if our agent is a
physical robot then its environment is the real world. The agent interacts with the environment by taking one
of several available actions (such as "move forward", or "turn left"). After each action, the environment
produces a new frame that the agent can analyze to determine its next step. For many tasks, including PointNav,
the agent also has a special "stop" action which indicates that the agent thinks it has reached the target.
After this action is called the agent will be reset to a new location, regardless of whether it reached the
target. The hope is that after enough training the agent will learn to correctly assess that it has successfully
navigated to the target.

![RoboTHOR Sim vs. Real](../img/RoboTHOR_sim_real.jpg)

There are many simulators designed for the training
of embodied agents. In this tutorial, we will be using a simulator called [RoboTHOR](https://ai2thor.allenai.org/robothor/),
which is designed specifically to train models that can easily be transferred to a real robot, by providing a
photo-realistic virtual environment and a real-world replica of the environment that researchers can have access to.
RoboTHOR contains 60 different virtual scenes with different floor plans and furniture and 15 validation scenes.

It is also important to mention that **AllenAct**
has a class abstraction called Environment. This is not the actual simulator game engine or robotics controller,
but rather a shallow wrapper that provides a uniform interface to the actual environment.

#### Learning algorithm
Finally, let us briefly touch on the algorithm that we will use to train our embodied agent to navigate. While
*AllenAct* offers us great flexibility to train models using complex pipelines, we will be using a simple
pure reinforcement learning approach for this tutorial. More specifically, we will be using DD-PPO,
a decentralized and distributed variant of the ubiquitous PPO algorithm. For those unfamiliar with Reinforcement
Learning we highly recommend [this tutorial](http://karpathy.github.io/2016/05/31/rl/) by Andrej Karpathy, and [this
book](http://www.incompleteideas.net/book/the-book-2nd.html) by Sutton and Barto. Essentially what we are doing
is letting our agent explore the environment on its own, rewarding it for taking actions that bring it closer
to its goal and penalizing it for actions that take it away from its goal. We then optimize the agent's model
to maximize this reward.

## Requirements
To train the model on the PointNav task, we need to [install the RoboTHOR environment](../installation/installation-framework.md)
and [download the RoboTHOR PointNav dataset](../installation/download-datasets.md).

The dataset contains a list of episodes with thousands of randomly generated starting positions and target locations for each of the scenes
as well as a precomputed cache of distances, containing the shortest path from each point in a scene, to every other point in that scene.
This is used to reward the agent for moving closer to the target in terms of geodesic distance - the actual path distance (as opposed to a
straight line distance).

## Config File Setup
Now comes the most important part of the tutorial: we are going to write an experiment config file.
If this is your first experience with experiment config files in AllenAct, we suggest that you
first see our how-to on [defining an experiment](../howtos/defining-an-experiment.md) which will
walk you through creating a simplified experiment config file.

Unlike a library that can be imported into python, **AllenAct** is structured as a framework with a runner script called
`main.py` which will run the experiment specified in a config file. This design forces us to keep meticulous records of
exactly which settings were used to produce a particular result,
which can be very useful given how expensive RL models are to train.

The `projects/` directory is home to different projects using `AllenAct`. Currently it is populated with baselines
of popular tasks and tutorials.

We already have all the code for this tutorial stored in `projects/tutorials/training_a_pointnav_model.py`. We will
be using this file to run our experiments, but you can create a new directory in `projects/` and start writing your
experiment there.

We start off by importing everything we will need:

```python
import glob
import os
from math import ceil
from typing import Dict, Any, List, Optional, Sequence

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from torchvision import models

from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor
from allenact.utils.experiment_utils import (
    Builder,
    PipelineStage,
    TrainingPipeline,
    LinearDecay,
    evenly_distribute_count_into_bins,
)
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from allenact_plugins.robothor_plugin.robothor_task_samplers import (
    PointNavDatasetTaskSampler,
)
from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask
from projects.pointnav_baselines.models.point_nav_models import (
    ResnetTensorPointNavActorCritic,
)
```
Next we define a new experiment config class:
```python
class PointNavRoboThorRGBPPOExperimentConfig(ExperimentConfig):
    """A Point Navigation experiment configuration in RoboThor."""
```
We then define the task parameters. For PointNav, these include the maximum number of steps our agent
can take before being reset (this prevents the agent from wandering on forever), and a configuration
for the reward function that we will be using.

```python
    # Task Parameters
    MAX_STEPS = 500
    REWARD_CONFIG = {
        "step_penalty": -0.01,
        "goal_success_reward": 10.0,
        "failed_stop_reward": 0.0,
        "shaping_weight": 1.0,
    }
```
In this case, we set the maximum number of steps to 500.
We give the agent a reward of -0.01 for each action that it takes (this is to encourage it to reach the goal
in as few actions as possible), and a reward of 10.0 if the agent manages to successfully reach its destination.
If the agent selects the `stop` action without reaching the target we do not punish it (although this is
sometimes useful for preventing the agent from stopping prematurely). Finally, our agent gets rewarded if it moves
closer to the target and gets punished if it moves further away. `shaping_weight` controls how strong this signal should
be and is here set to 1.0. These parameters work well for training an agent on PointNav, but feel free to play around
with them.

Next, we set the parameters of the simulator itself. Here we select a resolution at which the engine will render
every frame (640 by 480) and a resolution at which the image will be fed into the neural network (here it is set
to a 224 by 224 box).

```python
    # Simulator Parameters
    CAMERA_WIDTH = 640
    CAMERA_HEIGHT = 480
    SCREEN_SIZE = 224
```
Next, we set the hardware parameters for the training engine. `NUM_PROCESSES` sets the total number of parallel
processes that will be used to train the model. In general, more processes result in faster training,
but since each process is a unique instance of the environment in which we are training they can take up a
lot of memory. Depending on the size of the model, the environment, and the hardware we are using, we may
need to adjust this number, but for a setup with 8 GTX Titans, 60 processes work fine. 60 also happens to
be the number of training scenes in RoboTHOR, which allows each process to load only a single scene into
memory, saving time and space.

`TRAINING_GPUS` takes the ids of the GPUs on which
the model should be trained. Similarly, `VALIDATION_GPUS` and `TESTING_GPUS` hold the ids of the GPUs on which
the validation and testing will occur. During training, a validation process is constantly running and evaluating
the current model, to show the progress on the validation set, so reserving a GPU for validation can be a good idea.
If our hardware setup does not include a GPU, these fields can be set to empty lists, as the codebase will default
to running everything on the CPU with only 1 process.

```python
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    NUM_PROCESSES = 20
    TRAINING_GPUS: Sequence[int] = [0]
    VALIDATION_GPUS: Sequence[int] = [0]
    TESTING_GPUS: Sequence[int] = [0]
```
Since we are using a dataset to train our model, we need to define the path to where we have stored it. If we
downloaded the dataset as instructed above, we can define the path as follows:

```python
    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/debug")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/debug")
```
Next, we define the sensors. `RGBSensorThor` is the environment's implementation of an RGB sensor. It takes the
raw image output by the simulator and resizes it to the input dimensions for our neural network that we
specified above. It also performs normalization if we want. `GPSCompassSensorRoboThor` is a sensor that tracks
the point our agent needs to move to. It tells us the direction and distance to our goal at every time step.

```python
    SENSORS = [
        RGBSensorThor(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]
```
For the sake of this example, we are also going to be using a preprocessor with our model. In *AllenAct*
the preprocessor abstraction is designed with large models with frozen weights in mind. These models often
hail from the ResNet family and transform the raw pixels that our agent observes in the environment, into a
complex embedding, which then gets stored and used as input to our trainable model instead of the original image.
Most other preprocessing work is done in the sensor classes (as we just saw with the RGB
sensor scaling and normalizing our input), but for the sake of efficiency, all neural network preprocessing should
use this abstraction.

```python
    PREPROCESSORS = [
        Builder(
            ResNetPreprocessor,
            {
                "input_height": SCREEN_SIZE,
                "input_width": SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": ["rgb_lowres"],
                "output_uuid": "rgb_resnet",
            },
        ),
    ]
```
Next, we must define all of the observation inputs that our model will use. These are just
the hardcoded ids of the sensors we are using in the experiment.

```python
    OBSERVATIONS = [
        "rgb_resnet",
        "target_coordinates_ind",
    ]
```
Finally, we must define the settings of our simulator. We set the camera dimensions to the values
we defined earlier. We set rotateStepDegrees to 30 degrees, which means that every time the agent takes a
turn action, they will rotate by 30 degrees. We set grid size to 0.25 which means that every time the
agent moves forward, it will do so by 0.25 meters.

```python
    ENV_ARGS = dict(
        width=CAMERA_WIDTH,
        height=CAMERA_HEIGHT,
        rotateStepDegrees=30.0,
        visibilityDistance=1.0,
        gridSize=0.25,
    )
```
Now we move on to the methods that we must define to finish implementing an experiment config. Firstly we
have a simple method that just returns the name of the experiment.

```python
    @classmethod
    def tag(cls):
        return "PointNavRobothorRGBPPO"
```
Next, we define the training pipeline. In this function, we specify exactly which algorithm or algorithms
we will use to train our model. In this simple example, we are using the PPO loss with a learning rate of 3e-4.
We specify 250 million steps of training and a rollout length of 30 with the `ppo_steps` and `num_steps` parameters
respectively. All the other standard PPO parameters are also present in this function. `metric_accumulate_interval`
sets the frequency at which data is accumulated from all the processes and logged while `save_interval` sets how
often we save the model weights and run validation on them.

```python
    @classmethod
    def training_pipeline(cls, **kwargs):
        ppo_steps = int(250000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 3
        num_steps = 30
        save_interval = 5000000
        log_interval = 1000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5
        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={"ppo_loss": PPO(**PPOConfig)},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps)
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )
```
The `machine_params` method returns the hardware parameters of each
process, based on the list of devices we defined above.

```python
    def machine_params(self, mode="train", **kwargs):
        sampler_devices: List[int] = []
        if mode == "train":
            workers_per_device = 1
            gpu_ids = (
                []
                if not torch.cuda.is_available()
                else list(self.TRAINING_GPUS) * workers_per_device
            )
            nprocesses = (
                8
                if not torch.cuda.is_available()
                else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))
            )
            sampler_devices = list(self.TRAINING_GPUS)
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available() else self.VALIDATION_GPUS
        elif mode == "test":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available() else self.TESTING_GPUS
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        sensor_preprocessor_graph = (
            SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
                preprocessors=self.PREPROCESSORS,
            )
            if mode == "train"
            or (
                (isinstance(nprocesses, int) and nprocesses > 0)
                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
            )
            else None
        )

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sampler_devices=sampler_devices
            if mode == "train"
            else gpu_ids,  # ignored with > 1 gpu_ids
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )
```
Now we define the actual model that we will be using. **AllenAct** offers first-class support for PyTorch,
so any PyTorch model that implements the provided `ActorCriticModel` class will work here. Here we borrow a model from the `pointnav_baselines` project (which
unsurprisingly contains several PointNav baselines). It is a small convolutional network that expects the output of a ResNet as its RGB input, followed by a single-layered GRU. The model accepts as input the number of different
actions our agent can perform in the environment through the `action_space` parameter, which we get from the task definition. We also define the shape of the inputs we are going to be passing to the model with `observation_space`.
We specify the names of our sensors with `goal_sensor_uuid` and `rgb_resnet_preprocessor_uuid`. Finally, we define
the size of our RNN with `hidden_size` and the size of the embedding of our goal sensor data (the direction and
distance to the target) with `goal_dims`.

```python
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        return ResnetTensorPointNavActorCritic(
            action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            goal_sensor_uuid="target_coordinates_ind",
            rgb_resnet_preprocessor_uuid="rgb_resnet",
            hidden_size=512,
            goal_dims=32,
        )
```
We also need to define the task sampler that we will be using. This is a piece of code that generates instances
of tasks for our agent to perform (essentially starting locations and targets for PointNav). Since we are getting
our tasks from a dataset, the task sampler is a very simple piece of code that just reads the specified file and moves
the agent to the next starting location whenever the agent exceeds the maximum number of steps or selects the
`stop` action.

```python
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        return PointNavDatasetTaskSampler(**kwargs)
```
You might notice that we did not specify the task sampler's arguments, but are rather passing them in. The
reason for this is that each process will have its own task sampler, and we need to specify exactly which scenes
each process should work with. If we have several GPUs and many scenes, this process of distributing the work can be rather complicated, so we define a few helper functions to do just this.

```python
    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        path = os.path.join(scenes_dir, "*.json.gz")
        scenes = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)]
        if len(scenes) == 0:
            raise RuntimeError(
                (
                    "Could find no scene dataset information in directory {}."
                    " Are you sure you've downloaded them? "
                    " If not, see https://allenact.org/installation/download-datasets/ information"
                    " on how this can be done."
                ).format(scenes_dir)
            )
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisible by the number of scenes"
                )
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        else:
            if len(scenes) % total_processes != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisor of the number of scenes"
                )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }
```
The very last things we need to define are the sampler arguments themselves. We define them separately for a train,
validation, and test sampler, but in this case, they are almost the same. The arguments need to include the location
of the dataset and distance cache as well as the environment arguments for our simulator, both of which we defined above
and are just referencing here. The only consequential differences between these task samplers are the path to the dataset
we are using (train or validation) and whether we want to loop over the dataset or not (we want this for training since
we want to train for several epochs, but we do not need this for validation and testing). Since the test scenes of
RoboTHOR are private we are also testing on our validation set.

```python
    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.TRAIN_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.TRAIN_DATASET_DIR
        res["loop_dataset"] = True
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        res["env_args"]["x_display"] = (
            ("0.%d" % devices[process_ind % len(devices)])
            if devices is not None and len(devices) > 0
            else None
        )
        res["allow_flipping"] = True
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        res["env_args"]["x_display"] = (
            ("0.%d" % devices[process_ind % len(devices)])
            if devices is not None and len(devices) > 0
            else None
        )
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        return res
```
This is it! If we copy all of the code into a file we should be able to run our experiment!

## Training Model On Debug Dataset
We can test if our installation worked properly by training our model on a small dataset of 4 episodes. This
should take about 20 minutes on a computer with an NVIDIA GPU.

We can now train a model by running:
```bash
PYTHONPATH=. python allenact/main.py -o <PATH_TO_OUTPUT> -c -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> <EXPERIMENT_NAME>
```
If using the same configuration as we have set up, the following command should work:
```bash
PYTHONPATH=. python allenact/main.py training_a_pointnav_model -o storage/robothor-pointnav-rgb-resnet-resnet -b projects/tutorials
```
If we start up a tensorboard server during training and specify that `output_dir=storage` the output should look
something like this:
![tensorboard output](../img/point-nav-baseline-tb.png)

## Training Model On Full Dataset
We can also train the model on the full dataset by changing back our dataset path and running the same command as above.
But be aware, training this takes nearly 2 days on a machine with 8 GPUs.

## Testing Model
To test the performance of a model please refer to [this tutorial](running-inference-on-a-pretrained-model.md).

## Conclusion
In this tutorial, we learned how to create a new PointNav experiment using **AllenAct**. There are many simple
and obvious ways to modify the experiment from here - changing the model, the learning algorithm and the environment
each requires very few lines of code changed in the above file, allowing us to explore our embodied AI research ideas
across different frameworks with ease.


================================================
FILE: docs/tutorials/training-pipelines.md
================================================
# Tutorial: IL to RL with a training pipeline


================================================
FILE: docs/tutorials/transfering-to-a-different-environment-framework.md
================================================
# Tutorial: Swapping in a new environment

**Note** The provided paths in this tutorial assume you have
[installed the full library](../installation/installation-allenact.md#full-library).

## Introduction
This tutorial was designed as a continuation of the `Robothor PointNav Tutorial` and explains
how to modify the experiment config created in that tutorial to work with the iTHOR and
Habitat environments.

Cross-platform support is one of the key design goals of `allenact`. This is achieved through
a total decoupling of the environment code from the engine, model and algorithm code, so that
swapping in a new environment is as plug and play as possible. Crucially we will be able to 
run a model on different environments without touching the model code at all, which will allow
us to train neural networks in one environment and test them in another.

## RoboTHOR to iTHOR
![iTHOR Framework](../img/iTHOR_framework.jpg)
Since both the `RoboTHOR` and the `iTHOR` environment stem from the same family and are developed
by the same organization, switching between the two is incredibly easy. We only have to change
the path parameter to point to an iTHOR dataset rather than the RoboTHOR one.

```python
    # Dataset Parameters
    TRAIN_DATASET_DIR = "datasets/ithor-pointnav/train"
    VAL_DATASET_DIR = "datasets/ithor-pointnav/val"
```

We also have to download the `iTHOR-PointNav` dataset, following [these instructions](../installation/download-datasets.md).

We might also want to modify the `tag` method to accurately reflect our config but this will not change
the behavior at all and is merely a bookkeeping convenience.
```python
    @classmethod
    def tag(cls):
        """Return the experiment's name, renamed here to reflect the iTHOR config."""
        return "PointNaviThorRGBPPO"
```

## RoboTHOR to Habitat
![Habitat Framework](../img/habitat_framework.jpg)

To train experiments using the Habitat framework we need to install it following [these instructions](../installation/installation-framework.md).

Since the RoboTHOR and Habitat simulators are sufficiently different and have different parameters to configure,
this transformation takes a bit more effort, but we only need to modify the environment config and TaskSampler (we
have to change the former because the Habitat simulator accepts a different format of configuration and the latter
because the Habitat dataset is formatted differently and thus needs to be parsed differently).

As part of our environment modification, we need to switch from using RoboTHOR sensors to using Habitat sensors.
The sensor implementations we provide offer a uniform interface across all the environments, so we simply have
to swap out our sensor classes:
```python
    SENSORS = [
        DepthSensorHabitat(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_normalization=True,
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
    ]
```

Next we need to define the simulator config:

```python
    CONFIG = get_habitat_config("configs/gibson.yaml")
    CONFIG.defrost()
    CONFIG.NUM_PROCESSES = NUM_PROCESSES
    CONFIG.SIMULATOR_GPU_IDS = TRAIN_GPUS
    CONFIG.DATASET.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR
    CONFIG.DATASET.POINTNAVV1.CONTENT_SCENES = ["*"]
    CONFIG.DATASET.DATA_PATH = TRAIN_SCENES
    CONFIG.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR"]
    CONFIG.SIMULATOR.RGB_SENSOR.WIDTH = CAMERA_WIDTH
    CONFIG.SIMULATOR.RGB_SENSOR.HEIGHT = CAMERA_HEIGHT
    CONFIG.SIMULATOR.TURN_ANGLE = 30
    CONFIG.SIMULATOR.FORWARD_STEP_SIZE = 0.25
    CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = MAX_STEPS

    CONFIG.TASK.TYPE = "Nav-v0"
    CONFIG.TASK.SUCCESS_DISTANCE = 0.2
    CONFIG.TASK.SENSORS = ["POINTGOAL_WITH_GPS_COMPASS_SENSOR"]
    CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.GOAL_FORMAT = "POLAR"
    CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.DIMENSIONALITY = 2
    CONFIG.TASK.GOAL_SENSOR_UUID = "pointgoal_with_gps_compass"
    CONFIG.TASK.MEASUREMENTS = ["DISTANCE_TO_GOAL", "SUCCESS", "SPL"]
    CONFIG.TASK.SPL.TYPE = "SPL"
    CONFIG.TASK.SPL.SUCCESS_DISTANCE = 0.2
    CONFIG.TASK.SUCCESS.SUCCESS_DISTANCE = 0.2

    CONFIG.MODE = "train"
```
This `CONFIG` object holds very similar values to the ones `ENV_ARGS` held in the RoboTHOR example. We
decided to leave this way of passing in configurations exposed to the user to offer maximum customization
of the underlying environment.

Finally we need to replace the task sampler and its argument generating functions:

```python
    # Define Task Sampler
from allenact_plugins.habitat_plugin.habitat_task_samplers import PointNavTaskSampler


@classmethod
def make_sampler_fn(cls, **kwargs) -> TaskSampler:
    return PointNavTaskSampler(**kwargs)


def train_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Build the keyword arguments for the training task sampler of one process.

    The Habitat config is looked up in `self.TRAIN_CONFIGS_PER_PROCESS` by
    `process_ind`. The other parameters are part of the signature but are
    not read by this implementation.

    Returns:
        A dict with the environment config, step limit, sensors, action
        space, and distance-to-goal setting.
    """
    config = self.TRAIN_CONFIGS_PER_PROCESS[process_ind]
    return {
        "env_config": config,
        "max_steps": self.MAX_STEPS,
        "sensors": self.SENSORS,
        # Queried on the class itself, so no task instance is needed.
        "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
        "distance_to_goal": self.DISTANCE_TO_GOAL,
    }


def valid_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Build the keyword arguments for the validation task sampler.

    A clone of `self.CONFIG` is pointed at `self.VALID_SCENES_PATH` and
    switched to "validate" mode, so the shared config is left untouched.
    None of the parameters are read by this implementation.

    Returns:
        A dict with the environment config, step limit, sensors, action
        space, and distance-to-goal setting.
    """
    config = self.CONFIG.clone()
    # The clone must be defrosted before it can be edited, then re-frozen.
    config.defrost()
    config.DATASET.DATA_PATH = self.VALID_SCENES_PATH
    config.MODE = "validate"
    config.freeze()
    return {
        "env_config": config,
        "max_steps": self.MAX_STEPS,
        "sensors": self.SENSORS,
        # Queried on the class itself, so no task instance is needed.
        "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
        "distance_to_goal": self.DISTANCE_TO_GOAL,
    }


def test_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Build the keyword arguments for the test task sampler of one process.

    The Habitat config is looked up in `self.TEST_CONFIGS` by `process_ind`.
    The other parameters are part of the signature but are not read by this
    implementation.

    Returns:
        A dict with the environment config, step limit, sensors, action
        space, and distance-to-goal setting.
    """
    config = self.TEST_CONFIGS[process_ind]
    return {
        "env_config": config,
        "max_steps": self.MAX_STEPS,
        "sensors": self.SENSORS,
        # Queried on the class itself, so no task instance is needed.
        "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
        "distance_to_goal": self.DISTANCE_TO_GOAL,
    }
```

As we can see, this code looks very similar as well; we simply need to pass slightly different parameters.

## Conclusion
In this tutorial, we learned how to modify our experiment configurations to work with different environments. By
providing a high level of modularity and out-of-the-box support for both `Habitat` and `THOR`, two of the most popular embodied frameworks out there, **AllenAct** hopes to give researchers the ability to validate their results across many platforms and help guide them towards genuine progress. The source code for this tutorial can be found in `/projects/framework_transfer_tutorial`.


================================================
FILE: main.py
================================================
#!/usr/bin/env python3
"""Entry point to training/validating/testing for a user given experiment
name."""
import allenact.main

if __name__ == "__main__":
    # Only run when this file is executed as a script (not when imported):
    # hand control to the `main` function of the `allenact.main` module.
    allenact.main.main()


================================================
FILE: mkdocs.yml
================================================
site_name: AllenAct
site_description: An open source framework for research in Embodied-AI from AI2
site_url: https://allenact.org

theme:
  name: material
  custom_dir: overrides
  palette:
    primary: blue
    accent: grey
  logo: img/AI2_Avatar_White.png
  favicon: img/AllenAct_A.svg
  highlightjs: true
  hljs_languages:
  - python
  - typescript
  - json

extra_css:
- css/extra.css

google_analytics: [UA-120916510-8, allenact.org]

repo_name: allenai/allenact
repo_url: https://github.com/allenai/allenact
docs_dir: docs

nav:
- Overview: index.md
- Installation:
  - Install AllenAct: installation/installation-allenact.md
  - Install environments: installation/installation-framework.md
  - Download datasets: installation/download-datasets.md
- Getting started:
  - Run your first experiment: getting_started/running-your-first-experiment.md
  - Primary abstractions: getting_started/abstractions.md
  - Structure of the codebase: getting_started/structure.md
- Tutorials:
  - AllenAct Tutorials: tutorials/index.md
  - Navigation in Minigrid: tutorials/minigrid-tutorial.md
  - PointNav in RoboTHOR: tutorials/training-a-pointnav-model.md
  - Swapping environments: tutorials/transfering-to-a-different-environment-framework.md
  - Using a pre-trained model: tutorials/running-inference-on-a-pretrained-model.md
  - Off-policy training: tutorials/offpolicy-tutorial.md
  - OpenAI gym for continuous control: tutorials/gym-tutorial.md
  - Multi-node ObjectNav training: tutorials/distributed-objectnav-tutorial.md
  - OpenAI gym for MuJoCo tasks: tutorials/gym-mujoco-tutorial.md
#  - IL to RL with pipelines: tutorials/training-pipelines.md
- HowTos:
  - Define an experiment: howtos/defining-an-experiment.md
  - Change rewards and losses: howtos/changing-rewards-and-losses.md
  - Define a new model: howtos/defining-a-new-model.md
  - Define a new task: howtos/defining-a-new-task.md
  - Define a new training pipeline: howtos/defining-a-new-training-pipeline.md
  # - Visualize results: howtos/visualizing-results.md
  # - Run a multi-agent experiment: howtos/running-a-multi-agent-experiment.md
- Projects:
  - BabyAI baselines: projects/babyai_baselines/README.md
  - PointNav baselines: projects/pointnav_baselines/README.md
  - ObjectNav baselines: projects/objectnav_baselines/README.md
  # - Advisor code: projects/advisor_2020/README.md
  # - Two Body Problem code: projects/two_body_problem_2019/README.md
- FAQ: FAQ.md
- Contributing: CONTRIBUTING.md
- Licence: LICENSE.md
- API:
  - allenact:
    - _constants: api/allenact/_constants.md
    - embodiedai:
      - mapping:
        - mapping_utils:
          - map_builders: api/allenact/embodiedai/mapping/mapping_utils/map_builders.md
          - point_cloud_utils: api/allenact/embodiedai/mapping/mapping_utils/point_cloud_utils.md
        - mapping_losses: api/allenact/embodiedai/mapping/mapping_losses.md
        - mapping_models:
          - active_neural_slam: api/allenact/embodiedai/mapping/mapping_models/active_neural_slam.md
      - preprocessors:
        - resnet: api/allenact/embodiedai/preprocessors/resnet.md
      - sensors:
        - vision_sensors: api/allenact/embodiedai/sensors/vision_sensors.md
      - models:
        - aux_models: api/allenact/embodiedai/models/aux_models.md
        - basic_models: api/allenact/embodiedai/models/basic_models.md
        - resnet: api/allenact/embodiedai/models/resnet.md
        - fusion_models: api/allenact/embodiedai/models/fusion_models.md
        - visual_nav_models: api/allenact/embodiedai/models/visual_nav_models.md
      - storage:
        - vdr_storage: api/allenact/embodiedai/storage/vdr_storage.md
      - aux_losses:
        - losses: api/allenact/embodiedai/aux_losses/losses.md
    - base_abstractions:
      - experiment_config: api/allenact/base_abstractions/experiment_config.md
      - misc: api/allenact/base_abstractions/misc.md
      - task: api/allenact/base_abstractions/task.md
      - sensor: api/allenact/base_abstractions/sensor.md
      - preprocessor: api/allenact/base_abstractions/preprocessor.md
      - distributions: api/allenact/base_abstractions/distributions.md
    - algorithms:
      - onpolicy_sync:
        - losses:
          - grouped_action_imitation: api/allenact/algorithms/onpolicy_sync/losses/grouped_action_imitation.md
          - imitation: api/allenact/algorithms/onpolicy_sync/losses/imitation.md
          - abstract_loss: api/allenact/algorithms/onpolicy_sync/losses/abstract_loss.md
          - ppo: api/allenact/algorithms/onpolicy_sync/losses/ppo.md
          - a2cacktr: api/allenact/algorithms/onpolicy_sync/losses/a2cacktr.md
        - misc: api/allenact/algorithms/onpolicy_sync/misc.md
        - runner: api/allenact/algorithms/onpolicy_sync/runner.md
        - policy: api/allenact/algorithms/onpolicy_sync/policy.md
        - engine: api/allenact/algorithms/onpolicy_sync/engine.md
        - vector_sampled_tasks: api/allenact/algorithms/onpolicy_sync/vector_sampled_tasks.md
        - storage: api/allenact/algorithms/onpolicy_sync/storage.md
      - offpolicy_sync:
        - losses:
          - abstract_offpolicy_loss: api/allenact/algorithms/offpolicy_sync/losses/abstract_offpolicy_loss.md
    - utils:
      - model_utils: api/allenact/utils/model_utils.md
      - experiment_utils: api/allenact/utils/experiment_utils.md
      - spaces_utils: api/allenact/utils/spaces_utils.md
      - system: api/allenact/utils/system.md
      - cacheless_frcnn: api/allenact/utils/cacheless_frcnn.md
      - misc_utils: api/allenact/utils/misc_utils.md
      - multi_agent_viz_utils: api/allenact/utils/multi_agent_viz_utils.md
      - viz_utils: api/allenact/utils/viz_utils.md
      - tensor_utils: api/allenact/utils/tensor_utils.md
      - cache_utils: api/allenact/utils/cache_utils.md
  - allenact_plugins:
    - habitat_plugin:
      - habitat_constants: api/allenact_plugins/habitat_plugin/habitat_constants.md
      - habitat_tasks: api/allenact_plugins/habitat_plugin/habitat_tasks.md
      - habitat_sensors: api/allenact_plugins/habitat_plugin/habitat_sensors.md
      - habitat_environment: api/allenact_plugins/habitat_plugin/habitat_environment.md
      - habitat_preprocessors: api/allenact_plugins/habitat_plugin/habitat_preprocessors.md
      - habitat_task_samplers: api/allenact_plugins/habitat_plugin/habitat_task_samplers.md
      - scripts:
        - agent_demo: api/allenact_plugins/habitat_plugin/scripts/agent_demo.md
        - make_map: api/allenact_plugins/habitat_plugin/scripts/make_map.md
      - habitat_utils: api/allenact_plugins/habitat_plugin/habitat_utils.md
    - lighthouse_plugin:
      - lighthouse_models: api/allenact_plugins/lighthouse_plugin/lighthouse_models.md
      - lighthouse_environment: api/allenact_plugins/lighthouse_plugin/lighthouse_environment.md
      - lighthouse_tasks: api/allenact_plugins/lighthouse_plugin/lighthouse_tasks.md
      - lighthouse_sensors: api/allenact_plugins/lighthouse_plugin/lighthouse_sensors.md
      - lighthouse_util: api/allenact_plugins/lighthouse_plugin/lighthouse_util.md
    - babyai_plugin:
      - babyai_constants: api/allenact_plugins/babyai_plugin/babyai_constants.md
      - babyai_models: api/allenact_plugins/babyai_plugin/babyai_models.md
      - scripts:
        - truncate_expert_demos: api/allenact_plugins/babyai_plugin/scripts/truncate_expert_demos.md
        - get_instr_length_percentiles: api/allenact_plugins/babyai_plugin/scripts/get_instr_length_percentiles.md
        - download_babyai_expert_demos: api/allenact_plugins/babyai_plugin/scripts/download_babyai_expert_demos.md
      - babyai_tasks: api/allenact_plugins/babyai_plugin/babyai_tasks.md
    - ithor_plugin:
      - ithor_tasks: api/allenact_plugins/ithor_plugin/ithor_tasks.md
      - ithor_environment: api/allenact_plugins/ithor_plugin/ithor_environment.md
      - ithor_constants: api/allenact_plugins/ithor_plugin/ithor_constants.md
      - ithor_util: api/allenact_plugins/ithor_plugin/ithor_util.md
      - ithor_sensors: api/allenact_plugins/ithor_plugin/ithor_sensors.md
      - scripts:
        - make_objectnav_debug_dataset: api/allenact_plugins/ithor_plugin/scripts/make_objectnav_debug_dataset.md
        - make_pointnav_debug_dataset: api/allenact_plugins/ithor_plugin/scripts/make_pointnav_debug_dataset.md
      - ithor_viz: api/allenact_plugins/ithor_plugin/ithor_viz.md
      - ithor_task_samplers: api/allenact_plugins/ithor_plugin/ithor_task_samplers.md
    - robothor_plugin:
      - robothor_preprocessors: api/allenact_plugins/robothor_plugin/robothor_preprocessors.md
      - robothor_task_samplers: api/allenact_plugins/robothor_plugin/robothor_task_samplers.md
      - robothor_environment: api/allenact_plugins/robothor_plugin/robothor_environment.md
      - robothor_constants: api/allenact_plugins/robothor_plugin/robothor_constants.md
      - robothor_distributions: api/allenact_plugins/robothor_plugin/robothor_distributions.md
      - robothor_models: api/allenact_plugins/robothor_plugin/robothor_models.md
      - robothor_tasks: api/allenact_plugins/robothor_plugin/robothor_tasks.md
      - scripts:
        - make_objectnav_debug_dataset: api/allenact_plugins/robothor_plugin/scripts/make_objectnav_debug_dataset.md
        - make_pointnav_debug_dataset: api/allenact_plugins/robothor_plugin/scripts/make_pointnav_debug_dataset.md
      - robothor_sensors: api/allenact_plugins/robothor_plugin/robothor_sensors.md
      - robothor_viz: api/allenact_plugins/robothor_plugin/robothor_viz.md
    - minigrid_plugin:
      - minigrid_tasks: api/allenact_plugins/minigrid_plugin/minigrid_tasks.md
      - minigrid_environments: api/allenact_plugins/minigrid_plugin/minigrid_environments.md
      - minigrid_offpolicy: api/allenact_plugins/minigrid_plugin/minigrid_offpolicy.md
      - minigrid_sensors: api/allenact_plugins/minigrid_plugin/minigrid_sensors.md
      - configs:
        - minigrid_nomemory: api/allenact_plugins/minigrid_plugin/configs/minigrid_nomemory.md
      - minigrid_models: api/allenact_plugins/minigrid_plugin/minigrid_models.md
    - manipulathor_plugin:
      - manipulathor_viz: api/allenact_plugins/manipulathor_plugin/manipulathor_viz.md
      - manipulathor_tasks: api/allenact_plugins/manipulathor_plugin/manipulathor_tasks.md
      - manipulathor_task_samplers: api/allenact_plugins/manipulathor_plugin/manipulathor_task_samplers.md
      - manipulathor_constants: api/allenact_plugins/manipulathor_plugin/manipulathor_constants.md
      - armpointnav_constants: api/allenact_plugins/manipulathor_plugin/armpointnav_constants.md
      - manipulathor_sensors: api/allenact_plugins/manipulathor_plugin/manipulathor_sensors.md
      - arm_calculation_utils: api/allenact_plugins/manipulathor_plugin/arm_calculation_utils.md
      - manipulathor_utils: api/allenact_plugins/manipulathor_plugin/manipulathor_utils.md
      - manipulathor_environment: api/allenact_plugins/manipulathor_plugin/manipulathor_environment.md
    - gym_plugin:
      - gym_environment: api/allenact_plugins/gym_plugin/gym_environment.md
      - gym_sensors: api/allenact_plugins/gym_plugin/gym_sensors.md
      - gym_distributions: api/allenact_plugins/gym_plugin/gym_distributions.md
      - gym_models: api/allenact_plugins/gym_plugin/gym_models.md
      - gym_tasks: api/allenact_plugins/gym_plugin/gym_tasks.md
  - constants: api/constants.md
  - projects:
    - gym_baselines:
      - experiments:
        - gym_base: api/projects/gym_baselines/experiments/gym_base.md
        - gym_humanoid_base: api/projects/gym_baselines/experiments/gym_humanoid_base.md
        - gym_mujoco_base: api/projects/gym_baselines/experiments/gym_mujoco_base.md
        - gym_humanoid_ddppo: api/projects/gym_baselines/experiments/gym_humanoid_ddppo.md
        - mujoco:
          - gym_mujoco_swimmer_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_swimmer_ddppo.md
          - gym_mujoco_reacher_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_reacher_ddppo.md
          - gym_mujoco_walker2d_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_walker2d_ddppo.md
          - gym_mujoco_halfcheetah_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_halfcheetah_ddppo.md
          - gym_mujoco_humanoid_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_humanoid_ddppo.md
          - gym_mujoco_inverteddoublependulum_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_inverteddoublependulum_ddppo.md
          - gym_mujoco_ant_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_ant_ddppo.md
          - gym_mujoco_hopper_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_hopper_ddppo.md
          - gym_mujoco_invertedpendulum_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_invertedpendulum_ddppo.md
        - gym_mujoco_ddppo: api/projects/gym_baselines/experiments/gym_mujoco_ddppo.md
      - models:
        - gym_models: api/projects/gym_baselines/models/gym_models.md
    - objectnav_baselines:
      - experiments:
        - robothor:
          - objectnav_robothor_base: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_base.md
          - objectnav_robothor_rgb_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnetgru_ddppo.md
          - objectnav_robothor_rgb_resnetgru_dagger: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnetgru_dagger.md
          - objectnav_robothor_rgbd_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnetgru_ddppo.md
          - objectnav_robothor_rgb_resnetgru_ddppo_and_gbc: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnetgru_ddppo_and_gbc.md
          - objectnav_robothor_rgb_unfrozenresnet_gru_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_unfrozenresnet_gru_ddppo.md
          - objectnav_robothor_rgb_unfrozenresnet_gru_vdr_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_unfrozenresnet_gru_vdr_ddppo.md
          - objectnav_robothor_depth_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_depth_resnetgru_ddppo.md
        - objectnav_mixin_unfrozenresnet_gru: api/projects/objectnav_baselines/experiments/objectnav_mixin_unfrozenresnet_gru.md
        - ithor:
          - objectnav_ithor_rgbd_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgbd_resnetgru_ddppo.md
          - objectnav_ithor_depth_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_depth_resnetgru_ddppo.md
          - objectnav_ithor_rgb_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgb_resnetgru_ddppo.md
          - objectnav_ithor_base: api/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_base.md
        - objectnav_thor_mixin_ddppo_and_gbc: api/projects/objectnav_baselines/experiments/objectnav_thor_mixin_ddppo_and_gbc.md
        - objectnav_thor_base: api/projects/objectnav_baselines/experiments/objectnav_thor_base.md
        - objectnav_mixin_resnetgru: api/projects/objectnav_baselines/experiments/objectnav_mixin_resnetgru.md
        - objectnav_base: api/projects/objectnav_baselines/experiments/objectnav_base.md
        - objectnav_mixin_ddppo: api/projects/objectnav_baselines/experiments/objectnav_mixin_ddppo.md
        - objectnav_mixin_dagger: api/projects/objectnav_baselines/experiments/objectnav_mixin_dagger.md
      - models:
        - object_nav_models: api/projects/objectnav_baselines/models/object_nav_models.md
    - babyai_baselines:
      - experiments:
        - go_to_local:
          - bc: api/projects/babyai_baselines/experiments/go_to_local/bc.md
          - distributed_bc_offpolicy: api/projects/babyai_baselines/experiments/go_to_local/distributed_bc_offpolicy.md
          - dagger: api/projects/babyai_baselines/experiments/go_to_local/dagger.md
          - bc_teacher_forcing: api/projects/babyai_baselines/experiments/go_to_local/bc_teacher_forcing.md
          - distributed_bc_teacher_forcing: api/projects/babyai_baselines/experiments/go_to_local/distributed_bc_teacher_forcing.md
          - ppo: api/projects/babyai_baselines/experiments/go_to_local/ppo.md
          - a2c: api/projects/babyai_baselines/experiments/go_to_local/a2c.md
          - base: api/projects/babyai_baselines/experiments/go_to_local/base.md
        - go_to_obj:
          - bc: api/projects/babyai_baselines/experiments/go_to_obj/bc.md
          - dagger: api/projects/babyai_baselines/experiments/go_to_obj/dagger.md
          - bc_teacher_forcing: api/projects/babyai_baselines/experiments/go_to_obj/bc_teacher_forcing.md
          - ppo: api/projects/babyai_baselines/experiments/go_to_obj/ppo.md
          - a2c: api/projects/babyai_baselines/experiments/go_to_obj/a2c.md
          - base: api/projects/babyai_baselines/experiments/go_to_obj/base.md
        - base: api/projects/babyai_baselines/experiments/base.md
    - pointnav_baselines:
      - experiments:
        - robothor:
          - pointnav_robothor_rgbd_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgbd_simpleconvgru_ddppo.md
          - pointnav_robothor_depth_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_depth_simpleconvgru_ddppo.md
          - pointnav_robothor_rgb_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo.md
          - pointnav_robothor_rgb_simpleconvgru_ddppo_and_gbc: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo_and_gbc.md
          - pointnav_robothor_base: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_base.md
        - habitat:
          - pointnav_habitat_rgb_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgb_simpleconvgru_ddppo.md
          - pointnav_habitat_rgbd_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgbd_simpleconvgru_ddppo.md
          - pointnav_habitat_base: api/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_base.md
          - debug_pointnav_habitat_rgbd_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/debug_pointnav_habitat_rgbd_simpleconvgru_ddppo.md
          - debug_pointnav_habitat_rgb_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/debug_pointnav_habitat_rgb_simpleconvgru_ddppo.md
          - debug_pointnav_habitat_rgb_simpleconvgru_bc: api/projects/pointnav_baselines/experiments/habitat/debug_pointnav_habitat_rgb_simpleconvgru_bc.md
          - pointnav_habitat_depth_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_depth_simpleconvgru_ddppo.md
          - debug_pointnav_habitat_base: api/projects/pointnav_baselines/experiments/habitat/debug_pointnav_habitat_base.md
        - pointnav_base: api/projects/pointnav_baselines/experiments/pointnav_base.md
        - pointnav_habitat_mixin_ddppo: api/projects/pointnav_baselines/experiments/pointnav_habitat_mixin_ddppo.md
        - ithor:
          - pointnav_ithor_rgbd_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgbd_simpleconvgru_ddppo.md
          - pointnav_ithor_depth_simpleconvgru_ddppo_and_gbc: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_depth_simpleconvgru_ddppo_and_gbc.md
          - pointnav_ithor_rgb_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgb_simpleconvgru_ddppo.md
          - pointnav_ithor_base: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_base.md
          - pointnav_ithor_depth_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_depth_simpleconvgru_ddppo.md
        - pointnav_mixin_simpleconvgru: api/projects/pointnav_baselines/experiments/pointnav_mixin_simpleconvgru.md
        - pointnav_thor_mixin_ddppo: api/projects/pointnav_baselines/experiments/pointnav_thor_mixin_ddppo.md
        - pointnav_thor_mixin_ddppo_and_gbc: api/projects/pointnav_baselines/experiments/pointnav_thor_mixin_ddppo_and_gbc.md
        - pointnav_thor_base: api/projects/pointnav_baselines/experiments/pointnav_thor_base.md
      - models:
        - point_nav_models: api/projects/pointnav_baselines/models/point_nav_models.md
    - tutorials:
      - pointnav_habitat_rgb_ddppo: api/projects/tutorials/pointnav_habitat_rgb_ddppo.md
      - object_nav_ithor_dagger_then_ppo_one_object: api/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object.md
      - minigrid_offpolicy_tutorial: api/projects/tutorials/minigrid_offpolicy_tutorial.md
      - navtopartner_robothor_rgb_ppo: api/projects/tutorials/navtopartner_robothor_rgb_ppo.md
      - pointnav_ithor_rgb_ddppo: api/projects/tutorials/pointnav_ithor_rgb_ddppo.md
      - object_nav_ithor_dagger_then_ppo_one_object_viz: api/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object_viz.md
      - object_nav_ithor_ppo_one_object: api/projects/tutorials/object_nav_ithor_ppo_one_object.md
      - minigrid_tutorial_conds: api/projects/tutorials/minigrid_tutorial_conds.md
    - manipulathor_baselines:
      - armpointnav_baselines:
        - experiments:
          - armpointnav_thor_base: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_thor_base.md
          - armpointnav_base: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_base.md
          - armpointnav_mixin_ddppo: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_mixin_ddppo.md
          - ithor:
            - armpointnav_no_vision: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_no_vision.md
            - armpointnav_ithor_base: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_ithor_base.md
            - armpointnav_depth: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_depth.md
            - armpointnav_rgb: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_rgb.md
            - armpointnav_rgbdepth: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_rgbdepth.md
            - armpointnav_disjoint_depth: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_disjoint_depth.md
          - armpointnav_mixin_simplegru: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_mixin_simplegru.md
        - models:
          - arm_pointnav_models: api/projects/manipulathor_baselines/armpointnav_baselines/models/arm_pointnav_models.md
          - base_models: api/projects/manipulathor_baselines/armpointnav_baselines/models/base_models.md
          - disjoint_arm_pointnav_models: api/projects/manipulathor_baselines/armpointnav_baselines/models/disjoint_arm_pointnav_models.md
          - manipulathor_net_utils: api/projects/manipulathor_baselines/armpointnav_baselines/models/manipulathor_net_utils.md
  - tests:
    - mapping:
      - test_ai2thor_mapping: api/tests/mapping/test_ai2thor_mapping.md
    - multiprocessing:
      - test_frozen_attribs: api/tests/multiprocessing/test_frozen_attribs.md
    - utils:
      - test_spaces: api/tests/utils/test_spaces.md
    - vision:
      - test_pillow_rescaling: api/tests/vision/test_pillow_rescaling.md
    - sync_algs_cpu:
      - test_to_to_obj_trains: api/tests/sync_algs_cpu/test_to_to_obj_trains.md
    - manipulathor_plugin:
      - test_utils: api/tests/manipulathor_plugin/test_utils.md
    - hierarchical_policies:
      - test_minigrid_conditional: api/tests/hierarchical_policies/test_minigrid_conditional.md
markdown_extensions:
- toc:
    permalink: '#'
- markdown.extensions.codehilite:
    guess_lang: true
- meta
- admonition
- codehilite

# extra_javascript:
#  - javascripts/extra.js
#plugins:
#  - search
#  - mkpdfs


================================================
FILE: mypy.ini
================================================
[mypy]
python_version = 3.7
follow_imports = skip
ignore_missing_imports = True
strict_optional = False

[mypy-demo.*]
ignore_errors = True


================================================
FILE: overrides/main.html
================================================
{% extends "base.html" %}

{# Twitter card metadata: the card title is the site name, extended with
   " - <page title>" when the page defines a meta title or (for pages other
   than the homepage) has a title of its own. #}
{% block extrahead %}
  {% set title = config.site_name %}
  {% if page and page.meta and page.meta.title %}
    {% set title = title ~ " - " ~ page.meta.title %}
  {% elif page and page.title and not page.is_homepage %}
    {% set title = title ~ " - " ~ page.title | striptags %}
  {% endif %}
  <meta name="twitter:card" content="summary_large_image" />
  <meta name="twitter:title" content="{{ title }}" />
  <meta name="twitter:description" content="{{ config.site_description }}" />
  <meta name="twitter:image" content="https://www.allenact.org/img/social-card-AllenAct.png" />
{% endblock %}


================================================
FILE: pretrained_model_ckpts/.gitignore
================================================
*
!.gitignore
!*.sh


================================================
FILE: pretrained_model_ckpts/download_navigation_model_ckpts.sh
================================================
#!/bin/bash
#
# Download and unpack a pretrained navigation model checkpoint archive into
# the directory containing this script.
#
# Usage: download_navigation_model_ckpts.sh <model-name>
#   where <model-name> is one of:
#     robothor-pointnav-rgb-resnet
#     robothor-objectnav-challenge-2021
#
# Exits with status 1 on an unknown model name, a failed download, or a
# failed extraction.

# Move to the directory containing this file
cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" || exit

BASE_URL="https://prior-model-weights.s3.us-east-2.amazonaws.com/embodied-ai/navigation"

# Validate the requested model name and announce what is being fetched.
case "$1" in
    robothor-pointnav-rgb-resnet)
        echo "Downloading pretrained RoboTHOR PointNav model..."
        ;;
    robothor-objectnav-challenge-2021)
        echo "Downloading pretrained RoboTHOR ObjectNav model..."
        ;;
    *)
        echo "Failed: Usage download_navigation_model_ckpts.sh [robothor-pointnav-rgb-resnet|robothor-objectnav-challenge-2021]"
        exit 1
        ;;
esac

ARCHIVE="$1.tar.gz"

# Download, Unzip, and Remove zip. Each step is checked so that a failed
# download or extraction is reported instead of being announced as saved.
if ! wget "${BASE_URL}/${ARCHIVE}"
then
    echo "Failed: could not download ${ARCHIVE}" >&2
    exit 1
fi

if ! tar -xf "${ARCHIVE}"
then
    echo "Failed: could not extract ${ARCHIVE}" >&2
    exit 1
fi

rm "${ARCHIVE}"
echo "saved folder: $1"


================================================
FILE: projects/__init__.py
================================================


================================================
FILE: projects/babyai_baselines/README.md
================================================
# Baseline experiments for the BabyAI environment

We perform a collection of baseline experiments within the BabyAI environment
 on the GoToLocal task; see the `projects/babyai_baselines/experiments/go_to_local` directory.
 For instance, to train a model using PPO, run
 
```bash
python main.py go_to_local.ppo --experiment_base projects/babyai_baselines/experiments
```

Note that these experiments will be quite slow when not using a GPU as the BabyAI model architecture is surprisingly 
large. Specifying a GPU (if available) can be done from the command line using hooks we created using 
[gin-config](https://github.com/google/gin-config). E.g. to train using the 0th GPU device, add

```bash
--gp "machine_params.gpu_id = 0"
```  

to the above command.

================================================
FILE: projects/babyai_baselines/__init__.py
================================================


================================================
FILE: projects/babyai_baselines/experiments/__init__.py
================================================


================================================
FILE: projects/babyai_baselines/experiments/base.py
================================================
from abc import ABC
from typing import Dict, Any, List, Optional, Union, Sequence, cast

import gym
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses import PPO, A2C
from allenact.algorithms.onpolicy_sync.losses.a2cacktr import A2CConfig
from allenact.algorithms.onpolicy_sync.losses.imitation import Imitation
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.misc import Loss
from allenact.base_abstractions.sensor import SensorSuite, Sensor, ExpertActionSensor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import (
    Builder,
    LinearDecay,
    PipelineStage,
    TrainingPipeline,
)
from allenact_plugins.babyai_plugin.babyai_models import BabyAIRecurrentACModel
from allenact_plugins.babyai_plugin.babyai_tasks import BabyAITask, BabyAITaskSampler
from allenact_plugins.minigrid_plugin.minigrid_sensors import (
    EgocentricMiniGridSensor,
    MiniGridMissionSensor,
)


class BaseBabyAIExperimentConfig(ExperimentConfig, ABC):
    """Base experiment config shared by the BabyAI baselines.

    The `None`-valued class attributes are placeholders: several methods
    below read them without a fallback, so concrete configs must override
    them before the config is used.
    """

    LEVEL: Optional[str] = None
    TOTAL_RL_TRAIN_STEPS: Optional[int] = None
    AGENT_VIEW_SIZE: int = 7
    ROLLOUT_STEPS: Optional[int] = None
    NUM_TRAIN_SAMPLERS: Optional[int] = None
    NUM_TEST_TASKS: Optional[int] = None
    INSTR_LEN: Optional[int] = None
    USE_INSTR: Optional[bool] = None
    GPU_ID: Optional[int] = None
    USE_EXPERT = False
    SHOULD_LOG = True
    PPO_NUM_MINI_BATCH = 2
    ARCH: Optional[str] = None
    NUM_CKPTS_TO_SAVE = 50

    TEST_SEED_OFFSET = 0

    DEFAULT_LR = 1e-3

    @classmethod
    def METRIC_ACCUMULATE_INTERVAL(cls):
        """Return the metric accumulation interval (1000 per train sampler)."""
        return cls.NUM_TRAIN_SAMPLERS * 1000

    @classmethod
    def get_sensors(cls) -> Sequence[Sensor]:
        """Return the sensors used by this config.

        The egocentric grid sensor is always present; the mission sensor is
        added when `USE_INSTR` is truthy and the expert action sensor when
        `USE_EXPERT` is truthy.
        """
        assert cls.USE_INSTR is not None

        return (
            [
                EgocentricMiniGridSensor(
                    agent_view_size=cls.AGENT_VIEW_SIZE, view_channels=3
                ),
            ]
            + (
                [MiniGridMissionSensor(instr_len=cls.INSTR_LEN)]  # type:ignore
                if cls.USE_INSTR
                else []
            )
            + (
                [
                    ExpertActionSensor(  # type: ignore
                        nactions=len(BabyAITask.class_action_names())
                    )
                ]
                if cls.USE_EXPERT
                else []
            )
        )

    @classmethod
    def rl_loss_default(cls, alg: str, steps: Optional[int] = None):
        """Return default loss settings for the algorithm named `alg`.

        Args:
            alg: One of `"ppo"`, `"a2c"` or `"imitation"`.
            steps: Passed to `LinearDecay` for the PPO clip decay. Required
                for `"ppo"`; unused by the other algorithms.

        Returns:
            A dict with the keys `"loss"`, `"num_mini_batch"` and
            `"update_repeats"`.

        Raises:
            NotImplementedError: If `alg` is not one of the names above.
        """
        if alg == "ppo":
            assert steps is not None
            return {
                "loss": Builder(
                    PPO,
                    kwargs={"clip_decay": LinearDecay(steps)},
                    default=PPOConfig,
                ),
                "num_mini_batch": cls.PPO_NUM_MINI_BATCH,
                "update_repeats": 4,
            }
        elif alg == "a2c":
            return {
                "loss": A2C(**A2CConfig),
                "num_mini_batch": 1,
                "update_repeats": 1,
            }
        elif alg == "imitation":
            return {
                "loss": Imitation(),
                "num_mini_batch": cls.PPO_NUM_MINI_BATCH,
                "update_repeats": 4,
            }
        else:
            raise NotImplementedError(
                "alg must be 'ppo', 'a2c', or 'imitation' (got {!r}).".format(alg)
            )

    @classmethod
    def _training_pipeline(
        cls,
        named_losses: Dict[str, Union[Loss, Builder]],
        pipeline_stages: List[PipelineStage],
        num_mini_batch: int,
        update_repeats: int,
        total_train_steps: int,
        lr: Optional[float] = None,
    ):
        """Assemble a `TrainingPipeline` from the given losses and stages.

        Args:
            named_losses: Losses (or builders of losses) keyed by name.
            pipeline_stages: Stages forwarded to the pipeline.
            num_mini_batch: Forwarded to the pipeline.
            update_repeats: Forwarded to the pipeline.
            total_train_steps: Length of training; determines both the
                checkpoint interval and the horizon of the learning-rate
                decay.
            lr: Adam learning rate; `DEFAULT_LR` is used when `None`.
        """
        lr = cls.DEFAULT_LR if lr is None else lr

        num_steps = cls.ROLLOUT_STEPS
        metric_accumulate_interval = cls.METRIC_ACCUMULATE_INTERVAL()
        # Derive the schedule from the requested training length rather than
        # from `TOTAL_RL_TRAIN_STEPS`, so callers training for a different
        # number of steps (e.g. imitation stages) get a matching schedule.
        save_interval = int(total_train_steps / cls.NUM_CKPTS_TO_SAVE)
        gamma = 0.99

        # GAE is switched off only when a loss named "reinforce_loss" is used.
        use_gae = "reinforce_loss" not in named_losses
        gae_lambda = 0.99
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses=named_losses,
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=None,
            should_log=cls.SHOULD_LOG,
            pipeline_stages=pipeline_stages,
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=total_train_steps)}  # type: ignore
            ),
        )

    @classmethod
    def machine_params(
        cls, mode="train", gpu_id="default", n_train_processes="default", **kwargs
    ):
        """Return the `MachineParams` for the given mode.

        Args:
            mode: `"train"`, `"valid"` or `"test"`.
            gpu_id: Device to use, or `"default"` to fall back to `GPU_ID`
                (no devices when `GPU_ID` is `None`).
            n_train_processes: Number of training processes, or `"default"`
                to use `NUM_TRAIN_SAMPLERS`. Only read in train mode.
            **kwargs: Accepted and ignored.

        Raises:
            NotImplementedError: If `mode` is not one of the names above.
        """
        if mode == "train":
            if n_train_processes == "default":
                nprocesses = cls.NUM_TRAIN_SAMPLERS
            else:
                nprocesses = n_train_processes
        elif mode == "valid":
            # No validation processes are run for these tasks.
            nprocesses = 0
        elif mode == "test":
            nprocesses = min(
                100 if torch.cuda.is_available() else 8, cls.NUM_TEST_TASKS
            )
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        if gpu_id == "default":
            devices = [] if cls.GPU_ID is None else [cls.GPU_ID]
        else:
            devices = [gpu_id]

        return MachineParams(nprocesses=nprocesses, devices=devices)

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the recurrent actor-critic model for this config's sensors."""
        sensors = cls.get_sensors()
        return BabyAIRecurrentACModel(
            action_space=gym.spaces.Discrete(len(BabyAITask.class_action_names())),
            observation_space=SensorSuite(sensors).observation_spaces,
            use_instr=cls.USE_INSTR,
            use_memory=True,
            arch=cls.ARCH,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `BabyAITaskSampler` from the given keyword arguments."""
        return BabyAITaskSampler(**kwargs)

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler kwargs for training process `process_ind`.

        The seed is `seeds[process_ind]` when `seeds` is given, else `None`.
        The remaining arguments are not used here.
        """
        return {
            "env_builder": self.LEVEL,
            "sensors": self.get_sensors(),
            "seed": seeds[process_ind] if seeds is not None else None,
        }

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Always raise: `machine_params` assigns zero validation processes.

        Raises:
            RuntimeError: Unconditionally.
        """
        raise RuntimeError("No validation processes for these tasks")

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler kwargs for test process `process_ind`.

        `NUM_TEST_TASKS` is split across processes, with the first
        `NUM_TEST_TASKS % total_processes` processes taking one extra task.
        Each task gets a fixed seed, and any sensor whose type string
        contains "Expert" is dropped from the training sensor list.
        """
        # Comparison yields a bool, which adds 0 or 1 to the per-process share.
        max_tasks = self.NUM_TEST_TASKS // total_processes + (
            process_ind < (self.NUM_TEST_TASKS % total_processes)
        )
        # Striding by `total_processes` keeps seeds disjoint across processes.
        task_seeds_list = [
            2**31 - 1 + self.TEST_SEED_OFFSET + process_ind + total_processes * i
            for i in range(max_tasks)
        ]

        assert len(task_seeds_list) == 0 or (
            min(task_seeds_list) >= 0 and max(task_seeds_list) <= 2**32 - 1
        )

        train_sampler_args = self.train_task_sampler_args(
            process_ind=process_ind,
            total_processes=total_processes,
            devices=devices,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        return {
            **train_sampler_args,
            "task_seeds_list": task_seeds_list,
            "max_tasks": max_tasks,
            "deterministic_sampling": True,
            "sensors": [
                s for s in train_sampler_args["sensors"] if "Expert" not in str(type(s))
            ],
        }


================================================
FILE: projects/babyai_baselines/experiments/go_to_local/__init__.py
================================================


================================================
FILE: projects/babyai_baselines/experiments/go_to_local/a2c.py
================================================
import torch

from allenact.utils.experiment_utils import PipelineStage
from projects.babyai_baselines.experiments.go_to_local.base import (
    BaseBabyAIGoToLocalExperimentConfig,
)


class A2CBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):
    """GoToLocal experiment trained with the A2C loss alone."""

    # Use 128 * 6 samplers when CUDA is available; otherwise keep the base value.
    NUM_TRAIN_SAMPLERS: int = (
        BaseBabyAIGoToLocalExperimentConfig.NUM_TRAIN_SAMPLERS
        if not torch.cuda.is_available()
        else 128 * 6
    )
    ROLLOUT_STEPS: int = 16
    USE_LR_DECAY = False
    DEFAULT_LR = 1e-4

    @classmethod
    def tag(cls):
        """Return this experiment's tag string."""
        return "BabyAIGoToLocalA2C"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single-stage pipeline running `a2c_loss` for `TOTAL_RL_TRAIN_STEPS`."""
        n_steps = cls.TOTAL_RL_TRAIN_STEPS
        defaults = cls.rl_loss_default("a2c", steps=n_steps)

        losses = {"a2c_loss": defaults["loss"]}
        only_stage = PipelineStage(loss_names=["a2c_loss"], max_stage_steps=n_steps)

        return cls._training_pipeline(
            named_losses=losses,
            pipeline_stages=[only_stage],
            num_mini_batch=defaults["num_mini_batch"],
            update_repeats=defaults["update_repeats"],
            total_train_steps=n_steps,
        )


================================================
FILE: projects/babyai_baselines/experiments/go_to_local/base.py
================================================
from abc import ABC
from typing import Dict, List, Optional, Union, Any, cast

import gym
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.storage import ExperienceStorage
from allenact.base_abstractions.misc import Loss
from allenact.base_abstractions.sensor import SensorSuite
from allenact.utils.experiment_utils import (
    Builder,
    LinearDecay,
    PipelineStage,
    TrainingPipeline,
)
from allenact_plugins.babyai_plugin.babyai_models import BabyAIRecurrentACModel
from allenact_plugins.babyai_plugin.babyai_tasks import BabyAITask
from projects.babyai_baselines.experiments.base import BaseBabyAIExperimentConfig


class BaseBabyAIGoToLocalExperimentConfig(BaseBabyAIExperimentConfig, ABC):
    """Base experiment config for the `BabyAI-GoToLocal-v0` level."""

    LEVEL: Optional[str] = "BabyAI-GoToLocal-v0"
    TOTAL_RL_TRAIN_STEPS = int(15e6)
    TOTAL_IL_TRAIN_STEPS = int(7.5e6)
    ROLLOUT_STEPS: int = 128
    NUM_TRAIN_SAMPLERS: int = 128 if torch.cuda.is_available() else 4
    PPO_NUM_MINI_BATCH = 4
    NUM_CKPTS_TO_SAVE = 20
    NUM_TEST_TASKS: int = 1000
    USE_LR_DECAY: bool = True

    # ARCH = "cnn1"
    # ARCH = "cnn2"
    ARCH = "expert_filmcnn"

    USE_INSTR = True
    INSTR_LEN: int = 5

    INCLUDE_AUXILIARY_HEAD = False

    @classmethod
    def METRIC_ACCUMULATE_INTERVAL(cls):
        """Return the metric accumulation interval (64 per train sampler)."""
        return cls.NUM_TRAIN_SAMPLERS * 64

    @classmethod
    def _training_pipeline(  # type:ignore
        cls,
        named_losses: Dict[str, Union[Loss, Builder]],
        pipeline_stages: List[PipelineStage],
        num_mini_batch: int,
        update_repeats: int,
        total_train_steps: int,
        lr: Optional[float] = None,
        named_storages: Optional[Dict[str, Union[ExperienceStorage, Builder]]] = None,
    ):
        """Assemble a `TrainingPipeline` from the given losses and stages.

        Args:
            named_losses: Losses (or builders of losses) keyed by name.
            pipeline_stages: Stages forwarded to the pipeline.
            num_mini_batch: Forwarded to the pipeline.
            update_repeats: Forwarded to the pipeline.
            total_train_steps: Length of training; determines the checkpoint
                interval and, when `USE_LR_DECAY` is set, the horizon of the
                learning-rate decay.
            lr: Adam learning rate; `DEFAULT_LR` is used when `None`.
            named_storages: Optional storages (or builders) keyed by name,
                forwarded to the pipeline.
        """
        # Honor an explicitly supplied learning rate instead of always
        # overwriting it with the class default.
        lr = cls.DEFAULT_LR if lr is None else lr

        num_steps = cls.ROLLOUT_STEPS
        metric_accumulate_interval = cls.METRIC_ACCUMULATE_INTERVAL()
        save_interval = int(total_train_steps / cls.NUM_CKPTS_TO_SAVE)
        gamma = 0.99

        # GAE is switched off only when a loss named "reinforce_loss" is used.
        use_gae = "reinforce_loss" not in named_losses
        gae_lambda = 0.99
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses=named_losses,
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=None,
            should_log=cls.SHOULD_LOG,
            pipeline_stages=pipeline_stages,
            named_storages=named_storages,
            lr_scheduler_builder=(
                Builder(
                    LambdaLR, {"lr_lambda": LinearDecay(steps=total_train_steps)}  # type: ignore
                )
                if cls.USE_LR_DECAY
                else None
            ),
        )

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the recurrent actor-critic model used for GoToLocal."""
        sensors = cls.get_sensors()
        return BabyAIRecurrentACModel(
            action_space=gym.spaces.Discrete(len(BabyAITask.class_action_names())),
            observation_space=SensorSuite(sensors).observation_spaces,
            use_instr=cls.USE_INSTR,
            use_memory=True,
            arch=cls.ARCH,
            instr_dim=256,
            lang_model="attgru",
            memory_dim=2048,
            include_auxiliary_head=cls.INCLUDE_AUXILIARY_HEAD,
        )

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Always raise: these tasks have no validation processes.

        Raises:
            RuntimeError: Unconditionally.
        """
        raise RuntimeError("No validation processes for these tasks")


================================================
FILE: projects/babyai_baselines/experiments/go_to_local/bc.py
================================================
from allenact.utils.experiment_utils import PipelineStage
from projects.babyai_baselines.experiments.go_to_local.base import (
    BaseBabyAIGoToLocalExperimentConfig,
)


class PPOBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):
    """Behavior cloning on GoToLocal using only the imitation loss.

    NOTE(review): despite the `PPO` prefix in the class name, the pipeline
    built here has a single imitation stage. The PPO defaults are looked up
    only to pick `num_mini_batch` and `update_repeats`.
    """

    USE_EXPERT = True

    @classmethod
    def tag(cls):
        """Return this experiment's tag string."""
        return "BabyAIGoToLocalBC"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single-stage pipeline running `imitation_loss` for `TOTAL_IL_TRAIN_STEPS`."""
        n_steps = cls.TOTAL_IL_TRAIN_STEPS

        ppo_defaults = cls.rl_loss_default("ppo", steps=-1)
        il_defaults = cls.rl_loss_default("imitation")

        # Take the smaller of the PPO and imitation settings for each knob.
        mini_batches = min(
            ppo_defaults["num_mini_batch"], il_defaults["num_mini_batch"]
        )
        repeats = min(ppo_defaults["update_repeats"], il_defaults["update_repeats"])

        only_stage = PipelineStage(
            loss_names=["imitation_loss"], max_stage_steps=n_steps
        )
        return cls._training_pipeline(
            named_losses={"imitation_loss": il_defaults["loss"]},
            pipeline_stages=[only_stage],
            num_mini_batch=mini_batches,
            update_repeats=repeats,
            total_train_steps=n_steps,
        )


================================================
FILE: projects/babyai_baselines/experiments/go_to_local/bc_teacher_forcing.py
================================================
import torch

from allenact.utils.experiment_utils import PipelineStage, LinearDecay
from projects.babyai_baselines.experiments.go_to_local.base import (
    BaseBabyAIGoToLocalExperimentConfig,
)


class BCTeacherForcingBabyAIGoToLocalExperimentConfig(
    BaseBabyAIGoToLocalExperimentConfig
):
    """Behavior cloning on GoToLocal with teacher forcing enabled."""

    USE_EXPERT = True

    # Device 0 when CUDA is available, otherwise no GPU.
    GPU_ID = None if not torch.cuda.is_available() else 0

    @classmethod
    def METRIC_ACCUMULATE_INTERVAL(cls):
        """Return a metric accumulation interval of 1."""
        return 1

    @classmethod
    def tag(cls):
        """Return this experiment's tag string."""
        return "BabyAIGoToLocalBCTeacherForcing"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single-stage imitation pipeline with teacher forcing.

        The teacher-forcing schedule is a `LinearDecay` whose start and end
        values are both 1.0 over `TOTAL_IL_TRAIN_STEPS`.
        """
        n_steps = cls.TOTAL_IL_TRAIN_STEPS

        ppo_defaults = cls.rl_loss_default("ppo", steps=-1)
        il_defaults = cls.rl_loss_default("imitation")

        # Take the smaller of the PPO and imitation settings for each knob.
        mini_batches = min(
            ppo_defaults["num_mini_batch"], il_defaults["num_mini_batch"]
        )
        repeats = min(ppo_defaults["update_repeats"], il_defaults["update_repeats"])

        forcing_schedule = LinearDecay(startp=1.0, endp=1.0, steps=n_steps)
        only_stage = PipelineStage(
            loss_names=["imitation_loss"],
            teacher_forcing=forcing_schedule,
            max_stage_steps=n_steps,
        )
        return cls._training_pipeline(
            named_losses={"imitation_loss": il_defaults["loss"]},
            pipeline_stages=[only_stage],
            num_mini_batch=mini_batches,
            update_repeats=repeats,
            total_train_steps=n_steps,
        )


================================================
FILE: projects/babyai_baselines/experiments/go_to_local/dagger.py
================================================
from allenact.utils.experiment_utils import PipelineStage, LinearDecay
from projects.babyai_baselines.experiments.go_to_local.base import (
    BaseBabyAIGoToLocalExperimentConfig,
)


class DaggerBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):
    """Imitation learning on GoToLocal, trained with Dagger.

    The teacher-forcing schedule is a `LinearDecay` from 1.0 to 0.0 spanning
    half of `TOTAL_IL_TRAIN_STEPS`.
    """

    USE_EXPERT = True

    @classmethod
    def tag(cls):
        """Return this experiment's tag string."""
        return "BabyAIGoToLocalDagger"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single-stage imitation pipeline with decaying teacher forcing."""
        n_steps = cls.TOTAL_IL_TRAIN_STEPS
        il_defaults = cls.rl_loss_default("imitation")

        forcing_schedule = LinearDecay(startp=1.0, endp=0.0, steps=n_steps // 2)
        only_stage = PipelineStage(
            loss_names=["imitation_loss"],
            teacher_forcing=forcing_schedule,
            max_stage_steps=n_steps,
        )
        return cls._training_pipeline(
            named_losses={"imitation_loss": il_defaults["loss"]},
            pipeline_stages=[only_stage],
            num_mini_batch=il_defaults["num_mini_batch"],
            update_repeats=il_defaults["update_repeats"],
            total_train_steps=n_steps,
        )


================================================
FILE: projects/babyai_baselines/experiments/go_to_local/distributed_bc_offpolicy.py
================================================
import os
from typing import Optional
from typing import Sequence

import torch

from allenact.algorithms.onpolicy_sync.storage import RolloutBlockStorage
from allenact.utils.experiment_utils import (
    PipelineStage,
    StageComponent,
    TrainingSettings,
)
from allenact_plugins.babyai_plugin.babyai_constants import (
    BABYAI_EXPERT_TRAJECTORIES_DIR,
)
from allenact_plugins.minigrid_plugin.minigrid_offpolicy import (
    MiniGridOffPolicyExpertCELoss,
    MiniGridExpertTrajectoryStorage,
)
from projects.tutorials.minigrid_offpolicy_tutorial import (
    BCOffPolicyBabyAIGoToLocalExperimentConfig,
)


class DistributedBCOffPolicyBabyAIGoToLocalExperimentConfig(
    BCOffPolicyBabyAIGoToLocalExperimentConfig
):
    """Off-policy imitation on GoToLocal, split across up to two GPUs."""

    @classmethod
    def tag(cls):
        """Return this experiment's tag string."""
        return "DistributedBabyAIGoToLocalBCOffPolicy"

    @classmethod
    def machine_params(
        cls, mode="train", gpu_id="default", n_train_processes="default", **kwargs
    ):
        """Return the parent's machine params, spread over at most two GPUs.

        When CUDA is available and the parent reports a positive process
        count, that count is divided evenly (integer division) between the
        GPUs in use and `gpu_ids` is set to their indices.
        """
        # NOTE(review): assumes the parent's `machine_params` result supports
        # item get/set by key — TODO confirm against the parent class.
        params = super().machine_params(mode, gpu_id, n_train_processes, **kwargs)

        if not (params["nprocesses"] > 0 and torch.cuda.is_available()):
            return params

        n_gpus = min(torch.cuda.device_count(), 2)
        params["nprocesses"] = [params["nprocesses"] // n_gpus] * n_gpus
        params["gpu_ids"] = list(range(n_gpus))
        return params

    @classmethod
    def expert_ce_loss_kwargs_generator(
        cls, worker_id: int, rollouts_per_worker: Sequence[int], seed: Optional[int]
    ):
        """Return the worker count and this worker's id as loss kwargs.

        `seed` is accepted but not used.
        """
        return {
            "num_workers": len(rollouts_per_worker),
            "current_worker": worker_id,
        }

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single-stage pipeline training on stored expert trajectories.

        The stage has one component that applies `offpolicy_expert_ce_loss`
        to the `minigrid_offpolicy_expert` storage. The top-level
        `num_mini_batch` and `update_repeats` are both 0.
        """
        n_steps = cls.TOTAL_IL_TRAIN_STEPS
        ppo_defaults = cls.rl_loss_default("ppo", steps=-1)

        mini_batches = ppo_defaults["num_mini_batch"]
        repeats = ppo_defaults["update_repeats"]

        losses = {
            "offpolicy_expert_ce_loss": MiniGridOffPolicyExpertCELoss(
                total_episodes_in_epoch=int(1e6)
            ),
        }

        # The "-small" trajectory file is used when CUDA is unavailable.
        trajectories_file = "BabyAI-GoToLocal-v0{}.pkl".format(
            "" if torch.cuda.is_available() else "-small"
        )
        storages = {
            "onpolicy": RolloutBlockStorage(),
            "minigrid_offpolicy_expert": MiniGridExpertTrajectoryStorage(
                data_path=os.path.join(
                    BABYAI_EXPERT_TRAJECTORIES_DIR, trajectories_file
                ),
                num_samplers=cls.NUM_TRAIN_SAMPLERS,
                rollout_len=cls.ROLLOUT_STEPS,
                instr_len=cls.INSTR_LEN,
            ),
        }

        # One mini-batch per update, repeated enough times to match the
        # number of updates the PPO defaults would perform.
        offpolicy_component = StageComponent(
            uuid="offpolicy",
            storage_uuid="minigrid_offpolicy_expert",
            loss_names=["offpolicy_expert_ce_loss"],
            training_settings=TrainingSettings(
                update_repeats=mini_batches * repeats,
                num_mini_batch=1,
            ),
        )
        only_stage = PipelineStage(
            loss_names=["offpolicy_expert_ce_loss"],
            max_stage_steps=n_steps,
            stage_components=[offpolicy_component],
        )

        return cls._training_pipeline(
            named_losses=losses,
            named_storages=storages,
            pipeline_stages=[only_stage],
            num_mini_batch=0,
            update_repeats=0,
            total_train_steps=n_steps,
        )


================================================
FILE: projects/babyai_baselines/experiments/go_to_local/distributed_bc_teacher_forcing.py
================================================
import torch

from .bc_teacher_forcing import BCTeacherForcingBabyAIGoToLocalExperimentConfig


class DistributedBCTeacherForcingBabyAIGoToLocalExperimentConfig(
    BCTeacherForcingBabyAIGoToLocalExperimentConfig
):
    """Teacher-forced behavior cloning on GoToLocal, split across up to two GPUs."""

    USE_EXPERT = True

    # Device 0 when CUDA is available, otherwise no GPU.
    GPU_ID = None if not torch.cuda.is_available() else 0

    @classmethod
    def METRIC_ACCUMULATE_INTERVAL(cls):
        """Return a metric accumulation interval of 1."""
        return 1

    @classmethod
    def tag(cls):
        """Return this experiment's tag string."""
        return "BabyAIGoToLocalBCTeacherForcingDistributed"

    @classmethod
    def machine_params(
        cls, mode="train", gpu_id="default", n_train_processes="default", **kwargs
    ):
        """Return the parent's machine params, spread over at most two GPUs.

        When CUDA is available and the parent reports a positive process
        count, that count is divided evenly (integer division) between the
        GPUs in use and `gpu_ids` is set to their indices.
        """
        # NOTE(review): the inherited `machine_params` returns a
        # `MachineParams` object; the keyed access below relies on that class
        # supporting item get/set — TODO confirm.
        params = super().machine_params(mode, gpu_id, n_train_processes, **kwargs)

        if not (params["nprocesses"] > 0 and torch.cuda.is_available()):
            return params

        n_gpus = min(torch.cuda.device_count(), 2)
        params["nprocesses"] = [params["nprocesses"] // n_gpus] * n_gpus
        params["gpu_ids"] = list(range(n_gpus))
        return params


================================================
FILE: projects/babyai_baselines/experiments/go_to_local/ppo.py
================================================
import torch

from allenact.utils.experiment_utils import PipelineStage
from projects.babyai_baselines.experiments.go_to_local.base import (
    BaseBabyAIGoToLocalExperimentConfig,
)


class PPOBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):
    """GoToLocal experiment trained with the PPO loss alone."""

    # Use 128 * 12 samplers when CUDA is available; otherwise keep the base value.
    NUM_TRAIN_SAMPLERS: int = (
        BaseBabyAIGoToLocalExperimentConfig.NUM_TRAIN_SAMPLERS
        if not torch.cuda.is_available()
        else 128 * 12
    )
    ROLLOUT_STEPS: int = 32
    USE_LR_DECAY = False
    DEFAULT_LR = 1e-4

    @classmethod
    def tag(cls):
        """Return this experiment's tag string."""
        return "BabyAIGoToLocalPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single-stage pipeline running `ppo_loss` for `TOTAL_RL_TRAIN_STEPS`."""
        n_steps = cls.TOTAL_RL_TRAIN_STEPS
        defaults = cls.rl_loss_default("ppo", steps=n_steps)

        losses = {"ppo_loss": defaults["loss"]}
        only_stage = PipelineStage(loss_names=["ppo_loss"], max_stage_steps=n_steps)

        return cls._training_pipeline(
            named_losses=losses,
            pipeline_stages=[only_stage],
            num_mini_batch=defaults["num_mini_batch"],
            update_repeats=defaults["update_repeats"],
            total_train_steps=n_steps,
        )


================================================
FILE: projects/babyai_baselines/experiments/go_to_obj/__init__.py
================================================


================================================
FILE: projects/babyai_baselines/experiments/go_to_obj/a2c.py
================================================
from allenact.utils.experiment_utils import PipelineStage
from projects.babyai_baselines.experiments.go_to_obj.base import (
    BaseBabyAIGoToObjExperimentConfig,
)


class A2CBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):
    """GoToObj experiment trained with the A2C loss alone."""

    TOTAL_RL_TRAIN_STEPS = int(1e5)

    @classmethod
    def tag(cls):
        """Return this experiment's tag string."""
        return "BabyAIGoToObjA2C"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single-stage pipeline running `a2c_loss` for `TOTAL_RL_TRAIN_STEPS`."""
        n_steps = cls.TOTAL_RL_TRAIN_STEPS
        defaults = cls.rl_loss_default("a2c", steps=n_steps)

        losses = {"a2c_loss": defaults["loss"]}
        only_stage = PipelineStage(loss_names=["a2c_loss"], max_stage_steps=n_steps)

        return cls._training_pipeline(
            named_losses=losses,
            pipeline_stages=[only_stage],
            num_mini_batch=defaults["num_mini_batch"],
            update_repeats=defaults["update_repeats"],
            total_train_steps=n_steps,
        )


================================================
FILE: projects/babyai_baselines/experiments/go_to_obj/base.py
================================================
from abc import ABC
from typing import Dict, List, Optional, Union, cast

import gym
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.base_abstractions.misc import Loss
from allenact.base_abstractions.sensor import SensorSuite
from allenact.utils.experiment_utils import (
    Builder,
    LinearDecay,
    PipelineStage,
    TrainingPipeline,
)
from allenact_plugins.babyai_plugin.babyai_models import BabyAIRecurrentACModel
from allenact_plugins.babyai_plugin.babyai_tasks import BabyAITask
from projects.babyai_baselines.experiments.base import BaseBabyAIExperimentConfig


class BaseBabyAIGoToObjExperimentConfig(BaseBabyAIExperimentConfig, ABC):
    """Base experiment config for the `BabyAI-GoToObj-v0` level."""

    LEVEL: Optional[str] = "BabyAI-GoToObj-v0"
    TOTAL_RL_TRAIN_STEPS = int(5e4)
    TOTAL_IL_TRAIN_STEPS = int(2e4)
    ROLLOUT_STEPS: int = 32
    NUM_TRAIN_SAMPLERS: int = 16
    PPO_NUM_MINI_BATCH = 2
    NUM_TEST_TASKS: int = 50
    USE_LR_DECAY: bool = False

    DEFAULT_LR = 1e-3

    ARCH = "cnn1"
    # ARCH = "cnn2"
    # ARCH = "expert_filmcnn"

    # Instructions are disabled for this level, so no instruction length is set.
    USE_INSTR = False
    INSTR_LEN: int = -1

    @classmethod
    def METRIC_ACCUMULATE_INTERVAL(cls):
        """Return the metric accumulation interval (128 per train sampler)."""
        return cls.NUM_TRAIN_SAMPLERS * 128

    @classmethod
    def _training_pipeline(  # type:ignore
        cls,
        named_losses: Dict[str, Union[Loss, Builder]],
        pipeline_stages: List[PipelineStage],
        num_mini_batch: int,
        update_repeats: int,
        total_train_steps: int,
        lr: Optional[float] = None,
        **kwargs,
    ):
        """Assemble a `TrainingPipeline` from the given losses and stages.

        Args:
            named_losses: Losses (or builders of losses) keyed by name.
            pipeline_stages: Stages forwarded to the pipeline.
            num_mini_batch: Forwarded to the pipeline.
            update_repeats: Forwarded to the pipeline.
            total_train_steps: Horizon of the learning-rate decay when
                `USE_LR_DECAY` is set.
            lr: Adam learning rate; `DEFAULT_LR` is used when `None`.
            **kwargs: Extra keyword arguments forwarded to `TrainingPipeline`.
        """
        # Honor an explicitly supplied learning rate instead of always
        # overwriting it with the class default.
        lr = cls.DEFAULT_LR if lr is None else lr

        num_steps = cls.ROLLOUT_STEPS
        metric_accumulate_interval = cls.METRIC_ACCUMULATE_INTERVAL()
        # NOTE(review): this very large interval presumably disables periodic
        # checkpointing for these short runs — confirm.
        save_interval = 2**31
        gamma = 0.99

        # GAE is switched off only when a loss named "reinforce_loss" is used.
        use_gae = "reinforce_loss" not in named_losses
        gae_lambda = 0.99
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses=named_losses,
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=None,
            should_log=cls.SHOULD_LOG,
            pipeline_stages=pipeline_stages,
            lr_scheduler_builder=(
                Builder(
                    LambdaLR, {"lr_lambda": LinearDecay(steps=total_train_steps)}  # type: ignore
                )
                if cls.USE_LR_DECAY
                else None
            ),
            **kwargs,
        )

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the recurrent actor-critic model used for GoToObj."""
        sensors = cls.get_sensors()
        return BabyAIRecurrentACModel(
            action_space=gym.spaces.Discrete(len(BabyAITask.class_action_names())),
            observation_space=SensorSuite(sensors).observation_spaces,
            use_instr=cls.USE_INSTR,
            use_memory=True,
            arch=cls.ARCH,
            instr_dim=8,
            lang_model="gru",
            memory_dim=128,
        )


================================================
FILE: projects/babyai_baselines/experiments/go_to_obj/bc.py
================================================
from allenact.utils.experiment_utils import PipelineStage
from projects.babyai_baselines.experiments.go_to_obj.base import (
    BaseBabyAIGoToObjExperimentConfig,
)


class PPOBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):
    """Behavior cloning on GoToObj using only the imitation loss.

    NOTE(review): despite the `PPO` prefix in the class name, the pipeline
    built here has a single imitation stage. The PPO defaults are looked up
    only to pick `num_mini_batch` and `update_repeats`.
    """

    USE_EXPERT = True

    @classmethod
    def tag(cls):
        """Return this experiment's tag string."""
        return "BabyAIGoToObjBC"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single-stage pipeline running `imitation_loss` for `TOTAL_IL_TRAIN_STEPS`."""
        n_steps = cls.TOTAL_IL_TRAIN_STEPS

        ppo_defaults = cls.rl_loss_default("ppo", steps=-1)
        il_defaults = cls.rl_loss_default("imitation")

        # Take the smaller of the PPO and imitation settings for each knob.
        mini_batches = min(
            ppo_defaults["num_mini_batch"], il_defaults["num_mini_batch"]
        )
        repeats = min(ppo_defaults["update_repeats"], il_defaults["update_repeats"])

        only_stage = PipelineStage(
            loss_names=["imitation_loss"], max_stage_steps=n_steps
        )
        return cls._training_pipeline(
            named_losses={"imitation_loss": il_defaults["loss"]},
            pipeline_stages=[only_stage],
            num_mini_batch=mini_batches,
            update_repeats=repeats,
            total_train_steps=n_steps,
        )


================================================
FILE: projects/babyai_baselines/experiments/go_to_obj/bc_teacher_forcing.py
================================================
from allenact.utils.experiment_utils import PipelineStage, LinearDecay
from projects.babyai_baselines.experiments.go_to_obj.base import (
    BaseBabyAIGoToObjExperimentConfig,
)


class PPOBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):
    """Behavior cloning on GoToObj with teacher forcing enabled.

    NOTE(review): despite the `PPO` prefix in the class name, the pipeline
    built here has a single imitation stage. The PPO defaults are looked up
    only to pick `num_mini_batch` and `update_repeats`.
    """

    USE_EXPERT = True

    @classmethod
    def tag(cls):
        """Return this experiment's tag string."""
        return "BabyAIGoToObjBCTeacherForcing"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a single-stage imitation pipeline with teacher forcing.

        The teacher-forcing schedule is a `LinearDecay` whose start and end
        values are both 1.0 over `TOTAL_IL_TRAIN_STEPS`.
        """
        n_steps = cls.TOTAL_IL_TRAIN_STEPS

        ppo_defaults = cls.rl_loss_default("ppo", steps=-1)
        il_defaults = cls.rl_loss_default("imitation")

        # Take the smaller of the PPO and imitation settings for each knob.
        mini_batches = min(
            ppo_defaults["num_mini_batch"], il_defaults["num_mini_batch"]
        )
        repeats = min(ppo_defaults["update_repeats"], il_defaults["update_repeats"])

        forcing_schedule = LinearDecay(startp=1.0, endp=1.0, steps=n_steps)
        only_stage = PipelineStage(
            loss_names=["imitation_loss"],
            teacher_forcing=forcing_schedule,
            max_stage_steps=n_steps,
        )
        return cls._training_pipeline(
            named_losses={"imitation_loss": il_defaults["loss"]},
            pipeline_stages=[only_stage],
            num_mini_batch=mini_batches,
            update_repeats=repeats,
            total_train_steps=n_steps,
        )


================================================
FILE: projects/babyai_baselines/experiments/go_to_obj/dagger.py
================================================
from allenact.utils.experiment_utils import PipelineStage, LinearDecay
from projects.babyai_baselines.experiments.go_to_obj.base import (
    BaseBabyAIGoToObjExperimentConfig,
)


class DaggerBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):
    """Imitation learning on BabyAI GoToObj, trained with Dagger.

    A single imitation-loss stage is used; its teacher-forcing schedule
    goes from 1.0 to 0.0 over the first half of the training steps.
    """

    USE_EXPERT = True

    @classmethod
    def tag(cls):
        """Return the identifier string for this experiment."""
        return "BabyAIGoToObjDagger"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Assemble the single-stage imitation pipeline with decaying teacher forcing."""
        il_steps = cls.TOTAL_IL_TRAIN_STEPS
        imitation_defaults = cls.rl_loss_default("imitation")

        # The schedule spans only half of the total steps.
        forcing_schedule = LinearDecay(startp=1.0, endp=0.0, steps=il_steps // 2)
        dagger_stage = PipelineStage(
            loss_names=["imitation_loss"],
            teacher_forcing=forcing_schedule,
            max_stage_steps=il_steps,
        )

        return cls._training_pipeline(
            named_losses=dict(imitation_loss=imitation_defaults["loss"]),
            pipeline_stages=[dagger_stage],
            num_mini_batch=imitation_defaults["num_mini_batch"],
            update_repeats=imitation_defaults["update_repeats"],
            total_train_steps=il_steps,
        )


================================================
FILE: projects/babyai_baselines/experiments/go_to_obj/ppo.py
================================================
from allenact.utils.experiment_utils import PipelineStage
from projects.babyai_baselines.experiments.go_to_obj.base import (
    BaseBabyAIGoToObjExperimentConfig,
)


class PPOBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):
    """BabyAI GoToObj experiment trained with a single PPO stage."""

    @classmethod
    def tag(cls):
        """Return the identifier string for this experiment."""
        return "BabyAIGoToObjPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Assemble a one-stage pipeline that optimizes only the PPO loss."""
        rl_steps = cls.TOTAL_RL_TRAIN_STEPS
        ppo_defaults = cls.rl_loss_default("ppo", steps=rl_steps)

        ppo_stage = PipelineStage(
            loss_names=["ppo_loss"],
            max_stage_steps=rl_steps,
        )

        return cls._training_pipeline(
            named_losses=dict(ppo_loss=ppo_defaults["loss"]),
            pipeline_stages=[ppo_stage],
            num_mini_batch=ppo_defaults["num_mini_batch"],
            update_repeats=ppo_defaults["update_repeats"],
            total_train_steps=rl_steps,
        )


================================================
FILE: projects/gym_baselines/README.md
================================================
# Baseline models Gym (for MuJoCo environments)

This project contains the code for training baseline models for the tasks under the [MuJoCo](https://gym.openai.com/envs/#mujoco) group of Gym environments, including ["Ant-v2"](https://gym.openai.com/envs/Ant-v2/), ["HalfCheetah-v2"](https://gym.openai.com/envs/HalfCheetah-v2/), ["Hopper-v2"](https://gym.openai.com/envs/Hopper-v2/), ["Humanoid-v2"](https://gym.openai.com/envs/Humanoid-v2/), ["InvertedDoublePendulum-v2"](https://gym.openai.com/envs/InvertedDoublePendulum-v2/), ["InvertedPendulum-v2"](https://gym.openai.com/envs/InvertedPendulum-v2/), ["Reacher-v2"](https://gym.openai.com/envs/Reacher-v2/), ["Swimmer-v2"](https://gym.openai.com/envs/Swimmer-v2/), and ["Walker2d-v2"](https://gym.openai.com/envs/Walker2d-v2/).

Provided are experiment configs for training a lightweight implementation with separate MLPs for actors and critic, [MemorylessActorCritic](https://allenact.org/api/allenact_plugins/gym_plugin/gym_models/#memorylessactorcritic), with a [Gaussian distribution](https://allenact.org/api/allenact_plugins/gym_plugin/gym_distributions/#gaussiandistr) to sample actions for all continuous-control environments under the `MuJoCo` group of `Gym` environments. 

The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)
Reinforcement Learning Algorithm. 

To train an experiment run the following command from the `allenact` root directory:

```bash
python main.py <PATH_TO_EXPERIMENT_CONFIG> -o <PATH_TO_OUTPUT>
```

Where `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights
and logs to be stored and `<PATH_TO_EXPERIMENT_CONFIG>` is the path to the python file containing
the experiment configuration. An example usage of this command would be:

```bash
python main.py projects/gym_baselines/experiments/mujoco/gym_mujoco_ant_ddppo.py -o /YOUR/DESIRED/MUJOCO/OUTPUT/SAVE/PATH/gym_mujoco_ant_ddppo
```

This trains a lightweight implementation with separate MLPs for actors and critic with a Gaussian distribution to sample actions in the "Ant-v2" environment, and stores the model weights and logs
to `/YOUR/DESIRED/MUJOCO/OUTPUT/SAVE/PATH/gym_mujoco_ant_ddppo`.

## Results

In our experiments, the rewards we obtained for the MuJoCo environments after training with PPO are similar to those reported by OpenAI Gym Baselines (1M steps). The Humanoid environment is compared with the original PPO paper, which trained for 50M steps using PPO. Due to time constraints, we have only tested our baselines across two seeds so far.


| Environment           | Gym Baseline Reward | Ours Reward |
| -----------           | ------------------- | ----------- |
|[Ant-v2](https://gym.openai.com/envs/Ant-v2/)| 1083.2 |1098.6(reached 4719 in 25M steps)  | 
| [HalfCheetah-v2](https://gym.openai.com/envs/HalfCheetah-v2/) | 1795.43             |  1741(reached 4019 in 18M steps)           |
|[Hopper-v2](https://gym.openai.com/envs/Hopper-v2/)|2316.16|2266|
|[Humanoid-v2](https://gym.openai.com/envs/Humanoid-v2/)|4000+|4500+(reached 6500 in 70M steps)|
| [InvertedPendulum-v2](https://gym.openai.com/envs/InvertedPendulum-v2/) | 809.43              |  1000       |
|[Reacher-v2](https://gym.openai.com/envs/Reacher-v2/)|-6.71|-7.045|
|[Swimmer-v2](https://gym.openai.com/envs/Swimmer-v2/)|111.19|124.7|
|[Walker2d-v2](https://gym.openai.com/envs/Walker2d-v2/)|3424.95|2723 in 10M steps|


================================================
FILE: projects/gym_baselines/__init__.py
================================================


================================================
FILE: projects/gym_baselines/experiments/__init__.py
================================================


================================================
FILE: projects/gym_baselines/experiments/gym_base.py
================================================
from abc import ABC
from typing import Dict, Sequence, Optional, List, Any

from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.base_abstractions.sensor import Sensor


class GymBaseConfig(ExperimentConfig, ABC):
    """Abstract base for Gym experiment configs.

    The train/valid/test sampler-argument methods all delegate to
    `_get_sampler_args`, passing the corresponding mode string; subclasses
    are expected to override that hook.
    """

    # Overridden by concrete configs; `None` here.
    SENSORS: Optional[Sequence[Sensor]] = None

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Produce task sampler kwargs for `mode`; must be overridden.

        # Parameters
        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : seeds forwarded unchanged from the public methods
        """
        raise NotImplementedError()

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return sampler kwargs for training (mode `"train"`).

        Only `process_ind` and `seeds` are forwarded to `_get_sampler_args`.
        """
        sampler_args = self._get_sampler_args(
            process_ind=process_ind,
            mode="train",
            seeds=seeds,
        )
        return sampler_args

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return sampler kwargs for validation (mode `"valid"`).

        Only `process_ind` and `seeds` are forwarded to `_get_sampler_args`.
        """
        sampler_args = self._get_sampler_args(
            process_ind=process_ind,
            mode="valid",
            seeds=seeds,
        )
        return sampler_args

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return sampler kwargs for testing (mode `"test"`).

        Only `process_ind` and `seeds` are forwarded to `_get_sampler_args`.
        """
        sampler_args = self._get_sampler_args(
            process_ind=process_ind,
            mode="test",
            seeds=seeds,
        )
        return sampler_args


================================================
FILE: projects/gym_baselines/experiments/gym_humanoid_base.py
================================================
from abc import ABC
from typing import Dict, Any

from allenact.utils.viz_utils import VizSuite, AgentViewViz

from projects.gym_baselines.experiments.gym_base import GymBaseConfig


class GymHumanoidBaseConfig(GymBaseConfig, ABC):
    """Machine parameters for the Humanoid Gym experiment configs."""

    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        """Return process count, devices, and visualizer for `mode`.

        A video visualizer is created only when `mode` is `"test"`. Eight
        processes are used for `"train"` and one for any other mode; the
        device list is always empty.
        """
        if mode == "test":
            episode_video = AgentViewViz(
                label="episode_vid",
                max_clip_length=400,
                vector_task_source=("render", {"mode": "rgb_array"}),
                fps=30,
            )
            visualizer = VizSuite(mode=mode, video_viz=episode_video)
        else:
            visualizer = None

        rollout_processes = 8 if mode == "train" else 1  # rollout
        return dict(
            nprocesses=rollout_processes,
            devices=[],
            visualizer=visualizer,
        )


================================================
FILE: projects/gym_baselines/experiments/gym_humanoid_ddppo.py
================================================
from abc import ABC
from typing import cast

import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses.ppo import PPO

from allenact.utils.experiment_utils import (
    TrainingPipeline,
    Builder,
    PipelineStage,
    LinearDecay,
)

from projects.gym_baselines.experiments.gym_humanoid_base import GymHumanoidBaseConfig


class GymHumanoidPPOConfig(GymHumanoidBaseConfig, ABC):
    """PPO training pipeline for configs built on `GymHumanoidBaseConfig`."""

    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        """Build a single-stage PPO `TrainingPipeline` with an Adam optimizer.

        The learning-rate scheduler multiplier uses a `LinearDecay` whose
        start and end values are both 1.
        """
        total_steps = int(8e7)  # convergence may be after 1e8

        ppo_loss = PPO(
            clip_param=0.1,
            value_loss_coef=0.5,
            entropy_coef=0.0,
        )
        only_stage = PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps)
        adam_builder = Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4))
        scheduler_builder = Builder(
            LambdaLR,
            {
                "lr_lambda": LinearDecay(steps=total_steps, startp=1, endp=1)
            },  # constant learning rate
        )

        return TrainingPipeline(
            named_losses={"ppo_loss": ppo_loss},  # type:ignore
            pipeline_stages=[only_stage],
            optimizer_builder=adam_builder,
            num_mini_batch=4,  # optimal 64
            update_repeats=10,
            max_grad_norm=0.5,
            num_steps=2048,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=None,
            save_interval=200000,
            metric_accumulate_interval=50000,
            lr_scheduler_builder=scheduler_builder,
        )


================================================
FILE: projects/gym_baselines/experiments/gym_mujoco_base.py
================================================
from abc import ABC
from typing import Dict, Any

from allenact.utils.viz_utils import VizSuite, AgentViewViz

from projects.gym_baselines.experiments.gym_base import GymBaseConfig


class GymMoJoCoBaseConfig(GymBaseConfig, ABC):
    """Machine parameters for the MuJoCo Gym experiment configs."""

    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        """Return process count, devices, and visualizer for `mode`.

        A video visualizer is created only when `mode` is `"test"`. Eight
        processes are used for `"train"` and one for any other mode; the
        device list is always empty.
        """
        if mode == "test":
            episode_video = AgentViewViz(
                label="episode_vid",
                max_clip_length=400,
                vector_task_source=("render", {"mode": "rgb_array"}),
                fps=30,
            )
            visualizer = VizSuite(mode=mode, video_viz=episode_video)
        else:
            visualizer = None

        rollout_processes = 8 if mode == "train" else 1  # rollout
        return dict(
            nprocesses=rollout_processes,
            devices=[],
            visualizer=visualizer,
        )


================================================
FILE: projects/gym_baselines/experiments/gym_mujoco_ddppo.py
================================================
from abc import ABC
from typing import cast

import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses.ppo import PPO

from allenact.utils.experiment_utils import (
    TrainingPipeline,
    Builder,
    PipelineStage,
    LinearDecay,
)

from projects.gym_baselines.experiments.gym_mujoco_base import GymMoJoCoBaseConfig


class GymMuJoCoPPOConfig(GymMoJoCoBaseConfig, ABC):
    """PPO training pipeline for configs built on `GymMoJoCoBaseConfig`."""

    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        """Build a single-stage PPO `TrainingPipeline` with an Adam optimizer.

        The learning-rate scheduler multiplier uses a `LinearDecay` going
        from 1 to 0 over the full number of PPO steps.
        """
        total_steps = int(3e7)

        ppo_loss = PPO(
            clip_param=0.2,
            value_loss_coef=0.5,
            entropy_coef=0.0,
        )
        only_stage = PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps)
        adam_builder = Builder(cast(optim.Optimizer, optim.Adam), dict(lr=3e-4))
        scheduler_builder = Builder(
            LambdaLR,
            {"lr_lambda": LinearDecay(steps=total_steps, startp=1, endp=0)},
        )

        return TrainingPipeline(
            named_losses={"ppo_loss": ppo_loss},  # type:ignore
            pipeline_stages=[only_stage],
            optimizer_builder=adam_builder,
            num_mini_batch=4,  # optimal 64
            update_repeats=10,
            max_grad_norm=0.5,
            num_steps=2048,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=None,
            save_interval=200000,
            metric_accumulate_interval=50000,
            lr_scheduler_builder=scheduler_builder,
        )


================================================
FILE: projects/gym_baselines/experiments/mujoco/__init__.py
================================================


================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_ant_ddppo.py
================================================
from typing import Dict, List, Any

import gym
import torch.nn as nn

from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor

from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler

from projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig


class GymMuJoCoAntConfig(GymMuJoCoPPOConfig):
    """Experiment configuration for the Gym MuJoCo "Ant-v2" environment."""

    SENSORS = [
        GymMuJoCoSensor(gym_env_name="Ant-v2", uuid="gym_mujoco_data"),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """We define our `ActorCriticModel` agent using a lightweight
        implementation with separate MLPs for actors and critic,
        MemorylessActorCritic.

        Since this is a model for continuous control, note that the
        superclass of our model is `ActorCriticModel[GaussianDistr]`
        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use
        a Gaussian distribution to sample actions.
        """
        action_space = gym.spaces.Box(-3.0, 3.0, (8,), "float32")
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=action_space,  # specific action_space
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `GymTaskSampler` for "Ant-v2", forwarding `kwargs`."""
        return GymTaskSampler(gym_env_type="Ant-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Generate initialization arguments for train, valid, and test
        TaskSamplers.

        # Parameters
        process_ind : index of the current task sampler
        mode:  one of `train`, `valid`, or `test`
        seeds: per-process seeds, indexed by `process_ind`. May be `None`
            (the public `*_task_sampler_args` methods default it to `None`),
            in which case `seed=None` is returned instead of raising.
        """
        if mode == "train":
            max_tasks = None  # infinite training tasks
            task_seeds_list = None  # no predefined random seeds for training
            deterministic_sampling = False  # randomly sample tasks in training
        else:
            max_tasks = 4

            # one seed for each task to sample:
            # - ensures different seeds for each sampler, and
            # - ensures a deterministic set of sampled tasks.
            task_seeds_list = list(
                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)
            )

            deterministic_sampling = (
                True  # deterministically sample task in validation/testing
            )

        # `seeds` is optional in the public sampler-args API, so guard against
        # indexing into `None`.
        # NOTE(review): assumes the task sampler accepts `seed=None` - confirm.
        sampler_seed = None if seeds is None else seeds[process_ind]

        return dict(
            gym_env_types=["Ant-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,  # see above
            task_seeds_list=task_seeds_list,  # see above
            deterministic_sampling=deterministic_sampling,  # see above
            seed=sampler_seed,
        )

    @classmethod
    def tag(cls) -> str:
        """Return the identifier string for this experiment."""
        return "Gym-MuJoCo-Ant-v2-PPO"


================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_halfcheetah_ddppo.py
================================================
from typing import Dict, List, Any

import gym
import torch.nn as nn

from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor

from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler

from projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig


class GymMuJoCoHalfCheetahConfig(GymMuJoCoPPOConfig):
    """Experiment configuration for the Gym MuJoCo "HalfCheetah-v2" environment."""

    SENSORS = [
        GymMuJoCoSensor(gym_env_name="HalfCheetah-v2", uuid="gym_mujoco_data"),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """We define our `ActorCriticModel` agent using a lightweight
        implementation with separate MLPs for actors and critic,
        MemorylessActorCritic.

        Since this is a model for continuous control, note that the
        superclass of our model is `ActorCriticModel[GaussianDistr]`
        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use
        a Gaussian distribution to sample actions.
        """
        action_space = gym.spaces.Box(-1.0, 1.0, (6,), "float32")
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=action_space,  # specific action_space
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `GymTaskSampler` for "HalfCheetah-v2", forwarding `kwargs`."""
        return GymTaskSampler(gym_env_type="HalfCheetah-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Generate initialization arguments for train, valid, and test
        TaskSamplers.

        # Parameters
        process_ind : index of the current task sampler
        mode:  one of `train`, `valid`, or `test`
        seeds: per-process seeds, indexed by `process_ind`. May be `None`
            (the public `*_task_sampler_args` methods default it to `None`),
            in which case `seed=None` is returned instead of raising.
        """
        if mode == "train":
            max_tasks = None  # infinite training tasks
            task_seeds_list = None  # no predefined random seeds for training
            deterministic_sampling = False  # randomly sample tasks in training
        else:
            max_tasks = 4

            # one seed for each task to sample:
            # - ensures different seeds for each sampler, and
            # - ensures a deterministic set of sampled tasks.
            task_seeds_list = list(
                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)
            )

            deterministic_sampling = (
                True  # deterministically sample task in validation/testing
            )

        # `seeds` is optional in the public sampler-args API, so guard against
        # indexing into `None`.
        # NOTE(review): assumes the task sampler accepts `seed=None` - confirm.
        sampler_seed = None if seeds is None else seeds[process_ind]

        return dict(
            gym_env_types=["HalfCheetah-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,  # see above
            task_seeds_list=task_seeds_list,  # see above
            deterministic_sampling=deterministic_sampling,  # see above
            seed=sampler_seed,
        )

    @classmethod
    def tag(cls) -> str:
        """Return the identifier string for this experiment."""
        return "Gym-MuJoCo-HalfCheetah-v2-PPO"


================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_hopper_ddppo.py
================================================
from typing import Dict, List, Any

import gym
import torch.nn as nn

from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor

from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler

from projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig


class GymMuJoCoHopperConfig(GymMuJoCoPPOConfig):
    """Experiment configuration for the Gym MuJoCo "Hopper-v2" environment."""

    SENSORS = [
        GymMuJoCoSensor(gym_env_name="Hopper-v2", uuid="gym_mujoco_data"),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """We define our `ActorCriticModel` agent using a lightweight
        implementation with separate MLPs for actors and critic,
        MemorylessActorCritic.

        Since this is a model for continuous control, note that the
        superclass of our model is `ActorCriticModel[GaussianDistr]`
        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use
        a Gaussian distribution to sample actions.
        """
        action_space = gym.spaces.Box(-1.0, 1.0, (3,), "float32")
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=action_space,  # specific action_space
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `GymTaskSampler` for "Hopper-v2", forwarding `kwargs`."""
        return GymTaskSampler(gym_env_type="Hopper-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Generate initialization arguments for train, valid, and test
        TaskSamplers.

        # Parameters
        process_ind : index of the current task sampler
        mode:  one of `train`, `valid`, or `test`
        seeds: per-process seeds, indexed by `process_ind`. May be `None`
            (the public `*_task_sampler_args` methods default it to `None`),
            in which case `seed=None` is returned instead of raising.
        """
        if mode == "train":
            max_tasks = None  # infinite training tasks
            task_seeds_list = None  # no predefined random seeds for training
            deterministic_sampling = False  # randomly sample tasks in training
        else:
            max_tasks = 4

            # one seed for each task to sample:
            # - ensures different seeds for each sampler, and
            # - ensures a deterministic set of sampled tasks.
            task_seeds_list = list(
                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)
            )

            deterministic_sampling = (
                True  # deterministically sample task in validation/testing
            )

        # `seeds` is optional in the public sampler-args API, so guard against
        # indexing into `None`.
        # NOTE(review): assumes the task sampler accepts `seed=None` - confirm.
        sampler_seed = None if seeds is None else seeds[process_ind]

        return dict(
            gym_env_types=["Hopper-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,  # see above
            task_seeds_list=task_seeds_list,  # see above
            deterministic_sampling=deterministic_sampling,  # see above
            seed=sampler_seed,
        )

    @classmethod
    def tag(cls) -> str:
        """Return the identifier string for this experiment."""
        return "Gym-MuJoCo-Hopper-v2-PPO"


================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_humanoid_ddppo.py
================================================
from typing import Dict, List, Any

import gym
import torch.nn as nn

from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor

from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler

from projects.gym_baselines.experiments.gym_humanoid_ddppo import GymHumanoidPPOConfig


class GymMuJoCoHumanoidConfig(GymHumanoidPPOConfig):
    """Experiment configuration for the Gym MuJoCo "Humanoid-v2" environment."""

    SENSORS = [
        GymMuJoCoSensor(gym_env_name="Humanoid-v2", uuid="gym_mujoco_data"),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """We define our `ActorCriticModel` agent using a lightweight
        implementation with separate MLPs for actors and critic,
        MemorylessActorCritic.

        Since this is a model for continuous control, note that the
        superclass of our model is `ActorCriticModel[GaussianDistr]`
        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use
        a Gaussian distribution to sample actions.
        """
        action_space = gym.spaces.Box(
            -0.4000000059604645, 0.4000000059604645, (17,), "float32"
        )
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=action_space,  # specific action_space
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `GymTaskSampler` for "Humanoid-v2", forwarding `kwargs`."""
        return GymTaskSampler(gym_env_type="Humanoid-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Generate initialization arguments for train, valid, and test
        TaskSamplers.

        # Parameters
        process_ind : index of the current task sampler
        mode:  one of `train`, `valid`, or `test`
        seeds: per-process seeds, indexed by `process_ind`. May be `None`
            (the public `*_task_sampler_args` methods default it to `None`),
            in which case `seed=None` is returned instead of raising.
        """
        if mode == "train":
            max_tasks = None  # infinite training tasks
            task_seeds_list = None  # no predefined random seeds for training
            deterministic_sampling = False  # randomly sample tasks in training
        else:
            max_tasks = 4

            # one seed for each task to sample:
            # - ensures different seeds for each sampler, and
            # - ensures a deterministic set of sampled tasks.
            task_seeds_list = list(
                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)
            )

            deterministic_sampling = (
                True  # deterministically sample task in validation/testing
            )

        # `seeds` is optional in the public sampler-args API, so guard against
        # indexing into `None`.
        # NOTE(review): assumes the task sampler accepts `seed=None` - confirm.
        sampler_seed = None if seeds is None else seeds[process_ind]

        return dict(
            gym_env_types=["Humanoid-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,  # see above
            task_seeds_list=task_seeds_list,  # see above
            deterministic_sampling=deterministic_sampling,  # see above
            seed=sampler_seed,
        )

    @classmethod
    def tag(cls) -> str:
        """Return the identifier string for this experiment."""
        return "Gym-MuJoCo-Humanoid-v2-PPO"


================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_inverteddoublependulum_ddppo.py
================================================
from typing import Dict, List, Any

import gym
import torch.nn as nn

from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor

from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler

from projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig


class GymMuJoInvertedDoublePendulumConfig(GymMuJoCoPPOConfig):
    """Experiment configuration for the Gym MuJoCo "InvertedDoublePendulum-v2"
    environment."""

    SENSORS = [
        GymMuJoCoSensor(
            gym_env_name="InvertedDoublePendulum-v2", uuid="gym_mujoco_data"
        ),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """We define our `ActorCriticModel` agent using a lightweight
        implementation with separate MLPs for actors and critic,
        MemorylessActorCritic.

        Since this is a model for continuous control, note that the
        superclass of our model is `ActorCriticModel[GaussianDistr]`
        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use
        a Gaussian distribution to sample actions.
        """
        action_space = gym.spaces.Box(-1.0, 1.0, (1,), "float32")
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=action_space,  # specific action_space
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `GymTaskSampler` for "InvertedDoublePendulum-v2",
        forwarding `kwargs`."""
        return GymTaskSampler(gym_env_type="InvertedDoublePendulum-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Generate initialization arguments for train, valid, and test
        TaskSamplers.

        # Parameters
        process_ind : index of the current task sampler
        mode:  one of `train`, `valid`, or `test`
        seeds: per-process seeds, indexed by `process_ind`. May be `None`
            (the public `*_task_sampler_args` methods default it to `None`),
            in which case `seed=None` is returned instead of raising.
        """
        if mode == "train":
            max_tasks = None  # infinite training tasks
            task_seeds_list = None  # no predefined random seeds for training
            deterministic_sampling = False  # randomly sample tasks in training
        else:
            max_tasks = 4

            # one seed for each task to sample:
            # - ensures different seeds for each sampler, and
            # - ensures a deterministic set of sampled tasks.
            task_seeds_list = list(
                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)
            )

            deterministic_sampling = (
                True  # deterministically sample task in validation/testing
            )

        # `seeds` is optional in the public sampler-args API, so guard against
        # indexing into `None`.
        # NOTE(review): assumes the task sampler accepts `seed=None` - confirm.
        sampler_seed = None if seeds is None else seeds[process_ind]

        return dict(
            gym_env_types=["InvertedDoublePendulum-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,  # see above
            task_seeds_list=task_seeds_list,  # see above
            deterministic_sampling=deterministic_sampling,  # see above
            seed=sampler_seed,
        )

    @classmethod
    def tag(cls) -> str:
        """Return the identifier string for this experiment."""
        return "Gym-MuJoCo-InvertedDoublePendulum-v2-PPO"


================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_invertedpendulum_ddppo.py
================================================
from typing import Dict, List, Any

import gym
import torch.nn as nn

from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor

from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler

from projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig


class GymMuJoCoInvertedPendulumConfig(GymMuJoCoPPOConfig):
    """Experiment configuration for the `InvertedPendulum-v2` gym
    environment, built on top of `GymMuJoCoPPOConfig`."""

    # Single sensor whose uuid is also used as the model's `input_uuid`.
    SENSORS = [
        GymMuJoCoSensor(uuid="gym_mujoco_data", gym_env_name="InvertedPendulum-v2"),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the agent: a `MemorylessActorCritic`, i.e. a lightweight
        model with separate MLPs for the actor and the critic.

        This is a continuous control task, so the model's superclass is
        `ActorCriticModel[GaussianDistr]` rather than
        `ActorCriticModel[CategoricalDistr]`: actions are sampled from a
        Gaussian distribution.
        """
        # Action space specific to this environment.
        continuous_actions = gym.spaces.Box(
            low=-3.0, high=3.0, shape=(1,), dtype="float32"
        )
        observation_spaces = SensorSuite(cls.SENSORS).observation_spaces
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=continuous_actions,
            observation_space=observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate the `GymTaskSampler` for this environment, forwarding
        all keyword arguments."""
        return GymTaskSampler(gym_env_type="InvertedPendulum-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Build the initialization arguments for a train, valid, or test
        TaskSampler.

        # Parameters
        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : list of seeds, indexed by `process_ind` to obtain `seed`

        # Returns
        Dictionary of keyword arguments for the task sampler.
        """
        if mode == "train":
            # Training: unlimited tasks, no predefined seeds, random sampling.
            num_tasks, fixed_task_seeds, sample_deterministically = None, None, False
        else:
            # Validation/testing: a fixed number of tasks with one seed each,
            # so every sampler gets different seeds and the set of sampled
            # tasks is deterministic.
            num_tasks = 4
            first_seed = process_ind * num_tasks
            fixed_task_seeds = list(range(first_seed, first_seed + num_tasks))
            sample_deterministically = True

        return {
            "gym_env_types": ["InvertedPendulum-v2"],
            "sensors": self.SENSORS,  # sensors providing the agent's observations
            "max_tasks": num_tasks,
            "task_seeds_list": fixed_task_seeds,
            "deterministic_sampling": sample_deterministically,
            "seed": seeds[process_ind],
        }

    @classmethod
    def tag(cls) -> str:
        """Return the string tag identifying this experiment."""
        return "Gym-MuJoCo-InvertedPendulum-v2-PPO"


================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_reacher_ddppo.py
================================================
from typing import Dict, List, Any

import gym
import torch.nn as nn

from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor

from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler

from projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig


class GymMuJoCoReacherConfig(GymMuJoCoPPOConfig):
    """Experiment configuration for the `Reacher-v2` gym environment, built
    on top of `GymMuJoCoPPOConfig`."""

    # Single sensor whose uuid is also used as the model's `input_uuid`.
    SENSORS = [
        GymMuJoCoSensor(uuid="gym_mujoco_data", gym_env_name="Reacher-v2"),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the agent: a `MemorylessActorCritic`, i.e. a lightweight
        model with separate MLPs for the actor and the critic.

        This is a continuous control task, so the model's superclass is
        `ActorCriticModel[GaussianDistr]` rather than
        `ActorCriticModel[CategoricalDistr]`: actions are sampled from a
        Gaussian distribution.
        """
        # Action space specific to this environment.
        continuous_actions = gym.spaces.Box(
            low=-1.0, high=1.0, shape=(2,), dtype="float32"
        )
        observation_spaces = SensorSuite(cls.SENSORS).observation_spaces
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=continuous_actions,
            observation_space=observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate the `GymTaskSampler` for this environment, forwarding
        all keyword arguments."""
        return GymTaskSampler(gym_env_type="Reacher-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Build the initialization arguments for a train, valid, or test
        TaskSampler.

        # Parameters
        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : list of seeds, indexed by `process_ind` to obtain `seed`

        # Returns
        Dictionary of keyword arguments for the task sampler.
        """
        if mode == "train":
            # Training: unlimited tasks, no predefined seeds, random sampling.
            num_tasks, fixed_task_seeds, sample_deterministically = None, None, False
        else:
            # Validation/testing: a fixed number of tasks with one seed each,
            # so every sampler gets different seeds and the set of sampled
            # tasks is deterministic.
            num_tasks = 4
            first_seed = process_ind * num_tasks
            fixed_task_seeds = list(range(first_seed, first_seed + num_tasks))
            sample_deterministically = True

        return {
            "gym_env_types": ["Reacher-v2"],
            "sensors": self.SENSORS,  # sensors providing the agent's observations
            "max_tasks": num_tasks,
            "task_seeds_list": fixed_task_seeds,
            "deterministic_sampling": sample_deterministically,
            "seed": seeds[process_ind],
        }

    @classmethod
    def tag(cls) -> str:
        """Return the string tag identifying this experiment."""
        return "Gym-MuJoCo-Reacher-v2-PPO"


================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_swimmer_ddppo.py
================================================
from typing import Dict, List, Any

import gym
import torch.nn as nn

from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor

from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler

from projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig


class GymMuJoCoSwimmerConfig(GymMuJoCoPPOConfig):
    """Experiment configuration for the `Swimmer-v2` gym environment, built
    on top of `GymMuJoCoPPOConfig`."""

    # Single sensor whose uuid is also used as the model's `input_uuid`.
    SENSORS = [
        GymMuJoCoSensor(uuid="gym_mujoco_data", gym_env_name="Swimmer-v2"),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the agent: a `MemorylessActorCritic`, i.e. a lightweight
        model with separate MLPs for the actor and the critic.

        This is a continuous control task, so the model's superclass is
        `ActorCriticModel[GaussianDistr]` rather than
        `ActorCriticModel[CategoricalDistr]`: actions are sampled from a
        Gaussian distribution.
        """
        # Action space specific to this environment.
        continuous_actions = gym.spaces.Box(
            low=-1.0, high=1.0, shape=(2,), dtype="float32"
        )
        observation_spaces = SensorSuite(cls.SENSORS).observation_spaces
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=continuous_actions,
            observation_space=observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate the `GymTaskSampler` for this environment, forwarding
        all keyword arguments."""
        return GymTaskSampler(gym_env_type="Swimmer-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Build the initialization arguments for a train, valid, or test
        TaskSampler.

        # Parameters
        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : list of seeds, indexed by `process_ind` to obtain `seed`

        # Returns
        Dictionary of keyword arguments for the task sampler.
        """
        if mode == "train":
            # Training: unlimited tasks, no predefined seeds, random sampling.
            num_tasks, fixed_task_seeds, sample_deterministically = None, None, False
        else:
            # Validation/testing: a fixed number of tasks with one seed each,
            # so every sampler gets different seeds and the set of sampled
            # tasks is deterministic.
            num_tasks = 4
            first_seed = process_ind * num_tasks
            fixed_task_seeds = list(range(first_seed, first_seed + num_tasks))
            sample_deterministically = True

        return {
            "gym_env_types": ["Swimmer-v2"],
            "sensors": self.SENSORS,  # sensors providing the agent's observations
            "max_tasks": num_tasks,
            "task_seeds_list": fixed_task_seeds,
            "deterministic_sampling": sample_deterministically,
            "seed": seeds[process_ind],
        }

    @classmethod
    def tag(cls) -> str:
        """Return the string tag identifying this experiment."""
        return "Gym-MuJoCo-Swimmer-v2-PPO"


================================================
FILE: projects/gym_baselines/experiments/mujoco/gym_mujoco_walker2d_ddppo.py
================================================
from typing import Dict, List, Any

import gym
import torch.nn as nn

from allenact.base_abstractions.experiment_config import TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor

from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler

from projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig


class GymMuJoCoWalkerConfig(GymMuJoCoPPOConfig):
    """Experiment configuration for the `Walker2d-v2` gym environment, built
    on top of `GymMuJoCoPPOConfig`."""

    # Single sensor whose uuid is also used as the model's `input_uuid`.
    SENSORS = [
        GymMuJoCoSensor(uuid="gym_mujoco_data", gym_env_name="Walker2d-v2"),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the agent: a `MemorylessActorCritic`, i.e. a lightweight
        model with separate MLPs for the actor and the critic.

        This is a continuous control task, so the model's superclass is
        `ActorCriticModel[GaussianDistr]` rather than
        `ActorCriticModel[CategoricalDistr]`: actions are sampled from a
        Gaussian distribution.
        """
        # Action space specific to this environment.
        continuous_actions = gym.spaces.Box(
            low=-1.0, high=1.0, shape=(6,), dtype="float32"
        )
        observation_spaces = SensorSuite(cls.SENSORS).observation_spaces
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=continuous_actions,
            observation_space=observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate the `GymTaskSampler` for this environment, forwarding
        all keyword arguments."""
        return GymTaskSampler(gym_env_type="Walker2d-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Build the initialization arguments for a train, valid, or test
        TaskSampler.

        # Parameters
        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : list of seeds, indexed by `process_ind` to obtain `seed`

        # Returns
        Dictionary of keyword arguments for the task sampler.
        """
        if mode == "train":
            # Training: unlimited tasks, no predefined seeds, random sampling.
            num_tasks, fixed_task_seeds, sample_deterministically = None, None, False
        else:
            # Validation/testing: a fixed number of tasks with one seed each,
            # so every sampler gets different seeds and the set of sampled
            # tasks is deterministic.
            num_tasks = 4
            first_seed = process_ind * num_tasks
            fixed_task_seeds = list(range(first_seed, first_seed + num_tasks))
            sample_deterministically = True

        return {
            "gym_env_types": ["Walker2d-v2"],
            "sensors": self.SENSORS,  # sensors providing the agent's observations
            "max_tasks": num_tasks,
            "task_seeds_list": fixed_task_seeds,
            "deterministic_sampling": sample_deterministically,
            "seed": seeds[process_ind],
        }

    @classmethod
    def tag(cls) -> str:
        """Return the string tag identifying this experiment."""
        return "Gym-MuJoCo-Walker2d-v2-PPO"


================================================
FILE: projects/gym_baselines/models/__init__.py
================================================


================================================
FILE: projects/gym_baselines/models/gym_models.py
================================================
"""
Note: This file was added only for format consistency with the other baselines in the project, so for now it is the same as
`allenact_plugins.gym_plugin.gym_models`. However, for Gym Robotics environments some modifications are needed.
For example, for `state_dim`:
        if input_uuid == 'gym_robotics_data':
            # consider that the observation space is Dict for robotics env
            state_dim = observation_space[self.input_uuid]['observation'].shape[0]
        else:
            assert len(observation_space[self.input_uuid].shape) == 1
            state_dim = observation_space[self.input_uuid].shape[0]
"""


================================================
FILE: projects/manipulathor_baselines/__init__.py
================================================


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/__init__.py
================================================


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/__init__.py
================================================


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_base.py
================================================
from abc import ABC
from typing import Optional, Sequence, Union

from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.base_abstractions.sensor import Sensor
from allenact.utils.experiment_utils import Builder


class ArmPointNavBaseConfig(ExperimentConfig, ABC):
    """Abstract base configuration holding the defaults shared by the
    ArmPointNav experiment configs."""

    # Left unset here; subclasses may override these.
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    SENSORS: Optional[Sequence[Sensor]] = None

    # Agent / environment constants.
    STEP_SIZE = 0.25
    ROTATION_DEGREES = 45.0
    VISIBILITY_DISTANCE = 1.0
    STOCHASTIC = False

    # Camera / frame sizes and the per-task step limit.
    CAMERA_WIDTH = 224
    CAMERA_HEIGHT = 224
    SCREEN_SIZE = 224
    MAX_STEPS = 200

    def __init__(self):
        """Attach the reward configuration to the instance as
        `REWARD_CONFIG`."""
        self.REWARD_CONFIG = dict(
            step_penalty=-0.01,
            goal_success_reward=10.0,
            pickup_success_reward=5.0,
            failed_stop_reward=0.0,
            shaping_weight=1.0,  # we are not using this
            failed_action_penalty=-0.03,
        )

    @classmethod
    def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors to use; the base config uses none."""
        return ()


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_mixin_ddppo.py
================================================
import torch.optim as optim
from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.utils.experiment_utils import (
    Builder,
    PipelineStage,
    TrainingPipeline,
    LinearDecay,
)
from torch.optim.lr_scheduler import LambdaLR

from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_base import (
    ArmPointNavBaseConfig,
)


class ArmPointNavMixInPPOConfig(ArmPointNavBaseConfig):
    """Mix-in that supplies a single-stage PPO training pipeline for the
    ArmPointNav experiment configs."""

    def training_pipeline(self, **kwargs):
        """Build the `TrainingPipeline`: one PPO stage optimized with Adam,
        with the learning rate scheduled by `LinearDecay` over the stage's
        steps.

        # Returns
        The configured `TrainingPipeline`.
        """
        total_ppo_steps = int(300000000)

        # The only stage of the pipeline: PPO for `total_ppo_steps` steps.
        ppo_stage = PipelineStage(
            loss_names=["ppo_loss"], max_stage_steps=total_ppo_steps
        )
        # Learning-rate multiplier schedule spanning the same number of steps.
        lr_schedule = Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=total_ppo_steps)}
        )

        return TrainingPipeline(
            save_interval=500000,  # from 50k
            metric_accumulate_interval=1000,
            optimizer_builder=Builder(optim.Adam, dict(lr=3e-4)),
            num_mini_batch=1,
            update_repeats=4,
            max_grad_norm=0.5,
            num_steps=self.MAX_STEPS,
            named_losses={"ppo_loss": PPO(**PPOConfig)},
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[ppo_stage],
            lr_scheduler_builder=lr_schedule,
        )


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_mixin_simplegru.py
================================================
from typing import Sequence, Union

import gym
import torch.nn as nn

from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import Builder
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_base import (
    ArmPointNavBaseConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.models.arm_pointnav_models import (
    ArmPointNavBaselineActorCritic,
)


class ArmPointNavMixInSimpleGRUConfig(ArmPointNavBaseConfig):
    """Mix-in that supplies the `ArmPointNavBaselineActorCritic` model for
    the ArmPointNav experiment configs."""

    # Provided by the concrete experiment config.
    TASK_SAMPLER: TaskSampler

    @classmethod
    def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors to use; this mix-in uses none."""
        return []

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the actor-critic model.

        The discrete action space has one entry per action name of the task
        sampler's task type; the observation space is taken from the
        `sensor_preprocessor_graph` keyword argument, which is required.
        """
        action_names = cls.TASK_SAMPLER._TASK_TYPE.class_action_names()
        discrete_actions = gym.spaces.Discrete(len(action_names))
        preprocessor_graph = kwargs["sensor_preprocessor_graph"]

        return ArmPointNavBaselineActorCritic(
            action_space=discrete_actions,
            observation_space=preprocessor_graph.observation_spaces,
            hidden_size=512,
        )


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_thor_base.py
================================================
import platform
from abc import ABC
from math import ceil
from typing import Dict, Any, List, Optional, Sequence

import gym
import numpy as np
import torch

from allenact.base_abstractions.experiment_config import MachineParams
from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import evenly_distribute_count_into_bins
from allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS
from allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (
    SimpleArmPointNavGeneralSampler,
)
from allenact_plugins.manipulathor_plugin.manipulathor_viz import (
    ImageVisualizer,
    TestMetricLogger,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_base import (
    ArmPointNavBaseConfig,
)


class ArmPointNavThorBaseConfig(ArmPointNavBaseConfig, ABC):
    """Base config shared by the ArmPointNav experiments that run in THOR.

    It provides the machine/device allocation (`machine_params`), the task
    sampler construction (`make_sampler_fn`) and the per-process task sampler
    arguments for the train, valid and test splits. The scene lists,
    `OBJECT_TYPES`, `NUM_PROCESSES` and `SENSORS` are left unset (`None`)
    here and are expected to be filled in by subclasses.
    """

    TASK_SAMPLER = SimpleArmPointNavGeneralSampler
    # Visualizers are only attached when running on macOS ("Darwin").
    VISUALIZE = platform.system() == "Darwin"

    NUM_PROCESSES: Optional[int] = None
    TRAIN_GPU_IDS = list(range(torch.cuda.device_count()))
    SAMPLER_GPU_IDS = TRAIN_GPU_IDS
    # Index of the last visible GPU. These are only consulted by
    # `machine_params` when CUDA is available.
    VALID_GPU_IDS = [torch.cuda.device_count() - 1]
    TEST_GPU_IDS = [torch.cuda.device_count() - 1]

    TRAIN_DATASET_DIR: Optional[str] = None
    VAL_DATASET_DIR: Optional[str] = None

    CAP_TRAINING = None

    TRAIN_SCENES: Optional[List[str]] = None
    VAL_SCENES: Optional[List[str]] = None
    # `valid_task_sampler_args` reads `VALID_SCENES`; it is declared here so
    # the attribute always exists. `VAL_SCENES` is kept for backward
    # compatibility and used as a fallback when `VALID_SCENES` is unset.
    VALID_SCENES: Optional[List[str]] = None
    TEST_SCENES: Optional[List[str]] = None

    OBJECT_TYPES: Optional[Sequence[str]] = None
    VALID_SAMPLES_IN_SCENE = 1
    TEST_SAMPLES_IN_SCENE = 1

    NUMBER_OF_TEST_PROCESS = 10

    def __init__(self):
        """Initialize the base config and record the default `ENV_ARGS`.

        # Raises
        AssertionError : if the camera size, visibility distance or step
            size differ from the values this config expects.
        """
        super().__init__()

        assert (
            self.CAMERA_WIDTH == 224
            and self.CAMERA_HEIGHT == 224
            and self.VISIBILITY_DISTANCE == 1
            and self.STEP_SIZE == 0.25
        )
        self.ENV_ARGS = ENV_ARGS

    def machine_params(self, mode="train", **kwargs):
        """Return the `MachineParams` (process counts, devices and sensor
        preprocessor graph) for the given mode.

        # Parameters
        mode : one of `train`, `valid`, or `test`

        # Returns
        A `MachineParams` instance. Without CUDA a single process with no
        GPU devices is used in every mode.

        # Raises
        NotImplementedError : if `mode` is not one of the supported values.
        """
        has_cuda = torch.cuda.is_available()
        sampler_devices: Sequence[int] = []
        if mode == "train":
            workers_per_device = 1
            gpu_ids = self.TRAIN_GPU_IDS * workers_per_device if has_cuda else []
            nprocesses = (
                evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))
                if has_cuda
                else 1
            )
            sampler_devices = self.SAMPLER_GPU_IDS
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = self.VALID_GPU_IDS if has_cuda else []
        elif mode == "test":
            nprocesses = self.NUMBER_OF_TEST_PROCESS if has_cuda else 1
            gpu_ids = self.TEST_GPU_IDS if has_cuda else []
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        # Expert action sensors are only kept for training.
        sensors = [*self.SENSORS]
        if mode != "train":
            sensors = [s for s in sensors if not isinstance(s, ExpertActionSensor)]

        # The graph is only built when training or when at least one process
        # will actually run in this mode.
        sensor_preprocessor_graph = (
            SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(sensors).observation_spaces,
                preprocessors=self.preprocessors(),
            )
            if mode == "train"
            or (
                (isinstance(nprocesses, int) and nprocesses > 0)
                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
            )
            else None
        )

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sampler_devices=(
                sampler_devices if mode == "train" else gpu_ids
            ),  # ignored with > 1 gpu_ids
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate `cls.TASK_SAMPLER` with the given keyword arguments.

        The arguments are extended with the object types, a timestamped
        experiment name and, when `cls.VISUALIZE` is set, the visualizers.
        """
        from datetime import datetime

        now = datetime.now()
        exp_name_w_time = cls.__name__ + "_" + now.strftime("%m_%d_%Y_%H_%M_%S_%f")
        if cls.VISUALIZE:
            visualizers = [
                ImageVisualizer(exp_name=exp_name_w_time),
                TestMetricLogger(exp_name=exp_name_w_time),
            ]

            kwargs["visualizers"] = visualizers
        kwargs["objects"] = cls.OBJECT_TYPES
        kwargs["exp_name"] = exp_name_w_time
        return cls.TASK_SAMPLER(**kwargs)

    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        """Return `num_parts + 1` rounded, evenly spaced integer boundaries
        from 0 to `n`; consecutive pairs delimit the parts."""
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    def _env_args_with_x_display(
        self, process_ind: int, devices: Optional[List[int]]
    ) -> Dict[str, Any]:
        """Return a copy of `self.ENV_ARGS` with `x_display` set for the
        given process.

        `x_display` is `"0.<device>"`, with the device picked round-robin
        from `devices`, or `None` when `devices` is `None` or empty.
        """
        env_args: Dict[str, Any] = {}
        env_args.update(self.ENV_ARGS)
        # `devices` may legitimately be None (it is Optional, and the default
        # in `train_task_sampler_args`), so guard before calling `len`.
        has_devices = devices is not None and len(devices) > 0
        env_args["x_display"] = (
            ("0.%d" % devices[process_ind % len(devices)]) if has_devices else None
        )
        return env_args

    def _get_sampler_args_for_scene_split(
        self,
        scenes: List[str],
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Build the task sampler arguments common to all splits, assigning
        this process its share of `scenes`.

        # Parameters
        scenes : scenes of the split, divided among all processes
        process_ind : index of the current task sampler
        total_processes : number of task samplers sharing `scenes`
        seeds : optional per-process seeds, indexed by `process_ind`
        deterministic_cudnn : forwarded unchanged to the task sampler

        # Returns
        Dictionary of task sampler keyword arguments.
        """
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisible by the number of scenes"
                )
            # Repeat the scene list until it covers every process, then trim
            # it to a multiple of the process count.
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        else:
            # NOTE: no scene is repeated in this branch; when the counts do
            # not divide evenly, processes receive differently sized slices.
            if len(scenes) % total_processes != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisor of the number of scenes"
                )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "env_args": self.ENV_ARGS,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(
                len(self.TASK_SAMPLER._TASK_TYPE.class_action_names())
            ),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a training process.

        # Parameters
        process_ind : index of the current task sampler
        total_processes : number of training task samplers
        devices : optional device ids used to choose `x_display`
        seeds : optional per-process seeds
        deterministic_cudnn : forwarded unchanged to the task sampler
        """
        res = self._get_sampler_args_for_scene_split(
            self.TRAIN_SCENES,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_period"] = "manual"
        res["sampler_mode"] = "train"
        res["cap_training"] = self.CAP_TRAINING
        res["env_args"] = self._env_args_with_x_display(process_ind, devices)
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]],
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a validation process.

        The number of tasks is capped at `VALID_SAMPLES_IN_SCENE` per scene
        assigned to this process.

        # Parameters
        process_ind : index of the current task sampler
        total_processes : number of validation task samplers
        devices : optional device ids used to choose `x_display`
        seeds : optional per-process seeds
        deterministic_cudnn : forwarded unchanged to the task sampler
        """
        # Prefer `VALID_SCENES`; fall back to `VAL_SCENES` if only that one
        # was set.
        valid_scenes = (
            self.VALID_SCENES if self.VALID_SCENES is not None else self.VAL_SCENES
        )
        res = self._get_sampler_args_for_scene_split(
            valid_scenes,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_period"] = self.VALID_SAMPLES_IN_SCENE
        res["sampler_mode"] = "val"
        res["cap_training"] = self.CAP_TRAINING
        res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * len(res["scenes"])
        res["env_args"] = self._env_args_with_x_display(process_ind, devices)
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]],
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a test process.

        # Parameters
        process_ind : index of the current task sampler
        total_processes : number of test task samplers
        devices : optional device ids used to choose `x_display`
        seeds : optional per-process seeds
        deterministic_cudnn : forwarded unchanged to the task sampler
        """
        res = self._get_sampler_args_for_scene_split(
            self.TEST_SCENES,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_period"] = self.TEST_SAMPLES_IN_SCENE
        res["sampler_mode"] = "test"
        res["cap_training"] = self.CAP_TRAINING
        res["env_args"] = self._env_args_with_x_display(process_ind, devices)
        return res


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/__init__.py
================================================


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_depth.py
================================================
from allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS
from allenact_plugins.manipulathor_plugin.manipulathor_sensors import (
    DepthSensorThor,
    RelativeAgentArmToObjectSensor,
    RelativeObjectToGoalSensor,
    PickedUpObjSensor,
)
from allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (
    ArmPointNavTaskSampler,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_ddppo import (
    ArmPointNavMixInPPOConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_simplegru import (
    ArmPointNavMixInSimpleGRUConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.ithor.armpointnav_ithor_base import (
    ArmPointNaviThorBaseConfig,
)


class ArmPointNavDepth(
    ArmPointNaviThorBaseConfig,
    ArmPointNavMixInPPOConfig,
    ArmPointNavMixInSimpleGRUConfig,
):
    """An ArmPointNav experiment configuration in iTHOR that uses a depth
    sensor as its visual input."""

    SENSORS = [
        DepthSensorThor(
            uuid="depth_lowres",
            use_normalization=True,
            width=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            height=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
        ),
        RelativeAgentArmToObjectSensor(),
        RelativeObjectToGoalSensor(),
        PickedUpObjSensor(),
    ]

    MAX_STEPS = 200
    TASK_SAMPLER = ArmPointNavTaskSampler

    def __init__(self):
        """Initialize the parent configs, check the expected camera and
        movement constants, and enable depth rendering in `ENV_ARGS`."""
        super().__init__()

        # Same four conditions as a single conjunction; any mismatch raises
        # AssertionError.
        assert self.CAMERA_WIDTH == 224
        assert self.CAMERA_HEIGHT == 224
        assert self.VISIBILITY_DISTANCE == 1
        assert self.STEP_SIZE == 0.25

        # Copy of the default env args with depth images turned on.
        self.ENV_ARGS = dict(ENV_ARGS, renderDepthImage=True)

    @classmethod
    def tag(cls):
        """Return the experiment tag, which is simply the class name."""
        return cls.__name__


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_disjoint_depth.py
================================================
import gym
import torch.nn as nn

from allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS
from allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (
    ArmPointNavTaskSampler,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.ithor.armpointnav_depth import (
    ArmPointNavDepth,
)
from projects.manipulathor_baselines.armpointnav_baselines.models.disjoint_arm_pointnav_models import (
    DisjointArmPointNavBaselineActorCritic,
)


class ArmPointNavDisjointDepth(ArmPointNavDepth):
    """Variant of the depth-based ArmPointNav configuration that uses the
    `DisjointArmPointNavBaselineActorCritic` model."""

    TASK_SAMPLER = ArmPointNavTaskSampler

    def __init__(self):
        """Initialize the parent config, check the expected camera and
        movement constants, and enable depth rendering in `ENV_ARGS`."""
        super().__init__()

        # Same four conditions as a single conjunction; any mismatch raises
        # AssertionError.
        assert self.CAMERA_WIDTH == 224
        assert self.CAMERA_HEIGHT == 224
        assert self.VISIBILITY_DISTANCE == 1
        assert self.STEP_SIZE == 0.25

        # Copy of the default env args with depth images turned on.
        self.ENV_ARGS = dict(ENV_ARGS, renderDepthImage=True)

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the disjoint actor-critic model.

        The discrete action space has one entry per action name of the task
        sampler's task type; the observation space is taken from the
        `sensor_preprocessor_graph` keyword argument, which is required.
        """
        action_names = cls.TASK_SAMPLER._TASK_TYPE.class_action_names()
        discrete_actions = gym.spaces.Discrete(len(action_names))
        preprocessor_graph = kwargs["sensor_preprocessor_graph"]

        return DisjointArmPointNavBaselineActorCritic(
            action_space=discrete_actions,
            observation_space=preprocessor_graph.observation_spaces,
            hidden_size=512,
        )

    @classmethod
    def tag(cls):
        """Return the experiment tag, which is simply the class name."""
        return cls.__name__


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_ithor_base.py
================================================
from abc import ABC

from allenact_plugins.manipulathor_plugin.armpointnav_constants import (
    TRAIN_OBJECTS,
    TEST_OBJECTS,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_thor_base import (
    ArmPointNavThorBaseConfig,
)


class ArmPointNaviThorBaseConfig(ArmPointNavThorBaseConfig, ABC):
    """The base config shared by the iTHOR ArmPointNav experiments.

    Defines the number of processes, the train / test / validation split
    of the `FloorPlan{i}_physics` scenes (by scene index) and the object
    types used for seen and unseen evaluation.
    """

    NUM_PROCESSES = 40
    # Scenes are numbered 1..TOTAL_NUMBER_SCENES.
    TOTAL_NUMBER_SCENES = 30

    # Training scenes: indices that are 0 or 1 modulo 3, with scene 28 left
    # out (original author's note: "last scenes are really bad").
    TRAIN_SCENES = [
        "FloorPlan{}_physics".format(scene_idx)
        for scene_idx in range(1, TOTAL_NUMBER_SCENES + 1)
        if scene_idx % 3 != 2 and scene_idx != 28
    ]
    # Test scenes: indices equal to 2 modulo 6 (which implies 2 modulo 3).
    TEST_SCENES = [
        "FloorPlan{}_physics".format(scene_idx)
        for scene_idx in range(1, TOTAL_NUMBER_SCENES + 1)
        if scene_idx % 6 == 2
    ]
    # Validation scenes: indices equal to 5 modulo 6 (also 2 modulo 3).
    VALID_SCENES = [
        "FloorPlan{}_physics".format(scene_idx)
        for scene_idx in range(1, TOTAL_NUMBER_SCENES + 1)
        if scene_idx % 6 == 5
    ]

    ALL_SCENES = TRAIN_SCENES + TEST_SCENES + VALID_SCENES

    # Every scene but the excluded one must appear in exactly one split.
    assert len(ALL_SCENES) == TOTAL_NUMBER_SCENES - 1
    assert len(set(ALL_SCENES)) == TOTAL_NUMBER_SCENES - 1

    OBJECT_TYPES = tuple(sorted(TRAIN_OBJECTS))

    UNSEEN_OBJECT_TYPES = tuple(sorted(TEST_OBJECTS))


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_no_vision.py
================================================
from allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS
from allenact_plugins.manipulathor_plugin.manipulathor_sensors import (
    NoVisionSensorThor,
    RelativeAgentArmToObjectSensor,
    RelativeObjectToGoalSensor,
    PickedUpObjSensor,
)
from allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (
    ArmPointNavTaskSampler,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_ddppo import (
    ArmPointNavMixInPPOConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_simplegru import (
    ArmPointNavMixInSimpleGRUConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.ithor.armpointnav_ithor_base import (
    ArmPointNaviThorBaseConfig,
)


class ArmPointNavNoVision(
    ArmPointNaviThorBaseConfig,
    ArmPointNavMixInPPOConfig,
    ArmPointNavMixInSimpleGRUConfig,
):
    """An ArmPointNav experiment configuration in iThor that uses a
    `NoVisionSensorThor` in place of a camera sensor.

    The sensor is registered under the `rgb_lowres` uuid, and depth
    rendering is disabled in the environment arguments.
    """

    SENSORS = [
        NoVisionSensorThor(
            height=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            width=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=False,
            uuid="rgb_lowres",
        ),
        RelativeAgentArmToObjectSensor(),
        RelativeObjectToGoalSensor(),
        PickedUpObjSensor(),
    ]

    MAX_STEPS = 200
    TASK_SAMPLER = ArmPointNavTaskSampler

    def __init__(self):
        super().__init__()

        # Sanity checks: this configuration assumes these exact camera and
        # environment constants.
        assert self.CAMERA_WIDTH == 224
        assert self.CAMERA_HEIGHT == 224
        assert self.VISIBILITY_DISTANCE == 1
        assert self.STEP_SIZE == 0.25

        # Copy the shared environment arguments and switch depth rendering off.
        self.ENV_ARGS = dict(ENV_ARGS, renderDepthImage=False)

    @classmethod
    def tag(cls):
        """Return the experiment tag (the class name)."""
        return cls.__name__


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_rgb.py
================================================
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS
from allenact_plugins.manipulathor_plugin.manipulathor_sensors import (
    RelativeAgentArmToObjectSensor,
    RelativeObjectToGoalSensor,
    PickedUpObjSensor,
)
from allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (
    ArmPointNavTaskSampler,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_ddppo import (
    ArmPointNavMixInPPOConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_simplegru import (
    ArmPointNavMixInSimpleGRUConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.ithor.armpointnav_ithor_base import (
    ArmPointNaviThorBaseConfig,
)


class ArmPointNavRGB(
    ArmPointNaviThorBaseConfig,
    ArmPointNavMixInPPOConfig,
    ArmPointNavMixInSimpleGRUConfig,
):
    """An ArmPointNav experiment configuration in iThor with RGB input.

    The RGB sensor is registered under the `rgb_lowres` uuid with ResNet
    normalization enabled; the shared environment arguments are used
    unchanged.
    """

    SENSORS = [
        RGBSensorThor(
            height=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            width=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        RelativeAgentArmToObjectSensor(),
        RelativeObjectToGoalSensor(),
        PickedUpObjSensor(),
    ]

    MAX_STEPS = 200
    TASK_SAMPLER = ArmPointNavTaskSampler

    def __init__(self):
        super().__init__()

        # Sanity checks: this configuration assumes these exact camera and
        # environment constants.
        assert self.CAMERA_WIDTH == 224
        assert self.CAMERA_HEIGHT == 224
        assert self.VISIBILITY_DISTANCE == 1
        assert self.STEP_SIZE == 0.25

        # Per-instance shallow copy of the shared environment arguments.
        self.ENV_ARGS = dict(ENV_ARGS)

    @classmethod
    def tag(cls):
        """Return the experiment tag (the class name)."""
        return cls.__name__


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_rgbdepth.py
================================================
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS
from allenact_plugins.manipulathor_plugin.manipulathor_sensors import (
    DepthSensorThor,
    RelativeAgentArmToObjectSensor,
    RelativeObjectToGoalSensor,
    PickedUpObjSensor,
)
from allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (
    ArmPointNavTaskSampler,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_ddppo import (
    ArmPointNavMixInPPOConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_simplegru import (
    ArmPointNavMixInSimpleGRUConfig,
)
from projects.manipulathor_baselines.armpointnav_baselines.experiments.ithor.armpointnav_ithor_base import (
    ArmPointNaviThorBaseConfig,
)


class ArmPointNavRGBDepth(
    ArmPointNaviThorBaseConfig,
    ArmPointNavMixInPPOConfig,
    ArmPointNavMixInSimpleGRUConfig,
):
    """An ArmPointNav experiment configuration in iThor with both RGB and
    depth input.

    The depth sensor is registered as `depth_lowres`, the RGB sensor as
    `rgb_lowres`, and depth rendering is enabled in the environment
    arguments.
    """

    SENSORS = [
        DepthSensorThor(
            height=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            width=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        RGBSensorThor(
            height=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            width=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        RelativeAgentArmToObjectSensor(),
        RelativeObjectToGoalSensor(),
        PickedUpObjSensor(),
    ]

    MAX_STEPS = 200
    TASK_SAMPLER = ArmPointNavTaskSampler

    def __init__(self):
        super().__init__()

        # Sanity checks: this configuration assumes these exact camera and
        # environment constants.
        assert self.CAMERA_WIDTH == 224
        assert self.CAMERA_HEIGHT == 224
        assert self.VISIBILITY_DISTANCE == 1
        assert self.STEP_SIZE == 0.25

        # Copy the shared environment arguments and switch depth rendering on.
        self.ENV_ARGS = dict(ENV_ARGS, renderDepthImage=True)

    @classmethod
    def tag(cls):
        """Return the experiment tag (the class name)."""
        return cls.__name__


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/models/__init__.py
================================================


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/models/arm_pointnav_models.py
================================================
"""Baseline models for use in the Arm Point Navigation task.

Arm Point Navigation is currently available as a Task in ManipulaTHOR.
"""

from typing import Tuple, Optional

import gym
import torch
from gym.spaces.dict import Dict as SpaceDict

from allenact.algorithms.onpolicy_sync.policy import (
    ActorCriticModel,
    LinearCriticHead,
    LinearActorHead,
    DistributionType,
    Memory,
    ObservationType,
)
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput
from allenact.embodiedai.models.basic_models import SimpleCNN, RNNStateEncoder
from projects.manipulathor_baselines.armpointnav_baselines.models.manipulathor_net_utils import (
    input_embedding_net,
)


class ArmPointNavBaselineActorCritic(ActorCriticModel[CategoricalDistr]):
    """Recurrent actor-critic baseline for the ArmPointNav task.

    # Attributes
    action_space : The space of actions available to the agent. Currently only discrete
        actions are allowed (so this space will always be of type `gym.spaces.Discrete`).
    observation_space : The observation space expected by the agent. It must contain
        at least one of the `rgb_lowres` / `depth_lowres` entries, otherwise
        construction raises `NotImplementedError`.
    hidden_size : The hidden size of the RNN and of the visual encoder output.
    obj_state_embedding_size : The dimensionality of the embedding produced for
        the relative-distance observations.
    trainable_masked_hidden_state : Forwarded to `RNNStateEncoder`.
    num_rnn_layers : Number of layers of the recurrent state encoder.
    rnn_type : Type of recurrent cell, forwarded to `RNNStateEncoder`.
    """

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        hidden_size=512,
        obj_state_embedding_size=512,
        trainable_masked_hidden_state: bool = False,
        num_rnn_layers=1,
        rnn_type="GRU",
    ):
        """Initializer.

        See class documentation for parameter definitions.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        self._hidden_size = hidden_size
        self.object_type_embedding_size = obj_state_embedding_size

        available_uuids = self.observation_space.spaces.keys()
        has_rgb = "rgb_lowres" in available_uuids
        has_depth = "depth_lowres" in available_uuids

        self.visual_encoder = SimpleCNN(
            self.observation_space,
            self._hidden_size,
            rgb_uuid="rgb_lowres" if has_rgb else None,
            depth_uuid="depth_lowres" if has_depth else None,
        )

        # One `hidden_size`-wide feature block is expected per visual
        # modality that is present; at least one modality is required.
        if not (has_rgb or has_depth):
            raise NotImplementedError
        input_visual_feature_num = 2 if (has_rgb and has_depth) else 1

        rnn_input_size = (
            self._hidden_size * input_visual_feature_num + obj_state_embedding_size
        )
        self.state_encoder = RNNStateEncoder(
            rnn_input_size,
            self._hidden_size,
            trainable_masked_hidden_state=trainable_masked_hidden_state,
            num_layers=num_rnn_layers,
            rnn_type=rnn_type,
        )

        self.actor = LinearActorHead(self._hidden_size, action_space.n)
        self.critic = LinearCriticHead(self._hidden_size)

        # Layer sizes of the MLP embedding the 3-dimensional relative
        # distance observations: 3 -> 100 -> obj_state_embedding_size.
        embedding_layer_sizes = (
            torch.Tensor([3, 100, obj_state_embedding_size]).long().tolist()
        )
        self.relative_dist_embedding = input_embedding_net(
            embedding_layer_sizes, dropout=0
        )

        self.train()

    @property
    def recurrent_hidden_state_size(self) -> int:
        """The recurrent hidden state size of the model."""
        return self._hidden_size

    @property
    def num_recurrent_layers(self) -> int:
        """Number of recurrent hidden layers."""
        return self.state_encoder.num_recurrent_layers

    def _recurrent_memory_specification(self):
        """Describe the single `rnn` memory tensor: its named dimensions
        (layer, sampler, hidden) and its dtype."""
        rnn_dims = (
            ("layer", self.num_recurrent_layers),
            ("sampler", None),
            ("hidden", self.recurrent_hidden_state_size),
        )
        return {"rnn": (rnn_dims, torch.float32)}

    def get_relative_distance_embedding(
        self, state_tensor: torch.Tensor
    ) -> torch.FloatTensor:
        """Embed a relative-distance observation with the shared MLP."""
        embedding = self.relative_dist_embedding(state_tensor)
        return embedding

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Compute the policy and value estimate for a batch of observations.

        The arm-to-object embedding is used for entries where the object
        has not been picked up and the object-to-goal embedding for entries
        where it has. The result is concatenated with the visual features
        and passed through the recurrent state encoder before the actor and
        critic heads.

        # Parameters
        observations : Batched input observations.
        memory : `Memory` containing the hidden states from initial timepoints.
        prev_actions : Tensor of previous actions taken (unused here).
        masks : Masks applied to hidden states. See `RNNStateEncoder`.
        # Returns
        Tuple of the `ActorCriticOutput` and recurrent hidden state.
        """
        arm_to_obj_embed = self.get_relative_distance_embedding(
            observations["relative_agent_arm_to_obj"]
        )
        obj_to_goal_embed = self.get_relative_distance_embedding(
            observations["relative_obj_to_goal"]
        )

        visual_features = self.visual_encoder(observations)

        # Once the object is held, overwrite (in place) the arm-to-object
        # embedding with the object-to-goal embedding.
        holding_object = observations["pickedup_object"] == 1
        arm_to_obj_embed[holding_object] = obj_to_goal_embed[holding_object]

        rnn_input = torch.cat([arm_to_obj_embed, visual_features], dim=-1)
        rnn_output, new_hidden_states = self.state_encoder(
            rnn_input, memory.tensor("rnn"), masks
        )

        policy = self.actor(rnn_output)
        value = self.critic(rnn_output)
        actor_critic_output = ActorCriticOutput(
            distributions=policy, values=value, extras={}
        )

        return actor_critic_output, memory.set_tensor("rnn", new_hidden_states)


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/models/base_models.py
================================================
import torch
import torch.nn as nn


class LinearActorHeadNoCategory(nn.Module):
    """Linear actor head that returns the raw output of its linear layer
    instead of wrapping it in a distribution object.

    The weight is initialised orthogonally with a gain of 0.01 and the
    bias is set to zero.
    """

    def __init__(self, num_inputs: int, num_outputs: int):
        super().__init__()

        projection = nn.Linear(num_inputs, num_outputs)
        nn.init.orthogonal_(projection.weight, gain=0.01)
        nn.init.constant_(projection.bias, 0)
        self.linear = projection

    def forward(self, x: torch.FloatTensor):  # type: ignore
        """Apply the linear layer; the result must be a 3-dimensional tensor."""
        logits = self.linear(x)  # type:ignore
        assert logits.dim() == 3
        return logits


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/models/disjoint_arm_pointnav_models.py
================================================
"""Baseline models for use in the Arm Point Navigation task.

Arm Point Navigation is currently available as a Task in ManipulaTHOR.
"""

from typing import Tuple, Optional

import gym
import torch
from gym.spaces.dict import Dict as SpaceDict

from allenact.algorithms.onpolicy_sync.policy import (
    ActorCriticModel,
    LinearCriticHead,
    DistributionType,
    Memory,
    ObservationType,
)
from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import ActorCriticOutput
from allenact.embodiedai.models.basic_models import SimpleCNN, RNNStateEncoder
from projects.manipulathor_baselines.armpointnav_baselines.models.base_models import (
    LinearActorHeadNoCategory,
)
from projects.manipulathor_baselines.armpointnav_baselines.models.manipulathor_net_utils import (
    input_embedding_net,
)


class DisjointArmPointNavBaselineActorCritic(ActorCriticModel[CategoricalDistr]):
    """Disjoint recurrent actor-critic baseline for the ArmPointNav task.

    "Disjoint" means that separate depth encoders, distance embeddings and
    actor / critic heads are kept for the phase before the object has been
    picked up ("pick") and the phase after ("drop"); a single recurrent
    state encoder is shared by both.

    # Attributes
    action_space : The space of actions available to the agent. Currently only discrete
        actions are allowed (so this space will always be of type `gym.spaces.Discrete`).
    observation_space : The observation space expected by the agent. Both visual
        encoders are built for the `depth_lowres` entry.
    hidden_size : The hidden size of the RNN and of the visual encoder outputs.
    obj_state_embedding_size : The dimensionality of the embedding produced for
        the relative-distance observations.
    trainable_masked_hidden_state : Forwarded to `RNNStateEncoder`.
    num_rnn_layers : Number of layers of the recurrent state encoder.
    rnn_type : Type of recurrent cell, forwarded to `RNNStateEncoder`.
    """

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        hidden_size=512,
        obj_state_embedding_size=512,
        trainable_masked_hidden_state: bool = False,
        num_rnn_layers=1,
        rnn_type="GRU",
    ):
        """Initializer.

        See class documentation for parameter definitions.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        self._hidden_size = hidden_size
        self.object_type_embedding_size = obj_state_embedding_size

        # Two independent depth-only encoders, one per phase.
        depth_only = dict(rgb_uuid=None, depth_uuid="depth_lowres")
        self.visual_encoder_pick = SimpleCNN(
            self.observation_space, self._hidden_size, **depth_only
        )
        self.visual_encoder_drop = SimpleCNN(
            self.observation_space, self._hidden_size, **depth_only
        )

        rnn_input_size = self._hidden_size + obj_state_embedding_size
        self.state_encoder = RNNStateEncoder(
            rnn_input_size,
            self._hidden_size,
            trainable_masked_hidden_state=trainable_masked_hidden_state,
            num_layers=num_rnn_layers,
            rnn_type=rnn_type,
        )

        # Separate actor / critic heads for each phase.
        num_actions = action_space.n
        self.actor_pick = LinearActorHeadNoCategory(self._hidden_size, num_actions)
        self.critic_pick = LinearCriticHead(self._hidden_size)
        self.actor_drop = LinearActorHeadNoCategory(self._hidden_size, num_actions)
        self.critic_drop = LinearCriticHead(self._hidden_size)

        # Layer sizes of the MLPs embedding the 3-dimensional relative
        # distance observations: 3 -> 100 -> obj_state_embedding_size.
        embedding_layer_sizes = (
            torch.Tensor([3, 100, obj_state_embedding_size]).long().tolist()
        )
        self.relative_dist_embedding_pick = input_embedding_net(
            embedding_layer_sizes, dropout=0
        )
        self.relative_dist_embedding_drop = input_embedding_net(
            embedding_layer_sizes, dropout=0
        )

        self.train()

    @property
    def recurrent_hidden_state_size(self) -> int:
        """The recurrent hidden state size of the model."""
        return self._hidden_size

    @property
    def num_recurrent_layers(self) -> int:
        """Number of recurrent hidden layers."""
        return self.state_encoder.num_recurrent_layers

    def _recurrent_memory_specification(self):
        """Describe the single `rnn` memory tensor: its named dimensions
        (layer, sampler, hidden) and its dtype."""
        rnn_dims = (
            ("layer", self.num_recurrent_layers),
            ("sampler", None),
            ("hidden", self.recurrent_hidden_state_size),
        )
        return {"rnn": (rnn_dims, torch.float32)}

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Compute the policy and value estimate for a batch of observations.

        Both the "pick" and the "drop" branches are evaluated for every
        entry; the "drop" results then overwrite the "pick" results for
        entries where `pickedup_object` equals 1. This selection is applied
        to the distance embeddings, the visual features, the actor logits
        and the critic values.

        # Parameters
        observations : Batched input observations.
        memory : `Memory` containing the hidden states from initial timepoints.
        prev_actions : Tensor of previous actions taken (unused here).
        masks : Masks applied to hidden states. See `RNNStateEncoder`.
        # Returns
        Tuple of the `ActorCriticOutput` and recurrent hidden state.
        """
        distance_embed = self.relative_dist_embedding_pick(
            observations["relative_agent_arm_to_obj"]
        )
        distance_embed_drop = self.relative_dist_embedding_drop(
            observations["relative_obj_to_goal"]
        )

        visual_embed = self.visual_encoder_pick(observations)
        visual_embed_drop = self.visual_encoder_drop(observations)

        holding_object = observations["pickedup_object"] == 1

        # In-place selection of the "drop" features where the object is held.
        distance_embed[holding_object] = distance_embed_drop[holding_object]
        visual_embed[holding_object] = visual_embed_drop[holding_object]

        rnn_input = torch.cat([distance_embed, visual_embed], dim=-1)  # type: ignore
        rnn_output, new_hidden_states = self.state_encoder(
            rnn_input, memory.tensor("rnn"), masks
        )

        logits = self.actor_pick(rnn_output)
        values = self.critic_pick(rnn_output)
        logits_drop = self.actor_drop(rnn_output)
        values_drop = self.critic_drop(rnn_output)

        # In-place selection of the "drop" head outputs where the object is held.
        logits[holding_object] = logits_drop[holding_object]
        values[holding_object] = values_drop[holding_object]

        actor_critic_output = ActorCriticOutput(
            distributions=CategoricalDistr(logits=logits), values=values, extras={}
        )

        return actor_critic_output, memory.set_tensor("rnn", new_hidden_states)


================================================
FILE: projects/manipulathor_baselines/armpointnav_baselines/models/manipulathor_net_utils.py
================================================
import pdb

import torch.nn as nn
import torch.nn.functional as F


def upshuffle(
    in_planes, out_planes, upscale_factor, kernel_size=3, stride=1, padding=1
):
    """Build a convolution -> pixel-shuffle -> LeakyReLU upsampling block.

    The convolution produces `out_planes * upscale_factor**2` channels,
    which `nn.PixelShuffle(upscale_factor)` then rearranges.
    """
    expanded_planes = out_planes * upscale_factor**2
    conv = nn.Conv2d(
        in_planes,
        expanded_planes,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
    )
    layers = [conv, nn.PixelShuffle(upscale_factor), nn.LeakyReLU()]
    return nn.Sequential(*layers)


def upshufflenorelu(
    in_planes, out_planes, upscale_factor, kernel_size=3, stride=1, padding=1
):
    """Build a convolution -> pixel-shuffle upsampling block without any
    activation.

    The convolution produces `out_planes * upscale_factor**2` channels,
    which `nn.PixelShuffle(upscale_factor)` then rearranges.
    """
    expanded_planes = out_planes * upscale_factor**2
    conv = nn.Conv2d(
        in_planes,
        expanded_planes,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
    )
    shuffle = nn.PixelShuffle(upscale_factor)
    return nn.Sequential(conv, shuffle)


def combine_block_w_bn(in_planes, out_planes):
    """Build a 1x1 convolution followed by batch norm and LeakyReLU."""
    pointwise_conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1)
    norm = nn.BatchNorm2d(out_planes)
    return nn.Sequential(pointwise_conv, norm, nn.LeakyReLU())


def conv2d_block(in_planes, out_planes, kernel_size, stride=1, padding=1):
    """Build a two-convolution block: conv -> BN -> LeakyReLU -> conv -> BN.

    The first convolution uses the given kernel size, stride and padding;
    the second is always a 3x3 convolution with stride 1 and padding 1.
    """
    first_conv = nn.Conv2d(
        in_planes, out_planes, kernel_size, stride=stride, padding=padding
    )
    first_norm = nn.BatchNorm2d(out_planes)
    activation = nn.LeakyReLU()
    second_conv = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, padding=1)
    second_norm = nn.BatchNorm2d(out_planes)
    return nn.Sequential(first_conv, first_norm, activation, second_conv, second_norm)


def combine_block_w_do(in_planes, out_planes, dropout=0.0):
    """Build a 1x1 convolution followed by LeakyReLU and dropout."""
    layers = [
        nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1),
        nn.LeakyReLU(),
        nn.Dropout(dropout),
    ]
    return nn.Sequential(*layers)


def combine_block_no_do(in_planes, out_planes):
    """Build a 1x1 convolution followed by LeakyReLU (no dropout)."""
    pointwise_conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1)
    return nn.Sequential(pointwise_conv, nn.LeakyReLU())


def linear_block(in_features, out_features, dropout=0.0):
    """Build a fully connected layer followed by LeakyReLU and dropout."""
    layers = [
        nn.Linear(in_features, out_features),
        nn.LeakyReLU(),
        nn.Dropout(dropout),
    ]
    return nn.Sequential(*layers)


def linear_block_norelu(in_features, out_features):
    """Build a single fully connected layer wrapped in an `nn.Sequential`,
    with no activation."""
    fully_connected = nn.Linear(in_features, out_features)
    return nn.Sequential(fully_connected)


def input_embedding_net(list_of_feature_sizes, dropout=0.0):
    """Build an MLP whose layer widths follow `list_of_feature_sizes`.

    Each consecutive pair of sizes becomes one layer. Every layer except
    the last is a `linear_block` (linear + LeakyReLU + dropout); the last
    one is a `linear_block_norelu` (linear only). Fewer than two sizes
    yields an empty `nn.Sequential`.
    """
    num_layers = len(list_of_feature_sizes) - 1
    layers = []
    for layer_idx in range(num_layers):
        in_size, out_size = list_of_feature_sizes[layer_idx : layer_idx + 2]
        is_output_layer = layer_idx == num_layers - 1
        if is_output_layer:
            layer = linear_block_norelu(in_size, out_size)
        else:
            layer = linear_block(in_size, out_size, dropout=dropout)
        layers.append(layer)
    return nn.Sequential(*layers)


def _upsample_add(x, y):
    _, _, H, W = y.size()
    return F.upsample(x, size=(H, W), mode="bilinear") + y


def replace_all_relu_w_leakyrelu(model):
    """Recursively replace every `nn.ReLU` submodule by a fresh
    `nn.LeakyReLU`.

    The replacement is done in place on `model._modules`, so the module
    passed in is modified.

    # Parameters
    model : The module whose submodules are rewritten.

    # Returns
    The same `model` object, after modification.
    """
    # A leftover `pdb.set_trace()` breakpoint used to live here; it blocked
    # (or crashed) every non-interactive caller and has been removed. The
    # original caveat message is still printed.
    print("Not sure if using this is a good idea")
    modules = model._modules
    for name in modules.keys():
        module = modules[name]
        if isinstance(module, nn.ReLU):
            model._modules[name] = nn.LeakyReLU()
        elif isinstance(module, nn.Module):
            # Recurse into containers; leaf modules are returned unchanged.
            model._modules[name] = replace_all_relu_w_leakyrelu(module)
    return model


def replace_all_leakyrelu_w_relu(model):
    """Recursively replace every `nn.LeakyReLU` submodule by a fresh
    `nn.ReLU`.

    The replacement is done in place on `model._modules`; the same `model`
    object is returned.
    """
    for name, child in list(model._modules.items()):
        if isinstance(child, nn.LeakyReLU):
            replacement = nn.ReLU()
        elif isinstance(child, nn.Module):
            # Recurse into containers; leaf modules come back unchanged.
            replacement = replace_all_leakyrelu_w_relu(child)
        else:
            continue
        model._modules[name] = replacement
    return model


def replace_all_bn_w_groupnorm(model):
    """Recursively replace every `nn.BatchNorm1d` / `nn.BatchNorm2d`
    submodule by an `nn.GroupNorm` with 32 groups.

    The replacement is done in place on `model._modules`, so the module
    passed in is modified. Each new `nn.GroupNorm` gets as many channels as
    the batch-norm layer it replaces had features.

    # Parameters
    model : The module whose submodules are rewritten.

    # Returns
    The same `model` object, after modification.

    # Raises
    NotImplementedError : If an `nn.BatchNorm3d` submodule is encountered.
    """
    # A leftover `pdb.set_trace()` breakpoint used to live here; it blocked
    # (or crashed) every non-interactive caller and has been removed. The
    # original caveat message is still printed.
    print("Not sure if using this is a good idea")
    modules = model._modules
    for name in modules.keys():
        module = modules[name]
        if isinstance(module, (nn.BatchNorm2d, nn.BatchNorm1d)):
            feature_number = module.num_features
            model._modules[name] = nn.GroupNorm(32, feature_number)
        elif isinstance(module, nn.BatchNorm3d):
            # Still an `Exception` subclass, so existing handlers keep working.
            raise NotImplementedError("Not implemented")
        elif isinstance(module, nn.Module):
            # Recurse into containers; leaf modules are returned unchanged.
            model._modules[name] = replace_all_bn_w_groupnorm(module)
    return model


def flat_temporal(tensor, batch_size, sequence_length):
    """Merge the two leading dimensions of `tensor` into one.

    Asserts that the leading dimensions equal `batch_size` and
    `sequence_length`, then returns a view of shape
    `[batch_size * sequence_length, ...]` over a contiguous version of the
    tensor.
    """
    dims = list(tensor.shape)
    assert dims[0] == batch_size and dims[1] == sequence_length
    merged_shape = [batch_size * sequence_length, *dims[2:]]
    return tensor.contiguous().view(merged_shape)


def unflat_temporal(tensor, batch_size, sequence_length):
    """Split the leading dimension of `tensor` into (batch, sequence).

    Asserts that the leading dimension equals
    `batch_size * sequence_length`, then returns a view of shape
    `[batch_size, sequence_length, ...]` over a contiguous version of the
    tensor.
    """
    dims = list(tensor.shape)
    assert dims[0] == batch_size * sequence_length
    split_shape = [batch_size, sequence_length, *dims[1:]]
    return tensor.contiguous().view(split_shape)


================================================
FILE: projects/objectnav_baselines/README.md
================================================
# Baseline models ObjectNav (for RoboTHOR/iTHOR)

This project contains the code for training baseline models for the ObjectNav task. In ObjectNav, the agent
spawns at a location in an environment and is tasked to explore the environment until it finds an object of a
certain type (such as TV or Basketball). Once the agent is confident that it has the object within sight
it executes the `END` action which terminates the episode. If the agent is within a set
distance to the target (in our case 1.0 meters) and the target is visible within its observation frame
the agent succeeded, otherwise it failed.

Provided are experiment configs for training a simple convolutional model with
a GRU using `RGB`, `Depth` or `RGB-D` (i.e. `RGB+Depth`) as inputs in
[RoboTHOR](https://ai2thor.allenai.org/robothor/) and [iTHOR](https://ai2thor.allenai.org/ithor/).

The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)
Reinforcement Learning Algorithm. For the RoboTHOR environment we also have an experiment
(`objectnav_robothor_rgb_resnetgru_dagger.py`) showing how a model can be trained using DAgger,
a form of imitation learning.

To train an experiment run the following command from the `allenact` root directory:

```bash
python main.py <PATH_TO_EXPERIMENT_CONFIG> -o <PATH_TO_OUTPUT> -c
```

Where `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights
and logs to be stored and `<PATH_TO_EXPERIMENT_CONFIG>` is the path to the python file containing
the experiment configuration. An example usage of this command would be:

```bash
python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet_ddppo.py -o storage/objectnav-robothor-rgb
```

This trains a simple convolutional neural network with a GRU using RGB input
passed through a pretrained ResNet-18 visual encoder on the
ObjectNav task in the RoboTHOR environment and stores the model weights and logs
to `storage/objectnav-robothor-rgb`.

## RoboTHOR ObjectNav 2021 Challenge

The experiment configs found under the `projects/objectnav_baselines/experiments/robothor` directory are designed
to conform to the requirements of the [RoboTHOR ObjectNav 2021 Challenge](https://ai2thor.allenai.org/robothor/cvpr-2021-challenge).

### Training a baseline
To train a baseline ResNet->GRU model taking RGB-D inputs, run the following command
```bash
python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnet_ddppo.py -o storage/objectnav-robothor-rgbd
```
By default, when using a machine with a GPU, the above experiment will attempt to train using 60 parallel processes
across all available GPUs. See the `TRAIN_GPU_IDS` constant in `experiments/objectnav_thor_base.py` and
the `NUM_PROCESSES` constant in `experiments/robothor/objectnav_robothor_base.py` if you'd like to change which
GPUs are used or how many processes are run respectively.

### Downloading our pretrained model checkpoint
We provide a pretrained model obtained by allowing the above command to run for all 300M training steps and then selecting
the model checkpoint with best validation-set performance (for us occurring at ~170M training steps). You can download
this model checkpoint by running
```bash
bash pretrained_model_ckpts/download_navigation_model_ckpts.sh robothor-objectnav-challenge-2021
```
from the top-level directory. This will download the pretrained model weights and save them at the path
```bash
pretrained_model_ckpts/robothor-objectnav-challenge-2021/Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO/2021-02-09_22-35-15/exp_Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO_0.2.0a_300M__stage_00__steps_000170207237.pt
```

### Running inference on the pretrained model

You can run inference on the above pretrained model (on the test dataset) by running
```bash
export SAVED_MODEL_PATH=pretrained_model_ckpts/robothor-objectnav-challenge-2021/Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO/2021-02-09_22-35-15/exp_Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO_0.2.0a_300M__stage_00__steps_000170207237.pt
python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnetgru_ddppo.py -c $SAVED_MODEL_PATH --eval
```
To discourage "cheating", the test dataset has been scrubbed of the information needed to actually compute the success rate / SPL
of your model and so running the above will only save the trajectories your models take. To evaluate these
trajectories you will have to submit them to our leaderboard, see [here for more details](https://github.com/allenai/robothor-challenge/).
If you'd like to get a sense of whether your model is doing well before submitting to the leaderboard, you can obtain the 
success rate / SPL of it on our validation dataset. To do this, you can simply comment out the line
```python
    TEST_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-objectnav/test")
```
within the `projects/objectnav_baselines/experiments/robothor/objectnav_robothor_base.py` file and rerun the above
`python main.py ...` command (when the test dataset is not given, the code defaults to using the validation set).

================================================
FILE: projects/objectnav_baselines/__init__.py
================================================


================================================
FILE: projects/objectnav_baselines/experiments/__init__.py
================================================


================================================
FILE: projects/objectnav_baselines/experiments/clip/__init__.py
================================================


================================================
FILE: projects/objectnav_baselines/experiments/clip/mixins.py
================================================
from typing import Sequence, Union, Type, Tuple, Optional, Dict, Any

import attr
import gym
import numpy as np
import torch
import torch.nn as nn

from allenact.base_abstractions.distributions import CategoricalDistr
from allenact.base_abstractions.misc import (
    ObservationType,
    Memory,
    ActorCriticOutput,
    DistributionType,
)
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.base_abstractions.sensor import Sensor
from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor
from allenact.utils.experiment_utils import Builder
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
from allenact_plugins.navigation_plugin.objectnav.models import (
    ResnetTensorNavActorCritic,
)


class LookDownFirstResnetTensorNavActorCritic(ResnetTensorNavActorCritic):
    """A `ResnetTensorNavActorCritic` whose action logits are overridden
    wherever `masks` is 0.

    At those positions the parent's logits are replaced by a fixed vector
    that is 0 everywhere except for a very large value at
    `look_down_action_index`; where `masks` is 1 the parent's logits are
    used unchanged.
    """

    def __init__(self, look_down_action_index: int, **kwargs):
        """Build the parent model and the fixed override logits.

        # Parameters

        look_down_action_index : Index (into the action space) of the action
            that receives the large logit when `masks` is 0.
        kwargs : Forwarded untouched to `ResnetTensorNavActorCritic`.
        """
        super().__init__(**kwargs)

        self.look_down_action_index = look_down_action_index

        # Shape (1, 1, num_actions) so that it broadcasts against the logits.
        forced_logits = torch.zeros(1, 1, self.action_space.n)
        forced_logits[0, 0, self.look_down_action_index] = 99999

        # Non-persistent: follows the module across devices but is left out
        # of the state dict.
        self.register_buffer("look_down_delta", forced_logits, persistent=False)

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Run the parent forward pass and then blend in the override logits.

        Returns the parent's values/extras together with a new categorical
        distribution built from the blended logits, plus the updated memory.
        """
        # NOTE: `locals()` must be captured before any new local is created,
        # it is what gets forwarded to the parent's `forward`.
        parent_out, memory = super(
            LookDownFirstResnetTensorNavActorCritic, self
        ).forward(**prepare_locals_for_super(locals()))

        # masks == 1 -> keep the policy's logits; masks == 0 -> use the
        # fixed "look down" logits instead.
        policy_part = parent_out.distributions.logits * masks
        forced_part = self.look_down_delta * (1 - masks)
        blended_logits = policy_part + forced_part

        blended_out = ActorCriticOutput(
            distributions=CategoricalDistr(logits=blended_logits),
            values=parent_out.values,
            extras=parent_out.extras,
        )
        return blended_out, memory


@attr.s(kw_only=True)
class ClipResNetPreprocessGRUActorCriticMixin:
    """Creates CLIP-ResNet preprocessors and the actor-critic consuming them.

    # Attributes

    sensors : Sensors of the experiment; the first `RGBSensor`, the first
        `DepthSensor` and the first instance of `goal_sensor_type` found in
        this sequence are the ones that get used.
    clip_model_type : Forwarded to every `ClipResNetPreprocessor`.
    screen_size : Stored on the mixin; not read by the methods of this class.
    goal_sensor_type : Class used (via `isinstance`) to locate the goal sensor.
    pool : Forwarded to every `ClipResNetPreprocessor`.
    """

    sensors: Sequence[Sensor] = attr.ib()
    clip_model_type: str = attr.ib()
    screen_size: int = attr.ib()
    goal_sensor_type: Type[Optional[Sensor]] = attr.ib()
    pool: bool = attr.ib(default=False)

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return one `ClipResNetPreprocessor` per available visual sensor.

        An RGB sensor (if any) yields the `"rgb_clip_resnet"` output and a
        depth sensor (if any) yields `"depth_clip_resnet"`.

        # Raises

        AssertionError : If an RGB sensor is present but its normalization
            means / standard deviations differ from the CLIP constants.
        """
        preprocessors = []

        rgb_sensor = next((s for s in self.sensors if isinstance(s, RGBSensor)), None)
        if rgb_sensor is not None:
            # The normalization checks only make sense when an RGB sensor
            # exists; running them unconditionally made depth-only sensor
            # configurations fail with an AttributeError on `None`.
            assert (
                np.linalg.norm(
                    np.array(rgb_sensor._norm_means)
                    - np.array(ClipResNetPreprocessor.CLIP_RGB_MEANS)
                )
                < 1e-5
            ), "The RGB sensor must normalize with the CLIP RGB means."
            assert (
                np.linalg.norm(
                    np.array(rgb_sensor._norm_sds)
                    - np.array(ClipResNetPreprocessor.CLIP_RGB_STDS)
                )
                < 1e-5
            ), "The RGB sensor must normalize with the CLIP RGB standard deviations."

            preprocessors.append(
                ClipResNetPreprocessor(
                    rgb_input_uuid=rgb_sensor.uuid,
                    clip_model_type=self.clip_model_type,
                    pool=self.pool,
                    output_uuid="rgb_clip_resnet",
                    input_img_height_width=(rgb_sensor.height, rgb_sensor.width),
                )
            )

        depth_sensor = next(
            (s for s in self.sensors if isinstance(s, DepthSensor)), None
        )
        if depth_sensor is not None:
            # The depth observation is fed through the same preprocessor
            # class, using the depth sensor's uuid as the "rgb" input.
            preprocessors.append(
                ClipResNetPreprocessor(
                    rgb_input_uuid=depth_sensor.uuid,
                    clip_model_type=self.clip_model_type,
                    pool=self.pool,
                    output_uuid="depth_clip_resnet",
                    input_img_height_width=(depth_sensor.height, depth_sensor.width),
                )
            )

        return preprocessors

    def create_model(
        self,
        num_actions: int,
        add_prev_actions: bool,
        look_down_first: bool = False,
        look_down_action_index: Optional[int] = None,
        hidden_size: int = 512,
        rnn_type="GRU",
        model_kwargs: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> nn.Module:
        """Instantiate the navigation actor-critic for the configured sensors.

        # Parameters

        num_actions : Size of the discrete action space.
        add_prev_actions : Forwarded to the model.
        look_down_first : If `True`, build a
            `LookDownFirstResnetTensorNavActorCritic` instead of the plain
            `ResnetTensorNavActorCritic`.
        look_down_action_index : Index of the look-down action; required when
            `look_down_first` is `True`.
        hidden_size : Forwarded to the model.
        rnn_type : Forwarded to the model.
        model_kwargs : Extra keyword arguments for the model constructor. A
            key that collides with one set here raises a `TypeError`.
        kwargs : Must contain `"sensor_preprocessor_graph"`; any other entry
            is ignored by this method.

        # Raises

        ValueError : If `look_down_first` is `True` but no
            `look_down_action_index` was given.
        """
        if look_down_first and look_down_action_index is None:
            # Indexing a tensor with `None` inserts a new axis, so the
            # look-down model would silently boost *every* action's logit
            # instead of failing. Reject the configuration up front.
            raise ValueError(
                "`look_down_action_index` must be provided when `look_down_first` is True."
            )

        has_rgb = any(isinstance(s, RGBSensor) for s in self.sensors)
        has_depth = any(isinstance(s, DepthSensor) for s in self.sensors)

        goal_sensor_uuid = next(
            (s.uuid for s in self.sensors if isinstance(s, self.goal_sensor_type)),
            None,
        )

        if model_kwargs is None:
            model_kwargs = {}

        model_kwargs = dict(
            action_space=gym.spaces.Discrete(num_actions),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            goal_sensor_uuid=goal_sensor_uuid,
            rgb_resnet_preprocessor_uuid="rgb_clip_resnet" if has_rgb else None,
            depth_resnet_preprocessor_uuid="depth_clip_resnet" if has_depth else None,
            hidden_size=hidden_size,
            goal_dims=32,
            add_prev_actions=add_prev_actions,
            rnn_type=rnn_type,
            **model_kwargs
        )

        if not look_down_first:
            return ResnetTensorNavActorCritic(**model_kwargs)

        return LookDownFirstResnetTensorNavActorCritic(
            look_down_action_index=look_down_action_index, **model_kwargs
        )


================================================
FILE: projects/objectnav_baselines/experiments/habitat/__init__.py
================================================


================================================
FILE: projects/objectnav_baselines/experiments/habitat/clip/__init__.py
================================================


================================================
FILE: projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo.py
================================================
from typing import Sequence, Union

import torch.nn as nn
from torch.distributions.utils import lazy_property

from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
from allenact_plugins.habitat_plugin.habitat_sensors import (
    RGBSensorHabitat,
    TargetObjectSensorHabitat,
)
from projects.objectnav_baselines.experiments.clip.mixins import (
    ClipResNetPreprocessGRUActorCriticMixin,
)
from projects.objectnav_baselines.experiments.habitat.objectnav_habitat_base import (
    ObjectNavHabitatBaseConfig,
)
from projects.objectnav_baselines.mixins import ObjectNavPPOMixin


class ObjectNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig(
    ObjectNavHabitatBaseConfig
):
    """Habitat Object Navigation experiment: RGB input, CLIP ResNet-50
    features and the `ObjectNavPPOMixin` training pipeline."""

    CLIP_MODEL_TYPE = "RN50"

    def __init__(self, lr: float, **kwargs):
        """Store the learning rate and set up the preprocessing/model mixin.

        # Parameters

        lr : Learning rate handed to the training pipeline and shown in the tag.
        kwargs : Forwarded to `ObjectNavHabitatBaseConfig`.
        """
        super().__init__(**kwargs)

        self.lr = lr

        mixin_settings = dict(
            sensors=self.SENSORS,
            clip_model_type=self.CLIP_MODEL_TYPE,
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=TargetObjectSensorHabitat,
        )
        self.preprocessing_and_model = ClipResNetPreprocessGRUActorCriticMixin(
            **mixin_settings
        )

    @lazy_property
    def SENSORS(self):
        """The RGB sensor (normalized with the CLIP statistics) followed by
        the target-object sensor."""
        side = ObjectNavHabitatBaseConfig.SCREEN_SIZE
        rgb = RGBSensorHabitat(
            height=side,
            width=side,
            use_resnet_normalization=True,
            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
        )
        num_categories = len(self.DEFAULT_OBJECT_CATEGORIES_TO_IND)
        goal = TargetObjectSensorHabitat(num_categories)
        return [rgb, goal]

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Delegate to `ObjectNavPPOMixin.training_pipeline` (`kwargs` are
        not used)."""
        pipeline = ObjectNavPPOMixin.training_pipeline(
            lr=self.lr,
            auxiliary_uuids=self.auxiliary_uuids,
            multiple_beliefs=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )
        return pipeline

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Preprocessors as produced by the preprocessing/model mixin."""
        mixin = self.preprocessing_and_model
        return mixin.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Model as produced by the preprocessing/model mixin."""
        mixin = self.preprocessing_and_model
        return mixin.create_model(
            num_actions=self.ACTION_SPACE.n,
            add_prev_actions=self.add_prev_actions,
            auxiliary_uuids=self.auxiliary_uuids,
            **kwargs,
        )

    def tag(self):
        """Parent tag extended with the architecture name and learning rate."""
        parent_tag = super().tag()
        return f"{parent_tag}-RGB-ClipResNet50GRU-DDPPO-lr{self.lr}"


================================================
FILE: projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo_increasingrollouts.py
================================================
import torch
import torch.optim as optim

from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.utils.experiment_utils import (
    Builder,
    TrainingPipeline,
    PipelineStage,
    TrainingSettings,
)
from projects.objectnav_baselines.experiments.habitat.clip.objectnav_habitat_rgb_clipresnet50gru_ddppo import (
    ObjectNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig,
)
from projects.objectnav_baselines.mixins import update_with_auxiliary_losses


class ObjectNavHabitatRGBClipResNet50GRUDDPPOIncreasingLengthExpConfig(
    ObjectNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig
):
    """Variant of the Habitat RGB CLIP-ResNet50 experiment whose training
    pipeline uses three stages with rollout lengths of 32, 64 and then 128
    steps."""

    def __init__(self, lr=1e-4, **kwargs):
        """Same as the parent constructor but with a default learning rate.

        # Parameters

        lr : Learning rate for the Adam optimizer.
        kwargs : Forwarded to the parent experiment config.
        """
        super().__init__(lr, **kwargs)
        self.lr = lr

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Build the three-stage PPO training pipeline.

        The stages run for 10M, 10M and (1B - 20M) steps respectively. Every
        stage trains all named losses, using the weight that accompanies each
        loss in `named_losses`. `kwargs` are not used.
        """
        auxiliary_uuids = self.auxiliary_uuids
        multiple_beliefs = False
        normalize_advantage = False
        advance_scene_rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD

        on_gpu = torch.cuda.is_available()

        def log_interval(rollout_length: int, num_rollouts: int) -> int:
            # Without a GPU metrics are accumulated every single step.
            if not on_gpu:
                return 1
            return self.num_train_processes * rollout_length * num_rollouts

        batch_steps_0 = int(10e6)
        batch_steps_1 = int(10e6)
        batch_steps_2 = int(1e9) - batch_steps_0 - batch_steps_1

        # (rollout length, max steps in the stage, metric accumulate interval)
        rollout_schedule = [
            (32, batch_steps_0, log_interval(32, 10)),
            (64, batch_steps_1, log_interval(64, 5)),
            (128, batch_steps_2, log_interval(128, 5)),
        ]

        lr = self.lr
        num_mini_batch = 1
        update_repeats = 4
        save_interval = 5000000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5

        # Each value of `named_losses` is a `(loss, weight)` pair.
        named_losses = {
            "ppo_loss": (PPO(**PPOConfig, normalize_advantage=normalize_advantage), 1.0)
        }
        named_losses = update_with_auxiliary_losses(
            named_losses=named_losses,
            auxiliary_uuids=auxiliary_uuids,
            multiple_beliefs=multiple_beliefs,
        )

        pipeline_stages = [
            PipelineStage(
                loss_names=list(named_losses.keys()),
                max_stage_steps=max_stage_steps,
                # Previously the weights were computed but never handed to
                # the stages, so any auxiliary-loss weight was ignored.
                loss_weights=[val[1] for val in named_losses.values()],
                training_settings=TrainingSettings(
                    num_steps=num_steps,
                    metric_accumulate_interval=metric_accumulate_interval,
                ),
            )
            for num_steps, max_stage_steps, metric_accumulate_interval in rollout_schedule
        ]

        return TrainingPipeline(
            save_interval=save_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            named_losses={key: val[0] for key, val in named_losses.items()},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=advance_scene_rollout_period,
            pipeline_stages=pipeline_stages,
            lr_scheduler_builder=None,
        )

    def tag(self):
        """Parent tag with `-DDPPO-lr` replaced by `-DDPPO-IncRollouts-lr`."""
        return (
            super(
                ObjectNavHabitatRGBClipResNet50GRUDDPPOIncreasingLengthExpConfig, self
            )
            .tag()
            .replace("-DDPPO-lr", "-DDPPO-IncRollouts-lr")
        )


================================================
FILE: projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py
================================================
import glob
import math
import os
import warnings
from abc import ABC
from typing import Dict, Any, List, Optional, Sequence, Union, Tuple

import gym
import numpy as np
import torch
from torch.distributions.utils import lazy_property

# noinspection PyUnresolvedReferences
import habitat
from allenact.base_abstractions.experiment_config import MachineParams
from allenact.base_abstractions.preprocessor import (
    SensorPreprocessorGraph,
    Preprocessor,
)
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor
from allenact.utils.experiment_utils import evenly_distribute_count_into_bins, Builder
from allenact.utils.system import get_logger
from allenact_plugins.habitat_plugin.habitat_constants import (
    HABITAT_DATASETS_DIR,
    HABITAT_CONFIGS_DIR,
    HABITAT_SCENE_DATASETS_DIR,
)
from allenact_plugins.habitat_plugin.habitat_task_samplers import ObjectNavTaskSampler
from allenact_plugins.habitat_plugin.habitat_tasks import ObjectNavTask
from allenact_plugins.habitat_plugin.habitat_utils import (
    get_habitat_config,
    construct_env_configs,
)
from projects.objectnav_baselines.experiments.objectnav_base import ObjectNavBaseConfig


def create_objectnav_config(
    config_yaml_path: str,
    mode: str,
    scenes_path: str,
    simulator_gpu_ids: Sequence[int],
    rotation_degrees: float,
    step_size: float,
    max_steps: int,
    num_processes: int,
    camera_width: int,
    camera_height: int,
    using_rgb: bool,
    using_depth: bool,
    training: bool,
    num_episode_sample: int,
    horizontal_fov: Optional[int] = None,
) -> habitat.Config:
    """Load a Habitat ObjectNav config from YAML and specialize it.

    # Parameters

    config_yaml_path : Path of the YAML file passed to `get_habitat_config`.
    mode : Stored as `config.MODE`.
    scenes_path : Stored as `config.DATASET.DATA_PATH`.
    simulator_gpu_ids : Stored as `config.SIMULATOR_GPU_IDS`.
    rotation_degrees : Must equal the YAML's `SIMULATOR.TURN_ANGLE`.
    step_size : Must equal the YAML's `SIMULATOR.FORWARD_STEP_SIZE`.
    max_steps : Must equal the YAML's `ENVIRONMENT.MAX_EPISODE_STEPS`; also
        written to `SIMULATOR.MAX_EPISODE_STEPS`.
    num_processes : Stored as `config.NUM_PROCESSES`.
    camera_width : Width applied to the RGB, depth and semantic sensors.
    camera_height : Height applied to the RGB, depth and semantic sensors.
    using_rgb : Whether `"RGB_SENSOR"` is enabled for agent 0.
    using_depth : Whether `"DEPTH_SENSOR"` is enabled for agent 0.
    training : When `False` the seed is fixed to 0 and episode shuffling is
        switched off.
    num_episode_sample : When positive, stored as the iterator option
        `NUM_EPISODE_SAMPLE`; otherwise the YAML's value is kept.
    horizontal_fov : When given, applied as `HFOV` to the three sensors.

    # Returns

    The frozen config.

    # Raises

    AssertionError : If the YAML disagrees with the expected turn angle, step
        size, episode length, task type, success distance or goal-distance
        mode.
    """
    config = get_habitat_config(config_yaml_path)
    config.defrost()

    # Process / dataset bookkeeping.
    config.NUM_PROCESSES = num_processes
    config.SIMULATOR_GPU_IDS = simulator_gpu_ids
    config.DATASET.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR
    config.DATASET.DATA_PATH = scenes_path

    # Enabled simulator sensors (RGB first, then depth).
    sensor_toggles = (
        (using_rgb, "RGB_SENSOR"),
        (using_depth, "DEPTH_SENSOR"),
    )
    config.SIMULATOR.AGENT_0.SENSORS = [
        sensor_name for enabled, sensor_name in sensor_toggles if enabled
    ]

    # All three camera-like sensors share resolution and (optionally) FOV.
    simulator = config.SIMULATOR
    for sensor_cfg in (
        simulator.RGB_SENSOR,
        simulator.DEPTH_SENSOR,
        simulator.SEMANTIC_SENSOR,
    ):
        sensor_cfg.WIDTH = camera_width
        sensor_cfg.HEIGHT = camera_height
        if horizontal_fov is not None:
            sensor_cfg.HFOV = horizontal_fov

    # The experiment's constants must agree with the YAML file.
    assert rotation_degrees == config.SIMULATOR.TURN_ANGLE
    assert step_size == config.SIMULATOR.FORWARD_STEP_SIZE
    assert max_steps == config.ENVIRONMENT.MAX_EPISODE_STEPS
    config.SIMULATOR.MAX_EPISODE_STEPS = max_steps

    assert config.TASK.TYPE == "ObjectNav-v1"

    assert config.TASK.SUCCESS.SUCCESS_DISTANCE == 0.1
    assert config.TASK.DISTANCE_TO_GOAL.DISTANCE_TO == "VIEW_POINTS"

    task = config.TASK
    task.SENSORS = ["OBJECTGOAL_SENSOR", "COMPASS_SENSOR", "GPS_SENSOR"]
    task.GOAL_SENSOR_UUID = "objectgoal"
    task.MEASUREMENTS = ["DISTANCE_TO_GOAL", "SUCCESS", "SPL", "SOFT_SPL"]

    iterator_options = config.ENVIRONMENT.ITERATOR_OPTIONS
    if not training:
        config.SEED = 0
        iterator_options.SHUFFLE = False

    if num_episode_sample > 0:
        iterator_options.NUM_EPISODE_SAMPLE = num_episode_sample

    config.MODE = mode

    config.freeze()
    return config


class ObjectNavHabitatBaseConfig(ObjectNavBaseConfig, ABC):
    """The base config for all Habitat ObjectNav experiments.

    Subclasses are expected to provide `SENSORS`; the Habitat configs, the
    machine parameters and the task sampler arguments are all derived here.
    """

    # selected auxiliary uuids
    ## if comment all the keys, then it's vanilla DD-PPO
    _AUXILIARY_UUIDS = [
        # InverseDynamicsLoss.UUID,
        # TemporalDistanceLoss.UUID,
        # CPCA1Loss.UUID,
        # CPCA4Loss.UUID,
        # CPCA8Loss.UUID,
        # CPCA16Loss.UUID,
    ]
    MULTIPLE_BELIEFS = False
    BELIEF_FUSION = (  # choose one
        None
        # AttentiveFusion
        # AverageFusion
        # SoftmaxFusion
    )

    FAILED_END_REWARD = -1.0

    ACTION_SPACE = gym.spaces.Discrete(len(ObjectNavTask.class_action_names()))

    DEFAULT_NUM_TRAIN_PROCESSES = (
        5 * torch.cuda.device_count() if torch.cuda.is_available() else 1
    )
    # NOTE(review): `__init__` below does not consult this constant when
    # `num_test_processes` is omitted (it uses 10 on GPU machines, else 1).
    DEFAULT_NUM_TEST_PROCESSES = 11

    DEFAULT_TRAIN_GPU_IDS = tuple(range(torch.cuda.device_count()))
    DEFAULT_VALID_GPU_IDS = [torch.cuda.device_count() - 1]
    DEFAULT_TEST_GPU_IDS = tuple(range(torch.cuda.device_count()))

    def __init__(
        self,
        scene_dataset: str,  # Should be "mp3d" or "hm3d"
        debug: bool = False,
        num_train_processes: Optional[int] = None,
        num_test_processes: Optional[int] = None,
        test_on_validation: bool = False,
        run_valid: bool = True,
        train_gpu_ids: Optional[Sequence[int]] = None,
        val_gpu_ids: Optional[Sequence[int]] = None,
        test_gpu_ids: Optional[Sequence[int]] = None,
        add_prev_actions: bool = False,
        look_constraints: Optional[Tuple[int, int]] = None,
        **kwargs,
    ):
        """Store the experiment options, filling in defaults for `None`s.

        # Parameters

        scene_dataset : `"mp3d"` or `"hm3d"`; selects object categories,
            dataset paths and the base YAML config.
        debug : If `True`, every training config is restricted to one scene.
        num_train_processes : Defaults to `DEFAULT_NUM_TRAIN_PROCESSES`.
        num_test_processes : Defaults to 10 when CUDA is available, else 1.
        test_on_validation : Stored on the instance; not read in this class.
        run_valid : Whether a validation process is requested on GPU machines.
        train_gpu_ids : Defaults to `DEFAULT_TRAIN_GPU_IDS`.
        val_gpu_ids : Defaults to `DEFAULT_VALID_GPU_IDS`, or `[]` when
            `run_valid` is `False`.
        test_gpu_ids : Defaults to `DEFAULT_TEST_GPU_IDS`.
        add_prev_actions : Stored on the instance and reflected in the tag.
        look_constraints : Optional pair with entries in `{0, 1, 2, 3}` whose
            second entry must be positive; passed to the task sampler.
        kwargs : Forwarded to `ObjectNavBaseConfig`.
        """
        super().__init__(**kwargs)

        self.scene_dataset = scene_dataset
        self.debug = debug

        assert look_constraints is None or all(
            lc in [0, 1, 2, 3] for lc in look_constraints
        ), "Look constraints limit the number of times agents can look up/down when starting from the horizon line."
        assert (
            look_constraints is None or look_constraints[1] > 0
        ), "The agent must be allowed to look down from the horizon at least once."
        self.look_constraints = look_constraints

        def v_or_default(v, default):
            return v if v is not None else default

        self.num_train_processes = v_or_default(
            num_train_processes, self.DEFAULT_NUM_TRAIN_PROCESSES
        )
        self.num_test_processes = v_or_default(
            num_test_processes, (10 if torch.cuda.is_available() else 1)
        )
        self.test_on_validation = test_on_validation
        self.run_valid = run_valid
        self.train_gpu_ids = v_or_default(train_gpu_ids, self.DEFAULT_TRAIN_GPU_IDS)
        self.val_gpu_ids = v_or_default(
            val_gpu_ids, self.DEFAULT_VALID_GPU_IDS if run_valid else []
        )
        self.test_gpu_ids = v_or_default(test_gpu_ids, self.DEFAULT_TEST_GPU_IDS)
        self.add_prev_actions = add_prev_actions

        self.auxiliary_uuids = self._AUXILIARY_UUIDS

    def _create_config(
        self,
        mode: str,
        scenes_path: str,
        num_processes: int,
        simulator_gpu_ids: Sequence[int],
        training: bool = True,
        num_episode_sample: int = -1,
    ):
        """Call `create_objectnav_config` with this experiment's constants
        (YAML path, motion parameters, camera settings and sensor usage)."""
        return create_objectnav_config(
            config_yaml_path=self.BASE_CONFIG_YAML_PATH,
            mode=mode,
            scenes_path=scenes_path,
            simulator_gpu_ids=simulator_gpu_ids,
            rotation_degrees=self.ROTATION_DEGREES,
            step_size=self.STEP_SIZE,
            max_steps=self.MAX_STEPS,
            num_processes=num_processes,
            camera_width=self.CAMERA_WIDTH,
            camera_height=self.CAMERA_HEIGHT,
            horizontal_fov=self.HORIZONTAL_FIELD_OF_VIEW,
            using_rgb=any(isinstance(s, RGBSensor) for s in self.SENSORS),
            using_depth=any(isinstance(s, DepthSensor) for s in self.SENSORS),
            training=training,
            num_episode_sample=num_episode_sample,
        )

    @lazy_property
    def DEFAULT_OBJECT_CATEGORIES_TO_IND(self):
        """Mapping from object category name to index for `scene_dataset`.

        Raises `NotImplementedError` for datasets other than "mp3d"/"hm3d".
        """
        if self.scene_dataset == "mp3d":
            return {
                "chair": 0,
                "table": 1,
                "picture": 2,
                "cabinet": 3,
                "cushion": 4,
                "sofa": 5,
                "bed": 6,
                "chest_of_drawers": 7,
                "plant": 8,
                "sink": 9,
                "toilet": 10,
                "stool": 11,
                "towel": 12,
                "tv_monitor": 13,
                "shower": 14,
                "bathtub": 15,
                "counter": 16,
                "fireplace": 17,
                "gym_equipment": 18,
                "seating": 19,
                "clothes": 20,
            }
        elif self.scene_dataset == "hm3d":
            return {
                "chair": 0,
                "bed": 1,
                "plant": 2,
                "toilet": 3,
                "tv_monitor": 4,
                "sofa": 5,
            }
        else:
            raise NotImplementedError

    @lazy_property
    def TASK_DATA_DIR_TEMPLATE(self):
        """Path template with two `{}` slots, both filled with the split name."""
        return os.path.join(
            HABITAT_DATASETS_DIR, f"objectnav/{self.scene_dataset}/v1/{{}}/{{}}.json.gz"
        )

    @lazy_property
    def BASE_CONFIG_YAML_PATH(self):
        """Path of the ObjectNav task YAML for `scene_dataset`."""
        return os.path.join(
            HABITAT_CONFIGS_DIR, f"tasks/objectnav_{self.scene_dataset}.yaml"
        )

    @lazy_property
    def TRAIN_CONFIG(self):
        """Habitat config covering all training processes."""
        return self._create_config(
            mode="train",
            scenes_path=self.train_scenes_path(),
            num_processes=self.num_train_processes,
            simulator_gpu_ids=self.train_gpu_ids,
            training=True,
        )

    @lazy_property
    def VALID_CONFIG(self):
        """Habitat config for the single validation process (200 sampled
        episodes, no shuffling)."""
        return self._create_config(
            mode="validate",
            scenes_path=self.valid_scenes_path(),
            num_processes=1,
            simulator_gpu_ids=self.val_gpu_ids,
            training=False,
            num_episode_sample=200,
        )

    @lazy_property
    def TEST_CONFIG(self):
        """Habitat config covering all test processes (no shuffling)."""
        return self._create_config(
            mode="validate",
            scenes_path=self.test_scenes_path(),
            num_processes=self.num_test_processes,
            simulator_gpu_ids=self.test_gpu_ids,
            training=False,
        )

    @lazy_property
    def TRAIN_CONFIGS_PER_PROCESS(self):
        """One Habitat config per training process.

        With two or more training GPUs the configs are re-assigned to devices
        so that the summed on-disk size of the scene files is balanced across
        devices. In debug mode every config is restricted to a single scene.
        """
        configs = construct_env_configs(self.TRAIN_CONFIG, allow_scene_repeat=True)

        if len(self.train_gpu_ids) >= 2:
            scenes_dir = configs[0].DATASET.SCENES_DIR

            # Size on disk of each config's scene files, used as a proxy for
            # the memory the scene needs once loaded.
            memory_use_per_config = []
            for config in configs:
                assert (
                    len(config.DATASET.CONTENT_SCENES) == 1
                ), config.DATASET.CONTENT_SCENES
                scene_name = config.DATASET.CONTENT_SCENES[0]

                paths = glob.glob(
                    os.path.join(
                        scenes_dir, self.scene_dataset, "**", f"{scene_name}.*"
                    ),
                    recursive=True,
                )

                # Sanity check on the number of files found per scene.
                if self.scene_dataset == "mp3d":
                    assert len(paths) == 4
                else:
                    assert len(paths) == 2

                memory_use_per_config.append(sum(os.path.getsize(p) for p in paths))

            # Greedy balancing: going through the configs by increasing size,
            # give each one to the device with the smallest running total.
            max_configs_per_device = math.ceil(len(configs) / len(self.train_gpu_ids))
            mem_per_device = np.array([0.0 for _ in range(len(self.train_gpu_ids))])
            configs_per_device = [[] for _ in range(len(mem_per_device))]
            for mem, config in sorted(
                list(zip(memory_use_per_config, configs)), key=lambda x: x[0]
            ):
                ind = int(np.argmin(mem_per_device))
                config.defrost()
                config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = self.train_gpu_ids[ind]
                config.freeze()
                configs_per_device[ind].append(config)

                mem_per_device[ind] += mem
                if len(configs_per_device[ind]) >= max_configs_per_device:
                    # A full device must never be selected by `argmin` again.
                    mem_per_device[ind] = float("inf")

            # Flatten, devices with the fewest configs first.
            configs_per_device.sort(key=len)
            configs = sum(configs_per_device, [])

        if self.debug:
            warnings.warn(
                "IN DEBUG MODE, WILL ONLY USE `1LXtFkjw3qL` SCENE IN MP3D OR `1S7LAXRdDqK` scene in HM3D!!!"
            )
            for config in configs:
                config.defrost()
                if self.scene_dataset == "mp3d":
                    config.DATASET.CONTENT_SCENES = ["1LXtFkjw3qL"]
                elif self.scene_dataset == "hm3d":
                    config.DATASET.CONTENT_SCENES = ["1S7LAXRdDqK"]
                else:
                    raise NotImplementedError
                config.freeze()
        return configs

    @lazy_property
    def TEST_CONFIG_PER_PROCESS(self):
        """One Habitat config per test process, without repeating scenes."""
        return construct_env_configs(self.TEST_CONFIG, allow_scene_repeat=False)

    def train_scenes_path(self):
        """Path of the training split's episode file."""
        return self.TASK_DATA_DIR_TEMPLATE.format(*(["train"] * 2))

    def valid_scenes_path(self):
        """Path of the validation split's episode file."""
        return self.TASK_DATA_DIR_TEMPLATE.format(*(["val"] * 2))

    def test_scenes_path(self):
        """Path used for testing; currently the validation split.

        A warning is logged on every call. The actual test split would be
        `self.TASK_DATA_DIR_TEMPLATE.format(*(["test"] * 2))`.
        """
        get_logger().warning("Running tests on the validation set!")
        return self.TASK_DATA_DIR_TEMPLATE.format(*(["val"] * 2))

    def tag(self):
        """Experiment tag: dataset name plus markers for the previous-action
        and look-constraint options when they are enabled."""
        t = f"ObjectNav-Habitat-{self.scene_dataset.upper()}"
        if self.add_prev_actions:
            t = f"{t}-PrevActions"

        if self.look_constraints is not None:
            t = f"{t}-Look{','.join(map(str, self.look_constraints))}"

        return t

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """No preprocessors by default; subclasses may override."""
        return tuple()

    def machine_params(self, mode="train", **kwargs):
        """Return the `MachineParams` (processes, devices, preprocessor graph)
        for `mode`.

        Without CUDA a single process and no devices are used whatever the
        mode. With CUDA, `mode` must be "train", "valid" or "test", otherwise
        a `NotImplementedError` is raised, and the process count is spread
        over the mode's GPU ids.
        """
        has_gpus = torch.cuda.is_available()
        if not has_gpus:
            gpu_ids = []
            nprocesses = 1
        elif mode == "train":
            gpu_ids = self.train_gpu_ids
            nprocesses = self.num_train_processes
        elif mode == "valid":
            gpu_ids = self.val_gpu_ids
            nprocesses = 1 if self.run_valid else 0
        elif mode == "test":
            gpu_ids = self.test_gpu_ids
            nprocesses = self.num_test_processes
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        if has_gpus:
            nprocesses = evenly_distribute_count_into_bins(nprocesses, len(gpu_ids))

        # The preprocessor graph is always built for training; for the other
        # modes only when at least one process will actually run.
        sensor_preprocessor_graph = (
            SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
                preprocessors=self.preprocessors(),
            )
            if mode == "train"
            or (
                (isinstance(nprocesses, int) and nprocesses > 0)
                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
            )
            else None
        )

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    def make_sampler_fn(self, **kwargs) -> TaskSampler:
        """Create an `ObjectNavTaskSampler`.

        `failed_end_reward` defaults to `FAILED_END_REWARD` but can be
        overridden through `kwargs`.
        """
        return ObjectNavTaskSampler(
            task_kwargs={
                "look_constraints": self.look_constraints,
            },
            **{"failed_end_reward": self.FAILED_END_REWARD, **kwargs},  # type: ignore
        )

    def _task_sampler_args_for_config(self, env_config) -> Dict[str, Any]:
        """Sampler arguments shared by the train/valid/test modes."""
        return {
            "env_config": env_config,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            # One action space for every mode, so that a subclass overriding
            # `ACTION_SPACE` cannot train and evaluate with different spaces.
            "action_space": self.ACTION_SPACE,
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Sampler arguments for training process `process_ind`; the other
        parameters are not used."""
        return self._task_sampler_args_for_config(
            self.TRAIN_CONFIGS_PER_PROCESS[process_ind]
        )

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Sampler arguments for the validation process.

        Raises `NotImplementedError` unless `total_processes` is 1.
        """
        if total_processes != 1:
            raise NotImplementedError(
                "In validation, `total_processes` must equal 1 for habitat tasks"
            )
        return self._task_sampler_args_for_config(self.VALID_CONFIG)

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Sampler arguments for test process `process_ind`; the other
        parameters are not used."""
        return self._task_sampler_args_for_config(
            self.TEST_CONFIG_PER_PROCESS[process_ind]
        )


================================================
FILE: projects/objectnav_baselines/experiments/ithor/__init__.py
================================================


================================================
FILE: projects/objectnav_baselines/experiments/ithor/objectnav_ithor_base.py
================================================
import os
from abc import ABC

import torch

from projects.objectnav_baselines.experiments.objectnav_thor_base import (
    ObjectNavThorBaseConfig,
)


class ObjectNaviThorBaseConfig(ObjectNavThorBaseConfig, ABC):
    """Shared settings for every iTHOR ObjectNav experiment configuration."""

    # AI2-THOR build and agent type used by these experiments.
    THOR_COMMIT_ID = "9549791ce2e7f472063a10abb1fb7664159fec23"
    AGENT_MODE = "default"

    # Fall back to a single training process when no GPU is available.
    DEFAULT_NUM_TRAIN_PROCESSES = 1 if not torch.cuda.is_available() else 40

    # Dataset locations are resolved against the current working directory at
    # class-definition (i.e. import) time.
    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-objectnav/train")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val")

    # Object categories that can be navigation goals, stored in sorted order.
    TARGET_TYPES = tuple(
        sorted(
            (
                "AlarmClock",
                "Apple",
                "Book",
                "Bowl",
                "Box",
                "Candle",
                "GarbageCan",
                "HousePlant",
                "Laptop",
                "SoapBottle",
                "Television",
                "Toaster",
            )
        )
    )


================================================
FILE: projects/objectnav_baselines/experiments/ithor/objectnav_ithor_depth_resnet18gru_ddppo.py
================================================
from typing import Sequence, Union

import torch.nn as nn

from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from projects.objectnav_baselines.experiments.ithor.objectnav_ithor_base import (
    ObjectNaviThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
    ResNetPreprocessGRUActorCriticMixin,
    ObjectNavPPOMixin,
)


class ObjectNaviThorDepthPPOExperimentConfig(ObjectNaviThorBaseConfig):
    """iTHOR ObjectNav experiment that uses depth observations with a
    ResNet18 preprocessor and GRU actor-critic."""

    # Normalized depth frames plus the goal object type.
    SENSORS = (
        DepthSensorThor(
            uuid="depth_lowres",
            use_normalization=True,
            height=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            width=ObjectNaviThorBaseConfig.SCREEN_SIZE,
        ),
        GoalObjectTypeThorSensor(object_types=ObjectNaviThorBaseConfig.TARGET_TYPES),
    )

    def __init__(self, **kwargs):
        """Forward `kwargs` to the base config and set up the helper that
        builds both the preprocessors and the model."""
        super().__init__(**kwargs)

        mixin_kwargs = dict(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )
        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            **mixin_kwargs
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline (`kwargs` are accepted but not
        used)."""
        rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD
        return ObjectNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=rollout_period,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors supplied by the model/preprocessing
        helper."""
        helper = self.preprocessing_and_model
        return helper.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model, sized to this config's discrete action space."""
        helper = self.preprocessing_and_model
        return helper.create_model(num_actions=self.ACTION_SPACE.n, **kwargs)

    def tag(self):
        """Return the experiment's identifying tag."""
        return "ObjectNav-iTHOR-Depth-ResNet18GRU-DDPPO"


================================================
FILE: projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgb_resnet18gru_ddppo.py
================================================
from typing import Sequence, Union

import torch.nn as nn

from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import (
    GoalObjectTypeThorSensor,
    RGBSensorThor,
)
from projects.objectnav_baselines.experiments.ithor.objectnav_ithor_base import (
    ObjectNaviThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
    ResNetPreprocessGRUActorCriticMixin,
    ObjectNavPPOMixin,
)


class ObjectNaviThorRGBPPOExperimentConfig(ObjectNaviThorBaseConfig):
    """iTHOR ObjectNav experiment that uses RGB observations with a ResNet18
    preprocessor and GRU actor-critic."""

    # ResNet-normalized RGB frames plus the goal object type.
    SENSORS = [
        RGBSensorThor(
            uuid="rgb_lowres",
            use_resnet_normalization=True,
            height=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            width=ObjectNaviThorBaseConfig.SCREEN_SIZE,
        ),
        GoalObjectTypeThorSensor(object_types=ObjectNaviThorBaseConfig.TARGET_TYPES),
    ]

    def __init__(self, **kwargs):
        """Forward `kwargs` to the base config and set up the helper that
        builds both the preprocessors and the model."""
        super().__init__(**kwargs)

        mixin_kwargs = dict(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )
        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            **mixin_kwargs
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline (`kwargs` are accepted but not
        used)."""
        rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD
        return ObjectNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=rollout_period,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors supplied by the model/preprocessing
        helper."""
        helper = self.preprocessing_and_model
        return helper.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model, sized to this config's discrete action space."""
        helper = self.preprocessing_and_model
        return helper.create_model(num_actions=self.ACTION_SPACE.n, **kwargs)

    @classmethod
    def tag(cls):
        """Return the experiment's identifying tag."""
        return "ObjectNav-iTHOR-RGB-ResNet18GRU-DDPPO"


================================================
FILE: projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgbd_resnet18gru_ddppo.py
================================================
from typing import Sequence, Union

import torch.nn as nn

from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import (
    RGBSensorThor,
    GoalObjectTypeThorSensor,
)
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from projects.objectnav_baselines.experiments.ithor.objectnav_ithor_base import (
    ObjectNaviThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
    ResNetPreprocessGRUActorCriticMixin,
    ObjectNavPPOMixin,
)


class ObjectNaviThorRGBDPPOExperimentConfig(ObjectNaviThorBaseConfig):
    """iTHOR ObjectNav experiment that uses both RGB and depth observations
    with a ResNet18 preprocessor and GRU actor-critic."""

    # ResNet-normalized RGB, normalized depth, and the goal object type.
    SENSORS = [
        RGBSensorThor(
            uuid="rgb_lowres",
            use_resnet_normalization=True,
            height=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            width=ObjectNaviThorBaseConfig.SCREEN_SIZE,
        ),
        DepthSensorThor(
            uuid="depth_lowres",
            use_normalization=True,
            height=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            width=ObjectNaviThorBaseConfig.SCREEN_SIZE,
        ),
        GoalObjectTypeThorSensor(object_types=ObjectNaviThorBaseConfig.TARGET_TYPES),
    ]

    def __init__(self, **kwargs):
        """Forward `kwargs` to the base config and set up the helper that
        builds both the preprocessors and the model."""
        super().__init__(**kwargs)

        mixin_kwargs = dict(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )
        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            **mixin_kwargs
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline (`kwargs` are accepted but not
        used)."""
        rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD
        return ObjectNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=rollout_period,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors supplied by the model/preprocessing
        helper."""
        helper = self.preprocessing_and_model
        return helper.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model, sized to this config's discrete action space."""
        helper = self.preprocessing_and_model
        return helper.create_model(num_actions=self.ACTION_SPACE.n, **kwargs)

    def tag(self):
        """Return the experiment's identifying tag."""
        return "ObjectNav-iTHOR-RGBD-ResNet18GRU-DDPPO"


================================================
FILE: projects/objectnav_baselines/experiments/objectnav_base.py
================================================
from abc import ABC
from typing import Optional, Sequence, Union

from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.base_abstractions.sensor import Sensor
from allenact.utils.experiment_utils import Builder


class ObjectNavBaseConfig(ExperimentConfig, ABC):
    """Common constants and defaults shared by all ObjectNav experiment
    configurations."""

    # Agent movement / perception parameters.
    STEP_SIZE = 0.25
    ROTATION_DEGREES = 30.0
    VISIBILITY_DISTANCE = 1.0
    STOCHASTIC = True
    HORIZONTAL_FIELD_OF_VIEW = 79

    # Rendering and episode-length parameters.
    CAMERA_WIDTH = 400
    CAMERA_HEIGHT = 300
    SCREEN_SIZE = 224
    MAX_STEPS = 500

    # Meant to be overridden by concrete experiment configs.
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    SENSORS: Sequence[Sensor] = []

    def __init__(self):
        """Attach the default reward configuration to the instance."""
        self.REWARD_CONFIG = dict(
            step_penalty=-0.01,
            goal_success_reward=10.0,
            failed_stop_reward=0.0,
            shaping_weight=1.0,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors to apply; by default there are none."""
        return ()


================================================
FILE: projects/objectnav_baselines/experiments/objectnav_thor_base.py
================================================
import glob
import os
import platform
from abc import ABC
from math import ceil
from typing import Dict, Any, List, Optional, Sequence, Tuple, cast

import ai2thor
import ai2thor.build
import gym
import numpy as np
import torch
from packaging import version

from allenact.base_abstractions.experiment_config import MachineParams
from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import evenly_distribute_count_into_bins
from allenact.utils.system import get_logger
from allenact_plugins.ithor_plugin.ithor_util import (
    horizontal_to_vertical_fov,
    get_open_x_displays,
)
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from allenact_plugins.robothor_plugin.robothor_task_samplers import (
    ObjectNavDatasetTaskSampler,
)
from allenact_plugins.robothor_plugin.robothor_tasks import ObjectNavTask
from projects.objectnav_baselines.experiments.objectnav_base import ObjectNavBaseConfig

# Fail fast at import time when an installed release of ai2thor is too old.
# Versions "0.0.1", `None` and anything starting with "0+" are exempt from the
# check and are never parsed.
if ai2thor.__version__ not in ("0.0.1", None) and not ai2thor.__version__.startswith(
    "0+"
):
    if version.parse(ai2thor.__version__) < version.parse("3.2.0"):
        raise ImportError(
            "To run the AI2-THOR ObjectNav baseline experiments you must use"
            " ai2thor version 3.2.0 or higher."
        )

import ai2thor.platform


class ObjectNavThorBaseConfig(ObjectNavBaseConfig, ABC):
    """The base config for all AI2-THOR ObjectNav experiments.

    Subclasses are expected to fill in the dataset directories, the target
    object types and the AI2-THOR commit id. This class turns those into
    machine parameters and into per-process task sampler arguments for
    training, validation and testing.
    """

    # Defaults used when the corresponding constructor argument is `None`.
    DEFAULT_NUM_TRAIN_PROCESSES: Optional[int] = None
    DEFAULT_TRAIN_GPU_IDS = tuple(range(torch.cuda.device_count()))
    DEFAULT_VALID_GPU_IDS = (torch.cuda.device_count() - 1,)
    DEFAULT_TEST_GPU_IDS = (torch.cuda.device_count() - 1,)

    # Each dataset directory is expected to contain an "episodes" subdirectory
    # holding one `*.json.gz` file per scene.
    TRAIN_DATASET_DIR: Optional[str] = None
    VAL_DATASET_DIR: Optional[str] = None
    TEST_DATASET_DIR: Optional[str] = None

    AGENT_MODE = "default"

    TARGET_TYPES: Optional[Sequence[str]] = None

    THOR_COMMIT_ID: Optional[str] = None
    DEFAULT_THOR_IS_HEADLESS: bool = False

    ACTION_SPACE = gym.spaces.Discrete(len(ObjectNavTask.class_action_names()))

    def __init__(
        self,
        num_train_processes: Optional[int] = None,
        num_test_processes: Optional[int] = None,
        test_on_validation: bool = False,
        train_gpu_ids: Optional[Sequence[int]] = None,
        val_gpu_ids: Optional[Sequence[int]] = None,
        test_gpu_ids: Optional[Sequence[int]] = None,
        randomize_train_materials: bool = False,
        headless: bool = False,
    ):
        """Store process/device settings, falling back to class defaults.

        # Parameters

        num_train_processes : Number of training processes; defaults to
            `DEFAULT_NUM_TRAIN_PROCESSES`.
        num_test_processes : Number of test processes; defaults to 10 when
            CUDA is available and 1 otherwise.
        test_on_validation : If `True`, testing is run on the validation set.
        train_gpu_ids : GPU ids for training; defaults to
            `DEFAULT_TRAIN_GPU_IDS`.
        val_gpu_ids : GPU ids for validation; defaults to
            `DEFAULT_VALID_GPU_IDS`.
        test_gpu_ids : GPU ids for testing; defaults to
            `DEFAULT_TEST_GPU_IDS`.
        randomize_train_materials : Forwarded to the training task sampler as
            `randomize_materials_in_training`.
        headless : Whether AI2-THOR should be run headlessly.
        """
        super().__init__()

        def v_or_default(v, default):
            return v if v is not None else default

        self.num_train_processes = v_or_default(
            num_train_processes, self.DEFAULT_NUM_TRAIN_PROCESSES
        )
        self.num_test_processes = v_or_default(
            num_test_processes, (10 if torch.cuda.is_available() else 1)
        )
        self.test_on_validation = test_on_validation
        self.train_gpu_ids = v_or_default(train_gpu_ids, self.DEFAULT_TRAIN_GPU_IDS)
        self.val_gpu_ids = v_or_default(val_gpu_ids, self.DEFAULT_VALID_GPU_IDS)
        self.test_gpu_ids = v_or_default(test_gpu_ids, self.DEFAULT_TEST_GPU_IDS)

        # NOTE(review): `headless` defaults to `False` rather than `None`, so
        # `DEFAULT_THOR_IS_HEADLESS` is only consulted when a caller passes
        # `headless=None` explicitly. Confirm whether the parameter default
        # should be `None` before changing it, since that would alter the
        # behaviour of subclasses overriding `DEFAULT_THOR_IS_HEADLESS`.
        self.headless = v_or_default(headless, self.DEFAULT_THOR_IS_HEADLESS)

        self.sampler_devices = self.train_gpu_ids
        self.randomize_train_materials = randomize_train_materials

    def env_args(self):
        """Return the keyword arguments describing the AI2-THOR environment.

        The commit id is `THOR_COMMIT_ID` unless running headlessly, in which
        case `ai2thor.build.COMMIT_ID` is used. Depth rendering is enabled
        only when a `DepthSensorThor` is among `SENSORS`.
        """
        assert self.THOR_COMMIT_ID is not None

        return dict(
            width=self.CAMERA_WIDTH,
            height=self.CAMERA_HEIGHT,
            commit_id=(
                self.THOR_COMMIT_ID if not self.headless else ai2thor.build.COMMIT_ID
            ),
            stochastic=True,
            continuousMode=True,
            applyActionNoise=self.STOCHASTIC,
            rotateStepDegrees=self.ROTATION_DEGREES,
            visibilityDistance=self.VISIBILITY_DISTANCE,
            gridSize=self.STEP_SIZE,
            snapToGrid=False,
            agentMode=self.AGENT_MODE,
            fieldOfView=horizontal_to_vertical_fov(
                horizontal_fov_in_degrees=self.HORIZONTAL_FIELD_OF_VIEW,
                width=self.CAMERA_WIDTH,
                height=self.CAMERA_HEIGHT,
            ),
            include_private_scenes=False,
            renderDepthImage=any(isinstance(s, DepthSensorThor) for s in self.SENSORS),
        )

    def machine_params(self, mode="train", **kwargs):
        """Return the `MachineParams` for the given mode.

        # Parameters

        mode : One of `"train"`, `"valid"` or `"test"`.

        # Raises

        NotImplementedError: if `mode` is not one of the values above.
        """
        sampler_devices: Sequence[torch.device] = []
        devices: Sequence[torch.device]
        if mode == "train":
            workers_per_device = 1
            devices = (
                [torch.device("cpu")]
                if not torch.cuda.is_available()
                else cast(Tuple, self.train_gpu_ids) * workers_per_device
            )
            nprocesses = evenly_distribute_count_into_bins(
                self.num_train_processes, max(len(devices), 1)
            )
            sampler_devices = self.sampler_devices
        elif mode == "valid":
            nprocesses = 1
            devices = (
                [torch.device("cpu")]
                if not torch.cuda.is_available()
                else self.val_gpu_ids
            )
        elif mode == "test":
            devices = (
                [torch.device("cpu")]
                if not torch.cuda.is_available()
                else self.test_gpu_ids
            )
            nprocesses = evenly_distribute_count_into_bins(
                self.num_test_processes, max(len(devices), 1)
            )
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        # Expert action sensors are only kept during training.
        sensors = [*self.SENSORS]
        if mode != "train":
            sensors = [s for s in sensors if not isinstance(s, ExpertActionSensor)]

        # The preprocessor graph is skipped only when not training and no
        # processes are requested for this mode.
        sensor_preprocessor_graph = (
            SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(sensors).observation_spaces,
                preprocessors=self.preprocessors(),
            )
            if mode == "train"
            or (
                (isinstance(nprocesses, int) and nprocesses > 0)
                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
            )
            else None
        )

        return MachineParams(
            nprocesses=nprocesses,
            devices=devices,
            sampler_devices=(
                sampler_devices if mode == "train" else devices
            ),  # ignored with > 1 gpu_ids
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create the dataset-backed ObjectNav task sampler from `kwargs`."""
        return ObjectNavDatasetTaskSampler(**kwargs)

    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        """Return `num_parts + 1` integer boundaries splitting `range(n)` into
        `num_parts` contiguous, nearly equal parts."""
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]],
        seeds: Optional[List[int]],
        deterministic_cudnn: bool,
        include_expert_sensor: bool = True,
        allow_oversample: bool = False,
    ) -> Dict[str, Any]:
        """Build the task sampler arguments for one process of a scene split.

        # Parameters

        scenes_dir : Directory containing one `*.json.gz` file per scene.
        process_ind : Index of the process the arguments are built for.
        total_processes : Total number of processes sharing the scenes.
        devices : Devices available to the processes. Used to pick the GPU in
            headless mode and to sanity check the number of X-displays
            otherwise.
        seeds : Optional per-process seeds.
        deterministic_cudnn : Forwarded to the sampler unchanged.
        include_expert_sensor : Whether `ExpertActionSensor`s are kept.
        allow_oversample : Whether scenes may be repeated when there are more
            processes than scenes.

        # Raises

        RuntimeError: if no scene files are found in `scenes_dir`, or if
            there are more processes than scenes and `allow_oversample` is
            `False`.
        """
        path = os.path.join(scenes_dir, "*.json.gz")
        # `os.path.basename` (rather than splitting on "/") keeps the scene
        # name extraction correct whatever path separator `glob` returns.
        scenes = [os.path.basename(scene).split(".")[0] for scene in glob.glob(path)]
        if len(scenes) == 0:
            raise RuntimeError(
                (
                    "Could find no scene dataset information in directory {}."
                    " Are you sure you've downloaded them? "
                    " If not, see https://allenact.org/installation/download-datasets/ information"
                    " on how this can be done."
                ).format(scenes_dir)
            )

        oversample_warning = (
            f"Warning: oversampling some of the scenes ({scenes}) to feed all processes ({total_processes})."
            " You can avoid this by setting a number of workers divisible by the number of scenes"
        )
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if not allow_oversample:
                raise RuntimeError(
                    f"Cannot have `total_processes > len(scenes)`"
                    f" ({total_processes} > {len(scenes)}) when `allow_oversample` is `False`."
                )

            if total_processes % len(scenes) != 0:
                get_logger().warning(oversample_warning)
            # Repeat the scene list until it covers every process, then trim it
            # to a multiple of `total_processes`.
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        elif len(scenes) % total_processes != 0:
            get_logger().warning(oversample_warning)

        inds = self._partition_inds(len(scenes), total_processes)

        if not self.headless:
            x_display: Optional[str] = None
            if platform.system() == "Linux":
                x_displays = get_open_x_displays(throw_error_if_empty=True)

                # `devices` may legitimately be `None`; treat that as no GPUs
                # instead of failing while iterating over it.
                num_gpu_devices = len(
                    [d for d in (devices or []) if d != torch.device("cpu")]
                )
                if num_gpu_devices > len(x_displays):
                    get_logger().warning(
                        f"More GPU devices found than X-displays (devices: `{devices}`, x_displays: `{x_displays}`)."
                        f" This is not necessarily a bad thing but may mean that you're not using GPU memory as"
                        f" efficiently as possible. Consider following the instructions here:"
                        f" https://allenact.org/installation/installation-framework/#installation-of-ithor-ithor-plugin"
                        f" describing how to start an X-display on every GPU."
                    )
                # Assign X-displays to processes round-robin.
                x_display = x_displays[process_ind % len(x_displays)]

            device_dict = dict(x_display=x_display)
        else:
            # Headless rendering: assign GPUs to processes round-robin.
            device_dict = dict(
                gpu_device=devices[process_ind % len(devices)],
                platform=ai2thor.platform.CloudRendering,
            )

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "object_types": self.TARGET_TYPES,
            "max_steps": self.MAX_STEPS,
            "sensors": [
                s
                for s in self.SENSORS
                if (include_expert_sensor or not isinstance(s, ExpertActionSensor))
            ],
            "action_space": self.ACTION_SPACE,
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
            "env_args": {**self.env_args(), **device_dict},
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a training process.

        Scenes are read from `TRAIN_DATASET_DIR/episodes`, oversampling is
        allowed, and the dataset is looped with flipping enabled.
        """
        res = self._get_sampler_args_for_scene_split(
            scenes_dir=os.path.join(self.TRAIN_DATASET_DIR, "episodes"),
            process_ind=process_ind,
            total_processes=total_processes,
            devices=devices,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
            allow_oversample=True,
        )
        res["scene_directory"] = self.TRAIN_DATASET_DIR
        res["loop_dataset"] = True
        res["allow_flipping"] = True
        res["randomize_materials_in_training"] = self.randomize_train_materials
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a validation process.

        Scenes are read from `VAL_DATASET_DIR/episodes`, expert sensors are
        dropped, oversampling is disallowed and the dataset is not looped.
        """
        res = self._get_sampler_args_for_scene_split(
            scenes_dir=os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind=process_ind,
            total_processes=total_processes,
            devices=devices,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
            include_expert_sensor=False,
            allow_oversample=False,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a test process.

        When `test_on_validation` is set, or no `TEST_DATASET_DIR` is
        configured, the validation arguments are returned instead (after
        logging a warning explaining why). Otherwise scenes are read from
        `TEST_DATASET_DIR/episodes`, the environment is told that not all
        metadata is available, and the reward shaping weight is set to 0.
        """

        if self.test_on_validation or self.TEST_DATASET_DIR is None:
            # Log the reason that actually applies for falling back.
            if self.test_on_validation:
                get_logger().warning(
                    "`test_on_validation` is set to `True` and thus we will run evaluation on the validation set instead."
                    " Be careful as the saved metrics json and tensorboard files **will still be labeled as"
                    " 'test' rather than 'valid'**."
                )
            else:
                get_logger().warning(
                    "No test dataset dir detected, running test on validation set instead."
                    " Be careful as the saved metrics json and tensorboard files **will still be labeled as"
                    " 'test' rather than 'valid'**."
                )

            return self.valid_task_sampler_args(
                process_ind=process_ind,
                total_processes=total_processes,
                devices=devices,
                seeds=seeds,
                deterministic_cudnn=deterministic_cudnn,
            )

        else:
            res = self._get_sampler_args_for_scene_split(
                scenes_dir=os.path.join(self.TEST_DATASET_DIR, "episodes"),
                process_ind=process_ind,
                total_processes=total_processes,
                devices=devices,
                seeds=seeds,
                deterministic_cudnn=deterministic_cudnn,
                include_expert_sensor=False,
                allow_oversample=False,
            )
            res["env_args"]["all_metadata_available"] = False
            # Copy the reward config so the shared `REWARD_CONFIG` dict is not
            # mutated when disabling shaping.
            res["rewards_config"] = {**res["rewards_config"], "shaping_weight": 0}
            res["scene_directory"] = self.TEST_DATASET_DIR
            res["loop_dataset"] = False
            return res


================================================
FILE: projects/objectnav_baselines/experiments/robothor/__init__.py
================================================


================================================
FILE: projects/objectnav_baselines/experiments/robothor/beta/README.md
================================================
# Beta experiments

This folder contains "beta" experiments, e.g. training experiments meant to be used
to test new features. These experiments may have bugs or not train well.

================================================
FILE: projects/objectnav_baselines/experiments/robothor/beta/__init__.py
================================================


================================================
FILE: projects/objectnav_baselines/experiments/robothor/beta/objectnav_robothor_rgb_resnetgru_ddppo_and_gbc.py
================================================
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.grouped_action_imitation import (
    GroupedActionImitation,
)
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.utils.experiment_utils import (
    Builder,
    PipelineStage,
    TrainingPipeline,
    LinearDecay,
)
from allenact_plugins.ithor_plugin.ithor_sensors import (
    RGBSensorThor,
    GoalObjectTypeThorSensor,
)
from allenact_plugins.ithor_plugin.ithor_sensors import TakeEndActionThorNavSensor
from allenact_plugins.robothor_plugin import robothor_constants
from allenact_plugins.robothor_plugin.robothor_tasks import ObjectNavTask
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
    ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import ResNetPreprocessGRUActorCriticMixin


class ObjectNavRoboThorResNet18GRURGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """RoboTHOR ObjectNav experiment with RGB input, trained with PPO plus a
    grouped-action imitation loss."""

    # RGB frames, the goal object type, and the sensor stored under the
    # "expert_group_action" uuid.
    SENSORS = (  # type:ignore
        RGBSensorThor(
            uuid="rgb_lowres",
            use_resnet_normalization=True,
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES
        ),
        TakeEndActionThorNavSensor(
            uuid="expert_group_action",
            nactions=len(ObjectNavTask.class_action_names()),
        ),
    )

    def __init__(self, **kwargs):
        """Forward `kwargs` to the base config and set up the helper that
        builds both the preprocessors and the model."""
        super().__init__(**kwargs)

        mixin_kwargs = dict(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )
        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            **mixin_kwargs
        )

    def preprocessors(self):
        """Return the preprocessors supplied by the model/preprocessing
        helper."""
        helper = self.preprocessing_and_model
        return helper.preprocessors()

    def create_model(self, **kwargs):
        """Create the model, sized to this config's discrete action space."""
        helper = self.preprocessing_and_model
        return helper.create_model(num_actions=self.ACTION_SPACE.n, **kwargs)

    def training_pipeline(self, **kwargs):
        """Return a single-stage pipeline optimizing the PPO and grouped
        action imitation losses together (`kwargs` are accepted but not
        used)."""
        total_steps = int(300000000)

        # The imitation loss works over two action groups: every action other
        # than END, and the END action alone.
        action_names = ObjectNavTask.class_action_names()
        non_end_group = {
            index
            for index, name in enumerate(action_names)
            if name != robothor_constants.END
        }
        end_group = {action_names.index(robothor_constants.END)}

        losses = {
            "ppo_loss": PPO(**PPOConfig),
            "grouped_action_imitation": GroupedActionImitation(
                nactions=len(action_names),
                action_groups=[non_end_group, end_group],
            ),
        }
        single_stage = PipelineStage(
            loss_names=["ppo_loss", "grouped_action_imitation"],
            max_stage_steps=total_steps,
        )

        return TrainingPipeline(
            save_interval=5000000,
            metric_accumulate_interval=10000,
            optimizer_builder=Builder(optim.Adam, dict(lr=3e-4)),
            num_mini_batch=1,
            update_repeats=4,
            max_grad_norm=0.5,
            num_steps=128,
            named_losses=losses,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[single_stage],
            # Learning rate decays linearly over the whole stage.
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}
            ),
        )

    def tag(self):
        """Return the experiment's identifying tag."""
        return "ObjectNav-RoboTHOR-RGB-ResNet18GRU-DDPPOAndGBC"


================================================
FILE: projects/objectnav_baselines/experiments/robothor/beta/objectnav_robothor_rgb_unfrozenresnet18gru_vdr_ddppo.py
================================================
from typing import Union, Optional, Any

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.algorithms.onpolicy_sync.storage import RolloutBlockStorage

# noinspection PyUnresolvedReferences
from allenact.base_abstractions.sensor import Sensor
from allenact.base_abstractions.task import Task
from allenact.embodiedai.storage.vdr_storage import (
    DiscreteVisualDynamicsReplayStorage,
    InverseDynamicsVDRLoss,
)
from allenact.utils.experiment_utils import Builder, TrainingSettings
from allenact.utils.experiment_utils import (
    PipelineStage,
    LinearDecay,
    StageComponent,
)
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
from allenact_plugins.ithor_plugin.ithor_sensors import (
    RGBSensorThor,
    GoalObjectTypeThorSensor,
)
from allenact_plugins.robothor_plugin.robothor_environment import RoboThorEnvironment
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
    ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
    ObjectNavUnfrozenResNetWithGRUActorCriticMixin,
    update_with_auxiliary_losses,
)


def compute_inv_dyn_action_logits(
    model,
    img0,
    img1,
):
    """Compute inverse-dynamics action logits for a pair of image batches.

    Each image batch is encoded separately by `model.visual_encoder` (with a
    temporary leading dimension added for the call and removed afterwards).
    The two encodings are then concatenated along dimension 1 and passed to
    `model.inv_dyn_mlp`.
    """
    encoder = model.visual_encoder
    uuid = encoder.rgb_uuid

    def encode(img):
        return encoder({uuid: img.unsqueeze(0)}).squeeze(0)

    joint_encoding = torch.cat((encode(img0), encode(img1)), dim=1)
    return model.inv_dyn_mlp(joint_encoding)


class LastActionSuccessSensor(
    Sensor[
        Union[IThorEnvironment, RoboThorEnvironment],
        Union[Task[IThorEnvironment], Task[RoboThorEnvironment]],
    ]
):
    """Sensor exposing the task's `last_action_success` value as a number."""

    def __init__(self, uuid: str = "last_action_success", **kwargs: Any):
        """Register the sensor with a single-entry binary observation
        space."""
        binary_space = gym.spaces.MultiBinary(1)
        super().__init__(uuid=uuid, observation_space=binary_space, **kwargs)

    def get_observation(
        self,
        env: Union[IThorEnvironment, RoboThorEnvironment],
        task: Optional[Task],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Return `task.last_action_success` multiplied by 1."""
        succeeded = task.last_action_success
        return 1 * succeeded


class VisibleObjectTypesSensor(
    Sensor[
        Union[IThorEnvironment, RoboThorEnvironment],
        Union[Task[IThorEnvironment], Task[RoboThorEnvironment]],
    ]
):
    """Sensor producing a 0/1 vector over the ObjectNav target types.

    Entry `i` is 1.0 when some object in the last event's metadata is marked
    visible and has the `i`-th type of
    `ObjectNavRoboThorBaseConfig.TARGET_TYPES`; all other entries are 0.
    """

    def __init__(self, uuid: str = "visible_objects", **kwargs: Any):
        target_types = ObjectNavRoboThorBaseConfig.TARGET_TYPES
        super().__init__(
            uuid=uuid,
            observation_space=gym.spaces.Box(
                low=0, high=1, shape=(len(target_types),)
            ),
            **kwargs
        )
        # Maps each target type name to its position in the output vector.
        self.type_to_index = {
            type_name: index for index, type_name in enumerate(target_types)
        }

    def get_observation(
        self,
        env: Union[IThorEnvironment, RoboThorEnvironment],
        task: Optional[Task],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Return the visibility vector built from
        `env.controller.last_event.metadata["objects"]`.

        `task` and the extra arguments are not used.
        """
        visibility = np.zeros((len(self.type_to_index),))
        for obj in env.controller.last_event.metadata["objects"]:
            if not obj["visible"]:
                continue
            index = self.type_to_index.get(obj["objectType"])
            if index is not None:
                visibility[index] = 1.0
        return visibility


class ObjectNavRoboThorVdrTmpRGBExperimentConfig(ObjectNavRoboThorBaseConfig):
    """RoboTHOR ObjectNav experiment from RGB input with an unfrozen ResNet18 +
    GRU actor-critic, trained with PPO plus an inverse-dynamics loss computed
    on batches drawn from a discrete visual dynamics replay (VDR) storage."""

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
        # The two sensors below feed the VDR storage configured in
        # `training_pipeline` (via their uuids "last_action_success" and
        # "visible_objects").
        LastActionSuccessSensor(),
        VisibleObjectTypesSensor(),
    ]

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.model_creation_handler = ObjectNavUnfrozenResNetWithGRUActorCriticMixin(
            backbone="gnresnet18",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Build the single-stage training pipeline.

        The stage has two components: an "onpolicy" component that applies
        every loss except "inv_dyn_vdr" to the rollout storage, and a "vdr"
        component that applies only "inv_dyn_vdr" to the "discrete_vdr"
        storage with its own training settings. `kwargs` are not used.
        """
        # PPO
        ppo_steps = int(300000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 4
        num_steps = 128
        save_interval = 5000000
        # Without CUDA the metric accumulation interval drops to a single step.
        log_interval = 10000 if torch.cuda.is_available() else 1
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5

        auxiliary_uuids = ()
        multiple_beliefs = False

        # Each entry maps a loss name to a (loss, weight) pair.
        named_losses = {"ppo_loss": (PPO(**PPOConfig), 1.0)}
        named_losses = update_with_auxiliary_losses(
            named_losses=named_losses,
            auxiliary_uuids=auxiliary_uuids,
            multiple_beliefs=multiple_beliefs,
        )

        default_ts = TrainingSettings(
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
        )

        named_losses = {
            **named_losses,
            "inv_dyn_vdr": (
                InverseDynamicsVDRLoss(
                    compute_action_logits_fn=compute_inv_dyn_action_logits,
                    img0_key="img0",
                    img1_key="img1",
                    action_key="action",
                ),
                1.0,
            ),
        }

        # Sorting fixes one ordering shared by `loss_names` and `loss_weights`.
        sorted_loss_names = sorted(named_losses)
        return TrainingPipeline(
            training_settings=default_ts,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            named_losses={k: v[0] for k, v in named_losses.items()},
            named_storages={
                "onpolicy": RolloutBlockStorage(init_size=num_steps),
                "discrete_vdr": DiscreteVisualDynamicsReplayStorage(
                    image_uuid="rgb_lowres",
                    action_success_uuid="last_action_success",
                    extra_targets=["visible_objects"],
                    # NOTE(review): 6 matches the output width of `inv_dyn_mlp`
                    # in `create_model`; presumably the number of task
                    # actions -- confirm.
                    nactions=6,
                    num_to_store_per_action=200 if torch.cuda.is_available() else 10,
                    max_to_save_per_episode=6,
                    target_batch_size=256 if torch.cuda.is_available() else 128,
                ),
            },
            pipeline_stages=[
                PipelineStage(
                    loss_names=sorted_loss_names,
                    max_stage_steps=ppo_steps,
                    loss_weights=[
                        named_losses[loss_name][1] for loss_name in sorted_loss_names
                    ],
                    stage_components=[
                        StageComponent(
                            uuid="onpolicy",
                            storage_uuid="onpolicy",
                            loss_names=[
                                ln for ln in sorted_loss_names if ln != "inv_dyn_vdr"
                            ],
                        ),
                        StageComponent(
                            uuid="vdr",
                            storage_uuid="discrete_vdr",
                            loss_names=["inv_dyn_vdr"],
                            training_settings=TrainingSettings(
                                num_mini_batch=1,
                                update_repeats=1,
                            ),
                        ),
                    ],
                )
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )

    def create_model(self, **kwargs) -> nn.Module:
        """Create the actor-critic and attach the inverse-dynamics MLP head.

        The head is stored as `model.inv_dyn_mlp`, the attribute that
        `compute_inv_dyn_action_logits` calls on the concatenation of two
        visual encodings.
        """
        model = self.model_creation_handler.create_model(**kwargs)
        # The input width of 1024 must equal the combined width of the two
        # concatenated encodings (i.e. 512 per encoding).
        model.inv_dyn_mlp = nn.Sequential(
            nn.Linear(1024, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 6),
        )
        return model

    @classmethod
    def tag(cls):
        """Return the experiment tag.

        Declared as a classmethod, like the sibling experiment configs, so it
        can be called on the class as well as on an instance.
        """
        return "Objectnav-RoboTHOR-RGB-UnfrozenResNet18GRU-VDR"


================================================
FILE: projects/objectnav_baselines/experiments/robothor/clip/__init__.py
================================================


================================================
FILE: projects/objectnav_baselines/experiments/robothor/clip/objectnav_robothor_rgb_clipresnet50gru_ddppo.py
================================================
from typing import Sequence, Union

import torch.nn as nn

from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
from allenact_plugins.ithor_plugin.ithor_sensors import (
    GoalObjectTypeThorSensor,
    RGBSensorThor,
)
from projects.objectnav_baselines.experiments.clip.mixins import (
    ClipResNetPreprocessGRUActorCriticMixin,
)
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
    ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import ObjectNavPPOMixin


class ObjectNavRoboThorClipRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """RoboTHOR ObjectNav experiment from RGB input, with preprocessing and
    model creation delegated to `ClipResNetPreprocessGRUActorCriticMixin`
    configured for the CLIP "RN50" model type."""

    CLIP_MODEL_TYPE = "RN50"

    SENSORS = [
        # RGB frames use the CLIP preprocessor's mean/std constants.
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
    ]

    def __init__(self, add_prev_actions: bool = False, **kwargs):
        super().__init__(**kwargs)

        # Forwarded to the model in `create_model`.
        self.add_prev_actions = add_prev_actions
        self.preprocessing_and_model = ClipResNetPreprocessGRUActorCriticMixin(
            sensors=self.SENSORS,
            clip_model_type=self.CLIP_MODEL_TYPE,
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the `ObjectNavPPOMixin` pipeline with no auxiliary losses;
        `kwargs` are not used."""
        pipeline_kwargs = dict(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )
        return ObjectNavPPOMixin.training_pipeline(**pipeline_kwargs)

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors provided by the CLIP mixin."""
        mixin = self.preprocessing_and_model
        return mixin.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model via the CLIP mixin, passing the action-space size
        and the `add_prev_actions` flag along with `kwargs`."""
        mixin = self.preprocessing_and_model
        return mixin.create_model(
            num_actions=self.ACTION_SPACE.n,
            add_prev_actions=self.add_prev_actions,
            **kwargs
        )

    @classmethod
    def tag(cls):
        """Return the experiment tag."""
        return "ObjectNav-RoboTHOR-RGB-ClipResNet50GRU-DDPPO"


================================================
FILE: projects/objectnav_baselines/experiments/robothor/clip/objectnav_robothor_rgb_clipresnet50x16gru_ddppo.py
================================================
from typing import Sequence, Union

import torch.nn as nn

from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
from allenact_plugins.ithor_plugin.ithor_sensors import (
    GoalObjectTypeThorSensor,
    RGBSensorThor,
)
from projects.objectnav_baselines.experiments.clip.mixins import (
    ClipResNetPreprocessGRUActorCriticMixin,
)
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
    ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import ObjectNavPPOMixin


class ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """RoboTHOR ObjectNav experiment from RGB input, with preprocessing and
    model creation delegated to `ClipResNetPreprocessGRUActorCriticMixin`
    configured for the CLIP "RN50x16" model type."""

    CLIP_MODEL_TYPE = "RN50x16"

    SENSORS = [
        # RGB frames use the CLIP preprocessor's mean/std constants.
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
    ]

    def __init__(self, add_prev_actions: bool = False, **kwargs):
        super().__init__(**kwargs)

        # Forwarded to the model in `create_model`.
        self.add_prev_actions = add_prev_actions
        self.preprocessing_and_model = ClipResNetPreprocessGRUActorCriticMixin(
            sensors=self.SENSORS,
            clip_model_type=self.CLIP_MODEL_TYPE,
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the `ObjectNavPPOMixin` pipeline with no auxiliary losses;
        `kwargs` are not used."""
        pipeline_kwargs = dict(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )
        return ObjectNavPPOMixin.training_pipeline(**pipeline_kwargs)

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors provided by the CLIP mixin."""
        mixin = self.preprocessing_and_model
        return mixin.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model via the CLIP mixin, passing the action-space size
        and the `add_prev_actions` flag along with `kwargs`."""
        mixin = self.preprocessing_and_model
        return mixin.create_model(
            num_actions=self.ACTION_SPACE.n,
            add_prev_actions=self.add_prev_actions,
            **kwargs
        )

    @classmethod
    def tag(cls):
        """Return the experiment tag."""
        return "ObjectNav-RoboTHOR-RGB-ClipResNet50x16GRU-DDPPO"


================================================
FILE: projects/objectnav_baselines/experiments/robothor/objectnav_robothor_base.py
================================================
import os
from abc import ABC
from typing import Optional, List, Any, Dict

import torch

from allenact.utils.misc_utils import prepare_locals_for_super
from projects.objectnav_baselines.experiments.objectnav_thor_base import (
    ObjectNavThorBaseConfig,
)


class ObjectNavRoboThorBaseConfig(ObjectNavThorBaseConfig, ABC):
    """The base config for all RoboTHOR ObjectNav experiments."""

    # THOR commit ids. `THOR_COMMIT_ID` is not referenced in this class
    # (presumably consumed by the parent config -- verify there); the second id
    # replaces the env's "commit_id" in `train_task_sampler_args` when
    # `self.randomize_train_materials` is truthy.
    THOR_COMMIT_ID = "bad5bc2b250615cb766ffb45d455c211329af17e"
    THOR_COMMIT_ID_FOR_RAND_MATERIALS = "9549791ce2e7f472063a10abb1fb7664159fec23"

    AGENT_MODE = "locobot"

    # A single training process is the default when CUDA is unavailable.
    DEFAULT_NUM_TRAIN_PROCESSES = 60 if torch.cuda.is_available() else 1

    # Dataset paths are anchored at the working directory in effect when this
    # class body is executed (i.e. when the module is imported).
    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-objectnav/train")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-objectnav/val")
    TEST_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-objectnav/test")

    # Alphabetically sorted tuple of the 12 target object type names.
    TARGET_TYPES = tuple(
        sorted(
            [
                "AlarmClock",
                "Apple",
                "BaseballBat",
                "BasketBall",
                "Bowl",
                "GarbageCan",
                "HousePlant",
                "Laptop",
                "Mug",
                "SprayBottle",
                "Television",
                "Vase",
            ]
        )
    )

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the parent's training task-sampler arguments, switching the
        environment's "commit_id" to `THOR_COMMIT_ID_FOR_RAND_MATERIALS` when
        `self.randomize_train_materials` is truthy.

        The arguments of this method are forwarded unchanged to the parent
        implementation.
        """
        # NOTE: `locals()` is captured here, so any local variable introduced
        # before this call would also be handed to `prepare_locals_for_super`
        # (presumably it only strips `self`/`__class__` -- verify in
        # allenact.utils.misc_utils). Keep this the first statement.
        kwargs = super(ObjectNavRoboThorBaseConfig, self).train_task_sampler_args(
            **prepare_locals_for_super(locals())
        )
        if self.randomize_train_materials:
            kwargs["env_args"]["commit_id"] = self.THOR_COMMIT_ID_FOR_RAND_MATERIALS
        return kwargs


================================================
FILE: projects/objectnav_baselines/experiments/robothor/objectnav_robothor_depth_resnet18gru_ddppo.py
================================================
from typing import Sequence, Union

import torch.nn as nn

from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
    ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
    ResNetPreprocessGRUActorCriticMixin,
    ObjectNavPPOMixin,
)


class ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """An Object Navigation experiment configuration in RoboThor with Depth
    input.

    Preprocessing and model creation are delegated to
    `ResNetPreprocessGRUActorCriticMixin` configured with the "RN18" ResNet
    type.
    """

    SENSORS = (
        DepthSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
    )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the `ObjectNavPPOMixin` pipeline with no auxiliary losses;
        `kwargs` are not used."""
        return ObjectNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors provided by the ResNet mixin."""
        return self.preprocessing_and_model.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model via the ResNet mixin, passing the action-space
        size along with `kwargs`."""
        return self.preprocessing_and_model.create_model(
            num_actions=self.ACTION_SPACE.n, **kwargs
        )

    @classmethod
    def tag(cls):
        """Return the experiment tag.

        Declared as a classmethod, like the sibling experiment configs, so it
        can be called on the class as well as on an instance.
        """
        return "ObjectNav-RoboTHOR-Depth-ResNet18GRU-DDPPO"


================================================
FILE: projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet18gru_dagger.py
================================================
from typing import Sequence, Union

import torch.nn as nn

from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.base_abstractions.sensor import ExpertActionSensor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import (
    GoalObjectTypeThorSensor,
    RGBSensorThor,
)
from allenact_plugins.robothor_plugin.robothor_tasks import ObjectNavTask
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
    ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
    ResNetPreprocessGRUActorCriticMixin,
    ObjectNavDAggerMixin,
)


class ObjectNavRoboThorRGBDAggerExperimentConfig(ObjectNavRoboThorBaseConfig):
    """RoboTHOR ObjectNav experiment from RGB input whose training pipeline
    comes from `ObjectNavDAggerMixin`.

    In addition to the RGB and goal sensors, an `ExpertActionSensor` sized to
    the number of `ObjectNavTask` actions is included.
    """

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
        ExpertActionSensor(
            nactions=len(ObjectNavTask.class_action_names()),
        ),
    ]

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # Zero the "shaping" entry of the reward configuration.
        self.REWARD_CONFIG["shaping"] = 0

        mixin = ResNetPreprocessGRUActorCriticMixin(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )
        self.preprocessing_and_model = mixin

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the `ObjectNavDAggerMixin` pipeline; `kwargs` are not
        used."""
        rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD
        return ObjectNavDAggerMixin.training_pipeline(
            advance_scene_rollout_period=rollout_period,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors provided by the ResNet mixin."""
        mixin = self.preprocessing_and_model
        return mixin.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model via the ResNet mixin, passing the action-space
        size along with `kwargs`."""
        mixin = self.preprocessing_and_model
        return mixin.create_model(num_actions=self.ACTION_SPACE.n, **kwargs)

    @classmethod
    def tag(cls):
        """Return the experiment tag."""
        return "ObjectNav-RoboTHOR-RGB-ResNet18GRU-DAgger"


================================================
FILE: projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet18gru_ddppo.py
================================================
from typing import Sequence, Union

import torch.nn as nn

from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import (
    GoalObjectTypeThorSensor,
    RGBSensorThor,
)
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
    ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
    ResNetPreprocessGRUActorCriticMixin,
    ObjectNavPPOMixin,
)


class ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """RoboTHOR ObjectNav experiment from RGB input, with preprocessing and
    model creation delegated to `ResNetPreprocessGRUActorCriticMixin`
    configured with the "RN18" ResNet type."""

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
    ]

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        mixin = ResNetPreprocessGRUActorCriticMixin(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )
        self.preprocessing_and_model = mixin

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the `ObjectNavPPOMixin` pipeline with no auxiliary losses;
        `kwargs` are not used."""
        pipeline_kwargs = dict(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )
        return ObjectNavPPOMixin.training_pipeline(**pipeline_kwargs)

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors provided by the ResNet mixin."""
        mixin = self.preprocessing_and_model
        return mixin.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model via the ResNet mixin, passing the action-space
        size along with `kwargs`."""
        mixin = self.preprocessing_and_model
        return mixin.create_model(num_actions=self.ACTION_SPACE.n, **kwargs)

    @classmethod
    def tag(cls):
        """Return the experiment tag."""
        return "ObjectNav-RoboTHOR-RGB-ResNet18GRU-DDPPO"


================================================
FILE: projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet50gru_ddppo.py
================================================
from typing import Sequence, Union

import torch.nn as nn

from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import (
    GoalObjectTypeThorSensor,
    RGBSensorThor,
)
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
    ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
    ResNetPreprocessGRUActorCriticMixin,
    ObjectNavPPOMixin,
)


class ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """An Object Navigation experiment configuration in RoboThor with RGB
    input.

    Preprocessing and model creation are delegated to
    `ResNetPreprocessGRUActorCriticMixin` configured with the "RN50" ResNet
    type.
    """

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
    ]

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            sensors=self.SENSORS,
            resnet_type="RN50",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the `ObjectNavPPOMixin` pipeline with no auxiliary losses;
        `kwargs` are not used."""
        return ObjectNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors provided by the ResNet mixin."""
        return self.preprocessing_and_model.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model via the ResNet mixin, passing the action-space
        size along with `kwargs`."""
        return self.preprocessing_and_model.create_model(
            num_actions=self.ACTION_SPACE.n, **kwargs
        )

    @classmethod
    def tag(cls):
        """Return the experiment tag.

        Declared as a classmethod, like the sibling experiment configs, so it
        can be called on the class as well as on an instance.
        """
        return "ObjectNav-RoboTHOR-RGB-ResNet50GRU-DDPPO"


================================================
FILE: projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_unfrozenresnet18gru_ddppo.py
================================================
import torch.nn as nn

from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import (
    RGBSensorThor,
    GoalObjectTypeThorSensor,
)
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
    ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
    ObjectNavUnfrozenResNetWithGRUActorCriticMixin,
    ObjectNavPPOMixin,
)


class ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """An Object Navigation experiment configuration in RoboThor with RGB input
    without preprocessing by frozen ResNet (instead, a trainable ResNet).

    Model creation is delegated to
    `ObjectNavUnfrozenResNetWithGRUActorCriticMixin` with the "gnresnet18"
    backbone.
    """

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
    ]

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.model_creation_handler = ObjectNavUnfrozenResNetWithGRUActorCriticMixin(
            backbone="gnresnet18",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the `ObjectNavPPOMixin` pipeline with no auxiliary losses;
        `kwargs` are not used."""
        return ObjectNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model by forwarding `kwargs` to the model-creation
        mixin."""
        return self.model_creation_handler.create_model(**kwargs)

    @classmethod
    def tag(cls):
        """Return the experiment tag.

        Declared as a classmethod, like the sibling experiment configs, so it
        can be called on the class as well as on an instance.
        """
        return "ObjectNav-RoboTHOR-RGB-UnfrozenResNet18GRU-DDPPO"


================================================
FILE: projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnet18gru_ddppo.py
================================================
from typing import Sequence, Union

import torch.nn as nn

from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import (
    RGBSensorThor,
    GoalObjectTypeThorSensor,
)
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (
    ObjectNavRoboThorBaseConfig,
)
from projects.objectnav_baselines.mixins import (
    ResNetPreprocessGRUActorCriticMixin,
    ObjectNavPPOMixin,
)


class ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """An Object Navigation experiment configuration in RoboThor with RGBD
    input.

    Preprocessing and model creation are delegated to
    `ResNetPreprocessGRUActorCriticMixin` configured with the "RN18" ResNet
    type.
    """

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        DepthSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
    ]

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the `ObjectNavPPOMixin` pipeline with no auxiliary losses;
        `kwargs` are not used."""
        return ObjectNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors provided by the ResNet mixin."""
        return self.preprocessing_and_model.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model via the ResNet mixin, passing the action-space
        size along with `kwargs`."""
        return self.preprocessing_and_model.create_model(
            num_actions=self.ACTION_SPACE.n, **kwargs
        )

    @classmethod
    def tag(cls):
        """Return the experiment tag.

        Declared as a classmethod, like the sibling experiment configs, so it
        can be called on the class as well as on an instance.
        """
        return "ObjectNav-RoboTHOR-RGBD-ResNet18GRU-DDPPO"


================================================
FILE: projects/objectnav_baselines/mixins.py
================================================
from typing import Sequence, Union, Optional, Dict, Tuple, Type

import attr
import gym
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from torchvision import models

from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
    AbstractActorCriticLoss,
)
from allenact.algorithms.onpolicy_sync.losses.imitation import Imitation
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.base_abstractions.sensor import Sensor
from allenact.embodiedai.aux_losses.losses import (
    InverseDynamicsLoss,
    TemporalDistanceLoss,
    CPCA1Loss,
    CPCA2Loss,
    CPCA4Loss,
    CPCA8Loss,
    CPCA16Loss,
    MultiAuxTaskNegEntropyLoss,
    CPCA1SoftMaxLoss,
    CPCA2SoftMaxLoss,
    CPCA4SoftMaxLoss,
    CPCA8SoftMaxLoss,
    CPCA16SoftMaxLoss,
)
from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor
from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor
from allenact.utils.experiment_utils import (
    Builder,
    TrainingPipeline,
    PipelineStage,
    LinearDecay,
)
from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
from allenact_plugins.navigation_plugin.objectnav.models import (
    ResnetTensorNavActorCritic,
    ObjectNavActorCritic,
)
from allenact_plugins.robothor_plugin.robothor_tasks import ObjectNavTask


@attr.s(kw_only=True)
class ResNetPreprocessGRUActorCriticMixin:
    """Builds torchvision-ResNet preprocessors for the RGB and/or depth
    sensors in `sensors`, and a `ResnetTensorNavActorCritic` that consumes
    their outputs."""

    sensors: Sequence[Sensor] = attr.ib()
    resnet_type: str = attr.ib()
    screen_size: int = attr.ib()
    goal_sensor_type: Type[Sensor] = attr.ib()

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return one `ResNetPreprocessor` for the first RGB sensor and one
        for the first depth sensor found in `self.sensors` (RGB first).

        Raises `NotImplementedError` when `resnet_type` is not one of the
        supported "RNx" names.
        """
        if self.resnet_type in ["RN18", "RN34"]:
            channels = 512
        elif self.resnet_type in ["RN50", "RN101", "RN152"]:
            channels = 2048
        else:
            raise NotImplementedError(
                f"`RESNET_TYPE` must be one 'RNx' with x equaling one of"
                f" 18, 34, 50, 101, or 152."
            )
        out_dims, out_height, out_width = channels, 7, 7

        # E.g. "RN18" -> "resnet18", the name of the torchvision constructor.
        torchvision_name = f"resnet{self.resnet_type.replace('RN', '')}"

        built = []
        for sensor_cls, output_uuid in (
            (RGBSensor, "rgb_resnet_imagenet"),
            (DepthSensor, "depth_resnet_imagenet"),
        ):
            sensor = next(
                (s for s in self.sensors if isinstance(s, sensor_cls)), None
            )
            if sensor is None:
                continue
            built.append(
                ResNetPreprocessor(
                    input_height=self.screen_size,
                    input_width=self.screen_size,
                    output_width=out_width,
                    output_height=out_height,
                    output_dims=out_dims,
                    pool=False,
                    torchvision_resnet_model=getattr(models, torchvision_name),
                    input_uuids=[sensor.uuid],
                    output_uuid=output_uuid,
                )
            )

        return built

    def create_model(self, **kwargs) -> nn.Module:
        """Create a `ResnetTensorNavActorCritic`.

        `kwargs["sensor_preprocessor_graph"]` must be present; its
        `observation_spaces` become the model's observation space. The goal
        sensor uuid is that of the first sensor of `goal_sensor_type`, or
        `None` if there is none.
        """
        rgb_preprocessor_uuid = None
        if any(isinstance(s, RGBSensor) for s in self.sensors):
            rgb_preprocessor_uuid = "rgb_resnet_imagenet"

        depth_preprocessor_uuid = None
        if any(isinstance(s, DepthSensor) for s in self.sensors):
            depth_preprocessor_uuid = "depth_resnet_imagenet"

        goal_uuids = (
            s.uuid for s in self.sensors if isinstance(s, self.goal_sensor_type)
        )
        goal_sensor_uuid = next(goal_uuids, None)

        num_actions = len(ObjectNavTask.class_action_names())
        return ResnetTensorNavActorCritic(
            action_space=gym.spaces.Discrete(num_actions),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            goal_sensor_uuid=goal_sensor_uuid,
            rgb_resnet_preprocessor_uuid=rgb_preprocessor_uuid,
            depth_resnet_preprocessor_uuid=depth_preprocessor_uuid,
            hidden_size=512,
            goal_dims=32,
        )


@attr.s(kw_only=True)
class ObjectNavUnfrozenResNetWithGRUActorCriticMixin:
    """Holds the options needed to build an `ObjectNavActorCritic` with a GRU.

    All fields are keyword-only `attr.ib` attributes; `create_model` turns them
    into the constructor arguments of `ObjectNavActorCritic`.
    """

    # Backbone identifier, forwarded unchanged to `ObjectNavActorCritic`.
    backbone: str = attr.ib()
    # Sensors searched for the RGB, depth and goal-object-type uuids.
    sensors: Sequence[Sensor] = attr.ib()
    # Uuids of the auxiliary tasks, forwarded unchanged to the model.
    auxiliary_uuids: Sequence[str] = attr.ib()
    # Forwarded unchanged to the model as `add_prev_actions`.
    add_prev_actions: bool = attr.ib()
    # Forwarded to the model; also influences the hidden size (see `create_model`).
    multiple_beliefs: bool = attr.ib()
    # Forwarded to the model as `beliefs_fusion`.
    belief_fusion: Optional[str] = attr.ib()

    def create_model(self, **kwargs) -> nn.Module:
        """Instantiate the `ObjectNavActorCritic` described by this mixin.

        `kwargs` must contain `"sensor_preprocessor_graph"`; its
        `observation_spaces` attribute becomes the model's observation space.

        The RGB and depth uuids are `None` when no sensor of that type is
        present. The goal sensor lookup has no fallback, so a sensor list
        without a `GoalObjectTypeThorSensor` raises `StopIteration`.
        """

        def uuid_of_first(sensor_type):
            # uuid of the first sensor of `sensor_type`, or None if absent.
            for sensor in self.sensors:
                if isinstance(sensor, sensor_type):
                    return sensor.uuid
            return None

        rgb_sensor_uuid = uuid_of_first(RGBSensor)
        depth_sensor_uuid = uuid_of_first(DepthSensor)
        # Deliberately no default: a missing goal sensor must not pass silently.
        goal_uuid = next(
            sensor.uuid
            for sensor in self.sensors
            if isinstance(sensor, GoalObjectTypeThorSensor)
        )

        # A smaller hidden size is used only when multiple beliefs are enabled
        # and more than one auxiliary task is selected.
        if self.multiple_beliefs and len(self.auxiliary_uuids) > 1:
            belief_hidden_size = 192
        else:
            belief_hidden_size = 512

        num_actions = len(ObjectNavTask.class_action_names())
        preprocessor_graph = kwargs["sensor_preprocessor_graph"]

        return ObjectNavActorCritic(
            action_space=gym.spaces.Discrete(num_actions),
            observation_space=preprocessor_graph.observation_spaces,
            rgb_uuid=rgb_sensor_uuid,
            depth_uuid=depth_sensor_uuid,
            goal_sensor_uuid=goal_uuid,
            hidden_size=belief_hidden_size,
            backbone=self.backbone,
            resnet_baseplanes=32,
            object_type_embedding_dim=32,
            num_rnn_layers=1,
            rnn_type="GRU",
            add_prev_actions=self.add_prev_actions,
            action_embed_size=6,
            auxiliary_uuids=self.auxiliary_uuids,
            multiple_beliefs=self.multiple_beliefs,
            beliefs_fusion=self.belief_fusion,
        )


class ObjectNavDAggerMixin:
    """Supplies an imitation-learning training pipeline with teacher forcing."""

    @staticmethod
    def training_pipeline(
        advance_scene_rollout_period: Optional[int] = None,
    ) -> TrainingPipeline:
        """Build the two-stage imitation `TrainingPipeline`.

        Stage one runs for 5e6 steps with the teacher-forcing schedule fixed at
        1.0 (a `LinearDecay` from 1.0 to 1.0). Stage two covers all remaining
        steps (300M in total) with a `LinearDecay` from 1.0 to 0.0 whose
        `steps` is 5e6. Both stages optimise only the `"imitation_loss"`.

        `advance_scene_rollout_period` is forwarded unchanged to
        `TrainingPipeline`.
        """
        total_steps = int(300000000)
        teacher_forcing_steps = int(5e6)
        annealing_steps = int(5e6)
        free_running_steps = total_steps - teacher_forcing_steps - annealing_steps
        assert free_running_steps > 0

        loss_key = "imitation_loss"

        adam_builder = Builder(optim.Adam, {"lr": 3e-4})
        imitation_losses = {loss_key: Imitation()}

        # Stage 1: always follow the expert.
        forced_stage = PipelineStage(
            loss_names=[loss_key],
            max_stage_steps=teacher_forcing_steps,
            teacher_forcing=LinearDecay(
                startp=1.0,
                endp=1.0,
                steps=teacher_forcing_steps,
            ),
        )
        # Stage 2: the schedule goes from 1.0 to 0.0 over `annealing_steps`;
        # the stage itself lasts for the annealing plus the remaining steps.
        annealed_stage = PipelineStage(
            loss_names=[loss_key],
            max_stage_steps=annealing_steps + free_running_steps,
            teacher_forcing=LinearDecay(
                startp=1.0,
                endp=0.0,
                steps=annealing_steps,
            ),
        )

        scheduler_builder = Builder(
            LambdaLR,
            {"lr_lambda": LinearDecay(steps=total_steps)},
        )

        return TrainingPipeline(
            save_interval=5000000,
            # Accumulate metrics every step when no GPU is available.
            metric_accumulate_interval=10000 if torch.cuda.is_available() else 1,
            optimizer_builder=adam_builder,
            num_mini_batch=1,
            update_repeats=4,
            max_grad_norm=0.5,
            num_steps=128,
            named_losses=imitation_losses,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=advance_scene_rollout_period,
            pipeline_stages=[forced_stage, annealed_stage],
            lr_scheduler_builder=scheduler_builder,
        )


def update_with_auxiliary_losses(
    named_losses: Dict[str, Tuple[AbstractActorCriticLoss, float]],
    auxiliary_uuids: Sequence[str],
    multiple_beliefs: bool,
) -> Dict[str, Tuple[AbstractActorCriticLoss, float]]:
    """Add the requested auxiliary losses, with their weights, to `named_losses`.

    `named_losses` is modified in place and is also the return value. Each
    entry maps a loss uuid to a `(loss, weight)` tuple. When
    `multiple_beliefs` is truthy, a `MultiAuxTaskNegEntropyLoss` over
    `auxiliary_uuids` is added as well, with weight 0.01.

    Raises `KeyError` if `auxiliary_uuids` contains a uuid that is not one of
    the auxiliary losses known to this function; in that case `named_losses`
    is left untouched.
    """
    # Every base weight below is multiplied by this common factor.
    weight_scale = 2.0

    def scaled(base_weight: float) -> float:
        return base_weight * weight_scale

    # TODO: test the effects of the subsampling / pair-count settings below.
    catalogue: Dict[str, Tuple[AbstractActorCriticLoss, float]] = {
        InverseDynamicsLoss.UUID: (
            InverseDynamicsLoss(subsample_rate=0.2, subsample_min_num=10),
            scaled(0.05),
        ),
        TemporalDistanceLoss.UUID: (
            # `epsiode_len_min` is spelled as the loss' keyword expects it.
            TemporalDistanceLoss(num_pairs=8, epsiode_len_min=5),
            scaled(0.2),
        ),
    }

    # The CPC|A variants differ only in their class; they share rate and weight.
    for cpca_cls in (CPCA1Loss, CPCA2Loss, CPCA4Loss, CPCA8Loss, CPCA16Loss):
        catalogue[cpca_cls.UUID] = (cpca_cls(subsample_rate=0.2), scaled(0.05))

    for softmax_cls in (
        CPCA1SoftMaxLoss,
        CPCA2SoftMaxLoss,
        CPCA4SoftMaxLoss,
        CPCA8SoftMaxLoss,
        CPCA16SoftMaxLoss,
    ):
        catalogue[softmax_cls.UUID] = (
            softmax_cls(subsample_rate=1.0),
            scaled(0.05),
        )

    # Resolve every uuid first so an unknown uuid fails before any mutation.
    selected = {uuid: catalogue[uuid] for uuid in auxiliary_uuids}
    named_losses.update(selected)

    if not multiple_beliefs:
        return named_losses

    # add weight entropy loss automatically
    named_losses[MultiAuxTaskNegEntropyLoss.UUID] = (
        MultiAuxTaskNegEntropyLoss(auxiliary_uuids),
        0.01,
    )
    return named_losses


class ObjectNavPPOMixin:
    """Supplies a single-stage PPO training pipeline with optional extra losses."""

    @staticmethod
    def training_pipeline(
        auxiliary_uuids: Sequence[str],
        multiple_beliefs: bool,
        normalize_advantage: bool = True,
        advance_scene_rollout_period: Optional[int] = None,
        lr=3e-4,
        num_mini_batch=1,
        update_repeats=4,
        num_steps=128,
        save_interval=5000000,
        log_interval=10000 if torch.cuda.is_available() else 1,
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
        max_grad_norm=0.5,
        anneal_lr: bool = True,
        extra_losses: Optional[Dict[str, Tuple[AbstractActorCriticLoss, float]]] = None,
    ) -> TrainingPipeline:
        """Build a `TrainingPipeline` with one 300M-step PPO stage.

        The stage optimises `"ppo_loss"` (weight 1.0), every `(loss, weight)`
        pair in `extra_losses`, and whatever `update_with_auxiliary_losses`
        adds for `auxiliary_uuids` / `multiple_beliefs`. When `anneal_lr` is
        truthy a `LambdaLR` scheduler driven by a `LinearDecay` over the stage
        length is used; otherwise no scheduler is configured. The remaining
        arguments are forwarded unchanged to `TrainingPipeline`
        (`log_interval` as `metric_accumulate_interval`).
        """
        total_ppo_steps = int(300000000)

        weighted_losses = {
            "ppo_loss": (
                PPO(**PPOConfig, normalize_advantage=normalize_advantage),
                1.0,
            ),
        }
        if extra_losses is not None:
            weighted_losses.update(extra_losses)

        weighted_losses = update_with_auxiliary_losses(
            named_losses=weighted_losses,
            auxiliary_uuids=auxiliary_uuids,
            multiple_beliefs=multiple_beliefs,
        )

        # Split the (loss, weight) tuples into parallel name / loss / weight views.
        stage_loss_names = list(weighted_losses)
        losses_by_name = {name: weighted_losses[name][0] for name in stage_loss_names}
        stage_loss_weights = [weighted_losses[name][1] for name in stage_loss_names]

        adam_builder = Builder(optim.Adam, {"lr": lr})

        ppo_stage = PipelineStage(
            loss_names=stage_loss_names,
            max_stage_steps=total_ppo_steps,
            loss_weights=stage_loss_weights,
        )

        scheduler_builder = None
        if anneal_lr:
            scheduler_builder = Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=total_ppo_steps)}
            )

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=adam_builder,
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses=losses_by_name,
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=advance_scene_rollout_period,
            pipeline_stages=[ppo_stage],
            lr_scheduler_builder=scheduler_builder,
        )


================================================
FILE: projects/pointnav_baselines/README.md
================================================
# Baseline models for the Point Navigation task in the Habitat, RoboTHOR and iTHOR environments

This project contains the code for training baseline models on the PointNav task. In this setting the agent
spawns at a location in an environment and is tasked to move to another location. The agent is given a "compass"
that tells it the distance and bearing to the target position at every frame. Once the agent is confident that
it has reached the target, it executes the `END` action, which terminates the episode. If the agent is within a set
distance of the target (in our case 0.2 meters), the agent succeeded; otherwise it failed.

Provided are experiment configs for training a simple convolutional model with
a GRU using `RGB`, `Depth` or `RGBD` as inputs in [Habitat](https://github.com/facebookresearch/habitat-sim), 
[RoboTHOR](https://ai2thor.allenai.org/robothor/) and [iTHOR](https://ai2thor.allenai.org/ithor/).

The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)
Reinforcement Learning Algorithm.

To train an experiment run the following command from the `allenact` root directory:

```bash
python main.py -o <PATH_TO_OUTPUT> -c -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> <EXPERIMENT_NAME>
```

Where `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights
and logs to be stored, `<BASE_DIRECTORY_OF_YOUR_EXPERIMENT>` is the directory where our
experiment file is located and `<EXPERIMENT_NAME>` is the name of the python module containing
the experiment. An example usage of this command would be:

```bash
python main.py -o storage/pointnav-robothor-depth -b projects/pointnav_baselines/experiments/robothor/ pointnav_robothor_depth_simpleconvgru_ddppo
```

This trains a simple convolutional neural network with a GRU using Depth input on the
PointNav task in the RoboTHOR environment and stores the model weights and logs
to `storage/pointnav-robothor-depth`.


================================================
FILE: projects/pointnav_baselines/__init__.py
================================================


================================================
FILE: projects/pointnav_baselines/experiments/__init__.py
================================================


================================================
FILE: projects/pointnav_baselines/experiments/habitat/__init__.py
================================================


================================================
FILE: projects/pointnav_baselines/experiments/habitat/clip/__init__.py
================================================


================================================
FILE: projects/pointnav_baselines/experiments/habitat/clip/pointnav_habitat_rgb_clipresnet50gru_ddppo.py
================================================
from typing import Sequence, Union

import torch.nn as nn

from allenact.base_abstractions.preprocessor import Preprocessor
from allenact.utils.experiment_utils import Builder, TrainingPipeline
from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
from allenact_plugins.habitat_plugin.habitat_sensors import (
    RGBSensorHabitat,
    TargetCoordinatesSensorHabitat,
)
from projects.objectnav_baselines.experiments.clip.mixins import (
    ClipResNetPreprocessGRUActorCriticMixin,
)
from projects.pointnav_baselines.experiments.habitat.pointnav_habitat_base import (
    PointNavHabitatBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin


class PointNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig(PointNavHabitatBaseConfig):
    """A Point Navigation experiment configuration in Habitat with RGB input,
    using the CLIP ResNet preprocessing / GRU actor-critic mixin with the
    `RN50` CLIP model type."""

    CLIP_MODEL_TYPE = "RN50"

    # RGB frames are normalized with the CLIP preprocessor's means / stds.
    SENSORS = [
        RGBSensorHabitat(
            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
            use_resnet_normalization=True,
            height=PointNavHabitatBaseConfig.SCREEN_SIZE,
            width=PointNavHabitatBaseConfig.SCREEN_SIZE,
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
    ]

    def __init__(self, add_prev_actions: bool = False, **kwargs):
        """Set up the base config and the preprocessing / model helper.

        `add_prev_actions` is stored and later passed to the model factory;
        all other keyword arguments go to the base config constructor.
        """
        super().__init__(**kwargs)

        self.add_prev_actions = add_prev_actions
        self.preprocessing_and_model = ClipResNetPreprocessGRUActorCriticMixin(
            goal_sensor_type=TargetCoordinatesSensorHabitat,
            screen_size=self.SCREEN_SIZE,
            clip_model_type=self.CLIP_MODEL_TYPE,
            sensors=self.SENSORS,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PointNav PPO pipeline without auxiliary losses and
        without advantage normalization. `kwargs` is ignored."""
        pipeline_options = {
            "auxiliary_uuids": [],
            "multiple_beliefs": False,
            "normalize_advantage": False,
            "advance_scene_rollout_period": self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        }
        return PointNavPPOMixin.training_pipeline(**pipeline_options)

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors provided by the CLIP mixin helper."""
        helper = self.preprocessing_and_model
        return helper.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model via the CLIP mixin helper, adding the number of
        actions and the stored `add_prev_actions` flag to `kwargs`."""
        action_count = self.ACTION_SPACE.n
        helper = self.preprocessing_and_model
        return helper.create_model(
            num_actions=action_count,
            add_prev_actions=self.add_prev_actions,
            **kwargs,
        )

    @classmethod
    def tag(cls):
        """Return the experiment's tag string."""
        return "PointNav-Habitat-RGB-ClipResNet50GRU-DDPPO"


================================================
FILE: projects/pointnav_baselines/experiments/habitat/pointnav_habitat_base.py
================================================
import os
from abc import ABC
from typing import Dict, Any, List, Optional, Sequence, Union

import gym
import torch

# noinspection PyUnresolvedReferences
import habitat
from allenact.base_abstractions.experiment_config import MachineParams
from allenact.base_abstractions.preprocessor import (
    SensorPreprocessorGraph,
    Preprocessor,
)
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor
from allenact.utils.experiment_utils import evenly_distribute_count_into_bins, Builder
from allenact.utils.system import get_logger
from allenact_plugins.habitat_plugin.habitat_constants import (
    HABITAT_DATASETS_DIR,
    HABITAT_CONFIGS_DIR,
    HABITAT_SCENE_DATASETS_DIR,
)
from allenact_plugins.habitat_plugin.habitat_task_samplers import PointNavTaskSampler
from allenact_plugins.habitat_plugin.habitat_tasks import PointNavTask
from allenact_plugins.habitat_plugin.habitat_utils import (
    get_habitat_config,
    construct_env_configs,
)
from projects.pointnav_baselines.experiments.pointnav_base import PointNavBaseConfig


def create_pointnav_config(
    config_yaml_path: str,
    mode: str,
    scenes_path: str,
    simulator_gpu_ids: Sequence[int],
    distance_to_goal: float,
    rotation_degrees: float,
    step_size: float,
    max_steps: int,
    num_processes: int,
    camera_width: int,
    camera_height: int,
    using_rgb: bool,
    using_depth: bool,
    training: bool,
    num_episode_sample: int,
) -> habitat.Config:
    """Load a Habitat config from `config_yaml_path` and specialise it for PointNav.

    The loaded config is defrosted, overwritten with the given process,
    dataset, simulator, environment and task settings, and frozen again before
    being returned.

    When `training` is false the seed is fixed to 0 and episode shuffling is
    turned off. `num_episode_sample` is applied only when it is positive.
    """
    config = get_habitat_config(config_yaml_path)
    config.defrost()

    # --- processes and dataset ---
    config.NUM_PROCESSES = num_processes
    config.SIMULATOR_GPU_IDS = simulator_gpu_ids
    dataset_cfg = config.DATASET
    dataset_cfg.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR
    dataset_cfg.DATA_PATH = scenes_path

    # --- simulator: enabled sensors, resolution, motion ---
    simulator_cfg = config.SIMULATOR
    enabled_sensors = []
    if using_rgb:
        enabled_sensors.append("RGB_SENSOR")
    if using_depth:
        enabled_sensors.append("DEPTH_SENSOR")
    simulator_cfg.AGENT_0.SENSORS = enabled_sensors

    # Both camera configs get the same resolution, whether or not enabled.
    for camera_cfg in (simulator_cfg.RGB_SENSOR, simulator_cfg.DEPTH_SENSOR):
        camera_cfg.WIDTH = camera_width
        camera_cfg.HEIGHT = camera_height
    simulator_cfg.TURN_ANGLE = rotation_degrees
    simulator_cfg.FORWARD_STEP_SIZE = step_size

    environment_cfg = config.ENVIRONMENT
    environment_cfg.MAX_EPISODE_STEPS = max_steps

    # --- task: point goal given in polar form, success / SPL measurements ---
    task_cfg = config.TASK
    task_cfg.TYPE = "Nav-v0"
    task_cfg.SUCCESS_DISTANCE = distance_to_goal
    task_cfg.SENSORS = ["POINTGOAL_WITH_GPS_COMPASS_SENSOR"]
    goal_sensor_cfg = task_cfg.POINTGOAL_WITH_GPS_COMPASS_SENSOR
    goal_sensor_cfg.GOAL_FORMAT = "POLAR"
    goal_sensor_cfg.DIMENSIONALITY = 2
    task_cfg.GOAL_SENSOR_UUID = "pointgoal_with_gps_compass"
    task_cfg.MEASUREMENTS = ["DISTANCE_TO_GOAL", "SUCCESS", "SPL"]
    task_cfg.SPL.TYPE = "SPL"
    task_cfg.SPL.SUCCESS_DISTANCE = distance_to_goal
    task_cfg.SUCCESS.SUCCESS_DISTANCE = distance_to_goal

    if not training:
        # Fixed seed and fixed episode order outside of training.
        config.SEED = 0
        environment_cfg.ITERATOR_OPTIONS.SHUFFLE = False

    if num_episode_sample > 0:
        environment_cfg.ITERATOR_OPTIONS.NUM_EPISODE_SAMPLE = num_episode_sample

    config.MODE = mode

    config.freeze()
    return config


class PointNavHabitatBaseConfig(PointNavBaseConfig, ABC):
    """The base config for all Habitat PointNav experiments.

    On construction this builds frozen Habitat configs for training,
    validation and testing (`TRAIN_CONFIG`, `VALID_CONFIG`, `TEST_CONFIG`) and
    splits the train / test configs per process. Values such as `SENSORS`,
    `MAX_STEPS`, `DISTANCE_TO_GOAL`, `ROTATION_DEGREES`, `STEP_SIZE`,
    `CAMERA_WIDTH` and `CAMERA_HEIGHT` are read from `self` but are not defined
    in this class; they are expected to come from `PointNavBaseConfig` or a
    concrete subclass.
    """

    # selected auxiliary uuids
    ## if comment all the keys, then it's vanilla DD-PPO
    AUXILIARY_UUIDS = [
        # InverseDynamicsLoss.UUID,
        # TemporalDistanceLoss.UUID,
        # CPCA1Loss.UUID,
        # CPCA4Loss.UUID,
        # CPCA8Loss.UUID,
        # CPCA16Loss.UUID,
    ]
    ADD_PREV_ACTIONS = False
    MULTIPLE_BELIEFS = False
    BELIEF_FUSION = (  # choose one
        None
        # AttentiveFusion
        # AverageFusion
        # SoftmaxFusion
    )

    # Passed to the task sampler as `failed_end_reward` (see `make_sampler_fn`).
    FAILED_END_REWARD = -1.0

    # Formatted with the split name twice, e.g. ".../train/train.json.gz".
    TASK_DATA_DIR_TEMPLATE = os.path.join(
        HABITAT_DATASETS_DIR, "pointnav/gibson/v1/{}/{}.json.gz"
    )
    BASE_CONFIG_YAML_PATH = os.path.join(
        HABITAT_CONFIGS_DIR, "tasks/pointnav_gibson.yaml"
    )

    ACTION_SPACE = gym.spaces.Discrete(len(PointNavTask.class_action_names()))

    DEFAULT_NUM_TRAIN_PROCESSES = (
        5 * torch.cuda.device_count() if torch.cuda.is_available() else 1
    )
    # Used as the default number of test processes when CUDA is available.
    DEFAULT_NUM_TEST_PROCESSES = 10

    DEFAULT_TRAIN_GPU_IDS = tuple(range(torch.cuda.device_count()))
    # Validation / testing default to the last visible GPU.
    DEFAULT_VALID_GPU_IDS = [torch.cuda.device_count() - 1]
    DEFAULT_TEST_GPU_IDS = [torch.cuda.device_count() - 1]

    def __init__(
        self,
        debug: bool = False,
        num_train_processes: Optional[int] = None,
        num_test_processes: Optional[int] = None,
        test_on_validation: bool = False,
        run_valid: bool = True,
        train_gpu_ids: Optional[Sequence[int]] = None,
        val_gpu_ids: Optional[Sequence[int]] = None,
        test_gpu_ids: Optional[Sequence[int]] = None,
        **kwargs,
    ):
        """Resolve process / GPU settings and build the Habitat configs.

        Every `None` argument falls back to the corresponding class default.
        The default number of test processes is `DEFAULT_NUM_TEST_PROCESSES`
        when CUDA is available and 1 otherwise; validation GPU ids default to
        an empty list when `run_valid` is false. With `debug` set, every
        per-process training config is restricted to the `Adrian` scene.
        Remaining keyword arguments are passed to the parent constructor.
        """
        super().__init__(**kwargs)

        def v_or_default(v, default):
            return v if v is not None else default

        self.num_train_processes = v_or_default(
            num_train_processes, self.DEFAULT_NUM_TRAIN_PROCESSES
        )
        # Honour the class-level default instead of a duplicated literal so
        # subclasses can change it in one place.
        self.num_test_processes = v_or_default(
            num_test_processes,
            self.DEFAULT_NUM_TEST_PROCESSES if torch.cuda.is_available() else 1,
        )
        self.test_on_validation = test_on_validation
        self.run_valid = run_valid
        self.train_gpu_ids = v_or_default(train_gpu_ids, self.DEFAULT_TRAIN_GPU_IDS)
        self.val_gpu_ids = v_or_default(
            val_gpu_ids, self.DEFAULT_VALID_GPU_IDS if run_valid else []
        )
        self.test_gpu_ids = v_or_default(test_gpu_ids, self.DEFAULT_TEST_GPU_IDS)

        def create_config(
            mode: str,
            scenes_path: str,
            num_processes: int,
            simulator_gpu_ids: Sequence[int],
            training: bool = True,
            num_episode_sample: int = -1,
        ):
            # Fills in everything that is shared by the three configs below.
            return create_pointnav_config(
                config_yaml_path=self.BASE_CONFIG_YAML_PATH,
                mode=mode,
                scenes_path=scenes_path,
                simulator_gpu_ids=simulator_gpu_ids,
                distance_to_goal=self.DISTANCE_TO_GOAL,
                rotation_degrees=self.ROTATION_DEGREES,
                step_size=self.STEP_SIZE,
                max_steps=self.MAX_STEPS,
                num_processes=num_processes,
                camera_width=self.CAMERA_WIDTH,
                camera_height=self.CAMERA_HEIGHT,
                using_rgb=any(isinstance(s, RGBSensor) for s in self.SENSORS),
                using_depth=any(isinstance(s, DepthSensor) for s in self.SENSORS),
                training=training,
                num_episode_sample=num_episode_sample,
            )

        self.TRAIN_CONFIG = create_config(
            mode="train",
            scenes_path=self.train_scenes_path(),
            num_processes=self.num_train_processes,
            simulator_gpu_ids=self.train_gpu_ids,
            training=True,
        )
        # Validation uses a single process and a sample of 200 episodes.
        self.VALID_CONFIG = create_config(
            mode="validate",
            scenes_path=self.valid_scenes_path(),
            num_processes=1,
            simulator_gpu_ids=self.val_gpu_ids,
            training=False,
            num_episode_sample=200,
        )
        self.TEST_CONFIG = create_config(
            mode="validate",
            scenes_path=self.test_scenes_path(),
            num_processes=self.num_test_processes,
            simulator_gpu_ids=self.test_gpu_ids,
            training=False,
        )

        self.TRAIN_CONFIGS_PER_PROCESS = construct_env_configs(
            self.TRAIN_CONFIG, allow_scene_repeat=True
        )

        if debug:
            get_logger().warning("IN DEBUG MODE, WILL ONLY USE `Adrian` SCENE!!!")
            for config in self.TRAIN_CONFIGS_PER_PROCESS:
                config.defrost()
                config.DATASET.CONTENT_SCENES = ["Adrian"]
                config.freeze()

        self.TEST_CONFIG_PER_PROCESS = construct_env_configs(
            self.TEST_CONFIG, allow_scene_repeat=False
        )

    def train_scenes_path(self):
        """Return the dataset path of the `train` split."""
        return self.TASK_DATA_DIR_TEMPLATE.format(*(["train"] * 2))

    def valid_scenes_path(self):
        """Return the dataset path of the `val` split."""
        return self.TASK_DATA_DIR_TEMPLATE.format(*(["val"] * 2))

    def test_scenes_path(self):
        """Return the dataset path used for testing.

        This currently points at the `val` split (and logs a warning saying
        so) rather than at a `test` split.
        """
        get_logger().warning("Running tests on the validation set!")
        return self.TASK_DATA_DIR_TEMPLATE.format(*(["val"] * 2))
        # return self.TASK_DATA_DIR_TEMPLATE.format(*(["test"] * 2))

    @classmethod
    def tag(cls):
        """Return the generic tag of this base config."""
        return "PointNav"

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors to apply; none for the base config."""
        return tuple()

    def machine_params(self, mode="train", **kwargs):
        """Return the `MachineParams` (processes, devices, preprocessor graph).

        Without CUDA a single process and no devices are used for every mode.
        With CUDA the process count of the mode is distributed evenly over the
        mode's GPU ids. The sensor preprocessor graph is built for training
        and for any mode that ends up with at least one process; otherwise it
        is `None`.

        Raises `NotImplementedError` when CUDA is available and `mode` is not
        one of `"train"`, `"valid"` or `"test"`.
        """
        has_gpus = torch.cuda.is_available()
        if not has_gpus:
            gpu_ids = []
            nprocesses = 1
        elif mode == "train":
            gpu_ids = self.train_gpu_ids
            nprocesses = self.num_train_processes
        elif mode == "valid":
            gpu_ids = self.val_gpu_ids
            nprocesses = 1 if self.run_valid else 0
        elif mode == "test":
            gpu_ids = self.test_gpu_ids
            nprocesses = self.num_test_processes
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        if has_gpus:
            # From here on `nprocesses` is a per-device list rather than an int.
            nprocesses = evenly_distribute_count_into_bins(nprocesses, len(gpu_ids))

        sensor_preprocessor_graph = (
            SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
                preprocessors=self.preprocessors(),
            )
            if mode == "train"
            or (
                (isinstance(nprocesses, int) and nprocesses > 0)
                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
            )
            else None
        )

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `PointNavTaskSampler` from `kwargs`.

        `failed_end_reward` defaults to `FAILED_END_REWARD` and can be
        overridden through `kwargs`.
        """
        return PointNavTaskSampler(
            **{"failed_end_reward": cls.FAILED_END_REWARD, **kwargs}  # type: ignore
        )

    def _task_sampler_args(self, env_config) -> Dict[str, Any]:
        """Return the sampler arguments shared by train, validation and test."""
        return {
            "env_config": env_config,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": self.ACTION_SPACE,
            "distance_to_goal": self.DISTANCE_TO_GOAL,
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for training process `process_ind`.

        Only `process_ind` is used; it selects the per-process training
        config.
        """
        return self._task_sampler_args(self.TRAIN_CONFIGS_PER_PROCESS[process_ind])

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for validation.

        Raises `NotImplementedError` unless `total_processes` equals 1.
        """
        if total_processes != 1:
            raise NotImplementedError(
                "In validation, `total_processes` must equal 1 for habitat tasks"
            )
        return self._task_sampler_args(self.VALID_CONFIG)

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for test process `process_ind`.

        Only `process_ind` is used; it selects the per-process test config.
        """
        return self._task_sampler_args(self.TEST_CONFIG_PER_PROCESS[process_ind])


================================================
FILE: projects/pointnav_baselines/experiments/habitat/pointnav_habitat_depth_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.habitat_plugin.habitat_sensors import (
    DepthSensorHabitat,
    TargetCoordinatesSensorHabitat,
)
from projects.pointnav_baselines.experiments.habitat.pointnav_habitat_base import (
    PointNavHabitatBaseConfig,
)
from projects.pointnav_baselines.mixins import (
    PointNavPPOMixin,
    PointNavUnfrozenResNetWithGRUActorCriticMixin,
)


class PointNavHabitatDepthDeterministiSimpleConvGRUDDPPOExperimentConfig(
    PointNavHabitatBaseConfig,
):
    """A Point Navigation experiment configuration in Habitat with Depth
    input."""

    SENSORS = [
        DepthSensorHabitat(
            height=PointNavHabitatBaseConfig.SCREEN_SIZE,
            width=PointNavHabitatBaseConfig.SCREEN_SIZE,
            use_normalization=True,
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
    ]

    def __init__(self, **kwargs):
        """Set up the base config and the model-creation helper.

        Keyword arguments are forwarded to `PointNavHabitatBaseConfig`, so
        its options (e.g. `debug`, process counts, GPU ids) can be supplied;
        calling with no arguments behaves as before.
        """
        super().__init__(**kwargs)

        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PointNav PPO pipeline without auxiliary losses and with
        advantage normalization. `kwargs` is ignored."""
        return PointNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def create_model(self, **kwargs):
        """Create the model through the model-creation helper."""
        return self.model_creation_handler.create_model(**kwargs)

    # A classmethod, like the base class' `tag`, so it can be called on the
    # class itself as well as on an instance.
    @classmethod
    def tag(cls):
        """Return the experiment's tag string."""
        return "PointNav-Habitat-Depth-SimpleConv-DDPPO"


================================================
FILE: projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgb_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.habitat_plugin.habitat_sensors import RGBSensorHabitat
from allenact_plugins.habitat_plugin.habitat_sensors import (
    TargetCoordinatesSensorHabitat,
)
from projects.pointnav_baselines.experiments.habitat.pointnav_habitat_base import (
    PointNavHabitatBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin
from projects.pointnav_baselines.mixins import (
    PointNavUnfrozenResNetWithGRUActorCriticMixin,
)


class PointNavHabitatDepthDeterministiSimpleConvGRUDDPPOExperimentConfig(
    PointNavHabitatBaseConfig
):
    """A Point Navigation experiment configuration in Habitat with RGB
    input.

    NOTE: despite the "Depth" in the class name, this config uses an RGB
    sensor; the name is kept unchanged so existing references keep working.
    """

    SENSORS = [
        RGBSensorHabitat(
            height=PointNavHabitatBaseConfig.SCREEN_SIZE,
            width=PointNavHabitatBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
    ]

    def __init__(self, **kwargs):
        """Set up the base config and the model-creation helper.

        Keyword arguments are forwarded to `PointNavHabitatBaseConfig`, so
        its options (e.g. `debug`, process counts, GPU ids) can be supplied;
        calling with no arguments behaves as before.
        """
        super().__init__(**kwargs)

        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PointNav PPO pipeline without auxiliary losses and with
        advantage normalization. `kwargs` is ignored."""
        return PointNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def create_model(self, **kwargs):
        """Create the model through the model-creation helper."""
        return self.model_creation_handler.create_model(**kwargs)

    @classmethod
    def tag(cls):
        """Return the experiment's tag string."""
        return "PointNav-Habitat-RGB-SimpleConv-DDPPO"


================================================
FILE: projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgbd_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.habitat_plugin.habitat_sensors import DepthSensorHabitat
from allenact_plugins.habitat_plugin.habitat_sensors import RGBSensorHabitat
from allenact_plugins.habitat_plugin.habitat_sensors import (
    TargetCoordinatesSensorHabitat,
)
from projects.pointnav_baselines.experiments.habitat.pointnav_habitat_base import (
    PointNavHabitatBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin
from projects.pointnav_baselines.mixins import (
    PointNavUnfrozenResNetWithGRUActorCriticMixin,
)


class PointNavHabitatDepthDeterministiSimpleConvGRUDDPPOExperimentConfig(
    PointNavHabitatBaseConfig
):
    """A Point Navigation experiment configuration in Habitat with RGB-D
    input.

    The sensors are an RGB camera, a depth camera and a 2-dimensional
    target-coordinates sensor; the model uses the `simple_cnn` backbone.
    """

    SENSORS = [
        RGBSensorHabitat(
            height=PointNavHabitatBaseConfig.SCREEN_SIZE,
            width=PointNavHabitatBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
        ),
        DepthSensorHabitat(
            height=PointNavHabitatBaseConfig.SCREEN_SIZE,
            width=PointNavHabitatBaseConfig.SCREEN_SIZE,
            use_normalization=True,
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
    ]

    def __init__(self):
        """Run the base-class setup and prepare the model-creation helper."""
        super().__init__()

        # Gather the model options first, then hand them over in one go.
        model_options = dict(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )
        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            **model_options
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline built by `PointNavPPOMixin`.

        No auxiliary losses and a single belief are used; `kwargs` are
        accepted but not consulted.
        """
        rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD
        return PointNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=rollout_period,
        )

    def create_model(self, **kwargs):
        """Delegate model construction to the helper, forwarding `kwargs`."""
        handler = self.model_creation_handler
        return handler.create_model(**kwargs)

    def tag(self):
        """Return the string tag identifying this experiment."""
        return "PointNav-Habitat-RGBD-SimpleConv-DDPPO"


================================================
FILE: projects/pointnav_baselines/experiments/ithor/__init__.py
================================================


================================================
FILE: projects/pointnav_baselines/experiments/ithor/pointnav_ithor_base.py
================================================
import os
from abc import ABC

from projects.pointnav_baselines.experiments.pointnav_thor_base import (
    PointNavThorBaseConfig,
)


class PointNaviThorBaseConfig(PointNavThorBaseConfig, ABC):
    """The base config for all iTHOR PointNav experiments."""

    # Total number of training processes; `PointNavThorBaseConfig.machine_params`
    # distributes this count across the training GPUs when CUDA is available.
    NUM_PROCESSES = 40

    # Dataset directories are resolved against the working directory at the
    # time this class body is executed (i.e. when the module is imported).
    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-pointnav/train")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-pointnav/val")


================================================
FILE: projects/pointnav_baselines/experiments/ithor/pointnav_ithor_depth_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.robothor_plugin.robothor_sensors import (
    DepthSensorThor,
    GPSCompassSensorRoboThor,
)
from projects.pointnav_baselines.mixins import (
    PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
from projects.pointnav_baselines.experiments.ithor.pointnav_ithor_base import (
    PointNaviThorBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin


class PointNaviThorDepthPPOExperimentConfig(PointNaviThorBaseConfig):
    """A Point Navigation experiment configuration in iTHOR with depth
    input.

    The sensors are a depth camera (uuid `depth_lowres`) and a
    `GPSCompassSensorRoboThor`; the model uses the `simple_cnn` backbone.
    """

    SENSORS = [
        DepthSensorThor(
            height=PointNaviThorBaseConfig.SCREEN_SIZE,
            width=PointNaviThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    def __init__(self):
        """Run the base-class setup and prepare the model-creation helper."""
        super().__init__()

        # Gather the model options first, then hand them over in one go.
        model_options = dict(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )
        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            **model_options
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline built by `PointNavPPOMixin`.

        No auxiliary losses and a single belief are used; `kwargs` are
        accepted but not consulted.
        """
        rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD
        return PointNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=rollout_period,
        )

    def create_model(self, **kwargs):
        """Delegate model construction to the helper, forwarding `kwargs`."""
        handler = self.model_creation_handler
        return handler.create_model(**kwargs)

    def tag(self):
        """Return the string tag identifying this experiment."""
        return "PointNav-iTHOR-Depth-SimpleConv-DDPPO"


================================================
FILE: projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgb_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from projects.pointnav_baselines.mixins import (
    PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
from projects.pointnav_baselines.experiments.ithor.pointnav_ithor_base import (
    PointNaviThorBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin


class PointNaviThorRGBPPOExperimentConfig(PointNaviThorBaseConfig):
    """A Point Navigation experiment configuration in iTHOR with RGB input.

    The sensors are an RGB camera (uuid `rgb_lowres`) and a
    `GPSCompassSensorRoboThor`; the model uses the `simple_cnn` backbone.
    """

    SENSORS = [
        RGBSensorThor(
            height=PointNaviThorBaseConfig.SCREEN_SIZE,
            width=PointNaviThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    def __init__(self):
        """Run the base-class setup and prepare the model-creation helper."""
        super().__init__()

        # Gather the model options first, then hand them over in one go.
        model_options = dict(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )
        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            **model_options
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline built by `PointNavPPOMixin`.

        No auxiliary losses and a single belief are used; `kwargs` are
        accepted but not consulted.
        """
        rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD
        return PointNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=rollout_period,
        )

    def create_model(self, **kwargs):
        """Delegate model construction to the helper, forwarding `kwargs`."""
        handler = self.model_creation_handler
        return handler.create_model(**kwargs)

    def tag(self):
        """Return the string tag identifying this experiment."""
        return "PointNav-iTHOR-RGB-SimpleConv-DDPPO"


================================================
FILE: projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgbd_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from projects.pointnav_baselines.mixins import (
    PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
from projects.pointnav_baselines.experiments.ithor.pointnav_ithor_base import (
    PointNaviThorBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin


class PointNaviThorRGBDPPOExperimentConfig(PointNaviThorBaseConfig):
    """A Point Navigation experiment configuration in iTHOR with RGB-D
    input.

    The sensors are an RGB camera (uuid `rgb_lowres`), a depth camera (uuid
    `depth_lowres`) and a `GPSCompassSensorRoboThor`; the model uses the
    `simple_cnn` backbone.
    """

    SENSORS = [
        RGBSensorThor(
            height=PointNaviThorBaseConfig.SCREEN_SIZE,
            width=PointNaviThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        DepthSensorThor(
            height=PointNaviThorBaseConfig.SCREEN_SIZE,
            width=PointNaviThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    def __init__(self):
        """Run the base-class setup and prepare the model-creation helper."""
        super().__init__()

        # Gather the model options first, then hand them over in one go.
        model_options = dict(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )
        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            **model_options
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline built by `PointNavPPOMixin`.

        No auxiliary losses and a single belief are used; `kwargs` are
        accepted but not consulted.
        """
        rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD
        return PointNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=rollout_period,
        )

    def create_model(self, **kwargs):
        """Delegate model construction to the helper, forwarding `kwargs`."""
        handler = self.model_creation_handler
        return handler.create_model(**kwargs)

    def tag(self):
        """Return the string tag identifying this experiment."""
        return "PointNav-iTHOR-RGBD-SimpleConv-DDPPO"


================================================
FILE: projects/pointnav_baselines/experiments/pointnav_base.py
================================================
from abc import ABC
from typing import Optional, Sequence

from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.base_abstractions.sensor import Sensor


class PointNavBaseConfig(ExperimentConfig, ABC):
    """The base config shared by the Point Navigation experiments.

    Holds default class-level settings (sensors, motion, camera and episode
    limits) and, per instance, the reward configuration.
    """

    # Forwarded by the experiment configs as `advance_scene_rollout_period`
    # to the training pipeline; `None` unless a subclass overrides it.
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    # Sensors used by the experiment; concrete experiment configs override it.
    SENSORS: Optional[Sequence[Sensor]] = None

    # Passed as `gridSize` in `PointNavThorBaseConfig.ENV_ARGS`.
    STEP_SIZE = 0.25
    # Passed as `rotateStepDegrees` in `PointNavThorBaseConfig.ENV_ARGS`.
    ROTATION_DEGREES = 30.0
    # NOTE(review): presumably the success distance to the goal -- not used in
    # the code visible here; confirm against the task implementation.
    DISTANCE_TO_GOAL = 0.2
    # Passed as `applyActionNoise` in `PointNavThorBaseConfig.ENV_ARGS`.
    STOCHASTIC = True

    # Passed as `width` / `height` in `PointNavThorBaseConfig.ENV_ARGS`.
    CAMERA_WIDTH = 400
    CAMERA_HEIGHT = 300
    # Used as both height and width of the vision sensors in the experiment
    # configs.
    SCREEN_SIZE = 224
    # Passed as `max_steps` to the task sampler.
    MAX_STEPS = 500

    def __init__(self):
        """Create the per-instance reward configuration."""
        # Forwarded as `rewards_config` to the task sampler by
        # `PointNavThorBaseConfig`.
        self.REWARD_CONFIG = {
            "step_penalty": -0.01,
            "goal_success_reward": 10.0,
            "failed_stop_reward": 0.0,
            "reached_max_steps_reward": 0.0,
            "shaping_weight": 1.0,
        }


================================================
FILE: projects/pointnav_baselines/experiments/pointnav_thor_base.py
================================================
import glob
import os
import platform
from abc import ABC
from math import ceil
from typing import Dict, Any, List, Optional, Sequence

import ai2thor
import gym
import numpy as np
import torch
from packaging import version

from allenact.base_abstractions.experiment_config import MachineParams
from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import evenly_distribute_count_into_bins
from allenact.utils.system import get_logger
from allenact_plugins.ithor_plugin.ithor_util import get_open_x_displays
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from allenact_plugins.robothor_plugin.robothor_task_samplers import (
    PointNavDatasetTaskSampler,
)
from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask
from projects.pointnav_baselines.experiments.pointnav_base import PointNavBaseConfig

# Refuse to import with an ai2thor older than 2.7.2. The versions "0.0.1" and
# `None` are exempt from the check (presumably development/unversioned builds
# -- TODO confirm).
if ai2thor.__version__ not in ["0.0.1", None] and version.parse(
    ai2thor.__version__
) < version.parse("2.7.2"):
    raise ImportError(
        "To run the PointNav baseline experiments you must use"
        " ai2thor version 2.7.2 or higher."
    )


class PointNavThorBaseConfig(PointNavBaseConfig, ABC):
    """The base config shared by the AI2-THOR (iTHOR and RoboTHOR) PointNav
    experiments.

    Provides the machine parameters, the environment arguments and the task
    sampler arguments; subclasses supply `NUM_PROCESSES`, the dataset
    directories and `SENSORS`.
    """

    # Total number of training processes; must be set by subclasses.
    NUM_PROCESSES: Optional[int] = None
    # Train on every visible GPU; validate and test on the last one. These
    # lists are only consulted when CUDA is available (see `machine_params`).
    TRAIN_GPU_IDS = list(range(torch.cuda.device_count()))
    VALID_GPU_IDS = [torch.cuda.device_count() - 1]
    TEST_GPU_IDS = [torch.cuda.device_count() - 1]

    # Directories expected to contain an `episodes` sub-directory with one
    # `*.json.gz` file per scene; must be set by subclasses.
    TRAIN_DATASET_DIR: Optional[str] = None
    VAL_DATASET_DIR: Optional[str] = None

    # Forwarded as `object_types` to the task sampler.
    TARGET_TYPES: Optional[Sequence[str]] = None

    ACTION_SPACE = gym.spaces.Discrete(len(PointNavTask.class_action_names()))

    def __init__(self):
        """Set up the reward config (via the base class) and the arguments
        passed to the THOR environment."""
        super().__init__()
        self.ENV_ARGS = dict(
            width=self.CAMERA_WIDTH,
            height=self.CAMERA_HEIGHT,
            continuousMode=True,
            applyActionNoise=self.STOCHASTIC,
            rotateStepDegrees=self.ROTATION_DEGREES,
            gridSize=self.STEP_SIZE,
            snapToGrid=False,
            agentMode="bot",
            include_private_scenes=False,
            # Only render depth when some sensor actually needs it.
            renderDepthImage=any(isinstance(s, DepthSensorThor) for s in self.SENSORS),
        )

    def preprocessors(self):
        """Return the sensor preprocessors; none are used by default."""
        return tuple()

    def machine_params(self, mode="train", **kwargs):
        """Return the `MachineParams` for the given mode.

        - `"train"`: `NUM_PROCESSES` evenly distributed over `TRAIN_GPU_IDS`
          when CUDA is available, otherwise a single process and no devices.
        - `"valid"`: one process on `VALID_GPU_IDS` when CUDA is available,
          otherwise zero processes.
        - `"test"`: ten processes, on `TEST_GPU_IDS` when CUDA is available.

        # Raises

        NotImplementedError : if `mode` is not one of the above.
        """
        sampler_devices: Sequence[int] = []
        if mode == "train":
            workers_per_device = 1
            gpu_ids = (
                []
                if not torch.cuda.is_available()
                else self.TRAIN_GPU_IDS * workers_per_device
            )
            nprocesses = (
                1
                if not torch.cuda.is_available()
                else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))
            )
            sampler_devices = self.TRAIN_GPU_IDS
        elif mode == "valid":
            nprocesses = 1 if torch.cuda.is_available() else 0
            gpu_ids = [] if not torch.cuda.is_available() else self.VALID_GPU_IDS
        elif mode == "test":
            nprocesses = 10
            gpu_ids = [] if not torch.cuda.is_available() else self.TEST_GPU_IDS
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        # The preprocessor graph is only built when training or when at least
        # one process will actually run in this mode.
        sensor_preprocessor_graph = (
            SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
                preprocessors=self.preprocessors(),
            )
            if mode == "train"
            or (
                (isinstance(nprocesses, int) and nprocesses > 0)
                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
            )
            else None
        )

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sampler_devices=(
                sampler_devices if mode == "train" else gpu_ids
            ),  # ignored with > 1 gpu_ids
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `PointNavDatasetTaskSampler` from the given arguments."""
        return PointNavDatasetTaskSampler(**kwargs)

    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        """Return `num_parts + 1` rounded, evenly spaced `int32` boundaries
        running from 0 to `n` (inclusive)."""
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]],
        seeds: Optional[List[int]],
        deterministic_cudnn: bool,
        include_expert_sensor: bool = True,
    ) -> Dict[str, Any]:
        """Build the task sampler arguments for one process.

        The scenes found in `scenes_dir` (one `*.json.gz` file per scene) are
        partitioned across `total_processes` and the slice belonging to
        `process_ind` is returned together with the sensor, reward and
        environment settings.

        # Parameters

        scenes_dir : Directory containing the `*.json.gz` episode files.
        process_ind : Index of the process the arguments are built for.
        total_processes : Total number of processes sharing the scenes.
        devices : Devices in use, only consulted (on Linux) to warn when there
            are fewer X-displays than non-CPU devices. May be `None`.
        seeds : Optional per-process seeds.
        deterministic_cudnn : Forwarded unchanged to the task sampler.
        include_expert_sensor : If `False`, `ExpertActionSensor`s are dropped
            from the sensors passed to the sampler.

        # Raises

        RuntimeError : if no scene files are found in `scenes_dir`.
        """
        path = os.path.join(scenes_dir, "*.json.gz")
        # The scene name is the file name up to its first ".".
        scenes = [os.path.basename(scene).split(".")[0] for scene in glob.glob(path)]
        if len(scenes) == 0:
            raise RuntimeError(
                (
                    "Could find no scene dataset information in directory {}."
                    " Are you sure you've downloaded them? "
                    " If not, see https://allenact.org/installation/download-datasets/ information"
                    " on how this can be done."
                ).format(scenes_dir)
            )

        oversample_warning = (
            f"Warning: oversampling some of the scenes ({scenes}) to feed all processes ({total_processes})."
            " You can avoid this by setting a number of workers divisible by the number of scenes"
        )
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                get_logger().warning(oversample_warning)
            # Repeat the scene list until it covers every process, then trim it
            # to a multiple of the number of processes.
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        elif len(scenes) % total_processes != 0:
            get_logger().warning(oversample_warning)

        inds = self._partition_inds(len(scenes), total_processes)

        x_display: Optional[str] = None
        if platform.system() == "Linux":
            x_displays = get_open_x_displays(throw_error_if_empty=True)

            # `devices` defaults to `None` in the public `*_task_sampler_args`
            # methods, in which case there is nothing to compare against.
            num_non_cpu_devices = len(
                [d for d in (devices or []) if d != torch.device("cpu")]
            )
            if num_non_cpu_devices > len(x_displays):
                get_logger().warning(
                    f"More GPU devices found than X-displays (devices: `{devices}`, x_displays: `{x_displays}`)."
                    f" This is not necessarily a bad thing but may mean that you're not using GPU memory as"
                    f" efficiently as possible. Consider following the instructions here:"
                    f" https://allenact.org/installation/installation-framework/#installation-of-ithor-ithor-plugin"
                    f" describing how to start an X-display on every GPU."
                )
            # Spread the processes over the available displays round-robin.
            x_display = x_displays[process_ind % len(x_displays)]

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "object_types": self.TARGET_TYPES,
            "max_steps": self.MAX_STEPS,
            "sensors": [
                s
                for s in self.SENSORS
                if (include_expert_sensor or not isinstance(s, ExpertActionSensor))
            ],
            "action_space": self.ACTION_SPACE,
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
            "env_args": {
                **self.ENV_ARGS,
                "x_display": x_display,
            },
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a training process.

        Episodes are read from `TRAIN_DATASET_DIR/episodes`; the dataset is
        looped and flipping is allowed.
        """
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.TRAIN_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            devices=devices,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.TRAIN_DATASET_DIR
        res["loop_dataset"] = True
        res["allow_flipping"] = True
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a validation process.

        Episodes are read from `VAL_DATASET_DIR/episodes`; expert action
        sensors are excluded and the dataset is not looped.
        """
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            devices=devices,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
            include_expert_sensor=False,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a test process; testing
        reuses the validation arguments."""
        return self.valid_task_sampler_args(
            process_ind=process_ind,
            total_processes=total_processes,
            devices=devices,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )


================================================
FILE: projects/pointnav_baselines/experiments/robothor/__init__.py
================================================


================================================
FILE: projects/pointnav_baselines/experiments/robothor/pointnav_robothor_base.py
================================================
import os
from abc import ABC

from projects.pointnav_baselines.experiments.pointnav_thor_base import (
    PointNavThorBaseConfig,
)


class PointNavRoboThorBaseConfig(PointNavThorBaseConfig, ABC):
    """The base config for all RoboTHOR PointNav experiments."""

    # Total number of training processes; `PointNavThorBaseConfig.machine_params`
    # distributes this count across the training GPUs when CUDA is available.
    NUM_PROCESSES = 60

    # Dataset directories are resolved against the working directory at the
    # time this class body is executed (i.e. when the module is imported).
    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/train")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/val")


================================================
FILE: projects/pointnav_baselines/experiments/robothor/pointnav_robothor_depth_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.robothor_plugin.robothor_sensors import (
    DepthSensorThor,
    GPSCompassSensorRoboThor,
)


from projects.pointnav_baselines.mixins import (
    PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
from projects.pointnav_baselines.experiments.robothor.pointnav_robothor_base import (
    PointNavRoboThorBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin


class PointNavRoboThorRGBPPOExperimentConfig(
    PointNavRoboThorBaseConfig,
):
    """A Point Navigation experiment configuration in RoboTHOR with depth
    input.

    NOTE: despite the `RGB` in the class name, the only vision sensor
    configured here is a depth camera (uuid `depth_lowres`), alongside a
    `GPSCompassSensorRoboThor`. The model uses the `simple_cnn` backbone.
    """

    SENSORS = [
        DepthSensorThor(
            height=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            width=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    def __init__(self):
        """Run the base-class setup and prepare the model-creation helper."""
        super().__init__()

        # Gather the model options first, then hand them over in one go.
        model_options = dict(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )
        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            **model_options
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline built by `PointNavPPOMixin`.

        No auxiliary losses and a single belief are used; `kwargs` are
        accepted but not consulted.
        """
        rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD
        return PointNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=rollout_period,
        )

    def create_model(self, **kwargs):
        """Delegate model construction to the helper, forwarding `kwargs`."""
        handler = self.model_creation_handler
        return handler.create_model(**kwargs)

    def tag(self):
        """Return the string tag identifying this experiment."""
        return "PointNav-RoboTHOR-Depth-SimpleConv-DDPPO"


================================================
FILE: projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from projects.pointnav_baselines.mixins import (
    PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
from projects.pointnav_baselines.experiments.robothor.pointnav_robothor_base import (
    PointNavRoboThorBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin


class PointNavRoboThorRGBPPOExperimentConfig(
    PointNavRoboThorBaseConfig,
):
    """A Point Navigation experiment configuration in RoboTHOR with RGB
    input.

    The sensors are an RGB camera (uuid `rgb_lowres`) and a
    `GPSCompassSensorRoboThor`; the model uses the `simple_cnn` backbone.
    """

    SENSORS = [
        RGBSensorThor(
            height=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            width=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    def __init__(self):
        """Run the base-class setup and prepare the model-creation helper."""
        super().__init__()

        # Gather the model options first, then hand them over in one go.
        model_options = dict(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )
        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            **model_options
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline built by `PointNavPPOMixin`.

        No auxiliary losses and a single belief are used; `kwargs` are
        accepted but not consulted.
        """
        rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD
        return PointNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=rollout_period,
        )

    def create_model(self, **kwargs):
        """Delegate model construction to the helper, forwarding `kwargs`."""
        handler = self.model_creation_handler
        return handler.create_model(**kwargs)

    def tag(self):
        """Return the string tag identifying this experiment."""
        return "PointNav-RoboTHOR-RGB-SimpleConv-DDPPO"


================================================
FILE: projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgbd_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline

from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from projects.pointnav_baselines.mixins import (
    PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
from projects.pointnav_baselines.experiments.robothor.pointnav_robothor_base import (
    PointNavRoboThorBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin


class PointNavRoboThorRGBPPOExperimentConfig(
    PointNavRoboThorBaseConfig,
):
    """A Point Navigation experiment configuration in RoboTHOR with RGB-D
    input.

    NOTE: although the class name only mentions `RGB`, both an RGB camera
    (uuid `rgb_lowres`) and a depth camera (uuid `depth_lowres`) are
    configured here, alongside a `GPSCompassSensorRoboThor`. The model uses
    the `simple_cnn` backbone.
    """

    SENSORS = [
        RGBSensorThor(
            height=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            width=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        DepthSensorThor(
            height=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            width=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    def __init__(self):
        """Run the base-class setup and prepare the model-creation helper."""
        super().__init__()

        # Gather the model options first, then hand them over in one go.
        model_options = dict(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )
        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            **model_options
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline built by `PointNavPPOMixin`.

        No auxiliary losses and a single belief are used; `kwargs` are
        accepted but not consulted.
        """
        rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD
        return PointNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=rollout_period,
        )

    def create_model(self, **kwargs):
        """Delegate model construction to the helper, forwarding `kwargs`."""
        handler = self.model_creation_handler
        return handler.create_model(**kwargs)

    def tag(self):
        """Return the string tag identifying this experiment."""
        return "PointNav-RoboTHOR-RGBD-SimpleConv-DDPPO"


================================================
FILE: projects/pointnav_baselines/mixins.py
================================================
from typing import Optional
from typing import Sequence

import attr
import gym
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.sensor import Sensor
from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor
from allenact.utils.experiment_utils import (
    Builder,
    TrainingPipeline,
    PipelineStage,
    LinearDecay,
)
from projects.objectnav_baselines.mixins import update_with_auxiliary_losses

# fmt: off
try:
    # Habitat may not be installed, just create a fake class here in that case
    from allenact_plugins.habitat_plugin.habitat_sensors import TargetCoordinatesSensorHabitat
except ImportError:
    # Stand-in with the same name so that the `isinstance` check in
    # `PointNavUnfrozenResNetWithGRUActorCriticMixin.create_model` below still
    # has a class to test against when the Habitat plugin cannot be imported.
    class TargetCoordinatesSensorHabitat:  #type:ignore
        pass
# fmt: on

from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask
from allenact_plugins.navigation_plugin.pointnav.models import PointNavActorCritic


@attr.s(kw_only=True)
class PointNavUnfrozenResNetWithGRUActorCriticMixin:
    """Builds a `PointNavActorCritic` model from a set of sensors.

    All attributes are keyword-only and are forwarded (directly or via the
    sensor uuids derived from `sensors`) to `PointNavActorCritic`.
    """

    # Name of the visual backbone passed to `PointNavActorCritic`.
    backbone: str = attr.ib()
    # Sensors from which the RGB, depth and goal sensor uuids are looked up.
    sensors: Sequence[Sensor] = attr.ib()
    auxiliary_uuids: Sequence[str] = attr.ib()
    add_prev_actions: bool = attr.ib()
    multiple_beliefs: bool = attr.ib()
    belief_fusion: Optional[str] = attr.ib()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the `PointNavActorCritic` model.

        # Parameters

        kwargs : Must contain `"sensor_preprocessor_graph"`, whose
            `observation_spaces` become the model's observation space.

        # Returns

        The constructed `PointNavActorCritic`.

        # Raises

        ValueError : if `sensors` contains neither a
            `GPSCompassSensorRoboThor` nor a `TargetCoordinatesSensorHabitat`.
        """
        # The RGB and depth sensors are optional: the uuid of the first
        # matching sensor is used, or `None` when there is none.
        rgb_uuid = next(
            (s.uuid for s in self.sensors if isinstance(s, RGBSensor)), None
        )
        depth_uuid = next(
            (s.uuid for s in self.sensors if isinstance(s, DepthSensor)), None
        )

        # A goal sensor is mandatory. Fail with a descriptive error instead of
        # the bare `StopIteration` that an exhausted `next(...)` would raise.
        goal_sensors = [
            s
            for s in self.sensors
            if isinstance(
                s, (GPSCompassSensorRoboThor, TargetCoordinatesSensorHabitat)
            )
        ]
        if not goal_sensors:
            raise ValueError(
                "No goal sensor found in `sensors`: expected a"
                " `GPSCompassSensorRoboThor` or a `TargetCoordinatesSensorHabitat`."
            )
        goal_sensor_uuid = goal_sensors[0].uuid

        return PointNavActorCritic(
            # Env and Task
            action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            rgb_uuid=rgb_uuid,
            depth_uuid=depth_uuid,
            goal_sensor_uuid=goal_sensor_uuid,
            # RNN
            hidden_size=(
                228 if self.multiple_beliefs and len(self.auxiliary_uuids) > 1 else 512
            ),
            num_rnn_layers=1,
            rnn_type="GRU",
            add_prev_actions=self.add_prev_actions,
            action_embed_size=4,
            # CNN
            backbone=self.backbone,
            resnet_baseplanes=32,
            embed_coordinates=False,
            coordinate_dims=2,
            # Aux
            auxiliary_uuids=self.auxiliary_uuids,
            multiple_beliefs=self.multiple_beliefs,
            beliefs_fusion=self.belief_fusion,
        )


class PointNavPPOMixin:
    """Provides the PPO training pipeline shared by the PointNav baselines."""

    @staticmethod
    def training_pipeline(
        auxiliary_uuids: Sequence[str],
        multiple_beliefs: bool,
        normalize_advantage: bool,
        advance_scene_rollout_period: Optional[int] = None,
    ) -> TrainingPipeline:
        """Build a single-stage PPO `TrainingPipeline`.

        # Parameters

        auxiliary_uuids : Passed to `update_with_auxiliary_losses` together
            with the PPO loss.
        multiple_beliefs : Passed to `update_with_auxiliary_losses`.
        normalize_advantage : Passed to the `PPO` loss.
        advance_scene_rollout_period : Passed unchanged to the pipeline.

        # Returns

        A `TrainingPipeline` with an Adam optimizer, a linearly decaying
        learning rate and one stage containing every named loss.
        """
        # Hyperparameters
        total_steps = 75000000
        learning_rate = 3e-4
        metric_interval = 10000 if torch.cuda.is_available() else 1

        # Each entry maps a loss name to a `(loss, weight)` pair.
        ppo = PPO(**PPOConfig, normalize_advantage=normalize_advantage)
        weighted_losses = update_with_auxiliary_losses(
            named_losses={"ppo_loss": (ppo, 1.0)},
            auxiliary_uuids=auxiliary_uuids,
            multiple_beliefs=multiple_beliefs,
        )
        losses = {name: pair[0] for name, pair in weighted_losses.items()}
        loss_names = list(weighted_losses.keys())
        loss_weights = [pair[1] for pair in weighted_losses.values()]

        single_stage = PipelineStage(
            loss_names=loss_names,
            max_stage_steps=total_steps,
            loss_weights=loss_weights,
        )
        decay = LinearDecay(steps=total_steps)

        return TrainingPipeline(
            save_interval=5000000,
            metric_accumulate_interval=metric_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=learning_rate)),
            num_mini_batch=1,
            update_repeats=4,
            max_grad_norm=0.5,
            num_steps=128,
            named_losses=losses,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=advance_scene_rollout_period,
            pipeline_stages=[single_stage],
            lr_scheduler_builder=Builder(LambdaLR, {"lr_lambda": decay}),
        )


================================================
FILE: projects/tutorials/__init__.py
================================================


================================================
FILE: projects/tutorials/distributed_objectnav_tutorial.py
================================================
# literate: tutorials/distributed-objectnav-tutorial.md
# %%
"""# Tutorial: Distributed training across multiple nodes."""

# %%
"""
**Note** The commands provided in this tutorial include a configuration script to
[clone the full library](../installation/installation-allenact.md#full-library). Setting up headless THOR might
require superuser privileges. We also assume [NCCL](https://developer.nvidia.com/nccl) is available for communication
across computation nodes and all nodes have a running `ssh` server. 

The experimental tools and commands for distributed training introduced below assume a Linux OS (tested on Ubuntu
18.04).

In this tutorial, we:

1. Introduce the available API for training across multiple nodes, as well as experimental scripts for distributed
 configuration, training start and termination, and remote command execution.
1. Introduce the headless mode for [AI2-THOR](https://ai2thor.allenai.org/) in `AllenAct`. Note that, in contrast with
previous tutorials using AI2-THOR, this time we don't require an xserver (in Linux) to be active.
1. Show a training example for RoboTHOR ObjectNav on a cluster, with each node having sufficient GPUs and GPU memory to
host 60 experience samplers collecting rollout data.

Thanks to the massive parallelization of experience collection and model training enabled by
[DD-PPO](https://arxiv.org/abs/1911.00357), we can greatly speed up training by scaling across multiple nodes:

![training speedup](../img/multinode_training.jpg)

## The task: ObjectNav

In ObjectNav, the goal for the agent is to navigate to an object (possibly unseen during training) of a known given
class and signal task completion when it determines it has reached the goal.


## Implementation

For this tutorial, we'll use the readily available `objectnav_baselines` project, which includes configurations for
a wide variety of object navigation experiments for both iTHOR and RoboTHOR. Since those configuration files are
defined for a single-node setup, we will mainly focus on the changes required in the `machine_params` and
`training_pipeline` methods.

Note that, in order to use the headless version of AI2-THOR, we currently need to install a specific THOR commit,
different from the default one in `robothor_plugin`. Note that this command is included in the configuration script
below, so **we don't need to run this**:

```bash
pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+91139c909576f3bf95a187c5b02c6fd455d06b48
```

The experiment config starts as follows:
"""

# %%
import math
from typing import Optional, Sequence

import torch
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.utils.experiment_utils import (
    Builder,
    LinearDecay,
    MultiLinearDecay,
    TrainingPipeline,
    PipelineStage,
)
from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_rgb_resnet18gru_ddppo import (
    ObjectNavRoboThorRGBPPOExperimentConfig as BaseConfig,
)


class DistributedObjectNavRoboThorRGBPPOExperimentConfig(BaseConfig):
    def tag(self) -> str:
        """Return the identifying tag string of this experiment configuration."""
        experiment_tag = "DistributedObjectNavRoboThorRGBPPO"
        return experiment_tag

    # %%
    """We override ObjectNavRoboThorBaseConfig's THOR_COMMIT_ID to match the installed headless one:"""

    # %%
    # Same commit hash as the headless `ai2thor` build installed by the
    # tutorial's configuration script.
    THOR_COMMIT_ID = "91139c909576f3bf95a187c5b02c6fd455d06b48"

    # %%
    """Also indicate that we're using headless THOR (for `task_sampler_args` methods):"""

    # %%
    # Signals headless THOR usage; per the tutorial text this is consumed by
    # the `task_sampler_args` methods (defined outside this class body).
    THOR_IS_HEADLESS = True

    # %%
    """**Temporary hack** Disable the `commit_id` argument passed to the THOR `Controller`'s `init` method:"""

    # %%
    def env_args(self):
        """Return the parent's environment arguments without `commit_id`.

        Temporary workaround: the `commit_id` entry (when present) is removed
        from the arguments produced by the parent configuration before they
        are handed back to the caller.
        """
        inherited_args = super().env_args()
        # Drop the entry if it exists; a missing key is not an error.
        inherited_args.pop("commit_id", None)
        return inherited_args

    # %%
    """
    And, of course, define the number of nodes. This will be used by `machine_params` and `training_pipeline` below.
    We override the existing `ExperimentConfig`'s `init` method to include control on the number of nodes:
    """

    # %%
    def __init__(
        self,
        distributed_nodes: int = 1,
        num_train_processes: Optional[int] = None,
        train_gpu_ids: Optional[Sequence[int]] = None,
        val_gpu_ids: Optional[Sequence[int]] = None,
        test_gpu_ids: Optional[Sequence[int]] = None,
    ):
        """Initialize the configuration and record the number of nodes.

        # Parameters

        distributed_nodes : number of nodes taking part in training; stored
            as `self.distributed_nodes` and read by `machine_params` and
            `training_pipeline`.
        num_train_processes : forwarded unchanged to the parent initializer.
        train_gpu_ids : forwarded unchanged to the parent initializer.
        val_gpu_ids : forwarded unchanged to the parent initializer.
        test_gpu_ids : forwarded unchanged to the parent initializer.
        """
        parent_kwargs = dict(
            num_train_processes=num_train_processes,
            train_gpu_ids=train_gpu_ids,
            val_gpu_ids=val_gpu_ids,
            test_gpu_ids=test_gpu_ids,
        )
        super().__init__(**parent_kwargs)

        # Set after the parent initializer has run, as in the single-node setup.
        self.distributed_nodes = distributed_nodes

    # %%
    """
    ### Machine parameters

    **Note:** We assume that all nodes are identical (same number and model of GPUs and drivers).

    The `machine_params` method will be invoked by `runner.py` with different arguments, e.g. to determine the
    configuration for validation or training.

    When working in distributed settings, `AllenAct` needs to know the total number of trainers across all nodes as well
    as the local number of trainers. This is accomplished through the introduction of a `machine_id` keyword argument,
    which will be used to define the training parameters as follows:
    """

    # %%
    def machine_params(self, mode="train", **kwargs):
        """Return the parent's machine parameters adapted to multiple nodes.

        In `"train"` mode, `devices`, `nprocesses` and `sampler_devices` are
        repeated `self.distributed_nodes` times. If a `machine_id` keyword
        argument is present, the worker ids belonging to that node are also
        registered through `set_local_worker_ids`. In `"valid"` mode the
        number of processes is set to `(0,)`. Any other mode returns the
        parent's parameters untouched.

        # Parameters

        mode : mode string forwarded to the parent implementation.
        kwargs : forwarded to the parent implementation; may contain
            `machine_id`, the index of the current node.

        # Returns

        The parameters object produced by the parent implementation, after
        the modifications described above.
        """
        params = super().machine_params(mode, **kwargs)

        if mode == "valid":
            # Use all GPUs at their maximum capacity for training
            # (you may run validation in a separate machine)
            params.nprocesses = (0,)
            return params

        if mode != "train":
            return params

        # Repeat the per-node layout once for every node. New sequences are
        # built (no in-place `*=`) so the parent's originals stay untouched.
        num_nodes = self.distributed_nodes
        params.devices = params.devices * num_nodes
        params.nprocesses = params.nprocesses * num_nodes
        params.sampler_devices = params.sampler_devices * num_nodes

        if "machine_id" in kwargs:
            machine_id = kwargs["machine_id"]
            assert (
                0 <= machine_id < self.distributed_nodes
            ), f"machine_id {machine_id} out of range [0, {self.distributed_nodes - 1}]"

            # Each node owns one contiguous run of worker ids, one per
            # training GPU.
            workers_per_node = len(self.train_gpu_ids)
            first_local_worker = workers_per_node * machine_id
            params.set_local_worker_ids(
                list(range(first_local_worker, first_local_worker + workers_per_node))
            )

        # Confirm we're setting up train params nicely:
        print(
            f"devices {params.devices}"
            f"\nnprocesses {params.nprocesses}"
            f"\nsampler_devices {params.sampler_devices}"
            f"\nlocal_worker_ids {params.local_worker_ids}"
        )

        return params

    # %%
    """
    In summary, we need to specify which indices in `devices`, `nprocesses` and `sampler_devices` correspond to the
    local `machine_id` node (whenever a `machine_id` is given as a keyword argument), otherwise we specify the global
    configuration.

    ### Training pipeline

    In preliminary ObjectNav experiments, we observe that small batches are useful during the initial training steps in
    terms of sample efficiency, whereas large batches are preferred during the rest of training.
    
    In order to scale to the larger amount of collected data in multi-node settings, we will proceed with a two-stage
    pipeline:
    
    1. In the first stage, we'll enforce a number of updates per amount of collected data similar to the
    configuration with a single node by enforcing more batches per rollout (for about 30 million steps).
    1. In the second stage we'll switch to a configuration with larger learning rate and batch size to be
    used up to the grand total of 300 million experience steps.
    
    We first define a helper method to generate a learning rate curve with decay for each stage:
    """

    # %%
    @staticmethod
    def lr_scheduler(small_batch_steps, transition_steps, ppo_steps, lr_scaling):
        """Build the three-phase learning rate multiplier curve.

        The curve is a `MultiLinearDecay` made of three `LinearDecay` pieces:
        a small-batch phase starting at 1.0, a transition phase moving to the
        scaled learning rate, and a large-batch phase decaying to 0.

        # Parameters

        small_batch_steps : nominal length of the small-batch phase; the
            phase actually lasts `int(small_batch_steps * 1.02)` steps.
        transition_steps : length of the transition phase.
        ppo_steps : total number of training steps covered by the curve.
        lr_scaling : factor applied to the learning rate reached at the start
            of the large-batch phase.

        # Returns

        The `MultiLinearDecay` chaining the three phases.
        """
        # The small-batch phase is padded by 2% over its nominal length.
        safe_small_batch_steps = int(small_batch_steps * 1.02)
        large_batch_and_lr_steps = ppo_steps - safe_small_batch_steps - transition_steps

        # Learning rate after small batch steps (assuming decay to 0)
        lr_after_small_batch = 1.0 - safe_small_batch_steps / ppo_steps

        # Initial learning rate for large batch (after transition from initial
        # to large learning rate)
        lr_at_large_batch_start = lr_scaling * (
            1.0 - (safe_small_batch_steps + transition_steps) / ppo_steps
        )

        # Base learning rate phase for small batch (with linear decay towards 0)
        small_batch_phase = LinearDecay(
            steps=safe_small_batch_steps,
            startp=1.0,
            endp=lr_after_small_batch,
        )
        # Allow the optimizer to adapt its statistics to the changes with a
        # larger learning rate
        transition_phase = LinearDecay(
            steps=transition_steps,
            startp=lr_after_small_batch,
            endp=lr_at_large_batch_start,
        )
        # Scaled learning rate phase for large batch (with linear decay towards 0)
        large_batch_phase = LinearDecay(
            steps=large_batch_and_lr_steps,
            startp=lr_at_large_batch_start,
            endp=0,
        )

        return MultiLinearDecay(
            [small_batch_phase, transition_phase, large_batch_phase]
        )

    # %%
    """
    The training pipeline looks like:
    """

    # %%
    def training_pipeline(self, **kwargs):
        """Build the two-stage PPO training pipeline for multi-node training.

        The first stage (30 million steps) multiplies the number of
        mini-batches and update repeats so that the number of updates per
        amount of collected data stays similar to the single-node baseline.
        The second stage runs the base configuration for the remaining steps,
        up to 300 million in total. The learning rate follows the curve built
        by `lr_scheduler`, with the large-batch learning rate scaled by the
        square root of the number of nodes.

        # Parameters

        kwargs : unused.

        # Returns

        The `TrainingPipeline` describing losses, stages, optimizer and
        learning rate schedule.

        # Raises

        AssertionError : if the training samplers cannot be evenly divided
            among the training GPUs.
        """
        # These params are identical to the baseline configuration for 60 samplers (1 machine)
        ppo_steps = int(300e6)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 4
        num_steps = 128
        save_interval = 5000000
        log_interval = 10000 if torch.cuda.is_available() else 1
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5

        # We add 30 million steps for small batch learning
        small_batch_steps = int(30e6)
        # And a short transition phase towards large learning rate
        # (see comment in the `lr_scheduler` helper method)
        transition_steps = int(2 / 3 * self.distributed_nodes * 1e6)

        # Find exact number of samplers per GPU
        assert (
            self.num_train_processes % len(self.train_gpu_ids) == 0
        ), "Expected uniform number of samplers per GPU"
        samplers_per_gpu = self.num_train_processes // len(self.train_gpu_ids)

        # Multiply num_mini_batch by the largest divisor of
        # samplers_per_gpu to keep all batches of same size. Candidates are
        # capped by half the samplers per GPU and by the number of nodes; the
        # floor of 1 keeps the candidate range non-empty when there is a single
        # sampler per GPU (which used to fail with an IndexError).
        largest_candidate = min(max(samplers_per_gpu // 2, 1), self.distributed_nodes)
        num_mini_batch_multiplier = [
            i
            for i in reversed(range(1, largest_candidate + 1))
            if samplers_per_gpu % i == 0
        ][0]

        # Multiply update_repeats so that the product of this factor and
        # num_mini_batch_multiplier is >= self.distributed_nodes:
        update_repeats_multiplier = int(
            math.ceil(self.distributed_nodes / num_mini_batch_multiplier)
        )

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={"ppo_loss": PPO(**PPOConfig, show_ratios=False)},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                # We increase the number of batches for the first stage to reach an
                # equivalent number of updates per collected rollout data as in the
                # 1 node/60 samplers setting
                PipelineStage(
                    loss_names=["ppo_loss"],
                    max_stage_steps=small_batch_steps,
                    num_mini_batch=num_mini_batch * num_mini_batch_multiplier,
                    update_repeats=update_repeats * update_repeats_multiplier,
                ),
                # Then we proceed with the base configuration (leading to larger
                # batches due to the increased number of samplers)
                PipelineStage(
                    loss_names=["ppo_loss"],
                    max_stage_steps=ppo_steps - small_batch_steps,
                ),
            ],
            # We use the MultiLinearDecay curve defined by the helper function,
            # setting the learning rate scaling as the square root of the number
            # of nodes. Linear scaling might also work, but we leave that
            # check to the reader.
            lr_scheduler_builder=Builder(
                LambdaLR,
                {
                    "lr_lambda": self.lr_scheduler(
                        small_batch_steps=small_batch_steps,
                        transition_steps=transition_steps,
                        ppo_steps=ppo_steps,
                        lr_scaling=math.sqrt(self.distributed_nodes),
                    )
                },
            ),
        )


# %%
"""
## Multi-node configuration

**Note:** In the following, we'll assume you don't have an available setup for distributed execution, such as
[slurm](https://slurm.schedmd.com/documentation.html). If you do have access to a better alternative to setup and run
distributed processes, we encourage you to use that. The experimental distributed tools included here are intended for
a rather basic usage pattern that might not suit your needs.

If we haven't set up AllenAct with the headless version of AI2-THOR in our nodes, we can define a configuration script
similar to:

```bash
#!/bin/bash

# Prepare a virtualenv for allenact
sudo apt-get install -y python3-venv
python3 -mvenv ~/allenact_venv
source ~/allenact_venv/bin/activate
pip install -U pip wheel

# Install AllenAct
cd ~
git clone https://github.com/allenai/allenact.git
cd allenact

# Install AllenAct + RoboTHOR plugin dependencies
pip install -r requirements.txt
pip install -r allenact_plugins/robothor_plugin/extra_requirements.txt

# Download + setup datasets
bash datasets/download_navigation_datasets.sh robothor-objectnav

# Install headless AI2-THOR and required libvulkan1
sudo apt-get install -y libvulkan1
pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+91139c909576f3bf95a187c5b02c6fd455d06b48

# Download AI2-THOR binaries
python -c "from ai2thor.controller import Controller; c=Controller(); c.stop()"

echo DONE
```

and save it as `headless_robothor_config.sh`. Note that some of the configuration steps in the script assume you have
superuser privileges.

Then, we can just copy this file to the first node in our cluster and run it with:

```bash
source <PATH/TO/headless_robothor_config.sh>
```

If everything went well, we should be able to

```bash
cd ~/allenact && source ~/allenact_venv/bin/activate
```

Note that we might need to install `libvulkan1` in each node (even if the AllenAct setup is shared across nodes) if it
is not already available.

### Local filesystems

If our cluster does not use a shared filesystem, we'll need to propagate the setup to the rest of nodes. Assuming
we can just `ssh` with the current user to all nodes, we can propagate our config with

```bash
scripts/dconfig.py --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \
 --config_script <PATH/TO/headless_robothor_config.sh>
```

and we can check the state of the installation with the `scripts/dcommand.py` tool:

```bash
scripts/dcommand.py --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \
 --command 'tail -n 5 ~/log_allenact_distributed_config'
```

If everything went fine, all requirements are ready to start running our experiment.

## Run your experiment

**Note:** In this section, we again assume you don't have an available setup for distributed execution, such as
[slurm](https://slurm.schedmd.com/documentation.html). If you do have access to a better alternative to setup/run
distributed processes, we encourage you to use that. The experimental distributed tools included here are intended for
a rather basic usage pattern that might not suit your needs.

Our experimental extension to AllenAct's `main.py` script allows using practically identical commands to the ones
used in a single-node setup to start our experiments. From the root `allenact` directory, we can simply invoke

```bash
scripts/dmain.py projects/tutorials/distributed_objectnav_tutorial.py \
 --config_kwargs '{"distributed_nodes":3}' \
 --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \
 --env_activate_path ~/allenact_venv/bin/activate \
 --allenact_path ~/allenact \
 --distributed_ip_and_port <FIRST_IP_ADDRESS_IN_RUNS_ON_LIST>:<FREE_PORT_NUMBER_FOR_THIS_IP_ADDRESS>
```

This script will do several things for you, including synchronization of the changes in the `allenact` directory
to all machines, enabling virtual environments in each node, sharing the same random seed for all `main.py` instances,
assigning `--machine_id` parameters required for multi-node training, and redirecting the process output to a log file
under the output results folder.

Note that by changing the value associated with the `distributed_nodes` key in the `config_kwargs` map and the `runs_on`
list of IPs, we can easily scale our training to e.g. 1, 3, or 8 nodes as shown in the chart above. Note that for this
call to work unmodified, you should have sufficient GPUs/GPU memory to host 60 samplers per node.

## Track and stop your experiment

You might have noticed that, when your experiment started with the above command, a file was created under
`~/.allenact`. This file includes IP addresses and screen session IDs for all nodes. It can be used
by the already introduced `scripts/dcommand.py` script, if we omit the `--runs_on` argument, to call a command on each
node via ssh; but most importantly it is used by the `scripts/dkill.py` script to terminate all screen sessions hosting
our training processes.

### Experiment tracking

A simple way to check all machines are training, assuming you have `nvidia-smi` installed in all nodes, is to just call

```bash
scripts/dcommand.py
```

from the root `allenact` directory. If everything is working well, the GPU usage stats from `nvidia-smi` should reflect
ongoing activity. You can also add different commands to be executed by each node. It is of course also possible to run
tensorboard on any of the nodes, if that's your preference.

### Experiment termination

Just call

```bash
scripts/dkill.py
```

After killing all involved screen sessions, you will be asked about whether you also want to delete the "killfile"
stored under the `~/.allenact` directory (which might be your preferred option once all processes are terminated). 

We hope this tutorial will help you start quickly testing new ideas! Even if we've only explored moderate settings of
up to 480 experience samplers, you might want to consider some additional changes (like the
[choice for the optimizer](https://arxiv.org/abs/2103.07013)) if you plan to run at larger scale. 
"""


================================================
FILE: projects/tutorials/gym_mujoco_tutorial.py
================================================
# literate: tutorials/gym-mujoco-tutorial.md
# %%
"""# Tutorial: OpenAI gym MuJoCo environment."""

# %%
"""
**Note** The provided commands to execute in this tutorial assume you have
[installed the full library](../installation/installation-allenact.md#full-library) and the requirements for the
`gym_plugin`. The latter can be installed by

```bash
pip install -r allenact_plugins/gym_plugin/extra_requirements.txt
```

The environments for this tutorial use the [MuJoCo](http://www.mujoco.org/) (**Mu**lti-**Jo**int dynamics in
**Co**ntact) physics simulator, which also needs to be installed properly by following the instructions
[here](https://github.com/openai/mujoco-py).

## The task

For this tutorial, we'll focus on one of the continuous-control environments under the `mujoco` group of `gym`
environments: [Ant-v2](https://gym.openai.com/envs/Ant-v2/). In this task, the goal
is to make a four-legged creature, "ant", walk forward as fast as possible. A random agent of "Ant-v2" is shown below.

![The Ant-v2 task](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_random.gif).

To achieve the goal, we need to provide continuous control for the agent so that it moves forward on its four legs
with an `x` velocity as high as possible for at most 1000 episode steps. The episode fails (and is done) if the `z`
position of the agent is out of the range [0.2, 1.0]. The action space has 8 dimensions and the observation space has
111 dimensions, which map to different body parts and include the 3D position `(x,y,z)` and orientation (quaternion
`x`,`y`,`z`,`w`) of the torso, the joint angles, the 3D velocity `(x,y,z)`, the 3D angular velocity `(x,y,z)`, and the
joint velocities.
The rewards for the agent "ant" are composed of the forward rewards, healthy rewards, control cost, and contact cost. 

## Implementation

For this tutorial, we'll use the readily available `gym_plugin`, which includes a
[wrapper for `gym` environments](../api/allenact_plugins/gym_plugin/gym_environment.md#gymenvironment), a
[task sampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler) and
[task definition](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymcontinuousbox2dtask), a
[sensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to wrap the observations provided by the `gym`
environment, and a simple [model](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic).
The experiment config, similar to the one used for the
[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md), is defined as follows:
"""

# %%
from typing import Dict, Optional, List, Any, cast

import gym
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses.ppo import PPO

from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor

from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler
from allenact.utils.experiment_utils import (
    TrainingPipeline,
    Builder,
    PipelineStage,
    LinearDecay,
)
from allenact.utils.viz_utils import VizSuite, AgentViewViz


class HandManipulateTutorialExperimentConfig(ExperimentConfig):
    @classmethod
    def tag(cls) -> str:
        return "GymMuJoCoTutorial"

    # %%
    """
    ### Sensors and Model

    As mentioned above, we'll use a sensor from the `gym_plugin`, here a
    [GymMuJoCoSensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymmujocosensor), to provide
    full observations from the state of the `gym` environment to our model.
    """

    # %%
    # Single sensor for the "Ant-v2" environment. Its uuid ("gym_mujoco_data")
    # is the same identifier passed as `input_uuid` when the model is created.
    SENSORS = [
        GymMuJoCoSensor("Ant-v2", uuid="gym_mujoco_data"),
    ]

    # %%
    """
    We define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic,
    [MemorylessActorCritic](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). Since
    this is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]`
    instead of `ActorCriticModel[CategoricalDistr]`, since we'll use a
    [Gaussian distribution](../api/allenact_plugins/gym_plugin/gym_distributions.md#gaussiandistr) to sample actions.
    """

    # %%
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Instantiate the `MemorylessActorCritic` agent for this experiment.

        The model is a lightweight implementation with separate MLPs for
        actors and critic. As it targets continuous control, its superclass
        is `ActorCriticModel[GaussianDistr]` rather than
        `ActorCriticModel[CategoricalDistr]`: actions are sampled from a
        Gaussian distribution.

        # Parameters

        kwargs : unused.

        # Returns

        The model, reading the `gym_mujoco_data` observations declared in
        `SENSORS`.
        """
        # 8 actors, each in the range [-3.0, 3.0]
        ant_action_space = gym.spaces.Box(-3.0, 3.0, (8,), "float32")
        sensor_observation_spaces = SensorSuite(cls.SENSORS).observation_spaces

        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=ant_action_space,
            observation_space=sensor_observation_spaces,
            action_std=0.5,
        )

    # %%
    """
    ### Task samplers
    We use an available `TaskSampler` implementation for `gym` environments that allows to sample
    [GymTasks](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtask):
    [GymTaskSampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler). Even though it is possible to let the task
    sampler instantiate the proper sensor for the chosen task name (by passing `None`), we use the sensors we created
    above, which contain a custom identifier for the actual observation space (`gym_mujoco_data`) also used by the model.
    """

    # %%
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `GymTaskSampler` for the "Ant-v2" environment.

        # Parameters

        kwargs : sampler arguments, forwarded unchanged to `GymTaskSampler`.
        """
        ant_task_sampler = GymTaskSampler(gym_env_type="Ant-v2", **kwargs)
        return ant_task_sampler

    # %%
    """
    For convenience, we will use a `_get_sampler_args` method to generate the task sampler arguments for all three
    modes, `train, valid, test`:
    """

    # %%
    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(
            process_ind=process_ind, mode="train", seeds=seeds
        )

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(
            process_ind=process_ind, mode="valid", seeds=seeds
        )

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(process_ind=process_ind, mode="test", seeds=seeds)

    # %%
    """
    Similarly to what we do in the Minigrid navigation tutorial, during training the task sampler samples random tasks
    forever, while, during testing (or validation), we sample a fixed number of tasks.
    """

    # %%
    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Generate initialization arguments for train, valid, and test
        TaskSamplers.

        # Parameters
        process_ind : index of the current task sampler
        mode:  one of `train`, `valid`, or `test`
        """
        if mode == "train":
            max_tasks = None  # infinite training tasks
            task_seeds_list = None  # no predefined random seeds for training
            deterministic_sampling = False  # randomly sample tasks in training
        else:
            max_tasks = 4

            # one seed for each task to sample:
            # - ensures different seeds for each sampler, and
            # - ensures a deterministic set of sampled tasks.
            task_seeds_list = list(
                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)
            )

            deterministic_sampling = (
                True  # deterministically sample task in validation/testing
            )

        return dict(
            gym_env_types=["Ant-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,  # see above
            task_seeds_list=task_seeds_list,  # see above
            deterministic_sampling=deterministic_sampling,  # see above
            seed=seeds[process_ind],
        )

    # %%
    """
    Note that we just sample 4 tasks for validation and testing in this case, which suffice to illustrate the model's
    success.

    ### Machine parameters

    In this tutorial, we just train the model on the CPU. We allocate a larger number of samplers for training (8) than 
    for validation or testing (just 1), and we default to CPU usage by returning an empty list of `devices`. We also 
    include a video visualizer (`AgentViewViz`) in test mode.
    """

    # %%
    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        visualizer = None
        if mode == "test":
            visualizer = VizSuite(
                mode=mode,
                video_viz=AgentViewViz(
                    label="episode_vid",
                    max_clip_length=400,
                    vector_task_source=("render", {"mode": "rgb_array"}),
                    fps=30,
                ),
            )
        return {
            "nprocesses": 8 if mode == "train" else 1,  # rollout
            "devices": [],
            "visualizer": visualizer,
        }

    # %%
    """
    ### Training pipeline

    The last definition is the training pipeline. In this case, we use a PPO stage with linearly decaying learning rate
    and 10 update repeats (each split into 4 mini-batches) per rollout. The test reward should exceed 4,000
    within 20M steps. In order to make the "ant" run at a visibly fast speed, we train the agent using PPO
    for 3e7 steps.
    """

    # %%
    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        """Build the single-stage PPO training pipeline for the Ant task.

        The pipeline has one PPO stage of 3e7 steps, optimized with Adam at
        an initial learning rate of 3e-4 whose multiplier decays linearly
        from 1 to 0 over the stage.

        # Parameters

        kwargs : unused.

        # Returns

        The `TrainingPipeline` describing the loss, stage, optimizer and
        learning rate schedule.
        """
        total_ppo_steps = int(3e7)
        initial_lr = 3e-4

        ppo_loss = PPO(
            clip_param=0.2,
            value_loss_coef=0.5,
            entropy_coef=0.0,
        )
        single_ppo_stage = PipelineStage(
            loss_names=["ppo_loss"], max_stage_steps=total_ppo_steps
        )
        adam_builder = Builder(
            cast(optim.Optimizer, optim.Adam), dict(lr=initial_lr)
        )
        linear_decay_builder = Builder(
            LambdaLR,
            {"lr_lambda": LinearDecay(steps=total_ppo_steps, startp=1, endp=0)},
        )

        return TrainingPipeline(
            named_losses=dict(ppo_loss=ppo_loss),  # type:ignore
            pipeline_stages=[single_ppo_stage],
            optimizer_builder=adam_builder,
            num_mini_batch=4,  # optimal 64
            update_repeats=10,
            max_grad_norm=0.5,
            num_steps=2048,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=None,
            save_interval=200000,
            metric_accumulate_interval=50000,
            lr_scheduler_builder=linear_decay_builder,
        )


# %%
"""
## Training and validation

We have a complete implementation of this experiment's configuration class in `projects/tutorials/gym_mujoco_tutorial.py`.
To start training from scratch, we just need to invoke

```bash
PYTHONPATH=. python allenact/main.py gym_mujoco_tutorial -b projects/tutorials -m 8 -o /PATH/TO/gym_mujoco_output -s 0 -e
```

from the `allenact` root directory. Note that we include `-e` to enforce deterministic evaluation. Please refer to the
[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md) if in doubt of the meaning of the rest of parameters.

If we have Tensorboard installed, we can track progress with
```bash
tensorboard --logdir /PATH/TO/gym_mujoco_output
```
which will default to the URL [http://localhost:6006/](http://localhost:6006/).

After 30,000,000 steps, the script will terminate. If everything went well, the `valid` success rate should be 1
and the mean reward should rise above 4,000 within 20,000,000 steps, while the average episode length should stay at
or a little below 1,000.

## Testing

The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the
subfolders in the path to the checkpoints, saved under the output folder.
In order to evaluate (i.e. test) a collection of checkpoints, we need to pass the `--eval` flag and specify the 
directory containing the checkpoints with the `--checkpoint CHECKPOINT_DIR` option:
```bash
PYTHONPATH=. python allenact/main.py gym_mujoco_tutorial \
    -b projects/tutorials \
    -m 1 \
    -o /PATH/TO/gym_mujoco_output \
    -s 0 \
    -e \
    --eval \
    --checkpoint /PATH/TO/gym_mujoco_output/checkpoints/GymMuJoCoTutorial/YOUR_START_DATE 
```

If everything went well, the `test` success rate should converge to 1, the mean reward should rise above 4,000
within 20,000,000 steps, and the average episode length should stay at or a little below 1,000.
The output should be something like this:

![results](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_test.png).

And the `gif` results can be seen in the image tab of Tensorboard while testing.

![mp4 demo](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_test.gif)

If the test command fails with `pyglet.canvas.xlib.NoSuchDisplayException: Cannot connect to "None"`, e.g. when running
remotely, try prepending `DISPLAY=:0.0` to the command above, assuming you have an xserver running with such display
available:

```bash
DISPLAY=:0.0 PYTHONPATH=. python allenact/main.py gym_mujoco_tutorial \
    -b projects/tutorials \
    -m 1 \
    -o /PATH/TO/gym_mujoco_output \
    -s 0 \
    -e \
    --eval \
    --checkpoint /PATH/TO/gym_mujoco_output/checkpoints/GymMuJoCoTutorial/YOUR_START_DATE 
```
"""


================================================
FILE: projects/tutorials/gym_tutorial.py
================================================
# literate: tutorials/gym-tutorial.md
# %%
"""# Tutorial: OpenAI gym for continuous control."""

# %%
"""
**Note** The provided commands to execute in this tutorial assume you have
[installed the full library](../installation/installation-allenact.md#full-library) and the requirements for the
`gym_plugin`. The latter can be installed by

```bash
pip install -r allenact_plugins/gym_plugin/extra_requirements.txt
```

In this tutorial, we:

1. Introduce the `gym_plugin`, which enables some of the tasks in [OpenAI's gym](https://gym.openai.com/) for training
and inference within AllenAct.
1. Show an example of continuous control with an arbitrary action space covering 2 policies for one of the `gym` tasks.


## The task

For this tutorial, we'll focus on one of the continuous-control environments under the `Box2D` group of `gym`
environments: [LunarLanderContinuous-v2](https://gym.openai.com/envs/LunarLanderContinuous-v2/). In this task, the goal
is to smoothly land a lunar module in a landing pad, as shown below.

![The LunarLanderContinuous-v2 task](../img/lunar_lander_continuous_demo.png).

To achieve this goal, we need to provide continuous control for a main engine and directional one (2 real values). In
order to solve the task, the expected reward is of at least 200 points. The controls for main and directional engines
are both in the range [-1.0, 1.0] and the observation space is composed of 8 scalars indicating `x` and `y` positions,
`x` and `y` velocities, lander angle and angular velocity, and left and right ground contact. Note that these 8 scalars
provide a full observation of the state.


## Implementation

For this tutorial, we'll use the readily available `gym_plugin`, which includes a
[wrapper for `gym` environments](../api/allenact_plugins/gym_plugin/gym_environment.md#gymenvironment), a
[task sampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler) and
[task definition](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymcontinuousbox2dtask), a
[sensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to wrap the observations provided by the `gym`
environment, and a simple [model](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic).

The experiment config, similar to the one used for the
[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md), is defined as follows:
"""

# %%
from typing import Dict, Optional, List, Any, cast

import gym
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses.ppo import PPO
from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic
from allenact_plugins.gym_plugin.gym_sensors import GymBox2DSensor
from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler
from allenact.utils.experiment_utils import (
    TrainingPipeline,
    Builder,
    PipelineStage,
    LinearDecay,
)
from allenact.utils.viz_utils import VizSuite, AgentViewViz


class GymTutorialExperimentConfig(ExperimentConfig):
    @classmethod
    def tag(cls) -> str:
        """Return the tag string that identifies this experiment."""
        return "GymTutorial"

    # %%
    """
    ### Sensors and Model
    
    As mentioned above, we'll use a [GymBox2DSensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to provide
    full observations from the state of the `gym` environment to our model.
    """

    # %%
    # The `uuid` set here is the same identifier that `create_model` passes to
    # the model as `input_uuid`.
    SENSORS = [
        GymBox2DSensor("LunarLanderContinuous-v2", uuid="gym_box_data"),
    ]

    # %%
    """
    We define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic,
    [MemorylessActorCritic](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). Since
    this is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]`
    instead of `ActorCriticModel[CategoricalDistr]`, since we'll use a
    [Gaussian distribution](../api/allenact_plugins/gym_plugin/gym_distributions.md#gaussiandistr) to sample actions.
    """

    # %%
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Instantiate the `MemorylessActorCritic` model for this experiment.

        The model reads the observation registered under `gym_box_data` and
        acts in a two-dimensional `Box` action space.
        """
        # 2 actors, each in the range [-1.0, 1.0]
        two_actor_space = gym.spaces.Box(-1.0, 1.0, (2,))
        sensor_spaces = SensorSuite(cls.SENSORS).observation_spaces
        return MemorylessActorCritic(
            input_uuid="gym_box_data",
            action_space=two_actor_space,
            observation_space=sensor_spaces,
            action_std=0.5,
        )

    # %%
    """
    ### Task samplers
    We use an available `TaskSampler` implementation for `gym` environments that allows us to sample
    [GymTasks](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtask):
    [GymTaskSampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler). Even though it is possible to let the task
    sampler instantiate the proper sensor for the chosen task name (by passing `None`), we use the sensors we created
    above, which contain a custom identifier for the actual observation space (`gym_box_data`) also used by the model.
    """

    # %%
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Build a `GymTaskSampler`, forwarding all keyword arguments to it."""
        return GymTaskSampler(**kwargs)

    # %%
    """
    For convenience, we will use a `_get_sampler_args` method to generate the task sampler arguments for all three
    modes, `train, valid, test`:
    """

    # %%
    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for training sampler `process_ind`.

        Only `process_ind` and `seeds` are forwarded to `_get_sampler_args`;
        the remaining parameters are not used by this method.
        """
        sampler_args = self._get_sampler_args(
            mode="train", process_ind=process_ind, seeds=seeds
        )
        return sampler_args

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for validation sampler `process_ind`.

        Only `process_ind` and `seeds` are forwarded to `_get_sampler_args`;
        the remaining parameters are not used by this method.
        """
        sampler_args = self._get_sampler_args(
            mode="valid", process_ind=process_ind, seeds=seeds
        )
        return sampler_args

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for test sampler `process_ind`.

        Only `process_ind` and `seeds` are forwarded to `_get_sampler_args`;
        the remaining parameters are not used by this method.
        """
        sampler_args = self._get_sampler_args(
            mode="test", process_ind=process_ind, seeds=seeds
        )
        return sampler_args

    # %%
    """
    Similarly to what we do in the Minigrid navigation tutorial, during training the task sampler samples random tasks
    forever, while, during testing (or validation), we sample a fixed number of tasks.
    """

    # %%
    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: Optional[List[int]] = None
    ) -> Dict[str, Any]:
        """Generate initialization arguments for train, valid, and test
        TaskSamplers.

        # Parameters
        process_ind : index of the current task sampler
        mode:  one of `train`, `valid`, or `test`
        seeds : optional list of seeds indexed by `process_ind`; when it is
            `None`, `seed=None` is returned instead of indexing into it.

        # Returns
        A dictionary with the task sampler initialization arguments.
        """
        if mode == "train":
            max_tasks = None  # infinite training tasks
            task_seeds_list = None  # no predefined random seeds for training
            deterministic_sampling = False  # randomly sample tasks in training
        else:
            max_tasks = 3

            # one seed for each task to sample:
            # - ensures different seeds for each sampler, and
            # - ensures a deterministic set of sampled tasks.
            task_seeds_list = list(
                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)
            )

            deterministic_sampling = (
                True  # deterministically sample task in validation/testing
            )

        # The public `*_task_sampler_args` methods default `seeds` to `None` and
        # forward it unchanged, so guard the lookup instead of failing with a
        # `TypeError` when indexing `None`.
        # NOTE(review): assumes the task sampler treats `seed=None` as "no
        # explicit seed" -- confirm against `GymTaskSampler`.
        seed = None if seeds is None else seeds[process_ind]

        return dict(
            gym_env_types=["LunarLanderContinuous-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,  # see above
            task_seeds_list=task_seeds_list,  # see above
            deterministic_sampling=deterministic_sampling,  # see above
            seed=seed,
        )

    # %%
    """
    Note that we just sample 3 tasks for validation and testing in this case, which suffice to illustrate the model's
    success.
    
    ### Machine parameters

    Given the simplicity of the task and model, we can just train the model on the CPU. During training, success should
    reach 100% in less than 10 minutes, whereas solving the task (evaluation reward > 200) might take about 20 minutes
    (on a laptop CPU).
    
    We allocate a larger number of samplers for training (8) than for validation or testing (just 1), and we default to
    CPU usage by returning an empty list of `devices`. We also include a video visualizer (`AgentViewViz`) in test mode.
    """

    # %%
    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        """Return the machine configuration for the given `mode`.

        Training uses 8 processes and every other mode uses 1. `devices` is
        always an empty list. A `VizSuite` with an `AgentViewViz` video
        visualizer is attached only when `mode == "test"`.
        """
        params: Dict[str, Any] = {
            "nprocesses": 8 if mode == "train" else 1,
            "devices": [],
            "visualizer": None,
        }
        if mode == "test":
            episode_video = AgentViewViz(
                label="episode_vid",
                max_clip_length=400,
                vector_task_source=("render", {"mode": "rgb_array"}),
                fps=30,
            )
            params["visualizer"] = VizSuite(mode=mode, video_viz=episode_video)
        return params

    # %%
    """
    ### Training pipeline
    
    The last definition is the training pipeline. In this case, we use a PPO stage with linearly decaying learning rate
    and 80 single-batch update repeats per rollout:
    """

    # %%
    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        """Assemble the one-stage PPO training pipeline for the tutorial.

        A single `PipelineStage` runs `ppo_loss` for `int(1.2e6)` steps, using
        Adam with `lr=1e-3` and a `LambdaLR` scheduler whose `lr_lambda` is a
        `LinearDecay` over the same number of steps. Updates use
        `num_mini_batch=1` with `update_repeats=80`.
        """
        total_steps = int(1.2e6)

        losses = {
            "ppo_loss": PPO(clip_param=0.2, value_loss_coef=0.5, entropy_coef=0.0),
        }
        stages = [PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps)]
        adam_builder = Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-3))
        decay_builder = Builder(
            LambdaLR,
            {"lr_lambda": LinearDecay(steps=total_steps)},  # type:ignore
        )

        return TrainingPipeline(
            named_losses=losses,  # type:ignore
            pipeline_stages=stages,
            optimizer_builder=adam_builder,
            num_mini_batch=1,
            update_repeats=80,
            max_grad_norm=100,
            num_steps=2000,
            gamma=0.99,
            use_gae=False,
            gae_lambda=0.95,
            advance_scene_rollout_period=None,
            save_interval=200000,
            metric_accumulate_interval=50000,
            lr_scheduler_builder=decay_builder,
        )


# %%
"""
## Training and validation

We have a complete implementation of this experiment's configuration class in `projects/tutorials/gym_tutorial.py`.
To start training from scratch, we just need to invoke

```bash
PYTHONPATH=. python allenact/main.py gym_tutorial -b projects/tutorials -m 8 -o /PATH/TO/gym_output -s 54321 -e
```

from the `allenact` root directory. Note that we include `-e` to enforce deterministic evaluation. Please refer to the
[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md) if in doubt of the meaning of the rest of parameters.

If we have Tensorboard installed, we can track progress with
```bash
tensorboard --logdir /PATH/TO/gym_output
```
which will default to the URL [http://localhost:6006/](http://localhost:6006/).

After 1,200,000 steps, the script will terminate. If everything went well, the `valid` success rate should quickly
converge to 1 and the mean reward to above 250, while the average episode length should stay below or near 300.

## Testing

The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the
subfolders in the path to the checkpoints, saved under the output folder.
In order to evaluate (i.e. test) a collection of checkpoints, we need to pass the `--eval` flag and specify the 
directory containing the checkpoints with the `--checkpoint CHECKPOINT_DIR` option:
```bash
PYTHONPATH=. python allenact/main.py gym_tutorial \
    -b projects/tutorials \
    -m 1 \
    -o /PATH/TO/gym_output \
    -s 54321 \
    -e \
    --eval \
    --checkpoint /PATH/TO/gym_output/checkpoints/GymTutorial/YOUR_START_DATE \
    --approx_ckpt_step_interval 800000 # Skip some checkpoints
```

The option `--approx_ckpt_step_interval 800000` tells AllenAct that we only want to evaluate checkpoints
which were saved every ~800000 steps; this lets us avoid evaluating every saved checkpoint. If everything went well, 
the `test` success rate should converge to 1, the episode length below or near 300 steps, and the mean reward to above 
250. The images tab in tensorboard will contain videos for the sampled test episodes.

![video_results](../img/lunar_lander_continuous_test.png).

If the test command fails with `pyglet.canvas.xlib.NoSuchDisplayException: Cannot connect to "None"`, e.g. when running
remotely, try prepending `DISPLAY=:0.0` to the command above, assuming you have an xserver running with such display
available:

```bash
DISPLAY=:0.0 PYTHONPATH=. python allenact/main.py gym_tutorial \
    -b projects/tutorials \
    -m 1 \
    -o /PATH/TO/gym_output \
    -s 54321 \
    -e \
    --eval \
    --checkpoint /PATH/TO/gym_output/checkpoints/GymTutorial/YOUR_START_DATE \
    --approx_ckpt_step_interval 800000 
```
"""


================================================
FILE: projects/tutorials/minigrid_offpolicy_tutorial.py
================================================
# literate: tutorials/offpolicy-tutorial.md
# %%
"""# Tutorial: Off-policy training."""

# %%
"""

**Note** The provided commands to execute in this tutorial assume you have
[installed the full library](../installation/installation-allenact.md#full-library) and the `extra_requirements`
for the `babyai_plugin` and `minigrid_plugin`. The latter can be installed with:

```bash
pip install -r allenact_plugins/babyai_plugin/extra_requirements.txt; pip install -r allenact_plugins/minigrid_plugin/extra_requirements.txt
```

In this tutorial we'll learn how to train an agent from an external dataset by imitating expert actions via
Behavior Cloning. We'll use a [BabyAI agent](/api/allenact_plugins/babyai_plugin/babyai_models#BabyAIRecurrentACModel) to solve
`GoToLocal` tasks on [MiniGrid](https://github.com/maximecb/gym-minigrid); see the
`projects/babyai_baselines/experiments/go_to_local` directory for more details.

This tutorial assumes `AllenAct`'s [abstractions](../getting_started/abstractions.md) are known.

## The task

In a `GoToLocal` task, the agent immersed in a grid world has to navigate to a specific object in the presence of
multiple distractors, requiring the agent to understand `go to` instructions like "go to the red ball". For further
details, please consult the [original paper](https://arxiv.org/abs/1810.08272).

## Getting the dataset

We will use a large dataset (**more than 4 GB**) including expert demonstrations for `GoToLocal` tasks. To download
the data we'll run

```bash
PYTHONPATH=. python allenact_plugins/babyai_plugin/scripts/download_babyai_expert_demos.py GoToLocal
```

from the project's root directory, which will download `BabyAI-GoToLocal-v0.pkl` and `BabyAI-GoToLocal-v0_valid.pkl` to
the `allenact_plugins/babyai_plugin/data/demos` directory.

We will also generate small versions of the datasets, which will be useful if running on CPU, by calling

```bash
PYTHONPATH=. python allenact_plugins/babyai_plugin/scripts/truncate_expert_demos.py
```
from the project's root directory, which will generate `BabyAI-GoToLocal-v0-small.pkl` under the same
`allenact_plugins/babyai_plugin/data/demos` directory.

## Data storage

In order to train with an off-policy dataset, we need to define an `ExperienceStorage`. In AllenAct, an
 `ExperienceStorage` object has two primary functions:

1. It stores/manages relevant data (e.g. similarly to the `Dataset` class in PyTorch).
2. It loads stored data into batches that will be used for loss computation (e.g. similarly to the `DataLoader`
class in PyTorch).

Unlike a PyTorch `Dataset` however, an `ExperienceStorage` object can build its dataset **at runtime** by processing
 rollouts from the agent. This flexibility allows us to, for example, implement the experience replay data structure
 used in deep Q-learning. For this tutorial we won't need this additional functionality as our off-policy dataset
 is a fixed collection of expert trajectories.    

An example of an `ExperienceStorage` for BabyAI expert demos might look as follows:
"""

# %% import_summary allenact_plugins.minigrid_plugin.minigrid_offpolicy.MiniGridExpertTrajectoryStorage

# %%
"""
A complete example can be found in
[MiniGridExpertTrajectoryStorage](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy#MiniGridExpertTrajectoryStorage).

## Loss function

Off-policy losses must implement the
[`GenericAbstractLoss`](/api/allenact/base_abstractions/misc/#genericabstractloss)
interface. In this case, we minimize the cross-entropy between the actor's policy and the expert action:
"""

# %% import allenact_plugins.minigrid_plugin.minigrid_offpolicy.MiniGridOffPolicyExpertCELoss

# %%
"""
A complete example can be found in
[MiniGridOffPolicyExpertCELoss](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy#MiniGridOffPolicyExpertCELoss).
Note that in this case we train the entire actor, but it would also be possible to forward data through a different
subgraph of the ActorCriticModel.

## Experiment configuration

For the experiment configuration, we'll build on top of an existing
[base BabyAI GoToLocal Experiment Config](/api/projects/babyai_baselines/experiments/go_to_local/base/#basebabyaigotolocalexperimentconfig).
The complete `ExperimentConfig` file for off-policy training is
[here](/api/projects/tutorials/minigrid_offpolicy_tutorial/#bcoffpolicybabyaigotolocalexperimentconfig), but let's
focus on the most relevant aspect to enable this type of training:
providing an [OffPolicyPipelineComponent](/api/allenact/utils/experiment_utils/#offpolicypipelinecomponent) object as input to a
`PipelineStage` when instantiating the `TrainingPipeline` in the `training_pipeline` method.
"""

# %% hide
import os
from typing import Optional, List, Tuple

import torch
from gym_minigrid.minigrid import MiniGridEnv

from allenact.algorithms.onpolicy_sync.storage import RolloutBlockStorage
from allenact.utils.experiment_utils import (
    PipelineStage,
    StageComponent,
    TrainingSettings,
)
from allenact_plugins.babyai_plugin.babyai_constants import (
    BABYAI_EXPERT_TRAJECTORIES_DIR,
)
from allenact_plugins.minigrid_plugin.minigrid_offpolicy import (
    MiniGridOffPolicyExpertCELoss,
    MiniGridExpertTrajectoryStorage,
)
from projects.babyai_baselines.experiments.go_to_local.base import (
    BaseBabyAIGoToLocalExperimentConfig,
)


# %%
class BCOffPolicyBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):
    """BC Off-policy imitation.

    Trains with a single pipeline stage whose only loss is a cross-entropy
    loss computed on expert trajectories read from an off-policy storage.
    """

    DATASET: Optional[List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]] = None

    GPU_ID = 0 if torch.cuda.is_available() else None

    @classmethod
    def tag(cls):
        """Return the tag string that identifies this experiment."""
        return "BabyAIGoToLocalBCOffPolicy"

    @classmethod
    def METRIC_ACCUMULATE_INTERVAL(cls):
        """Return the metric accumulation interval (always 1)."""
        # See BaseBabyAIGoToLocalExperimentConfig for how this is used.
        return 1

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build the pure off-policy behavior-cloning training pipeline.

        The PPO defaults from `rl_loss_default` are only used to derive the
        number of off-policy update repeats; no on-policy loss is trained.
        """
        total_train_steps = cls.TOTAL_IL_TRAIN_STEPS
        ppo_defaults = cls.rl_loss_default("ppo", steps=-1)
        # Off-policy updates use a single mini-batch, so the default mini-batch
        # count is folded into the number of update repeats instead.
        offpolicy_repeats = (
            ppo_defaults["num_mini_batch"] * ppo_defaults["update_repeats"]
        )

        expert_ce_loss = MiniGridOffPolicyExpertCELoss(
            total_episodes_in_epoch=int(1e6),
        )

        onpolicy_storage = RolloutBlockStorage()
        # Use the full expert dataset when CUDA is available and the "-small"
        # one otherwise.
        dataset_suffix = "" if torch.cuda.is_available() else "-small"
        expert_storage = MiniGridExpertTrajectoryStorage(
            data_path=os.path.join(
                BABYAI_EXPERT_TRAJECTORIES_DIR,
                "BabyAI-GoToLocal-v0{}.pkl".format(dataset_suffix),
            ),
            num_samplers=cls.NUM_TRAIN_SAMPLERS,
            rollout_len=cls.ROLLOUT_STEPS,
            instr_len=cls.INSTR_LEN,
        )

        offpolicy_component = StageComponent(
            uuid="offpolicy",
            storage_uuid="minigrid_offpolicy_expert",
            loss_names=["offpolicy_expert_ce_loss"],
            training_settings=TrainingSettings(
                update_repeats=offpolicy_repeats,
                num_mini_batch=1,
            ),
        )
        # Single stage, only with off-policy training
        offpolicy_stage = PipelineStage(
            loss_names=["offpolicy_expert_ce_loss"],  # no on-policy losses
            max_stage_steps=total_train_steps,  # keep sampling episodes in the stage
            stage_components=[offpolicy_component],
        )

        return cls._training_pipeline(
            named_losses={"offpolicy_expert_ce_loss": expert_ce_loss},
            named_storages={
                "onpolicy": onpolicy_storage,
                "minigrid_offpolicy_expert": expert_storage,
            },
            pipeline_stages=[offpolicy_stage],
            # As we don't have any on-policy losses, we set the next
            # two values to zero to ensure we don't attempt to
            # compute gradients for on-policy rollouts:
            num_mini_batch=0,
            update_repeats=0,
            total_train_steps=total_train_steps,
        )


# %%
"""
You'll have noted that it is possible to combine on-policy and off-policy training in the same stage, even though here
we apply pure off-policy training.

## Training

We recommend using a machine with a CUDA-capable GPU for this experiment. In order to start training, we just need to
invoke

```bash
PYTHONPATH=. python allenact/main.py -b projects/tutorials minigrid_offpolicy_tutorial -m 8 -o <OUTPUT_PATH>
```

Note that with the `-m 8` option we limit to 8 the number of on-policy task sampling processes used between off-policy
updates.

If everything goes well, the training success should quickly reach values around 0.7-0.8 on GPU and converge to values
close to 1 if given sufficient time to train.

If running tensorboard, you'll notice a separate group of scalars named `train-offpolicy-losses` and 
 `train-offpolicy-misc` with losses, approximate "experiences per second" (i.e. the number of off-policy experiences/steps
 being used to update the model per second), and other tracked values in addition to the standard `train-onpolicy-*`
  used for on-policy training. In the `train-metrics` and `train-misc` sections you'll find the metrics 
  quantifying the performance of the agent throughout training and some other plots showing training details.
  *Note that the x-axis for these plots is different than for the `train-offpolicy-*` sections*. This
  is because these plots use the number of rollout steps as the x-axis (i.e. steps that the trained agent
  takes interactively) while the `train-offpolicy-*` plots use the number of offpolicy "experiences" that have
  been shown to the agent.
  

A view of the training progress about 5 hours after starting on a CUDA-capable GPU should look similar to the below
(note that training reached >99% success after about 50 minutes).

![off-policy progress](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/minigrid-offpolicy/minigrid-offpolicy-tutorial-tb.png)
"""


================================================
FILE: projects/tutorials/minigrid_tutorial.py
================================================
# literate: tutorials/minigrid-tutorial.md
# %%
"""# Tutorial: Navigation in MiniGrid."""

# %%
"""
In this tutorial, we will train an agent to complete the `MiniGrid-Empty-Random-5x5-v0` task within the
[MiniGrid](https://github.com/maximecb/gym-minigrid) environment. We will demonstrate how to:

* Write an experiment configuration file with a simple training pipeline from scratch.
* Use one of the supported environments with minimal user effort.
* Train, validate and test your experiment from the command line.

This tutorial assumes the [installation instructions](../installation/installation-allenact.md) have already been
followed and that, to some extent, this framework's [abstractions](../getting_started/abstractions.md) are known.
The `extra_requirements` for `minigrid_plugin` and `babyai_plugin` can be installed with:

```bash
pip install -r allenact_plugins/minigrid_plugin/extra_requirements.txt; pip install -r allenact_plugins/babyai_plugin/extra_requirements.txt
```

## The task
A `MiniGrid-Empty-Random-5x5-v0` task consists of a grid of dimensions 5x5 where an agent spawned at a random
location and orientation has to navigate to the visitable bottom right corner cell of the grid by sequences of three
possible actions (rotate left/right and move forward). A visualization of the environment with expert steps in a random
`MiniGrid-Empty-Random-5x5-v0` task looks like

![MiniGridEmptyRandom5x5 task example](../img/minigrid_environment.png)

The observation for the agent is a subset of the entire grid, simulating a simplified limited field of view, as
depicted by the highlighted rectangle (observed subset of the grid) around the agent (red arrow). Gray cells correspond
to walls.

## Experiment configuration file

Our complete experiment consists of:

* Training a basic actor-critic agent with memory to solve randomly sampled navigation tasks.
* Validation on a fixed set of tasks (running in parallel with training).
* A second stage where we test saved checkpoints with a larger fixed set of tasks.

The entire configuration for the experiment, including training, validation, and testing, is encapsulated in a single 
class implementing the `ExperimentConfig` abstraction. For this tutorial, we will follow the config under
`projects/tutorials/minigrid_tutorial.py`. 

The `ExperimentConfig` abstraction is used by the
[OnPolicyTrainer](../api/allenact/algorithms/onpolicy_sync/engine.md#onpolicytrainer) class (for training) and the
[OnPolicyInference](../api/allenact/algorithms/onpolicy_sync/engine.md#onpolicyinference) class (for validation and testing)
invoked through the entry script `main.py` that calls an orchestrating
[OnPolicyRunner](../api/allenact/algorithms/onpolicy_sync/runner.md#onpolicyrunner) class. It includes:

* A `tag` method to identify the experiment.
* A `create_model` method to instantiate actor-critic models.
* A `make_sampler_fn` method to instantiate task samplers.
* Three `{train,valid,test}_task_sampler_args` methods describing initialization parameters for task samplers used in
training, validation, and testing; including assignment of workers to devices for simulation.
* A `machine_params` method with configuration parameters that will be used for training, validation, and testing.
* A `training_pipeline` method describing a possibly multi-staged training pipeline with different types of losses,
an optimizer, and other parameters like learning rates, batch sizes, etc.

### Preliminaries

We first import everything we'll need to define our experiment.
"""

# %%
from typing import Dict, Optional, List, Any, cast

import gym
from gym_minigrid.envs import EmptyRandomEnv5x5
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler
from allenact.base_abstractions.sensor import SensorSuite
from allenact.utils.experiment_utils import (
    TrainingPipeline,
    Builder,
    PipelineStage,
    LinearDecay,
)
from allenact_plugins.minigrid_plugin.minigrid_models import MiniGridSimpleConvRNN
from allenact_plugins.minigrid_plugin.minigrid_sensors import EgocentricMiniGridSensor
from allenact_plugins.minigrid_plugin.minigrid_tasks import (
    MiniGridTaskSampler,
    MiniGridTask,
)

# %%
"""
We now create the `MiniGridTutorialExperimentConfig` class which we will use to define our experiment. 
For pedagogical reasons, we will add methods to this class one at a time below with a description of what
these methods do.  
"""


# %%
class MiniGridTutorialExperimentConfig(ExperimentConfig):

    # %%
    """An experiment is identified by a `tag`."""

    # %%
    @classmethod
    def tag(cls) -> str:
        """Return the string tag, `"MiniGridTutorial"`, that identifies this experiment."""
        return "MiniGridTutorial"

    # %%
    """
    ### Sensors and Model
    
    A readily available Sensor type for MiniGrid,
    [EgocentricMiniGridSensor](../api/allenact_plugins/minigrid_plugin/minigrid_sensors.md#egocentricminigridsensor),
    allows us to extract observations in a format consumable by an `ActorCriticModel` agent:
    """

    # %%
    # Sensors used to produce the agent's observations: a single egocentric
    # MiniGrid sensor with `agent_view_size=5` and three view channels.
    SENSORS = [
        EgocentricMiniGridSensor(agent_view_size=5, view_channels=3),
    ]

    # %%
    """
    The three `view_channels` include objects, colors and states corresponding to a partial observation of the environment
    as an image tensor, equivalent to that from `ImgObsWrapper` in
    [MiniGrid](https://github.com/maximecb/gym-minigrid#wrappers). The
    relatively large `agent_view_size` means the view will only be clipped by the environment walls in the forward and
    lateral directions with respect to the agent's orientation.
    
    We define our `ActorCriticModel` agent using a lightweight implementation with recurrent memory for MiniGrid
    environments, [MiniGridSimpleConvRNN](../api/allenact_plugins/minigrid_plugin/minigrid_models.md#minigridsimpleconvrnn):
    """

    # %%
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the `MiniGridSimpleConvRNN` actor-critic model for this experiment.

        The action space is discrete with one action per name returned by
        `MiniGridTask.class_action_names()`. The observation space and the
        object/color/state counts are read from `cls.SENSORS`. Any keyword
        arguments are accepted but not used.
        """
        minigrid_sensor = cls.SENSORS[0]
        num_actions = len(MiniGridTask.class_action_names())
        sensor_suite = SensorSuite(cls.SENSORS)

        return MiniGridSimpleConvRNN(
            action_space=gym.spaces.Discrete(num_actions),
            observation_space=sensor_suite.observation_spaces,
            num_objects=minigrid_sensor.num_objects,
            num_colors=minigrid_sensor.num_colors,
            num_states=minigrid_sensor.num_states,
        )

    # %%
    """
    ### Task samplers
    
    We use an available TaskSampler implementation for MiniGrid environments that allows us to sample both random and
    deterministic `MiniGridTasks`,
    [MiniGridTaskSampler](../api/allenact_plugins/minigrid_plugin/minigrid_tasks.md#minigridtasksampler):
    """

    # %%
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate a `MiniGridTaskSampler`, forwarding all keyword arguments to its constructor."""
        return MiniGridTaskSampler(**kwargs)

    # %%
    """
    This task sampler will, during training (or validation/testing), randomly initialize new tasks for the agent to complete.
    While it is not quite as important for this task type (as we test our agent in the same setting it is trained on) there
    are a lot of good reasons we would like to sample tasks differently during training than during validation or testing.
    One good reason, that is applicable in this tutorial, is that, during training, we would like to be able to sample tasks
    forever while, during testing, we would like to sample a fixed number of tasks (as otherwise we would never finish
    testing!). In `allenact` this is made possible by defining different arguments for the task sampler:
    """

    # %%
    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler initialization arguments for training.

        Only `process_ind` is used here; the remaining parameters are accepted
        but ignored. The result is built by `_get_sampler_args` with
        `mode="train"`.
        """
        return self._get_sampler_args(process_ind=process_ind, mode="train")

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler initialization arguments for validation.

        Only `process_ind` is used here; the remaining parameters are accepted
        but ignored. The result is built by `_get_sampler_args` with
        `mode="valid"`.
        """
        return self._get_sampler_args(process_ind=process_ind, mode="valid")

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler initialization arguments for testing.

        Only `process_ind` is used here; the remaining parameters are accepted
        but ignored. The result is built by `_get_sampler_args` with
        `mode="test"`.
        """
        return self._get_sampler_args(process_ind=process_ind, mode="test")

    # %%
    """
    where, for convenience, we have defined a `_get_sampler_args` method:
    """

    # %%
    def _get_sampler_args(self, process_ind: int, mode: str) -> Dict[str, Any]:
        """Assemble the keyword arguments used to build a TaskSampler.

        # Parameters
        process_ind : index of the current task sampler
        mode:  one of `train`, `valid`, or `test`

        # Returns
        A dictionary with the keys `max_tasks`, `env_class`, `sensors`,
        `env_info`, `task_seeds_list`, and `deterministic_sampling`.
        """
        is_training = mode == "train"

        if is_training:
            # Training: unlimited tasks and no predefined seeds.
            max_tasks = None
            task_seeds_list = None
        else:
            # Evaluation: 20 tasks per sampler for valid, 40 for test.
            max_tasks = 40 if mode == "test" else 20

            # A contiguous, sampler-specific range of seeds (one per task):
            # different samplers get disjoint seeds and the set of sampled
            # tasks is reproducible.
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))

        return {
            "max_tasks": max_tasks,
            # builder for the third-party environment
            "env_class": self.make_env,
            # sensors used to return observations to the agent
            "sensors": self.SENSORS,
            # parameters for the environment builder (none for now)
            "env_info": {},
            "task_seeds_list": task_seeds_list,
            # random sampling in training, deterministic in validation/testing
            "deterministic_sampling": not is_training,
        }

    @staticmethod
    def make_env(*args, **kwargs):
        """Create a new `EmptyRandomEnv5x5` environment; all arguments are ignored."""
        return EmptyRandomEnv5x5()

    # %%
    """
    Note that the `env_class` argument to the Task Sampler is the one determining which task type we are going to train the
    model for (in this case, `MiniGrid-Empty-Random-5x5-v0` from
    [gym-minigrid](https://github.com/maximecb/gym-minigrid#empty-environment)).
    The sparse reward is
    [given by the environment](https://github.com/maximecb/gym-minigrid/blob/6e22a44dc67414b647063692258a4f95ce789161/gym_minigrid/minigrid.py#L819),
    and the maximum task length is 100. For training, we opt for a default random sampling, whereas for validation and
    test we define fixed sets of randomly sampled tasks without needing to explicitly define a dataset.
    
    In this toy example, the maximum number of different tasks is 32. For validation we sample 320 tasks using 16 samplers,
    or 640 for testing, so we can be fairly sure that all possible tasks are visited at least once during evaluation.
    
    ### Machine parameters
    
    Given the simplicity of the task and model, we can quickly train the model on the CPU:
    """

    # %%
    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        """Return the number of task samplers and the devices to use for `mode`.

        Training uses 128 samplers; any other mode uses 16. The empty
        `devices` list is always returned.
        """
        num_samplers = 16
        if mode == "train":
            num_samplers = 128
        return dict(nprocesses=num_samplers, devices=[])

    # %%
    """
    We allocate a larger number of samplers for training (128) than for validation or testing (16), and we default to CPU
    usage by returning an empty list of `devices`.
    
    ### Training pipeline
    
    The last definition required before starting to train is a training pipeline. In this case, we just use a single PPO
    stage with linearly decaying learning rate:
    """

    # %%
    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        """Define a single-stage PPO training pipeline.

        The only stage optimizes the `ppo_loss` for 150000 steps using Adam
        (initial learning rate 1e-4) with a `LinearDecay` learning rate
        schedule spanning the same number of steps.
        """
        total_steps = 150000

        ppo_stage = PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps)
        adam_builder = Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4))
        lr_decay_builder = Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}  # type:ignore
        )

        return TrainingPipeline(
            named_losses={"ppo_loss": PPO(**PPOConfig)},  # type:ignore
            pipeline_stages=[ppo_stage],
            optimizer_builder=adam_builder,
            num_mini_batch=4,
            update_repeats=3,
            max_grad_norm=0.5,
            num_steps=16,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=None,
            save_interval=10000,
            metric_accumulate_interval=1,
            lr_scheduler_builder=lr_decay_builder,
        )


# %%
"""
You can see that we use a `Builder` class to postpone the construction of some of the elements, like the optimizer,
for which the model weights need to be known.

## Training and validation

We have a complete implementation of this experiment's configuration class in `projects/tutorials/minigrid_tutorial.py`.
To start training from scratch, we just need to invoke

```bash
PYTHONPATH=. python allenact/main.py minigrid_tutorial -b projects/tutorials -m 8 -o /PATH/TO/minigrid_output -s 12345
```

from the `allenact` root directory.

* With `-b projects/tutorials` we tell `allenact` that the `minigrid_tutorial` experiment config file
will be found in the `projects/tutorials` directory.
* With `-m 8` we limit the number of subprocesses to 8 (each subprocess will run 16 of the 128 training task samplers).
* With `-o /PATH/TO/minigrid_output` we set the output folder into which results and logs will be saved.
* With `-s 12345` we set the random seed.

If we have Tensorboard installed, we can track progress with
```bash
tensorboard --logdir /PATH/TO/minigrid_output
```
which will default to the URL [http://localhost:6006/](http://localhost:6006/).

After 150,000 steps, the script will terminate and several checkpoints will be saved in the output folder.
The training curves should look similar to:

![training curves](../img/minigrid_train.png)

If everything went well, the `valid` success rate should converge to 1 and the mean episode length to a value below 4.
(For perfectly uniform sampling and complete observation, the expectation for the optimal policy is 3.75 steps.) In the
not-so-unlikely event of the run failing to converge to a near-optimal policy, we can just try to re-run (for example
with a different random seed). The validation curves should look similar to:

![validation curves](../img/minigrid_valid.png)

## Testing

The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the
subfolders in the path to the checkpoints, saved under the output folder.
In order to evaluate (i.e. test) a particular checkpoint, we need to pass the `--eval` flag and specify the checkpoint with the
`--checkpoint CHECKPOINT_PATH` option:
```bash
PYTHONPATH=. python allenact/main.py minigrid_tutorial \
    -b projects/tutorials \
    -m 1 \
    -o /PATH/TO/minigrid_output \
    -s 12345 \
    --eval \
    --checkpoint /PATH/TO/minigrid_output/checkpoints/MiniGridTutorial/YOUR_START_DATE/exp_MiniGridTutorial__stage_00__steps_000000151552.pt
```

Again, if everything went well, the `test` success rate should converge to 1 and the mean episode length to a value
below 4. Detailed results are saved under a `metrics` subfolder in the output folder.
The test curves should look similar to:

![test curves](../img/minigrid_test.png)
"""


================================================
FILE: projects/tutorials/minigrid_tutorial_conds.py
================================================
from typing import Dict, Optional, List, Any, cast, Callable, Union, Tuple

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from gym_minigrid.envs import EmptyRandomEnv5x5
from gym_minigrid.minigrid import MiniGridEnv
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses.imitation import Imitation
from allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig
from allenact.algorithms.onpolicy_sync.policy import ActorCriticModel, DistributionType
from allenact.base_abstractions.distributions import (
    CategoricalDistr,
    ConditionalDistr,
    SequentialDistr,
)
from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler
from allenact.base_abstractions.misc import ActorCriticOutput, Memory, RLStepResult
from allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor
from allenact.embodiedai.models.basic_models import RNNStateEncoder
from allenact.utils.experiment_utils import (
    TrainingPipeline,
    Builder,
    PipelineStage,
    LinearDecay,
)
from allenact.utils.misc_utils import prepare_locals_for_super
from allenact_plugins.minigrid_plugin.minigrid_models import MiniGridSimpleConvBase
from allenact_plugins.minigrid_plugin.minigrid_sensors import EgocentricMiniGridSensor
from allenact_plugins.minigrid_plugin.minigrid_tasks import (
    MiniGridTaskSampler,
    MiniGridTask,
)


class ConditionedLinearActorCriticHead(nn.Module):
    """Linear actor-critic head producing a two-level ("higher"/"lower") policy.

    One linear layer maps the input to the logits of the "higher" categorical
    distribution together with the value estimate. A second linear layer,
    fed with the concatenation of an embedding of the "higher" action and the
    input, produces the logits of the "lower" categorical distribution.

    # Parameters
    input_size : size of the last dimension of the input features.
    master_actions : number of actions of the "higher" distribution.
    subpolicy_actions : number of actions of the "lower" distribution.
    """

    def __init__(
        self, input_size: int, master_actions: int = 2, subpolicy_actions: int = 2
    ):
        super().__init__()
        self.input_size = input_size
        # The extra output unit (+1) is the critic's value estimate.
        self.master_and_critic = nn.Linear(input_size, master_actions + 1)
        # One embedding row per "higher" action. This used to be hard-coded to
        # 2, which made the lookup in `lower_policy` fail for any
        # `master_actions > 2`.
        self.embed_higher = nn.Embedding(
            num_embeddings=master_actions, embedding_dim=input_size
        )
        # The actor sees the "higher" action embedding concatenated with the input.
        self.actor = nn.Linear(2 * input_size, subpolicy_actions)

        nn.init.orthogonal_(self.master_and_critic.weight)
        nn.init.constant_(self.master_and_critic.bias, 0)
        nn.init.orthogonal_(self.actor.weight)
        nn.init.constant_(self.actor.bias, 0)

    def lower_policy(self, *args, **kwargs):
        """Build the "lower" distribution conditioned on the "higher" action.

        Requires the keyword arguments `higher` (indices passed to the
        embedding layer) and `state_embedding` (features concatenated with
        the embedding along the last dimension). Returns a `CategoricalDistr`
        over the "lower" actions.
        """
        assert "higher" in kwargs
        assert "state_embedding" in kwargs
        emb = self.embed_higher(kwargs["higher"])
        logits = self.actor(torch.cat([emb, kwargs["state_embedding"]], dim=-1))
        return CategoricalDistr(logits=logits)

    def forward(self, x):
        """Return the sequential ("higher" then "lower") distribution and the values for `x`."""
        out = self.master_and_critic(x)

        # All but the last output are "higher" logits; the last one is the value.
        master_logits = out[..., :-1]
        values = out[..., -1:]

        # noinspection PyArgumentList
        cond1 = ConditionalDistr(
            distr_conditioned_on_input_fn_or_instance=CategoricalDistr(
                logits=master_logits
            ),
            action_group_name="higher",
        )
        cond2 = ConditionalDistr(
            distr_conditioned_on_input_fn_or_instance=lambda *args, **kwargs: ConditionedLinearActorCriticHead.lower_policy(
                self, *args, **kwargs
            ),
            action_group_name="lower",
            state_embedding=x,
        )

        return (
            SequentialDistr(cond1, cond2),
            values.view(*values.shape[:2], -1),  # [steps, samplers, flattened]
        )


class ConditionedLinearActorCritic(ActorCriticModel[SequentialDistr]):
    """Memoryless actor-critic model wrapping a `ConditionedLinearActorCriticHead`.

    The model reads a single one-dimensional `Box` observation, identified by
    `input_uuid`, and feeds it to the head. The action space must provide
    discrete `"higher"` and `"lower"` entries, whose sizes `n` configure the
    head.
    """

    def __init__(
        self,
        input_uuid: str,
        action_space: gym.spaces.Dict,
        observation_space: gym.spaces.Dict,
    ):
        super().__init__(action_space=action_space, observation_space=observation_space)

        assert (
            input_uuid in observation_space.spaces
        ), "ConditionedLinearActorCritic expects only a single observational input."
        self.input_uuid = input_uuid

        box_space: gym.spaces.Box = observation_space[self.input_uuid]
        # Adjacent string literals are concatenated verbatim, so the trailing
        # space after "that" is required for a readable message.
        assert isinstance(box_space, gym.spaces.Box), (
            "ConditionedLinearActorCritic requires that "
            "observation space corresponding to the input uuid is a Box space."
        )
        assert len(box_space.shape) == 1
        self.in_dim = box_space.shape[0]
        self.head = ConditionedLinearActorCriticHead(
            input_size=self.in_dim,
            master_actions=action_space["higher"].n,
            subpolicy_actions=action_space["lower"].n,
        )

    # noinspection PyMethodMayBeStatic
    def _recurrent_memory_specification(self):
        """Return `None`: this model declares no recurrent memory."""
        return None

    def forward(self, observations, memory, prev_actions, masks):
        """Apply the head to `observations[self.input_uuid]`.

        `memory`, `prev_actions`, and `masks` are not used. Returns the
        `ActorCriticOutput` together with `None` in place of a memory.
        """
        dists, values = self.head(observations[self.input_uuid])

        # noinspection PyArgumentList
        return (
            ActorCriticOutput(
                distributions=dists,
                values=values,
                extras={},
            ),
            None,
        )


class ConditionedRNNActorCritic(ActorCriticModel[SequentialDistr]):
    """Recurrent actor-critic model with a conditioned (higher/lower) policy head.

    A one-dimensional `Box` observation, identified by `input_uuid`, is
    encoded by an `RNNStateEncoder`; the encoder output is then passed, under
    the key `"rnn_<input_uuid>"`, to a non-recurrent head built by `head_type`.

    # Parameters
    input_uuid : key of the observation to read.
    action_space : action space forwarded to the parent class and to the head.
    observation_space : observation space containing `input_uuid`.
    hidden_size : hidden size of the recurrent state encoder.
    num_layers : number of layers passed to the state encoder.
    rnn_type : recurrent cell type passed to the state encoder.
    head_type : callable building the non-recurrent actor-critic head.
    """

    def __init__(
        self,
        input_uuid: str,
        action_space: gym.spaces.Dict,
        observation_space: gym.spaces.Dict,
        hidden_size: int = 128,
        num_layers: int = 1,
        rnn_type: str = "GRU",
        head_type: Callable[
            ..., ActorCriticModel[SequentialDistr]
        ] = ConditionedLinearActorCritic,
    ):
        super().__init__(action_space=action_space, observation_space=observation_space)
        self.hidden_size = hidden_size
        self.rnn_type = rnn_type

        # The messages below name this class (they previously named unrelated
        # classes) and keep a space between the concatenated literals.
        assert (
            input_uuid in observation_space.spaces
        ), "ConditionedRNNActorCritic expects only a single observational input."
        self.input_uuid = input_uuid

        box_space: gym.spaces.Box = observation_space[self.input_uuid]
        assert isinstance(box_space, gym.spaces.Box), (
            "ConditionedRNNActorCritic requires that "
            "observation space corresponding to the input uuid is a Box space."
        )
        assert len(box_space.shape) == 1
        self.in_dim = box_space.shape[0]

        self.state_encoder = RNNStateEncoder(
            input_size=self.in_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            rnn_type=rnn_type,
            trainable_masked_hidden_state=True,
        )

        # Key under which the encoder output is handed to the head.
        self.head_uuid = "{}_{}".format("rnn", input_uuid)

        self.ac_nonrecurrent_head: ActorCriticModel[SequentialDistr] = head_type(
            input_uuid=self.head_uuid,
            action_space=action_space,
            observation_space=gym.spaces.Dict(
                {
                    self.head_uuid: gym.spaces.Box(
                        low=np.float32(0.0), high=np.float32(1.0), shape=(hidden_size,)
                    )
                }
            ),
        )

        self.memory_key = "rnn"

    @property
    def recurrent_hidden_state_size(self) -> int:
        """Size of the recurrent hidden state (the `hidden_size` given at construction)."""
        return self.hidden_size

    @property
    def num_recurrent_layers(self) -> int:
        """Number of recurrent layers, as reported by the state encoder."""
        return self.state_encoder.num_recurrent_layers

    def _recurrent_memory_specification(self):
        """Describe the single `float32` memory tensor with (layer, sampler, hidden) dimensions."""
        return {
            self.memory_key: (
                (
                    ("layer", self.num_recurrent_layers),
                    ("sampler", None),
                    ("hidden", self.recurrent_hidden_state_size),
                ),
                torch.float32,
            )
        }

    def forward(  # type:ignore
        self,
        observations: Dict[str, Union[torch.FloatTensor, Dict[str, Any]]],
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Encode the observation recurrently and apply the non-recurrent head.

        Returns the head's `ActorCriticOutput` and the memory with the tensor
        stored under `self.memory_key` replaced by the new hidden states.
        """
        rnn_out, mem_return = self.state_encoder(
            x=observations[self.input_uuid],
            hidden_states=memory.tensor(self.memory_key),
            masks=masks,
        )

        # The head is called without memory; its returned memory is discarded.
        # noinspection PyCallingNonCallable
        out, _ = self.ac_nonrecurrent_head(
            observations={self.head_uuid: rnn_out},
            memory=None,
            prev_actions=prev_actions,
            masks=masks,
        )

        # noinspection PyArgumentList
        return (
            out,
            memory.set_tensor(self.memory_key, mem_return),
        )


class ConditionedMiniGridSimpleConvRNN(MiniGridSimpleConvBase):
    """MiniGrid model whose actor-critic is a `ConditionedRNNActorCritic`.

    The actor-critic input is a flat `Box` whose size is the object embedding
    dimension times the shape of the `"minigrid_ego_image"` observation.
    """

    def __init__(
        self,
        action_space: gym.spaces.Dict,
        observation_space: gym.spaces.Dict,
        num_objects: int,
        num_colors: int,
        num_states: int,
        object_embedding_dim: int = 8,
        hidden_size=512,
        num_layers=1,
        rnn_type="GRU",
        head_type: Callable[
            ..., ActorCriticModel[SequentialDistr]
        ] = ConditionedLinearActorCritic,
        **kwargs,
    ):
        # NOTE(review): `prepare_locals_for_super` is defined elsewhere;
        # presumably it converts the current locals into keyword arguments for
        # the parent constructor, so no new local variable should be
        # introduced before this call -- confirm.
        super().__init__(**prepare_locals_for_super(locals()))

        self._hidden_size = hidden_size
        agent_view_x, agent_view_y, view_channels = observation_space[
            "minigrid_ego_image"
        ].shape
        # `self.ac_key` and `self.object_embedding_dim` are not set in this
        # class; presumably they are provided by `MiniGridSimpleConvBase`
        # -- TODO confirm.
        self.actor_critic = ConditionedRNNActorCritic(
            input_uuid=self.ac_key,
            action_space=action_space,
            observation_space=gym.spaces.Dict(
                {
                    self.ac_key: gym.spaces.Box(
                        low=np.float32(-1.0),
                        high=np.float32(1.0),
                        shape=(
                            self.object_embedding_dim
                            * agent_view_x
                            * agent_view_y
                            * view_channels,
                        ),
                    )
                }
            ),
            hidden_size=hidden_size,
            num_layers=num_layers,
            rnn_type=rnn_type,
            head_type=head_type,
        )
        self.memory_key = "rnn"

        self.train()

    @property
    def num_recurrent_layers(self):
        """Number of recurrent layers, as reported by the wrapped actor-critic."""
        return self.actor_critic.num_recurrent_layers

    @property
    def recurrent_hidden_state_size(self):
        """Size of the recurrent hidden state (the `hidden_size` given at construction)."""
        return self._hidden_size

    def _recurrent_memory_specification(self):
        """Describe the single `float32` memory tensor with (layer, sampler, hidden) dimensions."""
        return {
            self.memory_key: (
                (
                    ("layer", self.num_recurrent_layers),
                    ("sampler", None),
                    ("hidden", self.recurrent_hidden_state_size),
                ),
                torch.float32,
            )
        }


class ConditionedMiniGridTask(MiniGridTask):
    """MiniGrid task whose four actions are factored into two binary choices.

    An action is a dictionary with the keys `"higher"` and `"lower"`, each in
    `{0, 1}`; the pair selects entry `lower + 2 * higher` of `_ACTION_NAMES`.
    """

    _ACTION_NAMES = ("left", "right", "forward", "pickup")
    _ACTION_IND_TO_MINIGRID_IND = tuple(
        MiniGridEnv.Actions.__members__[action_name].value
        for action_name in _ACTION_NAMES
    )

    @property
    def action_space(self) -> gym.spaces.Dict:
        """Dictionary action space with binary `higher` and `lower` entries."""
        return gym.spaces.Dict(
            higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)
        )

    def _step(self, action: Dict[str, int]) -> RLStepResult:
        """Translate the (higher, lower) action pair and step the environment."""
        assert len(action) == 2, "got action={}".format(action)

        flat_action_ind = action["lower"] + 2 * action["higher"]
        env_output = self.env.step(
            action=self._ACTION_IND_TO_MINIGRID_IND[flat_action_ind]
        )
        minigrid_obs, reward, self._minigrid_done, info = env_output

        # self.env.render()

        return RLStepResult(
            observation=self.get_observations(minigrid_output_obs=minigrid_obs),
            reward=reward,
            done=self.is_done(),
            info=info,
        )

    def query_expert(self, **kwargs) -> Tuple[int, bool]:
        """Return the expert action for the requested action group.

        A query for the `"higher"` group asks the parent class for the expert
        action and caches it. A query for any other group consumes (and
        deletes) that cached result, so it must follow a `"higher"` query.

        # Returns
        A tuple `(action, success)`; `(0, False)` when the expert was not
        successful.

        # Raises
        ValueError : if the `"higher"` group is queried after the episode is done.
        """
        if kwargs["expert_sensor_group_name"] != "higher":
            # Second half of the query: reuse and then drop the cached result.
            assert hasattr(self, "cached_expert")
            lower_result = (
                (self.cached_expert[0] % 2, True)
                if self.cached_expert[1]
                else (0, False)
            )
            del self.cached_expert
            return lower_result

        if self._minigrid_done:
            raise ValueError("Episode is completed, but expert is still queried.")

        self.cached_expert = super().query_expert(**kwargs)
        if not self.cached_expert[1]:
            return 0, False
        return self.cached_expert[0] // 2, True


class MiniGridTutorialExperimentConfig(ExperimentConfig):
    """MiniGrid experiment with a conditioned (higher/lower) action space.

    The model is trained on `EmptyRandomEnv5x5` with both an imitation and a
    PPO loss, using teacher forcing that decays linearly during the first half
    of training.
    """

    @classmethod
    def tag(cls) -> str:
        """Return the string tag identifying this experiment."""
        return "MiniGridTutorial"

    # Index 0 is the MiniGrid sensor, index 1 is the expert action sensor.
    SENSORS = [
        EgocentricMiniGridSensor(agent_view_size=5, view_channels=3),
        ExpertActionSensor(
            action_space=gym.spaces.Dict(
                higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)
            )
        ),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the `ConditionedMiniGridSimpleConvRNN` model; `kwargs` are unused."""
        conditioned_action_space = gym.spaces.Dict(
            higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)
        )
        minigrid_sensor = cls.SENSORS[0]
        return ConditionedMiniGridSimpleConvRNN(
            action_space=conditioned_action_space,
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            num_objects=minigrid_sensor.num_objects,
            num_colors=minigrid_sensor.num_colors,
            num_states=minigrid_sensor.num_states,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate a `MiniGridTaskSampler` from the given keyword arguments."""
        return MiniGridTaskSampler(**kwargs)

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Task sampler arguments for training; only `process_ind` is used."""
        return self._get_sampler_args(mode="train", process_ind=process_ind)

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Task sampler arguments for validation; only `process_ind` is used."""
        return self._get_sampler_args(mode="valid", process_ind=process_ind)

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Task sampler arguments for testing; only `process_ind` is used."""
        return self._get_sampler_args(mode="test", process_ind=process_ind)

    def _get_sampler_args(self, process_ind: int, mode: str) -> Dict[str, Any]:
        """Assemble the keyword arguments used to build a TaskSampler.

        # Parameters
        process_ind : index of the current task sampler
        mode:  one of `train`, `valid`, or `test`
        """
        is_training = mode == "train"

        if is_training:
            # Training: unlimited tasks and no predefined seeds.
            max_tasks = None
            task_seeds_list = None
        else:
            # Evaluation: 20 tasks for valid, 40 for test (per sampler).
            max_tasks = 40 if mode == "test" else 20

            # A contiguous, sampler-specific range of seeds (one per task):
            # different samplers get disjoint seeds and the set of sampled
            # tasks is reproducible.
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))

        return {
            "max_tasks": max_tasks,
            # builder for the third-party environment
            "env_class": self.make_env,
            # sensors used to return observations to the agent
            "sensors": self.SENSORS,
            # parameters for the environment builder (none for now)
            "env_info": {},
            "task_seeds_list": task_seeds_list,
            # random sampling in training, deterministic in validation/testing
            "deterministic_sampling": not is_training,
            "task_class": ConditionedMiniGridTask,
        }

    @staticmethod
    def make_env(*args, **kwargs):
        """Create a new `EmptyRandomEnv5x5` environment; all arguments are ignored."""
        return EmptyRandomEnv5x5()

    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        """Return the number of task samplers (128 for training, else 16) and an empty device list."""
        num_samplers = 16
        if mode == "train":
            num_samplers = 128
        return dict(nprocesses=num_samplers, devices=[])

    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        """Define a single training stage combining imitation and PPO losses.

        Teacher forcing decays linearly from 1.0 to 0.0 over the first half of
        the 150000 training steps; the learning rate decays linearly over all
        of them.
        """
        total_steps = 150000

        losses = dict(
            # 0 is Minigrid, 1 is ExpertActionSensor
            imitation_loss=Imitation(cls.SENSORS[1]),
            ppo_loss=PPO(**PPOConfig, entropy_method_name="conditional_entropy"),
        )  # type:ignore

        teacher_forcing_schedule = LinearDecay(
            startp=1.0,
            endp=0.0,
            steps=total_steps // 2,
        )
        only_stage = PipelineStage(
            teacher_forcing=teacher_forcing_schedule,
            loss_names=["imitation_loss", "ppo_loss"],
            max_stage_steps=total_steps,
        )

        adam_builder = Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4))
        lr_decay_builder = Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}  # type:ignore
        )

        return TrainingPipeline(
            named_losses=losses,
            pipeline_stages=[only_stage],
            optimizer_builder=adam_builder,
            num_mini_batch=4,
            update_repeats=3,
            max_grad_norm=0.5,
            num_steps=16,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=None,
            save_interval=10000,
            metric_accumulate_interval=1,
            lr_scheduler_builder=lr_decay_builder,
        )


================================================
FILE: projects/tutorials/navtopartner_robothor_rgb_ppo.py
================================================
from math import ceil
from typing import Dict, Any, List, Optional

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import (
    Builder,
    PipelineStage,
    TrainingPipeline,
    LinearDecay,
)
from allenact.utils.multi_agent_viz_utils import MultiTrajectoryViz
from allenact.utils.viz_utils import VizSuite, AgentViewViz
from allenact_plugins.robothor_plugin.robothor_models import (
    NavToPartnerActorCriticSimpleConvRNN,
)
from allenact_plugins.robothor_plugin.robothor_sensors import RGBSensorMultiRoboThor
from allenact_plugins.robothor_plugin.robothor_task_samplers import (
    NavToPartnerTaskSampler,
)
from allenact_plugins.robothor_plugin.robothor_tasks import NavToPartnerTask
from allenact_plugins.robothor_plugin.robothor_viz import ThorMultiViz


class NavToPartnerRoboThorRGBPPOExperimentConfig(ExperimentConfig):
    """A Multi-Agent Navigation experiment configuration in RoboThor.

    Two agents are placed in the same scene (see `agent_count` /
    `agentCount` below) and are trained with PPO from RGB observations.
    """

    # Task Parameters
    MAX_STEPS = 500
    REWARD_CONFIG = {
        "step_penalty": -0.01,
        "max_success_distance": 0.75,
        "success_reward": 5.0,
    }

    # Simulator Parameters
    CAMERA_WIDTH = 300
    CAMERA_HEIGHT = 300
    SCREEN_SIZE = 224

    # Training Engine Parameters
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    NUM_PROCESSES = 20
    TRAINING_GPUS: List[int] = [0]
    VALIDATION_GPUS: List[int] = [0]
    TESTING_GPUS: List[int] = [0]

    SENSORS = [
        RGBSensorMultiRoboThor(
            agent_count=2,
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb",
        ),
    ]

    OBSERVATIONS = [
        "rgb",
    ]

    ENV_ARGS = dict(
        width=CAMERA_WIDTH,
        height=CAMERA_HEIGHT,
        rotateStepDegrees=30.0,
        visibilityDistance=1.0,
        gridSize=0.25,
        agentCount=2,
    )

    # Visualization suite, created on first use by `get_viz` and reused after.
    viz: Optional[VizSuite] = None

    @classmethod
    def tag(cls):
        """Return the string tag identifying this experiment."""
        return "NavToPartnerRobothorRGBPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build the single-stage PPO training pipeline.

        The learning rate is scaled by a `LinearDecay` spanning the same
        number of steps as the (only) PPO pipeline stage.
        """
        ppo_steps = int(1000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 3
        num_steps = 30
        save_interval = 200000
        log_interval = 1
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5
        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={"ppo_loss": PPO(**PPOConfig)},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps)
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )

    def split_num_processes(self, ndevices):
        """Distribute `NUM_PROCESSES` over `ndevices` as evenly as possible.

        Returns a list of length `ndevices` summing to `NUM_PROCESSES`; the
        earlier entries receive the extra processes when the split is uneven.
        """
        assert self.NUM_PROCESSES >= ndevices, "NUM_PROCESSES {} < ndevices {}".format(
            self.NUM_PROCESSES, ndevices
        )
        res = [0] * ndevices
        for it in range(self.NUM_PROCESSES):
            res[it % ndevices] += 1
        return res

    def get_viz(self, mode):
        """Return the visualization suite, building it on the first call.

        The suite is stored on the instance, so the `mode` given on the first
        call is the one the suite is built with.
        """
        if self.viz is not None:
            return self.viz

        self.viz = VizSuite(
            mode=mode,
            # Basic 2D trajectory visualizer (task output source):
            base_trajectory=MultiTrajectoryViz(),  # plt_colormaps=["cool", "cool"]),
            # Egocentric view visualizer (vector task source):
            egeocentric=AgentViewViz(max_video_length=100, max_episodes_in_group=1),
            # Specialized 2D trajectory visualizer (task output source):
            thor_trajectory=ThorMultiViz(
                figsize=(16, 8),
                viz_rows_cols=(448, 448),
                scenes=("FloorPlan_Train{}_{}", 1, 1, 1, 1),
            ),
        )

        return self.viz

    def machine_params(self, mode="train", **kwargs):
        """Return process counts, devices and visualizer for `mode`.

        Without CUDA everything runs on `"cpu"` (4 training processes);
        with CUDA, training processes are split over `TRAINING_GPUS`.
        Validation uses no processes and only test mode gets a visualizer.

        Raises:
            NotImplementedError: if `mode` is not 'train', 'valid' or 'test'.
        """
        visualizer = None
        if mode == "train":
            devices = (
                ["cpu"] if not torch.cuda.is_available() else list(self.TRAINING_GPUS)
            )
            nprocesses = (
                4
                if not torch.cuda.is_available()
                else self.split_num_processes(len(devices))
            )
        elif mode == "valid":
            nprocesses = 0
            devices = ["cpu"] if not torch.cuda.is_available() else self.VALIDATION_GPUS
        elif mode == "test":
            nprocesses = 1
            devices = ["cpu"] if not torch.cuda.is_available() else self.TESTING_GPUS
            visualizer = self.get_viz(mode=mode)
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        return {
            "nprocesses": nprocesses,
            "devices": devices,
            "visualizer": visualizer,
        }

    @staticmethod
    def _action_space() -> gym.spaces.Tuple:
        """Return the joint action space: one `Discrete` space per agent."""
        num_actions = len(NavToPartnerTask.class_action_names())
        return gym.spaces.Tuple(
            [
                gym.spaces.Discrete(num_actions),
                gym.spaces.Discrete(num_actions),
            ]
        )

    # Define Model
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the actor-critic model over the joint two-agent action space."""
        return NavToPartnerActorCriticSimpleConvRNN(
            action_space=cls._action_space(),
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            hidden_size=512,
        )

    # Define Task Sampler
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a task sampler from the given sampler arguments."""
        return NavToPartnerTaskSampler(**kwargs)

    # Utility Functions for distributing scenes between GPUs
    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        """Return `num_parts + 1` integer boundaries splitting `range(n)`."""
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    def _get_sampler_args_for_scene_split(
        self,
        scenes: List[str],
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Build the sampler arguments shared by train/valid/test.

        The scene list is partitioned among `total_processes` and the slice
        belonging to `process_ind` is returned together with the task
        settings. When there are more processes than scenes the scene list is
        repeated so that every process receives at least one scene.
        """
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisible by the number of scenes"
                )
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        else:
            if len(scenes) % total_processes != 0:
                # Nothing is oversampled in this branch: the scenes simply
                # cannot be divided equally among the processes.
                print(
                    "Warning: the scenes cannot be evenly split among the processes,"
                    " some processes will receive more scenes than others."
                    " You can avoid this by setting a number of workers that divides the number of scenes"
                )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": self._action_space(),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }

    def _env_args_for_process(
        self, process_ind: int, devices: Optional[List[int]]
    ) -> Dict[str, Any]:
        """Return a copy of `ENV_ARGS` extended with this process' `x_display`.

        The display is `"0.<device>"`, with devices assigned to processes
        round-robin, or `None` when no devices are given.
        """
        x_display = (
            ("0.%d" % devices[process_ind % len(devices)])
            if devices is not None and len(devices) > 0
            else None
        )
        return {**self.ENV_ARGS, "x_display": x_display}

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a training process."""
        scenes = ["FloorPlan_Train1_1"]

        res = self._get_sampler_args_for_scene_split(
            scenes,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["env_args"] = self._env_args_for_process(process_ind, devices)
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a validation process.

        Same as training, but limited to 20 tasks.
        """
        scenes = ["FloorPlan_Train1_1"]

        res = self._get_sampler_args_for_scene_split(
            scenes,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["env_args"] = self._env_args_for_process(process_ind, devices)
        res["max_tasks"] = 20
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a test process.

        Same as training, but limited to 4 tasks.
        """
        scenes = ["FloorPlan_Train1_1"]

        res = self._get_sampler_args_for_scene_split(
            scenes,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["env_args"] = self._env_args_for_process(process_ind, devices)
        res["max_tasks"] = 4
        return res


================================================
FILE: projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object.py
================================================
import torch
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.imitation import Imitation
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.utils.experiment_utils import (
    Builder,
    PipelineStage,
    TrainingPipeline,
    LinearDecay,
)
from allenact.base_abstractions.sensor import ExpertActionSensor
from projects.tutorials.object_nav_ithor_ppo_one_object import (
    ObjectNavThorPPOExperimentConfig,
    ObjectNaviThorGridTask,
)


class ObjectNavThorDaggerThenPPOExperimentConfig(ObjectNavThorPPOExperimentConfig):
    """A simple object navigation experiment in THOR.

    Training with DAgger and then PPO.
    """

    # The imitation stage needs expert actions, so the parent's sensors are
    # extended with an expert action sensor.
    SENSORS = ObjectNavThorPPOExperimentConfig.SENSORS + [
        ExpertActionSensor(
            action_space=len(ObjectNaviThorGridTask.class_action_names()),
        ),
    ]

    @classmethod
    def tag(cls):
        """Return the string tag identifying this experiment."""
        return "ObjectNavThorDaggerThenPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a two-stage pipeline: imitation (DAgger), then PPO.

        During the first stage the teacher forcing probability is linearly
        annealed from 1.0 to 0.0. The learning-rate decay spans the sum of
        both stages so that the rate only reaches zero at the very end of
        training.
        """
        dagger_steps = int(1e4)
        ppo_steps = int(1e6)
        # The pipeline runs for both stages, so the learning-rate schedule
        # has to cover their combined length. A decay spanning only
        # `ppo_steps` would hit zero `dagger_steps` steps before the end.
        total_steps = dagger_steps + ppo_steps
        lr = 2.5e-4
        num_mini_batch = 2 if not torch.cuda.is_available() else 6
        update_repeats = 4
        num_steps = 128
        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max length tasks
        save_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 1.0
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                # NOTE(review): `clip_decay` still spans only `ppo_steps`;
                # whether it should also account for the imitation stage
                # depends on which step count the PPO loss receives - confirm.
                "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),
                "imitation_loss": Imitation(),  # We add an imitation loss.
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(
                    loss_names=["imitation_loss"],
                    teacher_forcing=LinearDecay(
                        startp=1.0,
                        endp=0.0,
                        steps=dagger_steps,
                    ),
                    max_stage_steps=dagger_steps,
                ),
                PipelineStage(
                    loss_names=["ppo_loss"],
                    max_stage_steps=ppo_steps,
                ),
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}
            ),
        )


================================================
FILE: projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object_viz.py
================================================
from projects.tutorials.object_nav_ithor_dagger_then_ppo_one_object import (
    ObjectNavThorDaggerThenPPOExperimentConfig,
)
from allenact.utils.viz_utils import (
    VizSuite,
    TrajectoryViz,
    AgentViewViz,
    ActorViz,
    TensorViz1D,
)
from allenact_plugins.ithor_plugin.ithor_viz import ThorViz


class ObjectNavThorDaggerThenPPOVizExperimentConfig(
    ObjectNavThorDaggerThenPPOExperimentConfig
):
    """Object navigation in THOR: DAgger, then PPO, with test-time viz.

    Identical to the parent experiment except that fewer test samples are
    used per scene and a visualizer is attached in test mode.
    """

    TEST_SAMPLES_IN_SCENE = 4

    # Visualization suite, created on first use by `get_viz`.
    viz = None

    @classmethod
    def tag(cls):
        """Return the string tag identifying this experiment."""
        return "ObjectNavThorDaggerThenPPOViz"

    def get_viz(self, mode):
        """Return the visualization suite, creating it if it does not exist."""
        if self.viz is None:
            trajectory = TrajectoryViz(
                path_to_target_location=None,
                path_to_rot_degrees=("rotation",),
            )
            agent_view = AgentViewViz(max_video_length=100)
            actor = ActorViz(figsize=(3.25, 10), fontsize=18)
            logprobs = TensorViz1D()
            masks = TensorViz1D(rollout_source=("masks",))
            thor_view = ThorViz(
                path_to_target_location=None,
                figsize=(8, 8),
                viz_rows_cols=(448, 448),
            )
            self.viz = VizSuite(
                mode=mode,
                base_trajectory=trajectory,
                egeocentric=agent_view,
                action_probs=actor,
                taken_action_logprobs=logprobs,
                episode_mask=masks,
                thor_trajectory=thor_view,
            )

        return self.viz

    def machine_params(self, mode="train", **kwargs):
        """Return the parent's machine parameters, with a visualizer for test."""
        machine = super().machine_params(mode, **kwargs)

        if mode != "test":
            return machine

        machine.set_visualizer(self.get_viz(mode))
        return machine


================================================
FILE: projects/tutorials/object_nav_ithor_ppo_one_object.py
================================================
from math import ceil
from typing import Dict, Any, List, Optional

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import (
    Builder,
    PipelineStage,
    TrainingPipeline,
    LinearDecay,
)
from allenact_plugins.ithor_plugin.ithor_sensors import (
    RGBSensorThor,
    GoalObjectTypeThorSensor,
)
from allenact_plugins.ithor_plugin.ithor_task_samplers import ObjectNavTaskSampler
from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask
from allenact_plugins.navigation_plugin.objectnav.models import ObjectNavActorCritic


class ObjectNavThorPPOExperimentConfig(ExperimentConfig):
    """A simple object navigation experiment in THOR.

    Training with PPO.
    """

    # A simple setting, train/valid/test are all the same single scene
    # and we're looking for a single object
    OBJECT_TYPES = ["Tomato"]
    TRAIN_SCENES = ["FloorPlan1_physics"]
    VALID_SCENES = ["FloorPlan1_physics"]
    TEST_SCENES = ["FloorPlan1_physics"]

    # Setting up sensors and basic environment details
    SCREEN_SIZE = 224
    SENSORS = [
        RGBSensorThor(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
        ),
        GoalObjectTypeThorSensor(object_types=OBJECT_TYPES),
    ]

    ENV_ARGS = {
        "player_screen_height": SCREEN_SIZE,
        "player_screen_width": SCREEN_SIZE,
        "quality": "Very Low",
    }

    MAX_STEPS = 128
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    VALID_SAMPLES_IN_SCENE = 10
    TEST_SAMPLES_IN_SCENE = 100

    @classmethod
    def tag(cls):
        """Return the string tag identifying this experiment."""
        return "ObjectNavThorPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build the single-stage PPO training pipeline.

        Both the PPO clip parameter and the learning rate are scaled by a
        `LinearDecay` spanning `ppo_steps`.
        """
        ppo_steps = int(1e6)
        lr = 2.5e-4
        num_mini_batch = 2 if not torch.cuda.is_available() else 6
        update_repeats = 4
        num_steps = 128
        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max length tasks
        save_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 1.0
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(
                    loss_names=["ppo_loss"],
                    max_stage_steps=ppo_steps,
                ),
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )

    @classmethod
    def machine_params(cls, mode="train", **kwargs):
        """Return the number of processes and the devices to use for `mode`.

        With no GPU the device list is empty and training uses 4 processes;
        otherwise training and testing use GPU 0 and validation uses GPU 1
        when it exists (GPU 0 when only one GPU is present).

        Raises:
            NotImplementedError: if `mode` is not 'train', 'valid' or 'test'.
        """
        num_gpus = torch.cuda.device_count()
        has_gpu = num_gpus != 0

        if mode == "train":
            nprocesses = 20 if has_gpu else 4
            gpu_ids = [0] if has_gpu else []
        elif mode == "valid":
            nprocesses = 1
            # `1 % num_gpus` falls back to GPU 0 on single-GPU machines.
            gpu_ids = [1 % num_gpus] if has_gpu else []
        elif mode == "test":
            nprocesses = 1
            gpu_ids = [0] if has_gpu else []
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
        )

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the actor-critic model used by this experiment."""
        return ObjectNavActorCritic(
            action_space=gym.spaces.Discrete(
                len(ObjectNaviThorGridTask.class_action_names())
            ),
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            rgb_uuid=cls.SENSORS[0].uuid,
            depth_uuid=None,
            goal_sensor_uuid="goal_object_type_ind",
            hidden_size=512,
            object_type_embedding_dim=8,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a task sampler from the given sampler arguments."""
        return ObjectNavTaskSampler(**kwargs)

    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        """Return `num_parts + 1` integer boundaries splitting `range(n)`."""
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    def _get_sampler_args_for_scene_split(
        self,
        scenes: List[str],
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Build the sampler arguments shared by train/valid/test.

        The scene list is partitioned among `total_processes` and the slice
        belonging to `process_ind` is returned together with the task
        settings. When there are more processes than scenes the scene list is
        repeated so that every process receives at least one scene.
        """
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisible by the number of scenes"
                )
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        else:
            if len(scenes) % total_processes != 0:
                # Nothing is oversampled in this branch: the scenes simply
                # cannot be divided equally among the processes.
                print(
                    "Warning: the scenes cannot be evenly split among the processes,"
                    " some processes will receive more scenes than others."
                    " You can avoid this by setting a number of workers that divides the number of scenes"
                )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "object_types": self.OBJECT_TYPES,
            "env_args": self.ENV_ARGS,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(
                len(ObjectNaviThorGridTask.class_action_names())
            ),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
        }

    def _env_args_for_process(
        self, process_ind: int, devices: Optional[List[int]]
    ) -> Dict[str, Any]:
        """Return a copy of `ENV_ARGS` extended with this process' `x_display`.

        The display is `"0.<device>"`, with devices assigned to processes
        round-robin, or `None` when no devices are given. A fresh dict is
        returned so the shared class-level `ENV_ARGS` is never mutated.
        """
        x_display = (
            ("0.%d" % devices[process_ind % len(devices)])
            if devices is not None and len(devices) > 0
            else None
        )
        return {**self.ENV_ARGS, "x_display": x_display}

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a training process."""
        res = self._get_sampler_args_for_scene_split(
            self.TRAIN_SCENES,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_period"] = "manual"
        res["env_args"] = self._env_args_for_process(process_ind, devices)
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a validation process.

        `VALID_SAMPLES_IN_SCENE` tasks are requested for each scene assigned
        to the process.
        """
        res = self._get_sampler_args_for_scene_split(
            self.VALID_SCENES,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_period"] = self.VALID_SAMPLES_IN_SCENE
        res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * len(res["scenes"])
        res["env_args"] = self._env_args_for_process(process_ind, devices)
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a test process.

        `TEST_SAMPLES_IN_SCENE` tasks are requested for each scene assigned
        to the process.
        """
        res = self._get_sampler_args_for_scene_split(
            self.TEST_SCENES,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_period"] = self.TEST_SAMPLES_IN_SCENE
        res["max_tasks"] = self.TEST_SAMPLES_IN_SCENE * len(res["scenes"])
        res["env_args"] = self._env_args_for_process(process_ind, devices)
        return res


================================================
FILE: projects/tutorials/pointnav_habitat_rgb_ddppo.py
================================================
import os
from typing import Dict, Any, List, Optional, Sequence

import gym
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from torchvision import models

from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor
from allenact.utils.experiment_utils import (
    Builder,
    PipelineStage,
    TrainingPipeline,
    LinearDecay,
    evenly_distribute_count_into_bins,
)
from allenact_plugins.habitat_plugin.habitat_constants import (
    HABITAT_DATASETS_DIR,
    HABITAT_CONFIGS_DIR,
)
from allenact_plugins.habitat_plugin.habitat_sensors import (
    RGBSensorHabitat,
    TargetCoordinatesSensorHabitat,
)
from allenact_plugins.habitat_plugin.habitat_task_samplers import PointNavTaskSampler
from allenact_plugins.habitat_plugin.habitat_utils import (
    construct_env_configs,
    get_habitat_config,
)
from allenact_plugins.navigation_plugin.objectnav.models import (
    ResnetTensorNavActorCritic,
)
from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask


class PointNavHabitatRGBPPOTutorialExperimentConfig(ExperimentConfig):
    """A Point Navigation experiment configuration in Habitat."""

    # Task Parameters
    MAX_STEPS = 500
    REWARD_CONFIG = {
        "step_penalty": -0.01,
        "goal_success_reward": 10.0,
        "failed_stop_reward": 0.0,
        "shaping_weight": 1.0,
    }
    DISTANCE_TO_GOAL = 0.2

    # Simulator Parameters
    CAMERA_WIDTH = 640
    CAMERA_HEIGHT = 480
    SCREEN_SIZE = 224

    # Training Engine Parameters
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    # One less than five processes per visible GPU, but never fewer than 4.
    NUM_PROCESSES = max(5 * torch.cuda.device_count() - 1, 4)
    TRAINING_GPUS = list(range(torch.cuda.device_count()))
    # Validation and testing use the last visible GPU. With no GPU these
    # evaluate to [-1]; `machine_params` only reads them when CUDA is available.
    VALIDATION_GPUS = [torch.cuda.device_count() - 1]
    TESTING_GPUS = [torch.cuda.device_count() - 1]

    # Dataset file template; both placeholders are filled with the split name.
    task_data_dir_template = os.path.join(
        HABITAT_DATASETS_DIR, "pointnav/gibson/v1/{}/{}.json.gz"
    )
    TRAIN_SCENES = task_data_dir_template.format(*(["train"] * 2))
    VALID_SCENES = task_data_dir_template.format(*(["val"] * 2))
    TEST_SCENES = task_data_dir_template.format(*(["test"] * 2))

    # Base Habitat configuration. It is loaded once at class-definition time
    # and then customized in place by the assignments below.
    CONFIG = get_habitat_config(
        os.path.join(HABITAT_CONFIGS_DIR, "tasks/pointnav_gibson.yaml")
    )
    CONFIG.defrost()
    CONFIG.NUM_PROCESSES = NUM_PROCESSES
    CONFIG.SIMULATOR_GPU_IDS = TRAINING_GPUS
    CONFIG.DATASET.SCENES_DIR = "habitat/habitat-api/data/scene_datasets/"
    CONFIG.DATASET.POINTNAVV1.CONTENT_SCENES = ["*"]
    CONFIG.DATASET.DATA_PATH = TRAIN_SCENES
    CONFIG.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR"]
    CONFIG.SIMULATOR.RGB_SENSOR.WIDTH = CAMERA_WIDTH
    CONFIG.SIMULATOR.RGB_SENSOR.HEIGHT = CAMERA_HEIGHT
    CONFIG.SIMULATOR.TURN_ANGLE = 30
    CONFIG.SIMULATOR.FORWARD_STEP_SIZE = 0.25
    CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = MAX_STEPS

    CONFIG.TASK.TYPE = "Nav-v0"
    CONFIG.TASK.SUCCESS_DISTANCE = DISTANCE_TO_GOAL
    CONFIG.TASK.SENSORS = ["POINTGOAL_WITH_GPS_COMPASS_SENSOR"]
    CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.GOAL_FORMAT = "POLAR"
    CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.DIMENSIONALITY = 2
    CONFIG.TASK.GOAL_SENSOR_UUID = "pointgoal_with_gps_compass"
    CONFIG.TASK.MEASUREMENTS = ["DISTANCE_TO_GOAL", "SUCCESS", "SPL"]
    CONFIG.TASK.SPL.TYPE = "SPL"
    CONFIG.TASK.SPL.SUCCESS_DISTANCE = DISTANCE_TO_GOAL
    CONFIG.TASK.SUCCESS.SUCCESS_DISTANCE = DISTANCE_TO_GOAL

    CONFIG.MODE = "train"

    SENSORS = [
        RGBSensorHabitat(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
    ]

    # A ResNet-18 preprocessor reading the "rgb_lowres" observation and
    # writing its output under the "rgb_resnet" uuid.
    PREPROCESSORS = [
        Builder(
            ResNetPreprocessor,
            {
                "input_height": SCREEN_SIZE,
                "input_width": SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": ["rgb_lowres"],
                "output_uuid": "rgb_resnet",
            },
        ),
    ]

    OBSERVATIONS = [
        "rgb_resnet",
        "target_coordinates_ind",
    ]

    # Training environment configurations derived from CONFIG; indexed by
    # process in `train_task_sampler_args`.
    TRAIN_CONFIGS = construct_env_configs(CONFIG)

    @classmethod
    def tag(cls):
        """Return the string tag identifying this experiment."""
        return "PointNavHabitatRGBPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build the single-stage PPO training pipeline.

        The learning rate is scaled by a `LinearDecay` spanning the same
        number of steps as the (only) PPO pipeline stage.
        """
        ppo_steps = int(250000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 3
        num_steps = 30
        save_interval = 5000000
        log_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5
        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={"ppo_loss": PPO(**PPOConfig)},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps)
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )

    def machine_params(self, mode="train", **kwargs):
        """Return processes, devices and the preprocessor graph for `mode`.

        Without CUDA a single process and no devices are used. With CUDA,
        training splits `NUM_PROCESSES` over `TRAINING_GPUS`, while
        validation and testing each use one process on their own GPU list.

        Raises:
            NotImplementedError: if `mode` is not 'train', 'valid' or 'test'.
        """
        if mode == "train":
            workers_per_device = 1
            gpu_ids = (
                []
                if not torch.cuda.is_available()
                else self.TRAINING_GPUS * workers_per_device
            )
            nprocesses = (
                1
                if not torch.cuda.is_available()
                else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))
            )
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available() else self.VALIDATION_GPUS
        elif mode == "test":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available() else self.TESTING_GPUS
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        # The graph is built for training, or whenever at least one process
        # is requested (`nprocesses` may be an int or a per-device sequence).
        # With the values assigned above that holds in every mode.
        sensor_preprocessor_graph = (
            SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
                preprocessors=self.PREPROCESSORS,
            )
            if mode == "train"
            or (
                (isinstance(nprocesses, int) and nprocesses > 0)
                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
            )
            else None
        )

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    # Define Model
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the actor-critic model.

        Requires `kwargs["sensor_preprocessor_graph"]`; the model's
        observation space is taken from that graph's `observation_spaces`.
        """
        return ResnetTensorNavActorCritic(
            action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            goal_sensor_uuid="target_coordinates_ind",
            rgb_resnet_preprocessor_uuid="rgb_resnet",
            hidden_size=512,
            goal_dims=32,
        )

    # Define Task Sampler
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a task sampler from the given sampler arguments."""
        return PointNavTaskSampler(**kwargs)

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a training process.

        The environment configuration is the `process_ind`-th entry of
        `TRAIN_CONFIGS`; the remaining parameters are not used here.
        """
        config = self.TRAIN_CONFIGS[process_ind]
        return {
            "env_config": config,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "distance_to_goal": self.DISTANCE_TO_GOAL,  # type:ignore
        }

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for a validation process.

        A clone of the class-level `CONFIG` is pointed at the validation
        dataset and switched to "validate" mode; none of the parameters
        are used here.
        """
        # Work on a clone so the shared class-level CONFIG is left untouched.
        config = self.CONFIG.clone()
        config.defrost()
        config.DATASET.DATA_PATH = self.VALID_SCENES
        config.MODE = "validate"
        config.freeze()
        return {
            "env_config": config,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "distance_to_goal": self.DISTANCE_TO_GOAL,  # type:ignore
        }

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Testing is not supported by this tutorial.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError("Testing not implemented for this tutorial.")


================================================
FILE: projects/tutorials/pointnav_ithor_rgb_ddppo.py
================================================
import glob
import os
from math import ceil
from typing import Dict, Any, List, Optional, Sequence

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from torchvision import models

from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor
from allenact.utils.experiment_utils import (
    Builder,
    PipelineStage,
    TrainingPipeline,
    LinearDecay,
    evenly_distribute_count_into_bins,
)
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.navigation_plugin.objectnav.models import (
    ResnetTensorNavActorCritic,
)
from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from allenact_plugins.robothor_plugin.robothor_task_samplers import (
    PointNavDatasetTaskSampler,
)
from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask


class PointNaviThorRGBPPOExperimentConfig(ExperimentConfig):
    """A Point Navigation experiment configuration in iTHOR.

    The agent is given an RGB frame (passed through a ResNet-18 based
    `ResNetPreprocessor`) together with the output of
    `GPSCompassSensorRoboThor`, and is trained with a single PPO stage.
    Episodes are read per scene from `*.json.gz` files found under the
    `episodes` sub-directory of the dataset directories configured below.
    """

    # Task Parameters
    MAX_STEPS = 500  # forwarded to the task sampler as `max_steps`
    REWARD_CONFIG = {
        "step_penalty": -0.01,
        "goal_success_reward": 10.0,
        "failed_stop_reward": 0.0,
        "shaping_weight": 1.0,
    }

    # Simulator Parameters
    CAMERA_WIDTH = 640  # `width` passed to the environment (see ENV_ARGS)
    CAMERA_HEIGHT = 480  # `height` passed to the environment (see ENV_ARGS)
    SCREEN_SIZE = 224  # height/width used by the RGB sensor and the preprocessor

    # Training Engine Parameters
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    NUM_PROCESSES = 60  # split across the training devices in `machine_params`
    TRAINING_GPUS = list(range(torch.cuda.device_count()))
    # Validation and testing use the highest-indexed visible GPU. On a machine
    # without CUDA this evaluates to [-1], but `machine_params` never reads these
    # lists unless `torch.cuda.is_available()`.
    VALIDATION_GPUS = [torch.cuda.device_count() - 1]
    TESTING_GPUS = [torch.cuda.device_count() - 1]

    # Dataset Parameters
    # NOTE(review): these paths point at the `ithor-objectnav` dataset even though
    # this is a PointNav configuration - confirm that this is intended.
    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-objectnav/train")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val")

    SENSORS = [
        RGBSensorThor(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    # The preprocessor consumes the "rgb_lowres" sensor output and publishes its
    # result under the "rgb_resnet" uuid, which is what the model reads.
    PREPROCESSORS = [
        Builder(
            ResNetPreprocessor,
            {
                "input_height": SCREEN_SIZE,
                "input_width": SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": ["rgb_lowres"],
                "output_uuid": "rgb_resnet",
            },
        ),
    ]

    OBSERVATIONS = [
        "rgb_resnet",
        "target_coordinates_ind",
    ]

    # Keyword arguments copied into every task sampler's `env_args`.
    ENV_ARGS = dict(
        width=CAMERA_WIDTH,
        height=CAMERA_HEIGHT,
        rotateStepDegrees=30.0,
        visibilityDistance=1.0,
        gridSize=0.25,
    )

    @classmethod
    def tag(cls):
        """Return the string tag identifying this experiment."""
        return "PointNavithorRGBPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build the `TrainingPipeline`: one PPO stage with Adam and linear LR decay.

        `kwargs` is accepted but not read.
        """
        ppo_steps = int(250000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 3
        num_steps = 30
        save_interval = 5000000
        log_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5
        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={"ppo_loss": PPO(**PPOConfig)},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps)
            ],
            # The learning-rate schedule spans the same number of steps as the stage.
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )

    def machine_params(self, mode="train", **kwargs):
        """Return the `MachineParams` (processes, devices, preprocessors) for `mode`.

        # Parameters

        mode : One of `"train"`, `"valid"` or `"test"`.
        kwargs : Accepted but not read.

        # Returns

        A `MachineParams` whose device lists are empty when CUDA is unavailable.
        Training then runs a single process; validation and testing always use
        a single process.

        # Raises

        NotImplementedError : If `mode` is not one of the three supported values.
        """
        sampler_devices: Sequence[int] = []
        if mode == "train":
            workers_per_device = 1
            gpu_ids = (
                []
                if not torch.cuda.is_available()
                else self.TRAINING_GPUS * workers_per_device
            )
            nprocesses = (
                1
                if not torch.cuda.is_available()
                else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))
            )
            sampler_devices = self.TRAINING_GPUS
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available() else self.VALIDATION_GPUS
        elif mode == "test":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available() else self.TESTING_GPUS
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        # The preprocessor graph is always built for training; for the other modes
        # it is only built when at least one process will actually run.
        sensor_preprocessor_graph = (
            SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
                preprocessors=self.PREPROCESSORS,
            )
            if mode == "train"
            or (
                (isinstance(nprocesses, int) and nprocesses > 0)
                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
            )
            else None
        )

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sampler_devices=(
                sampler_devices if mode == "train" else gpu_ids
            ),  # ignored with > 1 gpu_ids
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    # Define Model
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the actor-critic model.

        `kwargs["sensor_preprocessor_graph"]` is required: its
        `observation_spaces` become the model's observation space.
        """
        return ResnetTensorNavActorCritic(
            action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            goal_sensor_uuid="target_coordinates_ind",
            rgb_resnet_preprocessor_uuid="rgb_resnet",
            hidden_size=512,
            goal_dims=32,
        )

    # Define Task Sampler
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Build a `PointNavDatasetTaskSampler` from the given keyword arguments."""
        return PointNavDatasetTaskSampler(**kwargs)

    # Utility Functions for distributing scenes between GPUs
    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        """Return `num_parts + 1` integer boundaries splitting `range(n)` evenly.

        Consecutive boundaries delimit `num_parts` contiguous slices whose sizes
        differ only through rounding of the evenly spaced boundaries.
        """
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments shared by all modes for one process.

        Scene names are the file names (up to the first `.`) of the `*.json.gz`
        files in `scenes_dir`. They are split into `total_processes` contiguous
        slices and the slice with index `process_ind` is returned under
        `"scenes"`.

        # Parameters

        scenes_dir : Directory holding one `<scene>.json.gz` file per scene.
        process_ind : Index of the process the arguments are built for.
        total_processes : Number of processes the scenes are split between.
        seeds : Optional per-process seeds; entry `process_ind` is used.
        deterministic_cudnn : Forwarded unchanged to the task sampler.

        # Raises

        RuntimeError : If `scenes_dir` contains no `*.json.gz` file.
        """
        path = os.path.join(scenes_dir, "*.json.gz")
        # Every process computes this list independently and then takes a slice by
        # index, so the ordering must be the same everywhere: `glob` makes no
        # ordering promise, hence the explicit sort. `os.path.basename` (rather
        # than splitting on "/") keeps the name extraction separator-agnostic.
        scenes = sorted(
            os.path.basename(scene).split(".")[0] for scene in glob.glob(path)
        )
        if not scenes:
            raise RuntimeError(
                (
                    "Could find no scene dataset information in directory {}."
                    " Are you sure you've downloaded them? "
                    " If not, see https://allenact.org/installation/download-datasets/ information"
                    " on how this can be done."
                ).format(scenes_dir)
            )
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisible by the number of scenes"
                )
            # Repeat the scene list until it covers every process, then trim it to
            # a multiple of `total_processes` so all slices have the same size.
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        else:
            # No scene is repeated in this branch; when the division is not exact
            # the slices below simply end up with unequal sizes.
            if len(scenes) % total_processes != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisor of the number of scenes"
                )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for one training process.

        Uses the episodes under `TRAIN_DATASET_DIR`, loops over the dataset and
        enables `allow_flipping`.
        """
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.TRAIN_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.TRAIN_DATASET_DIR
        res["loop_dataset"] = True
        # Copy so that per-process additions never leak into the class-level dict.
        res["env_args"] = dict(self.ENV_ARGS)
        # Display "0.<device>", cycling through `devices`; None when no device is given.
        res["env_args"]["x_display"] = (
            ("0.%d" % devices[process_ind % len(devices)])
            if devices is not None and len(devices) > 0
            else None
        )
        res["allow_flipping"] = True
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for one validation process.

        Uses the episodes under `VAL_DATASET_DIR` without looping over them.
        """
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        # Copy so that per-process additions never leak into the class-level dict.
        res["env_args"] = dict(self.ENV_ARGS)
        # Display "0.<device>", cycling through `devices`; None when no device is given.
        res["env_args"]["x_display"] = (
            ("0.%d" % devices[process_ind % len(devices)])
            if devices is not None and len(devices) > 0
            else None
        )
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments for one testing process.

        Testing reuses the episodes under `VAL_DATASET_DIR` without looping.
        Unlike the training and validation variants, no `x_display` entry is
        added to `env_args` and `devices` is not read.
        """
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        # Copy so that later changes never leak into the class-level dict.
        res["env_args"] = dict(self.ENV_ARGS)
        return res


================================================
FILE: projects/tutorials/running_inference_tutorial.py
================================================
# literate: tutorials/running-inference-on-a-pretrained-model.md
# %%
"""# Tutorial: Inference with a pre-trained model."""

# %%
"""
In this tutorial we will run inference on a pre-trained model for the PointNav task
in the RoboTHOR environment. In this task the agent is tasked with going to a specific location
within a realistic 3D environment.

For information on how to train a PointNav Model see [this tutorial](training-a-pointnav-model.md)

We will need to [install the full AllenAct library](../installation/installation-allenact.md#full-library),
the `robothor_plugin` requirements via

```bash
pip install -r allenact_plugins/robothor_plugin/extra_requirements.txt
```

and [download the 
RoboTHOR Pointnav dataset](../installation/download-datasets.md) before we get started.

For this tutorial we will download the weights of a model trained on the debug dataset.
This can be done with a handy script in the `pretrained_model_ckpts` directory:
```bash
bash pretrained_model_ckpts/download_navigation_model_ckpts.sh robothor-pointnav-rgb-resnet
```
This will download the weights for an RGB model that has been
trained on the PointNav task in RoboTHOR to `pretrained_model_ckpts/robothor-pointnav-rgb-resnet`


Next we need to run the inference, using the PointNav experiment config from the
[tutorial on making a PointNav experiment](training-a-pointnav-model.md).
We can do this with the following command:

```bash
PYTHONPATH=. python allenact/main.py -o <PATH_TO_OUTPUT> -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> -c <PATH_TO_CHECKPOINT> --eval
```

Where `<PATH_TO_OUTPUT>` is the location where the results of the test will be dumped, `<PATH_TO_CHECKPOINT>` is the
location of the downloaded model weights, and `<BASE_DIRECTORY_OF_YOUR_EXPERIMENT>` is a path to the directory where
our experiment definition is stored.

For our current setup the following command would work:

```bash
PYTHONPATH=. python allenact/main.py \
training_a_pointnav_model \
-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \
-b projects/tutorials \
-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30/exp_PointNavRobothorRGBPPO__stage_00__steps_000039031200.pt \
--eval
```

For testing on all saved checkpoints we pass a directory to `--checkpoint` rather than just a single file:

```bash
PYTHONPATH=. python allenact/main.py \
training_a_pointnav_model \
-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \
-b projects/tutorials  \
-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30 \
--eval
```
## Visualization

We also show examples of visualizations that can be extracted from the `"valid"` and `"test"` modes. Currently,
visualization is still undergoing design changes and does not support multi-agent tasks, but the available functionality
is sufficient for pointnav in RoboThor.

Following up on the example above, we can make a specialized pointnav `ExperimentConfig` where we instantiate
the base visualization class, `VizSuite`, defined in
[`allenact.utils.viz_utils`](https://github.com/allenai/allenact/tree/master/allenact/utils/viz_utils.py), when in `test` mode.

Each visualization type can be thought of as a plugin to the base `VizSuite`. For example, all `episode_ids` passed to
`VizSuite` will be processed with each of the instantiated visualization types (possibly with the exception of the
`AgentViewViz`). In the example below we show how to instantiate different visualization types from 4 different data
sources.

The data sources available to `VizSuite` are:

* Task output (e.g. 2D trajectories)
* Vector task (e.g. egocentric views)
* Rollout storage (e.g. recurrent memory, taken action logprobs...)
* `ActorCriticOutput` (e.g. action probabilities)

The visualization types included below are:

* `TrajectoryViz`: Generic 2D trajectory view.
* `AgentViewViz`: RGB egocentric view.
* `ActorViz`: Action probabilities from `ActorCriticOutput[CategoricalDistr]`.
* `TensorViz1D`: Evolution of a point from RolloutStorage over time.
* `TensorViz2D`: Evolution of a vector from RolloutStorage over time.
* `ThorViz`: Specialized 2D trajectory view
[for RoboThor](https://github.com/allenai/allenact/tree/master/allenact_plugins/robothor_plugin/robothor_viz.py).

Note that we need to explicitly set the `episode_ids` that we wish to visualize. For `AgentViewViz` we have the option
of using a different (typically shorter) list of episodes or enforce the ones used for the rest of visualizations.
"""

# %% hide
from typing import Optional

from allenact.utils.viz_utils import (
    VizSuite,
    TrajectoryViz,
    ActorViz,
    AgentViewViz,
    TensorViz1D,
    TensorViz2D,
)
from allenact_plugins.robothor_plugin.robothor_viz import ThorViz
from projects.tutorials.training_a_pointnav_model import (
    PointNavRoboThorRGBPPOExperimentConfig,
)


# %%
class PointNavRoboThorRGBPPOVizExperimentConfig(PointNavRoboThorRGBPPOExperimentConfig):
    """PointNav experiment config that attaches a `VizSuite` when run in test mode.

    # Attributes

    viz_ep_ids : Ids passed to `VizSuite` as the `episode_ids` to visualize.
    viz_video_ids : Groups of ids passed to `AgentViewViz` as its own `episode_ids`.
    viz : The `VizSuite` built by `get_viz`, or `None` before its first call.
    """

    viz_ep_ids = [
        "FloorPlan_Train1_1_3",
        "FloorPlan_Train1_1_4",
        "FloorPlan_Train1_1_5",
        "FloorPlan_Train1_1_6",
    ]
    viz_video_ids = [["FloorPlan_Train1_1_3"], ["FloorPlan_Train1_1_4"]]

    viz: Optional[VizSuite] = None

    def get_viz(self, mode):
        """Return the visualizer suite, building it for `mode` on the first call.

        Later calls return the suite built by the first one, whatever `mode`
        they are given.
        """
        if self.viz is None:
            # Basic 2D trajectory visualizer (task output source)
            trajectory_viz = TrajectoryViz(
                path_to_target_location=("task_info", "target"),
            )
            # Egocentric view visualizer (vector task source)
            agent_view_viz = AgentViewViz(
                max_video_length=100, episode_ids=self.viz_video_ids
            )
            # Default action probability visualizer (actor critic output source)
            actor_viz = ActorViz(figsize=(3.25, 10), fontsize=18)
            # Default taken action logprob visualizer (rollout storage source)
            logprob_viz = TensorViz1D()
            # Same episode mask visualizer (rollout storage source)
            mask_viz = TensorViz1D(rollout_source=("masks",))
            # Default recurrent memory visualizer (rollout storage source)
            memory_viz = TensorViz2D(
                rollout_source=("memory_first_last", "single_belief")
            )
            # Specialized 2D trajectory visualizer (task output source)
            thor_viz = ThorViz(
                figsize=(16, 8),
                viz_rows_cols=(448, 448),
                scenes=("FloorPlan_Train{}_{}", 1, 1, 1, 1),
            )

            self.viz = VizSuite(
                episode_ids=self.viz_ep_ids,
                mode=mode,
                base_trajectory=trajectory_viz,
                egeocentric=agent_view_viz,
                action_probs=actor_viz,
                taken_action_logprobs=logprob_viz,
                episode_mask=mask_viz,
                rnn_memory=memory_viz,
                thor_trajectory=thor_viz,
            )

        return self.viz

    def machine_params(self, mode="train", **kwargs):
        """Return the parent's machine parameters, adding the visualizer in test mode."""
        params = super().machine_params(mode, **kwargs)
        if mode != "test":
            return params

        params.set_visualizer(self.get_viz(mode))
        return params


# %%
"""
Running test on the same downloaded models, but using the visualization-enabled `ExperimentConfig` with
 
```bash
PYTHONPATH=. python allenact/main.py \
running_inference_tutorial \
-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \
-b projects/tutorials \
-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30/exp_PointNavRobothorRGBPPO__stage_00__steps_000039031200.pt \
--eval
```

generates different types of visualization and logs them in tensorboard. If everything is properly set up and
tensorboard includes the `robothor-pointnav-rgb-resnet` folder, under the `IMAGES` tab, we should see something similar
to

![Visualization example](../img/viz_pretrained_2videos.jpg)
"""


================================================
FILE: projects/tutorials/training_a_pointnav_model.py
================================================
# literate: tutorials/training-a-pointnav-model.md
# %%
"""# Tutorial: PointNav in RoboTHOR."""

# %%
"""
![RoboTHOR Robot](../img/RoboTHOR_robot.jpg)
## Introduction
One of the most obvious tasks that an embodied agent should master is navigating the world it inhabits.
Before we can teach a robot to cook or clean it first needs to be able to move around. The simplest
way to formulate "moving around" into a task is by making your agent find a beacon somewhere in the environment.
This beacon transmits its location, such that at any time, the agent can get the direction and Euclidean distance
to the beacon. This particular task is often called Point Navigation, or **PointNav** for short.

#### PointNav
At first glance, this task seems trivial. If the agent is given the direction and distance of the target at
all times, can it not simply follow this signal directly? The answer is no, because agents are often trained
on this task in environments that emulate real-world buildings which are not wide-open spaces, but rather
contain many smaller rooms. Because of this, the agent has to learn to navigate human spaces and use doors
and hallways to efficiently navigate from one side of the building to the other. This task becomes particularly
difficult when the agent is tested in an environment that it is not trained in. If the agent does not know
how the floor plan of an environment looks, it has to learn to predict the design of man-made structures,
to efficiently navigate across them, much like how people instinctively know how to move around a building
they have never seen before based on their experience navigating similar buildings.

#### What is an environment anyways?
Environments are worlds in which embodied agents exist. If our embodied agent is simply a neural network that is being
trained in a simulator, then that simulator is its environment. Similarly, if our agent is a
physical robot then its environment is the real world. The agent interacts with the environment by taking one
of several available actions (such as "move forward", or "turn left"). After each action, the environment
produces a new frame that the agent can analyze to determine its next step. For many tasks, including PointNav,
the agent also has a special "stop" action which indicates that the agent thinks it has reached the target.
After this action is called the agent will be reset to a new location, regardless of whether it reached the
target. The hope is that after enough training the agent will learn to correctly assess that it has successfully
navigated to the target.

![RoboTHOR Sim vs. Real](../img/RoboTHOR_sim_real.jpg)

There are many simulators designed for the training
of embodied agents. In this tutorial, we will be using a simulator called [RoboTHOR](https://ai2thor.allenai.org/robothor/), 
which is designed specifically to train models that can easily be transferred to a real robot, by providing a
photo-realistic virtual environment and a real-world replica of the environment that researchers can have access to. 
RoboTHOR contains 60 different virtual scenes with different floor plans and furniture and 15 validation scenes.

It is also important to mention that **AllenAct**
has a class abstraction called Environment. This is not the actual simulator game engine or robotics controller,
but rather a shallow wrapper that provides a uniform interface to the actual environment.

#### Learning algorithm
Finally, let us briefly touch on the algorithm that we will use to train our embodied agent to navigate. While
*AllenAct* offers us great flexibility to train models using complex pipelines, we will be using a simple
pure reinforcement learning approach for this tutorial. More specifically, we will be using DD-PPO,
a decentralized and distributed variant of the ubiquitous PPO algorithm. For those unfamiliar with Reinforcement
Learning we highly recommend [this tutorial](http://karpathy.github.io/2016/05/31/rl/) by Andrej Karpathy, and [this
book](http://www.incompleteideas.net/book/the-book-2nd.html) by Sutton and Barto. Essentially what we are doing
is letting our agent explore the environment on its own, rewarding it for taking actions that bring it closer
to its goal and penalizing it for actions that take it away from its goal. We then optimize the agent's model
to maximize this reward.

## Requirements
To train the model on the PointNav task, we need to [install the RoboTHOR environment](../installation/installation-framework.md) 
and [download the RoboTHOR PointNav dataset](../installation/download-datasets.md).

The dataset contains a list of episodes with thousands of randomly generated starting positions and target locations for each of the scenes
as well as a precomputed cache of distances, containing the shortest path from each point in a scene, to every other point in that scene. 
This is used to reward the agent for moving closer to the target in terms of geodesic distance - the actual path distance (as opposed to a 
straight line distance).

## Config File Setup
Now comes the most important part of the tutorial: we are going to write an experiment config file.
If this is your first experience with experiment config files in AllenAct, we suggest that you
first see our how-to on [defining an experiment](../howtos/defining-an-experiment.md) which will
walk you through creating a simplified experiment config file.

Unlike a library that can be imported into python, **AllenAct** is structured as a framework with a runner script called
`main.py` which will run the experiment specified in a config file. This design forces us to keep meticulous records of 
exactly which settings were used to produce a particular result,
which can be very useful given how expensive RL models are to train.

The `projects/` directory is home to different projects using `AllenAct`. Currently it is populated with baselines
of popular tasks and tutorials.

We already have all the code for this tutorial stored in `projects/tutorials/training_a_pointnav_model.py`. We will
be using this file to run our experiments, but you can create a new directory in `projects/` and start writing your
experiment there.

We start off by importing everything we will need:
"""

# %%
import glob
import os
from math import ceil
from typing import Dict, Any, List, Optional, Sequence

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from torchvision import models

from allenact.algorithms.onpolicy_sync.losses import PPO
from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams
from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
from allenact.base_abstractions.sensor import SensorSuite
from allenact.base_abstractions.task import TaskSampler
from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor
from allenact.utils.experiment_utils import (
    Builder,
    PipelineStage,
    TrainingPipeline,
    LinearDecay,
    evenly_distribute_count_into_bins,
)
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.navigation_plugin.objectnav.models import (
    ResnetTensorNavActorCritic,
)
from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from allenact_plugins.robothor_plugin.robothor_task_samplers import (
    PointNavDatasetTaskSampler,
)
from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask

# %%
"""Next we define a new experiment config class:"""


# %%
class PointNavRoboThorRGBPPOExperimentConfig(ExperimentConfig):
    """A Point Navigation experiment configuration in RoboThor."""

    # %%
    """
    We then define the task parameters. For PointNav, these include the maximum number of steps our agent
    can take before being reset (this prevents the agent from wandering on forever), and a configuration
    for the reward function that we will be using. 
    """

    # %%
    # Task Parameters
    # MAX_STEPS is forwarded to the task sampler as "max_steps" and
    # REWARD_CONFIG as "rewards_config" (see
    # `_get_sampler_args_for_scene_split`).
    MAX_STEPS = 500
    REWARD_CONFIG = {
        "step_penalty": -0.01,
        "goal_success_reward": 10.0,
        "failed_stop_reward": 0.0,
        "shaping_weight": 1.0,
    }

    # %%
    """
    In this case, we set the maximum number of steps to 500.
    We give the agent a reward of -0.01 for each action that it takes (this is to encourage it to reach the goal
    in as few actions as possible), and a reward of 10.0 if the agent manages to successfully reach its destination.
    If the agent selects the `stop` action without reaching the target we do not punish it (although this is
    sometimes useful for preventing the agent from stopping prematurely). Finally, our agent gets rewarded if it moves
    closer to the target and gets punished if it moves further away. `shaping_weight` controls how strong this signal should
    be and is here set to 1.0. These parameters work well for training an agent on PointNav, but feel free to play around
    with them.
    
    Next, we set the parameters of the simulator itself. Here we select a resolution at which the engine will render
    every frame (640 by 480) and a resolution at which the image will be fed into the neural network (here it is set
    to a 224 by 224 box).
    """

    # %%
    # Simulator Parameters
    # CAMERA_WIDTH / CAMERA_HEIGHT become the `width` / `height` entries of
    # ENV_ARGS; SCREEN_SIZE is used for both sides of the RGB sensor output and
    # of the ResNet preprocessor input.
    CAMERA_WIDTH = 640
    CAMERA_HEIGHT = 480
    SCREEN_SIZE = 224

    # %%
    """
    Next, we set the hardware parameters for the training engine. `NUM_PROCESSES` sets the total number of parallel
    processes that will be used to train the model. In general, more processes result in faster training,
    but since each process is a unique instance of the environment in which we are training they can take up a
    lot of memory. Depending on the size of the model, the environment, and the hardware we are using, we may
    need to adjust this number, but for a setup with 8 GTX Titans, 60 processes work fine. 60 also happens to
    be the number of training scenes in RoboTHOR, which allows each process to load only a single scene into
    memory, saving time and space.
    
    `TRAINING_GPUS` takes the ids of the GPUS on which
    the model should be trained. Similarly `VALIDATION_GPUS` and `TESTING_GPUS` hold the ids of the GPUS on which
    the validation and testing will occur. During training, a validation process is constantly running and evaluating
    the current model, to show the progress on the validation set, so reserving a GPU for validation can be a good idea.
    If our hardware setup does not include a GPU, these fields can be set to empty lists, as the codebase will default
    to running everything on the CPU with only 1 process.
    """

    # %%
    # Passed unchanged to `TrainingPipeline(advance_scene_rollout_period=...)`.
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    # Only used for training when CUDA is available; without CUDA
    # `machine_params` falls back to a fixed 8 training processes.
    NUM_PROCESSES = 20
    # GPU ids per mode; `machine_params` replaces them with an empty device
    # list when CUDA is not available.
    TRAINING_GPUS: Sequence[int] = [0]
    VALIDATION_GPUS: Sequence[int] = [0]
    TESTING_GPUS: Sequence[int] = [0]

    # %%
    """
    Since we are using a dataset to train our model, we need to define the path to where we have stored it. If we
    downloaded the dataset as instructed above, we can define the path as follows:
    """

    # %%
    # Both paths are resolved against the working directory at import time and
    # both point at the small `debug` split. The sampler-args methods look for
    # `*.json.gz` files in the `episodes` sub-directory of these folders, and
    # the test sampler reuses VAL_DATASET_DIR.
    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/debug")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/debug")

    # %%
    """
    Next, we define the sensors. `RGBSensorThor` is the environment's implementation of an RGB sensor. It takes the
    raw image output by the simulator and resizes it to the input dimensions for our neural network that we
    specified above. It also performs normalization if we want. `GPSCompassSensorRoboThor` is a sensor that tracks
    the point our agent needs to move to. It tells us the direction and distance to our goal at every time step.
    """

    # %%
    # The RGB sensor's uuid ("rgb_lowres") is the input uuid of the ResNet
    # preprocessor declared in PREPROCESSORS.
    SENSORS = [
        RGBSensorThor(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    # %%
    """
    For the sake of this example, we are also going to be using a preprocessor with our model. In *AllenAct*
    the preprocessor abstraction is designed with large models with frozen weights in mind. These models often
    hail from the ResNet family and transform the raw pixels that our agent observes in the environment, into a
    complex embedding, which then gets stored and used as input to our trainable model instead of the original image.
    Most other preprocessing work is done in the sensor classes (as we just saw with the RGB
    sensor scaling and normalizing our input), but for the sake of efficiency, all neural network preprocessing should
    use this abstraction.
    """

    # %%
    # A single ResNet-18 preprocessor: reads the "rgb_lowres" sensor output and
    # publishes its result under "rgb_resnet", the uuid the model is
    # configured with in `create_model`.
    PREPROCESSORS = [
        Builder(
            ResNetPreprocessor,
            {
                "input_height": SCREEN_SIZE,
                "input_width": SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": ["rgb_lowres"],
                "output_uuid": "rgb_resnet",
            },
        ),
    ]

    # %%
    """
    Next, we must define all of the observation inputs that our model will use. These are just
    the hardcoded ids of the sensors we are using in the experiment.
    """

    # %%
    # "rgb_resnet" is the preprocessor's output uuid and
    # "target_coordinates_ind" is the goal sensor uuid passed to the model in
    # `create_model` (presumably the default uuid of
    # `GPSCompassSensorRoboThor` -- TODO confirm).
    # NOTE(review): nothing else in this class reads OBSERVATIONS.
    OBSERVATIONS = [
        "rgb_resnet",
        "target_coordinates_ind",
    ]

    # %%
    """
    Finally, we must define the settings of our simulator. We set the camera dimensions to the values
    we defined earlier. We set rotateStepDegrees to 30 degrees, which means that every time the agent takes a
    turn action, they will rotate by 30 degrees. We set grid size to 0.25 which means that every time the
    agent moves forward, it will do so by 0.25 meters. 
    """

    # %%
    # Copied into the "env_args" entry of every task sampler's arguments; the
    # train and validation samplers additionally add an "x_display" entry.
    ENV_ARGS = dict(
        width=CAMERA_WIDTH,
        height=CAMERA_HEIGHT,
        rotateStepDegrees=30.0,
        visibilityDistance=1.0,
        gridSize=0.25,
        agentMode="bot",
    )

    # %%
    """
    Now we move on to the methods that we must define to finish implementing an experiment config. Firstly we
    have a simple method that just returns the name of the experiment.
    """

    # %%
    @classmethod
    def tag(cls) -> str:
        """Return the name of this experiment."""
        return "PointNavRobothorRGBPPO"

    # %%
    """
    Next, we define the training pipeline. In this function, we specify exactly which algorithm or algorithms
    we will use to train our model. In this simple example, we are using the PPO loss with a learning rate of 3e-4.
    We specify 250 million steps of training and a rollout length of 30 with the `ppo_steps` and `num_steps` parameters
    respectively. All the other standard PPO parameters are also present in this function. `metric_accumulate_interval`
    sets the frequency at which data is accumulated from all the processes and logged while `save_interval` sets how
    often we save the model weights and run validation on them.
    """

    # %%
    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build the single-stage PPO training pipeline for this experiment.

        The pipeline optimizes one loss, `"ppo_loss"`, with Adam for
        `ppo_steps` steps while the learning rate is scheduled by a
        `LambdaLR` driven by `LinearDecay(steps=ppo_steps)`.
        """
        # Total number of steps of the (only) pipeline stage; also the horizon
        # handed to the learning-rate decay.
        ppo_steps = int(250000000)

        optimizer_builder = Builder(optim.Adam, dict(lr=3e-4))
        ppo_loss = PPO(**PPOConfig)
        rollout_period = cls.ADVANCE_SCENE_ROLLOUT_PERIOD
        ppo_stage = PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps)
        lr_scheduler_builder = Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
        )

        return TrainingPipeline(
            # Checkpointing / logging cadence.
            save_interval=5000000,
            metric_accumulate_interval=1000,
            # Optimization settings.
            optimizer_builder=optimizer_builder,
            num_mini_batch=1,
            update_repeats=3,
            max_grad_norm=0.5,
            # Rollout length.
            num_steps=30,
            named_losses={"ppo_loss": ppo_loss},
            # Return / advantage estimation.
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=rollout_period,
            pipeline_stages=[ppo_stage],
            lr_scheduler_builder=lr_scheduler_builder,
        )

    # %%
    """
    The `machine_params` method returns the hardware parameters of each
    process, based on the list of devices we defined above.
    """

    # %%
    def machine_params(self, mode="train", **kwargs):
        """Return the `MachineParams` (process counts and devices) for `mode`.

        `mode` must be `"train"`, `"valid"` or `"test"`; anything else raises
        `NotImplementedError`. Without CUDA the device list is empty and
        training uses 8 processes; validation and testing always use a single
        process.
        """
        if mode == "train":
            workers_per_device = 1
            sampler_devices = list(self.TRAINING_GPUS)
            if torch.cuda.is_available():
                gpu_ids = list(self.TRAINING_GPUS) * workers_per_device
                nprocesses = evenly_distribute_count_into_bins(
                    self.NUM_PROCESSES, len(gpu_ids)
                )
            else:
                gpu_ids = []
                nprocesses = 8
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = self.VALIDATION_GPUS if torch.cuda.is_available() else []
            sampler_devices = gpu_ids
        elif mode == "test":
            nprocesses = 1
            gpu_ids = self.TESTING_GPUS if torch.cuda.is_available() else []
            sampler_devices = gpu_ids
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        # The preprocessor graph is always built for training and otherwise
        # only when at least one process will run.
        needs_graph = mode == "train"
        if not needs_graph:
            if isinstance(nprocesses, int):
                needs_graph = nprocesses > 0
            elif isinstance(nprocesses, Sequence):
                needs_graph = sum(nprocesses) > 0

        sensor_preprocessor_graph = None
        if needs_graph:
            sensor_preprocessor_graph = SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
                preprocessors=self.PREPROCESSORS,
            )

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sampler_devices=sampler_devices,  # ignored with > 1 gpu_ids
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    # %%
    """
    Now we define the actual model that we will be using. **AllenAct** offers first-class support for PyTorch,
    so any PyTorch model that implements the provided `ActorCriticModel` class will work here. Here we borrow a model from the `pointnav_baselines` project (which
    unsurprisingly contains several PointNav baselines). It is a small convolutional network that expects the output of a ResNet as its rgb input followed by a single-layered GRU. The model accepts as input the number of different
    actions our agent can perform in the environment through the `action_space` parameter, which we get from the task definition. We also define the shape of the inputs we are going to be passing to the model with `observation_space`.
    We specify the names of our sensors with `goal_sensor_uuid` and `rgb_resnet_preprocessor_uuid`. Finally, we define
    the size of our RNN with `hidden_size` and the size of the embedding of our goal sensor data (the direction and
    distance to the target) with `goal_dims`.
    """

    # %%
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Instantiate the actor-critic model trained in this experiment.

        Expects `kwargs["sensor_preprocessor_graph"]`, whose
        `observation_spaces` are used as the model's observation space. The
        action space is discrete, with one action per name returned by
        `PointNavTask.class_action_names()`.
        """
        num_actions = len(PointNavTask.class_action_names())
        action_space = gym.spaces.Discrete(num_actions)
        preprocessor_graph = kwargs["sensor_preprocessor_graph"]
        return ResnetTensorNavActorCritic(
            action_space=action_space,
            observation_space=preprocessor_graph.observation_spaces,
            goal_sensor_uuid="target_coordinates_ind",
            rgb_resnet_preprocessor_uuid="rgb_resnet",
            hidden_size=512,
            goal_dims=32,
        )

    # %%
    """
    We also need to define the task sampler that we will be using. This is a piece of code that generates instances
    of tasks for our agent to perform (essentially starting locations and targets for PointNav). Since we are getting
    our tasks from a dataset, the task sampler is a very simple code that just reads the specified file and sets
    the agent to the next starting locations whenever the agent exceeds the maximum number of steps or selects the
    `stop` action.
    """

    # %%
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `PointNavDatasetTaskSampler` from the given keyword
        arguments (forwarded unchanged)."""
        return PointNavDatasetTaskSampler(**kwargs)

    # %%
    """
    You might notice that we did not specify the task sampler's arguments, but are rather passing them in. The
    reason for this is that each process will have its own task sampler, and we need to specify exactly which scenes
    each process should work with. If we have several GPUs and many scenes, this process of distributing the work can be rather complicated, so we define a few helper functions to do just this.
    """

    # %%
    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Build the task sampler arguments shared by all modes for one
        process.

        The scene names are taken from the `*.json.gz` files in `scenes_dir`
        (file name up to the first `.`) and split into `total_processes`
        contiguous chunks; the chunk with index `process_ind` is assigned to
        this process. When there are more processes than scenes, the scene
        list is repeated so that every process gets at least one scene.

        Raises a `RuntimeError` if `scenes_dir` contains no `*.json.gz` file.
        """
        path = os.path.join(scenes_dir, "*.json.gz")
        # Every process calls this method on its own and then picks its chunk
        # by index, so all of them must see the scenes in the same order.
        # `glob` makes no ordering guarantee, hence the explicit sort.
        scenes = sorted(
            os.path.basename(scene).split(".")[0] for scene in glob.glob(path)
        )
        if len(scenes) == 0:
            raise RuntimeError(
                (
                    "Could find no scene dataset information in directory {}."
                    " Are you sure you've downloaded them? "
                    " If not, see https://allenact.org/installation/download-datasets/ information"
                    " on how this can be done."
                ).format(scenes_dir)
            )
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisible by the number of scenes"
                )
            # Repeat the scene list until it covers all processes, then trim
            # it to a multiple of the number of processes.
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        elif len(scenes) % total_processes != 0:
            # Nothing is oversampled here: the scenes simply cannot be divided
            # evenly, so some processes receive more scenes than others.
            print(
                "Warning: the scenes cannot be split evenly among the processes,"
                " some processes will be assigned more scenes than others."
                " You can avoid this by setting a number of workers that divides the number of scenes"
            )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }

    # %%
    """
    The very last things we need to define are the sampler arguments themselves. We define them separately for a train,
    validation, and test sampler, but in this case, they are almost the same. The arguments need to include the location
    of the dataset and distance cache as well as the environment arguments for our simulator, both of which we defined above
    and are just referencing here. The only consequential differences between these task samplers are the path to the dataset
    we are using (train or validation) and whether we want to loop over the dataset or not (we want this for training since
    we want to train for several epochs, but we do not need this for validation and testing). Since the test scenes of
    RoboTHOR are private we are also testing on our validation set.
    """

    # %%
    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments of training process `process_ind`.

        Uses the episodes found in `TRAIN_DATASET_DIR/episodes`, loops over the
        dataset and enables `allow_flipping`. The simulator's `x_display` is
        derived from the device assigned to this process, or is `None` when no
        devices are given.
        """
        episodes_dir = os.path.join(self.TRAIN_DATASET_DIR, "episodes")
        sampler_args = self._get_sampler_args_for_scene_split(
            episodes_dir,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )

        if devices is not None and len(devices) > 0:
            # Processes are assigned to devices round-robin.
            x_display = "0.%d" % devices[process_ind % len(devices)]
        else:
            x_display = None

        env_args = dict(self.ENV_ARGS)
        env_args["x_display"] = x_display

        sampler_args.update(
            scene_directory=self.TRAIN_DATASET_DIR,
            loop_dataset=True,
            env_args=env_args,
            allow_flipping=True,
        )
        return sampler_args

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments of validation process
        `process_ind`.

        Uses the episodes found in `VAL_DATASET_DIR/episodes` and does not loop
        over the dataset. The simulator's `x_display` is derived from the
        device assigned to this process, or is `None` when no devices are
        given.
        """
        episodes_dir = os.path.join(self.VAL_DATASET_DIR, "episodes")
        sampler_args = self._get_sampler_args_for_scene_split(
            episodes_dir,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )

        if devices is not None and len(devices) > 0:
            # Processes are assigned to devices round-robin.
            x_display = "0.%d" % devices[process_ind % len(devices)]
        else:
            x_display = None

        env_args = dict(self.ENV_ARGS)
        env_args["x_display"] = x_display

        sampler_args.update(
            scene_directory=self.VAL_DATASET_DIR,
            loop_dataset=False,
            env_args=env_args,
        )
        return sampler_args

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task sampler arguments of test process `process_ind`.

        Testing reuses the validation data (`VAL_DATASET_DIR/episodes`) and
        does not loop over the dataset. `devices` is accepted but not used:
        no `x_display` entry is added to the environment arguments.
        """
        episodes_dir = os.path.join(self.VAL_DATASET_DIR, "episodes")
        sampler_args = self._get_sampler_args_for_scene_split(
            episodes_dir,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        sampler_args.update(
            scene_directory=self.VAL_DATASET_DIR,
            loop_dataset=False,
            env_args=dict(self.ENV_ARGS),
        )
        return sampler_args


# %%
"""
This is it! If we copy all of the code into a file we should be able to run our experiment!

## Training Model On Debug Dataset
We can test if our installation worked properly by training our model on a small dataset of 4 episodes. This
should take about 20 minutes on a computer with an NVIDIA GPU.

We can now train a model by running:
```bash
PYTHONPATH=. python allenact/main.py -o <PATH_TO_OUTPUT> -c -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> <EXPERIMENT_NAME>
```
If using the same configuration as we have set up, the following command should work:
```bash
PYTHONPATH=. python allenact/main.py training_a_pointnav_model -o storage/robothor-pointnav-rgb-resnet-resnet -b projects/tutorials
```
If we start up a tensorboard server during training and specify that `output_dir=storage` the output should look
something like this:
![tensorboard output](../img/point-nav-baseline-tb.png)

## Training Model On Full Dataset
We can also train the model on the full dataset by changing back our dataset path and running the same command as above.
But be aware, training this takes nearly 2 days on a machine with 8 GPUs.

## Testing Model
To test the performance of a model please refer to [this tutorial](running-inference-on-a-pretrained-model.md).

## Conclusion
In this tutorial, we learned how to create a new PointNav experiment using **AllenAct**. There are many simple
and obvious ways to modify the experiment from here - changing the model, the learning algorithm and the environment
each requires very few lines of code changed in the above file, allowing us to explore our embodied AI research ideas
across different frameworks with ease.
"""


================================================
FILE: requirements.txt
================================================
certifi==2020.12.5
chardet==4.0.0
cloudpickle==1.6.0
cycler==0.10.0
decorator==4.4.2
filelock==3.0.12
future==0.18.2
gym==0.17.3
idna==2.10
imageio==2.9.0
imageio-ffmpeg==0.4.3
kiwisolver==1.3.1
matplotlib==3.3.3
moviepy==1.0.3
networkx==2.5
numpy==1.19.5
opencv-python==4.5.1.48
Pillow>=8.2.0,<9.0.0
proglog==0.1.9
protobuf==3.14.0
pyglet==1.5.0
pyparsing==2.4.7
python-dateutil>=2.8.1
requests==2.25.1
scipy==1.5.4
setproctitle==1.2.1
six>=1.15.0
tensorboardX==2.1
torch>=1.6.0,!=1.8.0,<2.0.0
torchvision>=0.7.0,<0.10.0
tqdm==4.56.0
urllib3==1.26.5
attr
attrs
wandb

================================================
FILE: scripts/auto_format.sh
================================================
#!/bin/bash

# Formats the repository's python code with black and docformatter.

# Move to the directory containing the directory that this file is in (i.e.
# the repository root). Note that the `/..` must be appended AFTER taking the
# dirname: `dirname "<script>/.."` is just `<script>`, which is a file and
# cannot be cd'ed into.
cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." >/dev/null 2>&1 && pwd )" || exit

echo RUNNING BLACK
black . --exclude src --exclude external_projects
echo BLACK DONE
echo ""

echo RUNNING DOCFORMATTER
find . -name "*.py" | grep -v ^./src | grep -v ^./external_projects | grep -v used_configs | xargs docformatter --in-place -r
echo DOCFORMATTER DONE

echo ALL DONE

================================================
FILE: scripts/build_docs.py
================================================
import glob
import os
import shutil
import sys
from pathlib import Path
from subprocess import check_output
from threading import Thread
from typing import Dict, Union, Optional, Set, List, Sequence, Mapping

from git import Git
from ruamel.yaml import YAML  # type: ignore

from constants import ABS_PATH_OF_TOP_LEVEL_DIR

# TODO: the scripts directory shouldn't be a module (as it conflicts with
#  some local development workflows) but we do want to import scripts/literate.py.
#  Temporary solution is just to modify the sys.path when this script is run.
sys.path.append(os.path.abspath(os.path.dirname(Path(__file__))))

from literate import literate_python_to_markdown


class StringColors:
    """Terminal escape sequences used to style the build messages.

    A message is styled by prefixing it with one of the codes and suffixing
    it with `ENDC`.
    """

    HEADER = "\033[95m"
    OKBLUE = "\033[94m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    FAIL = "\033[91m"
    ENDC = "\033[0m"
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"


# File names for which `build_docs` never generates documentation (files that
# do not end in `.py` are skipped as well).
exclude_files = [
    ".DS_Store",
    "__init__.py",
    "__init__.pyc",
    "README.md",
    "version.py",
    "run.py",
    "setup.py",
    "main.py",
]


def render_file(
    relative_src_path: str, src_file: str, to_file: str, modifier=""
) -> None:
    """Render the markdown documentation of a single python file.

    The file is first handed to `literate_python_to_markdown`; if that call
    returns a truthy value nothing else is done. Otherwise this shells out to
    pydoc-markdown, which creates markdown from the docstrings of the python
    functions and classes in the module we specify, and the result is
    written to `to_file`. A failure of the pydoc-markdown step is reported
    with a `[SKIPPING]` message instead of being raised.

    The modifier is appended to the module namespace passed to
    pydoc-markdown and specifies the depth at which to generate docs for
    classes and functions in the file. More information here:
    https://pypi.org/project/pydoc-markdown/
    """
    # First try literate
    was_literate = False
    try:
        was_literate = literate_python_to_markdown(
            path=os.path.join(relative_src_path, src_file)
        )
    except Exception:
        # Best effort: any failure here simply falls back to pydoc-markdown.
        pass

    if was_literate:
        return

    # Now do standard pydocmd
    relative_src_namespace = relative_src_path.replace("/", ".")
    # Strip only the file extension; `str.replace` would also remove a ".py"
    # occurring in the middle of the file name.
    src_base = src_file[: -len(".py")] if src_file.endswith(".py") else src_file

    if relative_src_namespace == "":
        namespace = f"{src_base}{modifier}"
    else:
        namespace = f"{relative_src_namespace}.{src_base}{modifier}"

    pydoc_config = """'{
        renderer: {
            type: markdown,
            code_headers: true,
            descriptive_class_title: false,
            add_method_class_prefix: true,
            source_linker: {type: github, repo: allenai/allenact},
            header_level_by_type: {
                Module: 1,
                Class: 2,
                Method: 3,
                Function: 3,
                Data: 3,
            }
        }
    }'"""
    # Collapse the config onto one line so it can be passed as a single
    # (single-quoted) shell argument.
    pydoc_config = " ".join(pydoc_config.split())
    cmd = " ".join(["pydoc-markdown", "-m", namespace, pydoc_config])
    try:
        # With `shell=True` the command must be given as one string.
        call_result = check_output(cmd, shell=True, env=os.environ).decode("utf-8")

        doc_split = call_result.split("\n")
        # A "[[source]](<github url>)" link used to be appended to the first
        # line here; it has been removed for now.
        mdlink = ""
        call_result = "\n".join([doc_split[0] + " " + mdlink] + doc_split[1:])
        call_result = call_result.replace("_DOC_COLON_", ":")

        # The output was decoded as UTF-8, so write it back as UTF-8 rather
        # than with the platform's default encoding. The file is only opened
        # once the content is ready so that a failure above cannot leave an
        # empty file behind.
        # noinspection PyShadowingNames
        with open(to_file, "w", encoding="utf-8") as f:
            f.write(call_result)
        print(
            f"{StringColors.OKGREEN}[SUCCESS]{StringColors.ENDC} built docs for {src_file} -> {to_file}."
        )
    except Exception:
        print(
            f"{StringColors.WARNING}[SKIPPING]{StringColors.ENDC} could not"
            f" build docs for {src_file} (missing an import?). CMD: '{cmd}'"
        )


# noinspection PyShadowingNames
def build_docs_for_file(
    relative_path: str, file_name: str, docs_dir: str, threads: List
) -> Dict[str, str]:
    """Start building the docs for an individual python file.

    The rendering runs in a background thread (started here and appended to
    `threads`, so the caller can join it later). Returns a single-entry
    mapping from the file's base name (without `.py`) to the path of its
    markdown page under `api/`.
    """
    # Strip only the extension; `str.replace` would also remove a ".py"
    # occurring in the middle of the file name.
    if file_name.endswith(".py"):
        clean_filename = file_name[: -len(".py")]
    else:
        clean_filename = file_name
    markdown_filename = f"{clean_filename}.md"

    output_path = os.path.join(docs_dir, relative_path, markdown_filename)
    nav_path = os.path.join("api", relative_path, markdown_filename)

    thread = Thread(target=render_file, args=(relative_path, file_name, output_path))
    thread.start()
    threads.append(thread)

    return {os.path.basename(clean_filename): nav_path}


# noinspection PyShadowingNames
def build_docs(
    base_dir: Union[Path, str],
    root_path: Union[Path, str],
    docs_dir: Union[Path, str],
    threads: List,
    allowed_dirs: Optional[Set[str]] = None,
):
    """Recursively build the docs of every python file below `root_path`.

    Mirrors the directory structure (relative to `base_dir`) inside
    `docs_dir`, starts a rendering thread per python file (collected in
    `threads`) and returns the navigation entries of this directory: one
    single-entry dict per documented file and per non-empty sub-directory.
    When `allowed_dirs` is given, directories whose absolute path is not in
    it are skipped.
    """
    base_dir = str(base_dir)
    root_path = str(root_path)
    docs_dir = str(docs_dir)

    nav_entries = []

    for entry_name in os.listdir(root_path):
        entry_path = os.path.join(root_path, entry_name)

        if allowed_dirs is not None and os.path.isdir(entry_path):
            if os.path.abspath(entry_path) not in allowed_dirs:
                print(f"SKIPPING {entry_path}")
                continue

        # Location of `root_path` relative to `base_dir` (with all "."
        # characters dropped) and the matching output directory.
        rel_to_base = os.path.relpath(root_path, base_dir).replace(".", "")
        output_dir = os.path.join(docs_dir, rel_to_base)
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)

        if os.path.isdir(entry_path):
            sub_nav = build_docs(
                base_dir,
                entry_path,
                docs_dir,
                threads=threads,
                allowed_dirs=allowed_dirs,
            )
            # Directories without any documented file are left out of the nav.
            if sub_nav:
                nav_entries.append({entry_name: sub_nav})
            continue

        if entry_name not in exclude_files and entry_name.endswith(".py"):
            nav_entries.append(
                build_docs_for_file(rel_to_base, entry_name, docs_dir, threads=threads)
            )

    return nav_entries


def project_readme_paths_to_nav_structure(project_readmes):
    """Turn a list of project README paths into a nested nav structure.

    For every path, the directories following a `projects` component become
    nested nav sections and the innermost directory maps to the README path
    with every `docs/` occurrence removed. The result is a list of
    single-entry dicts whose values are either such a path or, recursively,
    another list of this form.
    """
    tree = {}
    for readme_path in project_readmes:
        parts = os.path.dirname(readme_path).split("/")
        last_index = len(parts) - 1

        inside_projects = False
        cursor = tree
        for index, part in enumerate(parts):
            # Everything up to and including a "projects" component is skipped.
            if part == "projects":
                inside_projects = True
                continue
            if not inside_projects:
                continue

            if part not in cursor:
                if index == last_index:
                    cursor[part] = readme_path.replace("docs/", "")
                    break
                cursor[part] = {}

            cursor = cursor[part]

    def to_nav(node):
        if isinstance(node, str):
            return node
        return [{key: to_nav(child)} for key, child in node.items()]

    return to_nav(tree)


def pruned_nav_entries(nav_entries):
    """Recursively drop nav entries whose markdown file does not exist.

    A string is kept only if it names an existing path below `docs` (relative
    to the current working directory), otherwise `None` is returned.
    Sequences and mappings are rebuilt without the entries that prune to
    something falsy. Any other type raises `NotImplementedError`.
    """
    # NOTE: the string check must come first as strings are sequences too.
    if isinstance(nav_entries, str):
        doc_exists = os.path.exists(os.path.join("docs", nav_entries))
        return nav_entries if doc_exists else None

    if isinstance(nav_entries, Sequence):
        pruned_items = (pruned_nav_entries(item) for item in nav_entries)
        return [item for item in pruned_items if item]

    if isinstance(nav_entries, Mapping):
        pruned_pairs = (
            (key, pruned_nav_entries(value)) for key, value in nav_entries.items()
        )
        return {key: value for key, value in pruned_pairs if value}

    raise NotImplementedError()


def main():
    """Assemble the `docs` directory and regenerate the API section of
    `mkdocs.yml`.

    Copies the README, project READMEs, LICENSE and CONTRIBUTING files into
    `docs`, builds the API markdown files for the git-tracked python sources
    and rewrites the `API` entry of the mkdocs navigation accordingly.
    """
    # All relative paths below are resolved against the top level directory.
    os.chdir(ABS_PATH_OF_TOP_LEVEL_DIR)

    print("Copying all README.md files to docs.")
    with open("README.md") as f:
        readme_content = f.readlines()
    # The copy lives inside `docs`, so `docs/` is removed from every line.
    readme_content = [x.replace("docs/", "") for x in readme_content]
    with open("docs/index.md", "w") as f:
        f.writelines(readme_content)

    # Mirror every project README into `docs/projects/...`.
    # NOTE: `project_readmes` is currently only needed by the commented-out
    # navigation code further down.
    project_readmes = []
    for readme_file_path in glob.glob("projects/**/README.md", recursive=True):
        if "docs/" not in readme_file_path:
            new_path = os.path.join("docs", readme_file_path)
            os.makedirs(os.path.dirname(new_path), exist_ok=True)
            shutil.copy(readme_file_path, new_path)
            project_readmes.append(new_path)

    print("Copying LICENSE file to docs.")
    shutil.copy("LICENSE", "docs/LICENSE.md")

    print("Copying CONTRIBUTING.md file to docs.")
    shutil.copy("CONTRIBUTING.md", "docs/CONTRIBUTING.md")

    # print("Copying CNAME file to docs.")
    # shutil.copy("CNAME", "docs/CNAME")

    print("Building the docs.")
    parent_folder_path = Path(__file__).parent.parent
    yaml_path = parent_folder_path / "mkdocs.yml"
    source_path = parent_folder_path
    docs_dir = str(parent_folder_path / "docs" / "api")
    if not os.path.exists(docs_dir):
        os.mkdir(docs_dir)

    # Adding project readmes to the yaml
    yaml = YAML()
    mkdocs_yaml = yaml.load(yaml_path)
    site_nav = mkdocs_yaml["nav"]
    # TODO Find a way to do the following in a way that results in nice titles.
    # projects_key = "Projects using allenact"
    # nav_obj = None
    # for obj in site_nav:
    #     if projects_key in obj:
    #         nav_obj = obj
    #         break
    # nav_obj[projects_key] = project_readme_paths_to_nav_structure(project_readmes)

    # With the block above disabled, the loaded yaml has not been modified
    # when it is written back here.
    with open(yaml_path, "w") as f:
        yaml.dump(mkdocs_yaml, f)

    # Get directories to ignore
    # `git_dirs` holds the absolute path of every directory that directly
    # contains a git-tracked file; only those directories are documented.
    git_dirs = set(
        os.path.abspath(os.path.split(p)[0]) for p in Git(".").ls_files().split("\n")
    )
    ignore_rel_dirs = [
        "docs",
        "scripts",
        "experiments",
        "src",
        ".pip_src",
        "dist",
        "build",
    ]
    ignore_abs_dirs = set(
        os.path.abspath(os.path.join(str(parent_folder_path), rel_dir))
        for rel_dir in ignore_rel_dirs
    )
    # Only the ignored directories themselves are removed; `build_docs` never
    # descends into a directory that is not allowed, so their sub-directories
    # are not visited either.
    for d in ignore_abs_dirs:
        if d in git_dirs:
            git_dirs.remove(d)

    threads: List = []
    nav_entries = build_docs(
        parent_folder_path,
        source_path,
        docs_dir,
        threads=threads,
        allowed_dirs=git_dirs,
    )
    # Every entry is a single-key dict: sort the entries by that key.
    nav_entries.sort(key=lambda x: list(x)[0], reverse=False)

    # The markdown files are written by the rendering threads, so wait for
    # all of them before pruning the entries whose file does not exist.
    for thread in threads:
        thread.join()

    nav_entries = pruned_nav_entries(nav_entries)

    docs_key = "API"

    # Find the yaml corresponding to the API
    nav_obj = None
    for obj in site_nav:
        if docs_key in obj:
            nav_obj = obj
            break

    # NOTE(review): if `mkdocs.yml` has no "API" nav entry, `nav_obj` is still
    # `None` here and the assignment below fails with a `TypeError`.
    nav_obj[docs_key] = nav_entries

    with open(yaml_path, "w") as f:
        yaml.dump(mkdocs_yaml, f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/build_docs.sh
================================================
#!/usr/bin/env bash

# Builds the documentation by running scripts/build_docs.py.
#
# Run this from the repository root: both the directory added to PYTHONPATH
# ($PWD) and the script path below are relative to the current directory.

# Abort as soon as a command fails.
set -e

# Add allenact to the python path
export PYTHONPATH=$PYTHONPATH:$PWD

# Alter the relative path of the README image for the docs.
#sed -i '1s/docs/./' docs/README.md
python scripts/build_docs.py


================================================
FILE: scripts/dcommand.py
================================================
#!/usr/bin/env python3

"""Tool to run command on multiple nodes through SSH."""

import argparse
import glob
import os


def get_argument_parser():
    """Builds the command-line parser of the `dcommand` tool.

    All three options (`--runs_on`, `--ssh_cmd`, `--command`) are optional
    and their defaults are shown in the generated help text.
    """

    option_specs = (
        (
            "--runs_on",
            dict(
                required=False,
                type=str,
                default=None,
                help="Comma-separated IP addresses of machines. If empty, the tool will scan for lists of IP addresses"
                " in `screen_ids_file`s in the `~/.allenact` directory.",
            ),
        ),
        (
            "--ssh_cmd",
            dict(
                required=False,
                type=str,
                default="ssh {addr}",
                help="SSH command. Useful to utilize a pre-shared key with 'ssh -i path/to/mykey.pem ubuntu@{addr}'.",
            ),
        ),
        (
            "--command",
            dict(
                required=False,
                default="nvidia-smi | head -n 35",
                type=str,
                help="Command to be run through ssh onto each machine",
            ),
        ),
    )

    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="dcommand",
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)

    return parser


def get_args():
    """Parses the process' command-line arguments with the tool's parser."""

    return get_argument_parser().parse_args()


def wrap_double(text):
    """Returns `text` surrounded by a pair of double quotes."""
    return '"{}"'.format(text)


def wrap_single(text):
    """Returns `text` surrounded by a pair of single quotes."""
    return "'{}'".format(text)


def wrap_single_nested(text, quote=r"'\''"):
    """Returns `text` with `quote` placed both before and after it.

    The default `quote` is the four-character sequence `'\''`.
    """
    return "{0}{1}{0}".format(quote, text)


if __name__ == "__main__":
    # Runs `--command` through ssh on every selected machine, one at a time.
    # Machines come either from `--runs_on` or, if that is not given, from
    # the first `*.killfile` under `~/.allenact` that the user accepts.
    import shlex

    args = get_args()

    all_addresses = []
    if args.runs_on is not None:
        all_addresses = args.runs_on.split(",")
    else:
        allenact_dir = os.path.join(os.path.expanduser("~"), ".allenact")
        # Reverse lexicographic order of the file names.
        all_files = sorted(
            glob.glob(os.path.join(allenact_dir, "*.killfile")),
            reverse=True,
        )
        if len(all_files) == 0:
            print(f"No screen_ids_file found under {allenact_dir}")

        for killfile in all_files:
            with open(killfile, "r") as f:
                # Each line contains 'IP_address screen_ID'. Splitting on
                # whitespace (rather than dropping the last character) keeps
                # a final line without a trailing newline intact, and blank
                # lines are skipped so they never become empty addresses.
                nodes = [tuple(line.split()) for line in f if line.strip()]

            all_addresses.extend(node[0] for node in nodes)

            # Keep asking until we get an explicit yes/no (empty means yes).
            use_addresses = ""
            while use_addresses not in ["y", "n"]:
                use_addresses = input(
                    f"Run on {all_addresses} from {killfile}? [Y/n] "
                ).lower()
                if use_addresses == "":
                    use_addresses = "y"

            if use_addresses == "n":
                # Rejected: forget these addresses and offer the next file.
                all_addresses.clear()
            else:
                break

    print(f"Running on IP addresses {all_addresses}")

    # `shlex.quote` escapes embedded single quotes, so commands such as
    # `echo 'a b'` reach the remote shell unmodified.
    quoted_command = shlex.quote(args.command)
    for it, addr in enumerate(all_addresses):
        ssh_command = f"{args.ssh_cmd.format(addr=addr)} {quoted_command}"

        print(f"{it} {addr} SSH command {ssh_command}")
        os.system(ssh_command)

    print("DONE")


================================================
FILE: scripts/dconfig.py
================================================
#!/usr/bin/env python3

import os
import argparse


def get_argument_parser():
    """Builds the command-line parser of the `dconfig` tool.

    `--runs_on` and `--config_script` are mandatory; `--ssh_cmd` and the
    boolean `--distribute_public_rsa_key` flag are optional.
    """

    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="dconfig",
    )

    string_options = (
        (
            "--runs_on",
            dict(
                required=True,
                type=str,
                help="Comma-separated IP addresses of machines",
            ),
        ),
        (
            "--config_script",
            dict(
                required=True,
                type=str,
                help="Path to bash script with configuration",
            ),
        ),
        (
            "--ssh_cmd",
            dict(
                required=False,
                type=str,
                default="ssh -f {addr}",
                help="SSH command. Useful to utilize a pre-shared key with 'ssh -i path/to/mykey.pem -f ubuntu@{addr}'. "
                "The option `-f` should be used, since we want a non-interactive session",
            ),
        ),
    )
    for flag, spec in string_options:
        parser.add_argument(flag, **spec)

    # Boolean switch, off unless explicitly passed.
    parser.add_argument(
        "--distribute_public_rsa_key",
        dest="distribute_public_rsa_key",
        action="store_true",
        required=False,
        help="if you pass the `--distribute_public_rsa_key` flag, the manager node's public key will be added to the "
        "authorized keys of all workers (this is necessary in default-configured EC2 instances to use "
        "`scripts/dmain.py`)",
    )
    parser.set_defaults(distribute_public_rsa_key=False)

    return parser


def get_args():
    """Parses the process' command-line arguments with the tool's parser."""

    return get_argument_parser().parse_args()


def wrap_double(text):
    """Returns `text` surrounded by a pair of double quotes."""
    return '"{}"'.format(text)


def wrap_single(text):
    """Returns `text` surrounded by a pair of single quotes."""
    return "'{}'".format(text)


def wrap_single_nested(text, quote=r"'\''"):
    """Returns `text` with `quote` placed both before and after it.

    The default `quote` is the four-character sequence `'\''`.
    """
    return "{0}{1}{0}".format(quote, text)


if __name__ == "__main__":
    # For every machine in `--runs_on`: optionally append the local public
    # key to the machine's authorized keys, copy the configuration script
    # over, and source it inside a detached `screen` session.
    args = get_args()

    all_addresses = args.runs_on.split(",")
    print(f"Running on addresses {all_addresses}")

    remote_config_script = f"{args.config_script}.distributed"
    for it, addr in enumerate(all_addresses):
        if args.distribute_public_rsa_key:
            # Double quotes: `$(cat ...)` is expanded by the local shell.
            append_key = wrap_double(
                "echo $(cat ~/.ssh/id_rsa.pub) >> ~/.ssh/authorized_keys"
            )
            key_command = f"{args.ssh_cmd.format(addr=addr)} {append_key}"
            print(f"Key command {key_command}")
            os.system(f"{key_command}")

        # The scp invocation is derived from the ssh one: `ssh ` becomes
        # `scp ` and `-f` is replaced by the local script path.
        scp_template = args.ssh_cmd.replace("ssh ", "scp ").replace(
            "-f", args.config_script
        )
        scp_cmd = scp_template.format(addr=addr)
        print(f"SCP command {scp_cmd}:{remote_config_script}")
        os.system(f"{scp_cmd}:{remote_config_script}")

        screen_name = f"allenact_config_machine{it}"
        bash_command = wrap_single_nested(
            f"source {remote_config_script} &>> log_allenact_distributed_config"
        )
        screen_command = wrap_single(
            f"screen -S {screen_name} -dm bash -c {bash_command}"
        )

        ssh_command = " ".join([args.ssh_cmd.format(addr=addr), screen_command])

        print(f"SSH command {ssh_command}")
        os.system(ssh_command)
        print(f"{addr} {screen_name}")

    print("DONE")


================================================
FILE: scripts/dkill.py
================================================
#!/usr/bin/env python3

"""Tool to terminate multi-node (distributed) training."""

import os
import argparse
import glob


def get_argument_parser():
    """Builds the command-line parser of the `dkill` tool.

    Both options (`--screen_ids_file`, `--ssh_cmd`) are optional.
    """

    option_specs = (
        (
            "--screen_ids_file",
            dict(
                required=False,
                type=str,
                default=None,
                help="Path to file generated by dmain.py with IPs and screen ids for nodes running process."
                " If empty, the tool will scan the `~/.allenact` directory for `screen_ids_file`s.",
            ),
        ),
        (
            "--ssh_cmd",
            dict(
                required=False,
                type=str,
                default="ssh {addr}",
                help="SSH command. Useful to utilize a pre-shared key with 'ssh -i mykey.pem ubuntu@{addr}'. ",
            ),
        ),
    )

    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="dkill",
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)

    return parser


def get_args():
    """Parses the process' command-line arguments with the tool's parser."""

    return get_argument_parser().parse_args()


if __name__ == "__main__":
    # For each kill file (either the one given through `--screen_ids_file`
    # or every `*.killfile` under `~/.allenact`), ask for confirmation, quit
    # the listed screen sessions through ssh and optionally delete the file.
    args = get_args()

    allenact_dir = os.path.join(os.path.expanduser("~"), ".allenact")
    all_files = (
        [args.screen_ids_file]
        if args.screen_ids_file is not None
        else sorted(
            glob.glob(os.path.join(allenact_dir, "*.killfile")),
            reverse=True,
        )
    )

    if len(all_files) == 0:
        print(f"No screen_ids_file found under {allenact_dir}")

    for killfile in all_files:
        # Each line is expected to contain 'IP_address screen_ID'. Splitting
        # on whitespace keeps a last line without trailing newline intact;
        # blank lines are ignored and malformed ones are reported instead of
        # crashing the tuple unpacking below.
        nodes = []
        with open(killfile, "r") as f:
            for line in f:
                fields = line.split()
                if len(fields) == 0:
                    continue
                if len(fields) != 2:
                    print(f"Skipping malformed line {line!r} in {killfile}")
                    continue
                nodes.append(tuple(fields))

        # Keep asking until we get an explicit yes/no (empty means no).
        do_kill = ""
        while do_kill not in ["y", "n"]:
            do_kill = input(
                f"Stopping processes on {nodes} from {killfile}? [y/N] "
            ).lower()
            if do_kill == "":
                do_kill = "n"

        if do_kill == "y":
            for addr, screen_name in nodes:
                print(f"Killing screen {screen_name} on {addr}")

                # Quit the screen session, then list whatever processes
                # matching `Master:` are still alive on that machine.
                ssh_command = (
                    f"{args.ssh_cmd.format(addr=addr)} '"
                    f"screen -S {screen_name} -p 0 -X quit ; "
                    f"sleep 1 ; "
                    f"echo Master processes left running: ; "
                    f"ps aux | grep Master: | grep -v grep ; "
                    f"echo ; "
                    f"'"
                )

                # print(f"SSH command {ssh_command}")
                os.system(ssh_command)

            do_delete = ""
            while do_delete not in ["y", "n"]:
                do_delete = input(f"Delete file {killfile}? [y/N] ").lower()
                if do_delete == "":
                    do_delete = "n"

            if do_delete == "y":
                # Delete without going through a shell so that paths with
                # spaces or shell metacharacters are handled correctly, and
                # only report success when the file was actually removed.
                try:
                    os.remove(killfile)
                except OSError as e:
                    print(f"Could not delete {killfile}: {e}")
                else:
                    print(f"Deleted {killfile}")

    print("DONE")


================================================
FILE: scripts/dmain.py
================================================
#!/usr/bin/env python3

"""Entry point to multi-node (distributed) training for a user given experiment
name."""

import os
import random
import string
import subprocess
import sys
import time
from pathlib import Path
from typing import Optional

# Add to PYTHONPATH the path of the parent directory of the current file's directory
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(Path(__file__)))))

from allenact.main import get_argument_parser as get_main_arg_parser
from allenact.utils.system import init_logging, get_logger
from constants import ABS_PATH_OF_TOP_LEVEL_DIR


def get_argument_parser():
    """Extends the parser of `allenact.main` with the distributed options.

    On top of the options added here, the pre-existing
    `distributed_ip_and_port` option is turned into a required one.
    """

    parser = get_main_arg_parser()
    parser.description = f"distributed {parser.description}"

    distributed_options = (
        (
            "--runs_on",
            dict(
                required=True,
                type=str,
                help="Comma-separated IP addresses of machines",
            ),
        ),
        (
            "--ssh_cmd",
            dict(
                required=False,
                type=str,
                default="ssh -f {addr}",
                help="SSH command. Useful to utilize a pre-shared key with 'ssh -i mykey.pem -f ubuntu@{addr}'. "
                "The option `-f` should be used for non-interactive session",
            ),
        ),
        (
            "--env_activate_path",
            dict(
                required=True,
                type=str,
                help="Path to the virtual environment's `activate` script. It must be the same across all machines",
            ),
        ),
        (
            "--allenact_path",
            dict(
                required=False,
                type=str,
                default="allenact",
                help="Path to allenact top directory. It must be the same across all machines",
            ),
        ),
    )
    for flag, spec in distributed_options:
        parser.add_argument(flag, **spec)

    # Required distributed_ip_and_port
    dests = [action.dest for action in parser._actions]
    parser._actions[dests.index("distributed_ip_and_port")].required = True

    return parser


def get_args():
    """Parses the process' command-line arguments with the tool's parser."""

    return get_argument_parser().parse_args()


def get_raw_args():
    """Returns `sys.argv[1:]` filtered for forwarding to a remote command.

    The launcher-only options listed below are dropped together with the
    token that follows each of them, and the token following
    `--config_kwargs` is re-quoted so that it survives the nested shell
    quoting it is later embedded in.
    """
    dropped_options = [
        "--runs_on",
        "--ssh_cmd",
        "--env_activate_path",
        "--allenact_path",
        "--extra_tag",
        "--machine_id",
    ]
    # Within backslash expansion: close former single, open double, create single, close double, reopen single
    inner_quote = r"\'\"\'\"\'"

    forwarded = []
    skip_next = False
    quote_next = False
    for token in sys.argv[1:]:
        if skip_next:
            # Value of a dropped option.
            skip_next = False
            continue

        if quote_next:
            # Both double and single quotes become backslash-double quotes
            # for later expansion.
            escaped = token.replace('"', r"\"").replace("'", r"\"")
            forwarded.append(inner_quote + escaped + inner_quote)
            quote_next = False
            continue

        if token in dropped_options:
            skip_next = True
            continue

        if token == "--config_kwargs":
            quote_next = True
        forwarded.append(token)

    return forwarded


def wrap_single(text):
    """Returns `text` surrounded by a pair of single quotes."""
    return "'{}'".format(text)


def wrap_single_nested(text):
    """Returns `text` wrapped so it can sit inside a single-quoted string.

    The prefix closes the former single quote, starts a backslash expansion
    (via `$`) and creates a new single quote for the expansion; the suffix
    adds the closing single quote for the expansion, closes the backslash
    expansion and reopens the former single quote.
    """
    return "{}{}{}".format(r"'$'\'", text, r"\'''")


def wrap_double(text):
    """Returns `text` surrounded by a pair of double quotes."""
    return '"{}"'.format(text)


def id_generator(size=4, chars=string.ascii_uppercase + string.digits):
    """Returns a string of `size` characters randomly drawn from `chars`.

    Characters are drawn one at a time with `random.choice`.
    """
    picked = [random.choice(chars) for _ in range(size)]
    return "".join(picked)


# Assume we can ssh into each of the `runs_on` machines through port 22
if __name__ == "__main__":
    # Launches one `python main.py ...` process per machine listed in
    # `--runs_on`, each inside a detached `screen` session started through
    # ssh, and records every (address, screen name) pair in a kill file.

    # Tool must be called from AllenAct project's root directory
    cwd = os.path.abspath(os.getcwd())
    assert cwd == ABS_PATH_OF_TOP_LEVEL_DIR, (
        f"`dmain.py` called from {cwd}."
        f"\nIt should be called from AllenAct's top level directory {ABS_PATH_OF_TOP_LEVEL_DIR}."
    )

    args = get_args()

    init_logging(args.log_level)

    # Command-line tokens to forward to each worker (launcher-only options
    # removed, `--config_kwargs` value re-quoted).
    raw_args = get_raw_args()

    # Without an explicit seed, draw one here and forward it so that every
    # worker receives the same value.
    if args.seed is None:
        seed = random.randint(0, 2**31 - 1)
        raw_args.extend(["-s", f"{seed}"])
        get_logger().info(f"Using random seed {seed} in all workers (none was given)")

    all_addresses = args.runs_on.split(",")
    get_logger().info(f"Running on IP addresses {all_addresses}")

    # The IP part of `--distributed_ip_and_port` must be one of the workers.
    assert args.distributed_ip_and_port.split(":")[0] in all_addresses, (
        f"Missing listener IP address {args.distributed_ip_and_port.split(':')[0]}"
        f" in list of worker addresses {all_addresses}"
    )

    time_str = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime(time.time()))

    # The kill file lives under `~/.allenact` and is named after the launch
    # time and a random job id.
    global_job_id = id_generator()
    killfilename = os.path.join(
        os.path.expanduser("~"), ".allenact", f"{time_str}_{global_job_id}.killfile"
    )
    os.makedirs(os.path.dirname(killfilename), exist_ok=True)

    # Source of the rsync below: the current directory (asserted above to be
    # the project's top level directory).
    code_src = "."

    with open(killfilename, "w") as killfile:
        for it, addr in enumerate(all_addresses):
            # Copy the local code to `allenact_path` on the target machine.
            code_tget = f"{addr}:{args.allenact_path}/"
            get_logger().info(f"rsync {code_src} to {code_tget}")
            os.system(f"rsync -rz {code_src} {code_tget}")

            job_id = id_generator()

            # Training command for this machine: the forwarded arguments plus
            # a machine-specific `--extra_tag` suffix and `--machine_id`.
            command = " ".join(
                ["python", "main.py"]
                + raw_args
                + [
                    "--extra_tag",
                    f"{args.extra_tag}{'__' if len(args.extra_tag) > 0 else ''}machine{it}",
                ]
                + ["--machine_id", f"{it}"]
            )

            logfile = (
                f"{args.output_dir}/log_{time_str}_{global_job_id}_{job_id}_machine{it}"
            )

            # Shell snippet run on the remote machine. The `for ... do : ; done`
            # loop leaves NCCL_SOCKET_IFNAME set to the last word printed by
            # `route | grep default` (presumably the default network
            # interface -- TODO confirm). The snippet then enters the code
            # directory, activates the environment, logs some diagnostics and
            # finally runs the training command; all output is appended to
            # `logfile`.
            env_and_command = wrap_single_nested(
                f"for NCCL_SOCKET_IFNAME in $(route | grep default) ; do : ; done && export NCCL_SOCKET_IFNAME"
                f" && cd {args.allenact_path}"
                f" && mkdir -p {args.output_dir}"
                f" && source {args.env_activate_path} &>> {logfile}"
                f" && echo pwd=$(pwd) &>> {logfile}"
                f" && echo output_dir={args.output_dir} &>> {logfile}"
                f" && echo python_version=$(python --version) &>> {logfile}"
                f" && echo python_path=$(which python) &>> {logfile}"
                f" && set | grep NCCL_SOCKET_IFNAME &>> {logfile}"
                f" && echo &>> {logfile}"
                f" && {command} &>> {logfile}"
            )

            # Run the snippet in a detached (`-dm`) screen session whose name
            # identifies the launch time, job ids and machine index.
            screen_name = f"allenact_{time_str}_{global_job_id}_{job_id}_machine{it}"
            screen_command = wrap_single(
                f"screen -S {screen_name} -dm bash -c {env_and_command}"
            )

            ssh_command = f"{args.ssh_cmd.format(addr=addr)} {screen_command}"

            get_logger().debug(f"SSH command {ssh_command}")
            # Executed with bash explicitly (rather than the default shell).
            subprocess.run(ssh_command, shell=True, executable="/bin/bash")
            get_logger().info(f"{addr} {screen_name}")

            # One 'IP_address screen_name' line per launched machine.
            killfile.write(f"{addr} {screen_name}\n")

    get_logger().info("")
    get_logger().info(f"Running screen ids saved to {killfilename}")
    get_logger().info("")

    get_logger().info("DONE")


================================================
FILE: scripts/literate.py
================================================
"""Helper functions used to create literate documentation from python files."""

import importlib
import inspect
import os
from typing import Optional, Sequence, List, cast

from typing.io import TextIO

from constants import ABS_PATH_OF_DOCS_DIR, ABS_PATH_OF_TOP_LEVEL_DIR


def get_literate_output_path(file: TextIO) -> Optional[str]:
    """Finds where the literate markdown of an open python file should go.

    Only the first non-blank line of `file` is considered. If it does not
    start (case-insensitively) with `# literate` / `#literate`, `None` is
    returned. A marker without any `:` maps the file to a sibling `.md`
    file; a marker with exactly one `:` gives a path relative to
    `ABS_PATH_OF_DOCS_DIR`, which must resolve to a location starting with
    that directory. Any other number of `:` raises `NotImplementedError`.
    """
    for raw_line in file:
        text = raw_line.strip()
        if text == "":
            continue

        if not text.lower().startswith(("# literate", "#literate")):
            return None

        pieces = text.split(":")
        if len(pieces) == 1:
            assert (
                file.name[-3:].lower() == ".py"
            ), "Can only run literate on python (*.py) files."
            return file.name[:-3] + ".md"

        if len(pieces) == 2:
            outpath = os.path.abspath(
                os.path.join(ABS_PATH_OF_DOCS_DIR, pieces[1].strip())
            )
            assert outpath.startswith(
                ABS_PATH_OF_DOCS_DIR
            ), f"Path {outpath} is not allowed, must be within {ABS_PATH_OF_DOCS_DIR}."
            return outpath

        raise NotImplementedError(f"Line '{text}' is not of the correct format.")

    # The file had no non-blank line at all.
    return None


def source_to_markdown(dot_path: str, summarize: bool = False):
    """Returns the source code of the object found at `dot_path`.

    `dot_path` is split at its last `.` into a module path (imported with
    `importlib`) and an attribute name. With `summarize=False` the full
    source is returned. Otherwise a function is reduced to its first line
    followed by `...`, a class is reduced to its first line plus the
    signatures of its `def`s (each followed by `...`), and anything else
    results in `None`.
    """
    importlib.invalidate_caches()
    module_path, _, obj_name = dot_path.rpartition(".")
    obj = getattr(importlib.import_module(module_path), obj_name)
    source = inspect.getsource(obj)

    if not summarize:
        return source

    if inspect.isclass(obj):
        src_lines = source.split("\n")
        summary = [src_lines[0]]
        # Indentation of the `def`s we keep; only defs that are not more
        # indented than the previously kept one are considered.
        def_indent = float("inf")
        # True while inside a signature that spans several lines.
        in_signature = False
        for text in src_lines[1:]:
            indent = len(text) - len(text.lstrip())
            if in_signature:
                summary.append(text)
                in_signature = "):" not in text and "->" not in text
                if not in_signature:
                    summary.append(text[: cast(int, def_indent)] + "    ...\n")

            if text.lstrip().startswith("def ") and indent <= def_indent:
                def_indent = indent
                summary.append(text)
                in_signature = "):" not in text and "->" not in text
                if not in_signature:
                    summary.append(text[:def_indent] + "    ...\n")
        return "\n".join(summary).strip()

    if inspect.isfunction(obj):
        return source.split("\n")[0] + "\n    ..."

    return None


def _strip_empty_lines(lines: Sequence[str]) -> List[str]:
    """Returns `lines` without its leading and trailing blank lines.

    A line is blank when it is empty or only contains whitespace. Blank
    lines located between two non-blank lines are kept. If every line is
    blank (or `lines` is empty) an empty list is returned.

    # Parameters

    lines : The lines to strip, the input is not modified.

    # Returns

    A new list with the remaining lines.
    """
    lines = list(lines)

    first = next(
        (i for i, line in enumerate(lines) if line.strip() != ""),
        None,
    )
    if first is None:
        # Nothing but blank lines: everything is stripped.
        return []

    # A non-blank line exists, so this search always succeeds.
    last = next(i for i in range(len(lines) - 1, -1, -1) if lines[i].strip() != "")
    return lines[first : last + 1]


def literate_python_to_markdown(path: str) -> bool:
    """Converts a "literate" python file into a markdown document.

    The output location is given by `get_literate_output_path`; when that
    function returns `None` nothing is written. Otherwise the remaining
    lines of the file are processed by a small state machine whose `mode`
    is one of `None`, `"change"`, `"markdown"`, `"code"` or `"hide"`:

    * lines starting with `# %%` / `#%%` switch to `"change"` (flushing any
      pending code block) and may carry an ` import `, ` import_summary `
      or ` hide` directive,
    * in `"change"` mode a line starting with a triple quote begins a
      markdown section, any other line begins a python code block,
    * `"markdown"` text is emitted as is, `"code"` is emitted inside a
      fenced python block and `"hide"` lines are dropped.

    # Parameters

    path : Path of the python (`*.py`) file to convert.

    # Returns

    `True` if a markdown file was written, `False` otherwise.
    """
    assert path[-3:].lower() == ".py", "Can only run literate on python (*.py) files."

    with open(path, "r") as file:
        # NOTE: this call iterates over `file` up to (and including) its
        # first non-blank line, so `file.readlines()` below only returns
        # the lines that come after it.
        output_path = get_literate_output_path(file)

        if output_path is None:
            return False

        output_lines = [
            f"<!-- DO NOT EDIT THIS FILE. --> ",
            f"<!-- THIS FILE WAS AUTOGENERATED FROM"
            f" 'ALLENACT_BASE_DIR/{os.path.relpath(path, ABS_PATH_OF_TOP_LEVEL_DIR)}', EDIT IT INSTEAD. -->\n",
        ]
        md_lines: List[str] = []
        # NOTE: `code_lines` is the very same list object as `md_lines`, the
        # buffer is cleared each time its content is flushed.
        code_lines = md_lines

        lines = file.readlines()
        mode = None

        for line in lines:
            line = line.rstrip()
            stripped_line = line.strip()
            # Blank lines are ignored until a section has actually started.
            if (mode is None or mode == "change") and line.strip() == "":
                continue

            if mode == "markdown":
                if stripped_line in ['"""', "'''"]:
                    # Closing quotes on their own line end the section.
                    output_lines.extend(_strip_empty_lines(md_lines) + [""])
                    md_lines.clear()
                    mode = None
                elif stripped_line.endswith(('"""', "'''")):
                    # Closing quotes at the end of a text line: keep the text
                    # preceding them.
                    output_lines.extend(
                        _strip_empty_lines(md_lines) + [stripped_line[:-3]]
                    )
                    md_lines.clear()
                    mode = None
                    # TODO: Does not account for the case where a string is ended with a comment.
                else:
                    md_lines.append(line.strip())
            elif stripped_line.startswith(("# %%", "#%%")):
                last_mode = mode
                mode = "change"
                if last_mode == "code":
                    # Flush the code block that was being accumulated.
                    output_lines.extend(
                        ["```python"] + _strip_empty_lines(code_lines) + ["```"]
                    )
                    code_lines.clear()

                if " import " in stripped_line:
                    # From here on `path` holds the dotted path of the object
                    # to include, not the input file path anymore.
                    path = stripped_line.split(" import ")[-1].strip()
                    output_lines.append(
                        "```python\n" + source_to_markdown(path) + "\n```"
                    )
                elif " import_summary " in stripped_line:
                    path = stripped_line.split(" import_summary ")[-1].strip()
                    output_lines.append(
                        "```python\n"
                        + source_to_markdown(path, summarize=True)
                        + "\n```"
                    )
                elif " hide" in stripped_line:
                    mode = "hide"
            elif mode == "hide":
                continue
            elif mode == "change":
                if stripped_line.startswith(('"""', "'''")):
                    mode = "markdown"
                    if len(stripped_line) != 3:
                        if stripped_line.endswith(('"""', "'''")):
                            # Single line string: emit its content and stay
                            # in "change" mode.
                            output_lines.append(stripped_line[3:-3])
                            mode = "change"
                        else:
                            output_lines.append(stripped_line[3:])
                else:
                    mode = "code"
                    code_lines.append(line)
            elif mode == "code":
                code_lines.append(line)
            else:
                # Reached, for instance, when a non-blank line other than a
                # `# %%` marker is met while `mode` is None.
                raise NotImplementedError(
                    f"mode {mode} is not implemented. Last 5 lines: "
                    + "\n".join(output_lines[-5:])
                )

        # Flush the code block still pending at the end of the file.
        if mode == "code" and len(code_lines) != 0:
            output_lines.extend(
                ["```python"] + _strip_empty_lines(code_lines) + ["```"]
            )

    with open(output_path, "w") as f:
        f.writelines([l + "\n" for l in output_lines])

    return True


if __name__ == "__main__":
    # Debugging aid kept from the original script:
    # print(
    #     source_to_markdown(
    #         "allenact_plugins.minigrid_plugin.minigrid_offpolicy.ExpertTrajectoryIterator",
    #         True
    #     )
    # )

    # Convert the point navigation tutorial when run as a script.
    tutorial_path = os.path.join(
        ABS_PATH_OF_TOP_LEVEL_DIR,
        "projects/tutorials/training_a_pointnav_model.py",
    )
    literate_python_to_markdown(tutorial_path)


================================================
FILE: scripts/release.py
================================================
import os
import sys
from pathlib import Path
from subprocess import getoutput


def make_package(name, verbose=False):
    """Prepares sdist for allenact or allenact_plugins.

    The sdist is generated with `{name}/setup.py`, then unpacked so that
    `setup.py` can be copied to its top level and a `_version.py` file
    (holding the version read from `.VERSION`) can be added, and finally
    packed again under `dist/`.

    The working directory is changed while the package is built and is
    always restored before returning, even if a step raises.

    # Parameters

    name : Name of the package directory (e.g. `"allenact"`).
    verbose : Whether to print the output of the executed shell commands.
    """

    def run(cmd, echo=True):
        # Failures of the shell commands are not detected here, their
        # output is only printed in verbose mode.
        output = getoutput(cmd)
        if echo and verbose:
            print(output)

    orig_dir = os.getcwd()
    base_dir = os.path.join(os.path.abspath(os.path.dirname(Path(__file__))), "..")

    try:
        os.chdir(base_dir)

        with open(".VERSION", "r") as f:
            __version__ = f.readline().strip()

        # generate sdist via setuptools
        run(f"{sys.executable} {name}/setup.py sdist")

        os.chdir(os.path.join(base_dir, "dist"))

        # uncompress the tar.gz sdist
        run(f"tar zxvf {name}-{__version__}.tar.gz")

        # copy setup.py to the top level of the package (required by pip install)
        run(f"cp {name}-{__version__}/{name}/setup.py {name}-{__version__}/setup.py")

        # create new source file with version
        run(
            f"printf '__version__ = \"{__version__}\"\n' >> {name}-{__version__}/{name}/_version.py",
            echo=False,
        )
        # include it in sources
        run(
            f'printf "\n{name}/_version.py" >> {name}-{__version__}/{name}.egg-info/SOURCES.txt',
            echo=False,
        )

        # recompress tar.gz
        run(f"tar zcvf {name}-{__version__}.tar.gz {name}-{__version__}/")

        # remove temporary directory
        run(f"rm -r {name}-{__version__}")
    finally:
        # Never leave the caller in a different working directory.
        os.chdir(orig_dir)


if __name__ == "__main__":
    # Build the source distribution of both packages, in this order.
    verbose = False
    for package_name in ("allenact", "allenact_plugins"):
        make_package(package_name, verbose)


================================================
FILE: scripts/run_tests.sh
================================================
#!/usr/bin/env bash

# Run the test suite under coverage.
echo "RUNNING PYTEST WITH COVERAGE"
pipenv run coverage run -m --source=. pytest tests/
echo "DONE"
echo

# Turn the collected coverage data into an HTML report.
echo "GENERATING COVERAGE HTML"
coverage html
echo "HTML GENERATED"

# On macOS, open the report right away.
if [[ "$(uname)" == "Darwin" ]]; then
    echo "OPENING COVERAGE INFO"
    open htmlcov/index.html
fi

================================================
FILE: scripts/startx.py
================================================
import atexit
import os
import platform
import re
import shlex
import subprocess
import tempfile


# Turning off automatic black formatting for this script as it breaks quotes.

# fmt: off

def pci_records():
    """Returns one dict per device reported by `lspci -vmm`.

    The command output is split into blank-line separated device blocks;
    each row of a block is split at its tab into a key (the text before the
    first `:`) and a value.
    """
    raw_output = subprocess.check_output(shlex.split("lspci -vmm")).decode()

    records = []
    for device_block in raw_output.strip().split("\n\n"):
        record = {}
        for row in device_block.split("\n"):
            key, value = row.split("\t")
            record[key.split(":")[0]] = value
        records.append(record)

    return records

def generate_xorg_conf(devices):
    """Returns the text of an Xorg configuration for the given bus ids.

    For the i-th bus id in `devices` a `Device{i}` section and a matching
    `Screen{i}` section are generated; a single `ServerLayout` section
    listing every screen closes the configuration.
    """
    device_template = """
Section "Device"
    Identifier     "Device{device_id}"
    Driver         "nvidia"
    VendorName     "NVIDIA Corporation"
    BusID          "{bus_id}"
EndSection
"""
    layout_template = """
Section "ServerLayout"
    Identifier     "Layout0"
    {screen_records}
EndSection
"""
    screen_template = """
Section "Screen"
    Identifier     "Screen{screen_id}"
    Device         "Device{device_id}"
    DefaultDepth    24
    Option         "AllowEmptyInitialConfiguration" "True"
    SubSection     "Display"
        Depth       24
        Virtual 1024 768
    EndSubSection
EndSection
"""
    sections = []
    layout_rows = []
    for index, bus_id in enumerate(devices):
        sections.append(device_template.format(device_id=index, bus_id=bus_id))
        sections.append(screen_template.format(device_id=index, screen_id=index))
        layout_rows.append('Screen {screen_id} "Screen{screen_id}" 0 0'.format(screen_id=index))

    # Rows are joined so that each one is indented like the template's first.
    sections.append(layout_template.format(screen_records="\n    ".join(layout_rows)))

    return "\n".join(sections)

def startx(display=0):
    """Starts an Xorg server on `:display` using the NVIDIA cards found.

    The Xorg configuration is written to a temporary file which is removed
    once the server process has exited. Raises an `Exception` when not run
    on Linux or when no NVIDIA card is listed by `pci_records`.
    """
    if platform.system() != "Linux":
        raise Exception("Can only run startx on linux")

    gpu_classes = ["VGA compatible controller", "3D controller"]
    devices = []
    for record in pci_records():
        if record.get("Vendor", "") != "NVIDIA Corporation":
            continue
        if record["Class"] not in gpu_classes:
            continue
        # The hexadecimal slot fields are converted to decimal for the bus id.
        slot_fields = re.split(r"[:\.]", record["Slot"])
        devices.append("PCI:" + ":".join(str(int(field, 16)) for field in slot_fields))

    if not devices:
        raise Exception("no nvidia cards found")

    fd = None
    path = None
    try:
        fd, path = tempfile.mkstemp()
        with open(path, "w") as conf_file:
            conf_file.write(generate_xorg_conf(devices))
        xorg_cmd = "Xorg -noreset +extension GLX +extension RANDR +extension RENDER -config %s :%s" % (path, display)
        proc = subprocess.Popen(shlex.split(xorg_cmd))
        # Kill the server at interpreter exit if it is still running.
        atexit.register(lambda: proc.poll() is None and proc.kill())
        proc.wait()
    finally:
        if fd is not None:
            os.close(fd)
            os.unlink(path)

# fmt: on


# Start the X server on the default display (0) when run as a script.
if __name__ == "__main__":
    startx()


================================================
FILE: tests/.gitignore
================================================
tmp
.DS_Store
!.py
!.gitignore


================================================
FILE: tests/__init__.py
================================================


================================================
FILE: tests/hierarchical_policies/__init__.py
================================================


================================================
FILE: tests/hierarchical_policies/test_minigrid_conditional.py
================================================
import os
from tempfile import mkdtemp
from typing import Dict, Optional, List, Any, cast

import gym
from gym_minigrid.envs import EmptyRandomEnv5x5
from torch import nn
from torch import optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses.imitation import Imitation
from allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig
from allenact.algorithms.onpolicy_sync.runner import OnPolicyRunner
from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler
from allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor
from allenact.utils.experiment_utils import (
    TrainingPipeline,
    Builder,
    PipelineStage,
    LinearDecay,
)
from allenact_plugins.minigrid_plugin.minigrid_sensors import EgocentricMiniGridSensor
from allenact_plugins.minigrid_plugin.minigrid_tasks import MiniGridTaskSampler
from projects.tutorials.minigrid_tutorial_conds import (
    ConditionedMiniGridSimpleConvRNN,
    ConditionedMiniGridTask,
)


class MiniGridCondTestExperimentConfig(ExperimentConfig):
    """Tiny MiniGrid experiment with a `higher`/`lower` dictionary action
    space, trained with an imitation loss plus PPO on `EmptyRandomEnv5x5`."""

    @classmethod
    def tag(cls) -> str:
        """Return the string identifying this experiment."""
        return "MiniGridCondTest"

    SENSORS = [
        EgocentricMiniGridSensor(agent_view_size=5, view_channels=3),
        ExpertActionSensor(
            action_space=gym.spaces.Dict(
                higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)
            )
        ),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the conditioned conv-RNN model sized from the grid sensor."""
        grid_sensor = cls.SENSORS[0]
        hierarchical_actions = gym.spaces.Dict(
            higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)
        )
        return ConditionedMiniGridSimpleConvRNN(
            action_space=hierarchical_actions,
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            num_objects=grid_sensor.num_objects,
            num_colors=grid_sensor.num_colors,
            num_states=grid_sensor.num_states,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `MiniGridTaskSampler` from the given sampler arguments."""
        return MiniGridTaskSampler(**kwargs)

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Sampler arguments for training; only `process_ind` is used."""
        return self._get_sampler_args(mode="train", process_ind=process_ind)

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Sampler arguments for validation; only `process_ind` is used."""
        return self._get_sampler_args(mode="valid", process_ind=process_ind)

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Sampler arguments for testing; only `process_ind` is used."""
        return self._get_sampler_args(mode="test", process_ind=process_ind)

    def _get_sampler_args(self, process_ind: int, mode: str) -> Dict[str, Any]:
        """Assemble the keyword arguments used to construct a TaskSampler.

        # Parameters
        process_ind : index of the current task sampler
        mode:  one of `train`, `valid`, or `test`

        # Returns
        A dictionary of keyword arguments for `make_sampler_fn`.
        """
        if mode == "train":
            # Training: unbounded number of randomly sampled, unseeded tasks.
            num_tasks = None
            seeds_for_tasks = None
            sample_deterministically = False
        else:
            # 20 tasks for valid, 40 for test (per sampler).
            num_tasks = 40 if mode == "test" else 20

            # One seed per task; the ranges of different samplers are disjoint,
            # which also makes the set of sampled tasks deterministic.
            first_seed = process_ind * num_tasks
            seeds_for_tasks = list(range(first_seed, first_seed + num_tasks))
            sample_deterministically = True

        return {
            "max_tasks": num_tasks,
            "env_class": self.make_env,  # builder for the third-party environment
            "sensors": self.SENSORS,  # sensors producing the agent's observations
            "env_info": {},  # parameters for the environment builder (none for now)
            "task_seeds_list": seeds_for_tasks,
            "deterministic_sampling": sample_deterministically,
            "task_class": ConditionedMiniGridTask,
        }

    @staticmethod
    def make_env(*args, **kwargs):
        """Create the environment; all arguments are ignored."""
        return EmptyRandomEnv5x5()

    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        """Use 4 processes for training, 1 otherwise, and no devices."""
        num_processes = 4 if mode == "train" else 1
        return {
            "nprocesses": num_processes,
            "devices": [],
        }

    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        """Single-stage pipeline mixing imitation and PPO for 512 steps, with
        teacher forcing decayed linearly to zero over the first half."""
        total_steps = 512

        losses = {
            # SENSORS[0] is the MiniGrid sensor, SENSORS[1] the ExpertActionSensor
            "imitation_loss": Imitation(cls.SENSORS[1]),
            "ppo_loss": PPO(**PPOConfig, entropy_method_name="conditional_entropy"),
        }  # type:ignore

        single_stage = PipelineStage(
            teacher_forcing=LinearDecay(
                startp=1.0,
                endp=0.0,
                steps=total_steps // 2,
            ),
            loss_names=["imitation_loss", "ppo_loss"],
            max_stage_steps=total_steps,
        )

        adam_builder = Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4))
        scheduler_builder = Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}  # type:ignore
        )

        return TrainingPipeline(
            named_losses=losses,
            pipeline_stages=[single_stage],
            optimizer_builder=adam_builder,
            num_mini_batch=4,
            update_repeats=3,
            max_grad_norm=0.5,
            num_steps=16,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=None,
            save_interval=10000,
            metric_accumulate_interval=1,
            lr_scheduler_builder=scheduler_builder,
        )


class TestMiniGridCond:
    """End-to-end check: train the conditional MiniGrid config, then test the
    resulting checkpoints."""

    def test_train(self, tmpdir):
        """Train into `tmpdir`, then evaluate the checkpoints written there.

        Passes when training yields validation results and the last test
        result has an `ep_length` metric below 4.
        """
        cfg = MiniGridCondTestExperimentConfig()

        # Everything except `mode` is shared by the train and test runners.
        shared_runner_kwargs = dict(
            config=cfg,
            output_dir=tmpdir,
            loaded_config_src_files=None,
            seed=12345,
            deterministic_cudnn=False,
            deterministic_agents=False,
            extra_tag="",
            disable_tensorboard=True,
            disable_config_saving=True,
        )

        trainer = OnPolicyRunner(mode="train", **shared_runner_kwargs)
        start_time_str, valid_results = trainer.start_train(
            checkpoint=None,
            restart_pipeline=False,
            max_sampler_processes_per_worker=1,
            collect_valid_results=True,
        )
        assert len(valid_results) > 0

        tester = OnPolicyRunner(mode="test", **shared_runner_kwargs)
        # Glob matching only the checkpoints produced by the run above.
        checkpoint_glob = os.path.join(
            tmpdir, "checkpoints", "**", start_time_str, "*.pt"
        )
        test_results = tester.start_test(
            checkpoint_path_dir_or_pattern=checkpoint_glob,
            max_sampler_processes_per_worker=1,
            inference_expert=True,
        )
        final_ep_length = test_results[-1]["test-metrics/ep_length"]
        assert final_ep_length < 4


# Allow running this test directly as a script, using a fresh temporary
# directory in place of the `tmpdir` argument.
if __name__ == "__main__":
    TestMiniGridCond().test_train(mkdtemp())  # type:ignore


================================================
FILE: tests/manipulathor_plugin/__init__.py
================================================


================================================
FILE: tests/manipulathor_plugin/test_utils.py
================================================
from allenact_plugins.manipulathor_plugin.arm_calculation_utils import (
    world_coords_to_agent_coords,
)


class TestArmCalculationUtils(object):
    """Tests for the arm coordinate-conversion helpers."""

    def test_translation_functions(self):
        """Convert a fixed object pose into a fixed agent's frame and compare
        the resulting position against known values."""
        agent_pose = {
            "position": {"x": 1, "y": 0, "z": 2},
            "rotation": {"x": 0, "y": -45, "z": 0},
        }
        object_pose = {
            "position": {"x": 0, "y": 1, "z": 0},
            "rotation": {"x": 0, "y": 0, "z": 0},
        }
        converted = world_coords_to_agent_coords(object_pose, agent_pose)

        tolerance = 0.01
        expected_position = {"x": -2.12, "y": 1.0, "z": -0.70}
        assert all(
            abs(converted["position"][axis] - target) < tolerance
            for axis, target in expected_position.items()
        )


# Allow running this test directly as a script.
if __name__ == "__main__":
    TestArmCalculationUtils().test_translation_functions()


================================================
FILE: tests/mapping/__init__.py
================================================


================================================
FILE: tests/mapping/test_ai2thor_mapping.py
================================================
import os
import platform
import random
import sys
import urllib
import urllib.request
import warnings
from collections import defaultdict

# noinspection PyUnresolvedReferences
from tempfile import mkdtemp
from typing import Dict, List, Tuple, cast

# noinspection PyUnresolvedReferences
import ai2thor

# noinspection PyUnresolvedReferences
import ai2thor.wsgi_server
import compress_pickle
import numpy as np
import torch

from allenact.algorithms.onpolicy_sync.storage import RolloutBlockStorage
from allenact.base_abstractions.misc import Memory, ActorCriticOutput
from allenact.embodiedai.mapping.mapping_utils.map_builders import SemanticMapBuilder
from allenact.utils.experiment_utils import set_seed
from allenact.utils.system import get_logger
from allenact.utils.tensor_utils import batch_observations
from allenact_plugins.ithor_plugin.ithor_sensors import (
    RelativePositionChangeTHORSensor,
    ReachableBoundsTHORSensor,
    BinnedPointCloudMapTHORSensor,
    SemanticMapTHORSensor,
)
from allenact_plugins.ithor_plugin.ithor_util import get_open_x_displays
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from constants import ABS_PATH_OF_TOP_LEVEL_DIR


class TestAI2THORMapSensors(object):
    """Integration tests for the AI2-THOR mapping sensors and for a pretrained
    walkthrough mapping agent.

    Both tests rely on the `projects/ithor_rearrangement` submodule, on an
    open X display when not running on macOS, and on downloading reference
    files. When the display or the submodule is missing the tests emit a
    warning and return early instead of failing.
    """

    def setup_path_for_use_with_rearrangement_project(self) -> bool:
        """Prepare the process so the rearrangement project can be imported.

        Changes the working directory to the repository top level and makes
        `projects/ithor_rearrangement` importable.

        # Returns
        `True` if the tests can proceed; `False` (after warning) when no X
        display is open on a non-macOS platform or `rearrange` cannot be
        imported.
        """
        if platform.system() != "Darwin" and len(get_open_x_displays()) == 0:
            wrn_msg = "Cannot run tests as there seem to be no open displays!"
            warnings.warn(wrn_msg)
            get_logger().warning(wrn_msg)
            return False

        os.chdir(ABS_PATH_OF_TOP_LEVEL_DIR)
        rearrange_project_dir = os.path.join(
            ABS_PATH_OF_TOP_LEVEL_DIR, "projects/ithor_rearrangement"
        )
        # Both tests call this method; avoid piling up duplicate path entries.
        if rearrange_project_dir not in sys.path:
            sys.path.append(rearrange_project_dir)
        try:
            # Imported only to check that the submodule is available.
            import rearrange
        except ImportError:
            wrn_msg = (
                "Could not import `rearrange`. Is it possible you have"
                " not initialized the submodules (i.e. by running"
                " `git submodule init; git submodule update;`)?"
            )
            warnings.warn(wrn_msg)
            get_logger().warning(wrn_msg)
            return False

        return True

    def test_binned_and_semantic_mapping(self, tmpdir):
        """Step through five walkthrough tasks with random actions and compare
        every observation against downloaded reference observations.

        # Parameters
        tmpdir : directory into which the reference observations are downloaded.
        """
        try:
            if not self.setup_path_for_use_with_rearrangement_project():
                return

            from baseline_configs.rearrange_base import RearrangeBaseExperimentConfig
            from baseline_configs.walkthrough.walkthrough_rgb_base import (
                WalkthroughBaseExperimentConfig,
            )
            from rearrange.constants import (
                FOV,
                PICKUPABLE_OBJECTS,
                OPENABLE_OBJECTS,
            )
            from datagen.datagen_utils import get_scenes

            ORDERED_OBJECT_TYPES = list(sorted(PICKUPABLE_OBJECTS + OPENABLE_OBJECTS))

            map_range_sensor = ReachableBoundsTHORSensor(margin=1.0)
            map_info = dict(
                map_range_sensor=map_range_sensor,
                vision_range_in_cm=40 * 5,
                map_size_in_cm=1050,
                resolution_in_cm=5,
            )
            map_sensors = [
                RelativePositionChangeTHORSensor(),
                map_range_sensor,
                DepthSensorThor(
                    height=224,
                    width=224,
                    use_normalization=False,
                    uuid="depth",
                ),
                BinnedPointCloudMapTHORSensor(
                    fov=FOV,
                    ego_only=False,
                    **map_info,
                ),
                SemanticMapTHORSensor(
                    fov=FOV,
                    ego_only=False,
                    ordered_object_types=ORDERED_OBJECT_TYPES,
                    **map_info,
                ),
            ]
            all_sensors = [*WalkthroughBaseExperimentConfig.SENSORS, *map_sensors]

            # Fall back to no explicit display if the lookup itself fails.
            open_x_displays = []
            try:
                open_x_displays = get_open_x_displays()
            except (AssertionError, IOError):
                pass
            walkthrough_task_sampler = WalkthroughBaseExperimentConfig.make_sampler_fn(
                stage="train",
                sensors=all_sensors,
                scene_to_allowed_rearrange_inds={s: [0] for s in get_scenes("train")},
                force_cache_reset=True,
                allowed_scenes=None,
                seed=1,
                x_display=open_x_displays[0] if len(open_x_displays) != 0 else None,
                thor_controller_kwargs={
                    **RearrangeBaseExperimentConfig.THOR_CONTROLLER_KWARGS,
                    # "server_class": ai2thor.wsgi_server.WsgiServer,  # Only for debugging
                },
            )

            targets_path = os.path.join(tmpdir, "rearrange_mapping_examples.pkl.gz")
            urllib.request.urlretrieve(
                "https://ai2-prior-allenact-public-test.s3-us-west-2.amazonaws.com/ai2thor_mapping/rearrange_mapping_examples.pkl.gz",
                targets_path,
            )
            # NOTE(review): this unpickles a downloaded file; it is only
            # acceptable because the URL is a project-controlled bucket.
            goal_obs_dict = compress_pickle.load(targets_path)

            def compare_recursive(obs, goal_obs, key_list: List):
                """Walk `obs` and `goal_obs` in parallel (dicts, then
                lists/tuples) and compare the numpy arrays at the leaves.

                `key_list` is the path of keys/indices taken so far and is
                only used in failure messages.
                """
                if isinstance(obs, Dict):
                    for k in goal_obs:
                        compare_recursive(
                            obs=obs[k], goal_obs=goal_obs[k], key_list=key_list + [k]
                        )
                elif isinstance(obs, (List, Tuple)):
                    for i in range(len(goal_obs)):
                        compare_recursive(
                            obs=obs[i], goal_obs=goal_obs[i], key_list=key_list + [i]
                        )
                else:
                    # Should be a numpy array at this point
                    assert isinstance(obs, np.ndarray) and isinstance(
                        goal_obs, np.ndarray
                    ), f"After {key_list}, not numpy arrays, obs={obs}, goal_obs={goal_obs}"

                    # Multiplying by 1.0 yields fresh arrays, so the in-place
                    # NaN masking below never touches the caller's data.
                    obs = 1.0 * obs
                    goal_obs = 1.0 * goal_obs

                    goal_where_nan = np.isnan(goal_obs)
                    obs_where_nan = np.isnan(obs)

                    where_nan_not_equal = (goal_where_nan != obs_where_nan).sum()
                    # assert (
                    #     where_nan_not_equal.sum() <= 1
                    #     and where_nan_not_equal.mean() < 1e3
                    # )

                    where_nan = np.logical_or(goal_where_nan, obs_where_nan)
                    obs[where_nan] = 0.0
                    goal_obs[where_nan] = 0.0

                    def special_mean(v):
                        # Sum away trailing axes until at most two remain,
                        # then average what is left.
                        while len(v.shape) > 2:
                            v = v.sum(-1)
                        return v.mean()

                    # Element-wise difference relative to max(|obs|, |goal|, 1).
                    numer = np.abs(obs - goal_obs)
                    denom = np.abs(
                        np.stack((obs, goal_obs, np.ones_like(obs)), axis=0)
                    ).max(0)
                    difference = special_mean(numer / denom)
                    # assert (
                    #     difference < 1.2e-3
                    # ), f"Difference of {np.abs(obs - goal_obs).mean()} at {key_list}."

                    if (
                        len(obs.shape) >= 2
                        and obs.shape[0] == obs.shape[1]
                        and obs.shape[0] > 1
                    ):
                        # Sanity check that rotating the observations makes them not-equal
                        rot_obs = np.rot90(obs)
                        numer = np.abs(rot_obs - goal_obs)
                        denom = np.abs(
                            np.stack((rot_obs, goal_obs, np.ones_like(obs)), axis=0)
                        ).max(0)
                        rot_difference = special_mean(numer / denom)
                        assert (
                            difference < rot_difference or (obs == rot_obs).all()
                        ), f"Too small a difference ({(numer / denom).mean()})."

            observations_dict = defaultdict(lambda: [])
            for i in range(5):  # Why 5, why not 5?
                set_seed(i)
                task = walkthrough_task_sampler.next_task()

                obs_list = observations_dict[i]
                obs_list.append(task.get_observations())
                k = 0
                compare_recursive(
                    obs=obs_list[0], goal_obs=goal_obs_dict[i][0], key_list=[i, k]
                )
                while not task.is_done():
                    # "done" is listed once and every other action three
                    # times, so episodes tend to last several steps.
                    obs = task.step(
                        action=task.action_names().index(
                            random.choice(
                                3
                                * [
                                    "move_ahead",
                                    "rotate_right",
                                    "rotate_left",
                                    "look_up",
                                    "look_down",
                                ]
                                + ["done"]
                            )
                        )
                    ).observation
                    k += 1
                    obs_list.append(obs)
                    compare_recursive(
                        obs=obs,
                        goal_obs=goal_obs_dict[i][task.num_steps_taken()],
                        key_list=[i, k],
                    )

                    # Free space metric map in RGB using pointclouds coming from depth images. This
                    # is built iteratively after every step.
                    # R - is used to encode points at a height < 0.02m (i.e. the floor)
                    # G - is used to encode points at a height between 0.02m and 2m, i.e. objects the agent would run into
                    # B - is used to encode points higher than 2m, i.e. ceiling

                    # Uncomment if you wish to visualize the observations:
                    # import matplotlib.pyplot as plt
                    # plt.imshow(
                    #     np.flip(255 * (obs["binned_pc_map"]["map"] > 0), 0)
                    # )  # np.flip because we expect "up" to be -row
                    # plt.title("Free space map")
                    # plt.show()
                    # plt.close()

                    # See also `obs["binned_pc_map"]["egocentric_update"]` to see the
                    # the metric map from the point of view of the agent before it is
                    # rotated into the world-space coordinates and merged with past observations.

                    # Semantic map in RGB which is iteratively revealed using depth maps to figure out what
                    # parts of the scene the agent has seen so far.
                    # This map has shape 210x210x72 with the 72 channels corresponding to the 72
                    # object types in `ORDERED_OBJECT_TYPES`
                    semantic_map = obs["semantic_map"]["map"]

                    # We can't display all 72 channels in an RGB image so instead we randomly assign
                    # each object a color and then just allow them to overlap each other
                    colored_semantic_map = (
                        SemanticMapBuilder.randomly_color_semantic_map(semantic_map)
                    )

                    # Here's the full semantic map with nothing masked out because the agent
                    # hasn't seen it yet
                    colored_semantic_map_no_fog = (
                        SemanticMapBuilder.randomly_color_semantic_map(
                            map_sensors[
                                -1
                            ].semantic_map_builder.ground_truth_semantic_map
                        )
                    )

                    # Uncomment if you wish to visualize the observations:
                    # import matplotlib.pyplot as plt
                    # plt.imshow(
                    #     np.flip(  # np.flip because we expect "up" to be -row
                    #         np.concatenate(
                    #             (
                    #                 colored_semantic_map,
                    #                 255 + 0 * colored_semantic_map[:, :10, :],
                    #                 colored_semantic_map_no_fog,
                    #             ),
                    #             axis=1,
                    #         ),
                    #         0,
                    #     )
                    # )
                    # plt.title("Semantic map with and without exploration fog")
                    # plt.show()
                    # plt.close()

                    # See also
                    # * `obs["semantic_map"]["egocentric_update"]`
                    # * `obs["semantic_map"]["explored_mask"]`
                    # * `obs["semantic_map"]["egocentric_mask"]`

            # To save observations for comparison against future runs, uncomment the below.
            # os.makedirs("tmp_out", exist_ok=True)
            # compress_pickle.dump(
            #     {**observations_dict}, "tmp_out/rearrange_mapping_examples.pkl.gz"
            # )
        finally:
            # The sampler name is unbound if we returned (or failed) before
            # creating it; the resulting NameError is expected and ignored.
            try:
                walkthrough_task_sampler.close()
            except NameError:
                pass

    def test_pretrained_rearrange_walkthrough_mapping_agent(self, tmpdir):
        """Run a downloaded pretrained walkthrough mapping agent for up to 10
        steps on each of five tasks and check that its binned-map and
        semantic-map losses stay below fixed thresholds at every step.

        # Parameters
        tmpdir : directory in which the checkpoint is stored (it is downloaded
            only if not already present there).
        """
        try:
            if not self.setup_path_for_use_with_rearrangement_project():
                return

            from baseline_configs.rearrange_base import RearrangeBaseExperimentConfig
            from baseline_configs.walkthrough.walkthrough_rgb_mapping_ppo import (
                WalkthroughRGBMappingPPOExperimentConfig,
            )
            from rearrange.constants import (
                FOV,
                PICKUPABLE_OBJECTS,
                OPENABLE_OBJECTS,
            )
            from datagen.datagen_utils import get_scenes

            # Fall back to no explicit display if the lookup itself fails.
            open_x_displays = []
            try:
                open_x_displays = get_open_x_displays()
            except (AssertionError, IOError):
                pass
            walkthrough_task_sampler = (
                WalkthroughRGBMappingPPOExperimentConfig.make_sampler_fn(
                    stage="train",
                    scene_to_allowed_rearrange_inds={
                        s: [0] for s in get_scenes("train")
                    },
                    force_cache_reset=True,
                    allowed_scenes=None,
                    seed=2,
                    x_display=open_x_displays[0] if len(open_x_displays) != 0 else None,
                )
            )

            named_losses = (
                WalkthroughRGBMappingPPOExperimentConfig.training_pipeline()._named_losses
            )

            ckpt_path = os.path.join(
                tmpdir, "pretrained_walkthrough_mapping_agent_75mil.pt"
            )
            if not os.path.exists(ckpt_path):
                urllib.request.urlretrieve(
                    "https://prior-model-weights.s3.us-east-2.amazonaws.com/embodied-ai/rearrangement/walkthrough/pretrained_walkthrough_mapping_agent_75mil.pt",
                    ckpt_path,
                )

            state_dict = torch.load(
                ckpt_path,
                map_location="cpu",
            )

            walkthrough_model = WalkthroughRGBMappingPPOExperimentConfig.create_model()
            walkthrough_model.load_state_dict(state_dict["model_state_dict"])

            memory = RolloutBlockStorage.create_memory(
                spec=walkthrough_model.recurrent_memory_specification, num_samplers=1
            ).step_squeeze(0)

            masks = torch.FloatTensor([0]).view(1, 1, 1)

            def add_step_dim(value):
                """Recursively add a leading dimension to every tensor in a
                (possibly nested) dictionary of tensors."""
                if isinstance(value, torch.Tensor):
                    return value.unsqueeze(0)
                elif isinstance(value, Dict):
                    return {k: add_step_dim(v) for k, v in value.items()}
                else:
                    raise NotImplementedError

            binned_map_losses = []
            semantic_map_losses = []
            for i in range(5):
                # Zero the masks at the start of every task.
                masks = 0 * masks

                set_seed(i + 1)
                task = walkthrough_task_sampler.next_task()

                batch = add_step_dim(batch_observations([task.get_observations()]))

                while not task.is_done():
                    # noinspection PyTypeChecker
                    ac_out, memory = cast(
                        Tuple[ActorCriticOutput, Memory],
                        walkthrough_model.forward(
                            observations=batch,
                            memory=memory,
                            prev_actions=None,
                            masks=masks,
                        ),
                    )

                    binned_map_losses.append(
                        named_losses["binned_map_loss"]
                        .loss(
                            step_count=0,  # Not used in this loss
                            batch={"observations": batch},
                            actor_critic_output=ac_out,
                        )[0]
                        .item()
                    )
                    assert (
                        binned_map_losses[-1] < 0.16
                    ), f"Binned map loss to large at ({i}, {task.num_steps_taken()})"

                    semantic_map_losses.append(
                        named_losses["semantic_map_loss"]
                        .loss(
                            step_count=0,  # Not used in this loss
                            batch={"observations": batch},
                            actor_critic_output=ac_out,
                        )[0]
                        .item()
                    )
                    assert (
                        semantic_map_losses[-1] < 0.004
                    ), f"Semantic map loss to large at ({i}, {task.num_steps_taken()})"

                    masks = masks.fill_(1.0)
                    obs = task.step(
                        action=ac_out.distributions.sample().item()
                    ).observation
                    batch = add_step_dim(batch_observations([obs]))

                    # Only the first 10 steps of each task are checked.
                    if task.num_steps_taken() >= 10:
                        break
        finally:
            # The sampler name is unbound if we returned (or failed) before
            # creating it; the resulting NameError is expected and ignored.
            try:
                walkthrough_task_sampler.close()
            except NameError:
                pass


# Allow running the mapping test directly as a script, using a fresh temporary
# directory in place of the `tmpdir` argument.
if __name__ == "__main__":
    TestAI2THORMapSensors().test_binned_and_semantic_mapping(mkdtemp())  # type:ignore
    # TestAI2THORMapSensors().test_binned_and_semantic_mapping("tmp_out")  # Used for local debugging
    # TestAI2THORMapSensors().test_pretrained_rearrange_walkthrough_mapping_agent(
    #     mkdtemp() # "tmp_out"
    # )  # Used for local debugging


================================================
FILE: tests/multiprocessing/__init__.py
================================================


================================================
FILE: tests/multiprocessing/test_frozen_attribs.py
================================================
from typing import Dict, Any

import torch.multiprocessing as mp
import torch.nn as nn

from allenact.base_abstractions.experiment_config import ExperimentConfig
from allenact.base_abstractions.task import TaskSampler
from allenact.utils.experiment_utils import TrainingPipeline


# noinspection PyAbstractClass,PyTypeChecker
class MyConfig(ExperimentConfig):
    """Stub experiment config used as a test fixture.

    `TestFrozenAttribs.test_frozen_experiment_config` expects instantiating
    this class to raise (presumably because `machine_params` is not
    implemented here — confirm against `ExperimentConfig`) and expects
    assigning to `MyConfig.MY_VAR` to raise a `RuntimeError`.
    """

    # Class-level variable whose reassignment the tests expect to be rejected.
    MY_VAR: int = 3

    @classmethod
    def tag(cls) -> str:
        """Return an empty tag."""
        return ""

    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        """Stub; returns `None`."""
        return None

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Stub; returns `None`."""
        return None

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Stub; returns `None`."""
        return None

    def my_var_is(self, val):
        """Assert that `MY_VAR`, as seen from this instance, equals `val`."""
        assert self.MY_VAR == val


# noinspection PyAbstractClass
class MySpecConfig(MyConfig):
    """Subclass of `MyConfig` that adds `machine_params` and overrides
    `MY_VAR` and `tag`; unlike `MyConfig`, it is instantiated at module level
    below."""

    # Overrides the value inherited from `MyConfig` (3).
    MY_VAR = 6

    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        """Return an empty parameter dictionary."""
        return {}

    @classmethod
    def tag(cls) -> str:
        """Return the tag `"SpecTag"`."""
        return "SpecTag"


# Module-level config instance; `TestFrozenAttribs.test_frozen_experiment_config`
# overrides `MY_VAR` on it and hands it to child processes.
scfg = MySpecConfig()


class TestFrozenAttribs(object):
    """Tests that class-level variables of `FrozenClassVariables` classes
    cannot be reassigned, while per-instance overrides still work and are
    visible in child processes."""

    def test_frozen_inheritance(self):
        """Assigning to a class attribute of a frozen (derived) class must
        raise; assigning on an instance must only affect that instance."""
        from abc import abstractmethod
        from allenact.base_abstractions.experiment_config import FrozenClassVariables

        class SomeBase(metaclass=FrozenClassVariables):
            yar = 3

            @abstractmethod
            def use(self):
                raise NotImplementedError()

        class SomeDerived(SomeBase):
            yar = 33

            def use(self):
                return self.yar

        failed = False
        try:
            SomeDerived.yar = 6  # Error
        except Exception as _:
            failed = True
        assert failed

        inst = SomeDerived()
        inst2 = SomeDerived()
        inst.yar = 12  # No error
        assert inst.use() == 12
        assert inst2.use() == 33

    @staticmethod
    def my_func(config, val):
        """Process target: assert that `config.MY_VAR` equals `val`."""
        config.my_var_is(val)

    def test_frozen_experiment_config(self):
        """Check freezing on `ExperimentConfig` subclasses and that an
        instance-level override of `MY_VAR` is seen by child processes."""
        val = 5

        # `MyConfig` cannot be instantiated.
        failed = False
        try:
            MyConfig()
        except (RuntimeError, TypeError):
            failed = True
        assert failed

        # Instance-level assignment is allowed.
        scfg.MY_VAR = val
        scfg.my_var_is(val)

        # Class-level assignment is rejected, on the base and derived class.
        failed = False
        try:
            MyConfig.MY_VAR = val
        except RuntimeError:
            failed = True
        assert failed

        failed = False
        try:
            MySpecConfig.MY_VAR = val
        except RuntimeError:
            failed = True
        assert failed

        for fork_method in ["forkserver", "fork"]:
            ctxt = mp.get_context(fork_method)
            p = ctxt.Process(target=self.my_func, kwargs=dict(config=scfg, val=val))
            p.start()
            p.join()
            # An exception in the child (e.g. a failed assertion in
            # `my_var_is`) only surfaces as a non-zero exit code, so it must
            # be checked explicitly or the failure goes unnoticed.
            assert (
                p.exitcode == 0
            ), f"Child process started with '{fork_method}' exited with code {p.exitcode}."


# Allow running both tests directly as a script.
if __name__ == "__main__":
    TestFrozenAttribs().test_frozen_inheritance()  # type:ignore
    TestFrozenAttribs().test_frozen_experiment_config()  # type:ignore


================================================
FILE: tests/sync_algs_cpu/__init__.py
================================================


================================================
FILE: tests/sync_algs_cpu/test_to_to_obj_trains.py
================================================
import io
import math
import os
import pathlib
from contextlib import redirect_stdout, redirect_stderr
from typing import Optional, List, Dict, Any

import torch

from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
    AbstractActorCriticLoss,
)
from allenact.algorithms.onpolicy_sync.policy import ObservationType
from allenact.algorithms.onpolicy_sync.runner import OnPolicyRunner
from allenact.algorithms.onpolicy_sync.storage import (
    StreamingStorageMixin,
    ExperienceStorage,
    RolloutBlockStorage,
)
from allenact.base_abstractions.experiment_config import MachineParams
from allenact.base_abstractions.misc import (
    Memory,
    GenericAbstractLoss,
    ModelType,
    LossOutput,
)
from allenact.utils.experiment_utils import PipelineStage, StageComponent
from allenact.utils.misc_utils import prepare_locals_for_super
from projects.babyai_baselines.experiments.go_to_obj.ppo import (
    PPOBabyAIGoToObjExperimentConfig,
)

# Two parallel lists of equal length. Presumably passed to `SillyStorage` as
# `values_to_return` and `repeats` respectively — confirm at the call site.
SILLY_STORAGE_VALUES = [1.0, 2.0, 3.0, 4.0]
SILLY_STORAGE_REPEATS = [1, 2, 3, 4]


class FixedConstantLoss(AbstractActorCriticLoss):
    """Loss that ignores all of its inputs and always reports one constant."""

    def __init__(self, name: str, value: float):
        """Remember the constant `value` and the info key `name` it is logged under."""
        super().__init__()
        self.name, self.value = name, value

    def loss(self, *args, **kwargs):  # type: ignore
        """Return `(value, {name: value})` regardless of the arguments given."""
        constant = self.value
        info = {self.name: constant}
        return constant, info


class SillyStorage(ExperienceStorage, StreamingStorageMixin):
    """Toy streaming storage that replays a fixed list of constant-valued batches.

    The i-th batch is `{"value": tensor}` where the tensor contains
    `values_to_return[i]` repeated `repeats[i]` times. Every hook related to
    collecting experience is a no-op.
    """

    def __init__(self, values_to_return: List[float], repeats: List[int]):
        """Store the per-batch values and repeat counts (must be equally long)."""
        self.values_to_return = values_to_return
        self.repeats = repeats
        assert len(self.values_to_return) == len(self.repeats)
        # Position of the next batch to hand out.
        self.index = 0

    def initialize(self, *, observations: ObservationType, **kwargs):
        """No-op: nothing needs to be set up from the observations."""

    def add(
        self,
        observations: ObservationType,
        memory: Optional[Memory],
        actions: torch.Tensor,
        action_log_probs: torch.Tensor,
        value_preds: torch.Tensor,
        rewards: torch.Tensor,
        masks: torch.Tensor,
    ):
        """No-op: incoming experience is discarded."""

    def to(self, device: torch.device):
        """No-op: there is no persistent tensor state to move."""

    def set_partition(self, index: int, num_parts: int):
        """No-op: the same batches are returned whatever the partition."""

    @property
    def total_experiences(self) -> int:
        """Always 0."""
        return 0

    @total_experiences.setter
    def total_experiences(self, value: int):
        """Ignore attempts to set the number of experiences."""

    def next_batch(self) -> Dict[str, Any]:
        """Return the next constant batch.

        Raises:
            EOFError: once every batch of the stream has been returned.
        """
        cursor = self.index
        if cursor >= len(self.values_to_return):
            raise EOFError

        batch = {
            "value": torch.tensor([self.values_to_return[cursor]] * self.repeats[cursor])
        }
        self.index = cursor + 1
        return batch

    def reset_stream(self):
        """Rewind so that `next_batch` starts again from the first batch."""
        self.index = 0

    def empty(self) -> bool:
        """Return True iff this storage was created without any values."""
        return len(self.values_to_return) == 0


class AverageBatchValueLoss(GenericAbstractLoss):
    """Loss equal to the mean of the `"value"` entry of the batch."""

    def loss(
        self,
        *,
        model: ModelType,
        batch: ObservationType,
        batch_memory: Memory,
        stream_memory: Memory,
    ) -> LossOutput:
        """Average `batch["value"]` and report it as both the loss and `avg_batch_val`.

        `model` is unused; both memories are passed through unchanged and the
        reported batch size is the length of the first dimension of
        `batch["value"]`.
        """
        values = batch["value"]
        mean_value = values.mean()
        return LossOutput(
            value=mean_value,
            info={"avg_batch_val": mean_value},
            per_epoch_info={},
            batch_memory=batch_memory,
            stream_memory=stream_memory,
            bsize=values.shape[0],
        )


class PPOBabyAIGoToObjTestExperimentConfig(PPOBabyAIGoToObjExperimentConfig):
    """GoToObj PPO config variant used by the training test.

    On top of the parent config it adds a constant loss, a debug loss fed by
    `SillyStorage`, dedicated validation/test pipeline stages, and a save
    interval derived from `NUM_CKPTS_TO_SAVE`.
    """

    NUM_CKPTS_TO_SAVE = 2

    @classmethod
    def tag(cls):
        """Return the tag identifying this testing experiment."""
        return "BabyAIGoToObjPPO-TESTING"

    @classmethod
    def machine_params(cls, mode="train", **kwargs):
        """Return the parent's machine params, forcing one process for validation."""
        params = super().machine_params(mode=mode, **kwargs)
        if mode != "valid":
            return params

        return MachineParams(
            nprocesses=1,
            devices=params.devices,
            sensor_preprocessor_graph=params.sensor_preprocessor_graph,
            sampler_devices=params.sampler_devices,
            visualizer=params.visualizer,
            local_worker_ids=params.local_worker_ids,
        )

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build the pipeline: PPO + constant loss for train/valid, debug loss for test."""
        total_steps = cls.TOTAL_RL_TRAIN_STEPS
        ppo_defaults = cls.rl_loss_default("ppo", steps=total_steps)

        losses = {
            "ppo_loss": ppo_defaults["loss"],
            "3_loss": FixedConstantLoss("3_loss", 3.0),
            "avg_value_loss": AverageBatchValueLoss(),
        }
        storages = {
            "onpolicy": RolloutBlockStorage(),
            "silly_storage": SillyStorage(
                values_to_return=SILLY_STORAGE_VALUES, repeats=SILLY_STORAGE_REPEATS
            ),
        }

        # Training: both on-policy losses are computed from the rollout storage.
        train_stage = PipelineStage(
            loss_names=["ppo_loss", "3_loss"],
            max_stage_steps=total_steps,
            stage_components=[
                StageComponent(
                    uuid="onpolicy",
                    storage_uuid="onpolicy",
                    loss_names=["ppo_loss", "3_loss"],
                )
            ],
        )
        # Validation: same losses as training, a single pass over a single mini-batch.
        valid_stage = PipelineStage(
            loss_names=["ppo_loss", "3_loss"],
            max_stage_steps=-1,
            update_repeats=1,
            num_mini_batch=1,
        )
        # Testing: only the debug loss, streamed from the silly storage.
        test_stage = PipelineStage(
            loss_names=["avg_value_loss"],
            stage_components=[
                StageComponent(
                    uuid="debug",
                    storage_uuid="silly_storage",
                    loss_names=["avg_value_loss"],
                ),
            ],
            max_stage_steps=-1,
            update_repeats=1,
            num_mini_batch=1,
        )

        pipeline = cls._training_pipeline(
            named_losses=losses,
            named_storages=storages,
            pipeline_stages=[train_stage],
            num_mini_batch=ppo_defaults["num_mini_batch"],
            update_repeats=ppo_defaults["update_repeats"],
            total_train_steps=total_steps,
            valid_pipeline_stage=valid_stage,
            test_pipeline_stage=test_stage,
        )

        # Space the checkpoints so that NUM_CKPTS_TO_SAVE of them cover the training run.
        pipeline.training_settings.save_interval = int(
            math.ceil(cls.TOTAL_RL_TRAIN_STEPS / cls.NUM_CKPTS_TO_SAVE)
        )
        return pipeline

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Enable validation by reusing the test task sampler arguments.

        NOTE: the arguments are forwarded via `locals()`, so no extra local
        variables may be introduced before the call below.
        """
        return self.test_task_sampler_args(**prepare_locals_for_super(locals()))


class RedirectOutput:
    """Context manager collecting stdout/stderr, optionally alongside pytest capture.

    While active, `sys.stdout` and `sys.stderr` are redirected into a shared
    in-memory buffer. If pytest's `capsys` and/or `capfd` fixtures are given,
    whatever they captured during the `with` block is collected as well, so
    that `get_output()` returns everything in a single string.
    """

    def __init__(self, capsys: Optional[Any], capfd: Optional[Any]):
        """Create the redirector.

        Args:
            capsys: pytest's `capsys` fixture, or `None` if unavailable.
            capfd: pytest's `capfd` fixture, or `None` if unavailable.
        """
        self.capsys = capsys
        self.capfd = capfd

        self.f = io.StringIO()
        self.redirect_stdout = redirect_stdout(self.f)
        self.redirect_stderr = redirect_stderr(self.f)
        self.capsys_output = ""
        self.capfd_output = ""

    def get_output(self):
        """Return the redirected text followed by the capsys and capfd captures."""
        return self.f.getvalue() + self.capsys_output + self.capfd_output

    def __enter__(self):
        """Start redirecting and return `self` so `with ... as r` is usable."""
        for fixture in (self.capsys, self.capfd):
            if fixture is not None:
                fixture.readouterr()  # Clear out any existing output

        self.redirect_stdout.__enter__()
        self.redirect_stderr.__enter__()
        return self

    def __exit__(self, *args):
        """Collect fixture output and restore the original streams.

        Exceptions raised inside the `with` block are never suppressed.
        """
        try:
            if self.capsys is not None:
                captured = self.capsys.readouterr()
                self.capsys_output = captured.out + captured.err

            if self.capfd is not None:
                captured = self.capfd.readouterr()
                self.capfd_output = captured.out + captured.err
        finally:
            # Always restore sys.stdout / sys.stderr, even if reading a fixture failed.
            self.redirect_stdout.__exit__(*args)
            self.redirect_stderr.__exit__(*args)


class TestGoToObjTrains:
    """End-to-end check that PPO trains, validates and tests on BabyAI GoToObj."""

    def test_ppo_trains(self, capfd, tmpdir):
        """Train with the testing config, then evaluate the saved checkpoints.

        Args:
            capfd: pytest's `capfd` fixture, or `None` when run outside pytest.
            tmpdir: Directory under which the experiment output is written;
                either pytest's `tmpdir` fixture or a `pathlib.Path`.
        """
        cfg = PPOBabyAIGoToObjTestExperimentConfig()

        d = tmpdir / "test_ppo_trains"
        if isinstance(d, pathlib.Path):
            d.mkdir(parents=True, exist_ok=True)
        else:
            # Non-`pathlib` path object, e.g. the one provided by pytest's `tmpdir`.
            d.mkdir()
        output_dir = str(d)

        train_runner = OnPolicyRunner(
            config=cfg,
            output_dir=output_dir,
            loaded_config_src_files=None,
            seed=1,
            mode="train",
            deterministic_cudnn=True,
        )

        # Capture everything logged during training so the metrics can be parsed below.
        output_redirector = RedirectOutput(capsys=None, capfd=capfd)
        with output_redirector:
            start_time_str = train_runner.start_train(
                max_sampler_processes_per_worker=1
            )
        s = output_redirector.get_output()

        def extract_final_metrics_from_log(s: str, mode: str):
            """Parse the last log line mentioning `mode` into a metrics dict.

            The text after the line's final ")" and before the following "["
            is split on single spaces and read as alternating key/value
            tokens; values are converted to floats whenever possible.

            Raises:
                RuntimeError: if no line of `s` mentions `mode` (upper-cased).
            """
            mode_tag = mode.upper()
            lines = [line for line in s.splitlines() if mode_tag in line]
            if not lines:
                raise RuntimeError(f"Failed to parse log:\n{s}")

            metrics_and_losses_list = (
                lines[-1].split(")")[-1].split("[")[0].strip().split(" ")
            )

            def try_float(f):
                try:
                    return float(f)
                except ValueError:
                    return f

            return {
                k: try_float(v)
                for k, v in zip(
                    metrics_and_losses_list[::2], metrics_and_losses_list[1::2]
                )
            }

        train_metrics = extract_final_metrics_from_log(s, "train")
        assert train_metrics["global_batch_size"] == 256

        valid_metrics = extract_final_metrics_from_log(s, "valid")
        assert valid_metrics["3_loss/3_loss"] == 3, "Incorrect validation loss"
        assert (
            valid_metrics["new_tasks_completed"] == cfg.NUM_TEST_TASKS
        ), "Incorrect number of tasks evaluated in validation"

        test_runner = OnPolicyRunner(
            config=cfg,
            output_dir=output_dir,
            loaded_config_src_files=None,
            seed=1,
            mode="test",
            deterministic_cudnn=True,
        )

        # Evaluate every checkpoint written by the training run above.
        test_results = test_runner.start_test(
            checkpoint_path_dir_or_pattern=os.path.join(
                output_dir, "checkpoints", "**", start_time_str, "*.pt"
            ),
            max_sampler_processes_per_worker=1,
        )

        assert (
            len(test_results) == 2
        ), f"Too many or too few test results ({test_results})"

        tr = test_results[-1]

        # Training proceeds in whole rollouts, so the final step count is the total
        # number of requested steps rounded up to a multiple of the rollout size.
        steps_per_rollout = cfg.ROLLOUT_STEPS * cfg.NUM_TRAIN_SAMPLERS
        expected_training_steps = (
            math.ceil(cfg.TOTAL_RL_TRAIN_STEPS / steps_per_rollout) * steps_per_rollout
        )
        assert (
            tr["training_steps"] == expected_training_steps
        ), "Incorrect number of training steps"

        assert len(tr["tasks"]) == cfg.NUM_TEST_TASKS, "Incorrect number of test tasks"
        assert tr["test-metrics/success"] == sum(
            task["success"] for task in tr["tasks"]
        ) / len(tr["tasks"]), "Success counts don't seem to match"
        assert tr["test-metrics/success"] > 0.95, (
            "PPO did not seem to converge for the go_to_obj task"
            f" (success {tr['test-metrics/success']})."
        )

        # The debug loss is the batch-size weighted average of the silly storage values.
        assert tr["test-debug-losses/avg_value_loss/avg_batch_val"] == sum(
            ssv * ssr for ssv, ssr in zip(SILLY_STORAGE_VALUES, SILLY_STORAGE_REPEATS)
        ) / sum(SILLY_STORAGE_REPEATS)

        # Each silly batch contains `repeats` elements, so the expected value here is
        # presumably the mean of the repeat counts (numerically the same 2.5 as the
        # mean of the values).
        assert tr["test-debug-misc/worker_batch_size"] == sum(
            SILLY_STORAGE_REPEATS
        ) / len(SILLY_STORAGE_REPEATS)


if __name__ == "__main__":
    # Run the test as a plain script. No pytest fixtures exist here, so fd-level
    # capturing is disabled (`capfd=None`) and a fixed local directory replaces `tmpdir`.
    TestGoToObjTrains().test_ppo_trains(
        capfd=None, tmpdir=pathlib.Path("experiment_output/testing")
    )  # type:ignore


================================================
FILE: tests/utils/__init__.py
================================================


================================================
FILE: tests/utils/test_inference_agent.py
================================================
from collections import Counter

import torch

from allenact.utils.experiment_utils import set_seed
from allenact.utils.inference import InferenceAgent
from projects.babyai_baselines.experiments.go_to_obj.ppo import (
    PPOBabyAIGoToObjExperimentConfig,
)

from packaging.version import parse

# Reference per-episode metrics for the inference agent test. Two variants are kept
# because the values differ depending on the installed torch version.
if parse(torch.__version__) >= parse("2.0.0"):
    expected_results = [
        {
            "ep_length": 39,
            "reward": 0.45999999999999996,
            "task_info": {},
            "success": 1.0,
        },
        {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0},
        {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0},
        {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0},
        {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0},
        {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0},
        {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0},
        {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0},
        {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0},
        {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0},
    ]
else:
    # Values for torch versions older than 2.0.0 (these entries carry no "task_info").
    expected_results = [
        {"ep_length": 64, "reward": 0.0, "success": 0.0},
        {"ep_length": 64, "reward": 0.0, "success": 0.0},
        {"ep_length": 64, "reward": 0.0, "success": 0.0},
        {"ep_length": 17, "reward": 0.7646153846153846, "success": 1.0},
        {"ep_length": 22, "reward": 0.6953846153846154, "success": 1.0},
        {"ep_length": 64, "reward": 0.0, "success": 0.0},
        {"ep_length": 64, "reward": 0.0, "success": 0.0},
        {"ep_length": 64, "reward": 0.0, "success": 0.0},
        {"ep_length": 64, "reward": 0.0, "success": 0.0},
        {"ep_length": 64, "reward": 0.0, "success": 0.0},
    ]


class TestInferenceAgent(object):
    """Smoke test for `InferenceAgent` built from the BabyAI GoToObj PPO config."""

    def test_inference_agent_from_minigrid_config(self):
        """Roll out an inference agent for several episodes and sanity-check the results.

        Checks that successful episodes have positive reward, that no episode
        is longer than 64 steps, that at least one episode succeeds, and that
        no action that was taken is taken too rarely overall.
        """
        set_seed(1)

        exp_config = PPOBabyAIGoToObjExperimentConfig()
        agent = InferenceAgent.from_experiment_config(
            exp_config=exp_config,
            device=torch.device("cpu"),
        )

        task_sampler = exp_config.make_sampler_fn(
            **exp_config.test_task_sampler_args(process_ind=0, total_processes=1)
        )

        # Running tally of how often each action was taken across all episodes; this
        # avoids repeatedly concatenating the per-episode action lists.
        action_counts = Counter()
        successes = 0
        for ind, expected_result in zip(range(10), expected_results):
            agent.reset()

            task = task_sampler.next_task()
            observations = task.get_observations()

            actions = []
            while not task.is_done():
                action = agent.act(observations=observations)
                actions.append(action)
                observations = task.step(action).observation

            metrics = task.metrics()

            successes += metrics["success"]

            assert metrics["success"] == 0 or metrics["reward"] > 0
            assert metrics["ep_length"] <= 64

            # Random seeding seems to not work well when changing linux/mac and torch versions :(
            # assert all(
            #     abs(v - expected_result[k]) < 1e-4
            #     for k, v in task.metrics().items()
            #     if k != "task_info"
            # ), f"Failed on task {ind} with actions {actions} and metrics {task.metrics()} (expected={expected_result})."

            action_counts.update(actions)

        assert successes > 0, "At least one task should be successful hopefully..."

        total_actions_taken = sum(action_counts.values())
        # NOTE: only actions that were taken at least once appear in `action_counts`.
        assert min(action_counts.values()) >= total_actions_taken * 1 / (7 + 3), (
            "Statistically, all actions should be taken at around 1/7 * num_actions times. We add 3 to"
            " the denominator for unlikely settings."
        )


if __name__ == "__main__":
    # Allow running the test directly as a script, without going through pytest.
    TestInferenceAgent().test_inference_agent_from_minigrid_config()


================================================
FILE: tests/utils/test_spaces.py
================================================
import warnings
from collections import OrderedDict
from typing import Tuple

import numpy as np
import torch
from gym import spaces as gyms

from allenact.utils import spaces_utils as su


class TestSpaces(object):
    """Tests for the conversion and flattening helpers in `spaces_utils`."""

    # Nested space mixing Dict/Tuple containers with Box, MultiDiscrete, Discrete and
    # MultiBinary leaves.
    space = gyms.Dict(
        {
            "first": gyms.Tuple(
                [
                    gyms.Box(-10, 10, (3, 4)),
                    gyms.MultiDiscrete([2, 3, 4]),
                    gyms.Box(-1, 1, ()),
                ]
            ),
            "second": gyms.Tuple(
                [
                    gyms.Dict({"third": gyms.Discrete(11)}),
                    gyms.MultiBinary(8),
                ]
            ),
        }
    )

    @staticmethod
    def same(a, b, bidx=None):
        """Recursively compare two nested samples for equality.

        Args:
            a: Reference sample (an `OrderedDict`, a tuple, or a leaf value).
            b: Sample with the same nesting as `a`.
            bidx: If given, every leaf of `b` is indexed with `bidx` before it
                is compared to the corresponding leaf of `a`.

        Returns:
            True iff all corresponding leaves are equal.
        """
        if isinstance(a, OrderedDict):
            return all(TestSpaces.same(a[key], b[key], bidx) for key in a)

        if isinstance(a, tuple):
            return all(
                TestSpaces.same(item, b[idx], bidx) for idx, item in enumerate(a)
            )

        # np.array_equal also works for torch tensors and scalars
        return np.array_equal(a, b if bidx is None else b[bidx])

    def test_conversion(self):
        """A gym sample converted to torch and back to numpy is unchanged."""
        gsample = self.space.sample()

        asample = su.torch_point(self.space, gsample)

        back = su.numpy_point(self.space, asample)

        assert self.same(back, gsample)

    def test_flatten(self):
        """Flattening round-trips and yields the expected sizes and bounds."""
        # We flatten Discrete to 1 value
        assert su.flatdim(self.space) == 25
        # gym flattens Discrete to one-hot
        assert gyms.flatdim(self.space) == 35

        asample = su.torch_point(self.space, self.space.sample())
        flattened = su.flatten(self.space, asample)
        unflattened = su.unflatten(self.space, flattened)
        assert self.same(asample, unflattened)

        # suppress `UserWarning: WARN: Box bound precision lowered by casting to float32`
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            flattened_space = su.flatten_space(self.space)
            assert flattened_space.shape == (25,)
            # The maximum comes from Discrete(11)
            assert flattened_space.high.max() == 11.0
            assert flattened_space.low.min() == -10.0

            gym_flattened_space = gyms.flatten_space(self.space)
            assert gym_flattened_space.shape == (35,)
            # The maximum comes from Box(-10, 10, (3, 4))
            assert gym_flattened_space.high.max() == 10.0
            assert gym_flattened_space.low.min() == -10.0

    def test_batched(self):
        """Unflattening a stack of flattened samples recovers every sample."""
        samples = [self.space.sample() for _ in range(10)]
        flattened = [
            su.flatten(self.space, su.torch_point(self.space, sample))
            for sample in samples
        ]
        stacked = torch.stack(flattened, dim=0)
        unflattened = su.unflatten(self.space, stacked)
        for bidx, refsample in enumerate(samples):
            # Compare each torch-ified sample to the corresponding unflattened from the stack
            assert self.same(su.torch_point(self.space, refsample), unflattened, bidx)

        assert self.same(su.flatten(self.space, unflattened), stacked)

    def test_tolist(self):
        """`action_list` returns one entry per sampler, structured like the space."""
        space = gyms.MultiDiscrete([3, 3])
        actions = su.torch_point(space, space.sample())  # single sampler
        actions = actions.unsqueeze(0).unsqueeze(0)  # add [step, sampler]
        flat_actions = su.flatten(space, actions)
        al = su.action_list(space, flat_actions)
        assert len(al) == 1
        assert len(al[0]) == 2

        space = gyms.Tuple([gyms.MultiDiscrete([3, 3]), gyms.Discrete(2)])
        actions = su.torch_point(space, space.sample())  # single sampler
        actions = (
            actions[0].unsqueeze(0).unsqueeze(0),
            torch.tensor(actions[1]).unsqueeze(0).unsqueeze(0),
        )  # add [step, sampler]
        flat_actions = su.flatten(space, actions)
        al = su.action_list(space, flat_actions)
        assert len(al) == 1
        assert len(al[0][0]) == 2
        assert isinstance(al[0][1], int)

        space = gyms.Dict(
            {"tuple": gyms.MultiDiscrete([3, 3]), "scalar": gyms.Discrete(2)}
        )
        actions = su.torch_point(space, space.sample())  # single sampler
        actions = OrderedDict(
            [
                ("tuple", actions["tuple"].unsqueeze(0).unsqueeze(0)),
                ("scalar", torch.tensor(actions["scalar"]).unsqueeze(0).unsqueeze(0)),
            ]
        )  # add [step, sampler]
        flat_actions = su.flatten(space, actions)
        al = su.action_list(space, flat_actions)
        assert len(al) == 1
        assert len(al[0]["tuple"]) == 2
        assert isinstance(al[0]["scalar"], int)


if __name__ == "__main__":
    # Allow running every test directly as a script, without going through pytest.
    TestSpaces().test_conversion()  # type:ignore
    TestSpaces().test_flatten()  # type:ignore
    TestSpaces().test_batched()  # type:ignore
    TestSpaces().test_tolist()  # type:ignore


================================================
FILE: tests/vision/__init__.py
================================================


================================================
FILE: tests/vision/test_pillow_rescaling.py
================================================
import hashlib
import os

import imageio
import numpy as np
from torchvision.transforms import transforms

from allenact.utils.tensor_utils import ScaleBothSides
from constants import ABS_PATH_OF_TOP_LEVEL_DIR

# Shared transform used by the tests below to turn numpy arrays into PIL images.
to_pil = transforms.ToPILImage()  # Same as used by the vision sensors


class TestPillowRescaling(object):
    """Regression tests pinning the exact output of `ScaleBothSides` via SHA-1 hashes."""

    def _load_thor_img(self) -> np.ndarray:
        """Read the iTHOR framework figure from the repository's docs folder."""
        return imageio.v2.imread(
            os.path.join(ABS_PATH_OF_TOP_LEVEL_DIR, "docs/img/iTHOR_framework.jpg")
        )

    def _get_img_hash(self, img: np.ndarray) -> str:
        """Return the SHA-1 hex digest of the (contiguous) array buffer."""
        return hashlib.sha1(np.ascontiguousarray(img)).hexdigest()

    def _random_rgb_image(self, width: int, height: int, seed: int) -> np.ndarray:
        """Create a seeded random uint8 array of shape `(width, height, 3)`.

        The global numpy random state is saved beforehand and restored afterwards.
        """
        saved_state = np.random.get_state()
        np.random.seed(seed)
        pixels = np.random.randint(
            low=0, high=256, size=(width, height, 3), dtype=np.uint8
        )
        np.random.set_state(saved_state)
        return pixels

    def _random_depthmap(
        self, width: int, height: int, max_depth: float, seed: int
    ) -> np.ndarray:
        """Create a seeded random float32 array of shape `(width, height, 1)`.

        Values are uniform samples from `[0, 1)` scaled by `max_depth`. The
        global numpy random state is saved beforehand and restored afterwards.
        """
        saved_state = np.random.get_state()
        np.random.seed(seed)
        depth = max_depth * np.random.rand(width, height, 1)
        np.random.set_state(saved_state)
        return np.float32(depth)

    def test_scaler_rgb_thor(self):
        """Scaling the iTHOR RGB image gives one of the known hashes per target size."""
        thor_img_arr = np.uint8(self._load_thor_img())

        assert self._get_img_hash(thor_img_arr) in [
            "80ff8a342b4f74966796eee91babde31409d0457",
            "eb808b2218ccc2e56144131f9ef596a5c2ae3e2a",
        ]

        img = to_pil(thor_img_arr)

        # ((width, height), accepted hashes of the rescaled image)
        cases = (
            (
                (75, 75),
                [
                    "2c47057aa188240cb21b2edc39e0f269c1085bac",
                    "b5df3cc03f181cb7be07ddd229cac8d1efd5d077",
                ],
            ),
            (
                (500, 600),
                [
                    "faf0be2b9ec9bfd23a1b7b465c86ad961d03c259",
                    "cccddd7f17b59434dcdd0006dceeffbe1a969dc8",
                ],
            ),
        )
        for (target_w, target_h), accepted in cases:
            scaler = ScaleBothSides(width=target_w, height=target_h)
            scaled_img = np.array(scaler(img))
            assert self._get_img_hash(scaled_img) in accepted

    def test_scaler_rgb_random(self):
        """Scaling a seeded random RGB image gives the known hash per target size."""
        arr = self._random_rgb_image(width=100, height=100, seed=1)

        assert self._get_img_hash(arr) == "d01bd8ba151ab790fde9a8cc29aa8a3c63147334"

        img = to_pil(arr)

        # ((width, height), expected hash of the rescaled image)
        cases = (
            ((60, 60), "22473537e50d5e39abeeec4f92dbfde51c754010"),
            ((1000, 800), "5e5b955981e4ee3b5e22287536040d001a31fbd3"),
        )
        for (target_w, target_h), expected in cases:
            scaler = ScaleBothSides(width=target_w, height=target_h)
            scaled_img = np.array(scaler(img))
            assert self._get_img_hash(scaled_img) == expected

    def test_scaler_depth_thor(self):
        """Scaling a depth-like map derived from the iTHOR image gives a known hash."""
        # Collapse the channels into a single float map and normalize by its maximum.
        thor_depth_arr = 5 * np.float32(self._load_thor_img()).sum(-1)
        thor_depth_arr /= thor_depth_arr.max()

        assert self._get_img_hash(thor_depth_arr) in [
            "d3c1474400ba57ed78f52cf4ba6a4c2a1d90516c",
            "85a18befb2a174403079bf49d149630f829222c2",
        ]

        img = to_pil(thor_depth_arr)

        # ((width, height), accepted hashes of the rescaled image)
        cases = (
            (
                (75, 75),
                [
                    "6a879beb6bed49021e438c1e3af7a62c428a44d8",
                    "868f1d2b32167bda524ba502158f1ee81c8a24d2",
                ],
            ),
            (
                (500, 600),
                [
                    "79f11fb741ae638afca40125e4c501f54b22cc01",
                    "2d3012e1cced2942f7368e84bf332241fcf9d7fe",
                ],
            ),
        )
        for (target_w, target_h), accepted in cases:
            scaler = ScaleBothSides(width=target_w, height=target_h)
            scaled_img = np.array(scaler(img))
            assert self._get_img_hash(scaled_img) in accepted

    def test_scaler_depth_random(self):
        """Scaling a seeded random depth map gives the known hash per target size."""
        depth_arr = self._random_depthmap(width=96, height=103, max_depth=5.0, seed=1)

        assert (
            self._get_img_hash(depth_arr) == "cbd8ca127951ffafb6848536d9d731970a5397e9"
        )

        img = to_pil(depth_arr)

        # ((width, height), expected hash of the rescaled image)
        cases = (
            ((60, 60), "5bed173f2d783fb2badcde9b43904ef85a1a5820"),
            ((1000, 800), "9dceb7f77d767888f24a84c00913c0cf4ccd9d49"),
        )
        for (target_w, target_h), expected in cases:
            scaler = ScaleBothSides(width=target_w, height=target_h)
            scaled_img = np.array(scaler(img))
            assert self._get_img_hash(scaled_img) == expected


if __name__ == "__main__":
    # Allow running every test directly as a script, without going through pytest.
    TestPillowRescaling().test_scaler_rgb_thor()
    TestPillowRescaling().test_scaler_rgb_random()
    TestPillowRescaling().test_scaler_depth_thor()
    TestPillowRescaling().test_scaler_depth_random()