[
  {
    "path": ".VERSION",
    "content": "0.5.5a\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug_report.md",
    "content": "---\nname: Bug report\nabout: Create a report to help us improve\ntitle: ''\nlabels: bug\nassignees: ''\n\n---\n\n## Problem\n\nA clear and concise description of what the bug is.\n\n## Steps to reproduce\n\nSteps to reproduce the behavior:\n1. Go to '...'\n2. Click on '....'\n3. Scroll down to '....'\n4. See error\n\n## Expected behavior\n\nA clear and concise description of what you expected to happen.\n\n## Screenshots\n\nIf applicable, add screenshots to help explain your problem.\n\n## Desktop\n\nPlease add the following information:\n - OS: [e.g. Ubuntu 16.04.5]\n - AllenAct Version: [e.g. current HEAD of master or v0.1.0]\n\n## Additional context\n\nAdd any other context about the problem here.\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/feature_request.md",
    "content": "---\nname: Feature request\nabout: Suggest an enhancement or a new feature\ntitle: ''\nlabels: enhancement\nassignees: ''\n\n---\n\n## Problem\n\nIs your feature request related to a problem? Please provide a clear and concise description of what the problem is:\n\nE.g. I would really like to have better support for my favorite environment X.\n\n## Desired solution\n\nA clear and concise description of what you want to happen.\n\n## Alternative solutions\n\nA description of any alternative solutions or features you've considered.\n\n## Additional context\n\nAdd any other context or screenshots about the feature request here.\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/support_request.md",
    "content": "---\nname: Support request\nabout: Request support regarding AllenAct\ntitle: ''\nlabels: ''\nassignees: ''\n\n---\n\n## Problem / Question\n\nWhat do you need help with? E.g. \"I'm having trouble running model X\" or \"when I run command Y I get error Z.\"\n\n## Additional context\n\n_(Optional)_ - To provide support it's helpful to have as many details as possible, add additional context here.\n"
  },
  {
    "path": ".github/workflows/black.yml",
    "content": "name: Lint\n\non: [push, pull_request]\n\njobs:\n  lint:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v3\n      - uses: psf/black@stable\n"
  },
  {
    "path": ".github/workflows/codeql.yml",
    "content": "name: \"CodeQL\"\n\non:\n  push:\n    branches: [ \"main\" ]\n  pull_request:\n    branches: [ \"main\" ]\n  schedule:\n    - cron: \"13 6 * * 4\"\n\njobs:\n  analyze:\n    name: Analyze\n    runs-on: ubuntu-latest\n    permissions:\n      actions: read\n      contents: read\n      security-events: write\n\n    strategy:\n      fail-fast: false\n      matrix:\n        language: [ python ]\n\n    steps:\n      - name: Checkout\n        uses: actions/checkout@v3\n\n      - name: Initialize CodeQL\n        uses: github/codeql-action/init@v2\n        with:\n          languages: ${{ matrix.language }}\n          queries: +security-and-quality\n\n      - name: Autobuild\n        uses: github/codeql-action/autobuild@v2\n\n      - name: Perform CodeQL Analysis\n        uses: github/codeql-action/analyze@v2\n        with:\n          category: \"/language:${{ matrix.language }}\"\n"
  },
  {
    "path": ".github/workflows/publish.yml",
    "content": "# This workflow will upload the allenact and allenact_plugins packages using Twine (after manually triggering it)\n# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries\n\nname: Publish PYPI Packages\n\non:\n  workflow_dispatch:\n\njobs:\n  deploy:\n\n    runs-on: ubuntu-latest\n\n    steps:\n    - uses: actions/checkout@v2\n    - name: Set up Python\n      uses: actions/setup-python@v2\n      with:\n        python-version: '3.7'\n    - name: Install dependencies\n      run: |\n        python -m pip install --upgrade pip\n        pip install setuptools twine\n    - name: Build and publish\n      env:\n        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}\n      run: |\n        python scripts/release.py\n        twine upload -u __token__ dist/*\n"
  },
  {
    "path": ".github/workflows/pytest.yml",
    "content": "name: PyTest\n\non: [push]\n\njobs:\n  build:\n\n    runs-on: ubuntu-latest\n    strategy:\n      matrix:\n        python-version: [3.9]\n\n    steps:\n    - uses: actions/checkout@v2\n\n    - uses: ouzi-dev/commit-status-updater@v1.1.0 # Updates the commit status badge to pending\n\n    - name: Set up Python ${{ matrix.python-version }}\n      uses: actions/setup-python@v2\n      with:\n        python-version: ${{ matrix.python-version }}\n\n    - name: Install dependencies\n      run: |\n        python -m pip install --upgrade pip\n        python -m pip install pytest wandb\n        python -m pip install --editable=\"./allenact\"\n        python -m pip install --editable=\"./allenact_plugins[all]\"\n        python -m pip install -e \"git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd#egg=babyai\"\n        python -m pip install compress_pickle # Needed for some mapping tests\n        pip list\n\n    - name: Test with pytest\n      run: |\n        pytest --capture=tee-sys tests\n\n    - if: always() # Updates the commit status badge to the result of running the tests above\n      uses: ouzi-dev/commit-status-updater@v1.1.0\n      with:\n        status: \"${{ job.status }}\"\n"
  },
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\ndocs/build/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\npip-wheel-metadata/\nshare/python-wheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.nox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n*.py,cover\n.hypothesis/\n.pytest_cache/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\ndb.sqlite3-journal\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# IPython\nprofile_default/\nipython_config.py\n\n# pyenv\n.python-version\n\n# pipenv\n#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.\n#   However, in case of collaboration, if having platform-specific dependencies or dependencies\n#   having no cross-platform support, pipenv may install dependencies that don't work, or not\n#   install all needed dependencies.\n#Pipfile.lock\n\n# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow\n__pypackages__/\n\n# Celery stuff\ncelerybeat-schedule\ncelerybeat.pid\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n.dmypy.json\ndmypy.json\n\n# Pyre type checker\n.pyre/\n\n# pycharm\n.idea/\n\n# pytorch\n*.pt\n\n# Default output dir\nexperiment_output\n*_out\n\n# PDFs\n*.pdf\n\n# PNGs\n*.png\n\n# Tensorboard logs\nevents.out.tfevents.*\n\n# TSV files\n*.tsv\n\n# tmp directory\ntmp/\n\n# Pickle files\n*.pkl\n*.pkl.gz\n\n# Zip files\n*.zip\n\n# VSCode\n.vscode/\n\n# MacOS\n.DS_Store\n\n# Docs\ndocs/index.md\ndocs/CONTRIBUTING.md\ndocs/LICENSE.md\n\n# Metrics\nmetrics__*.json\n\n# Robothor\nallenact_plugins/robothor_plugin/data/*\n\n# ithor\nallenact_plugins/ithor_plugin/data/*\n\n# Habitat\nexternal_projects/habitat-lab\n\n# Local pip installations\nsrc\n.pip_src\n\n# Files created when running training\n**/used_configs\n*.patch\n\n# Package building\n*.egg_info\n*.egg-info\n\n# Additional allenact-specific locks and hidden files\n*.allenact_last_start_time_string\n*.allenact_start_time_string.lock\n*.lock\nrsync-*"
  },
  {
    "path": ".gitmodules",
    "content": "[submodule \"projects/ithor_rearrangement\"]\n\tpath = projects/ithor_rearrangement\n\turl = https://github.com/allenai/ai2thor-rearrangement.git\n\tbranch = active_neural_slam\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "content": "repos:\n-   repo: https://github.com/ambv/black\n    rev: 19.10b0\n    hooks:\n    - id: black\n      language_version: python3.7\n-   repo: https://github.com/pre-commit/mirrors-mypy\n    rev: 'v0.761'  # Use the sha / tag you want to point at\n    hooks:\n    -   id: mypy\n        args: [--follow-imports=skip]"
  },
  {
    "path": "CNAME",
    "content": "www.allenact.org"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "# Contributing\n\nWe welcome contributions from the greater community. If you would like to make such a contributions we recommend first submitting an [issue](https://github.com/allenai/allenact/issues) describing your proposed improvement.\nDoing so can ensure we can validate your suggestions before you spend a great deal of time\nupon them. Improvements and bug fixes should be made via a pull request\nfrom your fork of the repository at [https://github.com/allenai/allenact](https://github.com/allenai/allenact).\n \nAll code in pull requests should adhere to the following guidelines.\n\n## Found a bug or want to suggest an enhancement?\n\nPlease submit an [issue](https://github.com/allenai/allenact/issues) in which you note the steps\nto reproduce the bug or in which you detail the enhancement.\n\n## Making a pull request?\n\nWhen making a pull request we require that any code respects several guidelines detailed below.\n\n### Auto-formatting\n\nAll python code in this repository should be formatted using [black](https://black.readthedocs.io/en/stable/).\nTo use `black` auto-formatting across all files, simply run\n```bash\nbash scripts/auto_format.sh\n``` \nwhich will run `black` auto-formatting as well as [docformatter](https://pypi.org/project/docformatter/) (used\nto auto-format documentation strings).\n\n### Type-checking\n\nOur code makes liberal use of type hints. If you have not had experience with type hinting in python we recommend\nreading the [documentation](https://docs.python.org/3/library/typing.html) of the `typing` python module or the \nsimplified introduction to type hints found [here](https://www.python.org/dev/peps/pep-0483/). All methods should\nhave typed arguments and output. Furthermore we use [mypy](https://mypy.readthedocs.io/en/stable/) to perform \nbasic static type checking. 
Before making a pull request, there should be no warnings or errors when running\n```bash\ndmypy run -- --follow-imports=skip .\n```\nExplicitly ignoring type checking (for instance using `# type: ignore`) should only be done when it would otherwise\nbe an extensive burden.\n\n<!-- TODO: This should be updated given how we're changing dependencies in AllenAct.\n\n### Updating, adding, or removing packages?\n\nIf you are updating, adding, or removing packages please run:\n```bash\npipenv-setup sync --pipfile # Syncs packages to setup.py\npip freeze > requirements.txt # Syncs packages to requirements.txt\n``` \nbefore submitting a pull request. If you are not using `pipenv`, you are still\nrequired to update the file `Pipfile` with newly installed or modified packages. Moreover\nyou must manually update the `install_requires` field of the `setup.py` file. \n-->\n\n### Setting up pre-commit hooks (optional)\n\nPre-commit hooks check that, when you attempt to commit changes, your code adheres to a number of\nformatting and type-checking guidelines. Pull requests containing code not adhering to these \nguidelines will not be accepted and thus we recommend installing these pre-commit hooks. Assuming you have \ninstalled all of the project requirements, you can install our recommended\npre-commit hooks by running (from this project's root directory)\n```bash\npre-commit install\n```\nAfter running the above, each time you run `git commit ...` a set of pre-commit checks will\nbe run."
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nOriginal work Copyright (c) 2017 Ilya Kostrikov\n\nOriginal work Copyright (c) Facebook, Inc. and its affiliates.\n\nModified work Copyright (c) 2020 Allen Institute for Artificial Intelligence\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "<div align=\"center\">\n    <img src=\"docs/img/AllenAct.svg\" width=\"350\" />\n    <br>\n    <i><h3>An open source framework for research in Embodied AI</h3></i>\n    </p>\n    <hr/>\n</div>\n\n[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](./LICENSE)\n[![Documentation Status](https://img.shields.io/badge/docs-up%20to%20date-Green.svg)](https://allenact.org)\n[![Latest Release](https://img.shields.io/github/v/release/allenai/allenact)](https://github.com/allenai/allenact/releases/latest)\n[![Python 3.7](https://img.shields.io/badge/python-3.6+-blue.svg)](https://www.python.org/downloads/release/python-360/)\n[![LGTM Grade: Python](https://img.shields.io/lgtm/grade/python/g/allenai/allenact.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/allenai/allenact/context:python)\n[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)\n\n**AllenAct** is a modular and flexible learning framework designed with a focus on the unique requirements of Embodied-AI research. It provides first-class support for a growing collection of embodied environments, tasks and algorithms, provides reproductions of state-of-the-art models and includes extensive documentation, tutorials, start-up code, and pre-trained models.\n\nAllenAct is built and backed by the [Allen Institute for AI (AI2)](https://allenai.org/). 
AI2 is a non-profit institute with the mission to contribute to humanity through high-impact AI research and engineering.\n\n## Quick Links\n\n- [Website & Docs](https://www.allenact.org/)\n- [Github](https://github.com/allenai/allenact)\n- [Install](https://www.allenact.org/installation/installation-allenact/)\n- [Tutorials](https://www.allenact.org/tutorials/)\n- [AllenAct Paper](https://arxiv.org/abs/2008.12760)\n- [Citation](#citation)\n\n## Features & Highlights\n\n* _Support for multiple environments_: Support for the [iTHOR](https://ai2thor.allenai.org/ithor/), [RoboTHOR](https://ai2thor.allenai.org/robothor/) and [Habitat](https://aihabitat.org/) embodied environments as well as for grid-worlds including [MiniGrid](https://github.com/maximecb/gym-minigrid).\n* _Task Abstraction_: Tasks and environments are decoupled in AllenAct, enabling researchers to easily implement a large variety of tasks in the same environment.\n* _Algorithms_: Support for a variety of on-policy algorithms including [PPO](https://arxiv.org/pdf/1707.06347.pdf), [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf), [A2C](https://arxiv.org/pdf/1611.05763.pdf), Imitation Learning and [DAgger](https://www.ri.cmu.edu/pub_files/2011/4/Ross-AISTATS11-NoRegret.pdf) as well as offline training such as offline IL.\n* _Sequential Algorithms_: It is trivial to experiment with different sequences of training routines, which are often the key to successful policies.\n* _Simultaneous Losses_: Easily combine various losses while training models (e.g. 
use an external self-supervised loss while optimizing a PPO loss).\n* _Multi-agent support_: Support for multi-agent algorithms and tasks.\n* _Visualizations_: Out of the box support to easily visualize first and third person views for agents as well as intermediate model tensors, integrated into Tensorboard.\n* _Pre-trained models_: Code and models for a number of standard Embodied AI tasks.\n* _Tutorials_: Start-up code and extensive tutorials to help ramp up to Embodied AI.\n* _First-class PyTorch support_: One of the few RL frameworks to target PyTorch.\n* _Arbitrary action spaces_: Supporting both discrete and continuous actions.\n\n|Environments|Tasks|Algorithms|\n|------------|-----|----------|\n|[iTHOR](https://ai2thor.allenai.org/ithor/), [RoboTHOR](https://ai2thor.allenai.org/robothor/), [Habitat](https://aihabitat.org/), [MiniGrid](https://github.com/maximecb/gym-minigrid), [OpenAI Gym](https://gym.openai.com/)|[PointNav](https://arxiv.org/pdf/1807.06757.pdf), [ObjectNav](https://arxiv.org/pdf/2006.13171.pdf), [MiniGrid tasks](https://github.com/maximecb/gym-minigrid), [Gym Box2D tasks](https://gym.openai.com/envs/#box2d)|[A2C](https://arxiv.org/pdf/1611.05763.pdf), [PPO](https://arxiv.org/pdf/1707.06347.pdf), [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf), [DAgger](https://www.ri.cmu.edu/pub_files/2011/4/Ross-AISTATS11-NoRegret.pdf), Off-policy Imitation|\n\n## Contributions\nWe welcome contributions from the greater community. If you would like to make such a contribution we recommend first submitting an [issue](https://github.com/allenai/allenact/issues) describing your proposed improvement. Doing so can ensure we can validate your suggestions before you spend a great deal of time upon them. 
Improvements and bug fixes should be made via a pull request from your fork of the repository at [https://github.com/allenai/allenact](https://github.com/allenai/allenact).\n\nAll code in this repository is subject to formatting, documentation, and type-annotation guidelines. For more details, please see our [contribution guidelines](CONTRIBUTING.md).\n\n## Acknowledgments\nThis work builds upon the [pytorch-a2c-ppo-acktr](https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail) library of Ilya Kostrikov and uses some data structures from FAIR's [habitat-lab](https://github.com/facebookresearch/habitat-lab). We would like to thank Dustin Schwenk for his help with the public release of the framework.\n\n## License\nAllenAct is MIT licensed, as found in the [LICENSE](LICENSE) file.\n\n## Team\nAllenAct is an open-source project built by members of the PRIOR research group at the Allen Institute for Artificial Intelligence (AI2). \n\n<div align=\"left\">\n    <a href=\"//prior.allenai.org/\" target=\"_blank\">\n        <img src=\"docs/img/ai2-prior.svg\" width=\"400\">\n    </a>\n    <br>\n</div>\n\n## Citation\nIf you use this work, please cite our [paper](https://arxiv.org/abs/2008.12760):\n\n```bibtex\n@article{AllenAct,\n  author = {Luca Weihs and Jordi Salvador and Klemen Kotar and Unnat Jain and Kuo-Hao Zeng and Roozbeh Mottaghi and Aniruddha Kembhavi},\n  title = {AllenAct: A Framework for Embodied AI Research},\n  year = {2020},\n  journal = {arXiv preprint arXiv:2008.12760},\n}\n```\n\n\n"
  },
  {
    "path": "ROADMAP.md",
    "content": "# Roadmap\n\nHere we track new features/support to be added in the short/mid-term.  \n\n## New environments\n* [SAPIEN](https://sapien.ucsd.edu/)\n* [ThreeDWorld](http://www.threedworld.org/)\n\n## New tasks\n* [Room-to-room navigation](https://arxiv.org/pdf/1711.07280.pdf)\n* [Furniture Lifting](https://arxiv.org/abs/1904.05879) and [Furniture Moving](https://arxiv.org/abs/2007.04979)\n\n## New training methods\n\n* A3C\n* Deep Q-Learning "
  },
  {
    "path": "allenact/__init__.py",
    "content": "try:\n    # noinspection PyProtectedMember,PyUnresolvedReferences\n    from allenact._version import __version__\nexcept ModuleNotFoundError:\n    __version__ = None\n"
  },
  {
    "path": "allenact/_constants.py",
    "content": "import os\nfrom pathlib import Path\n\nALLENACT_INSTALL_DIR = os.path.abspath(os.path.dirname(Path(__file__)))\n"
  },
  {
    "path": "allenact/algorithms/__init__.py",
    "content": ""
  },
  {
    "path": "allenact/algorithms/offpolicy_sync/__init__.py",
    "content": ""
  },
  {
    "path": "allenact/algorithms/offpolicy_sync/losses/__init__.py",
    "content": ""
  },
  {
    "path": "allenact/algorithms/offpolicy_sync/losses/abstract_offpolicy_loss.py",
    "content": "\"\"\"Defining abstract loss classes for actor critic models.\"\"\"\n\nimport abc\nfrom typing import Dict, Tuple, TypeVar, Generic\n\nimport torch\n\nfrom allenact.algorithms.onpolicy_sync.policy import ObservationType\nfrom allenact.base_abstractions.misc import Loss, Memory\n\nModelType = TypeVar(\"ModelType\")\n\n\nclass AbstractOffPolicyLoss(Generic[ModelType], Loss):\n    \"\"\"Abstract class representing an off-policy loss function used to train a\n    model.\"\"\"\n\n    # noinspection PyMethodOverriding\n    @abc.abstractmethod\n    def loss(  # type: ignore\n        self,\n        *,  # No positional arguments\n        step_count: int,\n        model: ModelType,\n        batch: ObservationType,\n        memory: Memory,\n        **kwargs,\n    ) -> Tuple[torch.FloatTensor, Dict[str, float], Memory, int]:\n        \"\"\"Computes the loss.\n\n        Loss after processing a batch of data with (part of) a model (possibly with memory).\n\n        # Parameters\n\n        model: model to run on data batch (both assumed to be on the same device)\n        batch: data to use as input for model (already on the same device as model)\n        memory: model memory before processing current data batch\n\n        # Returns\n\n        A tuple with:\n\n        current_loss: total loss\n        current_info: additional information about the current loss\n        memory: model memory after processing current data batch\n        bsize: batch size\n        \"\"\"\n        raise NotImplementedError()\n"
  },
  {
    "path": "allenact/algorithms/onpolicy_sync/__init__.py",
    "content": ""
  },
  {
    "path": "allenact/algorithms/onpolicy_sync/engine.py",
    "content": "\"\"\"Defines the reinforcement learning `OnPolicyRLEngine`.\"\"\"\n\nimport datetime\nimport logging\nimport numbers\nimport os\nimport random\nimport time\nimport traceback\nfrom functools import partial\nfrom multiprocessing.context import BaseContext\nfrom typing import Any, Dict, List, Optional, Sequence, Union, cast\n\nimport filelock\nimport torch\nimport torch.distributed as dist  # type: ignore\nimport torch.distributions  # type: ignore\nimport torch.multiprocessing as mp  # type: ignore\nimport torch.nn as nn\nimport torch.optim as optim\n\n# noinspection PyProtectedMember\nfrom torch._C._distributed_c10d import ReduceOp\n\nfrom allenact.algorithms.onpolicy_sync.misc import TrackingInfo, TrackingInfoType\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.utils.misc_utils import str2bool\nfrom allenact.utils.model_utils import md5_hash_of_state_dict\n\ntry:\n    # noinspection PyProtectedMember,PyUnresolvedReferences\n    from torch.optim.lr_scheduler import _LRScheduler\nexcept (ImportError, ModuleNotFoundError):\n    raise ImportError(\"`_LRScheduler` was not found in `torch.optim.lr_scheduler`\")\n\nfrom allenact.algorithms.onpolicy_sync.losses.abstract_loss import (\n    AbstractActorCriticLoss,\n)\nfrom allenact.algorithms.onpolicy_sync.policy import ActorCriticModel\nfrom allenact.algorithms.onpolicy_sync.storage import (\n    ExperienceStorage,\n    MiniBatchStorageMixin,\n    RolloutStorage,\n    StreamingStorageMixin,\n)\nfrom allenact.algorithms.onpolicy_sync.vector_sampled_tasks import (\n    COMPLETE_TASK_CALLBACK_KEY,\n    COMPLETE_TASK_METRICS_KEY,\n    SingleProcessVectorSampledTasks,\n    VectorSampledTasks,\n)\nfrom allenact.base_abstractions.distributions import TeacherForcingDistr\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams\nfrom allenact.base_abstractions.misc import (\n    ActorCriticOutput,\n    GenericAbstractLoss,\n    Memory,\n    
RLStepResult,\n)\nfrom allenact.utils import spaces_utils as su\nfrom allenact.utils.experiment_utils import (\n    LoggingPackage,\n    PipelineStage,\n    ScalarMeanTracker,\n    StageComponent,\n    TrainingPipeline,\n    set_deterministic_cudnn,\n    set_seed,\n)\nfrom allenact.utils.system import get_logger\nfrom allenact.utils.tensor_utils import batch_observations, detach_recursively\nfrom allenact.utils.viz_utils import VizSuite\n\ntry:\n    # When debugging we don't want to timeout in the VectorSampledTasks\n\n    # noinspection PyPackageRequirements\n    import pydevd\n\n    DEBUGGING = str2bool(os.getenv(\"ALLENACT_DEBUG\", \"true\"))\nexcept ImportError:\n    DEBUGGING = str2bool(os.getenv(\"ALLENACT_DEBUG\", \"false\"))\n\nDEBUG_VST_TIMEOUT: Optional[int] = (lambda x: int(x) if x is not None else x)(\n    os.getenv(\"ALLENACT_DEBUG_VST_TIMEOUT\", None)\n)\n\nTRAIN_MODE_STR = \"train\"\nVALID_MODE_STR = \"valid\"\nTEST_MODE_STR = \"test\"\n\n\nclass OnPolicyRLEngine(object):\n    \"\"\"The reinforcement learning primary controller.\n\n    This `OnPolicyRLEngine` class handles all training, validation, and\n    testing as well as logging and checkpointing. 
You are not expected\n    to instantiate this class yourself, instead you should define an\n    experiment which will then be used to instantiate an\n    `OnPolicyRLEngine` and perform any desired tasks.\n    \"\"\"\n\n    def __init__(\n        self,\n        experiment_name: str,\n        config: ExperimentConfig,\n        results_queue: mp.Queue,  # to output aggregated results\n        checkpoints_queue: Optional[\n            mp.Queue\n        ],  # to write/read (trainer/evaluator) ready checkpoints\n        checkpoints_dir: str,\n        mode: str = \"train\",\n        callback_sensors: Optional[Sequence[Sensor]] = None,\n        seed: Optional[int] = None,\n        deterministic_cudnn: bool = False,\n        mp_ctx: Optional[BaseContext] = None,\n        worker_id: int = 0,\n        num_workers: int = 1,\n        device: Union[str, torch.device, int] = \"cpu\",\n        distributed_ip: str = \"127.0.0.1\",\n        distributed_port: int = 0,\n        deterministic_agents: bool = False,\n        max_sampler_processes_per_worker: Optional[int] = None,\n        initial_model_state_dict: Optional[Union[Dict[str, Any], int]] = None,\n        try_restart_after_task_error: bool = False,\n        **kwargs,\n    ):\n        \"\"\"Initializer.\n\n        # Parameters\n\n        config : The ExperimentConfig defining the experiment to run.\n        output_dir : Root directory at which checkpoints and logs should be saved.\n        seed : Seed used to encourage deterministic behavior (it is difficult to ensure\n            completely deterministic behavior due to CUDA issues and nondeterminism\n            in environments).\n        mode : \"train\", \"valid\", or \"test\".\n        deterministic_cudnn : Whether to use deterministic cudnn. 
If `True` this may lower\n            training performance this is necessary (but not sufficient) if you desire\n            deterministic behavior.\n        extra_tag : An additional label to add to the experiment when saving tensorboard logs.\n        \"\"\"\n        self.config = config\n        self.results_queue = results_queue\n        self.checkpoints_queue = checkpoints_queue\n        self.mp_ctx = mp_ctx\n        self.checkpoints_dir = checkpoints_dir\n        self.worker_id = worker_id\n        self.num_workers = num_workers\n        self.device = torch.device(\"cpu\") if device == -1 else torch.device(device)  # type: ignore\n\n        if self.device != torch.device(\"cpu\"):\n            torch.cuda.set_device(device)\n\n        self.distributed_ip = distributed_ip\n        self.distributed_port = distributed_port\n        self.try_restart_after_task_error = try_restart_after_task_error\n\n        self.mode = mode.lower().strip()\n        assert self.mode in [\n            TRAIN_MODE_STR,\n            VALID_MODE_STR,\n            TEST_MODE_STR,\n        ], f\"Only {TRAIN_MODE_STR}, {VALID_MODE_STR}, {TEST_MODE_STR}, modes supported\"\n\n        self.callback_sensors = callback_sensors\n        self.deterministic_cudnn = deterministic_cudnn\n        if self.deterministic_cudnn:\n            set_deterministic_cudnn()\n\n        self.seed = seed\n        set_seed(self.seed)\n\n        self.experiment_name = experiment_name\n\n        assert (\n            max_sampler_processes_per_worker is None\n            or max_sampler_processes_per_worker >= 1\n        ), \"`max_sampler_processes_per_worker` must be either `None` or a positive integer.\"\n        self.max_sampler_processes_per_worker = max_sampler_processes_per_worker\n\n        machine_params = config.machine_params(self.mode)\n        self.machine_params: MachineParams\n        if isinstance(machine_params, MachineParams):\n            self.machine_params = machine_params\n        else:\n            
self.machine_params = MachineParams(**machine_params)\n\n        self.num_samplers_per_worker = self.machine_params.nprocesses\n        self.num_samplers = self.num_samplers_per_worker[self.worker_id]\n\n        self._vector_tasks: Optional[\n            Union[VectorSampledTasks, SingleProcessVectorSampledTasks]\n        ] = None\n\n        self.sensor_preprocessor_graph = None\n        self.actor_critic: Optional[ActorCriticModel] = None\n\n        create_model_kwargs = {}\n        if self.machine_params.sensor_preprocessor_graph is not None:\n            self.sensor_preprocessor_graph = (\n                self.machine_params.sensor_preprocessor_graph.to(self.device)\n            )\n            create_model_kwargs[\"sensor_preprocessor_graph\"] = (\n                self.sensor_preprocessor_graph\n            )\n\n        set_seed(self.seed)\n        self.actor_critic = cast(\n            ActorCriticModel,\n            self.config.create_model(**create_model_kwargs),\n        ).to(self.device)\n\n        if initial_model_state_dict is not None:\n            if isinstance(initial_model_state_dict, int):\n                assert (\n                    md5_hash_of_state_dict(self.actor_critic.state_dict())\n                    == initial_model_state_dict\n                ), (\n                    f\"Could not reproduce the correct model state dict on worker {self.worker_id} despite seeding.\"\n                    f\" Please ensure that your model's initialization is reproducable when `set_seed(...)`\"\n                    f\"] has been called with a fixed seed before initialization.\"\n                )\n            else:\n                self.actor_critic.load_state_dict(\n                    state_dict=cast(\n                        \"OrderedDict[str, Tensor]\", initial_model_state_dict\n                    )\n                )\n        else:\n            assert mode != TRAIN_MODE_STR or self.num_workers == 1, (\n                \"When training with multiple workers 
you must pass a,\"\n                \" non-`None` value for the `initial_model_state_dict` argument.\"\n            )\n\n        if get_logger().level == logging.DEBUG:\n            model_hash = md5_hash_of_state_dict(self.actor_critic.state_dict())\n            get_logger().debug(\n                f\"[{self.mode} worker {self.worker_id}] model weights hash: {model_hash}\"\n            )\n\n        self.is_distributed = False\n        self.store: Optional[torch.distributed.TCPStore] = None  # type:ignore\n        if self.num_workers > 1:\n            self.store = torch.distributed.TCPStore(  # type:ignore\n                host_name=self.distributed_ip,\n                port=self.distributed_port,\n                world_size=self.num_workers,\n                is_master=self.worker_id == 0,\n                timeout=datetime.timedelta(\n                    seconds=3 * (DEBUG_VST_TIMEOUT if DEBUGGING else 1 * 60) + 300\n                ),\n            )\n            cpu_device = self.device == torch.device(\"cpu\")  # type:ignore\n\n            # \"gloo\" required during testing to ensure that `barrier()` doesn't time out.\n            backend = \"gloo\" if cpu_device or self.mode == TEST_MODE_STR else \"nccl\"\n            get_logger().debug(\n                f\"Worker {self.worker_id}: initializing distributed {backend} backend with device {self.device}.\"\n            )\n            dist.init_process_group(  # type:ignore\n                backend=backend,\n                store=self.store,\n                rank=self.worker_id,\n                world_size=self.num_workers,\n                # During testing, we sometimes found that default timeout was too short\n                # resulting in the run terminating surprisingly, we increase it here.\n                timeout=(\n                    datetime.timedelta(minutes=3000)\n                    if (self.mode == TEST_MODE_STR or DEBUGGING)\n                    else dist.default_pg_timeout\n                ),\n        
    )\n            self.is_distributed = True\n\n        self.deterministic_agents = deterministic_agents\n\n        self._is_closing: bool = (\n            False  # Useful for letting the RL runner know if this is closing\n        )\n        self._is_closed: bool = False\n\n        # Keeping track of metrics and losses during training/inference\n        self.single_process_metrics: List = []\n        self.single_process_task_callback_data: List = []\n        self.tracking_info_list: List[TrackingInfo] = []\n\n        # Variables that will only be instantiated in the trainer\n        self.optimizer: Optional[optim.optimizer.Optimizer] = None\n        # noinspection PyProtectedMember\n        self.lr_scheduler: Optional[_LRScheduler] = None\n        self.insufficient_data_for_update: Optional[torch.distributed.PrefixStore] = (\n            None\n        )\n\n        # Training pipeline will be instantiated during training and inference.\n        # During inference however, it will be instantiated anew on each run of `run_eval`\n        # and will be set to `None` after the eval run is complete.\n        self.training_pipeline: Optional[TrainingPipeline] = None\n\n    @property\n    def vector_tasks(\n        self,\n    ) -> Union[VectorSampledTasks, SingleProcessVectorSampledTasks]:\n        if self._vector_tasks is None and self.num_samplers > 0:\n            if self.is_distributed:\n                total_processes = sum(\n                    self.num_samplers_per_worker\n                )  # TODO this will break the fixed seed for multi-device test\n            else:\n                total_processes = self.num_samplers\n\n            seeds = self.worker_seeds(\n                total_processes,\n                initial_seed=self.seed,  # do not update the RNG state (creation might happen after seed resetting)\n            )\n\n            # TODO: The `self.max_sampler_processes_per_worker == 1` case below would be\n            #   great to have but it does not play 
nicely with us wanting to kill things\n            #   using SIGTERM/SIGINT signals. Would be nice to figure out a solution to\n            #   this at some point.\n            # if self.max_sampler_processes_per_worker == 1:\n            #     # No need to instantiate a new task sampler processes if we're\n            #     # restricted to one sampler process for this worker.\n            #     self._vector_tasks = SingleProcessVectorSampledTasks(\n            #         make_sampler_fn=self.config.make_sampler_fn,\n            #         sampler_fn_args_list=self.get_sampler_fn_args(seeds),\n            #     )\n            # else:\n            self._vector_tasks = VectorSampledTasks(\n                make_sampler_fn=self.config.make_sampler_fn,\n                sampler_fn_args=self.get_sampler_fn_args(seeds),\n                callback_sensors=self.callback_sensors,\n                multiprocessing_start_method=(\n                    \"forkserver\" if self.mp_ctx is None else None\n                ),\n                mp_ctx=self.mp_ctx,\n                max_processes=self.max_sampler_processes_per_worker,\n                read_timeout=DEBUG_VST_TIMEOUT if DEBUGGING else 1 * 60,\n            )\n        return self._vector_tasks\n\n    @staticmethod\n    def worker_seeds(nprocesses: int, initial_seed: Optional[int]) -> List[int]:\n        \"\"\"Create a collection of seeds for workers without modifying the RNG\n        state.\"\"\"\n        rstate = None  # type:ignore\n        if initial_seed is not None:\n            rstate = random.getstate()\n            random.seed(initial_seed)\n        seeds = [random.randint(0, (2**31) - 1) for _ in range(nprocesses)]\n        if initial_seed is not None:\n            random.setstate(rstate)\n        return seeds\n\n    def get_sampler_fn_args(self, seeds: Optional[List[int]] = None):\n        sampler_devices = self.machine_params.sampler_devices\n\n        if self.mode == TRAIN_MODE_STR:\n            fn = 
self.config.train_task_sampler_args\n        elif self.mode == VALID_MODE_STR:\n            fn = self.config.valid_task_sampler_args\n        elif self.mode == TEST_MODE_STR:\n            fn = self.config.test_task_sampler_args\n        else:\n            raise NotImplementedError(\n                f\"self.mode must be one of {TRAIN_MODE_STR}, {VALID_MODE_STR}, or {TEST_MODE_STR}.\"\n            )\n\n        if self.is_distributed:\n            total_processes = sum(self.num_samplers_per_worker)\n            process_offset = sum(self.num_samplers_per_worker[: self.worker_id])\n        else:\n            total_processes = self.num_samplers\n            process_offset = 0\n\n        sampler_devices_as_ints: Optional[List[int]] = None\n        if (\n            self.is_distributed or self.mode == TEST_MODE_STR\n        ) and self.device.index is not None:\n            sampler_devices_as_ints = [self.device.index]\n        elif sampler_devices is not None:\n            sampler_devices_as_ints = [\n                -1 if sd.index is None else sd.index for sd in sampler_devices\n            ]\n\n        return [\n            fn(\n                process_ind=process_offset + it,\n                total_processes=total_processes,\n                devices=sampler_devices_as_ints,\n                seeds=seeds,\n            )\n            for it in range(self.num_samplers)\n        ]\n\n    def checkpoint_load(\n        self, ckpt: Union[str, Dict[str, Any]], restart_pipeline: bool\n    ) -> Dict[str, Union[Dict[str, Any], torch.Tensor, float, int, str, List]]:\n        if isinstance(ckpt, str):\n            get_logger().info(\n                f\"[{self.mode} worker {self.worker_id}] Loading checkpoint from {ckpt}\"\n            )\n            # Map location CPU is almost always better than mapping to a CUDA device.\n            ckpt = torch.load(os.path.abspath(ckpt), map_location=\"cpu\")\n\n        ckpt = cast(\n            Dict[str, Union[Dict[str, Any], torch.Tensor, 
float, int, str, List]],\n            ckpt,\n        )\n\n        self.actor_critic.load_state_dict(ckpt[\"model_state_dict\"])  # type:ignore\n\n        if \"training_pipeline_state_dict\" in ckpt and not restart_pipeline:\n            self.training_pipeline.load_state_dict(\n                cast(Dict[str, Any], ckpt[\"training_pipeline_state_dict\"])\n            )\n\n        return ckpt\n\n    # aggregates task metrics currently in queue\n    def aggregate_task_metrics(\n        self,\n        logging_pkg: LoggingPackage,\n        num_tasks: int = -1,\n    ) -> LoggingPackage:\n        if num_tasks > 0:\n            if len(self.single_process_metrics) != num_tasks:\n                error_msg = (\n                    \"shorter\"\n                    if len(self.single_process_metrics) < num_tasks\n                    else \"longer\"\n                )\n                get_logger().error(\n                    f\"Metrics out is {error_msg} than expected number of tasks.\"\n                    \" This should only happen if a positive number of `num_tasks` were\"\n                    \" set during testing but the queue did not contain this number of entries.\"\n                    \" Please file an issue at https://github.com/allenai/allenact/issues.\"\n                )\n\n        num_empty_tasks_dequeued = 0\n\n        for metrics_dict in self.single_process_metrics:\n            num_empty_tasks_dequeued += not logging_pkg.add_metrics_dict(\n                single_task_metrics_dict=metrics_dict\n            )\n\n        self.single_process_metrics = []\n\n        if num_empty_tasks_dequeued != 0:\n            get_logger().warning(\n                f\"Discarded {num_empty_tasks_dequeued} empty task metrics\"\n            )\n\n        return logging_pkg\n\n    def _preprocess_observations(self, batched_observations):\n        if self.sensor_preprocessor_graph is None:\n            return batched_observations\n        return 
self.sensor_preprocessor_graph.get_observations(batched_observations)\n\n    def remove_paused(self, observations):\n        paused, keep, running = [], [], []\n        for it, obs in enumerate(observations):\n            if obs is None:\n                paused.append(it)\n            else:\n                keep.append(it)\n                running.append(obs)\n\n        for p in reversed(paused):\n            self.vector_tasks.pause_at(p)\n\n        # Group samplers along new dim:\n        batch = batch_observations(running, device=self.device)\n\n        return len(paused), keep, batch\n\n    def initialize_storage_and_viz(\n        self,\n        storage_to_initialize: Optional[Sequence[ExperienceStorage]],\n        visualizer: Optional[VizSuite] = None,\n    ):\n\n        keep: Optional[List] = None\n        if visualizer is not None or (\n            storage_to_initialize is not None\n            and any(isinstance(s, RolloutStorage) for s in storage_to_initialize)\n        ):\n            # A visualizer or a rollout storage is present, so initial observations are needed\n            observations = self.vector_tasks.get_observations()\n\n            npaused, keep, batch = self.remove_paused(observations)\n            observations = (\n                self._preprocess_observations(batch) if len(keep) > 0 else batch\n            )\n\n            assert npaused == 0, f\"{npaused} samplers are paused during initialization.\"\n\n            num_samplers = len(keep)\n        else:\n            observations = {}\n            num_samplers = 0\n            npaused = 0\n\n        recurrent_memory_specification = (\n            self.actor_critic.recurrent_memory_specification\n        )\n\n        if storage_to_initialize is not None:\n            for s in storage_to_initialize:\n                s.to(self.device)\n                s.set_partition(index=self.worker_id, num_parts=self.num_workers)\n                s.initialize(\n                    observations=observations,\n                    
num_samplers=num_samplers,\n                    recurrent_memory_specification=recurrent_memory_specification,\n                    action_space=self.actor_critic.action_space,\n                )\n\n        if visualizer is not None and num_samplers > 0:\n            visualizer.collect(vector_task=self.vector_tasks, alive=keep)\n\n        return npaused\n\n    @property\n    def num_active_samplers(self):\n        if self.vector_tasks is None:\n            return 0\n        return self.vector_tasks.num_unpaused_tasks\n\n    def act(\n        self,\n        rollout_storage: RolloutStorage,\n        dist_wrapper_class: Optional[type] = None,\n    ):\n        with torch.no_grad():\n            agent_input = rollout_storage.agent_input_for_next_step()\n            actor_critic_output, memory = self.actor_critic(**agent_input)\n\n            distr = actor_critic_output.distributions\n            if dist_wrapper_class is not None:\n                distr = dist_wrapper_class(distr=distr, obs=agent_input[\"observations\"])\n\n            actions = distr.sample() if not self.deterministic_agents else distr.mode()\n\n        return actions, actor_critic_output, memory, agent_input[\"observations\"]\n\n    def aggregate_and_send_logging_package(\n        self,\n        tracking_info_list: List[TrackingInfo],\n        logging_pkg: Optional[LoggingPackage] = None,\n        send_logging_package: bool = True,\n        checkpoint_file_name: Optional[str] = None,\n    ):\n        if logging_pkg is None:\n            logging_pkg = LoggingPackage(\n                mode=self.mode,\n                training_steps=self.training_pipeline.total_steps,\n                pipeline_stage=self.training_pipeline.current_stage_index,\n                storage_uuid_to_total_experiences=self.training_pipeline.storage_uuid_to_total_experiences,\n                checkpoint_file_name=checkpoint_file_name,\n            )\n\n        self.aggregate_task_metrics(logging_pkg=logging_pkg)\n\n        for 
callback_dict in self.single_process_task_callback_data:\n            logging_pkg.task_callback_data.append(callback_dict)\n        self.single_process_task_callback_data = []\n\n        for tracking_info in tracking_info_list:\n            if tracking_info.n < 0:\n                get_logger().warning(\n                    f\"Obtained a train_info_dict with {tracking_info.n} elements.\"\n                    f\" Full info: ({tracking_info.type}, {tracking_info.info}, {tracking_info.n}).\"\n                )\n            else:\n                tracking_info_dict = tracking_info.info\n\n                if tracking_info.type == TrackingInfoType.LOSS:\n                    tracking_info_dict = {\n                        f\"losses/{k}\": v for k, v in tracking_info_dict.items()\n                    }\n\n                logging_pkg.add_info_dict(\n                    info_dict=tracking_info_dict,\n                    n=tracking_info.n,\n                    stage_component_uuid=tracking_info.stage_component_uuid,\n                    storage_uuid=tracking_info.storage_uuid,\n                )\n\n        if send_logging_package:\n            self.results_queue.put(logging_pkg)\n\n        return logging_pkg\n\n    @staticmethod\n    def _active_memory(memory, keep):\n        return memory.sampler_select(keep) if memory is not None else memory\n\n    def probe(self, dones: List[bool], npaused, period=100000):\n        \"\"\"Debugging util. When called from\n        self.collect_step_across_all_task_samplers(...), calls render for the\n        0-th task sampler of the 0-th distributed worker for the first\n        beginning episode spaced at least period steps from the beginning of\n        the previous one.\n\n        For valid, train, it currently renders all episodes for the 0-th task sampler of the\n        0-th distributed worker. 
If this is not wanted, it must be hard-coded for now below.\n\n        # Parameters\n\n        dones : dones list from self.collect_step_across_all_task_samplers(...)\n        npaused : number of newly paused tasks returned by self.remove_paused(...)\n        period : minimal spacing in sampled steps between the beginning of episodes to be shown.\n        \"\"\"\n        sampler_id = 0\n        done = dones[sampler_id]\n        if self.mode != TRAIN_MODE_STR:\n            setattr(\n                self, \"_probe_npaused\", getattr(self, \"_probe_npaused\", 0) + npaused\n            )\n            if self._probe_npaused == self.num_samplers:  # type:ignore\n                del self._probe_npaused  # type:ignore\n                return\n            period = 0\n        if self.worker_id == 0:\n            if done:\n                if period > 0 and (\n                    getattr(self, \"_probe_steps\", None) is None\n                    or (\n                        self._probe_steps < 0  # type:ignore\n                        and (\n                            self.training_pipeline.total_steps\n                            + self._probe_steps  # type:ignore\n                        )\n                        >= period\n                    )\n                ):\n                    self._probe_steps = self.training_pipeline.total_steps\n            if period == 0 or (\n                getattr(self, \"_probe_steps\", None) is not None\n                and self._probe_steps >= 0\n                and ((self.training_pipeline.total_steps - self._probe_steps) < period)\n            ):\n                if (\n                    period == 0\n                    or not done\n                    or self._probe_steps == self.training_pipeline.total_steps\n                ):\n                    self.vector_tasks.call_at(sampler_id, \"render\", [\"human\"])\n                else:\n                    # noinspection PyAttributeOutsideInit\n                    self._probe_steps = 
-self._probe_steps\n\n    def collect_step_across_all_task_samplers(\n        self,\n        rollout_storage_uuid: str,\n        uuid_to_storage: Dict[str, ExperienceStorage],\n        visualizer=None,\n        dist_wrapper_class=None,\n    ) -> int:\n        rollout_storage = cast(RolloutStorage, uuid_to_storage[rollout_storage_uuid])\n        actions, actor_critic_output, memory, _ = self.act(\n            rollout_storage=rollout_storage,\n            dist_wrapper_class=dist_wrapper_class,\n        )\n\n        # Flatten actions\n        flat_actions = su.flatten(self.actor_critic.action_space, actions)\n\n        assert len(flat_actions.shape) == 3, (\n            \"Distribution samples must include step and task sampler dimensions [step, sampler, ...]. The simplest way\"\n            \"to accomplish this is to pass param tensors (like `logits` in a `CategoricalDistr`) with these dimensions\"\n            \"to the Distribution.\"\n        )\n\n        # Convert flattened actions into list of actions and send them\n        outputs: List[RLStepResult] = self.vector_tasks.step(\n            su.action_list(self.actor_critic.action_space, flat_actions)\n        )\n\n        # Save after task completion metrics\n        for step_result in outputs:\n            if step_result.info is not None:\n                if COMPLETE_TASK_METRICS_KEY in step_result.info:\n                    self.single_process_metrics.append(\n                        step_result.info[COMPLETE_TASK_METRICS_KEY]\n                    )\n                    del step_result.info[COMPLETE_TASK_METRICS_KEY]\n                if COMPLETE_TASK_CALLBACK_KEY in step_result.info:\n                    self.single_process_task_callback_data.append(\n                        step_result.info[COMPLETE_TASK_CALLBACK_KEY]\n                    )\n                    del step_result.info[COMPLETE_TASK_CALLBACK_KEY]\n\n        rewards: Union[List, torch.Tensor]\n        observations, rewards, dones, infos = [list(x) 
for x in zip(*outputs)]\n\n        rewards = torch.tensor(\n            rewards,\n            dtype=torch.float,\n            device=self.device,  # type:ignore\n        )\n\n        # We want rewards to have dimensions [sampler, reward]\n        if len(rewards.shape) == 1:\n            # Rewards are of shape [sampler,]\n            rewards = rewards.unsqueeze(-1)\n        elif len(rewards.shape) > 1:\n            raise NotImplementedError()\n\n        # If done then clean the history of observations.\n        masks = (\n            1.0\n            - torch.tensor(\n                dones,\n                dtype=torch.float32,\n                device=self.device,  # type:ignore\n            )\n        ).view(\n            -1, 1\n        )  # [sampler, 1]\n\n        npaused, keep, batch = self.remove_paused(observations)\n\n        if hasattr(self.actor_critic, \"sampler_select\"):\n            self.actor_critic.sampler_select(keep)\n\n        # TODO self.probe(...) can be useful for debugging (we might want to control it from main?)\n        # self.probe(dones, npaused)\n\n        if npaused > 0:\n            if self.mode == TRAIN_MODE_STR:\n                raise NotImplementedError(\n                    \"When trying to get a new task from a task sampler (using the `.next_task()` method)\"\n                    \" the task sampler returned `None`. 
This is not currently supported during training\"\n                    \" (and almost certainly a bug in the implementation of the task sampler or in the \"\n                    \" initialization of the task sampler for training).\"\n                )\n\n            for s in uuid_to_storage.values():\n                if isinstance(s, RolloutStorage):\n                    s.sampler_select(keep)\n\n        to_add_to_storage = dict(\n            observations=(\n                self._preprocess_observations(batch) if len(keep) > 0 else batch\n            ),\n            memory=self._active_memory(memory, keep),\n            actions=flat_actions[0, keep],\n            action_log_probs=actor_critic_output.distributions.log_prob(actions)[\n                0, keep\n            ],\n            value_preds=actor_critic_output.values[0, keep],\n            rewards=rewards[keep],\n            masks=masks[keep],\n        )\n        for storage in uuid_to_storage.values():\n            storage.add(**to_add_to_storage)\n\n        # TODO we always miss tensors for the last action in the last episode of each worker\n        if visualizer is not None:\n            if len(keep) > 0:\n                visualizer.collect(\n                    rollout=rollout_storage,\n                    vector_task=self.vector_tasks,\n                    alive=keep,\n                    actor_critic=actor_critic_output,\n                )\n            else:\n                visualizer.collect(actor_critic=actor_critic_output)\n\n        return npaused\n\n    def distributed_weighted_sum(\n        self,\n        to_share: Union[torch.Tensor, float, int],\n        weight: Union[torch.Tensor, float, int],\n    ):\n        \"\"\"Weighted sum of scalar across distributed workers.\"\"\"\n        if self.is_distributed:\n            aggregate = torch.tensor(to_share * weight).to(self.device)\n            dist.all_reduce(aggregate)\n            return aggregate.item()\n        else:\n            if abs(1 - 
weight) > 1e-5:\n                get_logger().warning(\n                    f\"Scaling non-distributed value with weight {weight}\"\n                )\n            return torch.tensor(to_share * weight).item()\n\n    def distributed_reduce(\n        self, to_share: Union[torch.Tensor, float, int], op: ReduceOp\n    ):\n        \"\"\"Reduce a scalar across distributed workers using `op`.\"\"\"\n        if self.is_distributed:\n            aggregate = torch.tensor(to_share).to(self.device)\n            dist.all_reduce(aggregate, op=op)\n            return aggregate.item()\n        else:\n            return torch.tensor(to_share).item()\n\n    def backprop_step(\n        self,\n        total_loss: torch.Tensor,\n        max_grad_norm: float,\n        local_to_global_batch_size_ratio: float = 1.0,\n    ):\n        raise NotImplementedError\n\n    def save_error_data(self, batch: Dict[str, Any]):\n        raise NotImplementedError\n\n    @property\n    def step_count(self) -> int:\n        if (\n            self.training_pipeline.current_stage is None\n        ):  # Might occur during testing when all stages are complete\n            return 0\n        return self.training_pipeline.current_stage.steps_taken_in_stage\n\n    def compute_losses_track_them_and_backprop(\n        self,\n        stage: PipelineStage,\n        stage_component: StageComponent,\n        storage: ExperienceStorage,\n        skip_backprop: bool = False,\n    ):\n        training = self.mode == TRAIN_MODE_STR\n\n        assert training or skip_backprop\n\n        if training and self.is_distributed:\n            self.insufficient_data_for_update.set(\n                \"insufficient_data_for_update\", str(0)\n            )\n            dist.barrier(\n                device_ids=(\n                    None if self.device == torch.device(\"cpu\") else [self.device.index]\n                )\n            )\n\n        training_settings = stage_component.training_settings\n\n        loss_names = 
stage_component.loss_names\n        losses = [self.training_pipeline.get_loss(ln) for ln in loss_names]\n        loss_weights = [stage.uuid_to_loss_weight[ln] for ln in loss_names]\n        loss_update_repeats_list = training_settings.update_repeats\n        if isinstance(loss_update_repeats_list, numbers.Integral):\n            loss_update_repeats_list = [loss_update_repeats_list] * len(loss_names)\n\n        if skip_backprop and isinstance(storage, MiniBatchStorageMixin):\n            if loss_update_repeats_list != [1] * len(loss_names):\n                loss_update_repeats_list = [1] * len(loss_names)\n                get_logger().warning(\n                    \"Does not make sense to do multiple updates when\"\n                    \" skip_backprop is `True` and you are using a storage of type\"\n                    \" `MiniBatchStorageMixin`. This is likely a problem caused by\"\n                    \" using a custom valid/test stage component that is inheriting its\"\n                    \" TrainingSettings from the TrainingPipeline's TrainingSettings. 
We will override\"\n                    \" the requested number of updates repeats (which was\"\n                    f\" {dict(zip(loss_names, loss_update_repeats_list))}) to be 1 for all losses.\"\n                )\n\n        enough_data_for_update = True\n        for current_update_repeat_index in range(\n            max(loss_update_repeats_list, default=0)\n        ):\n            if isinstance(storage, MiniBatchStorageMixin):\n                batch_iterator = storage.batched_experience_generator(\n                    num_mini_batch=training_settings.num_mini_batch\n                )\n            elif isinstance(storage, StreamingStorageMixin):\n                assert (\n                    training_settings.num_mini_batch is None\n                    or training_settings.num_mini_batch == 1\n                )\n\n                def single_batch_generator(streaming_storage: StreamingStorageMixin):\n                    try:\n                        yield cast(\n                            StreamingStorageMixin, streaming_storage\n                        ).next_batch()\n                    except EOFError:\n                        if not training:\n                            raise\n\n                        if streaming_storage.empty():\n                            yield None\n                        else:\n                            cast(\n                                StreamingStorageMixin, streaming_storage\n                            ).reset_stream()\n                            stage.stage_component_uuid_to_stream_memory[\n                                stage_component.uuid\n                            ].clear()\n                            yield cast(\n                                StreamingStorageMixin, streaming_storage\n                            ).next_batch()\n\n                batch_iterator = single_batch_generator(streaming_storage=storage)\n            else:\n                raise NotImplementedError(\n                    f\"Storage 
{storage} must be a subclass of `MiniBatchStorageMixin` or `StreamingStorageMixin`.\"\n                )\n\n            for batch in batch_iterator:\n                if batch is None:\n                    # This should only happen in a `StreamingStorageMixin` when it cannot\n                    # generate an initial batch or when we are in testing/validation and\n                    # we've reached the end of the dataset over which to test/validate.\n                    if training:\n                        assert isinstance(storage, StreamingStorageMixin)\n                        get_logger().warning(\n                            f\"Worker {self.worker_id}: could not run update in {storage}, potentially because\"\n                            f\" not enough data has been accumulated to be able to fill an initial batch.\"\n                        )\n                    else:\n                        pass\n                    enough_data_for_update = False\n\n                if training and self.is_distributed:\n                    self.insufficient_data_for_update.add(\n                        \"insufficient_data_for_update\",\n                        1 * (not enough_data_for_update),\n                    )\n                    dist.barrier(\n                        device_ids=(\n                            None\n                            if self.device == torch.device(\"cpu\")\n                            else [self.device.index]\n                        )\n                    )\n\n                    if (\n                        int(\n                            self.insufficient_data_for_update.get(\n                                \"insufficient_data_for_update\"\n                            )\n                        )\n                        != 0\n                    ):\n                        enough_data_for_update = False\n                        break\n\n                info: Dict[str, float] = {}\n\n                bsize: Optional[int] = None\n        
        total_loss: Optional[torch.Tensor] = None\n                actor_critic_output_for_batch: Optional[ActorCriticOutput] = None\n                batch_memory = Memory()\n\n                for loss, loss_name, loss_weight, max_update_repeats_for_loss in zip(\n                    losses, loss_names, loss_weights, loss_update_repeats_list\n                ):\n                    if current_update_repeat_index >= max_update_repeats_for_loss:\n                        continue\n\n                    if isinstance(loss, AbstractActorCriticLoss):\n                        bsize = batch[\"bsize\"]\n\n                        if actor_critic_output_for_batch is None:\n\n                            try:\n                                actor_critic_output_for_batch, _ = self.actor_critic(\n                                    observations=batch[\"observations\"],\n                                    memory=batch[\"memory\"],\n                                    prev_actions=batch[\"prev_actions\"],\n                                    masks=batch[\"masks\"],\n                                )\n                            except ValueError:\n                                save_path = self.save_error_data(batch=batch)\n                                get_logger().error(\n                                    f\"Encountered a value error! 
Likely because of nans in the output/input.\"\n                                    f\" Saving all error information to {save_path}.\"\n                                )\n                                raise\n\n                        loss_return = loss.loss(\n                            step_count=self.step_count,\n                            batch=batch,\n                            actor_critic_output=actor_critic_output_for_batch,\n                        )\n\n                        per_epoch_info = {}\n                        if len(loss_return) == 2:\n                            current_loss, current_info = loss_return\n                        elif len(loss_return) == 3:\n                            current_loss, current_info, per_epoch_info = loss_return\n                        else:\n                            raise NotImplementedError\n\n                    elif isinstance(loss, GenericAbstractLoss):\n                        loss_output = loss.loss(\n                            model=self.actor_critic,\n                            batch=batch,\n                            batch_memory=batch_memory,\n                            stream_memory=stage.stage_component_uuid_to_stream_memory[\n                                stage_component.uuid\n                            ],\n                        )\n                        current_loss = loss_output.value\n                        current_info = loss_output.info\n                        per_epoch_info = loss_output.per_epoch_info\n                        batch_memory = loss_output.batch_memory\n                        stage.stage_component_uuid_to_stream_memory[\n                            stage_component.uuid\n                        ] = loss_output.stream_memory\n                        bsize = loss_output.bsize\n                    else:\n                        raise NotImplementedError(\n                            f\"Loss of type {type(loss)} is not supported. 
Losses must be subclasses of\"\n                            f\" `AbstractActorCriticLoss` or `GenericAbstractLoss`.\"\n                        )\n\n                    if total_loss is None:\n                        total_loss = loss_weight * current_loss\n                    else:\n                        total_loss = total_loss + loss_weight * current_loss\n\n                    for key, value in current_info.items():\n                        info[f\"{loss_name}/{key}\"] = value\n\n                    if per_epoch_info is not None:\n                        for key, value in per_epoch_info.items():\n                            if max(loss_update_repeats_list, default=0) > 1:\n                                info[\n                                    f\"{loss_name}/{key}_epoch{current_update_repeat_index:02d}\"\n                                ] = value\n                                info[f\"{loss_name}/{key}_combined\"] = value\n                            else:\n                                info[f\"{loss_name}/{key}\"] = value\n\n                assert total_loss is not None, (\n                    f\"No {stage_component.uuid} losses specified for training in stage\"\n                    f\" {self.training_pipeline.current_stage_index}\"\n                )\n\n                total_loss_scalar = total_loss.item()\n                info[f\"total_loss\"] = total_loss_scalar\n\n                self.tracking_info_list.append(\n                    TrackingInfo(\n                        type=TrackingInfoType.LOSS,\n                        info=info,\n                        n=bsize,\n                        storage_uuid=stage_component.storage_uuid,\n                        stage_component_uuid=stage_component.uuid,\n                    )\n                )\n\n                to_track = {\n                    \"rollout_epochs\": max(loss_update_repeats_list, default=0),\n                    \"worker_batch_size\": bsize,\n                }\n\n                
aggregate_bsize = None\n                if training:\n                    aggregate_bsize = self.distributed_weighted_sum(bsize, 1)\n                    to_track[\"global_batch_size\"] = aggregate_bsize\n                    to_track[\"lr\"] = self.optimizer.param_groups[0][\"lr\"]\n\n                if training_settings.num_mini_batch is not None:\n                    to_track[\"rollout_num_mini_batch\"] = (\n                        training_settings.num_mini_batch\n                    )\n\n                for k, v in to_track.items():\n                    # We need to set the bsize to 1 for `worker_batch_size` below as we're trying to record the\n                    # average batch size per worker, not the average per worker weighted by the size of the batches\n                    # of those workers.\n                    self.tracking_info_list.append(\n                        TrackingInfo(\n                            type=TrackingInfoType.UPDATE_INFO,\n                            info={k: v},\n                            n=1 if k == \"worker_batch_size\" else bsize,\n                            storage_uuid=stage_component.storage_uuid,\n                            stage_component_uuid=stage_component.uuid,\n                        )\n                    )\n\n                if not skip_backprop:\n                    total_grad_norm = self.backprop_step(\n                        total_loss=total_loss,\n                        max_grad_norm=training_settings.max_grad_norm,\n                        local_to_global_batch_size_ratio=bsize / aggregate_bsize,\n                    )\n                    self.tracking_info_list.append(\n                        TrackingInfo(\n                            type=TrackingInfoType.UPDATE_INFO,\n                            info={\"total_grad_norm\": total_grad_norm},\n                            n=bsize,\n                            storage_uuid=stage_component.storage_uuid,\n                            
stage_component_uuid=stage_component.uuid,\n                        )\n                    )\n\n                stage.stage_component_uuid_to_stream_memory[stage_component.uuid] = (\n                    detach_recursively(\n                        input=stage.stage_component_uuid_to_stream_memory[\n                            stage_component.uuid\n                        ],\n                        inplace=True,\n                    )\n                )\n\n    def close(self, verbose=True):\n        self._is_closing = True\n\n        if \"_is_closed\" in self.__dict__ and self._is_closed:\n            return\n\n        def logif(s: Union[str, Exception]):\n            if verbose:\n                if isinstance(s, str):\n                    get_logger().info(s)\n                elif isinstance(s, Exception):\n                    get_logger().error(traceback.format_exc())\n                else:\n                    raise NotImplementedError()\n\n        if \"_vector_tasks\" in self.__dict__ and self._vector_tasks is not None:\n            try:\n                logif(\n                    f\"[{self.mode} worker {self.worker_id}] Closing OnPolicyRLEngine.vector_tasks.\"\n                )\n                self._vector_tasks.close()\n                logif(f\"[{self.mode} worker {self.worker_id}] Closed.\")\n            except Exception as e:\n                logif(\n                    f\"[{self.mode} worker {self.worker_id}] Exception raised when closing OnPolicyRLEngine.vector_tasks:\"\n                )\n                logif(e)\n\n        self._is_closed = True\n        self._is_closing = False\n\n    @property\n    def is_closed(self):\n        return self._is_closed\n\n    @property\n    def is_closing(self):\n        return self._is_closing\n\n    def __del__(self):\n        self.close(verbose=False)\n\n    def __enter__(self):\n        return self\n\n    def __exit__(self, exc_type, exc_val, exc_tb):\n        self.close(verbose=False)\n\n\nclass 
OnPolicyTrainer(OnPolicyRLEngine):\n    def __init__(\n        self,\n        experiment_name: str,\n        config: ExperimentConfig,\n        results_queue: mp.Queue,\n        checkpoints_queue: Optional[mp.Queue],\n        checkpoints_dir: str = \"\",\n        seed: Optional[int] = None,\n        deterministic_cudnn: bool = False,\n        mp_ctx: Optional[BaseContext] = None,\n        worker_id: int = 0,\n        num_workers: int = 1,\n        device: Union[str, torch.device, int] = \"cpu\",\n        distributed_ip: str = \"127.0.0.1\",\n        distributed_port: int = 0,\n        deterministic_agents: bool = False,\n        distributed_preemption_threshold: float = 0.7,\n        max_sampler_processes_per_worker: Optional[int] = None,\n        save_ckpt_after_every_pipeline_stage: bool = True,\n        first_local_worker_id: int = 0,\n        save_ckpt_at_every_host: bool = False,\n        **kwargs,\n    ):\n        kwargs[\"mode\"] = TRAIN_MODE_STR\n        super().__init__(\n            experiment_name=experiment_name,\n            config=config,\n            results_queue=results_queue,\n            checkpoints_queue=checkpoints_queue,\n            checkpoints_dir=checkpoints_dir,\n            seed=seed,\n            deterministic_cudnn=deterministic_cudnn,\n            mp_ctx=mp_ctx,\n            worker_id=worker_id,\n            num_workers=num_workers,\n            device=device,\n            distributed_ip=distributed_ip,\n            distributed_port=distributed_port,\n            deterministic_agents=deterministic_agents,\n            max_sampler_processes_per_worker=max_sampler_processes_per_worker,\n            **kwargs,\n        )\n\n        self.save_ckpt_after_every_pipeline_stage = save_ckpt_after_every_pipeline_stage\n\n        self.actor_critic.train()\n\n        self.training_pipeline: TrainingPipeline = config.training_pipeline()\n\n        if self.num_workers != 1:\n            # Ensure that we're only using early stopping criterions in the 
non-distributed setting.\n            if any(\n                stage.early_stopping_criterion is not None\n                for stage in self.training_pipeline.pipeline_stages\n            ):\n                raise NotImplementedError(\n                    \"Early stopping criterions are currently only allowed when using a single training worker, i.e.\"\n                    \" no distributed (multi-GPU) training. If this is a feature you'd like please create an issue\"\n                    \" at https://github.com/allenai/allenact/issues or (even better) create a pull request with this \"\n                    \" feature and we'll be happy to review it.\"\n                )\n\n        self.optimizer: optim.optimizer.Optimizer = (\n            self.training_pipeline.optimizer_builder(\n                params=[p for p in self.actor_critic.parameters() if p.requires_grad]\n            )\n        )\n\n        # noinspection PyProtectedMember\n        self.lr_scheduler: Optional[_LRScheduler] = None\n        if self.training_pipeline.lr_scheduler_builder is not None:\n            self.lr_scheduler = self.training_pipeline.lr_scheduler_builder(\n                optimizer=self.optimizer\n            )\n\n        if self.is_distributed:\n            # Tracks how many workers have finished their rollout\n            self.num_workers_done = torch.distributed.PrefixStore(  # type:ignore\n                \"num_workers_done\", self.store\n            )\n            # Tracks the number of steps taken by each worker in current rollout\n            self.num_workers_steps = torch.distributed.PrefixStore(  # type:ignore\n                \"num_workers_steps\", self.store\n            )\n            self.distributed_preemption_threshold = distributed_preemption_threshold\n            # Flag for finished worker in current epoch\n            self.offpolicy_epoch_done = torch.distributed.PrefixStore(  # type:ignore\n                \"offpolicy_epoch_done\", self.store\n            )\n      
      # Flag for finished worker in current epoch with custom component\n            self.insufficient_data_for_update = (\n                torch.distributed.PrefixStore(  # type:ignore\n                    \"insufficient_data_for_update\", self.store\n                )\n            )\n        else:\n            self.num_workers_done = None\n            self.num_workers_steps = None\n            self.distributed_preemption_threshold = 1.0\n            self.offpolicy_epoch_done = None\n\n        # Keeping track of training state\n        self.former_steps: Optional[int] = None\n        self.last_log: Optional[int] = None\n        self.last_save: Optional[int] = None\n        # The `self._last_aggregated_train_task_metrics` attribute defined\n        # below is used for early stopping criterion computations\n        self._last_aggregated_train_task_metrics: ScalarMeanTracker = (\n            ScalarMeanTracker()\n        )\n\n        self.first_local_worker_id = first_local_worker_id\n        self.save_ckpt_at_every_host = save_ckpt_at_every_host\n\n    def advance_seed(\n        self, seed: Optional[int], return_same_seed_per_worker=False\n    ) -> Optional[int]:\n        if seed is None:\n            return seed\n        seed = (seed ^ (self.training_pipeline.total_steps + 1)) % (\n            2**31 - 1\n        )  # same seed for all workers\n\n        if (not return_same_seed_per_worker) and (\n            self.mode == TRAIN_MODE_STR or self.mode == TEST_MODE_STR\n        ):\n            return self.worker_seeds(self.num_workers, seed)[\n                self.worker_id\n            ]  # doesn't modify the current rng state\n        else:\n            return self.worker_seeds(1, seed)[0]  # doesn't modify the current rng state\n\n    def deterministic_seeds(self) -> None:\n        if self.seed is not None:\n            set_seed(self.advance_seed(self.seed))  # known state for all workers\n            seeds = self.worker_seeds(\n                self.num_samplers, 
None\n            )  # use the latest seed for workers and update rng state\n            if self.vector_tasks is not None:\n                self.vector_tasks.set_seeds(seeds)\n\n    def save_error_data(self, batch: Dict[str, Any]) -> str:\n        model_path = os.path.join(\n            self.checkpoints_dir,\n            \"error_for_exp_{}__stage_{:02d}__steps_{:012d}.pt\".format(\n                self.experiment_name,\n                self.training_pipeline.current_stage_index,\n                self.training_pipeline.total_steps,\n            ),\n        )\n        with filelock.FileLock(\n            os.path.join(self.checkpoints_dir, \"error.lock\"), timeout=60\n        ):\n            if not os.path.exists(model_path):\n                save_dict = {\n                    \"model_state_dict\": self.actor_critic.state_dict(),  # type:ignore\n                    \"total_steps\": self.training_pipeline.total_steps,  # Total steps including current stage\n                    \"optimizer_state_dict\": self.optimizer.state_dict(),  # type: ignore\n                    \"training_pipeline_state_dict\": self.training_pipeline.state_dict(),\n                    \"trainer_seed\": self.seed,\n                    \"batch\": batch,\n                }\n\n                if self.lr_scheduler is not None:\n                    save_dict[\"scheduler_state\"] = cast(\n                        _LRScheduler, self.lr_scheduler\n                    ).state_dict()\n\n                torch.save(save_dict, model_path)\n        return model_path\n\n    def aggregate_and_send_logging_package(\n        self,\n        tracking_info_list: List[TrackingInfo],\n        logging_pkg: Optional[LoggingPackage] = None,\n        send_logging_package: bool = True,\n        checkpoint_file_name: Optional[str] = None,\n    ):\n        logging_pkg = super().aggregate_and_send_logging_package(\n            tracking_info_list=tracking_info_list,\n            logging_pkg=logging_pkg,\n            
send_logging_package=send_logging_package,\n            checkpoint_file_name=checkpoint_file_name,\n        )\n\n        if self.mode == TRAIN_MODE_STR:\n            # Technically self.mode should always be \"train\" here (as this is the training engine),\n            # this conditional is defensive\n            self._last_aggregated_train_task_metrics.add_scalars(\n                scalars=logging_pkg.metrics_tracker.means(),\n                n=logging_pkg.metrics_tracker.counts(),\n            )\n\n        return logging_pkg\n\n    def checkpoint_save(self, pipeline_stage_index: Optional[int] = None) -> str:\n        model_path = os.path.join(\n            self.checkpoints_dir,\n            \"exp_{}__stage_{:02d}__steps_{:012d}.pt\".format(\n                self.experiment_name,\n                (\n                    self.training_pipeline.current_stage_index\n                    if pipeline_stage_index is None\n                    else pipeline_stage_index\n                ),\n                self.training_pipeline.total_steps,\n            ),\n        )\n\n        save_dict = {\n            \"model_state_dict\": self.actor_critic.state_dict(),  # type:ignore\n            \"total_steps\": self.training_pipeline.total_steps,  # Total steps including current stage\n            \"optimizer_state_dict\": self.optimizer.state_dict(),  # type: ignore\n            \"training_pipeline_state_dict\": self.training_pipeline.state_dict(),\n            \"trainer_seed\": self.seed,\n        }\n\n        if self.lr_scheduler is not None:\n            save_dict[\"scheduler_state\"] = cast(\n                _LRScheduler, self.lr_scheduler\n            ).state_dict()\n\n        torch.save(save_dict, model_path)\n        return model_path\n\n    def checkpoint_load(\n        self, ckpt: Union[str, Dict[str, Any]], restart_pipeline: bool = False\n    ) -> Dict[str, Union[Dict[str, Any], torch.Tensor, float, int, str, List]]:\n        if restart_pipeline:\n            if 
\"training_pipeline_state_dict\" in ckpt:\n                del ckpt[\"training_pipeline_state_dict\"]\n\n        ckpt = super().checkpoint_load(ckpt, restart_pipeline=restart_pipeline)\n\n        if restart_pipeline:\n            self.training_pipeline.restart_pipeline()\n        else:\n            self.seed = cast(int, ckpt[\"trainer_seed\"])\n            self.optimizer.load_state_dict(ckpt[\"optimizer_state_dict\"])  # type: ignore\n            if self.lr_scheduler is not None and \"scheduler_state\" in ckpt:\n                self.lr_scheduler.load_state_dict(ckpt[\"scheduler_state\"])  # type: ignore\n\n        self.deterministic_seeds()\n\n        return ckpt\n\n    @property\n    def step_count(self):\n        return self.training_pipeline.current_stage.steps_taken_in_stage\n\n    @step_count.setter\n    def step_count(self, val: int) -> None:\n        self.training_pipeline.current_stage.steps_taken_in_stage = val\n\n    @property\n    def log_interval(self):\n        return (\n            self.training_pipeline.current_stage.training_settings.metric_accumulate_interval\n        )\n\n    @property\n    def approx_steps(self):\n        if self.is_distributed:\n            # the actual number of steps gets synchronized after each rollout\n            return (\n                self.step_count - self.former_steps\n            ) * self.num_workers + self.former_steps\n        else:\n            return self.step_count  # this is actually accurate\n\n    def act(\n        self,\n        rollout_storage: RolloutStorage,\n        dist_wrapper_class: Optional[type] = None,\n    ):\n        if self.training_pipeline.current_stage.teacher_forcing is not None:\n            assert dist_wrapper_class is None\n\n            def tracking_callback(type: TrackingInfoType, info: Dict[str, Any], n: int):\n                self.tracking_info_list.append(\n                    TrackingInfo(\n                        type=type,\n                        info=info,\n                      
  n=n,\n                        storage_uuid=self.training_pipeline.rollout_storage_uuid,\n                        stage_component_uuid=None,\n                    )\n                )\n\n            dist_wrapper_class = partial(\n                TeacherForcingDistr,\n                action_space=self.actor_critic.action_space,\n                num_active_samplers=self.num_active_samplers,\n                approx_steps=self.approx_steps,\n                teacher_forcing=self.training_pipeline.current_stage.teacher_forcing,\n                tracking_callback=tracking_callback,\n            )\n\n        actions, actor_critic_output, memory, step_observation = super().act(\n            rollout_storage=rollout_storage,\n            dist_wrapper_class=dist_wrapper_class,\n        )\n\n        self.step_count += self.num_active_samplers\n\n        return actions, actor_critic_output, memory, step_observation\n\n    def advantage_stats(self, advantages: torch.Tensor) -> Dict[str, torch.Tensor]:\n        r\"\"\"Computes the mean and variances of advantages (possibly over multiple workers).\n        For multiple workers, this method is equivalent to first collecting all versions of\n        advantages and then computing the mean and variance locally over that.\n\n        # Parameters\n\n        advantages: Tensors to compute mean and variance over. 
Assumed to be solely the\n         worker's local copy of this tensor, the resultant mean and variance will be computed\n         as though _all_ workers' versions of this tensor were concatenated together in\n         distributed training.\n        \"\"\"\n\n        # Step count has already been updated with the steps from all workers\n        global_rollout_steps = self.step_count - self.former_steps\n\n        if self.is_distributed:\n            summed_advantages = advantages.sum()\n            dist.all_reduce(summed_advantages)\n            mean = summed_advantages / global_rollout_steps\n\n            summed_squares = (advantages - mean).pow(2).sum()\n            dist.all_reduce(summed_squares)\n            std = (summed_squares / (global_rollout_steps - 1)).sqrt()\n        else:\n            # noinspection PyArgumentList\n            mean, std = advantages.mean(), advantages.std()\n\n        return {\"mean\": mean, \"std\": std}\n\n    def backprop_step(\n        self,\n        total_loss: torch.Tensor,\n        max_grad_norm: float,\n        local_to_global_batch_size_ratio: float = 1.0,\n    ):\n        self.optimizer.zero_grad()  # type: ignore\n        if isinstance(total_loss, torch.Tensor):\n            total_loss.backward()\n\n        if self.is_distributed:\n            # From https://github.com/pytorch/pytorch/issues/43135\n            reductions, all_params = [], []\n            for p in self.actor_critic.parameters():\n                # you can also organize grads to larger buckets to make all_reduce more efficient\n                if p.requires_grad:\n                    if p.grad is None:\n                        p.grad = torch.zeros_like(p.data)\n                    else:  # local_global_batch_size_tuple is not None, since we're distributed:\n                        p.grad = p.grad * local_to_global_batch_size_ratio\n                    reductions.append(\n                        dist.all_reduce(\n                            p.grad,\n           
                 async_op=True,\n                        )  # sum\n                    )  # synchronize\n                    all_params.append(p)\n            for reduction, p in zip(reductions, all_params):\n                reduction.wait()\n\n        if hasattr(self.actor_critic, \"compute_total_grad_norm\"):\n            total_grad_norm = self.actor_critic.compute_total_grad_norm().item()\n        else:\n            total_grad_norm = 0.0\n\n        nn.utils.clip_grad_norm_(\n            self.actor_critic.parameters(),\n            max_norm=max_grad_norm,  # type: ignore\n        )\n\n        self.optimizer.step()  # type: ignore\n        return total_grad_norm\n\n    def _save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter(\n        self, pipeline_stage_index: Optional[int] = None\n    ):\n        model_path = None\n        self.deterministic_seeds()\n        if (\n            self.save_ckpt_at_every_host\n            and self.worker_id == self.first_local_worker_id\n        ) or self.worker_id == 0:\n            model_path = self.checkpoint_save(pipeline_stage_index=pipeline_stage_index)\n            if self.checkpoints_queue is not None:\n                self.checkpoints_queue.put((\"eval\", model_path))\n        self.last_save = self.training_pipeline.total_steps\n        return model_path\n\n    def run_pipeline(self, valid_on_initial_weights: bool = False):\n        cur_stage_training_settings = (\n            self.training_pipeline.current_stage.training_settings\n        )\n\n        # Change engine attributes that depend on the current stage\n        self.training_pipeline.current_stage.change_engine_attributes(self)\n\n        rollout_storage = self.training_pipeline.rollout_storage\n        uuid_to_storage = self.training_pipeline.current_stage_storage\n        self.initialize_storage_and_viz(\n            storage_to_initialize=cast(\n                List[ExperienceStorage], list(uuid_to_storage.values())\n            )\n   
     )\n        self.tracking_info_list.clear()\n\n        self.last_log = self.training_pipeline.total_steps\n\n        if self.last_save is None:\n            self.last_save = self.training_pipeline.total_steps\n\n        should_save_checkpoints = (\n            self.checkpoints_dir != \"\"\n            and cur_stage_training_settings.save_interval is not None\n            and cur_stage_training_settings.save_interval > 0\n        )\n        already_saved_checkpoint = False\n\n        if (\n            valid_on_initial_weights\n            and should_save_checkpoints\n            and self.checkpoints_queue is not None\n        ):\n            if (\n                self.save_ckpt_at_every_host\n                and self.worker_id == self.first_local_worker_id\n            ) or self.worker_id == 0:\n                model_path = self.checkpoint_save()\n                if self.checkpoints_queue is not None:\n                    self.checkpoints_queue.put((\"eval\", model_path))\n\n        while True:\n            pipeline_stage_changed = self.training_pipeline.before_rollout(\n                train_metrics=self._last_aggregated_train_task_metrics\n            )  # This is `False` at the very start of training, i.e. pipeline starts with a stage initialized\n\n            self._last_aggregated_train_task_metrics.reset()\n            training_is_complete = self.training_pipeline.current_stage is None\n\n            # `training_is_complete` should imply `pipeline_stage_changed`\n            assert pipeline_stage_changed or not training_is_complete\n\n            #  Saving checkpoints and initializing storage when the pipeline stage changes\n            if pipeline_stage_changed:\n                # Here we handle saving a checkpoint after a pipeline stage ends. 
We\n                # do this:\n                # (1) after every pipeline stage if the `self.save_ckpt_after_every_pipeline_stage`\n                #   boolean is True, and\n                # (2) when we have reached the end of ALL training (i.e. all stages are complete).\n                if (\n                    should_save_checkpoints\n                    and (  # Might happen if the `save_interval` was hit just previously, see below\n                        not already_saved_checkpoint\n                    )\n                    and (\n                        self.save_ckpt_after_every_pipeline_stage\n                        or training_is_complete\n                    )\n                ):\n                    self._save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter(\n                        pipeline_stage_index=(\n                            self.training_pipeline.current_stage_index - 1\n                            if not training_is_complete\n                            else len(self.training_pipeline.pipeline_stages) - 1\n                        )\n                    )\n\n                # If training is complete, break out\n                if training_is_complete:\n                    break\n\n                # Here we handle updating our training settings after a pipeline stage ends.\n                # Update the training settings we're using\n                cur_stage_training_settings = (\n                    self.training_pipeline.current_stage.training_settings\n                )\n\n                # If the pipeline stage changed we must initialize any new custom storage and\n                # stop updating any custom storage that is no longer in use (this second bit\n                # is done by simply updating `uuid_to_storage` to the new custom storage objects).\n                new_uuid_to_storage = self.training_pipeline.current_stage_storage\n                storage_to_initialize = [\n                    s\n      
              for uuid, s in new_uuid_to_storage.items()\n                    if uuid\n                    not in uuid_to_storage  # Don't initialize storage already in use\n                ]\n                self.initialize_storage_and_viz(\n                    storage_to_initialize=storage_to_initialize,\n                )\n                uuid_to_storage = new_uuid_to_storage\n\n                # Change engine attributes that depend on the current stage\n                self.training_pipeline.current_stage.change_engine_attributes(self)\n\n            already_saved_checkpoint = False\n\n            if self.is_distributed:\n                self.num_workers_done.set(\"done\", str(0))\n                self.num_workers_steps.set(\"steps\", str(0))\n                # Ensure all workers are done before incrementing num_workers_{steps, done}\n                dist.barrier(\n                    device_ids=(\n                        None\n                        if self.device == torch.device(\"cpu\")\n                        else [self.device.index]\n                    )\n                )\n\n            self.former_steps = self.step_count\n            former_storage_experiences = {\n                k: v.total_experiences\n                for k, v in self.training_pipeline.current_stage_storage.items()\n            }\n\n            if self.training_pipeline.rollout_storage_uuid is None:\n                # In this case we're not expecting to collect storage experiences, i.e. everything\n                # will be off-policy.\n\n                # self.step_count is normally updated by the `self.collect_step_across_all_task_samplers`\n                # call below, but since we're not collecting onpolicy experiences, we need to update\n                # it here. 
The step count here is now just effectively a count of the number of times\n                # we've called `compute_losses_track_them_and_backprop` below.\n                self.step_count += 1\n\n                before_update_info = dict(\n                    next_value=None,\n                    use_gae=cur_stage_training_settings.use_gae,\n                    gamma=cur_stage_training_settings.gamma,\n                    tau=cur_stage_training_settings.gae_lambda,\n                    adv_stats_callback=self.advantage_stats,\n                )\n            else:\n                vector_tasks_already_restarted = False\n                step = -1\n                while step < cur_stage_training_settings.num_steps - 1:\n                    step += 1\n\n                    try:\n                        num_paused = self.collect_step_across_all_task_samplers(\n                            rollout_storage_uuid=self.training_pipeline.rollout_storage_uuid,\n                            uuid_to_storage=uuid_to_storage,\n                        )\n                    except (TimeoutError, EOFError) as e:\n                        if (\n                            not self.try_restart_after_task_error\n                        ) or self.mode != TRAIN_MODE_STR:\n                            # Apparently you can just call `raise` here and doing so will just raise the exception as though\n                            # it was not caught (so the stacktrace isn't messed up)\n                            raise\n                        elif vector_tasks_already_restarted:\n                            raise RuntimeError(\n                                f\"[{self.mode} worker {self.worker_id}] `vector_tasks` has timed out twice in the same\"\n                                f\" rollout. This suggests that this error was not recoverable. 
Timeout exception:\\n{traceback.format_exc()}\"\n                            )\n                        else:\n                            get_logger().warning(\n                                f\"[{self.mode} worker {self.worker_id}] `vector_tasks` appears to have crashed during\"\n                                f\" training due to an {type(e).__name__} error. You have set\"\n                                f\" `try_restart_after_task_error` to `True` so we will attempt to restart these tasks from\"\n                                f\" the beginning. USE THIS FEATURE AT YOUR OWN\"\n                                f\" RISK. Exception:\\n{traceback.format_exc()}.\"\n                            )\n                            self.vector_tasks.close()\n                            self._vector_tasks = None\n\n                            vector_tasks_already_restarted = True\n                            for (\n                                storage\n                            ) in self.training_pipeline.current_stage_storage.values():\n                                storage.after_updates()\n                            self.initialize_storage_and_viz(\n                                storage_to_initialize=cast(\n                                    List[ExperienceStorage],\n                                    list(uuid_to_storage.values()),\n                                )\n                            )\n                            step = -1\n                            continue\n\n                    # A more informative error message should already have been thrown in be given in\n                    # `collect_step_across_all_task_samplers` if `num_paused != 0` here but this serves\n                    # as a sanity check.\n                    assert num_paused == 0\n\n                    if self.is_distributed:\n                        # Preempt stragglers\n                        # Each worker will stop collecting steps for the current rollout whenever a\n      
                  # 100 * distributed_preemption_threshold percentage of workers are finished collecting their\n                        # rollout steps, and we have collected at least 25% but less than 90% of the steps.\n                        num_done = int(self.num_workers_done.get(\"done\"))\n                        if (\n                            num_done\n                            > self.distributed_preemption_threshold * self.num_workers\n                            and 0.25 * cur_stage_training_settings.num_steps\n                            <= step\n                            < 0.9 * cur_stage_training_settings.num_steps\n                        ):\n                            get_logger().debug(\n                                f\"[{self.mode} worker {self.worker_id}] Preempted after {step}\"\n                                f\" steps (out of {cur_stage_training_settings.num_steps})\"\n                                f\" with {num_done} workers done\"\n                            )\n                            break\n\n                with torch.no_grad():\n                    actor_critic_output, _ = self.actor_critic(\n                        **rollout_storage.agent_input_for_next_step()\n                    )\n\n                self.training_pipeline.rollout_count += 1\n\n                if self.is_distributed:\n                    # Mark that a worker is done collecting experience\n                    self.num_workers_done.add(\"done\", 1)\n                    self.num_workers_steps.add(\n                        \"steps\", self.step_count - self.former_steps\n                    )\n\n                    # Ensure all workers are done before updating step counter\n                    dist.barrier(\n                        device_ids=(\n                            None\n                            if self.device == torch.device(\"cpu\")\n                            else [self.device.index]\n                        )\n                    )\n\n        
            ndone = int(self.num_workers_done.get(\"done\"))\n                    assert (\n                        ndone == self.num_workers\n                    ), f\"# workers done {ndone} != # workers {self.num_workers}\"\n\n                    # get the actual step_count\n                    self.step_count = (\n                        int(self.num_workers_steps.get(\"steps\")) + self.former_steps\n                    )\n\n                before_update_info = dict(\n                    next_value=actor_critic_output.values.detach(),\n                    use_gae=cur_stage_training_settings.use_gae,\n                    gamma=cur_stage_training_settings.gamma,\n                    tau=cur_stage_training_settings.gae_lambda,\n                    adv_stats_callback=self.advantage_stats,\n                )\n\n            # Prepare storage for iteration during updates\n            for storage in self.training_pipeline.current_stage_storage.values():\n                storage.before_updates(**before_update_info)\n\n            for sc in self.training_pipeline.current_stage.stage_components:\n                component_storage = uuid_to_storage[sc.storage_uuid]\n\n                self.compute_losses_track_them_and_backprop(\n                    stage=self.training_pipeline.current_stage,\n                    stage_component=sc,\n                    storage=component_storage,\n                )\n\n            for storage in self.training_pipeline.current_stage_storage.values():\n                storage.after_updates()\n\n            # We update the storage step counts saved in\n            # `self.training_pipeline.current_stage.storage_uuid_to_steps_taken_in_stage` here rather than with\n            # `self.steps` above because some storage step counts may only change after the update calls above.\n            # This may seem a bit weird but consider a storage that corresponds to a fixed dataset\n            # used for imitation learning. 
For such a dataset, the \"steps\" will only increase as\n            # new batches are sampled during update calls.\n            # Note: We don't need to sort the keys below to ensure that distributed updates happen correctly\n            #   as `self.training_pipeline.current_stage_storage` is an ordered `dict`.\n            # First we calculate the change in counts (possibly aggregating across devices)\n            change_in_storage_experiences = {}\n            for k in sorted(self.training_pipeline.current_stage_storage.keys()):\n                delta = (\n                    self.training_pipeline.current_stage_storage[k].total_experiences\n                    - former_storage_experiences[k]\n                )\n                assert delta >= 0\n                change_in_storage_experiences[k] = self.distributed_weighted_sum(\n                    to_share=delta, weight=1\n                )\n\n            # Then we update `self.training_pipeline.current_stage.storage_uuid_to_steps_taken_in_stage` with the above\n            # computed changes.\n            for storage_uuid, delta in change_in_storage_experiences.items():\n                self.training_pipeline.current_stage.storage_uuid_to_steps_taken_in_stage[\n                    storage_uuid\n                ] += delta\n\n            if self.lr_scheduler is not None:\n                self.lr_scheduler.step(epoch=self.training_pipeline.total_steps)\n\n            # Here we handle saving a checkpoint every `save_interval` steps, saving after\n            # a pipeline stage completes is controlled above\n            checkpoint_file_name = None\n            if should_save_checkpoints and (\n                self.training_pipeline.total_steps - self.last_save\n                >= cur_stage_training_settings.save_interval\n            ):\n                checkpoint_file_name = (\n                    self._save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter()\n                )\n          
      already_saved_checkpoint = True\n\n            if (\n                self.training_pipeline.total_steps - self.last_log >= self.log_interval\n                or self.training_pipeline.current_stage.is_complete\n            ):\n                self.aggregate_and_send_logging_package(\n                    tracking_info_list=self.tracking_info_list,\n                    checkpoint_file_name=checkpoint_file_name,\n                )\n                self.tracking_info_list.clear()\n                self.last_log = self.training_pipeline.total_steps\n\n            if (\n                cur_stage_training_settings.advance_scene_rollout_period is not None\n            ) and (\n                self.training_pipeline.rollout_count\n                % cur_stage_training_settings.advance_scene_rollout_period\n                == 0\n            ):\n                get_logger().info(\n                    f\"[{self.mode} worker {self.worker_id}] Force advance\"\n                    f\" tasks with {self.training_pipeline.rollout_count} rollouts\"\n                )\n                self.vector_tasks.next_task(force_advance_scene=True)\n                self.initialize_storage_and_viz(\n                    storage_to_initialize=cast(\n                        List[ExperienceStorage], list(uuid_to_storage.values())\n                    )\n                )\n\n    def train(\n        self,\n        checkpoint_file_name: Optional[str] = None,\n        restart_pipeline: bool = False,\n        valid_on_initial_weights: bool = False,\n    ):\n        assert (\n            self.mode == TRAIN_MODE_STR\n        ), \"train only to be called from a train instance\"\n\n        training_completed_successfully = False\n        # noinspection PyBroadException\n        try:\n            if checkpoint_file_name is not None:\n                self.checkpoint_load(checkpoint_file_name, restart_pipeline)\n\n            self.run_pipeline(valid_on_initial_weights=valid_on_initial_weights)\n\n            
training_completed_successfully = True\n        except KeyboardInterrupt:\n            get_logger().info(\n                f\"[{self.mode} worker {self.worker_id}] KeyboardInterrupt, exiting.\"\n            )\n        except Exception as e:\n            get_logger().error(\n                f\"[{self.mode} worker {self.worker_id}] Encountered {type(e).__name__}, exiting.\"\n            )\n            get_logger().error(traceback.format_exc())\n        finally:\n            if training_completed_successfully:\n                if self.worker_id == 0:\n                    self.results_queue.put((\"train_stopped\", 0))\n                get_logger().info(\n                    f\"[{self.mode} worker {self.worker_id}] Training finished successfully.\"\n                )\n            else:\n                self.results_queue.put((\"train_stopped\", 1 + self.worker_id))\n            self.close()\n\n\nclass OnPolicyInference(OnPolicyRLEngine):\n    def __init__(\n        self,\n        config: ExperimentConfig,\n        results_queue: mp.Queue,  # to output aggregated results\n        checkpoints_queue: mp.Queue,  # to write/read (trainer/evaluator) ready checkpoints\n        checkpoints_dir: str = \"\",\n        mode: str = \"valid\",  # or \"test\"\n        seed: Optional[int] = None,\n        deterministic_cudnn: bool = False,\n        mp_ctx: Optional[BaseContext] = None,\n        device: Union[str, torch.device, int] = \"cpu\",\n        deterministic_agents: bool = False,\n        worker_id: int = 0,\n        num_workers: int = 1,\n        distributed_port: int = 0,\n        enforce_expert: bool = False,\n        **kwargs,\n    ):\n        super().__init__(\n            experiment_name=\"\",\n            config=config,\n            results_queue=results_queue,\n            checkpoints_queue=checkpoints_queue,\n            checkpoints_dir=checkpoints_dir,\n            mode=mode,\n            seed=seed,\n            deterministic_cudnn=deterministic_cudnn,\n            
mp_ctx=mp_ctx,\n            deterministic_agents=deterministic_agents,\n            device=device,\n            worker_id=worker_id,\n            num_workers=num_workers,\n            distributed_port=distributed_port,\n            **kwargs,\n        )\n\n        self.enforce_expert = enforce_expert\n\n    def run_eval(\n        self,\n        checkpoint_file_path: str,\n        rollout_steps: int = 100,\n        visualizer: Optional[VizSuite] = None,\n        update_secs: float = 20.0,\n        verbose: bool = False,\n    ) -> LoggingPackage:\n        assert self.actor_critic is not None, \"called `run_eval` with no actor_critic\"\n\n        # Sanity check that we haven't entered an invalid state. During eval the training_pipeline\n        # should be only set in this function and always unset at the end of it.\n        assert self.training_pipeline is None, (\n            \"`training_pipeline` should be `None` before calling `run_eval`.\"\n            \" This is necessary as we want to initialize new storages.\"\n        )\n        self.training_pipeline = self.config.training_pipeline()\n\n        ckpt = self.checkpoint_load(checkpoint_file_path, restart_pipeline=False)\n        total_steps = cast(int, ckpt[\"total_steps\"])\n\n        eval_pipeline_stage = cast(\n            PipelineStage,\n            getattr(self.training_pipeline, f\"{self.mode}_pipeline_stage\"),\n        )\n        assert (\n            len(eval_pipeline_stage.stage_components) <= 1\n        ), \"Only one StageComponent is supported during inference.\"\n        uuid_to_storage = self.training_pipeline.get_stage_storage(eval_pipeline_stage)\n\n        assert len(uuid_to_storage) > 0, (\n            \"No storage found for eval pipeline stage, this is a bug in AllenAct,\"\n            \" please submit an issue on GitHub (https://github.com/allenai/allenact/issues).\"\n        )\n\n        uuid_to_rollout_storage = {\n            uuid: storage\n            for uuid, storage in 
uuid_to_storage.items()\n            if isinstance(storage, RolloutStorage)\n        }\n        uuid_to_non_rollout_storage = {\n            uuid: storage\n            for uuid, storage in uuid_to_storage.items()\n            if not isinstance(storage, RolloutStorage)\n        }\n\n        if len(uuid_to_rollout_storage) > 1 or len(uuid_to_non_rollout_storage) > 1:\n            raise NotImplementedError(\n                \"Only one RolloutStorage and non-RolloutStorage object is allowed within an evaluation pipeline stage.\"\n                \" If you'd like to evaluate against multiple storages please\"\n                \" submit an issue on GitHub (https://github.com/allenai/allenact/issues). For the moment you'll need\"\n                \" to evaluate against these storages separately.\"\n            )\n\n        rollout_storage = self.training_pipeline.rollout_storage\n\n        if visualizer is not None:\n            assert visualizer.empty()\n\n        num_paused = self.initialize_storage_and_viz(\n            storage_to_initialize=cast(\n                List[ExperienceStorage], list(uuid_to_storage.values())\n            ),\n            visualizer=visualizer,\n        )\n        assert num_paused == 0, f\"{num_paused} tasks paused when initializing eval\"\n\n        if rollout_storage is not None:\n            num_tasks = sum(\n                self.vector_tasks.command(\n                    \"sampler_attr\", [\"length\"] * self.num_active_samplers\n                )\n            ) + (  # We need to add this as the first tasks have already been sampled\n                self.num_active_samplers\n            )\n        else:\n            num_tasks = 0\n\n        # get_logger().debug(\"worker {self.worker_id} number of tasks {num_tasks}\")\n        steps = 0\n\n        self.actor_critic.eval()\n\n        last_time: float = time.time()\n        init_time: float = last_time\n        frames: int = 0\n        if verbose:\n            get_logger().info(\n             
   f\"[{self.mode} worker {self.worker_id}] Running evaluation on {num_tasks} tasks\"\n                f\" for ckpt {checkpoint_file_path}\"\n            )\n\n        if self.enforce_expert:\n            dist_wrapper_class = partial(\n                TeacherForcingDistr,\n                action_space=self.actor_critic.action_space,\n                num_active_samplers=None,\n                approx_steps=None,\n                teacher_forcing=None,\n                tracking_callback=None,\n                always_enforce=True,\n            )\n        else:\n            dist_wrapper_class = None\n\n        logging_pkg = LoggingPackage(\n            mode=self.mode,\n            training_steps=total_steps,\n            storage_uuid_to_total_experiences=self.training_pipeline.storage_uuid_to_total_experiences,\n        )\n        should_compute_onpolicy_losses = (\n            len(eval_pipeline_stage.loss_names) > 0\n            and eval_pipeline_stage.stage_components[0].storage_uuid\n            == self.training_pipeline.rollout_storage_uuid\n        )\n        while self.num_active_samplers > 0:\n            frames += self.num_active_samplers\n            num_newly_paused = self.collect_step_across_all_task_samplers(\n                rollout_storage_uuid=self.training_pipeline.rollout_storage_uuid,\n                uuid_to_storage=uuid_to_rollout_storage,\n                visualizer=visualizer,\n                dist_wrapper_class=dist_wrapper_class,\n            )\n            steps += 1\n\n            if should_compute_onpolicy_losses and num_newly_paused > 0:\n                # The `collect_step_across_all_task_samplers` method will automatically drop\n                # parts of the rollout storage that correspond to paused tasks (namely by calling\"\n                # `rollout_storage.sampler_select(UNPAUSED_TASK_INDS)`). 
This makes sense when you don't need to\n                # compute losses for tasks but is a bit limiting here as we're throwing away data before\n                # using it to compute losses. As changing this is non-trivial we'll just warn the user\n                # for now.\n                get_logger().warning(\n                    f\"[{self.mode} worker {self.worker_id}] {num_newly_paused * rollout_storage.step} steps\"\n                    f\" will be dropped when computing losses in evaluation. This is a limitation of the current\"\n                    f\" implementation of rollout collection in AllenAct. If you'd like to see this\"\n                    f\" functionality improved please submit an issue on GitHub\"\n                    f\" (https://github.com/allenai/allenact/issues).\"\n                )\n\n            if self.num_active_samplers == 0 or steps % rollout_steps == 0:\n                if should_compute_onpolicy_losses and self.num_active_samplers > 0:\n                    with torch.no_grad():\n                        actor_critic_output, _ = self.actor_critic(\n                            **rollout_storage.agent_input_for_next_step()\n                        )\n                        before_update_info = dict(\n                            next_value=actor_critic_output.values.detach(),\n                            use_gae=eval_pipeline_stage.training_settings.use_gae,\n                            gamma=eval_pipeline_stage.training_settings.gamma,\n                            tau=eval_pipeline_stage.training_settings.gae_lambda,\n                            adv_stats_callback=lambda advantages: {\n                                \"mean\": advantages.mean(),\n                                \"std\": advantages.std(),\n                            },\n                        )\n                    # Prepare storage for iteration during loss computation\n                    for storage in uuid_to_rollout_storage.values():\n                        
storage.before_updates(**before_update_info)\n\n                    # Compute losses\n                    with torch.no_grad():\n                        for sc in eval_pipeline_stage.stage_components:\n                            self.compute_losses_track_them_and_backprop(\n                                stage=eval_pipeline_stage,\n                                stage_component=sc,\n                                storage=uuid_to_rollout_storage[sc.storage_uuid],\n                                skip_backprop=True,\n                            )\n\n                for storage in uuid_to_rollout_storage.values():\n                    storage.after_updates()\n\n            cur_time = time.time()\n            if self.num_active_samplers == 0 or cur_time - last_time >= update_secs:\n                logging_pkg = self.aggregate_and_send_logging_package(\n                    tracking_info_list=self.tracking_info_list,\n                    logging_pkg=logging_pkg,\n                    send_logging_package=False,\n                )\n                self.tracking_info_list.clear()\n\n                if verbose:\n                    npending: int\n                    lengths: List[int]\n                    if self.num_active_samplers > 0:\n                        lengths = self.vector_tasks.command(\n                            \"sampler_attr\",\n                            [\"length\"] * self.num_active_samplers,\n                        )\n                        npending = sum(lengths)\n                    else:\n                        lengths = []\n                        npending = 0\n                    est_time_to_complete = (\n                        \"{:.2f}\".format(\n                            (\n                                (cur_time - init_time)\n                                * (npending / (num_tasks - npending))\n                                / 60\n                            )\n                        )\n                        if npending != 
num_tasks\n                        else \"???\"\n                    )\n                    get_logger().info(\n                        f\"[{self.mode} worker {self.worker_id}]\"\n                        f\" For ckpt {checkpoint_file_path}\"\n                        f\" {frames / (cur_time - init_time):.1f} fps,\"\n                        f\" {npending}/{num_tasks} tasks pending ({lengths}).\"\n                        f\" ~{est_time_to_complete} min. to complete.\"\n                    )\n                    if logging_pkg.num_non_empty_metrics_dicts_added != 0:\n                        get_logger().info(\n                            \", \".join(\n                                [\n                                    f\"[{self.mode} worker {self.worker_id}]\"\n                                    f\" num_{self.mode}_tasks_complete {logging_pkg.num_non_empty_metrics_dicts_added}\",\n                                    *[\n                                        f\"{k} {v:.3g}\"\n                                        for k, v in logging_pkg.metrics_tracker.means().items()\n                                    ],\n                                    *[\n                                        f\"{k0[1]}/{k1} {v1:.3g}\"\n                                        for k0, v0 in logging_pkg.info_trackers.items()\n                                        for k1, v1 in v0.means().items()\n                                    ],\n                                ]\n                            )\n                        )\n\n                    last_time = cur_time\n\n        get_logger().info(\n            f\"[{self.mode} worker {self.worker_id}] Task evaluation complete, all task samplers paused.\"\n        )\n\n        if rollout_storage is not None:\n            self.vector_tasks.resume_all()\n            self.vector_tasks.set_seeds(self.worker_seeds(self.num_samplers, self.seed))\n            self.vector_tasks.reset_all()\n\n        logging_pkg = 
self.aggregate_and_send_logging_package(\n            tracking_info_list=self.tracking_info_list,\n            logging_pkg=logging_pkg,\n            send_logging_package=False,\n        )\n        self.tracking_info_list.clear()\n\n        logging_pkg.viz_data = (\n            visualizer.read_and_reset() if visualizer is not None else None\n        )\n\n        should_compute_offpolicy_losses = (\n            len(eval_pipeline_stage.loss_names) > 0\n            and not should_compute_onpolicy_losses\n        )\n        if should_compute_offpolicy_losses:\n            # In this case we are evaluating a non-rollout storage, e.g. some off-policy data\n            get_logger().info(\n                f\"[{self.mode} worker {self.worker_id}] Non-rollout storage detected, will now compute losses\"\n                f\" using this storage.\"\n            )\n\n            offpolicy_eval_done = False\n            while not offpolicy_eval_done:\n                before_update_info = dict(\n                    next_value=None,\n                    use_gae=eval_pipeline_stage.training_settings.use_gae,\n                    gamma=eval_pipeline_stage.training_settings.gamma,\n                    tau=eval_pipeline_stage.training_settings.gae_lambda,\n                    adv_stats_callback=lambda advantages: {\n                        \"mean\": advantages.mean(),\n                        \"std\": advantages.std(),\n                    },\n                )\n                # Prepare storage for iteration during loss computation\n                for storage in uuid_to_non_rollout_storage.values():\n                    storage.before_updates(**before_update_info)\n\n                # Compute losses\n                assert len(eval_pipeline_stage.stage_components) == 1\n                try:\n                    for sc in eval_pipeline_stage.stage_components:\n                        with torch.no_grad():\n                            self.compute_losses_track_them_and_backprop(\n         
                       stage=eval_pipeline_stage,\n                                stage_component=sc,\n                                storage=uuid_to_non_rollout_storage[sc.storage_uuid],\n                                skip_backprop=True,\n                            )\n                except EOFError:\n                    offpolicy_eval_done = True\n\n                for storage in uuid_to_non_rollout_storage.values():\n                    storage.after_updates()\n\n                total_bsize = sum(\n                    tif.info.get(\"worker_batch_size\", 0)\n                    for tif in self.tracking_info_list\n                )\n                logging_pkg = self.aggregate_and_send_logging_package(\n                    tracking_info_list=self.tracking_info_list,\n                    logging_pkg=logging_pkg,\n                    send_logging_package=False,\n                )\n                self.tracking_info_list.clear()\n\n                cur_time = time.time()\n                if verbose and (cur_time - last_time >= update_secs):\n                    get_logger().info(\n                        f\"[{self.mode} worker {self.worker_id}]\"\n                        f\" For ckpt {checkpoint_file_path}\"\n                        f\" {total_bsize / (cur_time - init_time):.1f} its/sec.\"\n                    )\n                    if logging_pkg.info_trackers != 0:\n                        get_logger().info(\n                            \", \".join(\n                                [\n                                    f\"[{self.mode} worker {self.worker_id}]\"\n                                    f\" num_{self.mode}_iters_complete {total_bsize}\",\n                                    *[\n                                        f\"{'/'.join(k0)}/{k1} {v1:.3g}\"\n                                        for k0, v0 in logging_pkg.info_trackers.items()\n                                        for k1, v1 in v0.means().items()\n                                    
],\n                                ]\n                            )\n                        )\n\n                    last_time = cur_time\n\n        # Call after_updates here to reset all storages\n        for storage in uuid_to_storage.values():\n            storage.after_updates()\n\n        # Set the training pipeline to `None` so that the storages do not\n        # persist across calls to `run_eval`\n        self.training_pipeline = None\n\n        logging_pkg.checkpoint_file_name = checkpoint_file_path\n\n        return logging_pkg\n\n    @staticmethod\n    def skip_to_latest(checkpoints_queue: mp.Queue, command: Optional[str], data):\n        assert (\n            checkpoints_queue is not None\n        ), \"Attempting to process checkpoints queue but this queue is `None`.\"\n        cond = True\n        while cond:\n            sentinel = (\"skip.AUTO.sentinel\", time.time())\n            checkpoints_queue.put(\n                sentinel\n            )  # valid since a single valid process is the only consumer\n            forwarded = False\n            while not forwarded:\n                new_command: Optional[str]\n                new_data: Any\n                (\n                    new_command,\n                    new_data,\n                ) = checkpoints_queue.get()  # block until next command arrives\n                if new_command == command:\n                    data = new_data\n                elif new_command == sentinel[0]:\n                    assert (\n                        new_data == sentinel[1]\n                    ), f\"Wrong sentinel found: {new_data} vs {sentinel[1]}\"\n                    forwarded = True\n                else:\n                    raise ValueError(\n                        f\"Unexpected command {new_command} with data {new_data}\"\n                    )\n            time.sleep(1)\n            cond = not checkpoints_queue.empty()\n        return data\n\n    def process_checkpoints(self):\n        assert (\n           
 self.mode != TRAIN_MODE_STR\n        ), \"process_checkpoints only to be called from a valid or test instance\"\n\n        assert (\n            self.checkpoints_queue is not None\n        ), \"Attempting to process checkpoints queue but this queue is `None`.\"\n\n        visualizer: Optional[VizSuite] = None\n\n        finalized = False\n        # noinspection PyBroadException\n        try:\n            while True:\n                command: Optional[str]\n                ckp_file_path: Any\n                (\n                    command,\n                    ckp_file_path,\n                ) = self.checkpoints_queue.get()  # block until first command arrives\n                # get_logger().debug(\n                #     \"{} {} command {} data {}\".format(\n                #         self.mode, self.worker_id, command, data\n                #     )\n                # )\n\n                if command == \"eval\":\n                    if self.mode == VALID_MODE_STR:\n                        # skip to latest using\n                        # 1. there's only consumer in valid\n                        # 2. 
there's no quit/exit/close message issued by runner nor trainer\n                        ckp_file_path = self.skip_to_latest(\n                            checkpoints_queue=self.checkpoints_queue,\n                            command=command,\n                            data=ckp_file_path,\n                        )\n\n                    if (\n                        visualizer is None\n                        and self.machine_params.visualizer is not None\n                    ):\n                        visualizer = self.machine_params.visualizer\n\n                    eval_package = self.run_eval(\n                        checkpoint_file_path=ckp_file_path,\n                        visualizer=visualizer,\n                        verbose=True,\n                        update_secs=20 if self.mode == TEST_MODE_STR else 5 * 60,\n                    )\n\n                    self.results_queue.put(eval_package)\n\n                    if self.is_distributed:\n                        dist.barrier()\n                elif command in [\"quit\", \"exit\", \"close\"]:\n                    finalized = True\n                    break\n                else:\n                    raise NotImplementedError()\n        except KeyboardInterrupt:\n            get_logger().info(\n                f\"[{self.mode} worker {self.worker_id}] KeyboardInterrupt, exiting.\"\n            )\n        except Exception as e:\n            get_logger().error(\n                f\"[{self.mode} worker {self.worker_id}] Encountered {type(e).__name__}, exiting.\"\n            )\n            get_logger().error(traceback.format_exc())\n        finally:\n            if finalized:\n                if self.mode == TEST_MODE_STR:\n                    self.results_queue.put((\"test_stopped\", 0))\n                get_logger().info(\n                    f\"[{self.mode} worker {self.worker_id}] Complete, all checkpoints processed.\"\n                )\n            else:\n                if self.mode == 
TEST_MODE_STR:\n                    self.results_queue.put((\"test_stopped\", self.worker_id + 1))\n            self.close(verbose=self.mode == TEST_MODE_STR)\n"
  },
  {
    "path": "allenact/algorithms/onpolicy_sync/losses/__init__.py",
    "content": "from .a2cacktr import A2C, ACKTR, A2CACKTR\nfrom .ppo import PPO\n"
  },
  {
    "path": "allenact/algorithms/onpolicy_sync/losses/a2cacktr.py",
    "content": "\"\"\"Implementation of A2C and ACKTR losses.\"\"\"\n\nfrom typing import cast, Tuple, Dict, Optional\n\nimport torch\n\nfrom allenact.algorithms.onpolicy_sync.losses.abstract_loss import (\n    AbstractActorCriticLoss,\n    ObservationType,\n)\nfrom allenact.base_abstractions.distributions import CategoricalDistr\nfrom allenact.base_abstractions.misc import ActorCriticOutput\nfrom allenact.utils.system import get_logger\n\n\nclass A2CACKTR(AbstractActorCriticLoss):\n    \"\"\"Class implementing A2C and ACKTR losses.\n\n    # Attributes\n\n    acktr : `True` if should use ACKTR loss (currently not supported), otherwise uses A2C loss.\n    value_loss_coef : Weight of value loss.\n    entropy_coef : Weight of entropy (encouraging) loss.\n    entropy_method_name : Name of Distr's entropy method name. Default is `entropy`,\n                          but we might use `conditional_entropy` for `SequentialDistr`.\n    \"\"\"\n\n    def __init__(\n        self,\n        value_loss_coef,\n        entropy_coef,\n        acktr=False,\n        entropy_method_name: str = \"entropy\",\n        *args,\n        **kwargs,\n    ):\n        \"\"\"Initializer.\n\n        See class documentation for parameter definitions.\n        \"\"\"\n        super().__init__(*args, **kwargs)\n        self.acktr = acktr\n        self.loss_key = \"a2c_total\" if not acktr else \"aktr_total\"\n\n        self.value_loss_coef = value_loss_coef\n        self.entropy_coef = entropy_coef\n        self.entropy_method_name = entropy_method_name\n\n    def loss_per_step(  # type: ignore\n        self,\n        step_count: int,\n        batch: ObservationType,\n        actor_critic_output: ActorCriticOutput[CategoricalDistr],\n    ) -> Dict[str, Tuple[torch.Tensor, Optional[float]]]:\n        actions = cast(torch.LongTensor, batch[\"actions\"])\n        values = actor_critic_output.values\n        action_log_probs = actor_critic_output.distributions.log_prob(actions)\n        action_log_probs 
= action_log_probs.view(\n            action_log_probs.shape\n            + (1,)\n            * (\n                len(cast(torch.Tensor, batch[\"adv_targ\"]).shape)\n                - len(action_log_probs.shape)\n            )\n        )\n\n        dist_entropy: torch.FloatTensor = getattr(\n            actor_critic_output.distributions, self.entropy_method_name\n        )()\n        dist_entropy = dist_entropy.view(\n            dist_entropy.shape\n            + ((1,) * (len(action_log_probs.shape) - len(dist_entropy.shape)))\n        )\n\n        value_loss = 0.5 * (cast(torch.FloatTensor, batch[\"returns\"]) - values).pow(2)\n\n        # TODO: Decided not to use normalized advantages here,\n        #   is this correct? (it's how it's done in Kostrikov's)\n        action_loss = -(\n            cast(torch.FloatTensor, batch[\"adv_targ\"]).detach() * action_log_probs\n        )\n\n        if self.acktr:\n            # TODO: Currently acktr doesn't really work because of this natural gradient stuff\n            #   that we should figure out how to integrate properly.\n            get_logger().warning(\"acktr is only partially supported.\")\n\n        return {\n            \"value\": (value_loss, self.value_loss_coef),\n            \"action\": (action_loss, None),\n            \"entropy\": (dist_entropy.mul_(-1.0), self.entropy_coef),  # type: ignore\n        }\n\n    def loss(  # type: ignore\n        self,\n        step_count: int,\n        batch: ObservationType,\n        actor_critic_output: ActorCriticOutput[CategoricalDistr],\n        *args,\n        **kwargs,\n    ):\n        losses_per_step = self.loss_per_step(\n            step_count=step_count,\n            batch=batch,\n            actor_critic_output=actor_critic_output,\n        )\n        losses = {\n            key: (loss.mean(), weight)\n            for (key, (loss, weight)) in losses_per_step.items()\n        }\n\n        total_loss = cast(\n            torch.Tensor,\n            sum(\n             
   loss * weight if weight is not None else loss\n                for loss, weight in losses.values()\n            ),\n        )\n\n        return (\n            total_loss,\n            {\n                self.loss_key: total_loss.item(),\n                **{key: loss.item() for key, (loss, _) in losses.items()},\n            },\n        )\n\n\nclass A2C(A2CACKTR):\n    \"\"\"A2C Loss.\"\"\"\n\n    def __init__(\n        self,\n        value_loss_coef,\n        entropy_coef,\n        entropy_method_name: str = \"entropy\",\n        *args,\n        **kwargs,\n    ):\n        super().__init__(\n            value_loss_coef=value_loss_coef,\n            entropy_coef=entropy_coef,\n            acktr=False,\n            entropy_method_name=entropy_method_name,\n            *args,\n            **kwargs,\n        )\n\n\nclass ACKTR(A2CACKTR):\n    \"\"\"ACKTR Loss.\n\n    This code is not supported as it currently lacks an implementation\n    for recurrent models.\n    \"\"\"\n\n    def __init__(\n        self,\n        value_loss_coef,\n        entropy_coef,\n        entropy_method_name: str = \"entropy\",\n        *args,\n        **kwargs,\n    ):\n        super().__init__(\n            value_loss_coef=value_loss_coef,\n            entropy_coef=entropy_coef,\n            acktr=True,\n            entropy_method_name=entropy_method_name,\n            *args,\n            **kwargs,\n        )\n\n\nA2CConfig = dict(\n    value_loss_coef=0.5,\n    entropy_coef=0.01,\n)\n"
  },
  {
    "path": "allenact/algorithms/onpolicy_sync/losses/abstract_loss.py",
    "content": "\"\"\"Defining abstract loss classes for actor critic models.\"\"\"\n\nimport abc\nfrom typing import Dict, Tuple, Union\n\nimport torch\n\nfrom allenact.algorithms.onpolicy_sync.policy import ObservationType\nfrom allenact.base_abstractions.distributions import CategoricalDistr\nfrom allenact.base_abstractions.misc import Loss, ActorCriticOutput\n\n\nclass AbstractActorCriticLoss(Loss):\n    \"\"\"Abstract class representing a loss function used to train an\n    ActorCriticModel.\"\"\"\n\n    # noinspection PyMethodOverriding\n    @abc.abstractmethod\n    def loss(  # type: ignore\n        self,\n        step_count: int,\n        batch: ObservationType,\n        actor_critic_output: ActorCriticOutput[CategoricalDistr],\n        *args,\n        **kwargs,\n    ) -> Union[\n        Tuple[torch.FloatTensor, Dict[str, float]],\n        Tuple[torch.FloatTensor, Dict[str, float], Dict[str, float]],\n    ]:\n        \"\"\"Computes the loss.\n\n        # Parameters\n\n        batch : A batch of data corresponding to the information collected when rolling out (possibly many) agents\n            over a fixed number of steps. In particular this batch should have the same format as that returned by\n            `RolloutStorage.batched_experience_generator`.\n        actor_critic_output : The output of calling an ActorCriticModel on the observations in `batch`.\n        args : Extra args.\n        kwargs : Extra kwargs.\n\n        # Returns\n\n        A (0-dimensional) torch.FloatTensor corresponding to the computed loss. 
`.backward()` will be called on this\n        tensor in order to compute a gradient update to the ActorCriticModel's parameters.\n        A Dict[str, float] with scalar values corresponding to sub-losses.\n        An optional Dict[str, float] with scalar values corresponding to extra info to be processed per epoch and\n        combined across epochs by the engine.\n        \"\"\"\n        # TODO: The above documentation is missing what the batch dimensions are.\n\n        raise NotImplementedError()\n"
  },
  {
    "path": "allenact/algorithms/onpolicy_sync/losses/grouped_action_imitation.py",
    "content": "import functools\nfrom typing import Dict, cast, Sequence, Set\n\nimport torch\n\nfrom allenact.algorithms.onpolicy_sync.losses.abstract_loss import (\n    AbstractActorCriticLoss,\n)\nfrom allenact.algorithms.onpolicy_sync.policy import ObservationType\nfrom allenact.base_abstractions.distributions import CategoricalDistr\nfrom allenact.base_abstractions.misc import ActorCriticOutput\n\n\nclass GroupedActionImitation(AbstractActorCriticLoss):\n    def __init__(\n        self, nactions: int, action_groups: Sequence[Set[int]], *args, **kwargs\n    ):\n        super().__init__(*args, **kwargs)\n\n        assert (\n            sum(len(ag) for ag in action_groups) == nactions\n            and len(functools.reduce(lambda x, y: x | y, action_groups)) == nactions\n        ), f\"`action_groups` (==`{action_groups}`) must be a partition of `[0, 1, 2, ..., nactions - 1]`\"\n\n        self.nactions = nactions\n        self.action_groups_mask = torch.FloatTensor(\n            [\n                [i in action_group for i in range(nactions)]\n                for action_group in action_groups\n            ]\n            + [[1] * nactions]  # type:ignore\n        )\n\n    def loss(  # type: ignore\n        self,\n        step_count: int,\n        batch: ObservationType,\n        actor_critic_output: ActorCriticOutput[CategoricalDistr],\n        *args,\n        **kwargs,\n    ):\n        observations = cast(Dict[str, torch.Tensor], batch[\"observations\"])\n\n        assert \"expert_group_action\" in observations\n\n        expert_group_actions = observations[\"expert_group_action\"]\n\n        # expert_group_actions = expert_group_actions + (expert_group_actions == -1).long() * (\n        #     1 + self.action_groups_mask.shape[0]\n        # )\n\n        if self.action_groups_mask.get_device() != expert_group_actions.get_device():\n            self.action_groups_mask = cast(\n                torch.FloatTensor,\n                
self.action_groups_mask.cuda(expert_group_actions.get_device()),\n            )\n\n        expert_group_actions_reshaped = expert_group_actions.view(-1, 1)\n\n        expert_group_actions_mask = self.action_groups_mask[\n            expert_group_actions_reshaped\n        ]\n\n        probs_tensor = actor_critic_output.distributions.probs_tensor\n        expert_group_actions_mask = expert_group_actions_mask.view(probs_tensor.shape)\n\n        total_loss = -(\n            torch.log((probs_tensor * expert_group_actions_mask).sum(-1))\n        ).mean()\n\n        return total_loss, {\n            \"grouped_action_cross_entropy\": total_loss.item(),\n        }\n"
  },
  {
    "path": "allenact/algorithms/onpolicy_sync/losses/imitation.py",
    "content": "\"\"\"Defining imitation losses for actor critic type models.\"\"\"\n\nfrom collections import OrderedDict\nfrom typing import Dict, cast, Optional, Union\n\nimport torch\n\nimport allenact.utils.spaces_utils as su\nfrom allenact.algorithms.onpolicy_sync.losses.abstract_loss import (\n    AbstractActorCriticLoss,\n    ObservationType,\n)\nfrom allenact.base_abstractions.distributions import (\n    Distr,\n    CategoricalDistr,\n    SequentialDistr,\n    ConditionalDistr,\n)\nfrom allenact.base_abstractions.misc import ActorCriticOutput\nfrom allenact.base_abstractions.sensor import AbstractExpertSensor\n\n\nclass Imitation(AbstractActorCriticLoss):\n    \"\"\"Expert imitation loss.\"\"\"\n\n    def __init__(\n        self, expert_sensor: Optional[AbstractExpertSensor] = None, *args, **kwargs\n    ):\n        super().__init__(*args, **kwargs)\n\n        self.expert_sensor = expert_sensor\n\n    @staticmethod\n    def group_loss(\n        distribution: Union[CategoricalDistr, ConditionalDistr],\n        expert_actions: torch.Tensor,\n        expert_actions_masks: torch.Tensor,\n    ):\n        assert isinstance(distribution, CategoricalDistr) or (\n            isinstance(distribution, ConditionalDistr)\n            and isinstance(distribution.distr, CategoricalDistr)\n        ), \"This implementation only supports (groups of) `CategoricalDistr`\"\n\n        expert_successes = expert_actions_masks.sum()\n\n        log_probs = distribution.log_prob(cast(torch.LongTensor, expert_actions))\n        assert (\n            log_probs.shape[: len(expert_actions_masks.shape)]\n            == expert_actions_masks.shape\n        )\n\n        # Add dimensions to `expert_actions_masks` on the right to allow for masking\n        # if necessary.\n        len_diff = len(log_probs.shape) - len(expert_actions_masks.shape)\n        assert len_diff >= 0\n        expert_actions_masks = expert_actions_masks.view(\n            *expert_actions_masks.shape, *((1,) * 
len_diff)\n        )\n\n        group_loss = -(expert_actions_masks * log_probs).sum() / torch.clamp(\n            expert_successes, min=1\n        )\n\n        return group_loss, expert_successes\n\n    def loss(  # type: ignore\n        self,\n        step_count: int,\n        batch: ObservationType,\n        actor_critic_output: ActorCriticOutput[Distr],\n        *args,\n        **kwargs,\n    ):\n        \"\"\"Computes the imitation loss.\n\n        # Parameters\n\n        batch : A batch of data corresponding to the information collected when rolling out (possibly many) agents\n            over a fixed number of steps. In particular this batch should have the same format as that returned by\n            `RolloutStorage.batched_experience_generator`.\n            Here `batch[\"observations\"]` must contain `\"expert_action\"` observations\n            or `\"expert_policy\"` observations. See `ExpertActionSensor` (or `ExpertPolicySensor`) for an example of\n            a sensor producing such observations.\n        actor_critic_output : The output of calling an ActorCriticModel on the observations in `batch`.\n        args : Extra args. Ignored.\n        kwargs : Extra kwargs. Ignored.\n\n        # Returns\n\n        A (0-dimensional) torch.FloatTensor corresponding to the computed loss. 
`.backward()` will be called on this\n        tensor in order to compute a gradient update to the ActorCriticModel's parameters.\n        \"\"\"\n        observations = cast(Dict[str, torch.Tensor], batch[\"observations\"])\n\n        losses = OrderedDict()\n\n        should_report_loss = False\n\n        if \"expert_action\" in observations:\n            if self.expert_sensor is None or not self.expert_sensor.use_groups:\n                expert_actions_and_mask = observations[\"expert_action\"]\n\n                assert expert_actions_and_mask.shape[-1] == 2\n                expert_actions_and_mask_reshaped = expert_actions_and_mask.view(-1, 2)\n\n                expert_actions = expert_actions_and_mask_reshaped[:, 0].view(\n                    *expert_actions_and_mask.shape[:-1], 1\n                )\n                expert_actions_masks = (\n                    expert_actions_and_mask_reshaped[:, 1]\n                    .float()\n                    .view(*expert_actions_and_mask.shape[:-1], 1)\n                )\n\n                total_loss, expert_successes = self.group_loss(\n                    cast(CategoricalDistr, actor_critic_output.distributions),\n                    expert_actions,\n                    expert_actions_masks,\n                )\n\n                should_report_loss = expert_successes.item() != 0\n            else:\n                expert_actions = su.unflatten(\n                    self.expert_sensor.observation_space, observations[\"expert_action\"]\n                )\n\n                total_loss = 0\n\n                ready_actions = OrderedDict()\n\n                for group_name, cd in zip(\n                    self.expert_sensor.group_spaces,\n                    cast(\n                        SequentialDistr, actor_critic_output.distributions\n                    ).conditional_distrs,\n                ):\n                    assert group_name == cd.action_group_name\n\n                    cd.reset()\n                    
cd.condition_on_input(**ready_actions)\n\n                    expert_action = expert_actions[group_name][\n                        AbstractExpertSensor.ACTION_POLICY_LABEL\n                    ]\n                    expert_action_masks = expert_actions[group_name][\n                        AbstractExpertSensor.EXPERT_SUCCESS_LABEL\n                    ]\n\n                    ready_actions[group_name] = expert_action\n\n                    current_loss, expert_successes = self.group_loss(\n                        cd,\n                        expert_action,\n                        expert_action_masks,\n                    )\n\n                    should_report_loss = (\n                        expert_successes.item() != 0 or should_report_loss\n                    )\n\n                    cd.reset()\n\n                    if expert_successes.item() != 0:\n                        losses[group_name + \"_cross_entropy\"] = current_loss.item()\n                        total_loss = total_loss + current_loss\n        elif \"expert_policy\" in observations:\n            if self.expert_sensor is None or not self.expert_sensor.use_groups:\n                assert isinstance(\n                    actor_critic_output.distributions, CategoricalDistr\n                ), \"This implementation currently only supports `CategoricalDistr`\"\n\n                expert_policies = cast(Dict[str, torch.Tensor], batch[\"observations\"])[\n                    \"expert_policy\"\n                ][..., :-1]\n                expert_actions_masks = cast(\n                    Dict[str, torch.Tensor], batch[\"observations\"]\n                )[\"expert_policy\"][..., -1:]\n\n                expert_successes = expert_actions_masks.sum()\n                if expert_successes.item() > 0:\n                    should_report_loss = True\n\n                log_probs = cast(\n                    CategoricalDistr, actor_critic_output.distributions\n                ).log_probs_tensor\n\n                # 
Add dimensions to `expert_actions_masks` on the right to allow for masking\n                # if necessary.\n                len_diff = len(log_probs.shape) - len(expert_actions_masks.shape)\n                assert len_diff >= 0\n                expert_actions_masks = expert_actions_masks.view(\n                    *expert_actions_masks.shape, *((1,) * len_diff)\n                )\n\n                total_loss = (\n                    -(log_probs * expert_policies) * expert_actions_masks\n                ).sum() / torch.clamp(expert_successes, min=1)\n            else:\n                raise NotImplementedError(\n                    \"This implementation currently only supports `CategoricalDistr`\"\n                )\n        else:\n            raise NotImplementedError(\n                \"Imitation loss requires either `expert_action` or `expert_policy`\"\n                \" sensor to be active.\"\n            )\n        return (\n            total_loss,\n            (\n                {\"expert_cross_entropy\": total_loss.item(), **losses}\n                if should_report_loss\n                else {}\n            ),\n        )\n"
  },
  {
    "path": "allenact/algorithms/onpolicy_sync/losses/ppo.py",
    "content": "\"\"\"Defining the PPO loss for actor critic type models.\"\"\"\n\nfrom typing import Dict, Optional, Callable, cast, Tuple\n\nimport torch\n\nfrom allenact.algorithms.onpolicy_sync.losses.abstract_loss import (\n    AbstractActorCriticLoss,\n    ObservationType,\n)\nfrom allenact.base_abstractions.distributions import CategoricalDistr\nfrom allenact.base_abstractions.misc import ActorCriticOutput\n\n\nclass PPO(AbstractActorCriticLoss):\n    \"\"\"Implementation of the Proximal Policy Optimization loss.\n\n    # Attributes\n\n    clip_param : The clipping parameter to use.\n    value_loss_coef : Weight of the value loss.\n    entropy_coef : Weight of the entropy (encouraging) loss.\n    use_clipped_value_loss : Whether or not to also clip the value loss.\n    clip_decay : Callable for clip param decay factor (function of the current number of steps)\n    entropy_method_name : Name of Distr's entropy method name. Default is `entropy`,\n                          but we might use `conditional_entropy` for `SequentialDistr`\n    show_ratios : If True, adds tracking for the PPO ratio (linear, clamped, and used) in each\n                  epoch to be logged by the engine.\n    normalize_advantage: Whether or not to use normalized advantage. 
Default is True.\n    \"\"\"\n\n    def __init__(\n        self,\n        clip_param: float,\n        value_loss_coef: float,\n        entropy_coef: float,\n        use_clipped_value_loss=True,\n        clip_decay: Optional[Callable[[int], float]] = None,\n        entropy_method_name: str = \"entropy\",\n        normalize_advantage: bool = True,\n        show_ratios: bool = False,\n        *args,\n        **kwargs\n    ):\n        \"\"\"Initializer.\n\n        See the class documentation for parameter definitions.\n        \"\"\"\n        super().__init__(*args, **kwargs)\n        self.clip_param = clip_param\n        self.value_loss_coef = value_loss_coef\n        self.entropy_coef = entropy_coef\n        self.use_clipped_value_loss = use_clipped_value_loss\n        self.clip_decay = clip_decay if clip_decay is not None else (lambda x: 1.0)\n        self.entropy_method_name = entropy_method_name\n        self.show_ratios = show_ratios\n        if normalize_advantage:\n            self.adv_key = \"norm_adv_targ\"\n        else:\n            self.adv_key = \"adv_targ\"\n\n    def loss_per_step(\n        self,\n        step_count: int,\n        batch: ObservationType,\n        actor_critic_output: ActorCriticOutput[CategoricalDistr],\n    ) -> Tuple[\n        Dict[str, Tuple[torch.Tensor, Optional[float]]], Dict[str, torch.Tensor]\n    ]:  # TODO tuple output\n\n        actions = cast(torch.LongTensor, batch[\"actions\"])\n        values = actor_critic_output.values\n\n        action_log_probs = actor_critic_output.distributions.log_prob(actions)\n        dist_entropy: torch.FloatTensor = getattr(\n            actor_critic_output.distributions, self.entropy_method_name\n        )()\n\n        def add_trailing_dims(t: torch.Tensor):\n            assert len(t.shape) <= len(batch[self.adv_key].shape)\n            return t.view(\n                t.shape + ((1,) * (len(batch[self.adv_key].shape) - len(t.shape)))\n            )\n\n        dist_entropy = 
add_trailing_dims(dist_entropy)\n\n        clip_param = self.clip_param * self.clip_decay(step_count)\n\n        ratio = torch.exp(action_log_probs - batch[\"old_action_log_probs\"])\n        ratio = add_trailing_dims(ratio)\n        clamped_ratio = torch.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param)\n\n        surr1 = ratio * batch[self.adv_key]\n        surr2 = clamped_ratio * batch[self.adv_key]\n\n        use_clamped = surr2 < surr1\n        action_loss = -torch.where(cast(torch.Tensor, use_clamped), surr2, surr1)\n\n        if self.use_clipped_value_loss:\n            value_pred_clipped = batch[\"values\"] + (values - batch[\"values\"]).clamp(\n                -clip_param, clip_param\n            )\n            value_losses = (values - batch[\"returns\"]).pow(2)\n            value_losses_clipped = (value_pred_clipped - batch[\"returns\"]).pow(2)\n            value_loss = 0.5 * torch.max(value_losses, value_losses_clipped)\n        else:\n            value_loss = 0.5 * (cast(torch.FloatTensor, batch[\"returns\"]) - values).pow(\n                2\n            )\n\n        # noinspection PyUnresolvedReferences\n        return (\n            {\n                \"value\": (value_loss, self.value_loss_coef),\n                \"action\": (action_loss, None),\n                \"entropy\": (dist_entropy.mul_(-1.0), self.entropy_coef),  # type: ignore\n            },\n            (\n                {\n                    \"ratio\": ratio,\n                    \"ratio_clamped\": clamped_ratio,\n                    \"ratio_used\": torch.where(\n                        cast(torch.Tensor, use_clamped), clamped_ratio, ratio\n                    ),\n                }\n                if self.show_ratios\n                else {}\n            ),\n        )\n\n    def loss(  # type: ignore\n        self,\n        step_count: int,\n        batch: ObservationType,\n        actor_critic_output: ActorCriticOutput[CategoricalDistr],\n        *args,\n        **kwargs\n    ):\n     
   losses_per_step, ratio_info = self.loss_per_step(\n            step_count=step_count,\n            batch=batch,\n            actor_critic_output=actor_critic_output,\n        )\n        losses = {\n            key: (loss.mean(), weight)\n            for (key, (loss, weight)) in losses_per_step.items()\n        }\n\n        total_loss = sum(\n            loss * weight if weight is not None else loss\n            for loss, weight in losses.values()\n        )\n\n        result = (\n            total_loss,\n            {\n                \"ppo_total\": cast(torch.Tensor, total_loss).item(),\n                **{key: loss.item() for key, (loss, _) in losses.items()},\n            },\n            {key: float(value.mean().item()) for key, value in ratio_info.items()},\n        )\n\n        return result if self.show_ratios else result[:2]\n\n\nclass PPOValue(AbstractActorCriticLoss):\n    \"\"\"Implementation of the value-function component of the Proximal Policy\n    Optimization loss (no action or entropy terms).\n\n    # Attributes\n\n    clip_param : The clipping parameter to use.\n    use_clipped_value_loss : Whether or not to also clip the value loss.\n    clip_decay : Callable for clip param decay factor (function of the current number of steps)\n    \"\"\"\n\n    def __init__(\n        self,\n        clip_param: float,\n        use_clipped_value_loss=True,\n        clip_decay: Optional[Callable[[int], float]] = None,\n        *args,\n        **kwargs\n    ):\n        \"\"\"Initializer.\n\n        See the class documentation for parameter definitions.\n        \"\"\"\n        super().__init__(*args, **kwargs)\n        self.clip_param = clip_param\n        self.use_clipped_value_loss = use_clipped_value_loss\n        self.clip_decay = clip_decay if clip_decay is not None else (lambda x: 1.0)\n\n    def loss(  # type: ignore\n        self,\n        step_count: int,\n        batch: ObservationType,\n        actor_critic_output: ActorCriticOutput[CategoricalDistr],\n        *args,\n        **kwargs\n    ):\n        values = actor_critic_output.values\n        clip_param = self.clip_param * self.clip_decay(step_count)\n\n      
  if self.use_clipped_value_loss:\n            value_pred_clipped = batch[\"values\"] + (values - batch[\"values\"]).clamp(\n                -clip_param, clip_param\n            )\n            value_losses = (values - batch[\"returns\"]).pow(2)\n            value_losses_clipped = (value_pred_clipped - batch[\"returns\"]).pow(2)\n            value_loss = 0.5 * torch.max(value_losses, value_losses_clipped).mean()\n        else:\n            value_loss = (\n                0.5 * (cast(torch.FloatTensor, batch[\"returns\"]) - values).pow(2).mean()\n            )\n\n        return (\n            value_loss,\n            {\n                \"value\": value_loss.item(),\n            },\n        )\n\n\nPPOConfig = dict(clip_param=0.1, value_loss_coef=0.5, entropy_coef=0.01)\n"
  },
  {
    "path": "allenact/algorithms/onpolicy_sync/misc.py",
    "content": "from enum import Enum\nfrom typing import Dict, Any, Optional\n\nimport attr\n\n\nclass TrackingInfoType(Enum):\n    LOSS = \"loss\"\n    TEACHER_FORCING = \"teacher_forcing\"\n    UPDATE_INFO = \"update_info\"\n\n\n@attr.s(kw_only=True)\nclass TrackingInfo:\n    type: TrackingInfoType = attr.ib()\n    info: Dict[str, Any] = attr.ib()\n    n: int = attr.ib()\n    storage_uuid: Optional[str] = attr.ib()\n    stage_component_uuid: Optional[str] = attr.ib()\n"
  },
  {
    "path": "allenact/algorithms/onpolicy_sync/policy.py",
    "content": "#!/usr/bin/env python3\n\n# Copyright (c) Facebook, Inc. and its affiliates.\n# This source code is licensed under the MIT license found in the\n# LICENSE file in the root directory of this source tree.\nimport abc\nfrom collections import OrderedDict\nfrom typing import TypeVar, Generic, Tuple, Optional, Union, Dict, List, Any\n\nimport gym\nimport torch\nfrom gym.spaces.dict import Dict as SpaceDict\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.distributions import CategoricalDistr\nfrom allenact.base_abstractions.misc import ActorCriticOutput, Memory\n\nDistributionType = TypeVar(\"DistributionType\")\n\nMemoryDimType = Tuple[str, Optional[int]]\nMemoryShapeType = Tuple[MemoryDimType, ...]\nMemorySpecType = Tuple[MemoryShapeType, torch.dtype]\nFullMemorySpecType = Dict[str, MemorySpecType]\n\nObservationType = Dict[str, Union[torch.Tensor, Dict[str, Any]]]\nActionType = Union[torch.Tensor, OrderedDict, Tuple, int]\n\n\nclass ActorCriticModel(Generic[DistributionType], nn.Module):\n    \"\"\"Abstract class defining a deep (recurrent) actor critic agent.\n\n    When defining a new agent, you should subclass this class and implement the abstract methods.\n\n    # Attributes\n\n    action_space : The space of actions available to the agent. This is of type `gym.spaces.Space`.\n    observation_space: The observation space expected by the agent. 
This is of type `gym.spaces.dict`.\n    \"\"\"\n\n    def __init__(self, action_space: gym.Space, observation_space: SpaceDict):\n        \"\"\"Initializer.\n\n        # Parameters\n\n        action_space : The space of actions available to the agent.\n        observation_space: The observation space expected by the agent.\n        \"\"\"\n        super().__init__()\n        self.action_space = action_space\n        self.observation_space = observation_space\n        self.memory_spec: Optional[List[Optional[FullMemorySpecType]]] = None\n\n    @property\n    def recurrent_memory_specification(self) -> Optional[FullMemorySpecType]:\n        \"\"\"The memory specification for the `ActorCriticModel`. See docs for\n        `_recurrent_memory_specification`.\n\n        # Returns\n\n        The memory specification from `_recurrent_memory_specification`.\n        \"\"\"\n        if self.memory_spec is None:\n            self.memory_spec = [self._recurrent_memory_specification()]\n\n            spec = self.memory_spec[0]\n\n            if spec is None:\n                return None\n\n            for key in spec:\n                dims, _ = spec[key]\n                dim_names = [d[0] for d in dims]\n\n                assert (\n                    \"step\" not in dim_names\n                ), \"`step` is automatically added and cannot be reused\"\n\n                assert \"sampler\" in dim_names, \"`sampler` dim must be defined\"\n\n        return self.memory_spec[0]\n\n    @abc.abstractmethod\n    def _recurrent_memory_specification(self) -> Optional[FullMemorySpecType]:\n        \"\"\"Implementation of memory specification for the `ActorCriticModel`.\n\n        # Returns\n\n        If None, it indicates the model is memory-less.\n        Otherwise, it is a one-level dictionary (a map) with string keys (memory type identification) and\n        tuple values (memory type specification). Each specification tuple contains:\n        1. 
Memory type named shape, e.g.\n        `((\"layer\", 1), (\"sampler\", None), (\"agent\", 2), (\"hidden\", 32))`\n        for a two-agent GRU memory, where\n        the `sampler` dimension placeholder *always* precedes the optional `agent` dimension;\n        the optional `agent` dimension has the number of agents in the model and is *always* the one after\n        `sampler` if present;\n        and `layer` and `hidden` correspond to the standard RNN hidden state parametrization.\n        2. The data type, e.g. `torch.float32`.\n\n        The `sampler` dimension placeholder is mandatory for all memories.\n\n        For a single-agent ActorCritic model it is often more convenient to skip the agent dimension, e.g.\n        `((\"layer\", 1), (\"sampler\", None), (\"hidden\", 32))` for a GRU memory.\n        \"\"\"\n        raise NotImplementedError()\n\n    @abc.abstractmethod\n    def forward(  # type:ignore\n        self,\n        observations: ObservationType,\n        memory: Memory,\n        prev_actions: ActionType,\n        masks: torch.FloatTensor,\n    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:\n        \"\"\"Transforms input observations (& previous hidden state) into action\n        probabilities and the state value.\n\n        # Parameters\n\n        observations : Multi-level map from key strings to tensors of shape [steps, samplers, (agents,) ...] with the\n                       current observations.\n        memory : `Memory` object with recurrent memory. The shape of each tensor is determined by the corresponding\n                 entry in `_recurrent_memory_specification`.\n        prev_actions : ActionType with tensors of shape [steps, samplers, ...] 
with the previous actions.\n        masks : tensor of shape [steps, samplers, agents, 1] with zeros indicating steps where a new episode/task\n                starts.\n\n        # Returns\n\n        A tuple whose first element is an object of class ActorCriticOutput which stores\n        the agents' probability distribution over possible actions (shape [steps, samplers, ...]),\n        the agents' value for the state (shape [steps, samplers, ..., 1]), and any extra information needed for\n        loss computations. The second element is an optional `Memory`, which is only used in models with recurrent\n        memory.\n        \"\"\"\n        raise NotImplementedError()\n\n\nclass LinearActorCriticHead(nn.Module):\n    def __init__(self, input_size: int, num_actions: int):\n        super().__init__()\n        self.input_size = input_size\n        self.num_actions = num_actions\n        self.actor_and_critic = nn.Linear(input_size, 1 + num_actions)\n\n        nn.init.orthogonal_(self.actor_and_critic.weight)\n        nn.init.constant_(self.actor_and_critic.bias, 0)\n\n    def forward(self, x) -> Tuple[CategoricalDistr, torch.Tensor]:\n        out = self.actor_and_critic(x)\n\n        logits = out[..., :-1]\n        values = out[..., -1:]\n        # noinspection PyArgumentList\n        return (\n            # logits are [step, sampler, ...]\n            CategoricalDistr(logits=logits),\n            # values are [step, sampler, flattened]\n            values.view(*values.shape[:2], -1),\n        )\n\n\nclass LinearCriticHead(nn.Module):\n    def __init__(self, input_size: int):\n        super().__init__()\n        self.fc = nn.Linear(input_size, 1)\n        nn.init.orthogonal_(self.fc.weight)\n        nn.init.constant_(self.fc.bias, 0)\n\n    def forward(self, x):\n        return self.fc(x).view(*x.shape[:2], -1)  # [steps, samplers, flattened]\n\n\nclass LinearActorHead(nn.Module):\n    def __init__(self, num_inputs: int, num_outputs: int):\n        
super().__init__()\n\n        self.linear = nn.Linear(num_inputs, num_outputs)\n        nn.init.orthogonal_(self.linear.weight, gain=0.01)\n        nn.init.constant_(self.linear.bias, 0)\n\n    def forward(self, x: torch.FloatTensor):  # type: ignore\n        x = self.linear(x)  # type:ignore\n\n        # noinspection PyArgumentList\n        return CategoricalDistr(logits=x)  # logits are [step, sampler, ...]\n"
  },
  {
    "path": "allenact/algorithms/onpolicy_sync/runner.py",
    "content": "\"\"\"Defines the reinforcement learning `OnPolicyRunner`.\"\"\"\n\nimport copy\nimport enum\nimport glob\nimport importlib.util\nimport inspect\nimport itertools\nimport json\nimport math\nimport os\nimport pathlib\nimport queue\nimport random\nimport signal\nimport subprocess\nimport sys\nimport time\nimport traceback\nfrom collections import defaultdict\nfrom multiprocessing.context import BaseContext\nfrom multiprocessing.process import BaseProcess\nfrom typing import Any, Dict, List, Optional, Sequence, Tuple, Union, Set\n\nimport filelock\nimport numpy as np\nimport torch\nimport torch.multiprocessing as mp\nfrom setproctitle import setproctitle as ptitle\nfrom torch.distributions.utils import lazy_property\n\nfrom allenact.algorithms.onpolicy_sync.engine import (\n    TEST_MODE_STR,\n    TRAIN_MODE_STR,\n    VALID_MODE_STR,\n    OnPolicyInference,\n    OnPolicyRLEngine,\n    OnPolicyTrainer,\n)\nfrom allenact.base_abstractions.callbacks import Callback\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.utils.experiment_utils import (\n    LoggingPackage,\n    ScalarMeanTracker,\n    set_deterministic_cudnn,\n    set_seed,\n    download_checkpoint_from_wandb,\n)\nfrom allenact.utils.misc_utils import (\n    NumpyJSONEncoder,\n    all_equal,\n    get_git_diff_of_project,\n)\nfrom allenact.utils.model_utils import md5_hash_of_state_dict\nfrom allenact.utils.system import find_free_port, get_logger\nfrom allenact.utils.tensor_utils import SummaryWriter\nfrom allenact.utils.viz_utils import VizSuite\n\nCONFIG_KWARGS_STR = \"__CONFIG_KWARGS__\"\n\n\nclass SaveDirFormat(enum.Enum):\n    \"\"\"Directory formats that can be used when saving tensorboard logs,\n    checkpoints, etc.\n\n    during training/evaluation.\n    FLAT: the first-level directories are logs, checkpoints, metrics, etc; the second-level are time strings of each experiment\n   
 NESTED: the opposite to FLAT.\n    \"\"\"\n\n    FLAT = \"FLAT\"\n    NESTED = \"NESTED\"\n\n\n# Has results queue (aggregated per trainer), checkpoints queue and mp context\n# Instantiates train, validate, and test workers\n# Logging\n# Saves configs, makes folder for trainer models\nclass OnPolicyRunner(object):\n    def __init__(\n        self,\n        config: ExperimentConfig,\n        output_dir: str,\n        loaded_config_src_files: Optional[Dict[str, str]],\n        seed: Optional[int] = None,\n        mode: str = \"train\",\n        deterministic_cudnn: bool = False,\n        deterministic_agents: bool = False,\n        mp_ctx: Optional[BaseContext] = None,\n        multiprocessing_start_method: str = \"default\",\n        extra_tag: str = \"\",\n        disable_tensorboard: bool = False,\n        disable_config_saving: bool = False,\n        distributed_ip_and_port: str = \"127.0.0.1:0\",\n        distributed_preemption_threshold: float = 0.7,\n        machine_id: int = 0,\n        save_dir_fmt: SaveDirFormat = SaveDirFormat.FLAT,\n        callbacks_paths: Optional[str] = None,\n    ):\n        self.config = config\n        self.output_dir = output_dir\n        self.loaded_config_src_files = loaded_config_src_files\n        self.seed = seed if seed is not None else random.randint(0, 2**31 - 1)\n        self.deterministic_cudnn = deterministic_cudnn\n        self.distributed_preemption_threshold = distributed_preemption_threshold\n        if multiprocessing_start_method == \"default\":\n            if torch.cuda.is_available():\n                multiprocessing_start_method = \"forkserver\"\n            else:\n                # Spawn seems to play nicer with cpus and debugging\n                multiprocessing_start_method = \"spawn\"\n        self.mp_ctx = self.init_context(mp_ctx, multiprocessing_start_method)\n        self.extra_tag = extra_tag\n        self.mode = mode.lower().strip()\n        self.visualizer: Optional[VizSuite] = None\n        
self.deterministic_agents = deterministic_agents\n        self.disable_tensorboard = disable_tensorboard\n        self.disable_config_saving = disable_config_saving\n\n        assert self.mode in [\n            TRAIN_MODE_STR,\n            TEST_MODE_STR,\n        ], \"Only 'train' and 'test' modes supported in runner\"\n\n        if self.deterministic_cudnn:\n            set_deterministic_cudnn()\n\n        set_seed(self.seed)\n\n        self.queues: Optional[Dict[str, mp.Queue]] = None\n\n        self.processes: Dict[str, List[Union[BaseProcess, mp.Process]]] = defaultdict(\n            list\n        )\n\n        self.current_checkpoint = None\n\n        self._local_start_time_str: Optional[str] = None\n\n        self._is_closed: bool = False\n\n        self._collect_valid_results: bool = False\n\n        self.distributed_ip_and_port = distributed_ip_and_port\n        self.machine_id = machine_id\n\n        self.save_dir_fmt = save_dir_fmt\n\n        self.callbacks_paths = callbacks_paths\n\n    @lazy_property\n    def callbacks(self):\n        return self.setup_callback_classes(self.callbacks_paths)\n\n    @property\n    def local_start_time_str(self) -> str:\n        if self._local_start_time_str is None:\n            raise RuntimeError(\n                \"Local start time string does not exist as neither `start_train()` or `start_test()`\"\n                \" has been called on this runner.\"\n            )\n        return self._local_start_time_str\n\n    @property\n    def running_validation(self):\n        pipeline = self.config.training_pipeline()\n        return (\n            sum(\n                MachineParams.instance_from(\n                    self.config.machine_params(VALID_MODE_STR)\n                ).nprocesses\n            )\n            > 0\n            or (\n                pipeline.rollout_storage_uuid is None\n                and len(pipeline.valid_pipeline_stage.loss_names) > 0\n            )\n        ) and self.machine_id == 0\n\n    
@staticmethod\n    def init_context(\n        mp_ctx: Optional[BaseContext] = None,\n        multiprocessing_start_method: str = \"forkserver\",\n        valid_start_methods: Tuple[str, ...] = (\"forkserver\", \"spawn\", \"fork\"),\n    ):\n        if mp_ctx is None:\n            assert multiprocessing_start_method in valid_start_methods, (\n                f\"multiprocessing_start_method must be one of {valid_start_methods}.\"\n                f\" Got '{multiprocessing_start_method}'\"\n            )\n\n            mp_ctx = mp.get_context(multiprocessing_start_method)\n        elif multiprocessing_start_method != mp_ctx.get_start_method():\n            get_logger().warning(\n                f\"ignoring multiprocessing_start_method '{multiprocessing_start_method}'\"\n                f\" and using given context with '{mp_ctx.get_start_method()}'\"\n            )\n\n        return mp_ctx\n\n    def setup_callback_classes(self, callbacks: Optional[str]) -> Set[Callback]:\n        \"\"\"Get a list of Callback classes from a comma-separated list of files,\n        paths, and/or functions.\n\n        After separating the `callbacks` into a list of strings, each string should either\n        be a:\n        1. Name of a function defined on the experiment config that, when called, returns an\n           object of type `Callback`.\n        2. Path to a python file containing a single class that inherits from `Callback`.\n        3. Module path (e.g. 
`path.to.module`) where this module contains a single class that\n            inherits from `Callback`.\n        \"\"\"\n        if callbacks == \"\" or callbacks is None:\n            return set()\n\n        setup_dict = dict(\n            name=f\"{self.experiment_name}/{self.local_start_time_str}\",\n            config=self.config,\n            mode=self.mode,\n        )\n\n        callback_objects = set()\n        files = callbacks.split(\",\")\n        for filename in files:\n            # Check if the `filename` is a function on the config\n            if not any(k in filename for k in [\".\", \"/\"]):\n                callback_func = getattr(self.config, filename, None)\n                if callback_func is not None:\n                    callback = callback_func()\n                    callback.setup(**setup_dict)\n                    callback_objects.add(callback)\n                    continue\n\n            # Otherwise find the Callback class in the file or module\n            module_path = filename.replace(\"/\", \".\")\n            if module_path.endswith(\".py\"):\n                module_path = module_path[:-3]\n            module = importlib.import_module(module_path)\n            classes = inspect.getmembers(module, inspect.isclass)\n\n            callback_classes = [\n                mod_class[1]\n                for mod_class in classes\n                if issubclass(mod_class[1], Callback)\n            ]\n\n            assert len(callback_classes) == 1, (\n                f\"Expected a single callback class in {filename}, but found {len(callback_classes)}.\"\n                f\" These classes were found: {callback_classes}.\"\n            )\n\n            for mod_class in callback_classes:\n                # NOTE: initialize the callback class\n                callback = mod_class()\n                callback.setup(**setup_dict)\n                callback_objects.add(callback)\n\n        return callback_objects\n\n    def 
_acquire_unique_local_start_time_string(self) -> str:\n        \"\"\"Creates a (unique) local start time string for this experiment.\n\n        Ensures through file locks that the local start time string\n        produced is unique. This implies that, if one has many\n        experiments starting in parallel, at most one will be started\n        every second (as the local start time string only records the\n        time up to the current second).\n        \"\"\"\n        os.makedirs(self.output_dir, exist_ok=True)\n        start_time_string_lock_path = os.path.abspath(\n            os.path.join(self.output_dir, \".allenact_start_time_string.lock\")\n        )\n        try:\n            with filelock.FileLock(start_time_string_lock_path, timeout=60):\n                last_start_time_string_path = os.path.join(\n                    self.output_dir, \".allenact_last_start_time_string\"\n                )\n                pathlib.Path(last_start_time_string_path).touch()\n\n                with open(last_start_time_string_path, \"r\") as f:\n                    last_start_time_string_list = f.readlines()\n\n                while True:\n                    candidate_str = time.strftime(\n                        \"%Y-%m-%d_%H-%M-%S\", time.localtime(time.time())\n                    )\n                    if (\n                        len(last_start_time_string_list) == 0\n                        or last_start_time_string_list[0].strip() != candidate_str\n                    ):\n                        break\n                    time.sleep(0.2)\n\n                with open(last_start_time_string_path, \"w\") as f:\n                    f.write(candidate_str)\n\n        except filelock.Timeout as e:\n            get_logger().exception(\n                f\"Could not acquire the lock for {start_time_string_lock_path} for 60 seconds,\"\n                \" this suggests an unexpected deadlock. 
Please close all AllenAct training processes,\"\n                \" delete this lockfile, and try again.\"\n            )\n            raise e\n\n        assert candidate_str is not None\n        return candidate_str\n\n    def worker_devices(self, mode: str):\n        machine_params: MachineParams = MachineParams.instance_from(\n            self.config.machine_params(mode)\n        )\n        devices = machine_params.devices\n\n        assert all_equal(devices) or all(\n            d.index >= 0 for d in devices\n        ), f\"Cannot have a mix of CPU and GPU devices (`devices == {devices}`)\"\n\n        get_logger().info(f\"Using {len(devices)} {mode} workers on devices {devices}\")\n        return devices\n\n    def local_worker_ids(self, mode: str):\n        machine_params: MachineParams = MachineParams.instance_from(\n            self.config.machine_params(mode, machine_id=self.machine_id)\n        )\n        ids = machine_params.local_worker_ids\n\n        get_logger().info(\n            f\"Using local worker ids {ids} (total {len(ids)} workers in machine {self.machine_id})\"\n        )\n\n        return ids\n\n    def init_visualizer(self, mode: str):\n        if not self.disable_tensorboard:\n            # Note: Avoid instantiating anything in machine_params (use Builder if needed)\n            machine_params = MachineParams.instance_from(\n                self.config.machine_params(mode)\n            )\n            self.visualizer = machine_params.visualizer\n\n    @staticmethod\n    def init_process(mode: str, id: int, to_close_on_termination: OnPolicyRLEngine):\n        ptitle(f\"{mode}-{id}\")\n\n        def create_handler(termination_type: str):\n            def handler(_signo, _frame):\n                prefix = f\"{termination_type} signal sent to worker {mode}-{id}.\"\n                if to_close_on_termination.is_closed:\n                    get_logger().info(\n                        f\"{prefix} Worker {mode}-{id} is already closed, exiting.\"\n     
               )\n                    sys.exit(0)\n                elif not to_close_on_termination.is_closing:\n                    get_logger().info(\n                        f\"{prefix} Forcing worker {mode}-{id} to close and exiting.\"\n                    )\n                    # noinspection PyBroadException\n                    try:\n                        to_close_on_termination.close(True)\n                    except Exception:\n                        get_logger().error(\n                            f\"Error occurred when closing the RL engine used by work {mode}-{id}.\"\n                            f\" We cannot recover from this and will simply exit. The exception:\\n\"\n                            f\"{traceback.format_exc()}\"\n                        )\n                        sys.exit(1)\n                    sys.exit(0)\n                else:\n                    get_logger().info(\n                        f\"{prefix} Worker {mode}-{id} is already closing, ignoring this signal.\"\n                    )\n\n            return handler\n\n        signal.signal(signal.SIGTERM, create_handler(\"Termination\"))\n        signal.signal(signal.SIGINT, create_handler(\"Interrupt\"))\n\n    @staticmethod\n    def init_worker(engine_class, args, kwargs):\n        mode = kwargs[\"mode\"]\n        id = kwargs[\"worker_id\"]\n\n        worker = None\n        try:\n            worker = engine_class(*args, **kwargs)\n        except Exception:\n            get_logger().error(f\"Encountered Exception. 
Terminating {mode} worker {id}\")\n            get_logger().exception(traceback.format_exc())\n            kwargs[\"results_queue\"].put((f\"{mode}_stopped\", 1 + id))\n        finally:\n            return worker\n\n    @lazy_property\n    def _get_callback_sensors(self) -> List[Sensor]:\n        callback_sensors: List[Sensor] = []\n        for c in self.callbacks:\n            sensors = c.callback_sensors()\n            if sensors is not None:\n                callback_sensors.extend(sensors)\n        return callback_sensors\n\n    @staticmethod\n    def train_loop(\n        id: int = 0,\n        checkpoint: Optional[str] = None,\n        restart_pipeline: bool = False,\n        valid_on_initial_weights: bool = False,\n        *engine_args,\n        **engine_kwargs,\n    ):\n        engine_kwargs[\"mode\"] = TRAIN_MODE_STR\n        engine_kwargs[\"worker_id\"] = id\n        engine_kwargs_for_print = {\n            k: (v if k != \"initial_model_state_dict\" else \"[SUPPRESSED]\")\n            for k, v in engine_kwargs.items()\n        }\n        get_logger().info(f\"train {id} args {engine_kwargs_for_print}\")\n\n        trainer: OnPolicyTrainer = OnPolicyRunner.init_worker(\n            engine_class=OnPolicyTrainer, args=engine_args, kwargs=engine_kwargs\n        )\n        if trainer is not None:\n            OnPolicyRunner.init_process(\"Train\", id, to_close_on_termination=trainer)\n            trainer.train(\n                checkpoint_file_name=checkpoint,\n                restart_pipeline=restart_pipeline,\n                valid_on_initial_weights=valid_on_initial_weights,\n            )\n\n    @staticmethod\n    def valid_loop(id: int = 0, *engine_args, **engine_kwargs):\n        engine_kwargs[\"mode\"] = VALID_MODE_STR\n        engine_kwargs[\"worker_id\"] = id\n        get_logger().info(f\"valid {id} args {engine_kwargs}\")\n\n        valid = OnPolicyRunner.init_worker(\n            engine_class=OnPolicyInference, args=engine_args, kwargs=engine_kwargs\n  
      )\n        if valid is not None:\n            OnPolicyRunner.init_process(\"Valid\", id, to_close_on_termination=valid)\n            valid.process_checkpoints()  # gets checkpoints via queue\n\n    @staticmethod\n    def test_loop(id: int = 0, *engine_args, **engine_kwargs):\n        engine_kwargs[\"mode\"] = TEST_MODE_STR\n        engine_kwargs[\"worker_id\"] = id\n        get_logger().info(f\"test {id} args {engine_kwargs}\")\n\n        test = OnPolicyRunner.init_worker(OnPolicyInference, engine_args, engine_kwargs)\n        if test is not None:\n            OnPolicyRunner.init_process(\"Test\", id, to_close_on_termination=test)\n            test.process_checkpoints()  # gets checkpoints via queue\n\n    def _initialize_start_train_or_start_test(self):\n        self._is_closed = False\n\n        if self.queues is not None:\n            for k, q in self.queues.items():\n                try:\n                    out = q.get(timeout=1)\n                    raise RuntimeError(\n                        f\"{k} queue was not empty before starting new training/testing (contained {out}).\"\n                        f\" This should not happen, please report how you obtained this error\"\n                        f\" by creating an issue at https://github.com/allenai/allenact/issues.\"\n                    )\n                except queue.Empty:\n                    pass\n\n        self.queues = {\n            \"results\": self.mp_ctx.Queue(),\n            \"checkpoints\": self.mp_ctx.Queue(),\n        }\n\n        self._local_start_time_str = self._acquire_unique_local_start_time_string()\n\n    def get_port(self):\n        passed_port = int(self.distributed_ip_and_port.split(\":\")[1])\n        if passed_port == 0:\n            assert (\n                self.machine_id == 0\n            ), \"Only runner with `machine_id` == 0 can search for a free port.\"\n            distributed_port = find_free_port(\n                self.distributed_ip_and_port.split(\":\")[0]\n     
       )\n        else:\n            distributed_port = passed_port\n\n        get_logger().info(\n            f\"Engines on machine_id == {self.machine_id} using port {distributed_port} and seed {self.seed}\"\n        )\n\n        return distributed_port\n\n    def start_train(\n        self,\n        checkpoint: Optional[str] = None,\n        restart_pipeline: bool = False,\n        max_sampler_processes_per_worker: Optional[int] = None,\n        save_ckpt_after_every_pipeline_stage: bool = True,\n        collect_valid_results: bool = False,\n        valid_on_initial_weights: bool = False,\n        try_restart_after_task_error: bool = False,\n        save_ckpt_at_every_host: bool = False,\n    ):\n        self._initialize_start_train_or_start_test()\n\n        self._collect_valid_results = collect_valid_results\n\n        if not self.disable_config_saving:\n            self.save_project_state()\n\n        devices = self.worker_devices(TRAIN_MODE_STR)\n        num_workers = len(devices)\n\n        # Be extra careful to ensure that all models start\n        # with the same initializations.\n        set_seed(self.seed)\n        initial_model_state_dict = self.config.create_model(\n            sensor_preprocessor_graph=MachineParams.instance_from(\n                self.config.machine_params(self.mode)\n            ).sensor_preprocessor_graph\n        ).state_dict()\n\n        distributed_port = 0 if num_workers == 1 else self.get_port()\n\n        if (\n            num_workers > 1\n            and \"NCCL_ASYNC_ERROR_HANDLING\" not in os.environ\n            and \"NCCL_BLOCKING_WAIT\" not in os.environ\n        ):\n            # This ensures the NCCL distributed backend will throw errors\n            # if we timeout at a call to `barrier()`\n            os.environ[\"NCCL_ASYNC_ERROR_HANDLING\"] = \"1\"\n\n        worker_ids = self.local_worker_ids(TRAIN_MODE_STR)\n\n        if checkpoint is not None:\n            if checkpoint[:8] == \"wandb://\":\n                
ckpt_dir = \"/tmp/wandb_ckpts\"\n                os.makedirs(ckpt_dir, exist_ok=True)\n                checkpoint = download_checkpoint_from_wandb(\n                    checkpoint, ckpt_dir, only_allow_one_ckpt=True\n                )\n\n        model_hash = None\n        for trainer_id in worker_ids:\n            training_kwargs = dict(\n                id=trainer_id,\n                checkpoint=checkpoint,\n                restart_pipeline=restart_pipeline,\n                experiment_name=self.experiment_name,\n                config=self.config,\n                callback_sensors=self._get_callback_sensors,\n                results_queue=self.queues[\"results\"],\n                checkpoints_queue=(\n                    self.queues[\"checkpoints\"] if self.running_validation else None\n                ),\n                checkpoints_dir=self.checkpoint_dir(),\n                seed=self.seed,\n                deterministic_cudnn=self.deterministic_cudnn,\n                mp_ctx=self.mp_ctx,\n                num_workers=num_workers,\n                device=devices[trainer_id],\n                distributed_ip=self.distributed_ip_and_port.split(\":\")[0],\n                distributed_port=distributed_port,\n                max_sampler_processes_per_worker=max_sampler_processes_per_worker,\n                save_ckpt_after_every_pipeline_stage=save_ckpt_after_every_pipeline_stage,\n                initial_model_state_dict=(\n                    initial_model_state_dict if model_hash is None else model_hash\n                ),\n                first_local_worker_id=worker_ids[0],\n                distributed_preemption_threshold=self.distributed_preemption_threshold,\n                valid_on_initial_weights=valid_on_initial_weights,\n                try_restart_after_task_error=try_restart_after_task_error,\n                save_ckpt_at_every_host=save_ckpt_at_every_host,\n            )\n            train: BaseProcess = self.mp_ctx.Process(\n                
target=self.train_loop,\n                kwargs=training_kwargs,\n            )\n            try:\n                train.start()\n            except (ValueError, OSError, ConnectionRefusedError, EOFError) as e:\n                # If the `initial_model_state_dict` is too large we sometimes\n                # run into errors passing it with multiprocessing. In such cases\n                # we instead hash the state_dict and confirm, in each engine worker, that\n                # this hash equals the model the engine worker instantiates.\n                if (\n                    (isinstance(e, ValueError) and e.args[0] == \"too many fds\")\n                    or (isinstance(e, OSError) and e.errno == 22)\n                    or (isinstance(e, ConnectionRefusedError) and e.errno == 111)\n                    or isinstance(e, EOFError)\n                ):\n                    model_hash = md5_hash_of_state_dict(initial_model_state_dict)\n                    training_kwargs[\"initial_model_state_dict\"] = model_hash\n                    train = self.mp_ctx.Process(\n                        target=self.train_loop,\n                        kwargs=training_kwargs,\n                    )\n                    train.start()\n                else:\n                    raise e\n\n            self.processes[TRAIN_MODE_STR].append(train)\n\n        get_logger().info(\n            f\"Started {len(self.processes[TRAIN_MODE_STR])} train processes\"\n        )\n\n        # Validation\n        if self.running_validation:\n            device = self.worker_devices(VALID_MODE_STR)[0]\n            self.init_visualizer(VALID_MODE_STR)\n            valid: BaseProcess = self.mp_ctx.Process(\n                target=self.valid_loop,\n                args=(0,),\n                kwargs=dict(\n                    config=self.config,\n                    callback_sensors=self._get_callback_sensors,\n                    results_queue=self.queues[\"results\"],\n                    
checkpoints_queue=self.queues[\"checkpoints\"],\n                    seed=12345,  # TODO allow same order for randomly sampled tasks? Is this any useful anyway?\n                    deterministic_cudnn=self.deterministic_cudnn,\n                    deterministic_agents=self.deterministic_agents,\n                    mp_ctx=self.mp_ctx,\n                    device=device,\n                    max_sampler_processes_per_worker=max_sampler_processes_per_worker,\n                ),\n            )\n            valid.start()\n            self.processes[VALID_MODE_STR].append(valid)\n\n            get_logger().info(\n                f\"Started {len(self.processes[VALID_MODE_STR])} valid processes\"\n            )\n        else:\n            get_logger().info(\n                \"No processes allocated to validation, no validation will be run.\"\n            )\n\n        metrics_file_template: Optional[str] = None\n\n        if self._collect_valid_results:\n            metrics_dir = self.metric_path(self.local_start_time_str)\n            os.makedirs(metrics_dir, exist_ok=True)\n            suffix = f\"__valid_{self.local_start_time_str}\"\n            metrics_file_template = os.path.join(\n                metrics_dir, \"metrics\" + suffix + \"{:012d}.json\"\n            )  # template for training steps\n\n            get_logger().info(\n                f\"Saving valid metrics with template {metrics_file_template}\"\n            )\n\n            # Check output file can be written\n            with open(metrics_file_template.format(0), \"w\") as f:\n                json.dump([], f, indent=4, sort_keys=True, cls=NumpyJSONEncoder)\n\n        valid_results = self.log_and_close(\n            start_time_str=self.local_start_time_str,\n            nworkers=len(worker_ids),  # TODO num_workers once we forward metrics,\n            metrics_file=metrics_file_template,\n        )\n\n        if not self._collect_valid_results:\n            return self.local_start_time_str\n        
else:\n            return self.local_start_time_str, valid_results\n\n    def start_test(\n        self,\n        checkpoint_path_dir_or_pattern: str,\n        infer_output_dir: bool = False,\n        approx_ckpt_step_interval: Optional[Union[float, int]] = None,\n        max_sampler_processes_per_worker: Optional[int] = None,\n        inference_expert: bool = False,\n    ) -> List[Dict]:\n        # Tester always runs on a single machine\n        assert (\n            self.machine_id == 0\n        ), f\"Received `machine_id={self.machine_id} for test. Only one machine supported.\"\n        assert isinstance(\n            checkpoint_path_dir_or_pattern, str\n        ), \"Must provide a --checkpoint path or pattern to test on.\"\n\n        self.extra_tag += (\n            \"__\" * (len(self.extra_tag) > 0) + \"enforced_test_expert\"\n        ) * inference_expert\n        self._initialize_start_train_or_start_test()\n\n        devices = self.worker_devices(TEST_MODE_STR)\n        self.init_visualizer(TEST_MODE_STR)\n        num_testers = len(devices)\n\n        distributed_port = 0\n        if num_testers > 1:\n            distributed_port = find_free_port()\n\n        # Tester always runs on a single machine\n        for tester_it in range(num_testers):\n            test: BaseProcess = self.mp_ctx.Process(\n                target=self.test_loop,\n                args=(tester_it,),\n                kwargs=dict(\n                    config=self.config,\n                    callback_sensors=self._get_callback_sensors,\n                    results_queue=self.queues[\"results\"],\n                    checkpoints_queue=self.queues[\"checkpoints\"],\n                    seed=12345,  # TODO allow same order for randomly sampled tasks? 
Is this any useful anyway?\n                    deterministic_cudnn=self.deterministic_cudnn,\n                    deterministic_agents=self.deterministic_agents,\n                    mp_ctx=self.mp_ctx,\n                    num_workers=num_testers,\n                    device=devices[tester_it],\n                    max_sampler_processes_per_worker=max_sampler_processes_per_worker,\n                    distributed_port=distributed_port,\n                    enforce_expert=inference_expert,\n                ),\n            )\n\n            test.start()\n            self.processes[TEST_MODE_STR].append(test)\n\n        get_logger().info(\n            f\"Started {len(self.processes[TEST_MODE_STR])} test processes\"\n        )\n\n        checkpoint_paths = self.get_checkpoint_files(\n            checkpoint_path_dir_or_pattern=checkpoint_path_dir_or_pattern,\n            approx_ckpt_step_interval=approx_ckpt_step_interval,\n        )\n        steps = [self.step_from_checkpoint(cp) for cp in checkpoint_paths]\n\n        get_logger().info(f\"Running test on {len(steps)} steps {steps}\")\n\n        for checkpoint_path in checkpoint_paths:\n            # Make all testers work on each checkpoint\n            for tester_it in range(num_testers):\n                self.queues[\"checkpoints\"].put((\"eval\", checkpoint_path))\n\n        # Signal all testers to terminate cleanly\n        for _ in range(num_testers):\n            self.queues[\"checkpoints\"].put((\"quit\", None))\n\n        if self.save_dir_fmt == SaveDirFormat.NESTED:\n            if infer_output_dir:  # NOTE: we change output_dir here\n                self.output_dir = self.checkpoint_log_folder_str(checkpoint_paths[0])\n            suffix = \"\"\n        elif self.save_dir_fmt == SaveDirFormat.FLAT:\n            suffix = f\"__test_{self.local_start_time_str}\"\n        else:\n            raise NotImplementedError\n        metrics_dir = self.metric_path(self.local_start_time_str)\n        
os.makedirs(metrics_dir, exist_ok=True)\n        metrics_file_path = os.path.join(metrics_dir, \"metrics\" + suffix + \".json\")\n\n        get_logger().info(f\"Saving test metrics in {metrics_file_path}\")\n\n        # Check output file can be written\n        with open(metrics_file_path, \"w\") as f:\n            json.dump([], f, indent=4, sort_keys=True, cls=NumpyJSONEncoder)\n\n        return self.log_and_close(\n            start_time_str=self.checkpoint_start_time_str(checkpoint_paths[0]),\n            nworkers=num_testers,\n            test_steps=steps,\n            metrics_file=metrics_file_path,\n        )\n\n    @staticmethod\n    def checkpoint_start_time_str(checkpoint_file_name):\n        parts = checkpoint_file_name.split(os.path.sep)\n        assert len(parts) > 1, f\"{checkpoint_file_name} is not a valid checkpoint path\"\n        start_time_str = parts[-2]\n        get_logger().info(f\"Using checkpoint start time {start_time_str}\")\n        return start_time_str\n\n    @staticmethod\n    def checkpoint_log_folder_str(checkpoint_file_name):\n        parts = checkpoint_file_name.split(os.path.sep)\n        assert len(parts) > 1, f\"{checkpoint_file_name} is not a valid checkpoint path\"\n        log_folder_str = os.path.sep.join(parts[:-2])  # remove checkpoints/*.pt\n        get_logger().info(f\"Using log folder {log_folder_str}\")\n        return log_folder_str\n\n    @property\n    def experiment_name(self):\n        if len(self.extra_tag) > 0:\n            return f\"{self.config.tag()}_{self.extra_tag}\"\n        return self.config.tag()\n\n    def checkpoint_dir(\n        self, start_time_str: Optional[str] = None, create_if_none: bool = True\n    ):\n        path_parts = [\n            (\n                self.config.tag()\n                if self.extra_tag == \"\"\n                else os.path.join(self.config.tag(), self.extra_tag)\n            ),\n            start_time_str or self.local_start_time_str,\n        ]\n        if 
self.save_dir_fmt == SaveDirFormat.NESTED:\n            folder = os.path.join(\n                self.output_dir,\n                *path_parts,\n                \"checkpoints\",\n            )\n        elif self.save_dir_fmt == SaveDirFormat.FLAT:\n            folder = os.path.join(\n                self.output_dir,\n                \"checkpoints\",\n                *path_parts,\n            )\n        else:\n            raise NotImplementedError\n        if create_if_none:\n            os.makedirs(folder, exist_ok=True)\n        return folder\n\n    def log_writer_path(self, start_time_str: str) -> str:\n        if self.save_dir_fmt == SaveDirFormat.NESTED:\n            if self.mode == TEST_MODE_STR:\n                return os.path.join(\n                    self.output_dir,\n                    \"test\",\n                    self.config.tag(),\n                    self.local_start_time_str,\n                )\n            path = os.path.join(\n                self.output_dir,\n                (\n                    self.config.tag()\n                    if self.extra_tag == \"\"\n                    else os.path.join(self.config.tag(), self.extra_tag)\n                ),\n                start_time_str,\n                \"train_tb\",\n            )\n            return path\n        elif self.save_dir_fmt == SaveDirFormat.FLAT:\n            path = os.path.join(\n                self.output_dir,\n                \"tb\",\n                (\n                    self.config.tag()\n                    if self.extra_tag == \"\"\n                    else os.path.join(self.config.tag(), self.extra_tag)\n                ),\n                start_time_str,\n            )\n            if self.mode == TEST_MODE_STR:\n                path = os.path.join(path, \"test\", self.local_start_time_str)\n            return path\n        else:\n            raise NotImplementedError\n\n    def metric_path(self, start_time_str: str) -> str:\n        if self.save_dir_fmt == 
SaveDirFormat.NESTED:\n            return os.path.join(\n                self.output_dir,\n                \"test\",\n                self.config.tag(),\n                start_time_str,\n            )\n        elif self.save_dir_fmt == SaveDirFormat.FLAT:\n            return os.path.join(\n                self.output_dir,\n                \"metrics\",\n                (\n                    self.config.tag()\n                    if self.extra_tag == \"\"\n                    else os.path.join(self.config.tag(), self.extra_tag)\n                ),\n                start_time_str,\n            )\n        else:\n            raise NotImplementedError\n\n    def save_project_state(self):\n        path_parts = [\n            (\n                self.config.tag()\n                if self.extra_tag == \"\"\n                else os.path.join(self.config.tag(), self.extra_tag)\n            ),\n            self.local_start_time_str,\n        ]\n        if self.save_dir_fmt == SaveDirFormat.NESTED:\n            base_dir = os.path.join(\n                self.output_dir,\n                *path_parts,\n                \"used_configs\",\n            )\n        elif self.save_dir_fmt == SaveDirFormat.FLAT:\n            base_dir = os.path.join(\n                self.output_dir,\n                \"used_configs\",\n                *path_parts,\n            )\n        else:\n            raise NotImplementedError\n        os.makedirs(base_dir, exist_ok=True)\n\n        # Saving current git diff\n        try:\n            sha, diff_str = get_git_diff_of_project()\n            with open(os.path.join(base_dir, f\"{sha}.patch\"), \"w\") as f:\n                f.write(diff_str)\n\n            get_logger().info(f\"Git diff saved to {base_dir}\")\n        except subprocess.CalledProcessError:\n            get_logger().warning(\n                \"Failed to get a git diff of the current project.\"\n                f\" Is it possible that {os.getcwd()} is not under version control?\"\n            
)\n\n        # Saving configs\n        if self.loaded_config_src_files is not None:\n            for src_path in self.loaded_config_src_files:\n                if src_path == CONFIG_KWARGS_STR:\n                    # We also save key-word arguments passed to the experiment\n                    # initializer.\n                    save_path = os.path.join(base_dir, \"config_kwargs.json\")\n                    assert not os.path.exists(\n                        save_path\n                    ), f\"{save_path} should not already exist.\"\n                    with open(save_path, \"w\") as f:\n                        json.dump(json.loads(self.loaded_config_src_files[src_path]), f)\n                    continue\n\n                assert os.path.isfile(src_path), f\"Config file {src_path} not found\"\n                src_path = os.path.abspath(src_path)\n\n                # To prevent overwriting files with the same name, we loop\n                # here until we find a prefix (if necessary) to prevent\n                # name collisions.\n                k = -1\n                while True:\n                    prefix = \"\" if k == -1 else f\"namecollision{k}__\"\n                    k += 1\n                    dst_path = os.path.join(\n                        base_dir,\n                        f\"{prefix}{os.path.basename(src_path)}\",\n                    )\n                    if not os.path.exists(dst_path):\n                        os.makedirs(os.path.dirname(dst_path), exist_ok=True)\n                        with open(src_path, \"r\") as f:\n                            file_contents = f.read()\n                        with open(dst_path, \"w\") as f:\n                            f.write(\n                                f\"### THIS FILE ORIGINALLY LOCATED AT '{src_path}'\\n\\n{file_contents}\"\n                            )\n                        break\n\n        get_logger().info(f\"Config files saved to {base_dir}\")\n        for callback in self.callbacks:\n     
       callback.after_save_project_state(base_dir=base_dir)\n\n    def _update_keys(\n        self,\n        d: Union[Dict[str, Any], str],\n        tag_if_not_a_loss: str,\n        mode: str,\n        stage_component_uuid: Optional[str] = None,\n    ) -> Union[Dict[str, Any], str]:\n        midfix = \"-\" if stage_component_uuid is None else f\"-{stage_component_uuid}-\"\n\n        def _convert(key: str):\n            if key.startswith(\"losses/\"):\n                return f\"{mode}{midfix}{key}\"\n            else:\n                return f\"{mode}{midfix}{tag_if_not_a_loss}/{key}\"\n\n        if isinstance(d, str):\n            return _convert(d)\n        return {_convert(k): v for k, v in d.items()}\n\n    def _process_logging_packages(\n        self,\n        log_writer: Optional[SummaryWriter],\n        pkgs: Union[LoggingPackage, List[LoggingPackage]],\n        last_steps: Optional[int],\n        last_storage_uuid_to_total_experiences: Optional[Dict[str, int]],\n        last_time: Optional[float],\n        all_results: Optional[List[Any]] = None,\n    ):\n        mode = pkgs[0].mode\n        assert all(\n            pkg.mode == mode for pkg in pkgs\n        ), \"All logging packages must be the same mode.\"\n        assert mode == self.mode or (\n            mode == VALID_MODE_STR and self.mode == TRAIN_MODE_STR\n        ), (\n            \"Logging package mode must match the logger mode except when training where the logging package may\"\n            \"be of mode 'valid'.\"\n        )\n        training = mode == TRAIN_MODE_STR  # Are we logging training packages\n\n        current_time = time.time()\n\n        training_steps = pkgs[0].training_steps\n        storage_uuid_to_total_experiences = pkgs[0].storage_uuid_to_total_experiences\n        callback_metric_means = dict()\n\n        def update_keys_misc(\n            key_or_dict: Union[str, Dict[str, Any]],\n            stage_component_uuid: Optional[str] = None,\n        ):\n            # Important to 
use mode and not self.mode here\n            return self._update_keys(\n                d=key_or_dict,\n                tag_if_not_a_loss=\"misc\",\n                mode=mode,\n                stage_component_uuid=stage_component_uuid,\n            )\n\n        def update_keys_metric(\n            key_or_dict: Union[str, Dict[str, Any]],\n            stage_component_uuid: Optional[str] = None,\n        ):\n            # Important to use mode and not self.mode here\n            return self._update_keys(\n                d=key_or_dict,\n                tag_if_not_a_loss=\"metrics\",\n                mode=mode,\n                stage_component_uuid=stage_component_uuid,\n            )\n\n        if training and log_writer is not None:\n            log_writer.add_scalar(\n                tag=update_keys_misc(\"pipeline_stage\"),\n                scalar_value=pkgs[0].pipeline_stage,\n                global_step=training_steps,\n            )\n        callback_metric_means[update_keys_misc(\"pipeline_stage\")] = pkgs[\n            0\n        ].pipeline_stage\n\n        storage_uuid_to_total_experiences_key = {}\n        for storage_uuid, val in storage_uuid_to_total_experiences.items():\n            total_experiences_key = update_keys_misc(\n                f\"{storage_uuid}_total_experiences\"\n            )\n            storage_uuid_to_total_experiences_key[storage_uuid] = total_experiences_key\n\n            if training and log_writer is not None:\n                log_writer.add_scalar(\n                    tag=total_experiences_key,\n                    scalar_value=val,\n                    global_step=training_steps,\n                )\n            callback_metric_means[total_experiences_key] = val\n\n        metrics_and_info_tracker = ScalarMeanTracker()\n        scalar_name_to_total_storage_experience = {}\n        scalar_name_to_total_experiences_key = {}\n        storage_uuid_to_stage_component_uuids = defaultdict(lambda: set())\n        metric_dicts_list, 
render, checkpoint_file_name = [], {}, []\n        tasks_callback_data = []\n\n        for pkg in pkgs:\n            metrics_and_info_tracker.add_scalars(\n                scalars=update_keys_metric(pkg.metrics_tracker.means()),\n                n=update_keys_metric(pkg.metrics_tracker.counts()),\n            )\n            tasks_callback_data.extend(pkg.task_callback_data)\n            metric_dicts_list.extend(pkg.metric_dicts)\n            if pkg.viz_data is not None:\n                render.update(pkg.viz_data)\n            checkpoint_file_name.append(pkg.checkpoint_file_name)\n\n            for (\n                (stage_component_uuid, storage_uuid),\n                info_tracker,\n            ) in pkg.info_trackers.items():\n\n                if stage_component_uuid is not None:\n                    storage_uuid_to_stage_component_uuids[storage_uuid].add(\n                        stage_component_uuid\n                    )\n\n                info_means = update_keys_misc(\n                    info_tracker.means(),\n                    stage_component_uuid,\n                )\n                info_counts = update_keys_misc(\n                    info_tracker.counts(),\n                    stage_component_uuid,\n                )\n                metrics_and_info_tracker.add_scalars(\n                    scalars=info_means,\n                    n=info_counts,\n                )\n\n                total_exp_for_storage = pkg.storage_uuid_to_total_experiences[\n                    storage_uuid\n                ]\n\n                if stage_component_uuid is None:\n                    assert total_exp_for_storage == training_steps\n\n                for scalar_name in info_means:\n                    if scalar_name in scalar_name_to_total_storage_experience:\n                        assert (\n                            total_exp_for_storage\n                            == scalar_name_to_total_storage_experience[scalar_name]\n                        ), (\n           
                 f\"For metric {scalar_name}: there is disagreement between the training steps parameter\"\n                            f\" across different workers ({total_exp_for_storage} !=\"\n                            f\" {scalar_name_to_total_storage_experience[scalar_name]}). This suggests an error in \"\n                            f\" AllenAct, please report this issue at https://github.com/allenai/allenact/issues.\"\n                        )\n                    else:\n                        scalar_name_to_total_storage_experience[scalar_name] = (\n                            total_exp_for_storage\n                        )\n                        scalar_name_to_total_experiences_key[scalar_name] = (\n                            storage_uuid_to_total_experiences_key[storage_uuid]\n                        )\n\n        if any(checkpoint_file_name):\n            ckpt_to_store = None\n            for ckpt in checkpoint_file_name:\n                if ckpt is not None:\n                    ckpt_to_store = ckpt\n            assert ckpt_to_store is not None\n            checkpoint_file_name = [ckpt_to_store]\n        # assert all_equal(\n        #     checkpoint_file_name\n        # ), f\"All {mode} logging packages must have the same checkpoint_file_name.\"\n\n        message = [\n            f\"{mode.upper()}: {training_steps} rollout steps ({pkgs[0].storage_uuid_to_total_experiences})\"\n        ]\n        metrics_and_info_means = metrics_and_info_tracker.means()\n        callback_metric_means.update(metrics_and_info_means)\n\n        for k in sorted(\n            metrics_and_info_means.keys(),\n            key=lambda mean_key: (mean_key.count(\"/\"), mean_key),\n        ):\n            if log_writer is not None:\n                log_writer.add_scalar(\n                    tag=k,\n                    scalar_value=metrics_and_info_means[k],\n                    global_step=scalar_name_to_total_storage_experience.get(\n                        k, 
training_steps\n                    ),\n                )\n            short_key = (\n                \"/\".join(k.split(\"/\")[1:])\n                if k.startswith(f\"{mode}-\") and \"/\" in k\n                else k\n            )\n            message.append(f\"{short_key} {metrics_and_info_means[k]:.3g}\")\n\n        if training:\n            # Log information about FPS and EPS (experiences per second, for non-rollout storage).\n            # Not needed during testing or validation.\n            message += [f\"elapsed_time {(current_time - last_time):.3g}s\"]\n\n            if last_steps > 0:\n                fps = (training_steps - last_steps) / (current_time - last_time)\n                message += [f\"approx_fps {fps:.3g}\"]\n                approx_fps_key = update_keys_misc(\"approx_fps\")\n                if log_writer is not None:\n                    log_writer.add_scalar(approx_fps_key, fps, training_steps)\n                callback_metric_means[approx_fps_key] = fps\n\n            for (\n                storage_uuid,\n                last_total_exp,\n            ) in last_storage_uuid_to_total_experiences.items():\n                if storage_uuid in storage_uuid_to_total_experiences:\n                    cur_total_exp = storage_uuid_to_total_experiences[storage_uuid]\n                    eps = (cur_total_exp - last_total_exp) / (current_time - last_time)\n                    message += [f\"{storage_uuid}/approx_eps {eps:.3g}\"]\n                    for stage_component_uuid in storage_uuid_to_stage_component_uuids[\n                        storage_uuid\n                    ]:\n                        approx_eps_key = update_keys_misc(\n                            f\"approx_eps\",\n                            stage_component_uuid,\n                        )\n                        callback_metric_means[approx_eps_key] = eps\n                        scalar_name_to_total_experiences_key[approx_eps_key] = (\n                            
storage_uuid_to_total_experiences_key[storage_uuid]\n                        )\n\n                        if log_writer is not None:\n                            log_writer.add_scalar(\n                                approx_eps_key,\n                                eps,\n                                cur_total_exp,\n                            )\n\n        metrics_and_info_means_with_metrics_dicts_list = copy.deepcopy(\n            metrics_and_info_means\n        )\n        metrics_and_info_means_with_metrics_dicts_list.update(\n            {\"training_steps\": training_steps, \"tasks\": metric_dicts_list}\n        )\n        if all_results is not None:\n            all_results.append(metrics_and_info_means_with_metrics_dicts_list)\n\n        num_tasks = sum([pkg.num_non_empty_metrics_dicts_added for pkg in pkgs])\n        num_tasks_completed_key = update_keys_misc(\"num_tasks_completed_since_last_log\")\n        if log_writer is not None:\n            log_writer.add_scalar(num_tasks_completed_key, num_tasks, training_steps)\n        callback_metric_means[num_tasks_completed_key] = num_tasks\n\n        message.append(f\"new_tasks_completed {num_tasks}\")\n        if not training:\n            message.append(f\"checkpoint {checkpoint_file_name[0]}\")\n\n        get_logger().info(\" \".join(message))\n\n        for callback in self.callbacks:\n            if mode == TRAIN_MODE_STR:\n                callback.on_train_log(\n                    metrics=metric_dicts_list,\n                    metric_means=callback_metric_means,\n                    step=training_steps,\n                    checkpoint_file_name=checkpoint_file_name[0],\n                    tasks_data=tasks_callback_data,\n                    scalar_name_to_total_experiences_key=scalar_name_to_total_experiences_key,\n                )\n\n            if mode == VALID_MODE_STR:\n                callback.on_valid_log(\n                    metrics=metrics_and_info_means_with_metrics_dicts_list,\n            
        metric_means=callback_metric_means,\n                    step=training_steps,\n                    checkpoint_file_name=checkpoint_file_name[0],\n                    tasks_data=tasks_callback_data,\n                    scalar_name_to_total_experiences_key=scalar_name_to_total_experiences_key,\n                )\n\n            if mode == TEST_MODE_STR:\n                callback.on_test_log(\n                    metrics=metrics_and_info_means_with_metrics_dicts_list,\n                    metric_means=callback_metric_means,\n                    step=training_steps,\n                    checkpoint_file_name=checkpoint_file_name[0],\n                    tasks_data=tasks_callback_data,\n                    scalar_name_to_total_experiences_key=scalar_name_to_total_experiences_key,\n                )\n\n        if self.visualizer is not None:\n            self.visualizer.log(\n                log_writer=log_writer,\n                task_outputs=metric_dicts_list,\n                render=render,\n                num_steps=training_steps,\n            )\n\n        return training_steps, storage_uuid_to_total_experiences, current_time\n\n    def process_valid_package(\n        self,\n        log_writer: Optional[SummaryWriter],\n        pkg: LoggingPackage,\n        all_results: Optional[List[Any]] = None,\n    ):\n        return self._process_logging_packages(\n            log_writer=log_writer,\n            pkgs=[pkg],\n            last_steps=None,\n            last_storage_uuid_to_total_experiences=None,\n            last_time=None,\n            all_results=all_results,\n        )\n\n    def process_train_packages(\n        self,\n        log_writer: Optional[SummaryWriter],\n        pkgs: List[LoggingPackage],\n        last_steps: int,\n        last_storage_uuid_to_total_experiences: Dict[str, int],\n        last_time: float,\n    ):\n        return self._process_logging_packages(\n            log_writer=log_writer,\n            pkgs=pkgs,\n            
last_steps=last_steps,\n            last_storage_uuid_to_total_experiences=last_storage_uuid_to_total_experiences,\n            last_time=last_time,\n        )\n\n    def process_test_packages(\n        self,\n        log_writer: Optional[SummaryWriter],\n        pkgs: List[LoggingPackage],\n        all_results: Optional[List[Any]] = None,\n    ):\n        return self._process_logging_packages(\n            log_writer=log_writer,\n            pkgs=pkgs,\n            last_steps=None,\n            last_storage_uuid_to_total_experiences=None,\n            last_time=None,\n            all_results=all_results,\n        )\n\n    def log_and_close(\n        self,\n        start_time_str: str,\n        nworkers: int,\n        test_steps: Sequence[int] = (),\n        metrics_file: Optional[str] = None,\n    ) -> List[Dict]:\n        ptitle(f\"AllenAct-Logging-{self.local_start_time_str}\")\n        finalized = False\n\n        log_writer: Optional[SummaryWriter] = None\n        if not self.disable_tensorboard:\n            log_writer = SummaryWriter(\n                log_dir=self.log_writer_path(start_time_str),\n                filename_suffix=f\"__{self.mode}_{self.local_start_time_str}\",\n            )\n\n        # To aggregate/buffer metrics from trainers/testers\n        collected: List[LoggingPackage] = []\n        last_train_steps = 0\n        last_storage_uuid_to_total_experiences = {}\n        last_train_time = time.time()\n        # test_steps = sorted(test_steps, reverse=True)\n        eval_results: List[Dict] = []\n        unfinished_workers = nworkers\n\n        try:\n            while True:\n                try:\n                    package: Union[\n                        LoggingPackage, Union[Tuple[str, Any], Tuple[str, Any, Any]]\n                    ] = self.queues[\"results\"].get(timeout=1)\n\n                    if isinstance(package, LoggingPackage):\n                        pkg_mode = package.mode\n\n                        if pkg_mode == 
TRAIN_MODE_STR:\n                            collected.append(package)\n                            if len(collected) >= nworkers:\n\n                                collected = sorted(\n                                    collected,\n                                    key=lambda pkg: (\n                                        pkg.training_steps,\n                                        *sorted(\n                                            pkg.storage_uuid_to_total_experiences.items()\n                                        ),\n                                    ),\n                                )\n\n                                if (\n                                    collected[nworkers - 1].training_steps\n                                    == collected[0].training_steps\n                                    and collected[\n                                        nworkers - 1\n                                    ].storage_uuid_to_total_experiences\n                                    == collected[0].storage_uuid_to_total_experiences\n                                ):  # ensure all workers have provided the same training_steps and total_experiences\n                                    (\n                                        last_train_steps,\n                                        last_storage_uuid_to_total_experiences,\n                                        last_train_time,\n                                    ) = self.process_train_packages(\n                                        log_writer=log_writer,\n                                        pkgs=collected[:nworkers],\n                                        last_steps=last_train_steps,\n                                        last_storage_uuid_to_total_experiences=last_storage_uuid_to_total_experiences,\n                                        last_time=last_train_time,\n                                    )\n                                    collected = collected[nworkers:]\n               
                 elif len(collected) > 2 * nworkers:\n                                    get_logger().warning(\n                                        f\"Unable to aggregate train packages from all {nworkers} workers\"\n                                        f\"after {len(collected)} packages collected\"\n                                    )\n                        elif (\n                            pkg_mode == VALID_MODE_STR\n                        ):  # they all come from a single worker\n                            if (\n                                package.training_steps is not None\n                            ):  # no validation samplers\n                                self.process_valid_package(\n                                    log_writer=log_writer,\n                                    pkg=package,\n                                    all_results=(\n                                        eval_results\n                                        if self._collect_valid_results\n                                        else None\n                                    ),\n                                )\n\n                                if metrics_file is not None:\n                                    with open(\n                                        metrics_file.format(package.training_steps), \"w\"\n                                    ) as f:\n                                        json.dump(\n                                            eval_results[-1],\n                                            f,\n                                            indent=4,\n                                            sort_keys=True,\n                                            cls=NumpyJSONEncoder,\n                                        )\n                                        get_logger().info(\n                                            \"Written valid results file {}\".format(\n                                                metrics_file.format(\n        
                                            package.training_steps\n                                                ),\n                                            )\n                                        )\n\n                            if (\n                                finalized and self.queues[\"checkpoints\"].empty()\n                            ):  # assume queue is actually empty after trainer finished and no checkpoints in queue\n                                break\n                        elif pkg_mode == TEST_MODE_STR:\n                            collected.append(package)\n                            if len(collected) >= nworkers:\n                                collected = sorted(\n                                    collected, key=lambda x: x.training_steps\n                                )  # sort by num_steps\n                                if (\n                                    collected[nworkers - 1].training_steps\n                                    == collected[0].training_steps\n                                ):  # ensure nworkers have provided the same num_steps\n                                    self.process_test_packages(\n                                        log_writer=log_writer,\n                                        pkgs=collected[:nworkers],\n                                        all_results=eval_results,\n                                    )\n\n                                    collected = collected[nworkers:]\n                                    with open(metrics_file, \"w\") as f:\n                                        json.dump(\n                                            eval_results,\n                                            f,\n                                            indent=4,\n                                            sort_keys=True,\n                                            cls=NumpyJSONEncoder,\n                                        )\n                                        
get_logger().info(\n                                            f\"Updated {metrics_file} up to checkpoint\"\n                                            f\" {test_steps[len(eval_results) - 1]}\"\n                                        )\n                        else:\n                            get_logger().error(\n                                f\"Runner received unknown package of type {pkg_mode}\"\n                            )\n                    else:\n                        pkg_mode = package[0]\n\n                        if pkg_mode == \"train_stopped\":\n                            if package[1] == 0:\n                                finalized = True\n                                if not self.running_validation:\n                                    get_logger().info(\n                                        \"Terminating runner after trainer done (no validation)\"\n                                    )\n                                    break\n                            else:\n                                raise Exception(\n                                    f\"Train worker {package[1] - 1} abnormally terminated\"\n                                )\n                        elif pkg_mode == \"valid_stopped\":\n                            raise Exception(\n                                f\"Valid worker {package[1] - 1} abnormally terminated\"\n                            )\n                        elif pkg_mode == \"test_stopped\":\n                            if package[1] == 0:\n                                unfinished_workers -= 1\n                                if unfinished_workers == 0:\n                                    get_logger().info(\n                                        \"Last tester finished. 
Terminating\"\n                                    )\n                                    finalized = True\n                                    break\n                            else:\n                                raise RuntimeError(\n                                    f\"Test worker {package[1] - 1} abnormally terminated\"\n                                )\n                        else:\n                            get_logger().error(\n                                f\"Runner received invalid package tuple {package}\"\n                            )\n                except queue.Empty as _:\n                    if all(\n                        p.exitcode is not None\n                        for p in itertools.chain(*self.processes.values())\n                    ):\n                        break\n        except KeyboardInterrupt:\n            get_logger().info(\"KeyboardInterrupt. Terminating runner.\")\n        except Exception:\n            get_logger().error(\"Encountered Exception. 
Terminating runner.\")\n            get_logger().exception(traceback.format_exc())\n        finally:\n            if finalized:\n                get_logger().info(\"Done\")\n            if log_writer is not None:\n                log_writer.close()\n            self.close()\n            return eval_results\n\n    def get_checkpoint_files(\n        self,\n        checkpoint_path_dir_or_pattern: str,\n        approx_ckpt_step_interval: Optional[int] = None,\n    ):\n        if \"wandb://\" == checkpoint_path_dir_or_pattern[:8]:\n            eval_dir = \"/tmp/wandb_ckpts_to_eval/{}\".format(self.local_start_time_str)\n            os.makedirs(eval_dir, exist_ok=True)\n            return download_checkpoint_from_wandb(\n                checkpoint_path_dir_or_pattern, eval_dir, only_allow_one_ckpt=False\n            )\n\n        if os.path.isdir(checkpoint_path_dir_or_pattern):\n            # The fragment is a path to a directory, lets use this directory\n            # as the base dir to search for checkpoints\n            checkpoint_path_dir_or_pattern = os.path.join(\n                checkpoint_path_dir_or_pattern, \"*.pt\"\n            )\n\n        ckpt_paths = glob.glob(checkpoint_path_dir_or_pattern, recursive=True)\n\n        if len(ckpt_paths) == 0:\n            raise FileNotFoundError(\n                f\"Could not find any checkpoints at {os.path.abspath(checkpoint_path_dir_or_pattern)}, is it possible\"\n                f\" the path has been mispecified?\"\n            )\n\n        step_count_ckpt_pairs = [(self.step_from_checkpoint(p), p) for p in ckpt_paths]\n        step_count_ckpt_pairs.sort()\n        ckpts_paths = [p for _, p in step_count_ckpt_pairs]\n        step_counts = np.array([sc for sc, _ in step_count_ckpt_pairs])\n\n        if approx_ckpt_step_interval is not None:\n            assert (\n                approx_ckpt_step_interval > 0\n            ), \"`approx_ckpt_step_interval` must be >0\"\n            inds_to_eval = set()\n            for i in 
range(\n                math.ceil(step_count_ckpt_pairs[-1][0] / approx_ckpt_step_interval) + 1\n            ):\n                inds_to_eval.add(\n                    int(np.argmin(np.abs(step_counts - i * approx_ckpt_step_interval)))\n                )\n\n            ckpts_paths = [ckpts_paths[ind] for ind in sorted(list(inds_to_eval))]\n        return ckpts_paths\n\n    @staticmethod\n    def step_from_checkpoint(ckpt_path: str) -> int:\n        parts = os.path.basename(ckpt_path).split(\"__\")\n        for part in parts:\n            if \"steps_\" in part:\n                possible_num = part.split(\"_\")[-1].split(\".\")[0]\n                if possible_num.isdigit():\n                    return int(possible_num)\n\n        get_logger().warning(\n            f\"The checkpoint {os.path.basename(ckpt_path)} does not follow the checkpoint naming convention\"\n            f\" used by AllenAct. As a fall back we must load the checkpoint into memory to find the\"\n            f\" training step count, this may increase startup time if the checkpoints are large or many\"\n            f\" must be loaded in sequence.\"\n        )\n        ckpt = torch.load(ckpt_path, map_location=\"cpu\")\n        return ckpt[\"total_steps\"]\n\n    def close(self, verbose=True):\n        if self._is_closed:\n            return\n\n        def logif(s: Union[str, Exception]):\n            if verbose:\n                if isinstance(s, str):\n                    get_logger().info(s)\n                elif isinstance(s, Exception):\n                    get_logger().exception(traceback.format_exc())\n                else:\n                    raise NotImplementedError()\n\n        # First send termination signals\n        for process_type in self.processes:\n            for it, process in enumerate(self.processes[process_type]):\n                if process.is_alive():\n                    logif(f\"Terminating {process_type} {it}\")\n                    process.terminate()\n\n        # Now join 
processes\n        for process_type in self.processes:\n            for it, process in enumerate(self.processes[process_type]):\n                try:\n                    logif(f\"Joining {process_type} {it}\")\n                    process.join(1)\n                    logif(f\"Closed {process_type} {it}\")\n                except Exception as e:\n                    logif(f\"Exception raised when closing {process_type} {it}\")\n                    logif(e)\n\n        self.processes.clear()\n        self._is_closed = True\n\n    def __del__(self):\n        self.close(verbose=True)\n\n    def __enter__(self):\n        return self\n\n    def __exit__(self, exc_type, exc_val, exc_tb):\n        self.close(verbose=True)\n"
  },
  {
    "path": "allenact/algorithms/onpolicy_sync/storage.py",
    "content": "# Original work Copyright (c) Facebook, Inc. and its affiliates.\n# Modified work Copyright (c) Allen Institute for AI\n# This source code is licensed under the MIT license found in the\n# LICENSE file in the root directory of this source tree.\nimport abc\nimport random\nfrom typing import (\n    Union,\n    List,\n    Dict,\n    Tuple,\n    Sequence,\n    cast,\n    Optional,\n    Callable,\n    Any,\n    Generator,\n)\n\nimport gym\nimport numpy as np\nimport torch\n\nimport allenact.utils.spaces_utils as su\nfrom allenact.algorithms.onpolicy_sync.policy import (\n    FullMemorySpecType,\n    ObservationType,\n    ActionType,\n)\nfrom allenact.base_abstractions.misc import Memory\nfrom allenact.utils.system import get_logger\n\n\nclass ExperienceStorage(abc.ABC):\n    @abc.abstractmethod\n    def initialize(self, *, observations: ObservationType, **kwargs):\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def add(\n        self,\n        observations: ObservationType,\n        memory: Optional[Memory],\n        actions: torch.Tensor,\n        action_log_probs: torch.Tensor,\n        value_preds: torch.Tensor,\n        rewards: torch.Tensor,\n        masks: torch.Tensor,\n    ):\n        \"\"\"\n        # Parameters\n        observations : Observations after taking `actions`\n        memory: Memory after having observed the last set of observations.\n        actions: Actions taken to reach the current state, i.e. taking these actions has led to a new state with\n            new `observations`.\n        action_log_probs : Log probs of `actions`\n        value_preds : Value predictions corresponding to the last observations\n            (i.e. 
the states before taking `actions`).\n        rewards : Rewards from taking `actions` in the last set of states.\n        masks : Masks corresponding to the current states, having 0 entries where `observations` correspond to\n            observations from the beginning of a new episode.\n        \"\"\"\n        raise NotImplementedError\n\n    def before_updates(self, **kwargs):\n        pass\n\n    def after_updates(self, **kwargs) -> int:\n        pass\n\n    @abc.abstractmethod\n    def to(self, device: torch.device):\n        pass\n\n    @abc.abstractmethod\n    def set_partition(self, index: int, num_parts: int):\n        raise NotImplementedError\n\n    @property\n    @abc.abstractmethod\n    def total_experiences(self) -> int:\n        raise NotImplementedError\n\n\nclass RolloutStorage(ExperienceStorage, abc.ABC):\n    # noinspection PyMethodOverriding\n    @abc.abstractmethod\n    def initialize(\n        self,\n        *,\n        observations: ObservationType,\n        num_samplers: int,\n        recurrent_memory_specification: FullMemorySpecType,\n        action_space: gym.Space,\n        **kwargs,\n    ):\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def agent_input_for_next_step(self) -> Dict[str, Any]:\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def sampler_select(self, keep_list: Sequence[int]):\n        raise NotImplementedError\n\n\nclass StreamingStorageMixin(abc.ABC):\n    @abc.abstractmethod\n    def next_batch(self) -> Dict[str, Any]:\n        raise NotImplementedError\n\n    def reset_stream(self):\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def empty(self) -> bool:\n        raise NotImplementedError\n\n\nclass MiniBatchStorageMixin(abc.ABC):\n    @abc.abstractmethod\n    def batched_experience_generator(\n        self,\n        num_mini_batch: int,\n    ) -> Generator[Dict[str, Any], None, None]:\n        raise NotImplementedError\n\n\nclass 
RolloutBlockStorage(RolloutStorage, MiniBatchStorageMixin):\n    \"\"\"Class for storing rollout information for RL trainers.\"\"\"\n\n    FLATTEN_SEPARATOR: str = \"._AUTOFLATTEN_.\"\n\n    def __init__(self, init_size: int = 50):\n        self.full_size = init_size\n\n        self.flattened_to_unflattened: Dict[str, Dict[str, List[str]]] = {\n            \"memory\": dict(),\n            \"observations\": dict(),\n        }\n        self.unflattened_to_flattened: Dict[str, Dict[Tuple[str, ...], str]] = {\n            \"memory\": dict(),\n            \"observations\": dict(),\n        }\n\n        self.dim_names = [\"step\", \"sampler\", None]\n\n        self.memory_specification: Optional[FullMemorySpecType] = None\n        self.action_space: Optional[gym.Space] = None\n        self.memory_first_last: Optional[Memory] = None\n        self._observations_full: Memory = Memory()\n\n        self._value_preds_full: Optional[torch.Tensor] = None\n        self._returns_full: Optional[torch.Tensor] = None\n        self._rewards_full: Optional[torch.Tensor] = None\n        self._action_log_probs_full: Optional[torch.Tensor] = None\n\n        self.step = 0\n        self._total_steps = 0\n        self._before_update_called = False\n        self.device = torch.device(\"cpu\")\n\n        # self._advantages and self._normalized_advantages are only computed\n        # when `before_updates` is called\n        self._advantages: Optional[torch.Tensor] = None\n        self._normalized_advantages: Optional[torch.Tensor] = None\n\n        self._masks_full: Optional[torch.Tensor] = None\n        self._actions_full: Optional[torch.Tensor] = None\n        self._prev_actions_full: Optional[torch.Tensor] = None\n\n    def initialize(\n        self,\n        *,\n        observations: ObservationType,\n        num_samplers: int,\n        recurrent_memory_specification: FullMemorySpecType,\n        action_space: gym.Space,\n        **kwargs,\n    ):\n        if self.memory_specification is 
None:\n            self.memory_specification = recurrent_memory_specification or {}\n            self.action_space = action_space\n\n            self.memory_first_last: Memory = self.create_memory(\n                spec=self.memory_specification,\n                num_samplers=num_samplers,\n            ).to(self.device)\n            for key in self.memory_specification:\n                self.flattened_to_unflattened[\"memory\"][key] = [key]\n                self.unflattened_to_flattened[\"memory\"][(key,)] = key\n\n            self._masks_full = torch.zeros(\n                self.full_size + 1, num_samplers, 1, device=self.device\n            )\n            action_flat_dim = su.flatdim(self.action_space)\n            self._actions_full = torch.zeros(\n                self.full_size, num_samplers, action_flat_dim, device=self.device\n            )\n            self._prev_actions_full = torch.zeros(\n                self.full_size + 1, num_samplers, action_flat_dim, device=self.device\n            )\n\n        assert self.step == 0, \"Must call `after_updates` before calling `initialize`\"\n        self.insert_observations(observations=observations, time_step=0)\n        self.prev_actions[0].zero_()  # Have to zero previous actions\n        self.masks[0].zero_()  # Have to zero masks\n\n    @property\n    def total_experiences(self) -> int:\n        return self._total_steps\n\n    @total_experiences.setter\n    def total_experiences(self, value: int):\n        self._total_steps = value\n\n    def set_partition(self, index: int, num_parts: int):\n        pass\n\n    @property\n    def value_preds(self) -> torch.Tensor:\n        return self._value_preds_full[: self.step + 1]\n\n    @property\n    def rewards(self) -> torch.Tensor:\n        return self._rewards_full[: self.step]\n\n    @property\n    def returns(self) -> torch.Tensor:\n        return self._returns_full[: self.step + 1]\n\n    @property\n    def action_log_probs(self) -> torch.Tensor:\n        return 
self._action_log_probs_full[: self.step]\n\n    @property\n    def actions(self) -> torch.Tensor:\n        return self._actions_full[: self.step]\n\n    @property\n    def prev_actions(self) -> torch.Tensor:\n        return self._prev_actions_full[: self.step + 1]\n\n    @property\n    def masks(self) -> torch.Tensor:\n        return self._masks_full[: self.step + 1]\n\n    @property\n    def observations(self) -> Memory:\n        return self._observations_full.slice(dim=0, start=0, stop=self.step + 1)\n\n    @staticmethod\n    def create_memory(\n        spec: Optional[FullMemorySpecType],\n        num_samplers: int,\n    ) -> Memory:\n        if spec is None:\n            return Memory()\n\n        memory = Memory()\n        for key in spec:\n            dims_template, dtype = spec[key]\n\n            dim_names = [\"step\"] + [d[0] for d in dims_template]\n            sampler_dim = dim_names.index(\"sampler\")\n\n            all_dims = [2] + [d[1] for d in dims_template]\n            all_dims[sampler_dim] = num_samplers\n\n            memory.check_append(\n                key=key,\n                tensor=torch.zeros(*all_dims, dtype=dtype),\n                sampler_dim=sampler_dim,\n            )\n\n        return memory\n\n    def to(self, device: torch.device):\n        for key in [\n            \"_observations_full\",\n            \"memory_first_last\",\n            \"_actions_full\",\n            \"_prev_actions_full\",\n            \"_masks_full\",\n            \"_rewards_full\",\n            \"_value_preds_full\",\n            \"_returns_full\",\n            \"_action_log_probs_full\",\n        ]:\n            val = getattr(self, key)\n            if val is not None:\n                setattr(self, key, val.to(device))\n\n        self.device = device\n\n    def insert_observations(\n        self,\n        observations: ObservationType,\n        time_step: int,\n    ):\n        self.insert_tensors(\n            storage=self._observations_full,\n            
storage_name=\"observations\",\n            unflattened=observations,\n            time_step=time_step,\n        )\n\n    def insert_memory(\n        self,\n        memory: Optional[Memory],\n        time_step: int,\n    ):\n        if memory is None:\n            assert len(self.memory_first_last) == 0\n            return\n\n        # `min(time_step, 1)` as we only store the first and last memories:\n        #  * first memory is used for loss computation when the agent model has to compute\n        #    all its outputs again given the full batch.\n        #  * last memory is used by the agent when collecting rollouts\n        self.insert_tensors(\n            storage=self.memory_first_last,\n            storage_name=\"memory\",\n            unflattened=memory,\n            time_step=min(time_step, 1),\n        )\n\n    def insert_tensors(\n        self,\n        storage: Memory,\n        storage_name: str,\n        unflattened: Union[ObservationType, Memory],\n        prefix: str = \"\",\n        path: Sequence[str] = (),\n        time_step: int = 0,\n    ):\n        path = list(path)\n\n        for name in unflattened:\n            current_data = unflattened[name]\n\n            if isinstance(current_data, Dict):\n                self.insert_tensors(\n                    storage=storage,\n                    storage_name=storage_name,\n                    unflattened=cast(ObservationType, current_data),\n                    prefix=prefix + name + self.FLATTEN_SEPARATOR,\n                    path=path + [name],\n                    time_step=time_step,\n                )\n                continue\n\n            sampler_dim = self.dim_names.index(\"sampler\")\n            if isinstance(current_data, tuple):\n                sampler_dim = current_data[1]\n                current_data = current_data[0]\n\n            flatten_name = prefix + name\n            if flatten_name not in storage:\n                assert storage_name == \"observations\"\n                
storage[flatten_name] = (\n                    torch.zeros_like(current_data)  # type:ignore\n                    .repeat(\n                        self.full_size + 1,  # required for observations (and memory)\n                        *(1 for _ in range(len(current_data.shape))),\n                    )\n                    .to(self.device),\n                    sampler_dim,\n                )\n\n                assert (\n                    flatten_name not in self.flattened_to_unflattened[storage_name]\n                ), f\"new flattened name {flatten_name} already existing in flattened spaces[{storage_name}]\"\n                self.flattened_to_unflattened[storage_name][flatten_name] = path + [\n                    name\n                ]\n                self.unflattened_to_flattened[storage_name][\n                    tuple(path + [name])\n                ] = flatten_name\n\n            try:\n                if storage_name == \"observations\":\n                    # current_data has a step dimension\n                    assert time_step >= 0\n                    storage[flatten_name][0][time_step : time_step + 1].copy_(\n                        current_data\n                    )\n                elif storage_name == \"memory\":\n                    # current_data does not have a step dimension\n                    storage[flatten_name][0][time_step].copy_(current_data)\n                else:\n                    raise NotImplementedError\n            except:\n                get_logger().error(\n                    f\"Error while inserting data in storage for name {flatten_name}\"\n                )\n                raise\n\n    def create_tensor_storage(\n        self, num_steps: int, template: torch.Tensor\n    ) -> torch.Tensor:\n        return torch.cat([torch.zeros_like(template).to(self.device)] * num_steps)\n\n    def _double_storage_size(self):\n        def pad_tensor_with_zeros(old_t: Optional[torch.Tensor]):\n            if old_t is None:\n         
       return None\n\n            assert old_t.shape[0] in [self.full_size, self.full_size + 1]\n            padded_t = torch.zeros(\n                old_t.shape[0] + self.full_size,\n                *old_t.shape[1:],\n                dtype=old_t.dtype,\n                device=old_t.device,\n            )\n            padded_t[: old_t.shape[0]] = old_t\n            return padded_t\n\n        for key in list(self._observations_full.keys()):\n            obs_tensor, sampler_dim = self._observations_full[key]\n            self._observations_full[key] = (\n                pad_tensor_with_zeros(obs_tensor),\n                sampler_dim,\n            )\n\n        self._actions_full = pad_tensor_with_zeros(self._actions_full)\n        self._prev_actions_full = pad_tensor_with_zeros(self._prev_actions_full)\n        self._masks_full = pad_tensor_with_zeros(self._masks_full)\n\n        self._rewards_full = pad_tensor_with_zeros(self._rewards_full)\n        self._value_preds_full = pad_tensor_with_zeros(self._value_preds_full)\n        self._returns_full = pad_tensor_with_zeros(self._returns_full)\n        self._action_log_probs_full = pad_tensor_with_zeros(self._action_log_probs_full)\n\n        self.full_size *= 2\n\n    def add(\n        self,\n        observations: ObservationType,\n        memory: Optional[Memory],\n        actions: torch.Tensor,\n        action_log_probs: torch.Tensor,\n        value_preds: torch.Tensor,\n        rewards: torch.Tensor,\n        masks: torch.Tensor,\n    ):\n        \"\"\"See `ExperienceStorage.add` documentation.\"\"\"\n        assert (\n            len(masks.shape) == 2 and masks.shape[1] == 1\n        ), f\"Can only add a single step worth of data at a time (mask shape = {masks.shape}).\"\n\n        self.total_experiences += masks.shape[0]\n\n        if self.step == self.full_size:\n            self._double_storage_size()\n        elif self.step > self.full_size:\n            raise RuntimeError\n\n        
self.insert_observations(observations, time_step=self.step + 1)\n        self.insert_memory(memory, time_step=self.step + 1)\n\n        assert actions.shape == self._actions_full.shape[1:]\n\n        self._actions_full[self.step].copy_(actions)  # type:ignore\n        self._prev_actions_full[self.step + 1].copy_(actions)  # type:ignore\n        self._masks_full[self.step + 1].copy_(masks)  # type:ignore\n\n        if self._rewards_full is None:\n            # We delay the instantiation of storage for `rewards`, `value_preds`, `action_log_probs` and `returns`\n            # as we do not, a priori, know what shape these will be. For instance, if we are in a multi-agent setting\n            # then there may be many rewards (one for each agent).\n            self._rewards_full = self.create_tensor_storage(\n                self.full_size, rewards.unsqueeze(0)\n            )  # add step\n\n            value_returns_template = value_preds.unsqueeze(0)  # add step\n            self._value_preds_full = self.create_tensor_storage(\n                self.full_size + 1, value_returns_template\n            )\n            self._returns_full = self.create_tensor_storage(\n                self.full_size + 1, value_returns_template\n            )\n\n            self._action_log_probs_full = self.create_tensor_storage(\n                self.full_size, action_log_probs.unsqueeze(0)\n            )\n\n        self._value_preds_full[self.step].copy_(value_preds)  # type:ignore\n        self._rewards_full[self.step].copy_(rewards)  # type:ignore\n        self._action_log_probs_full[self.step].copy_(  # type:ignore\n            action_log_probs\n        )\n\n        self.step += 1\n        self._before_update_called = False\n\n        # We set the below to be None just for extra safety.\n        self._advantages = None\n        self._normalized_advantages = None\n\n    def sampler_select(self, keep_list: Sequence[int]):\n        keep_list = list(keep_list)\n        if 
self._actions_full.shape[1] == len(keep_list):  # samplers dim\n            return  # we are keeping everything, no need to copy\n\n        self._observations_full = self._observations_full.sampler_select(keep_list)\n        self.memory_first_last = self.memory_first_last.sampler_select(keep_list)\n        self._actions_full = self._actions_full[:, keep_list]\n        self._prev_actions_full = self._prev_actions_full[:, keep_list]\n        self._action_log_probs_full = self._action_log_probs_full[:, keep_list]\n        self._masks_full = self._masks_full[:, keep_list]\n\n        if self._rewards_full is not None:\n            self._value_preds_full = self._value_preds_full[:, keep_list]\n            self._rewards_full = self._rewards_full[:, keep_list]\n            self._returns_full = self._returns_full[:, keep_list]\n\n    def before_updates(\n        self,\n        *,\n        next_value: torch.Tensor,\n        use_gae: bool,\n        gamma: float,\n        tau: float,\n        adv_stats_callback: Callable[[torch.Tensor], Dict[str, torch.Tensor]],\n        **kwargs,\n    ):\n        assert len(kwargs) == 0\n        self.compute_returns(\n            next_value=next_value,\n            use_gae=use_gae,\n            gamma=gamma,\n            tau=tau,\n        )\n\n        self._advantages = self.returns[:-1] - self.value_preds[:-1]\n\n        adv_stats = adv_stats_callback(self._advantages)\n        self._normalized_advantages = (self._advantages - adv_stats[\"mean\"]) / (\n            adv_stats[\"std\"] + 1e-5\n        )\n\n        self._before_update_called = True\n\n    def after_updates(self, **kwargs):\n        assert len(kwargs) == 0\n\n        for storage in [self.observations, self.memory_first_last]:\n            for key in storage:\n                storage[key][0][0].copy_(storage[key][0][-1])\n\n        if self._masks_full is not None:\n            self.masks[0].copy_(self.masks[-1])\n\n        if self._prev_actions_full is not None:\n            
self.prev_actions[0].copy_(self.prev_actions[-1])\n\n        self._before_update_called = False\n        self._advantages = None\n        self._normalized_advantages = None\n        self.step = 0\n\n    @staticmethod\n    def _extend_tensor_with_ones(stored_tensor: torch.Tensor, desired_num_dims: int):\n        # Ensure broadcast to all flattened dimensions\n        extended_shape = stored_tensor.shape + (1,) * (\n            desired_num_dims - len(stored_tensor.shape)\n        )\n        return stored_tensor.view(*extended_shape)\n\n    def compute_returns(\n        self, next_value: torch.Tensor, use_gae: bool, gamma: float, tau: float\n    ):\n        extended_mask = self._extend_tensor_with_ones(\n            self.masks, desired_num_dims=len(self.value_preds.shape)\n        )\n        extended_rewards = self._extend_tensor_with_ones(\n            self.rewards, desired_num_dims=len(self.value_preds.shape)\n        )\n\n        if use_gae:\n            self.value_preds[-1] = next_value\n            gae = 0\n            for step in reversed(range(extended_rewards.shape[0])):\n                delta = (\n                    extended_rewards[step]\n                    + gamma * self.value_preds[step + 1] * extended_mask[step + 1]\n                    - self.value_preds[step]\n                )\n                gae = delta + gamma * tau * extended_mask[step + 1] * gae  # type:ignore\n                self.returns[step] = gae + self.value_preds[step]\n        else:\n            self.returns[-1] = next_value\n            for step in reversed(range(extended_rewards.shape[0])):\n                self.returns[step] = (\n                    self.returns[step + 1] * gamma * extended_mask[step + 1]\n                    + extended_rewards[step]\n                )\n\n    def batched_experience_generator(\n        self,\n        num_mini_batch: int,\n    ):\n        assert self._before_update_called, (\n            \"self._before_update_called() must be called before\"\n           
 \" attempting to generated batched rollouts.\"\n        )\n        num_samplers = self.rewards.shape[1]\n        assert num_samplers >= num_mini_batch, (\n            f\"The number of task samplers ({num_samplers}) \"\n            f\"must be greater than or equal to the number of \"\n            f\"mini batches ({num_mini_batch}).\"\n        )\n\n        inds = np.round(\n            np.linspace(0, num_samplers, num_mini_batch + 1, endpoint=True)\n        ).astype(np.int32)\n        pairs = list(zip(inds[:-1], inds[1:]))\n        random.shuffle(pairs)\n\n        for start_ind, end_ind in pairs:\n            cur_samplers = list(range(start_ind, end_ind))\n\n            memory_batch = self.memory_first_last.step_squeeze(0).sampler_select(\n                cur_samplers\n            )\n            observations_batch = self.unflatten_observations(\n                self.observations.slice(dim=0, stop=-1).sampler_select(cur_samplers)\n            )\n\n            actions_batch = []\n            prev_actions_batch = []\n            value_preds_batch = []\n            return_batch = []\n            masks_batch = []\n            old_action_log_probs_batch = []\n            adv_targ = []\n            norm_adv_targ = []\n\n            for ind in cur_samplers:\n                actions_batch.append(self.actions[:, ind])\n                prev_actions_batch.append(self.prev_actions[:-1, ind])\n                value_preds_batch.append(self.value_preds[:-1, ind])\n                return_batch.append(self.returns[:-1, ind])\n                masks_batch.append(self.masks[:-1, ind])\n                old_action_log_probs_batch.append(self.action_log_probs[:, ind])\n\n                adv_targ.append(self._advantages[:, ind])\n                norm_adv_targ.append(self._normalized_advantages[:, ind])\n\n            actions_batch = torch.stack(actions_batch, 1)  # type:ignore\n            prev_actions_batch = torch.stack(prev_actions_batch, 1)  # type:ignore\n            value_preds_batch 
= torch.stack(value_preds_batch, 1)  # type:ignore\n            return_batch = torch.stack(return_batch, 1)  # type:ignore\n            masks_batch = torch.stack(masks_batch, 1)  # type:ignore\n            old_action_log_probs_batch = torch.stack(  # type:ignore\n                old_action_log_probs_batch, 1\n            )\n            adv_targ = torch.stack(adv_targ, 1)  # type:ignore\n            norm_adv_targ = torch.stack(norm_adv_targ, 1)  # type:ignore\n\n            yield {\n                \"observations\": observations_batch,\n                \"memory\": memory_batch,\n                \"actions\": su.unflatten(self.action_space, actions_batch),\n                \"prev_actions\": su.unflatten(self.action_space, prev_actions_batch),\n                \"values\": value_preds_batch,\n                \"returns\": return_batch,\n                \"masks\": masks_batch,\n                \"old_action_log_probs\": old_action_log_probs_batch,\n                \"adv_targ\": adv_targ,\n                \"norm_adv_targ\": norm_adv_targ,\n                \"bsize\": int(np.prod(masks_batch.shape[:2])),\n            }\n\n    def unflatten_observations(self, flattened_batch: Memory) -> ObservationType:\n        result: ObservationType = {}\n        for name in flattened_batch:\n            full_path = self.flattened_to_unflattened[\"observations\"][name]\n            cur_dict = result\n            for part in full_path[:-1]:\n                if part not in cur_dict:\n                    cur_dict[part] = {}\n                cur_dict = cast(ObservationType, cur_dict[part])\n            cur_dict[full_path[-1]] = flattened_batch[name][0]\n        return result\n\n    def pick_observation_step(self, step: int) -> ObservationType:\n        return self.unflatten_observations(self.observations.step_select(step))\n\n    def pick_memory_step(self, step: int) -> Memory:\n        assert step in [0, self.step, -1], \"Can only access the first or last memory.\"\n        return 
self.memory_first_last.step_squeeze(min(step, 1))\n\n    def pick_prev_actions_step(self, step: int) -> ActionType:\n        return su.unflatten(self.action_space, self.prev_actions[step : step + 1])\n\n    def agent_input_for_next_step(self) -> Dict[str, Any]:\n        return {\n            \"observations\": self.pick_observation_step(self.step),\n            \"memory\": self.pick_memory_step(self.step),\n            \"prev_actions\": self.pick_prev_actions_step(self.step),\n            \"masks\": self.masks[self.step : self.step + 1],\n        }\n"
  },
  {
    "path": "allenact/algorithms/onpolicy_sync/vector_sampled_tasks.py",
    "content": "# Original work Copyright (c) Facebook, Inc. and its affiliates.\n# Modified work Copyright (c) Allen Institute for AI\n# This source code is licensed under the MIT license found in the\n# LICENSE file in the root directory of this source tree.\nimport os\nimport signal\nimport time\nimport traceback\nfrom multiprocessing.connection import Connection\nfrom multiprocessing.context import BaseContext\nfrom multiprocessing.process import BaseProcess\nfrom threading import Thread\nfrom typing import (\n    Any,\n    Callable,\n    Dict,\n    Generator,\n    Iterator,\n    List,\n    Optional,\n    Sequence,\n    Set,\n    Tuple,\n    Union,\n    cast,\n)\n\nimport numpy as np\nfrom gym.spaces.dict import Dict as SpaceDict\nfrom setproctitle import setproctitle as ptitle\n\nfrom allenact.base_abstractions.misc import RLStepResult\nfrom allenact.base_abstractions.sensor import SensorSuite, Sensor\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.utils.misc_utils import partition_sequence\nfrom allenact.utils.system import get_logger\nfrom allenact.utils.tensor_utils import tile_images\n\ntry:\n    # Use torch.multiprocessing if we can.\n    # We have yet to find a reason to not use it and\n    # you are required to use it when sending a torch.Tensor\n    # between processes\n    import torch.multiprocessing as mp\nexcept ImportError:\n    import multiprocessing as mp  # type: ignore\n\nDEFAULT_MP_CONTEXT_TYPE = \"forkserver\"\nCOMPLETE_TASK_METRICS_KEY = \"__AFTER_TASK_METRICS__\"\nCOMPLETE_TASK_CALLBACK_KEY = \"__AFTER_TASK_CALLBACK__\"\n\nSTEP_COMMAND = \"step\"\nNEXT_TASK_COMMAND = \"next_task\"\nRENDER_COMMAND = \"render\"\nCLOSE_COMMAND = \"close\"\nOBSERVATION_SPACE_COMMAND = \"observation_space\"\nACTION_SPACE_COMMAND = \"action_space\"\nCALL_COMMAND = \"call\"\nSAMPLER_COMMAND = \"call_sampler\"\nATTR_COMMAND = \"attr\"\nSAMPLER_ATTR_COMMAND = \"sampler_attr\"\nRESET_COMMAND = \"reset\"\nSEED_COMMAND = \"seed\"\nPAUSE_COMMAND 
= \"pause\"\nRESUME_COMMAND = \"resume\"\n\n\nclass DelaySignalHandling:\n    # Modified from https://stackoverflow.com/a/21919644\n    def __init__(self):\n        self.int_signal_received: Optional[Any] = None\n        self.term_signal_received: Optional[Any] = None\n        self.old_int_handler = None\n        self.old_term_handler = None\n\n    def __enter__(self):\n        self.int_signal_received: Optional[Any] = None\n        self.term_signal_received: Optional[Any] = None\n        self.old_int_handler = signal.signal(signal.SIGINT, self.int_handler)\n        self.old_term_handler = signal.signal(signal.SIGTERM, self.term_handler)\n\n    def int_handler(self, sig, frame):\n        self.int_signal_received = (sig, frame)\n        get_logger().debug(\"SIGINT received. Delaying KeyboardInterrupt.\")\n\n    def term_handler(self, sig, frame):\n        self.term_signal_received = (sig, frame)\n        get_logger().debug(\"SIGTERM received. Delaying termination.\")\n\n    def __exit__(self, type, value, traceback):\n        signal.signal(signal.SIGINT, self.old_int_handler)\n        signal.signal(signal.SIGTERM, self.old_term_handler)\n        if self.term_signal_received:\n            # For some reason there appear to be cases where the original termination\n            # handler is not callable. It is unclear to me exactly why this is the case\n            # but here we add a guard to double check that the handler is callable and,\n            # if it's not, we re-send the termination signal to the process and let\n            # the python internals handle it (note that we've already reset the termination\n            # handler to what it was originally above in the signal.signal(...) 
code).\n            if callable(self.old_term_handler):\n                self.old_term_handler(*self.term_signal_received)\n            else:\n                get_logger().debug(\n                    \"Termination handler could not be called after delaying signal handling.\"\n                    f\" Resending the SIGTERM signal. Last (sig, frame) == ({self.term_signal_received}).\"\n                )\n                os.kill(os.getpid(), signal.SIGTERM)\n\n        if self.int_signal_received:\n            if callable(self.old_int_handler):\n                self.old_int_handler(*self.int_signal_received)\n            else:\n                signal.default_int_handler(*self.int_signal_received)\n\n\nclass VectorSampledTasks:\n    \"\"\"Vectorized collection of tasks. Creates multiple processes where each\n    process runs its own TaskSampler. Each process generates one Task from its\n    TaskSampler at a time and this class allows for interacting with these\n    tasks in a vectorized manner. When a task on a process completes, the\n    process samples another task from its task sampler. All the tasks are\n    synchronized (for step and new_task methods).\n\n    # Attributes\n\n    make_sampler_fn : function which creates a single TaskSampler.\n    sampler_fn_args : sequence of dictionaries describing the args\n        to pass to make_sampler_fn on each individual process.\n    auto_resample_when_done : automatically sample a new Task from the TaskSampler when\n        the Task completes. If False, a new Task will not be resampled until all\n        Tasks on all processes have completed. This functionality is provided for seamless training\n        of vectorized Tasks.\n    multiprocessing_start_method : the multiprocessing method used to\n        spawn worker processes. Valid methods are\n        ``{'spawn', 'forkserver', 'fork'}`` ``'forkserver'`` is the\n        recommended method as it works well with CUDA. 
If\n        ``'fork'`` is used, the subprocess must be started before\n        any other GPU usage.\n    \"\"\"\n\n    observation_space: SpaceDict\n    _workers: List[Union[mp.Process, Thread, BaseProcess]]\n    _is_waiting: bool\n    _num_task_samplers: int\n    _auto_resample_when_done: bool\n    _mp_ctx: BaseContext\n    _connection_read_fns: List[Callable[[], Any]]\n    _connection_write_fns: List[Callable[[Any], None]]\n    _read_timeout: Optional[float]\n\n    def __init__(\n        self,\n        make_sampler_fn: Callable[..., TaskSampler],\n        sampler_fn_args: Sequence[Dict[str, Any]] = None,\n        callback_sensors: Optional[Sequence[Sensor]] = None,\n        auto_resample_when_done: bool = True,\n        multiprocessing_start_method: Optional[str] = \"forkserver\",\n        mp_ctx: Optional[BaseContext] = None,\n        should_log: bool = True,\n        max_processes: Optional[int] = None,\n        read_timeout: Optional[\n            float\n        ] = 60,  # Seconds to wait for a task to return a response before timing out\n    ) -> None:\n\n        self._is_waiting = False\n        self._is_closed = True\n        self.should_log = should_log\n        self.max_processes = max_processes\n        self.read_timeout = read_timeout\n\n        assert (\n            sampler_fn_args is not None and len(sampler_fn_args) > 0\n        ), \"number of processes to be created should be greater than 0\"\n\n        self._num_task_samplers = len(sampler_fn_args)\n        self._num_processes = (\n            self._num_task_samplers\n            if max_processes is None\n            else min(max_processes, self._num_task_samplers)\n        )\n\n        self._auto_resample_when_done = auto_resample_when_done\n\n        assert (multiprocessing_start_method is None) != (\n            mp_ctx is None\n        ), \"Exactly one of `multiprocessing_start_method`, and `mp_ctx` must be not None.\"\n        if multiprocessing_start_method is not None:\n            assert 
multiprocessing_start_method in self._valid_start_methods, (\n                \"multiprocessing_start_method must be one of {}. Got '{}'\"\n            ).format(self._valid_start_methods, multiprocessing_start_method)\n            self._mp_ctx = mp.get_context(multiprocessing_start_method)\n        else:\n            self._mp_ctx = cast(BaseContext, mp_ctx)\n\n        self.npaused_per_process = [0] * self._num_processes\n        self.sampler_index_to_process_ind_and_subprocess_ind: Optional[\n            List[List[int]]\n        ] = None\n        self._reset_sampler_index_to_process_ind_and_subprocess_ind()\n\n        self._workers: Optional[List[Union[mp.Process, Thread, BaseProcess]]] = None\n        for args in sampler_fn_args:\n            args[\"mp_ctx\"] = self._mp_ctx\n        (\n            connection_poll_fns,\n            connection_read_fns,\n            self._connection_write_fns,\n        ) = self._spawn_workers(  # noqa\n            make_sampler_fn=make_sampler_fn,\n            sampler_fn_args_list=[\n                args_list for args_list in self._partition_to_processes(sampler_fn_args)\n            ],\n            callback_sensor_suite=(\n                SensorSuite(callback_sensors)\n                if isinstance(callback_sensors, Sequence)\n                else callback_sensors\n            ),\n        )\n\n        self._connection_read_fns = [\n            self._create_read_function_with_timeout(\n                read_fn=read_fn, poll_fn=poll_fn, timeout=self.read_timeout\n            )\n            for read_fn, poll_fn in zip(connection_read_fns, connection_poll_fns)\n        ]\n\n        self._is_closed = False\n\n        for write_fn in self._connection_write_fns:\n            write_fn((OBSERVATION_SPACE_COMMAND, None))\n\n        # Note that we increase the read timeout below as initialization can take some time\n        observation_spaces = [\n            space\n            for read_fn in self._connection_read_fns\n            for space in 
read_fn(timeout_to_use=5 * self.read_timeout if self.read_timeout is not None else None)  # type: ignore\n        ]\n\n        if any(os is None for os in observation_spaces):\n            raise NotImplementedError(\n                \"It appears that the `all_observation_spaces_equal`\"\n                \" is not True for some task sampler created by\"\n                \" VectorSampledTasks. This is not currently supported.\"\n            )\n\n        if any(observation_spaces[0] != os for os in observation_spaces):\n            raise NotImplementedError(\n                \"It appears that the observation spaces of the samplers\"\n                \" created in VectorSampledTasks are not equal.\"\n                \" This is not currently supported.\"\n            )\n\n        self.observation_space = observation_spaces[0]\n        for write_fn in self._connection_write_fns:\n            write_fn((ACTION_SPACE_COMMAND, None))\n        self.action_spaces = [\n            space for read_fn in self._connection_read_fns for space in read_fn()\n        ]\n\n    @staticmethod\n    def _create_read_function_with_timeout(\n        *,\n        read_fn: Callable[[], Any],\n        poll_fn: Callable[[float], bool],\n        timeout: Optional[float],\n    ) -> Callable[[], Any]:\n        def read_with_timeout(timeout_to_use: Optional[float] = timeout):\n            if timeout_to_use is not None:\n                # noinspection PyArgumentList\n                if not poll_fn(timeout=timeout_to_use):\n                    raise TimeoutError(\n                        f\"Did not receive output from `VectorSampledTask` worker for {timeout_to_use} seconds.\"\n                    )\n\n            return read_fn()\n\n        return read_with_timeout\n\n    def _reset_sampler_index_to_process_ind_and_subprocess_ind(self):\n        self.sampler_index_to_process_ind_and_subprocess_ind = [\n            [i, j]\n            for i, part in enumerate(\n                partition_sequence([1] * 
self._num_task_samplers, self._num_processes)\n            )\n            for j in range(len(part))\n        ]\n\n    def _partition_to_processes(self, seq: Union[Iterator, Sequence]):\n        subparts_list: List[List] = [[] for _ in range(self._num_processes)]\n\n        seq = list(seq)\n        assert len(seq) == len(self.sampler_index_to_process_ind_and_subprocess_ind)\n\n        for sampler_index, (process_ind, subprocess_ind) in enumerate(\n            self.sampler_index_to_process_ind_and_subprocess_ind\n        ):\n            assert len(subparts_list[process_ind]) == subprocess_ind\n            subparts_list[process_ind].append(seq[sampler_index])\n\n        return subparts_list\n\n    @property\n    def is_closed(self) -> bool:\n        \"\"\"Has the vector task been closed.\"\"\"\n        return self._is_closed\n\n    @property\n    def num_unpaused_tasks(self) -> int:\n        \"\"\"Number of unpaused tasks.\n\n        # Returns\n\n        Number of task samplers that are not currently paused.\n        \"\"\"\n        return self._num_task_samplers - sum(self.npaused_per_process)\n\n    @property\n    def mp_ctx(self):\n        \"\"\"Get the multiprocessing context used by the vector task.\n\n        # Returns\n\n        The multiprocessing context.\n        \"\"\"\n        return self._mp_ctx\n\n    @staticmethod\n    def _task_sampling_loop_worker(\n        worker_id: Union[int, str],\n        connection_read_fn: Callable,\n        connection_write_fn: Callable,\n        make_sampler_fn: Callable[..., TaskSampler],\n        sampler_fn_args_list: List[Dict[str, Any]],\n        callback_sensor_suite: Optional[SensorSuite],\n        auto_resample_when_done: bool,\n        should_log: bool,\n        child_pipe: Optional[Connection] = None,\n        parent_pipe: Optional[Connection] = None,\n    ) -> None:\n        \"\"\"process worker for creating and interacting with the\n        Tasks/TaskSampler.\"\"\"\n\n        ptitle(f\"VectorSampledTask: {worker_id}\")\n\n        
sp_vector_sampled_tasks = SingleProcessVectorSampledTasks(\n            make_sampler_fn=make_sampler_fn,\n            sampler_fn_args_list=sampler_fn_args_list,\n            callback_sensor_suite=callback_sensor_suite,\n            auto_resample_when_done=auto_resample_when_done,\n            should_log=should_log,\n        )\n\n        if parent_pipe is not None:\n            parent_pipe.close()  # Means this pipe will close when the calling process closes it\n        try:\n            while True:\n                read_input = connection_read_fn()\n\n                # TODO: Was the below necessary?\n                # with DelaySignalHandling():\n                #     # Delaying signal handling here is necessary to ensure that we don't\n                #     # (when processing a SIGTERM/SIGINT signal) attempt to send data to\n                #     # a generator while it is already processing other data.\n                if len(read_input) == 3:\n                    sampler_index, command, data = read_input\n\n                    assert command != CLOSE_COMMAND, \"Must close all processes at once.\"\n                    assert (\n                        command != RESUME_COMMAND\n                    ), \"Must resume all task samplers at once.\"\n\n                    if command == PAUSE_COMMAND:\n                        sp_vector_sampled_tasks.pause_at(sampler_index=sampler_index)\n                        connection_write_fn(\"done\")\n                    else:\n                        connection_write_fn(\n                            sp_vector_sampled_tasks.command_at(\n                                sampler_index=sampler_index,\n                                command=command,\n                                data=data,\n                            )\n                        )\n                else:\n                    commands, data_list = read_input\n\n                    assert (\n                        commands != PAUSE_COMMAND\n                    ), 
\"Cannot pause all task samplers at once.\"\n\n                    if commands == CLOSE_COMMAND:\n                        # Will close the `sp_vector_sampled_tasks` in the `finally` clause below\n                        break\n\n                    elif commands == RESUME_COMMAND:\n                        sp_vector_sampled_tasks.resume_all()\n                        connection_write_fn(\"done\")\n                    else:\n                        if isinstance(commands, str):\n                            commands = [\n                                commands\n                            ] * sp_vector_sampled_tasks.num_unpaused_tasks\n\n                        connection_write_fn(\n                            sp_vector_sampled_tasks.command(\n                                commands=commands, data_list=data_list\n                            )\n                        )\n\n        except KeyboardInterrupt:\n            if should_log:\n                get_logger().info(f\"Worker {worker_id} KeyboardInterrupt\")\n        except Exception as e:\n            get_logger().error(\n                f\"Worker {worker_id} encountered an exception:\\n{traceback.format_exc()}\"\n            )\n            raise e\n        finally:\n            try:\n                sp_vector_sampled_tasks.close()\n            except Exception:\n                pass\n\n            if child_pipe is not None:\n                child_pipe.close()\n            if should_log:\n                get_logger().info(f\"Worker {worker_id} closing.\")\n\n    def _spawn_workers(\n        self,\n        make_sampler_fn: Callable[..., TaskSampler],\n        sampler_fn_args_list: Sequence[Sequence[Dict[str, Any]]],\n        callback_sensor_suite: Optional[SensorSuite],\n    ) -> Tuple[\n        List[Callable[[], bool]], List[Callable[[], Any]], List[Callable[[Any], None]]\n    ]:\n        parent_connections, worker_connections = zip(\n            *[self._mp_ctx.Pipe(duplex=True) for _ in 
range(self._num_processes)]\n        )\n        self._workers = []\n        k = 0\n        id: Union[int, str]\n        for id, (worker_conn, parent_conn, current_sampler_fn_args_list) in enumerate(\n            zip(worker_connections, parent_connections, sampler_fn_args_list)\n        ):\n            if len(current_sampler_fn_args_list) != 1:\n                id = f\"{id}({k}-{k + len(current_sampler_fn_args_list) - 1})\"\n                k += len(current_sampler_fn_args_list)\n\n            if self.should_log:\n                get_logger().info(\n                    f\"Starting {id}-th VectorSampledTask worker with args {current_sampler_fn_args_list}\"\n                )\n\n            ps = self._mp_ctx.Process(  # type: ignore\n                target=self._task_sampling_loop_worker,\n                kwargs=dict(\n                    worker_id=id,\n                    connection_read_fn=worker_conn.recv,\n                    connection_write_fn=worker_conn.send,\n                    make_sampler_fn=make_sampler_fn,\n                    sampler_fn_args_list=current_sampler_fn_args_list,\n                    callback_sensor_suite=callback_sensor_suite,\n                    auto_resample_when_done=self._auto_resample_when_done,\n                    should_log=self.should_log,\n                    child_pipe=worker_conn,\n                    parent_pipe=parent_conn,\n                ),\n            )\n            self._workers.append(ps)\n            ps.daemon = True\n            ps.start()\n            worker_conn.close()  # Means this pipe will close when the child process closes it\n            time.sleep(\n                0.1\n            )  # Useful to ensure things don't lock up when spawning many envs\n        return (\n            [p.poll for p in parent_connections],\n            [p.recv for p in parent_connections],\n            [p.send for p in parent_connections],\n        )\n\n    def next_task(self, **kwargs):\n        \"\"\"Move to the next Task 
for all TaskSamplers.\n\n        # Parameters\n\n        kwargs : key word arguments passed to the `next_task` function of the samplers.\n\n        # Returns\n\n        List of initial observations for each of the new tasks.\n        \"\"\"\n        return self.command(\n            commands=NEXT_TASK_COMMAND, data_list=[kwargs] * self.num_unpaused_tasks\n        )\n\n    def get_observations(self):\n        \"\"\"Get observations for all unpaused tasks.\n\n        # Returns\n\n        List of observations for each of the unpaused tasks.\n        \"\"\"\n        return self.call(\n            [\"get_observations\"] * self.num_unpaused_tasks,\n        )\n\n    def command_at(\n        self, sampler_index: int, command: str, data: Optional[Any] = None\n    ) -> Any:\n        \"\"\"Runs the command on the selected task and returns the result.\n\n        # Parameters\n\n\n        # Returns\n\n        Result of the command.\n        \"\"\"\n        self._is_waiting = True\n        (\n            process_ind,\n            subprocess_ind,\n        ) = self.sampler_index_to_process_ind_and_subprocess_ind[sampler_index]\n        self._connection_write_fns[process_ind]((subprocess_ind, command, data))\n        result = self._connection_read_fns[process_ind]()\n        self._is_waiting = False\n        return result\n\n    def call_at(\n        self,\n        sampler_index: int,\n        function_name: str,\n        function_args: Optional[List[Any]] = None,\n    ) -> Any:\n        \"\"\"Calls a function (which is passed by name) on the selected task and\n        returns the result.\n\n        # Parameters\n\n        index : Which task to call the function on.\n        function_name : The name of the function to call on the task.\n        function_args : Optional function args.\n\n        # Returns\n\n        Result of calling the function.\n        \"\"\"\n        return self.command_at(\n            sampler_index=sampler_index,\n            command=CALL_COMMAND,\n           
 data=(function_name, function_args),\n        )\n\n    def next_task_at(self, sampler_index: int) -> List[RLStepResult]:\n        \"\"\"Move to the next Task from the TaskSampler at sampler_index\n        in the vector.\n\n        # Parameters\n\n        sampler_index : Index of the sampler to be reset.\n\n        # Returns\n\n        List of length one containing the observations of the newly sampled task.\n        \"\"\"\n        return [\n            self.command_at(\n                sampler_index=sampler_index, command=NEXT_TASK_COMMAND, data=None\n            )\n        ]\n\n    def step_at(self, sampler_index: int, action: Any) -> List[RLStepResult]:\n        \"\"\"Step in the task at sampler_index in the vector.\n\n        # Parameters\n\n        sampler_index : Index of the sampler whose task should be stepped.\n        action : The action to take.\n\n        # Returns\n\n        List containing the output of step method on the task in the indexed process.\n        \"\"\"\n        return [\n            self.command_at(\n                sampler_index=sampler_index, command=STEP_COMMAND, data=action\n            )\n        ]\n\n    def async_step(self, actions: Sequence[Any]) -> None:\n        \"\"\"Asynchronously step in the vectorized Tasks.\n\n        # Parameters\n\n        actions : actions to be performed in the vectorized Tasks.\n        \"\"\"\n        self._is_waiting = True\n        for write_fn, action in zip(\n            self._connection_write_fns, self._partition_to_processes(actions)\n        ):\n            write_fn((STEP_COMMAND, action))\n\n    def wait_step(self) -> List[Dict[str, Any]]:\n        \"\"\"Wait until all the asynchronous processes have synchronized.\"\"\"\n        observations = []\n        for read_fn in self._connection_read_fns:\n            observations.extend(read_fn())\n        self._is_waiting = False\n        return observations\n\n    def step(self, actions: Sequence[Any]):\n        \"\"\"Perform actions in the vectorized 
tasks.\n\n        # Parameters\n\n        actions: List of size _num_samplers containing action to be taken in each task.\n\n        # Returns\n\n        List of outputs from the step method of tasks.\n        \"\"\"\n        self.async_step(actions)\n        return self.wait_step()\n\n    def reset_all(self):\n        \"\"\"Reset all task samplers to their initial state (except for the RNG\n        seed).\"\"\"\n        self.command(commands=RESET_COMMAND, data_list=None)\n\n    def set_seeds(self, seeds: List[int]):\n        \"\"\"Sets new tasks' RNG seeds.\n\n        # Parameters\n\n        seeds: List of size _num_samplers containing new RNG seeds.\n        \"\"\"\n        self.command(commands=SEED_COMMAND, data_list=seeds)\n\n    def close(self) -> None:\n        if self._is_closed:\n            return\n\n        if self._is_waiting:\n            for read_fn in self._connection_read_fns:\n                try:\n                    # noinspection PyArgumentList\n                    read_fn(0)  # Time out immediately\n                except Exception:\n                    pass\n\n        for write_fn in self._connection_write_fns:\n            try:\n                write_fn((CLOSE_COMMAND, None))\n            except Exception:\n                pass\n\n        for process in self._workers:\n            try:\n                process.join(timeout=0.1)\n            except Exception:\n                pass\n\n        for process in self._workers:\n            if process.is_alive():\n                process.kill()\n\n        self._is_closed = True\n\n    def pause_at(self, sampler_index: int) -> None:\n        \"\"\"Pauses computation on the Task in process `index` without destroying\n        the Task. This is useful for not needing to call steps on all Tasks\n        when only some are active (for example during the last samples of\n        running eval).\n\n        # Parameters\n\n        index : which process to pause. 
All indexes after this\n            one will be shifted down by one.\n        \"\"\"\n        if self._is_waiting:\n            for read_fn in self._connection_read_fns:\n                read_fn()\n\n        (\n            process_ind,\n            subprocess_ind,\n        ) = self.sampler_index_to_process_ind_and_subprocess_ind[sampler_index]\n\n        self.command_at(sampler_index=sampler_index, command=PAUSE_COMMAND, data=None)\n\n        for i in range(\n            sampler_index + 1, len(self.sampler_index_to_process_ind_and_subprocess_ind)\n        ):\n            other_process_and_sub_process_inds = (\n                self.sampler_index_to_process_ind_and_subprocess_ind[i]\n            )\n            if other_process_and_sub_process_inds[0] == process_ind:\n                other_process_and_sub_process_inds[1] -= 1\n            else:\n                break\n\n        self.sampler_index_to_process_ind_and_subprocess_ind.pop(sampler_index)\n\n        self.npaused_per_process[process_ind] += 1\n\n    def resume_all(self) -> None:\n        \"\"\"Resumes any paused processes.\"\"\"\n        self._is_waiting = True\n        for connection_write_fn in self._connection_write_fns:\n            connection_write_fn((RESUME_COMMAND, None))\n\n        for connection_read_fn in self._connection_read_fns:\n            connection_read_fn()\n\n        self._is_waiting = False\n\n        self._reset_sampler_index_to_process_ind_and_subprocess_ind()\n\n        for i in range(len(self.npaused_per_process)):\n            self.npaused_per_process[i] = 0\n\n    def command(\n        self, commands: Union[List[str], str], data_list: Optional[List]\n    ) -> List[Any]:\n        \"\"\"\"\"\"\n        self._is_waiting = True\n\n        if isinstance(commands, str):\n            commands = [commands] * self.num_unpaused_tasks\n\n        if data_list is None:\n            data_list = [None] * self.num_unpaused_tasks\n\n        for write_fn, subcommands, subdata_list in zip(\n           
 self._connection_write_fns,\n            self._partition_to_processes(commands),\n            self._partition_to_processes(data_list),\n        ):\n            write_fn((subcommands, subdata_list))\n        results = []\n        for read_fn in self._connection_read_fns:\n            results.extend(read_fn())\n        self._is_waiting = False\n        return results\n\n    def call(\n        self,\n        function_names: Union[str, List[str]],\n        function_args_list: Optional[List[Any]] = None,\n    ) -> List[Any]:\n        \"\"\"Calls a list of functions (which are passed by name) on the\n        corresponding task (by index).\n\n        # Parameters\n\n        function_names : The name of the functions to call on the tasks.\n        function_args_list : List of function args for each function.\n            If provided, len(function_args_list) should be as long as  len(function_names).\n\n        # Returns\n\n        List of results of calling the functions.\n        \"\"\"\n        self._is_waiting = True\n\n        if isinstance(function_names, str):\n            function_names = [function_names] * self.num_unpaused_tasks\n\n        if function_args_list is None:\n            function_args_list = [None] * len(function_names)\n        assert len(function_names) == len(function_args_list)\n        func_names_and_args_list = zip(function_names, function_args_list)\n        for write_fn, func_names_and_args in zip(\n            self._connection_write_fns,\n            self._partition_to_processes(func_names_and_args_list),\n        ):\n            write_fn((CALL_COMMAND, func_names_and_args))\n        results = []\n        for read_fn in self._connection_read_fns:\n            results.extend(read_fn())\n        self._is_waiting = False\n        return results\n\n    def attr_at(self, sampler_index: int, attr_name: str) -> Any:\n        \"\"\"Gets the attribute (specified by name) on the selected task and\n        returns it.\n\n        # Parameters\n\n        
sampler_index : Which task to get the attribute from.\n        attr_name : The name of the attribute to get from the task.\n\n        # Returns\n\n        The value of the attribute.\n        \"\"\"\n        return self.command_at(sampler_index, command=ATTR_COMMAND, data=attr_name)\n\n    def attr(self, attr_names: Union[List[str], str]) -> List[Any]:\n        \"\"\"Gets the attributes (specified by name) on the tasks.\n\n        # Parameters\n\n        attr_names : The names of the attributes to get from the tasks.\n\n        # Returns\n\n        List of the attribute values.\n        \"\"\"\n        if isinstance(attr_names, str):\n            attr_names = [attr_names] * self.num_unpaused_tasks\n\n        return self.command(commands=ATTR_COMMAND, data_list=attr_names)\n\n    def render(\n        self, mode: str = \"human\", *args, **kwargs\n    ) -> Union[np.ndarray, None, List[np.ndarray]]:\n        \"\"\"Render observations from all Tasks in a tiled image or list of\n        images.\"\"\"\n\n        images = self.command(\n            commands=RENDER_COMMAND,\n            data_list=[(args, {\"mode\": \"rgb\", **kwargs})] * self.num_unpaused_tasks,\n        )\n\n        if mode == \"raw_rgb_list\":\n            return images\n\n        tile = tile_images(images)\n        if mode == \"human\":\n            import cv2\n\n            cv2.imshow(\"vectask\", tile[:, :, ::-1])\n            cv2.waitKey(1)\n            return None\n        elif mode == \"rgb_array\":\n            return tile\n        else:\n            raise NotImplementedError\n\n    @property\n    def _valid_start_methods(self) -> Set[str]:\n        return {\"forkserver\", \"spawn\", \"fork\"}\n\n    def __del__(self):\n        self.close()\n\n    def __enter__(self):\n        return self\n\n    def __exit__(self, exc_type, exc_val, exc_tb):\n        self.close()\n\n\nclass SingleProcessVectorSampledTasks(object):\n    \"\"\"Vectorized collection of tasks.\n\n    Simultaneously handles the state 
of multiple TaskSamplers and their associated tasks.\n    Allows for interacting with these tasks in a vectorized manner. When a task completes,\n    another task is sampled from the appropriate task sampler. All the tasks are\n    synchronized (for step and new_task methods).\n\n    # Attributes\n\n    make_sampler_fn : function which creates a single TaskSampler.\n    sampler_fn_args : sequence of dictionaries describing the args\n        to pass to make_sampler_fn on each individual process.\n    auto_resample_when_done : automatically sample a new Task from the TaskSampler when\n        the Task completes. If False, a new Task will not be resampled until all\n        Tasks on all processes have completed. This functionality is provided for seamless training\n        of vectorized Tasks.\n    \"\"\"\n\n    observation_space: SpaceDict\n    _vector_task_generators: List[Generator]\n    _num_task_samplers: int\n    _auto_resample_when_done: bool\n\n    def __init__(\n        self,\n        make_sampler_fn: Callable[..., TaskSampler],\n        sampler_fn_args_list: Sequence[Dict[str, Any]] = None,\n        callback_sensor_suite: Optional[SensorSuite] = None,\n        auto_resample_when_done: bool = True,\n        should_log: bool = True,\n    ) -> None:\n\n        self._is_closed = True\n\n        assert (\n            sampler_fn_args_list is not None and len(sampler_fn_args_list) > 0\n        ), \"number of processes to be created should be greater than 0\"\n\n        self._num_task_samplers = len(sampler_fn_args_list)\n        self._auto_resample_when_done = auto_resample_when_done\n\n        self.should_log = should_log\n\n        self._vector_task_generators: List[Generator] = self._create_generators(\n            make_sampler_fn=make_sampler_fn,\n            sampler_fn_args=[{\"mp_ctx\": None, **args} for args in sampler_fn_args_list],\n            callback_sensor_suite=callback_sensor_suite,\n        )\n\n        self._is_closed = False\n\n        
observation_spaces = [\n            vsi.send((OBSERVATION_SPACE_COMMAND, None))\n            for vsi in self._vector_task_generators\n        ]\n\n        if any(os is None for os in observation_spaces):\n            raise NotImplementedError(\n                \"It appears that the `all_observation_spaces_equal`\"\n                \" is not True for some task sampler created by\"\n                \" VectorSampledTasks. This is not currently supported.\"\n            )\n\n        if any(observation_spaces[0] != os for os in observation_spaces):\n            raise NotImplementedError(\n                \"It appears that the observation spaces of the samplers\"\n                \" created in VectorSampledTasks are not equal.\"\n                \" This is not currently supported.\"\n            )\n\n        self.observation_space = observation_spaces[0]\n        self.action_spaces = [\n            vsi.send((ACTION_SPACE_COMMAND, None))\n            for vsi in self._vector_task_generators\n        ]\n        self._paused: List[Tuple[int, Generator]] = []\n\n    @property\n    def is_closed(self) -> bool:\n        \"\"\"Has the vector task been closed.\"\"\"\n        return self._is_closed\n\n    @property\n    def mp_ctx(self) -> Optional[BaseContext]:\n        return None\n\n    @property\n    def num_unpaused_tasks(self) -> int:\n        \"\"\"Number of unpaused processes.\n\n        # Returns\n\n        Number of unpaused processes.\n        \"\"\"\n        return self._num_task_samplers - len(self._paused)\n\n    @staticmethod\n    def _task_sampling_loop_generator_fn(\n        worker_id: int,\n        make_sampler_fn: Callable[..., TaskSampler],\n        sampler_fn_args: Dict[str, Any],\n        callback_sensor_suite: Optional[SensorSuite],\n        auto_resample_when_done: bool,\n        should_log: bool,\n    ) -> Generator:\n        \"\"\"Generator for working with Tasks/TaskSampler.\"\"\"\n\n        task_sampler = make_sampler_fn(**sampler_fn_args)\n        
current_task = task_sampler.next_task()\n\n        if current_task is None:\n            raise RuntimeError(\n                \"Newly created task sampler had `None` as it's first task. This likely means that\"\n                \" it was not provided with any tasks to generate. This can happen if, e.g., during testing\"\n                \" you have started more processes than you had tasks to test. Currently this is not supported:\"\n                \" every task sampler must be able to generate at least one task.\"\n            )\n\n        try:\n            command, data = yield \"started\"\n\n            while command != CLOSE_COMMAND:\n                if command == STEP_COMMAND:\n                    step_result: RLStepResult = current_task.step(data)\n                    if current_task.is_done():\n                        metrics = current_task.metrics()\n                        if metrics is not None and len(metrics) != 0:\n                            if step_result.info is None:\n                                step_result = step_result.clone({\"info\": {}})\n                            step_result.info[COMPLETE_TASK_METRICS_KEY] = metrics\n\n                        if callback_sensor_suite is not None:\n                            task_callback_data = callback_sensor_suite.get_observations(\n                                env=current_task.env, task=current_task\n                            )\n                            if step_result.info is None:\n                                step_result = step_result.clone({\"info\": {}})\n                            step_result.info[COMPLETE_TASK_CALLBACK_KEY] = (\n                                task_callback_data\n                            )\n\n                        if auto_resample_when_done:\n                            current_task = task_sampler.next_task()\n                            if current_task is None:\n                                step_result = step_result.clone({\"observation\": None})\n        
                    else:\n                                step_result = step_result.clone(\n                                    {\"observation\": current_task.get_observations()}\n                                )\n\n                    command, data = yield step_result\n\n                elif command == NEXT_TASK_COMMAND:\n                    if data is not None:\n                        current_task = task_sampler.next_task(**data)\n                    else:\n                        current_task = task_sampler.next_task()\n                    observations = current_task.get_observations()\n\n                    command, data = yield observations\n\n                elif command == RENDER_COMMAND:\n                    command, data = yield current_task.render(*data[0], **data[1])\n\n                elif (\n                    command == OBSERVATION_SPACE_COMMAND\n                    or command == ACTION_SPACE_COMMAND\n                ):\n                    res = getattr(current_task, command)\n                    command, data = yield res\n\n                elif command == CALL_COMMAND:\n                    function_name, function_args = data\n                    if function_args is None or len(function_args) == 0:\n                        result = getattr(current_task, function_name)()\n                    else:\n                        result = getattr(current_task, function_name)(*function_args)\n                    command, data = yield result\n\n                elif command == SAMPLER_COMMAND:\n                    function_name, function_args = data\n                    if function_args is None or len(function_args) == 0:\n                        result = getattr(task_sampler, function_name)()\n                    else:\n                        result = getattr(task_sampler, function_name)(*function_args)\n\n                    command, data = yield result\n\n                elif command == ATTR_COMMAND:\n                    property_name = data\n            
        result = getattr(current_task, property_name)\n\n                    command, data = yield result\n\n                elif command == SAMPLER_ATTR_COMMAND:\n                    property_name = data\n                    result = getattr(task_sampler, property_name)\n\n                    command, data = yield result\n\n                elif command == RESET_COMMAND:\n                    task_sampler.reset()\n                    current_task = task_sampler.next_task()\n\n                    if current_task is None:\n                        raise RuntimeError(\n                            \"After resetting the task sampler it seems to have\"\n                            \" no new tasks (the `task_sampler.next_task()` call\"\n                            \" returned `None` after the reset). This suggests that\"\n                            \" the task sampler's reset method was not implemented\"\n                            f\" correctly (task sampler type is {type(task_sampler)}).\"\n                        )\n\n                    command, data = yield \"done\"\n                elif command == SEED_COMMAND:\n                    task_sampler.set_seed(data)\n\n                    command, data = yield \"done\"\n                else:\n                    raise NotImplementedError()\n\n        except KeyboardInterrupt:\n            if should_log:\n                get_logger().info(\n                    \"SingleProcessVectorSampledTask {} KeyboardInterrupt\".format(\n                        worker_id\n                    )\n                )\n        except Exception as e:\n            get_logger().error(traceback.format_exc())\n            raise e\n        finally:\n            if should_log:\n                get_logger().info(\n                    \"SingleProcessVectorSampledTask {} closing.\".format(worker_id)\n                )\n            task_sampler.close()\n\n    def _create_generators(\n        self,\n        make_sampler_fn: Callable[..., TaskSampler],\n   
     sampler_fn_args: Sequence[Dict[str, Any]],\n        callback_sensor_suite: Optional[SensorSuite],\n    ) -> List[Generator]:\n\n        generators = []\n        for id, current_sampler_fn_args in enumerate(sampler_fn_args):\n            if self.should_log:\n                get_logger().info(\n                    f\"Starting {id}-th SingleProcessVectorSampledTasks generator with args {current_sampler_fn_args}.\"\n                )\n            generators.append(\n                self._task_sampling_loop_generator_fn(\n                    worker_id=id,\n                    make_sampler_fn=make_sampler_fn,\n                    sampler_fn_args=current_sampler_fn_args,\n                    callback_sensor_suite=callback_sensor_suite,\n                    auto_resample_when_done=self._auto_resample_when_done,\n                    should_log=self.should_log,\n                )\n            )\n\n            if next(generators[-1]) != \"started\":\n                raise RuntimeError(\"Generator failed to start.\")\n\n        return generators\n\n    def next_task(self, **kwargs):\n        \"\"\"Move to the next Task for all TaskSamplers.\n\n        # Parameters\n\n        kwargs : key word arguments passed to the `next_task` function of the samplers.\n\n        # Returns\n\n        List of initial observations for each of the new tasks.\n        \"\"\"\n        return [\n            g.send((NEXT_TASK_COMMAND, kwargs)) for g in self._vector_task_generators\n        ]\n\n    def get_observations(self):\n        \"\"\"Get observations for all unpaused tasks.\n\n        # Returns\n\n        List of observations for each of the unpaused tasks.\n        \"\"\"\n        return self.call(\n            [\"get_observations\"] * self.num_unpaused_tasks,\n        )\n\n    def next_task_at(self, index_process: int) -> List[RLStepResult]:\n        \"\"\"Move to the next Task from the TaskSampler in index_process\n        process in the vector.\n\n        # Parameters\n\n     
   index_process : Index of the generator to be reset.\n\n        # Returns\n\n        List of length one containing the observations the newly sampled task.\n        \"\"\"\n        return [\n            self._vector_task_generators[index_process].send((NEXT_TASK_COMMAND, None))\n        ]\n\n    def step_at(self, index_process: int, action: int) -> List[RLStepResult]:\n        \"\"\"Step in the index_process task in the vector.\n\n        # Parameters\n\n        index_process : Index of the process to be reset.\n        action : The action to take.\n\n        # Returns\n\n        List containing the output of step method on the task in the indexed process.\n        \"\"\"\n        return self._vector_task_generators[index_process].send((STEP_COMMAND, action))\n\n    def step(self, actions: List[List[int]]):\n        \"\"\"Perform actions in the vectorized tasks.\n\n        # Parameters\n\n        actions: List of size _num_samplers containing action to be taken in each task.\n\n        # Returns\n\n        List of outputs from the step method of tasks.\n        \"\"\"\n        return [\n            g.send((STEP_COMMAND, action))\n            for g, action in zip(self._vector_task_generators, actions)\n        ]\n\n    def reset_all(self):\n        \"\"\"Reset all task samplers to their initial state (except for the RNG\n        seed).\"\"\"\n        return [g.send((RESET_COMMAND, None)) for g in self._vector_task_generators]\n\n    def set_seeds(self, seeds: List[int]):\n        \"\"\"Sets new tasks' RNG seeds.\n\n        # Parameters\n\n        seeds: List of size _num_samplers containing new RNG seeds.\n        \"\"\"\n        return [\n            g.send((SEED_COMMAND, seed))\n            for g, seed in zip(self._vector_task_generators, seeds)\n        ]\n\n    def close(self) -> None:\n        if self._is_closed:\n            return\n\n        for g in self._vector_task_generators:\n            try:\n                try:\n                    
g.send((CLOSE_COMMAND, None))\n                except StopIteration:\n                    pass\n            except KeyboardInterrupt:\n                pass\n\n        self._is_closed = True\n\n    def pause_at(self, sampler_index: int) -> None:\n        \"\"\"Pauses computation on the Task in process `sampler_index` without destroying\n        the Task. This is useful for not needing to call steps on all Tasks\n        when only some are active (for example during the last samples of\n        running eval).\n\n        # Parameters\n\n        sampler_index : which process to pause. All indexes after this\n            one will be shifted down by one.\n        \"\"\"\n        generator = self._vector_task_generators.pop(sampler_index)\n        self._paused.append((sampler_index, generator))\n\n    def resume_all(self) -> None:\n        \"\"\"Resumes any paused processes.\"\"\"\n        for index, generator in reversed(self._paused):\n            self._vector_task_generators.insert(index, generator)\n        self._paused = []\n\n    def command_at(\n        self, sampler_index: int, command: str, data: Optional[Any] = None\n    ) -> Any:\n        \"\"\"Sends a command (with optional data) to the selected task's\n        generator and returns the result.\n\n        # Parameters\n\n        sampler_index : Which task generator to send the command to.\n        command : The command to send.\n        data : Optional data sent along with the command.\n\n        # Returns\n\n        Result produced by the generator in response to the command.\n        \"\"\"\n        return self._vector_task_generators[sampler_index].send((command, data))\n\n    def command(\n        self, commands: Union[List[str], str], data_list: Optional[List]\n    ) -> List[Any]:\n        \"\"\"\"\"\"\n        if isinstance(commands, str):\n            commands = [commands] * self.num_unpaused_tasks\n\n        if data_list is None:\n            data_list = [None] * self.num_unpaused_tasks\n\n        return [\n            g.send((command, data))\n     
       for g, command, data in zip(\n                self._vector_task_generators, commands, data_list\n            )\n        ]\n\n    def call_at(\n        self,\n        sampler_index: int,\n        function_name: str,\n        function_args: Optional[List[Any]] = None,\n    ) -> Any:\n        \"\"\"Calls a function (which is passed by name) on the selected task and\n        returns the result.\n\n        # Parameters\n\n        index : Which task to call the function on.\n        function_name : The name of the function to call on the task.\n        function_args : Optional function args.\n\n        # Returns\n\n        Result of calling the function.\n        \"\"\"\n        return self._vector_task_generators[sampler_index].send(\n            (CALL_COMMAND, (function_name, function_args))\n        )\n\n    def call(\n        self,\n        function_names: Union[str, List[str]],\n        function_args_list: Optional[List[Any]] = None,\n    ) -> List[Any]:\n        \"\"\"Calls a list of functions (which are passed by name) on the\n        corresponding task (by index).\n\n        # Parameters\n\n        function_names : The name of the functions to call on the tasks.\n        function_args_list : List of function args for each function.\n            If provided, len(function_args_list) should be as long as  len(function_names).\n\n        # Returns\n\n        List of results of calling the functions.\n        \"\"\"\n        if isinstance(function_names, str):\n            function_names = [function_names] * self.num_unpaused_tasks\n\n        if function_args_list is None:\n            function_args_list = [None] * len(function_names)\n\n        assert len(function_names) == len(function_args_list)\n\n        return [\n            g.send((CALL_COMMAND, args))\n            for g, args in zip(\n                self._vector_task_generators, zip(function_names, function_args_list)\n            )\n        ]\n\n    def attr_at(self, sampler_index: int, attr_name: 
str) -> Any:\n        \"\"\"Gets the attribute (specified by name) on the selected task and\n        returns it.\n\n        # Parameters\n\n        index : Which task to call the function on.\n        attr_name : The name of the function to call on the task.\n\n        # Returns\n\n         Result of calling the function.\n        \"\"\"\n        return self._vector_task_generators[sampler_index].send(\n            (ATTR_COMMAND, attr_name)\n        )\n\n    def attr(self, attr_names: Union[List[str], str]) -> List[Any]:\n        \"\"\"Gets the attributes (specified by name) on the tasks.\n\n        # Parameters\n\n        attr_names : The name of the functions to call on the tasks.\n\n        # Returns\n\n        List of results of calling the functions.\n        \"\"\"\n        if isinstance(attr_names, str):\n            attr_names = [attr_names] * self.num_unpaused_tasks\n\n        return [\n            g.send((ATTR_COMMAND, attr_name))\n            for g, attr_name in zip(self._vector_task_generators, attr_names)\n        ]\n\n    def render(\n        self, mode: str = \"human\", *args, **kwargs\n    ) -> Union[np.ndarray, None, List[np.ndarray]]:\n        \"\"\"Render observations from all Tasks in a tiled image or a list of\n        images.\"\"\"\n\n        images = [\n            g.send((RENDER_COMMAND, (args, {\"mode\": \"rgb\", **kwargs})))\n            for g in self._vector_task_generators\n        ]\n\n        if mode == \"raw_rgb_list\":\n            return images\n\n        for index, _ in reversed(self._paused):\n            images.insert(index, np.zeros_like(images[0]))\n\n        tile = tile_images(images)\n        if mode == \"human\":\n            import cv2\n\n            cv2.imshow(\"vectask\", tile[:, :, ::-1])\n            cv2.waitKey(1)\n            return None\n        elif mode == \"rgb_array\":\n            return tile\n        else:\n            raise NotImplementedError\n\n    def __del__(self):\n        self.close()\n\n    def 
__enter__(self):\n        return self\n\n    def __exit__(self, exc_type, exc_val, exc_tb):\n        self.close()\n"
  },
  {
    "path": "allenact/base_abstractions/__init__.py",
    "content": ""
  },
  {
    "path": "allenact/base_abstractions/callbacks.py",
    "content": "import abc\nfrom typing import List, Dict, Any, Sequence, Optional\n\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig\nfrom allenact.base_abstractions.sensor import Sensor\n\ntry:\n    from typing import Literal\nexcept ImportError:\n    from typing_extensions import Literal\n\n\nclass Callback(abc.ABC):\n    def setup(\n        self,\n        name: str,\n        config: ExperimentConfig,\n        mode: Literal[\"train\", \"valid\", \"test\"],\n        **kwargs,\n    ) -> None:\n        \"\"\"Called once before training begins.\"\"\"\n\n    def on_train_log(\n        self,\n        *,\n        metrics: List[Dict[str, Any]],\n        metric_means: Dict[str, float],\n        tasks_data: List[Any],\n        step: int,\n        scalar_name_to_total_experiences_key: Dict[str, str],\n        checkpoint_file_name: str,\n        **kwargs,\n    ) -> None:\n        \"\"\"Called once train is supposed to log.\"\"\"\n\n    def on_valid_log(\n        self,\n        *,\n        metrics: Dict[str, Any],\n        metric_means: Dict[str, float],\n        tasks_data: List[Any],\n        step: int,\n        scalar_name_to_total_experiences_key: Dict[str, str],\n        checkpoint_file_name: str,\n        **kwargs,\n    ) -> None:\n        \"\"\"Called after validation ends.\"\"\"\n\n    def on_test_log(\n        self,\n        *,\n        metrics: Dict[str, Any],\n        metric_means: Dict[str, float],\n        tasks_data: List[Any],\n        step: int,\n        scalar_name_to_total_experiences_key: Dict[str, str],\n        checkpoint_file_name: str,\n        **kwargs,\n    ) -> None:\n        \"\"\"Called after test ends.\"\"\"\n\n    def after_save_project_state(self, base_dir: str) -> None:\n        \"\"\"Called after saving the project state in base_dir.\"\"\"\n\n    def callback_sensors(self) -> Optional[Sequence[Sensor]]:\n        \"\"\"Determines the data returned to the `tasks_data` parameter in the\n        above *_log 
functions.\"\"\"\n"
  },
  {
    "path": "allenact/base_abstractions/distributions.py",
    "content": "import abc\nfrom collections import OrderedDict\nfrom typing import Any, Union, Callable, TypeVar, Dict, Optional, cast, Protocol\n\nimport gym\nimport torch\nimport torch.nn as nn\nfrom torch.distributions.utils import lazy_property\n\nfrom allenact.algorithms.onpolicy_sync.misc import TrackingInfoType\nfrom allenact.base_abstractions.sensor import AbstractExpertActionSensor as Expert\nfrom allenact.utils import spaces_utils as su\nfrom allenact.utils.misc_utils import all_unique\n\nTeacherForcingAnnealingType = TypeVar(\"TeacherForcingAnnealingType\")\n\n\"\"\"\nModify standard PyTorch distributions so they are compatible with this code.\n\"\"\"\n\n\nclass Distr(abc.ABC):\n    @abc.abstractmethod\n    def log_prob(self, actions: Any):\n        \"\"\"Return the log probability/ies of the provided action/s.\"\"\"\n        raise NotImplementedError()\n\n    @abc.abstractmethod\n    def entropy(self):\n        \"\"\"Return the entropy or entropies.\"\"\"\n        raise NotImplementedError()\n\n    @abc.abstractmethod\n    def sample(self, sample_shape=torch.Size()):\n        \"\"\"Sample actions.\"\"\"\n        raise NotImplementedError()\n\n    def mode(self):\n        \"\"\"If available, return the action(s) with highest probability.\n\n        It will only be called if using deterministic agents.\n        \"\"\"\n        raise NotImplementedError()\n\n\nclass CategoricalDistr(torch.distributions.Categorical, Distr):\n    \"\"\"A categorical distribution extending PyTorch's Categorical.\n\n    probs or logits are assumed to be passed with step and sampler\n    dimensions as in: [step, samplers, ...]\n    \"\"\"\n\n    def mode(self):\n        return self._param.argmax(dim=-1, keepdim=False)  # match sample()'s shape\n\n    def log_prob(self, value: torch.Tensor):\n        if value.shape == self.logits.shape[:-1]:\n            return super(CategoricalDistr, self).log_prob(value=value)\n        elif value.shape == self.logits.shape[:-1] + (1,):\n      
      return (\n                super(CategoricalDistr, self)\n                .log_prob(value=value.squeeze(-1))\n                .unsqueeze(-1)\n            )\n        else:\n            raise NotImplementedError(\n                \"Broadcasting in categorical distribution is disabled as it often leads\"\n                f\" to unexpected results. We have that `value.shape == {value.shape}` but\"\n                f\" expected a shape of \"\n                f\" `self.logits.shape[:-1] == {self.logits.shape[:-1]}` or\"\n                f\" `self.logits.shape[:-1] + (1,) == {self.logits.shape[:-1] + (1,)}`\"\n            )\n\n    @lazy_property\n    def log_probs_tensor(self):\n        return torch.log_softmax(self.logits, dim=-1)\n\n    @lazy_property\n    def probs_tensor(self):\n        return torch.softmax(self.logits, dim=-1)\n\n\nclass ConditionalDistr(Distr):\n    \"\"\"Action distribution which is conditioned on other\n    information (i.e. part of a hierarchical distribution).\n\n    # Attributes\n    action_group_name : the identifier of the group of actions (`OrderedDict`) produced by this `ConditionalDistr`\n    \"\"\"\n\n    action_group_name: str\n\n    def __init__(\n        self,\n        distr_conditioned_on_input_fn_or_instance: Union[Callable, Distr],\n        action_group_name: str,\n        *distr_conditioned_on_input_args,\n        **distr_conditioned_on_input_kwargs,\n    ):\n        \"\"\"Initialize a ConditionalDistr.\n\n        # Parameters\n        distr_conditioned_on_input_fn_or_instance : Callable to generate `ConditionalDistr` given sampled actions,\n            or given `Distr`.\n        action_group_name : the identifier of the group of actions (`OrderedDict`) produced by this `ConditionalDistr`\n        distr_conditioned_on_input_args : positional arguments for Callable `distr_conditioned_on_input_fn_or_instance`\n        distr_conditioned_on_input_kwargs : keyword arguments for Callable 
`distr_conditioned_on_input_fn_or_instance`\n        \"\"\"\n\n        self.distr: Optional[Distr] = None\n        self.distr_conditioned_on_input_fn: Optional[Callable] = None\n        self.distr_conditioned_on_input_args = distr_conditioned_on_input_args\n        self.distr_conditioned_on_input_kwargs = distr_conditioned_on_input_kwargs\n\n        if isinstance(distr_conditioned_on_input_fn_or_instance, Distr):\n            self.distr = distr_conditioned_on_input_fn_or_instance\n        else:\n            self.distr_conditioned_on_input_fn = (\n                distr_conditioned_on_input_fn_or_instance\n            )\n\n        self.action_group_name = action_group_name\n\n    def log_prob(self, actions):\n        return self.distr.log_prob(actions)\n\n    def entropy(self):\n        return self.distr.entropy()\n\n    def condition_on_input(self, **ready_actions):\n        if self.distr is None:\n            assert all(\n                key not in self.distr_conditioned_on_input_kwargs\n                for key in ready_actions\n            )\n            self.distr = self.distr_conditioned_on_input_fn(\n                *self.distr_conditioned_on_input_args,\n                **self.distr_conditioned_on_input_kwargs,\n                **ready_actions,\n            )\n\n    def reset(self):\n        if (self.distr is not None) and (\n            self.distr_conditioned_on_input_fn is not None\n        ):\n            self.distr = None\n\n    def sample(self, sample_shape=torch.Size()) -> OrderedDict:\n        return OrderedDict([(self.action_group_name, self.distr.sample(sample_shape))])\n\n    def mode(self) -> OrderedDict:\n        return OrderedDict([(self.action_group_name, self.distr.mode())])\n\n\nclass SequentialDistr(Distr):\n    def __init__(self, *conditional_distrs: ConditionalDistr):\n        action_group_names = [cd.action_group_name for cd in conditional_distrs]\n        assert all_unique(\n            action_group_names\n        ), f\"All conditional 
distribution `action_group_name`, must be unique, given names {action_group_names}\"\n        self.conditional_distrs = conditional_distrs\n\n    def sample(self, sample_shape=torch.Size()):\n        actions = OrderedDict()\n        for cd in self.conditional_distrs:\n            cd.condition_on_input(**actions)\n            actions.update(cd.sample(sample_shape=sample_shape))\n        return actions\n\n    def mode(self):\n        actions = OrderedDict()\n        for cd in self.conditional_distrs:\n            cd.condition_on_input(**actions)\n            actions.update(cd.mode())\n        return actions\n\n    def conditional_entropy(self):\n        total = 0\n        for cd in self.conditional_distrs:\n            total = total + cd.entropy()\n        return total\n\n    def entropy(self):\n        raise NotImplementedError(\n            \"Please use 'conditional_entropy' instead of 'entropy' as the `entropy_method_name` \"\n            \"parameter in your loss when using `SequentialDistr`.\"\n        )\n\n    def log_prob(\n        self, actions: Dict[str, Any], return_dict: bool = False\n    ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:\n        assert len(actions) == len(\n            self.conditional_distrs\n        ), f\"{len(self.conditional_distrs)} conditional distributions for {len(actions)} action groups\"\n\n        res: Union[int, torch.Tensor, Dict[str, torch.Tensor]] = (\n            0 if not return_dict else OrderedDict()\n        )\n\n        for cd in self.conditional_distrs:\n            cd.condition_on_input(**actions)\n            current_log_prob = cd.log_prob(actions[cd.action_group_name])\n\n            if not return_dict:\n                res = res + current_log_prob\n            else:\n                res[cd.action_group_name] = current_log_prob\n\n        return res\n\n\nclass TrackingCallback(Protocol):\n    def __call__(self, type: TrackingInfoType, info: Dict[str, Any], n: int): ...\n\n\nclass TeacherForcingDistr(Distr):\n    
def __init__(\n        self,\n        distr: Distr,\n        obs: Dict[str, Any],\n        action_space: gym.spaces.Space,\n        num_active_samplers: Optional[int],\n        approx_steps: Optional[int],\n        teacher_forcing: Optional[TeacherForcingAnnealingType],\n        tracking_callback: Optional[TrackingCallback],\n        always_enforce: bool = False,\n    ):\n        self.distr = distr\n        self.is_sequential = isinstance(self.distr, SequentialDistr)\n\n        # action_space is a gym.spaces.Dict for SequentialDistr, or any gym.Space for other Distr\n        self.action_space = action_space\n        self.num_active_samplers = num_active_samplers\n        self.approx_steps = approx_steps\n        self.teacher_forcing = teacher_forcing\n        self.tracking_callback = tracking_callback\n        self.always_enforce = always_enforce\n\n        assert (\n            \"expert_action\" in obs\n        ), \"When using teacher forcing, obs must contain an `expert_action` uuid\"\n\n        obs_space = Expert.flagged_space(\n            self.action_space, use_dict_as_groups=self.is_sequential\n        )\n        self.expert = su.unflatten(obs_space, obs[\"expert_action\"])\n\n    def enforce(\n        self,\n        sample: Any,\n        action_space: gym.spaces.Space,\n        teacher: OrderedDict,\n        teacher_force_info: Optional[Dict[str, Any]],\n        action_name: Optional[str] = None,\n    ):\n        actions = su.flatten(action_space, sample)\n\n        assert (\n            len(actions.shape) == 3\n        ), f\"Got flattened actions with shape {actions.shape} (it should be [1 x `samplers` x `flatdims`])\"\n\n        if self.num_active_samplers is not None:\n            assert actions.shape[1] == self.num_active_samplers\n\n        expert_actions = su.flatten(action_space, teacher[Expert.ACTION_POLICY_LABEL])\n        assert (\n            expert_actions.shape == actions.shape\n        ), f\"expert actions shape {expert_actions.shape} doesn't 
match the model's {actions.shape}\"\n\n        # expert_success is 0 if the expert action could not be computed and otherwise equals 1.\n        expert_action_exists_mask = teacher[Expert.EXPERT_SUCCESS_LABEL]\n\n        if not self.always_enforce:\n            teacher_forcing_mask = (\n                torch.distributions.bernoulli.Bernoulli(\n                    torch.tensor(self.teacher_forcing(self.approx_steps))\n                )\n                .sample(expert_action_exists_mask.shape)\n                .long()\n                .to(actions.device)\n            ) * expert_action_exists_mask\n        else:\n            teacher_forcing_mask = expert_action_exists_mask\n\n        if teacher_force_info is not None:\n            teacher_force_info[\n                \"teacher_ratio/sampled{}\".format(\n                    f\"_{action_name}\" if action_name is not None else \"\"\n                )\n            ] = (teacher_forcing_mask.float().mean().item())\n\n        extended_shape = teacher_forcing_mask.shape + (1,) * (\n            len(actions.shape) - len(teacher_forcing_mask.shape)\n        )\n\n        actions = torch.where(\n            teacher_forcing_mask.byte().view(extended_shape), expert_actions, actions\n        )\n\n        return su.unflatten(action_space, actions)\n\n    def log_prob(self, actions: Any):\n        return self.distr.log_prob(actions)\n\n    def entropy(self):\n        return self.distr.entropy()\n\n    def conditional_entropy(self):\n        if hasattr(self.distr, \"conditional_entropy\"):\n            return self.distr.conditional_entropy()\n\n        raise NotImplementedError(\n            f\"`conditional_entropy` is not defined for {self.distr}.\"\n        )\n\n    def sample(self, sample_shape=torch.Size()):\n        teacher_force_info: Optional[Dict[str, Any]] = None\n        if self.approx_steps is not None:\n            teacher_force_info = {\n                \"teacher_ratio/enforced\": self.teacher_forcing(self.approx_steps),\n  
          }\n\n        if self.is_sequential:\n            res = OrderedDict()\n            for cd in cast(SequentialDistr, self.distr).conditional_distrs:\n                cd.condition_on_input(**res)\n                action_group_name = cd.action_group_name\n                res[action_group_name] = self.enforce(\n                    cd.sample(sample_shape)[action_group_name],\n                    cast(gym.spaces.Dict, self.action_space)[action_group_name],\n                    self.expert[action_group_name],\n                    teacher_force_info,\n                    action_group_name,\n                )\n        else:\n            res = self.enforce(\n                self.distr.sample(sample_shape),\n                self.action_space,\n                self.expert,\n                teacher_force_info,\n            )\n\n        if self.tracking_callback is not None and self.num_active_samplers is not None:\n            self.tracking_callback(\n                type=TrackingInfoType.TEACHER_FORCING,\n                info=teacher_force_info,\n                n=self.num_active_samplers,\n            )\n\n        return res\n\n\nclass AddBias(nn.Module):\n    \"\"\"Adding bias parameters to input values.\"\"\"\n\n    def __init__(self, bias: torch.FloatTensor):\n        \"\"\"Initializer.\n\n        # Parameters\n\n        bias : data to use as the initial values of the bias.\n        \"\"\"\n        super(AddBias, self).__init__()\n        self._bias = nn.Parameter(bias.unsqueeze(1), requires_grad=True)\n\n    def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:  # type: ignore\n        \"\"\"Adds the stored bias parameters to `x`.\"\"\"\n        assert x.dim() in [2, 4]\n\n        if x.dim() == 2:\n            bias = self._bias.t().view(1, -1)\n        else:\n            bias = self._bias.t().view(1, -1, 1, 1)\n\n        return x + bias  # type:ignore\n"
  },
  {
    "path": "allenact/base_abstractions/experiment_config.py",
    "content": "\"\"\"Defines the `ExperimentConfig` abstract class used as the basis of all\nexperiments.\"\"\"\n\nimport abc\nfrom typing import Dict, Any, Optional, List, Union, Sequence, Tuple, cast\n\nimport torch\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.preprocessor import SensorPreprocessorGraph\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.utils.experiment_utils import TrainingPipeline, Builder\nfrom allenact.utils.system import get_logger\nfrom allenact.utils.viz_utils import VizSuite\n\n\ndef split_processes_onto_devices(nprocesses: int, ndevices: int):\n    assert (\n        nprocesses == 0 or nprocesses >= ndevices\n    ), \"NUM_PROCESSES {} < ndevices {}\".format(nprocesses, ndevices)\n    res = [0] * ndevices\n    for it in range(nprocesses):\n        res[it % ndevices] += 1\n    return res\n\n\nclass MachineParams(object):\n    def __init__(\n        self,\n        nprocesses: Union[int, Sequence[int]],\n        devices: Union[\n            None, int, str, torch.device, Sequence[Union[int, str, torch.device]]\n        ] = None,\n        sensor_preprocessor_graph: Optional[\n            Union[SensorPreprocessorGraph, Builder[SensorPreprocessorGraph]]\n        ] = None,\n        sampler_devices: Union[\n            None, int, str, torch.device, Sequence[Union[int, str, torch.device]]\n        ] = None,\n        visualizer: Optional[Union[VizSuite, Builder[VizSuite]]] = None,\n        gpu_ids: Union[int, Sequence[int]] = None,\n        local_worker_ids: Optional[List[int]] = None,\n    ):\n        assert (\n            gpu_ids is None or devices is None\n        ), \"only one of `gpu_ids` or `devices` should be set.\"\n        if gpu_ids is not None:\n            get_logger().warning(\n                \"The `gpu_ids` parameter will be deprecated, use `devices` instead.\"\n            )\n            devices = gpu_ids\n\n        self.nprocesses = (\n            nprocesses if isinstance(nprocesses, Sequence) else 
(nprocesses,)\n        )\n\n        self.devices: Tuple[torch.device, ...] = self._standardize_devices(\n            devices=devices, nworkers=len(self.nprocesses)\n        )\n\n        self._sensor_preprocessor_graph_maybe_builder = sensor_preprocessor_graph\n        self.sampler_devices: Tuple[torch.device, ...] = (\n            None\n            if sampler_devices is None\n            else self._standardize_devices(\n                devices=sampler_devices, nworkers=len(self.nprocesses)\n            )\n        )\n        self._visualizer_maybe_builder = visualizer\n\n        self._sensor_preprocessor_graph_cached: Optional[SensorPreprocessorGraph] = None\n        self._visualizer_cached: Optional[VizSuite] = None\n\n        self.local_worker_ids: Optional[List[int]] = None\n        self.set_local_worker_ids(local_worker_ids)\n\n    def set_local_worker_ids(self, local_worker_ids: Optional[List[int]]):\n        self.local_worker_ids = local_worker_ids or list(range(len(self.devices)))\n\n        assert all(0 <= id < len(self.devices) for id in self.local_worker_ids), (\n            f\"Passed {len(self.local_worker_ids)} local worker ids {self.local_worker_ids}\"\n            f\" for {len(self.devices)} total devices (workers)\"\n        )\n\n    @classmethod\n    def instance_from(\n        cls, machine_params: Union[\"MachineParams\", Dict[str, Any]]\n    ) -> \"MachineParams\":\n        if isinstance(machine_params, cls):\n            return machine_params\n        assert isinstance(machine_params, Dict)\n        return cls(**machine_params)\n\n    @staticmethod\n    def _standardize_devices(\n        devices: Optional[\n            Union[int, str, torch.device, Sequence[Union[int, str, torch.device]]]\n        ],\n        nworkers: int,\n    ) -> Tuple[torch.device, ...]:\n        if devices is None or (isinstance(devices, Sequence) and len(devices) == 0):\n            devices = torch.device(\"cpu\")\n\n        if not isinstance(devices, Sequence):\n           
 devices = (devices,) * nworkers\n\n        assert len(devices) == nworkers, (\n            f\"The number of devices (len({devices})={len(devices)})\"\n            f\" must equal the number of workers ({nworkers})\"\n        )\n\n        devices = tuple(\n            torch.device(\"cpu\") if d == -1 else torch.device(d) for d in devices  # type: ignore\n        )\n        for d in devices:\n            if d != torch.device(\"cpu\"):\n                try:\n                    torch.cuda.get_device_capability(d)  # type: ignore\n                except Exception:\n                    raise RuntimeError(\n                        f\"It appears the cuda device {d} is not available on your system.\"\n                    )\n\n        return cast(Tuple[torch.device, ...], devices)\n\n    @property\n    def sensor_preprocessor_graph(self) -> Optional[SensorPreprocessorGraph]:\n        if self._sensor_preprocessor_graph_maybe_builder is None:\n            return None\n\n        if self._sensor_preprocessor_graph_cached is None:\n            if isinstance(self._sensor_preprocessor_graph_maybe_builder, Builder):\n                self._sensor_preprocessor_graph_cached = (\n                    self._sensor_preprocessor_graph_maybe_builder()\n                )\n            else:\n                self._sensor_preprocessor_graph_cached = (\n                    self._sensor_preprocessor_graph_maybe_builder\n                )\n\n        return self._sensor_preprocessor_graph_cached\n\n    def set_visualizer(self, viz: VizSuite):\n        if self._visualizer_cached is None:\n            self._visualizer_maybe_builder = viz\n        else:\n            get_logger().warning(\"Ignoring viz (already instantiated)\")\n\n    @property\n    def visualizer(self) -> Optional[VizSuite]:\n        if self._visualizer_maybe_builder is None:\n            return None\n\n        if self._visualizer_cached is None:\n            if isinstance(self._visualizer_maybe_builder, Builder):\n                
self._visualizer_cached = self._visualizer_maybe_builder()\n            else:\n                self._visualizer_cached = self._visualizer_maybe_builder\n\n        return self._visualizer_cached\n\n\nclass FrozenClassVariables(abc.ABCMeta):\n    \"\"\"Metaclass for ExperimentConfig.\n\n    Ensures ExperimentConfig class-level attributes cannot be modified.\n    ExperimentConfig attributes can still be modified at the object\n    level.\n    \"\"\"\n\n    def __setattr__(cls, attr, value):\n        if isinstance(cls, type) and (\n            attr != \"__abstractmethods__\" and not attr.startswith(\"_abc_\")\n        ):\n            raise RuntimeError(\n                \"Cannot edit class-level attributes.\\n\"\n                \"Changing the values of class-level attributes is disabled in ExperimentConfig classes.\\n\"\n                \"This is to prevent problems that can occur otherwise when using multiprocessing.\\n\"\n                \"If you wish to change the value of a configuration, please do so for an instance of that\"\n                \" configuration.\\nTriggered by attempting to modify {}\".format(\n                    cls.__name__\n                )\n            )\n        else:\n            super().__setattr__(attr, value)\n\n\nclass ExperimentConfig(metaclass=FrozenClassVariables):\n    \"\"\"Abstract class used to define experiments.\n\n    Instead of using yaml or text files, experiments in our framework\n    are defined as a class. In particular, to define an experiment one\n    must define a new class inheriting from this class which implements\n    all of the below methods. 
The below methods will then be called when\n    running the experiment.\n    \"\"\"\n\n    @abc.abstractmethod\n    def tag(self) -> str:\n        \"\"\"A string describing the experiment.\"\"\"\n        raise NotImplementedError()\n\n    @abc.abstractmethod\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        \"\"\"Creates the training pipeline.\n\n        # Parameters\n\n        kwargs : Extra kwargs. Currently unused.\n\n        # Returns\n\n        An instantiated `TrainingPipeline` object.\n        \"\"\"\n        raise NotImplementedError()\n\n    @abc.abstractmethod\n    def machine_params(\n        self, mode=\"train\", **kwargs\n    ) -> Union[MachineParams, Dict[str, Any]]:\n        \"\"\"Parameters used to specify machine information.\n\n        Machine information includes at least (1) the number of processes\n        to train with and (2) the gpu device indices to use.\n\n        mode : Whether or not the machine parameters should be those for\n            \"train\", \"valid\", or \"test\".\n        kwargs : Extra kwargs.\n\n        # Returns\n\n        A dictionary of the form `{\"nprocesses\": ..., \"gpu_ids\": ..., ...}`.\n        Here `nprocesses` must be a non-negative integer, `gpu_ids` must\n        be a sequence of non-negative integers (if empty, then everything\n        will be run on the cpu).\n        \"\"\"\n        raise NotImplementedError()\n\n    @abc.abstractmethod\n    def create_model(self, **kwargs) -> nn.Module:\n        \"\"\"Create the neural model.\"\"\"\n        raise NotImplementedError()\n\n    @abc.abstractmethod\n    def make_sampler_fn(self, **kwargs) -> TaskSampler:\n        \"\"\"Create the TaskSampler given keyword arguments.\n\n        These `kwargs` will be generated by one of\n        `ExperimentConfig.train_task_sampler_args`,\n        `ExperimentConfig.valid_task_sampler_args`, or\n        `ExperimentConfig.test_task_sampler_args` depending on whether\n        the user has chosen to train, 
validate, or test.\n        \"\"\"\n        raise NotImplementedError()\n\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        \"\"\"Specifies the training parameters for the `process_ind`th training\n        process.\n\n        These parameters are meant to be passed as keyword arguments to `ExperimentConfig.make_sampler_fn`\n        to generate a task sampler.\n\n        # Parameters\n\n        process_ind : The unique index of the training process (`0 ≤ process_ind < total_processes`).\n        total_processes : The total number of training processes.\n        devices : Gpu devices (if any) to use.\n        seeds : The seeds to use, if any.\n        deterministic_cudnn : Whether or not to use deterministic cudnn.\n\n        # Returns\n\n        The parameters for `make_sampler_fn`\n        \"\"\"\n        raise NotImplementedError()\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        \"\"\"Specifies the validation parameters for the `process_ind`th\n        validation process.\n\n        See `ExperimentConfig.train_task_sampler_args` for parameter\n        definitions.\n        \"\"\"\n        raise NotImplementedError()\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        \"\"\"Specifies the test parameters for the `process_ind`th test process.\n\n        See `ExperimentConfig.train_task_sampler_args` 
for parameter\n        definitions.\n        \"\"\"\n        raise NotImplementedError()\n"
  },
  {
    "path": "allenact/base_abstractions/misc.py",
    "content": "import abc\nfrom typing import (\n    Dict,\n    Any,\n    TypeVar,\n    Sequence,\n    NamedTuple,\n    Optional,\n    List,\n    Union,\n    Generic,\n)\n\nimport attr\nimport torch\n\nEnvType = TypeVar(\"EnvType\")\nDistributionType = TypeVar(\"DistributionType\")\nModelType = TypeVar(\"ModelType\")\nObservationType = Dict[str, Union[torch.Tensor, Dict[str, Any]]]\n\n\nclass RLStepResult(NamedTuple):\n    observation: Optional[Any]\n    reward: Optional[Union[float, List[float]]]\n    done: Optional[bool]\n    info: Optional[Dict[str, Any]]\n\n    def clone(self, new_info: Dict[str, Any]):\n        return RLStepResult(\n            observation=(\n                self.observation\n                if \"observation\" not in new_info\n                else new_info[\"observation\"]\n            ),\n            reward=self.reward if \"reward\" not in new_info else new_info[\"reward\"],\n            done=self.done if \"done\" not in new_info else new_info[\"done\"],\n            info=self.info if \"info\" not in new_info else new_info[\"info\"],\n        )\n\n    def merge(self, other: \"RLStepResult\"):\n        return RLStepResult(\n            observation=(\n                self.observation if other.observation is None else other.observation\n            ),\n            reward=self.reward if other.reward is None else other.reward,\n            done=self.done if other.done is None else other.done,\n            info={\n                **(self.info if self.info is not None else {}),\n                **(other.info if other is not None else {}),\n            },\n        )\n\n\nclass ActorCriticOutput(tuple, Generic[DistributionType]):\n    distributions: DistributionType\n    values: torch.FloatTensor\n    extras: Dict[str, Any]\n\n    # noinspection PyTypeChecker\n    def __new__(\n        cls,\n        distributions: DistributionType,\n        values: torch.FloatTensor,\n        extras: Dict[str, Any],\n    ):\n        self = tuple.__new__(cls, 
(distributions, values, extras))\n        self.distributions = distributions\n        self.values = values\n        self.extras = extras\n        return self\n\n    def __repr__(self) -> str:\n        return (\n            f\"Group(distributions={self.distributions},\"\n            f\" values={self.values},\"\n            f\" extras={self.extras})\"\n        )\n\n\nclass Memory(Dict):\n    def __init__(self, *args, **kwargs):\n        super().__init__()\n        if len(args) > 0:\n            assert len(args) == 1, (\n                \"Only one of Sequence[Tuple[str, Tuple[torch.Tensor, int]]]\"\n                \"or Dict[str, Tuple[torch.Tensor, int]] accepted as unnamed args\"\n            )\n            if isinstance(args[0], Sequence):\n                for key, tensor_dim in args[0]:\n                    assert (\n                        len(tensor_dim) == 2\n                    ), \"Only Tuple[torch.Tensor, int]] accepted as second item in Tuples\"\n                    tensor, dim = tensor_dim\n                    self.check_append(key, tensor, dim)\n            elif isinstance(args[0], Dict):\n                for key in args[0]:\n                    assert (\n                        len(args[0][key]) == 2\n                    ), \"Only Tuple[torch.Tensor, int]] accepted as values in Dict\"\n                    tensor, dim = args[0][key]\n                    self.check_append(key, tensor, dim)\n        elif len(kwargs) > 0:\n            for key in kwargs:\n                assert (\n                    len(kwargs[key]) == 2\n                ), \"Only Tuple[torch.Tensor, int]] accepted as keyword arg\"\n                tensor, dim = kwargs[key]\n                self.check_append(key, tensor, dim)\n\n    def check_append(\n        self, key: str, tensor: torch.Tensor, sampler_dim: int\n    ) -> \"Memory\":\n        \"\"\"Appends a new memory type given its identifier, its memory tensor\n        and its sampler dim.\n\n        # Parameters\n\n        key: string 
identifier of the memory type\n        tensor: memory tensor\n        sampler_dim: sampler dimension\n\n        # Returns\n\n        Updated Memory\n        \"\"\"\n        assert isinstance(key, str), \"key {} must be str\".format(key)\n        assert isinstance(\n            tensor, torch.Tensor\n        ), \"tensor {} must be torch.Tensor\".format(tensor)\n        assert isinstance(sampler_dim, int), \"sampler_dim {} must be int\".format(\n            sampler_dim\n        )\n\n        assert key not in self, \"Reused key {}\".format(key)\n        assert (\n            0 <= sampler_dim < len(tensor.shape)\n        ), \"Got sampler_dim {} for tensor with shape {}\".format(\n            sampler_dim, tensor.shape\n        )\n\n        self[key] = (tensor, sampler_dim)\n\n        return self\n\n    def tensor(self, key: str) -> torch.Tensor:\n        \"\"\"Returns the memory tensor for a given memory type.\n\n        # Parameters\n\n        key: string identifier of the memory type\n\n        # Returns\n\n        Memory tensor for type `key`\n        \"\"\"\n        assert key in self, \"Missing key {}\".format(key)\n        return self[key][0]\n\n    def sampler_dim(self, key: str) -> int:\n        \"\"\"Returns the sampler dimension for the given memory type.\n\n        # Parameters\n\n        key: string identifier of the memory type\n\n        # Returns\n\n        The sampler dim\n        \"\"\"\n        assert key in self, \"Missing key {}\".format(key)\n        return self[key][1]\n\n    def sampler_select(self, keep: Sequence[int]) -> \"Memory\":\n        \"\"\"Equivalent to PyTorch index_select along the `sampler_dim` of each\n        memory type.\n\n        # Parameters\n\n        keep: a list of sampler indices to keep\n\n        # Returns\n\n        Selected memory\n        \"\"\"\n        res = Memory()\n        valid = False\n        for name in self:\n            sampler_dim = self.sampler_dim(name)\n            tensor = self.tensor(name)\n            
assert len(keep) == 0 or (\n                0 <= min(keep) and max(keep) < tensor.shape[sampler_dim]\n            ), \"Got min(keep)={} max(keep)={} for memory type {} with shape {}, dim {}\".format(\n                min(keep), max(keep), name, tensor.shape, sampler_dim\n            )\n            if tensor.shape[sampler_dim] > len(keep):\n                tensor = tensor.index_select(\n                    dim=sampler_dim,\n                    index=torch.as_tensor(\n                        list(keep), dtype=torch.int64, device=tensor.device\n                    ),\n                )\n                res.check_append(name, tensor, sampler_dim)\n                valid = True\n        if valid:\n            return res\n        return self\n\n    def set_tensor(self, key: str, tensor: torch.Tensor) -> \"Memory\":\n        \"\"\"Replaces tensor for given key with an updated version.\n\n        # Parameters\n\n        key: memory type identifier to update\n        tensor: updated tensor\n\n        # Returns\n\n        Updated memory\n        \"\"\"\n        assert key in self, \"Missing key {}\".format(key)\n        assert (\n            tensor.shape == self[key][0].shape\n        ), \"setting tensor with shape {} for former {}\".format(\n            tensor.shape, self[key][0].shape\n        )\n        self[key] = (tensor, self[key][1])\n\n        return self\n\n    def step_select(self, step: int) -> \"Memory\":\n        \"\"\"Equivalent to slicing with length 1 for the `step` (i.e first)\n        dimension in rollouts storage.\n\n        # Parameters\n\n        step: step to keep\n\n        # Returns\n\n        Sliced memory with a single step\n        \"\"\"\n        res = Memory()\n        for key in self:\n            tensor = self.tensor(key)\n            assert (\n                tensor.shape[0] > step\n            ), \"attempting to access step {} for memory type {} of shape {}\".format(\n                step, key, tensor.shape\n            )\n            if step 
!= -1:\n                res.check_append(\n                    key, self.tensor(key)[step : step + 1, ...], self.sampler_dim(key)\n                )\n            else:\n                res.check_append(\n                    key, self.tensor(key)[step:, ...], self.sampler_dim(key)\n                )\n        return res\n\n    def step_squeeze(self, step: int) -> \"Memory\":\n        \"\"\"Equivalent to simple indexing for the `step` (i.e first) dimension\n        in rollouts storage.\n\n        # Parameters\n\n        step: step to keep\n\n        # Returns\n\n        Sliced memory with a single step (and squeezed step dimension)\n        \"\"\"\n        res = Memory()\n        for key in self:\n            tensor = self.tensor(key)\n            assert (\n                tensor.shape[0] > step\n            ), \"attempting to access step {} for memory type {} of shape {}\".format(\n                step, key, tensor.shape\n            )\n            res.check_append(\n                key, self.tensor(key)[step, ...], self.sampler_dim(key) - 1\n            )\n        return res\n\n    def slice(\n        self,\n        dim: int,\n        start: Optional[int] = None,\n        stop: Optional[int] = None,\n        step: int = 1,\n    ) -> \"Memory\":\n        \"\"\"Slicing for dimensions that have same extents in all memory types.\n        It also accepts negative indices.\n\n        # Parameters\n\n        dim: the dimension to slice\n        start: the index of the first item to keep if given (default 0 if None)\n        stop: the index of the first item to discard if given (default tensor size along `dim` if None)\n        step: the increment between consecutive indices (default 1)\n\n        # Returns\n\n        Sliced memory\n        \"\"\"\n        checked = False\n        total: Optional[int] = None\n\n        res = Memory()\n        for key in self:\n            tensor = self.tensor(key)\n            assert (\n                len(tensor.shape) > dim\n            
), f\"attempting to access dim {dim} for memory type {key} of shape {tensor.shape}\"\n\n            if not checked:\n                total = tensor.shape[dim]\n                checked = True\n\n            assert (\n                total == tensor.shape[dim]\n            ), f\"attempting to slice along non-uniform dimension {dim}\"\n\n            if start is not None or stop is not None or step != 1:\n                slice_tuple = (\n                    (slice(None),) * dim\n                    + (slice(start, stop, step),)\n                    + (slice(None),) * (len(tensor.shape) - (1 + dim))\n                )\n                sliced_tensor = tensor[slice_tuple]\n                res.check_append(\n                    key=key,\n                    tensor=sliced_tensor,\n                    sampler_dim=self.sampler_dim(key),\n                )\n            else:\n                res.check_append(\n                    key,\n                    tensor,\n                    self.sampler_dim(key),\n                )\n\n        return res\n\n    def to(self, device: torch.device) -> \"Memory\":\n        for key in self:\n            tensor = self.tensor(key)\n            if tensor.device != device:\n                self.set_tensor(key, tensor.to(device))\n        return self\n\n\nclass Loss(abc.ABC):\n    pass\n\n\n@attr.s(kw_only=True)\nclass LossOutput:\n    value: torch.Tensor = attr.ib()\n    info: Dict[str, Union[float, int]] = attr.ib()\n    per_epoch_info: Dict[str, Union[float, int]] = attr.ib()\n    batch_memory: Memory = attr.ib()\n    stream_memory: Memory = attr.ib()\n    bsize: int = attr.ib()\n\n\nclass GenericAbstractLoss(Loss):\n    # noinspection PyMethodOverriding\n    @abc.abstractmethod\n    def loss(  # type: ignore\n        self,\n        *,  # No positional arguments\n        model: ModelType,\n        batch: ObservationType,\n        batch_memory: Memory,\n        stream_memory: Memory,\n    ) -> LossOutput:\n        \"\"\"Computes the loss.\n\n 
       Loss after processing a batch of data with (part of) a model (possibly with memory).\n\n        We support two different types of memory: `batch_memory` and `stream_memory` that can be\n        used to compute losses and share computation.\n\n        ## `batch_memory`\n        During the update phase of training, the following\n        steps happen in order:\n        1. A `batch` of data is sampled from an `ExperienceStorage` (which stores data possibly collected during previous\n             rollout steps).\n        2.  This `batch` is passed to each of the specified `GenericAbstractLoss`'s and is used, along with the `model`,\n             to compute each such loss.\n        3. The losses are summed together, gradients are computed by backpropagation, and an update step is taken.\n        4. The process loops back to (1) with a new batch.\n        Now suppose that the computation used by a `GenericAbstractLoss` (`LossA`) can be shared across multiple of the\n        `GenericAbstractLoss`'s (`LossB`, ...). For instance, `LossA` might run the visual encoder of `model` across\n        all the images contained in `batch` so that it can compute a classification loss while `LossB` would like to\n        run the same visual encoder on the same images to compute a depth-prediction loss. Without having some sort\n        of memory, you would need to rerun this visual encoder on all images multiple times, wasting computational\n        resources. This is where `batch_memory` comes in: `LossA` can store the visual representations it computed\n        in `batch_memory` and then `LossB` can access them.  Note that the `batch_memory` will be reinitialized after\n        each new `batch` is sampled.\n\n        ## `stream_memory`\n        As described above, `batch_memory` treats each batch as its own independent collection of data. But what if\n        your `ExperienceStorage` samples its batches in a streaming fashion? E.g. 
your `ExperienceStorage`\n        might be a fixed collection of expert trajectories for use with imitation learning. In this case you can't\n        simply treat each batch independently: you might want to save information from one batch to use in another.\n        The simplest case of this would be if your agent `model` uses an RNN and produces a recurrent hidden state.\n        In this case, the hidden state from the end of one batch should be used at the start of computations for the\n        next batch. To allow for this, you can use the `stream_memory`. `stream_memory` is not cleared across\n        batches but, **importantly**, `stream_memory` is detached from the computation graph after each backpropagation\n        step so that the size of the computation graph does not grow unboundedly.\n\n        # Parameters\n\n        model: model to run on data batch (both assumed to be on the same device)\n        batch: data to use as input for model (already on the same device as model)\n        batch_memory: See above.\n        stream_memory: See above.\n\n        # Returns\n\n        A tuple with:\n\n        current_loss: total loss\n        current_info: additional information about the current loss\n        batch_memory: `batch_memory` memory after processing current data batch, see above.\n        stream_memory: `stream_memory` memory after processing current data batch, see above.\n        bsize: batch size\n        \"\"\"\n        raise NotImplementedError()\n"
  },
  {
    "path": "allenact/base_abstractions/preprocessor.py",
    "content": "import abc\nfrom typing import List, Any, Dict\nfrom typing import Sequence\nfrom typing import Union\n\nimport gym\nimport networkx as nx\nimport torch\nfrom gym.spaces import Dict as SpaceDict\n\nfrom allenact.utils.experiment_utils import Builder\n\n\nclass Preprocessor(abc.ABC):\n    \"\"\"Represents a preprocessor that transforms data from a sensor or another\n    preprocessor to the input of agents or other preprocessors. The user of\n    this class needs to implement the process method and the user is also\n    required to set the below attributes:\n\n    # Attributes:\n        input_uuids : List of input universally unique ids.\n        uuid : Universally unique id.\n        observation_space : ``gym.Space`` object corresponding to processed observation spaces.\n    \"\"\"\n\n    input_uuids: List[str]\n    uuid: str\n    observation_space: gym.Space\n\n    def __init__(\n        self,\n        input_uuids: List[str],\n        output_uuid: str,\n        observation_space: gym.Space,\n        **kwargs: Any\n    ) -> None:\n        self.uuid = output_uuid\n        self.input_uuids = input_uuids\n        self.observation_space = observation_space\n\n    @abc.abstractmethod\n    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:\n        \"\"\"Returns processed observations from sensors or other preprocessors.\n\n        # Parameters\n\n        obs : Dict with available observations and processed observations.\n\n        # Returns\n\n        Processed observation.\n        \"\"\"\n        raise NotImplementedError()\n\n    @abc.abstractmethod\n    def to(self, device: torch.device) -> \"Preprocessor\":\n        raise NotImplementedError()\n\n\nclass SensorPreprocessorGraph:\n    \"\"\"Represents a graph of preprocessors, with each preprocessor being\n    identified through a universally unique id.\n\n    Allows for the construction of observations that are a function of\n    sensor readings. 
For instance, perhaps rather than giving your agent\n    a raw RGB image, you'd rather first pass that image through a pre-trained\n    convolutional network and only give your agent the resulting features\n    (see e.g. the `ResNetPreprocessor` class).\n\n    # Attributes\n\n    preprocessors : List containing preprocessors with required input uuids, output uuid of each\n        sensor must be unique.\n    observation_spaces: The observation spaces of the values returned when calling `get_observations`.\n        By default (see the `additional_output_uuids` parameter to change this default) the observations\n        returned by the `SensorPreprocessorGraph` **include only the sink nodes** of the graph (i.e.\n        those that are not used by any other preprocessor).\n        Thus if one of the input preprocessors takes as input the `'YOUR_SENSOR_UUID'` sensor, then\n        `'YOUR_SENSOR_UUID'` will not be returned when calling `get_observations`.\n    device: The `torch.device` upon which the preprocessors are run.\n    \"\"\"\n\n    preprocessors: Dict[str, Preprocessor]\n    observation_spaces: SpaceDict\n    device: torch.device\n\n    def __init__(\n        self,\n        source_observation_spaces: SpaceDict,\n        preprocessors: Sequence[Union[Preprocessor, Builder[Preprocessor]]],\n        additional_output_uuids: Sequence[str] = tuple(),\n    ) -> None:\n        \"\"\"Initializer.\n\n        # Parameters\n\n        source_observation_spaces : The observation spaces of all sensors before preprocessing.\n            This generally should be the output of `SensorSuite.observation_spaces`.\n        preprocessors : The preprocessors that will be included in the graph.\n        additional_output_uuids: As described in the documentation for this class, the observations\n            returned when calling `get_observations` only include, by default, those observations\n            that are not processed by any preprocessor. 
If you'd like to include observations that\n            would otherwise not be included, the uuids of these sensors should be included as\n            a sequence of strings here.\n        \"\"\"\n        self.device: torch.device = torch.device(\"cpu\")\n\n        obs_spaces: Dict[str, gym.Space] = {\n            k: source_observation_spaces[k] for k in source_observation_spaces\n        }\n\n        self.preprocessors: Dict[str, Preprocessor] = {}\n        for preprocessor in preprocessors:\n            if isinstance(preprocessor, Builder):\n                preprocessor = preprocessor()\n\n            assert (\n                preprocessor.uuid not in self.preprocessors\n            ), \"'{}' is duplicated preprocessor uuid\".format(preprocessor.uuid)\n\n            self.preprocessors[preprocessor.uuid] = preprocessor\n            obs_spaces[preprocessor.uuid] = preprocessor.observation_space\n\n        g = nx.DiGraph()\n        for k in obs_spaces:\n            g.add_node(k)\n        for k in self.preprocessors:\n            for j in self.preprocessors[k].input_uuids:\n                g.add_edge(j, k)\n\n        assert nx.is_directed_acyclic_graph(\n            g\n        ), \"preprocessors do not form a direct acyclic graph\"\n\n        # noinspection PyCallingNonCallable\n        self.observation_spaces = SpaceDict(\n            spaces={\n                uuid: obs_spaces[uuid]\n                for uuid in obs_spaces\n                if uuid in additional_output_uuids or g.out_degree(uuid) == 0\n            }\n        )\n\n        # ensure dependencies are precomputed\n        self.compute_order = [n for n in nx.dfs_preorder_nodes(g)]\n\n    def get(self, uuid: str) -> Preprocessor:\n        \"\"\"Return preprocessor with the given `uuid`.\n\n        # Parameters\n\n        uuid : The unique id of the preprocessor.\n\n        # Returns\n\n        The preprocessor with unique id `uuid`.\n        \"\"\"\n        return self.preprocessors[uuid]\n\n    def to(self, 
device: torch.device) -> \"SensorPreprocessorGraph\":\n        for k, v in self.preprocessors.items():\n            self.preprocessors[k] = v.to(device)\n        self.device = device\n        return self\n\n    def get_observations(\n        self, obs: Dict[str, Any], *args: Any, **kwargs: Any\n    ) -> Dict[str, Any]:\n        \"\"\"Get processed observations.\n\n        # Returns\n\n        Collect observations processed from all sensors and return them packaged inside a Dict.\n        \"\"\"\n\n        for uuid in self.compute_order:\n            if uuid not in obs:\n                obs[uuid] = self.preprocessors[uuid].process(obs)\n\n        return {uuid: obs[uuid] for uuid in self.observation_spaces}\n\n\nclass PreprocessorGraph(SensorPreprocessorGraph):\n    def __init__(self, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n        raise DeprecationWarning(\n            \"`PreprocessorGraph` has been deprecated, use `SensorPreprocessorGraph` instead.\"\n        )\n\n\nclass ObservationSet:\n    def __init__(self, *args, **kwargs) -> None:\n        raise DeprecationWarning(\n            \"`ObservationSet` has been deprecated. Use `SensorPreprocessorGraph` instead.\"\n        )\n"
  },
  {
    "path": "allenact/base_abstractions/sensor.py",
    "content": "# Original work Copyright (c) Facebook, Inc. and its affiliates.\n# Modified work Copyright (c) Allen Institute for AI\n# This source code is licensed under the MIT license found in the\n# LICENSE file in the root directory of this source tree.\nfrom collections import OrderedDict\nfrom typing import (\n    Generic,\n    Dict,\n    Any,\n    Optional,\n    TYPE_CHECKING,\n    TypeVar,\n    Sequence,\n    Union,\n    Tuple,\n    cast,\n)\nimport abc\n\nimport gym\nimport gym.spaces as gyms\nimport numpy as np\nfrom torch.distributions.utils import lazy_property\n\nfrom allenact.base_abstractions.misc import EnvType\nfrom allenact.utils import spaces_utils as su\nfrom allenact.utils.misc_utils import prepare_locals_for_super\nfrom allenact.utils.system import get_logger\n\nif TYPE_CHECKING:\n    from allenact.base_abstractions.task import SubTaskType\nelse:\n    SubTaskType = TypeVar(\"SubTaskType\", bound=\"Task\")\n\nSpaceDict = gyms.Dict\n\n\nclass Sensor(Generic[EnvType, SubTaskType]):\n    \"\"\"Represents a sensor that provides data from the environment to agent.\n    The user of this class needs to implement the get_observation method and\n    the user is also required to set the below attributes:\n\n    # Attributes\n\n    uuid : universally unique id.\n    observation_space : ``gym.Space`` object corresponding to observation of\n        sensor.\n    \"\"\"\n\n    uuid: str\n    observation_space: gym.Space\n\n    def __init__(self, uuid: str, observation_space: gym.Space, **kwargs: Any) -> None:\n        self.uuid = uuid\n        self.observation_space = observation_space\n\n    def get_observation(\n        self, env: EnvType, task: Optional[SubTaskType], *args: Any, **kwargs: Any\n    ) -> Any:\n        \"\"\"Returns observations from the environment (or task).\n\n        # Parameters\n\n        env : The environment the sensor is used upon.\n        task : (Optionally) a Task from which the sensor should get data.\n\n        # Returns\n\n  
      Current observation for Sensor.\n        \"\"\"\n        raise NotImplementedError()\n\n\nclass SensorSuite(Generic[EnvType]):\n    \"\"\"Represents a set of sensors, with each sensor being identified through a\n    unique id.\n\n    # Attributes\n\n    sensors: list containing sensors for the environment, uuid of each\n        sensor must be unique.\n    \"\"\"\n\n    sensors: Dict[str, Sensor[EnvType, Any]]\n    observation_spaces: gyms.Dict\n\n    def __init__(self, sensors: Sequence[Sensor]) -> None:\n        \"\"\"Initializer.\n\n        # Parameters\n\n        param sensors: the sensors that will be included in the suite.\n        \"\"\"\n        self.sensors = OrderedDict()\n        spaces: OrderedDict[str, gym.Space] = OrderedDict()\n        for sensor in sensors:\n            assert (\n                sensor.uuid not in self.sensors\n            ), \"'{}' is duplicated sensor uuid\".format(sensor.uuid)\n            self.sensors[sensor.uuid] = sensor\n            spaces[sensor.uuid] = sensor.observation_space\n        self.observation_spaces = SpaceDict(spaces=spaces)\n\n    def get(self, uuid: str) -> Sensor:\n        \"\"\"Return sensor with the given `uuid`.\n\n        # Parameters\n\n        uuid : The unique id of the sensor\n\n        # Returns\n\n        The sensor with unique id `uuid`.\n        \"\"\"\n        return self.sensors[uuid]\n\n    def get_observations(\n        self, env: EnvType, task: Optional[SubTaskType], **kwargs: Any\n    ) -> Dict[str, Any]:\n        \"\"\"Get all observations corresponding to the sensors in the suite.\n\n        # Parameters\n\n        env : The environment from which to get the observation.\n        task : (Optionally) the task from which to get the observation.\n\n        # Returns\n\n        Data from all sensors packaged inside a Dict.\n        \"\"\"\n        return {\n            uuid: sensor.get_observation(env=env, task=task, **kwargs)  # type: ignore\n            for uuid, sensor in 
self.sensors.items()\n        }\n\n\nclass AbstractExpertSensor(Sensor[EnvType, SubTaskType], abc.ABC):\n    \"\"\"Base class for sensors that obtain the expert action for a given task\n    (if available).\"\"\"\n\n    ACTION_POLICY_LABEL: str = \"action_or_policy\"\n    EXPERT_SUCCESS_LABEL: str = \"expert_success\"\n    _NO_GROUPS_LABEL: str = \"__dummy_expert_group__\"\n\n    def __init__(\n        self,\n        action_space: Optional[Union[gym.Space, int]] = None,\n        uuid: str = \"expert_sensor_type_uuid\",\n        expert_args: Optional[Dict[str, Any]] = None,\n        nactions: Optional[int] = None,\n        use_dict_as_groups: bool = True,\n        **kwargs: Any,\n    ) -> None:\n        \"\"\"Initialize an `ExpertSensor`.\n\n        # Parameters\n        action_space : The action space of the agent. This is necessary in order for this sensor\n            to know what its output observation space is.\n        uuid : A string specifying the unique ID of this sensor.\n        expert_args : This sensor obtains an expert action from the task by calling the `query_expert`\n            method of the task. 
`expert_args` are any keyword arguments that should be passed to the\n            `query_expert` method when called.\n        nactions : [DEPRECATED] The number of actions available to the agent, corresponds to an `action_space`\n            of `gym.spaces.Discrete(nactions)`.\n        use_dict_as_groups : Whether to use the top-level action_space of type `gym.spaces.Dict` as action groups.\n        \"\"\"\n        if isinstance(action_space, int):\n            action_space = gym.spaces.Discrete(action_space)\n        elif action_space is None:\n            assert (\n                nactions is not None\n            ), \"One of `action_space` or `nactions` must be not `None`.\"\n            get_logger().warning(\n                \"The `nactions` parameter to `AbstractExpertSensor` is deprecated and will be removed, please use\"\n                \" the `action_space` parameter instead.\"\n            )\n            action_space = gym.spaces.Discrete(nactions)\n\n        self.action_space = action_space\n\n        self.use_groups = (\n            isinstance(action_space, gym.spaces.Dict) and use_dict_as_groups\n        )\n\n        self.group_spaces = (\n            self.action_space\n            if self.use_groups\n            else OrderedDict(\n                [\n                    (\n                        self._NO_GROUPS_LABEL,\n                        self.action_space,\n                    )\n                ]\n            )\n        )\n\n        self.expert_args: Dict[str, Any] = expert_args or {}\n\n        assert (\n            \"expert_sensor_group_name\" not in self.expert_args\n        ), \"`expert_sensor_group_name` is reserved for `AbstractExpertSensor`\"\n\n        observation_space = self._get_observation_space()\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    @classmethod\n    @abc.abstractmethod\n    def flagged_group_space(cls, group_space: gym.spaces.Space) -> gym.spaces.Dict:\n        \"\"\"gym space resulting from 
wrapping the given action space (or a\n        derived space, as in `AbstractExpertPolicySensor`) together with a\n        binary action space corresponding to an expert success flag, in a Dict\n        space.\n\n        # Parameters\n        group_space : The source action space to be (optionally used to derive a policy space,) flagged and wrapped\n        \"\"\"\n        raise NotImplementedError\n\n    @classmethod\n    def flagged_space(\n        cls, action_space: gym.spaces.Space, use_dict_as_groups: bool = True\n    ) -> gym.spaces.Dict:\n        \"\"\"gym space resulting from wrapping the given action space (or every\n        highest-level entry in a Dict action space), together with binary\n        action space corresponding to an expert success flag, in a Dict space.\n\n        # Parameters\n        action_space : The agent's action space (to be flagged and wrapped)\n        use_dict_as_groups : Flag enabling every highest-level entry in a Dict action space to be independently flagged.\n        \"\"\"\n        use_groups = isinstance(action_space, gym.spaces.Dict) and use_dict_as_groups\n\n        if not use_groups:\n            return cls.flagged_group_space(action_space)\n        else:\n            return gym.spaces.Dict(\n                [\n                    (\n                        group_space,\n                        cls.flagged_group_space(action_space[group_space]),\n                    )\n                    for group_space in cast(gym.spaces.Dict, action_space)\n                ]\n            )\n\n    def _get_observation_space(self) -> gym.spaces.Dict:\n        \"\"\"The observation space of the expert sensor.\n\n        For the most basic discrete agent's ExpertActionSensor, it will\n        equal `gym.spaces.Dict([ (self.ACTION_POLICY_LABEL,\n        self.action_space), (self.EXPERT_SUCCESS_LABEL,\n        gym.spaces.Discrete(2))])`, where the first entry hosts the\n        expert action index and the second equals 0 if and only if the\n  
      expert failed to generate a true expert action.\n        \"\"\"\n        return self.flagged_space(self.action_space, use_dict_as_groups=self.use_groups)\n\n    @lazy_property\n    def _zeroed_observation(self) -> Union[OrderedDict, Tuple]:\n        # AllenAct-style flattened space (to easily generate an all-zeroes action as an array)\n        flat_space = su.flatten_space(self.observation_space)\n        # torch point to correctly unflatten `Discrete` for zeroed output\n        flat_zeroed = su.torch_point(flat_space, np.zeros_like(flat_space.sample()))\n        # unflatten zeroed output and convert to numpy\n        return su.numpy_point(\n            self.observation_space, su.unflatten(self.observation_space, flat_zeroed)\n        )\n\n    def flatten_output(self, unflattened):\n        return (\n            su.flatten(\n                self.observation_space,\n                su.torch_point(self.observation_space, unflattened),\n            )\n            .cpu()\n            .numpy()\n        )\n\n    @abc.abstractmethod\n    def query_expert(\n        self,\n        task: SubTaskType,\n        expert_sensor_group_name: Optional[str],\n    ) -> Tuple[Any, bool]:\n        \"\"\"Query the expert for the given task (and optional group name).\n\n        # Returns\n\n         A tuple (x, y) where x is the expert action or policy and y is False \\\n            if the expert could not determine the optimal action (otherwise True). Here y \\\n            is used for masking. Even when y is False, x should still lie in the space of \\\n            possible values (e.g. 
if x is the expert policy then x should be the correct length, \\\n            sum to 1, and have non-negative entries).\n        \"\"\"\n        raise NotImplementedError\n\n    def get_observation(\n        self, env: EnvType, task: SubTaskType, *args: Any, **kwargs: Any\n    ) -> Union[OrderedDict, Tuple]:\n        # If the task is completed, we needn't (perhaps can't) find the expert\n        # action from the (current) terminal state.\n        if task.is_done():\n            return self.flatten_output(self._zeroed_observation)\n\n        actions_or_policies = OrderedDict()\n        for group_name in self.group_spaces:\n            action_or_policy, expert_was_successful = self.query_expert(\n                task=task, expert_sensor_group_name=group_name\n            )\n\n            actions_or_policies[group_name] = OrderedDict(\n                [\n                    (self.ACTION_POLICY_LABEL, action_or_policy),\n                    (self.EXPERT_SUCCESS_LABEL, expert_was_successful),\n                ]\n            )\n\n        return self.flatten_output(\n            actions_or_policies\n            if self.use_groups\n            else actions_or_policies[self._NO_GROUPS_LABEL]\n        )\n\n\nclass AbstractExpertActionSensor(AbstractExpertSensor, abc.ABC):\n    def __init__(\n        self,\n        action_space: Optional[Union[gym.Space, int]] = None,\n        uuid: str = \"expert_action\",\n        expert_args: Optional[Dict[str, Any]] = None,\n        nactions: Optional[int] = None,\n        use_dict_as_groups: bool = True,\n        **kwargs: Any,\n    ) -> None:\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    @classmethod\n    def flagged_group_space(cls, group_space: gym.spaces.Space) -> gym.spaces.Dict:\n        \"\"\"gym space resulting from wrapping the given action space, together\n        with a binary action space corresponding to an expert success flag, in\n        a Dict space.\n\n        # Parameters\n        group_space 
: The action space to be flagged and wrapped\n        \"\"\"\n        return gym.spaces.Dict(\n            [\n                (cls.ACTION_POLICY_LABEL, group_space),\n                (cls.EXPERT_SUCCESS_LABEL, gym.spaces.Discrete(2)),\n            ]\n        )\n\n\nclass ExpertActionSensor(AbstractExpertActionSensor):\n    \"\"\"(Deprecated) A sensor that obtains the expert action from a given task\n    (if available).\"\"\"\n\n    def query_expert(\n        self, task: SubTaskType, expert_sensor_group_name: Optional[str]\n    ) -> Tuple[Any, bool]:\n        return task.query_expert(\n            **self.expert_args, expert_sensor_group_name=expert_sensor_group_name\n        )\n\n\nclass AbstractExpertPolicySensor(AbstractExpertSensor, abc.ABC):\n    def __init__(\n        self,\n        action_space: Optional[Union[gym.Space, int]] = None,\n        uuid: str = \"expert_policy\",\n        expert_args: Optional[Dict[str, Any]] = None,\n        nactions: Optional[int] = None,\n        use_dict_as_groups: bool = True,\n        **kwargs: Any,\n    ) -> None:\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    @classmethod\n    def flagged_group_space(cls, group_space: gym.spaces.Space) -> gym.spaces.Dict:\n        \"\"\"gym space resulting from wrapping the policy space corresponding to\n        `allenact.utils.spaces_utils.policy_space(group_space)` together with a\n        binary action space corresponding to an expert success flag, in a Dict\n        space.\n\n        # Parameters\n        group_space : The source action space to be used to derive a policy space, flagged and wrapped\n        \"\"\"\n        return gym.spaces.Dict(\n            [\n                (cls.ACTION_POLICY_LABEL, su.policy_space(group_space)),\n                (cls.EXPERT_SUCCESS_LABEL, gym.spaces.Discrete(2)),\n            ]\n        )\n\n\nclass ExpertPolicySensor(AbstractExpertPolicySensor):\n    \"\"\"(Deprecated) A sensor that obtains the expert policy from a given 
task\n    (if available).\"\"\"\n\n    def query_expert(\n        self, task: SubTaskType, expert_sensor_group_name: Optional[str]\n    ) -> Tuple[Any, bool]:\n        return task.query_expert(\n            **self.expert_args, expert_sensor_group_name=expert_sensor_group_name\n        )\n"
  },
  {
    "path": "allenact/base_abstractions/task.py",
    "content": "# Original work Copyright (c) Facebook, Inc. and its affiliates.\n# Modified work Copyright (c) Allen Institute for AI\n# This source code is licensed under the MIT license found in the\n# LICENSE file in the root directory of this source tree.\n\n\"\"\"Defines the primary data structures by which agents interact with their\nenvironment.\"\"\"\n\nimport abc\nfrom typing import Any, Dict, Generic, List, Optional, Sequence, Tuple, TypeVar, Union\n\nimport gym\nimport numpy as np\nfrom gym.spaces.dict import Dict as SpaceDict\n\nfrom allenact.base_abstractions.misc import RLStepResult\nfrom allenact.base_abstractions.sensor import Sensor, SensorSuite\nfrom allenact.utils.misc_utils import deprecated\n\nEnvType = TypeVar(\"EnvType\")\n\n\nclass Task(Generic[EnvType]):\n    \"\"\"An abstract class defining a, goal directed, 'task.' Agents interact\n    with their environment through a task by taking a `step` after which they\n    receive new observations, rewards, and (potentially) other useful\n    information.\n\n    A Task is a helpful generalization of the OpenAI gym's `Env` class\n    and allows for multiple tasks (e.g. point and object navigation) to\n    be defined on a single environment (e.g. 
AI2-THOR).\n\n    # Attributes\n\n    env : The environment.\n    sensor_suite: Collection of sensors formed from the `sensors` argument in the initializer.\n    task_info : Dictionary of (k, v) pairs defining task goals and other task information.\n    max_steps : The maximum number of steps an agent can take in the task before it is considered failed.\n    observation_space: The observation space returned on each step from the sensors.\n    \"\"\"\n\n    env: EnvType\n    sensor_suite: SensorSuite[EnvType]\n    task_info: Dict[str, Any]\n    max_steps: int\n    observation_space: SpaceDict\n\n    def __init__(\n        self,\n        env: EnvType,\n        sensors: Union[SensorSuite, Sequence[Sensor]],\n        task_info: Dict[str, Any],\n        max_steps: int,\n        **kwargs\n    ) -> None:\n        self.env = env\n        self.sensor_suite = (\n            SensorSuite(sensors) if not isinstance(sensors, SensorSuite) else sensors\n        )\n        self.task_info = task_info\n        self.max_steps = max_steps\n        self.observation_space = self.sensor_suite.observation_spaces\n        self._num_steps_taken = 0\n        self._total_reward: Union[float, List[float]] = 0.0\n\n    def get_observations(self, **kwargs) -> Any:\n        return self.sensor_suite.get_observations(env=self.env, task=self, **kwargs)\n\n    @property\n    @abc.abstractmethod\n    def action_space(self) -> gym.Space:\n        \"\"\"Task's action space.\n\n        # Returns\n\n        The action space for the task.\n        \"\"\"\n        raise NotImplementedError()\n\n    @abc.abstractmethod\n    def render(self, mode: str = \"rgb\", *args, **kwargs) -> np.ndarray:\n        \"\"\"Render the current task state.\n\n        Rendered task state can come in any supported modes.\n\n        # Parameters\n\n        mode : The mode in which to render. 
For example, you might have a 'rgb'\n            mode that renders the agent's egocentric viewpoint or a 'dev' mode\n            returning additional information.\n        args : Extra args.\n        kwargs : Extra kwargs.\n\n        # Returns\n\n        A numpy array corresponding to the requested render.\n        \"\"\"\n        raise NotImplementedError()\n\n    def _increment_num_steps_taken(self) -> None:\n        \"\"\"Helper function that increases the number of steps counter by\n        one.\"\"\"\n        self._num_steps_taken += 1\n\n    def step(self, action: Any) -> RLStepResult:\n        \"\"\"Take an action in the environment (one per agent).\n\n        Takes the action in the environment and returns\n        observations (& rewards and any additional information)\n        corresponding to the agent's new state. Note that this function\n        should not be overwritten without care (instead\n        implement the `_step` function).\n\n        # Parameters\n\n        action : The action to take, should be of the same form as specified by `self.action_space`.\n\n        # Returns\n\n        A `RLStepResult` object encoding the new observations, reward, and\n        (possibly) additional information.\n        \"\"\"\n        assert not self.is_done()\n        sr = self._step(action=action)\n\n        # If reward is Sequence, it's assumed to follow the same order imposed by spaces' flatten operation\n        if isinstance(sr.reward, Sequence):\n            if isinstance(self._total_reward, Sequence):\n                for it, rew in enumerate(sr.reward):\n                    self._total_reward[it] += float(rew)\n            else:\n                self._total_reward = [float(r) for r in sr.reward]\n        else:\n            self._total_reward += float(sr.reward)  # type:ignore\n\n        self._increment_num_steps_taken()\n        # TODO: We need a better solution to the below. 
It's not a good idea\n        #   to pre-increment the step counter as this might play poorly with `_step`\n        #   if it relies on some aspect of the current number of steps taken.\n        return sr.clone({\"done\": sr.done or self.is_done()})\n\n    @abc.abstractmethod\n    def _step(self, action: Any) -> RLStepResult:\n        \"\"\"Helper function called by `step` to take a step by each agent in the\n        environment.\n\n        Takes the action in the environment and returns\n        observations (& rewards and any additional information)\n        corresponding to the agent's new state. This function is called\n        by the (public) `step` function and is what should be implemented\n        when defining your new task. Having `_step` be separate from `step`\n        is useful as this allows the `step` method to perform bookkeeping (e.g.\n        keeping track of the number of steps). Without having `_step` as a separate\n        method, everyone implementing `step` would need to copy this bookkeeping code.\n\n        # Parameters\n\n        action : The action to take.\n\n        # Returns\n\n        A `RLStepResult` object encoding the new observations, reward, and\n        (possibly) additional information.\n        \"\"\"\n        raise NotImplementedError()\n\n    def reached_max_steps(self) -> bool:\n        \"\"\"Has the agent reached the maximum number of steps.\"\"\"\n        return self.num_steps_taken() >= self.max_steps\n\n    @abc.abstractmethod\n    def reached_terminal_state(self) -> bool:\n        \"\"\"Has the agent reached a terminal state (excluding reaching the\n        maximum number of steps).\"\"\"\n        raise NotImplementedError()\n\n    def is_done(self) -> bool:\n        \"\"\"Did the agent reach a terminal state or perform the maximum number\n        of steps.\"\"\"\n        return self.reached_terminal_state() or self.reached_max_steps()\n\n    def num_steps_taken(self) -> int:\n        \"\"\"Number of steps 
taken by the agent in the task so far.\"\"\"\n        return self._num_steps_taken\n\n    @deprecated\n    def action_names(self) -> Tuple[str, ...]:\n        \"\"\"Action names of the Task instance.\n\n        This function has been deprecated and will be removed.\n\n        This function is a hold-over from when the `Task`\n        abstraction only considered `gym.spaces.Discrete` action spaces (in which\n        case it makes sense to name these actions).\n\n        This implementation of `action_names` requires that a `class_action_names`\n        method has been defined. This method should be overwritten if `class_action_names`\n        requires keyword arguments to determine the number of actions.\n        \"\"\"\n        if hasattr(self, \"class_action_names\"):\n            return self.class_action_names()\n        else:\n            raise NotImplementedError(\n                \"`action_names` requires that a function `class_action_names` be defined.\"\n                \" This said, please do not use this functionality as it has been deprecated and will be removed.\"\n                \" If you would like an `action_names` function for your task, feel free to define one\"\n                \" with the knowledge that the AllenAct internals will ignore it.\"\n            )\n\n    @abc.abstractmethod\n    def close(self) -> None:\n        \"\"\"Closes the environment and any other files opened by the Task (if\n        applicable).\"\"\"\n        raise NotImplementedError()\n\n    def metrics(self) -> Dict[str, Any]:\n        \"\"\"Computes metrics related to the task after the task's completion.\n\n        By default this function is automatically called during training\n        and the reported metrics logged to tensorboard.\n\n        # Returns\n\n        A dictionary where every key is a string (the metric's\n            name) and the value is the value of the metric.\n        \"\"\"\n        return {\n            \"ep_length\": self.num_steps_taken(),\n         
   \"reward\": self.cumulative_reward,\n            \"task_info\": self.task_info,\n        }\n\n    def query_expert(self, **kwargs) -> Tuple[Any, bool]:\n        \"\"\"(Deprecated) Query the expert policy for this task.\n\n        The new correct way to include this functionality is through the definition of a class\n        derived from `allenact.base_abstractions.sensor.AbstractExpertActionSensor` or\n        `allenact.base_abstractions.sensor.AbstractExpertPolicySensor`, where a\n        `query_expert` method must be defined.\n\n        # Returns\n\n        A tuple (x, y) where x is the expert action (or policy) and y is False \\\n            if the expert could not determine the optimal action (otherwise True). Here y \\\n            is used for masking. Even when y is False, x should still lie in the space of \\\n            possible values (e.g. if x is the expert policy then x should be the correct length, \\\n            sum to 1, and have non-negative entries).\n        \"\"\"\n        return None, False\n\n    @property\n    def cumulative_reward(self) -> float:\n        \"\"\"Mean per-agent total cumulative reward in the task so far.\n\n        # Returns\n\n        Mean per-agent cumulative reward as a float.\n        \"\"\"\n        return (\n            np.mean(self._total_reward).item()\n            if isinstance(self._total_reward, Sequence)\n            else self._total_reward\n        )\n\n\nSubTaskType = TypeVar(\"SubTaskType\", bound=Task)\n\n\nclass TaskSampler(abc.ABC):\n    \"\"\"Abstract class defining how new tasks are sampled.\"\"\"\n\n    @property\n    @abc.abstractmethod\n    def length(self) -> Union[int, float]:\n        \"\"\"Length.\n\n        # Returns\n\n        Number of total tasks remaining that can be sampled. 
Can be\n            float('inf').\n        \"\"\"\n        raise NotImplementedError()\n\n    @property\n    @abc.abstractmethod\n    def last_sampled_task(self) -> Optional[Task]:\n        \"\"\"Get the most recently sampled Task.\n\n        # Returns\n\n        The most recently sampled Task.\n        \"\"\"\n        raise NotImplementedError()\n\n    @abc.abstractmethod\n    def next_task(self, force_advance_scene: bool = False) -> Optional[Task]:\n        \"\"\"Get the next task in the sampler's stream.\n\n        # Parameters\n\n        force_advance_scene : Used to (if applicable) force the task sampler to\n            use a new scene for the next task. This is useful if, during training,\n            you would like to train with one scene for some number of steps and\n            then explicitly control when you begin training with the next scene.\n\n        # Returns\n\n        The next Task in the sampler's stream if a next task exists. Otherwise None.\n        \"\"\"\n        raise NotImplementedError()\n\n    @abc.abstractmethod\n    def close(self) -> None:\n        \"\"\"Closes any open environments or streams.\n\n        Should be run when done sampling.\n        \"\"\"\n        raise NotImplementedError()\n\n    @property\n    @abc.abstractmethod\n    def all_observation_spaces_equal(self) -> bool:\n        \"\"\"Checks if all observation spaces of tasks that can be sampled are\n        equal.\n\n        This will almost always simply return `True`. A case in which it should\n        return `False` includes, for example, a setting where you design\n        a `TaskSampler` that can generate different types of tasks, i.e.\n        point navigation tasks and object navigation tasks. In this case, these\n        different tasks may output different types of observations.\n\n        # Returns\n\n        True if all Tasks that can be sampled by this sampler have the\n            same observation space. 
Otherwise False.\n        \"\"\"\n        raise NotImplementedError()\n\n    @abc.abstractmethod\n    def reset(self) -> None:\n        \"\"\"Resets task sampler to its original state (except for any seed).\"\"\"\n        raise NotImplementedError()\n\n    @abc.abstractmethod\n    def set_seed(self, seed: int) -> None:\n        \"\"\"Sets new RNG seed.\n\n        # Parameters\n\n        seed : New seed.\n        \"\"\"\n        raise NotImplementedError()\n"
  },
  {
    "path": "allenact/embodiedai/__init__.py",
    "content": ""
  },
  {
    "path": "allenact/embodiedai/aux_losses/__init__.py",
    "content": ""
  },
  {
    "path": "allenact/embodiedai/aux_losses/losses.py",
    "content": "# Original work Copyright (c) Facebook, Inc. and its affiliates.\n# Modified work Copyright (c) Allen Institute for AI\n# This source code is licensed under the MIT license found in the\n# LICENSE file in the root directory of this source tree.\n\"\"\"Defining the auxiliary loss for actor critic type models.\n\nSeveral of the losses defined in this file are modified versions of those found in\n    https://github.com/joel99/habitat-pointnav-aux/blob/master/habitat_baselines/\n\"\"\"\n\n\nimport abc\nfrom typing import Dict, cast, Tuple, Sequence\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom allenact.algorithms.onpolicy_sync.losses.abstract_loss import (\n    AbstractActorCriticLoss,\n    ObservationType,\n)\nfrom allenact.base_abstractions.distributions import CategoricalDistr\nfrom allenact.base_abstractions.misc import ActorCriticOutput\n\n\ndef _bernoulli_subsample_mask_like(masks, p=0.1):\n    return (torch.rand_like(masks) <= p).float()\n\n\nclass MultiAuxTaskNegEntropyLoss(AbstractActorCriticLoss):\n    \"\"\"Used in multiple auxiliary tasks setting.\n\n    Add a negative entropy loss over all the task weights.\n    \"\"\"\n\n    UUID = \"multitask_entropy\"  # make sure this is unique\n\n    def __init__(self, task_names: Sequence[str], *args, **kwargs):\n        super().__init__(*args, **kwargs)\n        self.num_tasks = len(task_names)\n        self.task_names = task_names\n\n    def loss(  # type: ignore\n        self,\n        step_count: int,\n        batch: ObservationType,\n        actor_critic_output: ActorCriticOutput[CategoricalDistr],\n        *args,\n        **kwargs,\n    ) -> Tuple[torch.FloatTensor, Dict[str, float]]:\n        task_weights = actor_critic_output.extras[self.UUID]\n        task_weights = task_weights.view(-1, self.num_tasks)\n        entropy = CategoricalDistr(task_weights).entropy()\n\n        avg_loss = (-entropy).mean()\n        avg_task_weights = 
task_weights.mean(dim=0)  # (K)\n\n        outputs = {\"entropy_loss\": cast(torch.Tensor, avg_loss).item()}\n        for i in range(self.num_tasks):\n            outputs[\"weight_\" + self.task_names[i]] = cast(\n                torch.Tensor, avg_task_weights[i]\n            ).item()\n\n        return (\n            avg_loss,\n            outputs,\n        )\n\n\nclass AuxiliaryLoss(AbstractActorCriticLoss):\n    \"\"\"Base class of auxiliary loss.\n\n    Any auxiliary task loss should inherit from it, and implement the\n    `get_aux_loss` function.\n    \"\"\"\n\n    def __init__(self, auxiliary_uuid: str, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n\n        self.auxiliary_uuid = auxiliary_uuid\n\n    def loss(  # type: ignore\n        self,\n        step_count: int,\n        batch: ObservationType,\n        actor_critic_output: ActorCriticOutput[CategoricalDistr],\n        *args,\n        **kwargs,\n    ) -> Tuple[torch.Tensor, Dict[str, float]]:\n\n        # auxiliary loss\n        return self.get_aux_loss(\n            **actor_critic_output.extras[self.auxiliary_uuid],\n            observations=batch[\"observations\"],\n            actions=batch[\"actions\"],\n            masks=batch[\"masks\"],\n        )\n\n    @abc.abstractmethod\n    def get_aux_loss(\n        self,\n        aux_model: nn.Module,\n        observations: ObservationType,\n        obs_embeds: torch.Tensor,\n        actions: torch.Tensor,\n        beliefs: torch.Tensor,\n        masks: torch.Tensor,\n        *args,\n        **kwargs,\n    ):\n        raise NotImplementedError()\n\n\ndef _propagate_final_beliefs_to_all_steps(\n    beliefs: torch.Tensor,\n    masks: torch.Tensor,\n    num_sampler: int,\n    num_steps: int,\n):\n    final_beliefs = torch.zeros_like(beliefs)  # (T, B, *)\n    start_locs_list = []\n    end_locs_list = []\n\n    for i in range(num_sampler):\n        # right shift: to locate the 1 before 0 and ignore the 1st element\n        end_locs = 
torch.where(masks[1:, i] == 0)[0]  # maybe [], dtype=torch.Long\n\n        start_locs = torch.cat(\n            [torch.tensor([0]).to(end_locs), end_locs + 1]\n        )  # add the first element\n        start_locs_list.append(start_locs)\n\n        end_locs = torch.cat(\n            [end_locs, torch.tensor([num_steps - 1]).to(end_locs)]\n        )  # add the last element\n        end_locs_list.append(end_locs)\n\n        for st, ed in zip(start_locs, end_locs):\n            final_beliefs[st : ed + 1, i] = beliefs[ed, i]\n\n    return final_beliefs, start_locs_list, end_locs_list\n\n\nclass InverseDynamicsLoss(AuxiliaryLoss):\n    \"\"\"Auxiliary task of Inverse Dynamics from Auxiliary Tasks Speed Up\n    Learning PointGoal Navigation (Ye, 2020) https://arxiv.org/abs/2007.04561\n    originally from Curiosity-driven Exploration by Self-supervised Prediction\n    (Pathak, 2017) https://arxiv.org/abs/1705.05363.\"\"\"\n\n    UUID = \"InvDyn\"\n\n    def __init__(\n        self, subsample_rate: float = 0.2, subsample_min_num: int = 10, *args, **kwargs\n    ):\n        \"\"\"Subsample the valid samples by the rate of `subsample_rate`, if the\n        total num of the valid samples is larger than `subsample_min_num`.\"\"\"\n        super().__init__(auxiliary_uuid=self.UUID, *args, **kwargs)\n\n        self.cross_entropy_loss = nn.CrossEntropyLoss(reduction=\"none\")\n        self.subsample_rate = subsample_rate\n        self.subsample_min_num = subsample_min_num\n\n    def get_aux_loss(\n        self,\n        aux_model: nn.Module,\n        observations: ObservationType,\n        obs_embeds: torch.FloatTensor,\n        actions: torch.FloatTensor,\n        beliefs: torch.FloatTensor,\n        masks: torch.FloatTensor,\n        *args,\n        **kwargs,\n    ):\n        ## we discard the last action in the batch\n        num_steps, num_sampler = actions.shape  # T, B\n        actions = cast(torch.LongTensor, actions)\n        actions = actions[:-1]  # (T-1, B)\n\n        
## find the final belief state based on masks\n        # we did not compute loss here as model.forward is compute-heavy\n        masks = masks.squeeze(-1)  # (T, B)\n\n        final_beliefs, _, _ = _propagate_final_beliefs_to_all_steps(\n            beliefs,\n            masks,\n            num_sampler,\n            num_steps,\n        )\n\n        ## compute CE loss\n        decoder_in = torch.cat(\n            [obs_embeds[:-1], obs_embeds[1:], final_beliefs[:-1]], dim=2\n        )  # (T-1, B, *)\n\n        preds = aux_model(decoder_in)  # (T-1, B, A)\n        # cross entropy loss require class dim at 1\n        loss = self.cross_entropy_loss(\n            preds.view((num_steps - 1) * num_sampler, -1),  # ((T-1)*B, A)\n            actions.flatten(),  #  ((T-1)*B,)\n        )\n        loss = loss.view(num_steps - 1, num_sampler)  # (T-1, B)\n\n        # def vanilla_valid_losses(loss, num_sampler, end_locs_batch):\n        #     ##  this is just used to verify the vectorized version works correctly.\n        #     ##  not used for experimentation\n        #     valid_losses = []\n        #     for i in range(num_sampler):\n        #         end_locs = end_locs_batch[i]\n        #         for j in range(len(end_locs)):\n        #             if j == 0:\n        #                 start_loc = 0\n        #             else:\n        #                 start_loc = end_locs[j - 1] + 1\n        #             end_loc = end_locs[j]\n        #             if end_loc - start_loc <= 0:  # the episode only 1-step\n        #                 continue\n        #             valid_losses.append(loss[start_loc:end_loc, i])\n\n        #     if len(valid_losses) == 0:\n        #         valid_losses = torch.zeros(1, dtype=torch.float).to(loss)\n        #     else:\n        #         valid_losses = torch.cat(valid_losses)  # (sum m, )\n        #     return valid_losses\n\n        # valid_losses = masks[1:] * loss # (T-1, B)\n        # valid_losses0 = vanilla_valid_losses(loss, 
num_sampler, end_locs_batch)\n        # assert valid_losses0.sum() == valid_losses.sum()\n\n        num_valid_losses = torch.count_nonzero(masks[1:])\n        if num_valid_losses < self.subsample_min_num:  # don't subsample\n            subsample_rate = 1.0\n        else:\n            subsample_rate = self.subsample_rate\n\n        loss_masks = masks[1:] * _bernoulli_subsample_mask_like(\n            masks[1:], subsample_rate\n        )\n        num_valid_losses = torch.count_nonzero(loss_masks)\n        avg_loss = (loss * loss_masks).sum() / torch.clamp(num_valid_losses, min=1.0)\n\n        return (\n            avg_loss,\n            {\n                \"total\": cast(torch.Tensor, avg_loss).item(),\n            },\n        )\n\n\nclass TemporalDistanceLoss(AuxiliaryLoss):\n    \"\"\"Auxiliary task of Temporal Distance from Auxiliary Tasks Speed Up\n    Learning PointGoal Navigation (Ye, 2020)\n    https://arxiv.org/abs/2007.04561.\"\"\"\n\n    UUID = \"TempDist\"\n\n    def __init__(self, num_pairs: int = 8, epsiode_len_min: int = 5, *args, **kwargs):\n        super().__init__(auxiliary_uuid=self.UUID, *args, **kwargs)\n        self.num_pairs = num_pairs\n        self.epsiode_len_min = float(epsiode_len_min)\n\n    def get_aux_loss(\n        self,\n        aux_model: nn.Module,\n        observations: ObservationType,\n        obs_embeds: torch.FloatTensor,\n        actions: torch.FloatTensor,\n        beliefs: torch.FloatTensor,\n        masks: torch.FloatTensor,\n        *args,\n        **kwargs,\n    ):\n        ## we discard the last action in the batch\n        num_steps, num_sampler = actions.shape  # T, B\n\n        ## find the final belief state based on masks\n        # we did not compute loss here as model.forward is compute-heavy\n        masks = masks.squeeze(-1)  # (T, B)\n\n        (\n            final_beliefs,\n            start_locs_list,\n            end_locs_list,\n        ) = _propagate_final_beliefs_to_all_steps(\n            beliefs,\n        
    masks,\n            num_sampler,\n            num_steps,\n        )\n\n        ## also find the locs_batch of shape (M, 3)\n        # the last dim: [0] is on num_sampler loc, [1] and [2] is start and end locs\n        # of one episode\n        # in other words: at locs_batch[m, 0] in num_sampler dim, there exists one episode\n        # starting from locs_batch[m, 1], ends at locs_batch[m, 2] (included)\n        locs_batch = []\n        for i in range(num_sampler):\n            locs_batch.append(\n                torch.stack(\n                    [\n                        i * torch.ones_like(start_locs_list[i]),\n                        start_locs_list[i],\n                        end_locs_list[i],\n                    ],\n                    dim=-1,\n                )\n            )  # shape (M[i], 3)\n        locs_batch = torch.cat(locs_batch)  # shape (M, 3)\n\n        temporal_dist_max = (\n            locs_batch[:, 2] - locs_batch[:, 1]\n        ).float()  # end - start, (M)\n        # create normalizer that ignores too short episode, otherwise 1/T\n        normalizer = torch.where(\n            temporal_dist_max > self.epsiode_len_min,\n            1.0 / temporal_dist_max,\n            torch.tensor([0]).to(temporal_dist_max),\n        )  # (M)\n\n        # sample valid pairs: sampled_pairs shape (M, num_pairs, 3)\n        # where M is the num of total episodes in the batch\n        locs = locs_batch.cpu().numpy()  # as torch.randint only support int, not tensor\n        sampled_pairs = np.random.randint(\n            np.repeat(locs[:, [1]], 2 * self.num_pairs, axis=-1),  # (M, 2*k)\n            np.repeat(locs[:, [2]] + 1, 2 * self.num_pairs, axis=-1),  # (M, 2*k)\n        ).reshape(\n            (-1, self.num_pairs, 2)\n        )  # (M, k, 2)\n        sampled_pairs_batch = torch.from_numpy(sampled_pairs).to(\n            locs_batch\n        )  # (M, k, 2)\n\n        num_sampler_batch = locs_batch[:, [0]].expand(\n            -1, 2 * self.num_pairs\n       
 )  # (M, 1) -> (M, 2*k)\n        num_sampler_batch = num_sampler_batch.reshape(\n            -1, self.num_pairs, 2\n        )  # (M, k, 2)\n\n        sampled_obs_embeds = obs_embeds[\n            sampled_pairs_batch, num_sampler_batch\n        ]  # (M, k, 2, H1)\n        sampled_final_beliefs = final_beliefs[\n            sampled_pairs_batch, num_sampler_batch\n        ]  # (M, k, 2, H2)\n        features = torch.cat(\n            [\n                sampled_obs_embeds[:, :, 0],\n                sampled_obs_embeds[:, :, 1],\n                sampled_final_beliefs[:, :, 0],\n            ],\n            dim=-1,\n        )  # (M, k, 2*H1 + H2)\n\n        pred_temp_dist = aux_model(features).squeeze(-1)  # (M, k)\n        true_temp_dist = (\n            sampled_pairs_batch[:, :, 1] - sampled_pairs_batch[:, :, 0]\n        ).float()  # (M, k)\n\n        pred_error = (pred_temp_dist - true_temp_dist) * normalizer.unsqueeze(1)\n        loss = 0.5 * (pred_error).pow(2)\n        avg_loss = loss.mean()\n\n        return (\n            avg_loss,\n            {\n                \"total\": cast(torch.Tensor, avg_loss).item(),\n            },\n        )\n\n\nclass CPCALoss(AuxiliaryLoss):\n    \"\"\"Auxiliary task of CPC|A from Auxiliary Tasks Speed Up Learning PointGoal\n    Navigation (Ye, 2020) https://arxiv.org/abs/2007.04561 originally from\n    Neural Predictive Belief Representations (Guo, 2018)\n    https://arxiv.org/abs/1811.06407.\"\"\"\n\n    UUID = \"CPCA\"\n\n    def __init__(\n        self, planning_steps: int = 8, subsample_rate: float = 0.2, *args, **kwargs\n    ):\n        super().__init__(auxiliary_uuid=self.UUID, *args, **kwargs)\n        self.planning_steps = planning_steps\n        self.subsample_rate = subsample_rate\n        self.cross_entropy_loss = nn.BCEWithLogitsLoss(reduction=\"none\")\n\n    def get_aux_loss(\n        self,\n        aux_model: nn.Module,\n        observations: ObservationType,\n        obs_embeds: torch.Tensor,\n        actions: 
torch.Tensor,\n        beliefs: torch.Tensor,\n        masks: torch.Tensor,\n        *args,\n        **kwargs,\n    ):\n        # prepare for autoregressive inputs: c_{t+1:t+k} = GRU(b_t, a_{t:t+k-1}) <-> z_{t+k}\n        ## where b_t = RNN(b_{t-1}, z_t, a_{t-1}), prev action is optional\n        num_steps, num_sampler, obs_embed_size = obs_embeds.shape  # T, N, H_O\n        assert 0 < self.planning_steps <= num_steps\n\n        ## prepare positive and negatives that sample from all the batch\n        positives = obs_embeds  # (T, N, -1)\n        negative_inds = torch.randperm(num_steps * num_sampler).to(positives.device)\n        negatives = torch.gather(  # input[index[i,j]][j]\n            positives.view(num_steps * num_sampler, -1),\n            dim=0,\n            index=negative_inds.view(num_steps * num_sampler, 1).expand(\n                num_steps * num_sampler, positives.shape[-1]\n            ),\n        ).view(\n            num_steps, num_sampler, -1\n        )  # (T, N, -1)\n\n        ## prepare action sequences and initial beliefs\n        action_embedding = aux_model.action_embedder(actions)  # (T, N, -1)\n        action_embed_size = action_embedding.size(-1)\n        action_padding = torch.zeros(\n            self.planning_steps - 1, num_sampler, action_embed_size\n        ).to(\n            action_embedding\n        )  # (k-1, N, -1)\n        action_padded = torch.cat(\n            (action_embedding, action_padding), dim=0\n        )  # (T+k-1, N, -1)\n\n        ## unfold function will create consecutive action sequences\n        action_seq = (\n            action_padded.unfold(dimension=0, size=self.planning_steps, step=1)\n            .permute(3, 0, 1, 2)\n            .view(self.planning_steps, num_steps * num_sampler, action_embed_size)\n        )  # (k, T*N, -1)\n\n        ## beliefs GRU output\n        beliefs = beliefs.view(num_steps * num_sampler, -1).unsqueeze(0)  # (1, T*N, -1)\n\n        # get future contexts c_{t+1:t+k} = GRU(b_t, 
a_{t:t+k-1})\n        future_contexts_all, _ = aux_model.context_model(\n            action_seq, beliefs\n        )  # (k, T*N, -1)\n\n        ## NOTE: future_contexts_all starting from next step t+1 to t+k, not t to t+k-1\n        future_contexts_all = future_contexts_all.view(\n            self.planning_steps, num_steps, num_sampler, -1\n        ).permute(\n            1, 0, 2, 3\n        )  # (k, T, N, -1)\n\n        # get all the classifier scores I(c_{t+1:t+k}; z_{t+1:t+k})\n        positives_padding = torch.zeros(\n            self.planning_steps, num_sampler, obs_embed_size\n        ).to(\n            positives\n        )  # (k, N, -1)\n        positives_padded = torch.cat(\n            (positives[1:], positives_padding), dim=0\n        )  # (T+k-1, N, -1)\n        positives_expanded = positives_padded.unfold(\n            dimension=0, size=self.planning_steps, step=1\n        ).permute(\n            0, 3, 1, 2\n        )  # (T, k, N, -1)\n        positives_logits = aux_model.classifier(\n            torch.cat([positives_expanded, future_contexts_all], -1)\n        )  # (T, k, N, 1)\n        positive_loss = self.cross_entropy_loss(\n            positives_logits, torch.ones_like(positives_logits)\n        )  # (T, k, N, 1)\n\n        negatives_padding = torch.zeros(\n            self.planning_steps, num_sampler, obs_embed_size\n        ).to(\n            negatives\n        )  # (k, N, -1)\n        negatives_padded = torch.cat(\n            (negatives[1:], negatives_padding), dim=0\n        )  # (T+k-1, N, -1)\n        negatives_expanded = negatives_padded.unfold(\n            dimension=0, size=self.planning_steps, step=1\n        ).permute(\n            0, 3, 1, 2\n        )  # (T, k, N, -1)\n        negatives_logits = aux_model.classifier(\n            torch.cat([negatives_expanded, future_contexts_all], -1)\n        )  # (T, k, N, 1)\n        negative_loss = self.cross_entropy_loss(\n            negatives_logits, torch.zeros_like(negatives_logits)\n        
)  # (T, k, N, 1)\n\n        # Masking to get valid scores\n        ## masks: Note which timesteps [1, T+k+1] could have valid queries, at distance (k) (note offset by 1)\n        ## we will extract the **diagonals** as valid_masks from masks later as below\n        ## the vertical axis is (absolute) real timesteps, the horizontal axis is (relative) planning timesteps\n        ## | - - - - - |\n        ## | .         |\n        ## | , .       |\n        ## | . , .     |\n        ## | , . , .   |\n        ## |   , . , . |\n        ## |     , . , |\n        ## |       , . |\n        ## |         , |\n        ## | - - - - - |\n        masks = masks.squeeze(-1)  # (T, N)\n        pred_masks = torch.ones(\n            num_steps + self.planning_steps,\n            self.planning_steps,\n            num_sampler,\n            1,\n            dtype=torch.bool,\n        ).to(\n            beliefs.device\n        )  # (T+k, k, N, 1)\n\n        pred_masks[num_steps - 1 :] = (\n            False  # GRU(b_t, a_{t:t+k-1}) is invalid when t >= T, as we don't have real z_{t+1}\n        )\n        for j in range(1, self.planning_steps + 1):  # for j-step predictions\n            pred_masks[: j - 1, j - 1] = (\n                False  # Remove the upper triangle above the diagnonal (but I think this is unnecessary for valid_masks)\n            )\n            for n in range(num_sampler):\n                has_zeros_batch = torch.where(masks[:, n] == 0)[0]\n                # in j-step prediction, timesteps z -> z + j are disallowed as those are the first j timesteps of a new episode\n                # z-> z-1 because of pred_masks being offset by 1\n                for z in has_zeros_batch:\n                    pred_masks[z - 1 : z - 1 + j, j - 1, n] = (\n                        False  # can affect j timesteps\n                    )\n\n        # instead of the whole range, we actually are only comparing a window i:i+k for each query/target i - for each, select the appropriate k\n        # 
we essentially gather diagonals from this full mask, t of them, k long\n        valid_diagonals = [\n            torch.diagonal(pred_masks, offset=-i) for i in range(num_steps)\n        ]  # pull the appropriate k per timestep\n        valid_masks = (\n            torch.stack(valid_diagonals, dim=0).permute(0, 3, 1, 2).float()\n        )  # (T, N, 1, k) -> (T, k, N, 1)\n        # print(valid_masks.int().squeeze(-1)); print(masks) # verify its correctness\n\n        loss_masks = valid_masks * _bernoulli_subsample_mask_like(\n            valid_masks, self.subsample_rate\n        )  # (T, k, N, 1)\n        num_valid_losses = torch.count_nonzero(loss_masks)\n        avg_positive_loss = (positive_loss * loss_masks).sum() / torch.clamp(\n            num_valid_losses, min=1.0\n        )\n        avg_negative_loss = (negative_loss * loss_masks).sum() / torch.clamp(\n            num_valid_losses, min=1.0\n        )\n\n        avg_loss = avg_positive_loss + avg_negative_loss\n\n        return (\n            avg_loss,\n            {\n                \"total\": cast(torch.Tensor, avg_loss).item(),\n                \"positive_loss\": cast(torch.Tensor, avg_positive_loss).item(),\n                \"negative_loss\": cast(torch.Tensor, avg_negative_loss).item(),\n            },\n        )\n\n\nclass CPCASoftMaxLoss(AuxiliaryLoss):\n    \"\"\"Auxiliary task of CPC|A with multi class softmax.\"\"\"\n\n    UUID = \"cpcA_SOFTMAX\"\n\n    def __init__(\n        self,\n        planning_steps: int = 8,\n        subsample_rate: float = 1,\n        allow_skipping: bool = True,\n        *args,\n        **kwargs,\n    ):\n        super().__init__(auxiliary_uuid=self.UUID, *args, **kwargs)\n        self.planning_steps = planning_steps\n        self.subsample_rate = subsample_rate\n        self.cross_entropy_loss = nn.CrossEntropyLoss(\n            reduction=\"none\"\n        )  # nn.BCEWithLogitsLoss(reduction=\"none\")\n        self.allow_skipping = allow_skipping\n\n    def get_aux_loss(\n  
      self,\n        aux_model: nn.Module,\n        observations: ObservationType,\n        obs_embeds: torch.Tensor,\n        actions: torch.Tensor,\n        beliefs: torch.Tensor,\n        masks: torch.Tensor,\n        *args,\n        **kwargs,\n    ):\n        # prepare for autoregressive inputs: c_{t+1:t+k} = GRU(b_t, a_{t:t+k-1}) <-> z_{t+k}\n        ## where b_t = RNN(b_{t-1}, z_t, a_{t-1}), prev action is optional\n        num_steps, num_samplers, obs_embed_size = obs_embeds.shape  # T, N, H_O\n        ##visual observation of all num_steps\n\n        if not (0 < self.planning_steps <= num_steps):\n            if self.allow_skipping:\n                return 0, {}\n            else:\n                raise RuntimeError(\n                    f\"Insufficient planning steps: self.planning_steps {self.planning_steps} must\"\n                    f\" be greater than zero and less than or equal to num_steps {num_steps}.\"\n                )\n\n        ## prepare action sequences and initial beliefs\n        action_embedding = aux_model.action_embedder(actions)  # (T, N, -1)\n        action_embed_size = action_embedding.size(-1)\n        action_padding = torch.zeros(\n            self.planning_steps - 1,\n            num_samplers,\n            action_embed_size,\n            device=action_embedding.device,\n        )  # (k-1, N, -1)\n        action_padded = torch.cat(\n            (action_embedding, action_padding), dim=0\n        )  # (T+k-1, N, -1)\n\n        ## unfold function will create consecutive action sequences\n        action_seq = (\n            action_padded.unfold(dimension=0, size=self.planning_steps, step=1)\n            .permute(3, 0, 1, 2)\n            .view(self.planning_steps, num_steps * num_samplers, action_embed_size)\n        )  # (k, T*N, -1)\n\n        ## beliefs GRU output\n        obs_embeds = aux_model.visual_mlp(obs_embeds)  # (T, N, 128)\n\n        beliefs = beliefs.view(1, num_steps * num_samplers, -1)  # (1, T*N, -1)\n\n        # get 
future contexts c_{t+1:t+k} = GRU(b_t, a_{t:t+k-1})\n        future_contexts_all, _ = aux_model.context_model(\n            action_seq, beliefs\n        )  # (k, T*N, -1)\n\n        future_contexts_all = aux_model.belief_mlp(future_contexts_all)  # (k, T*N, 128)\n        future_contexts_all = future_contexts_all.view(-1, 128)  # (k*T*N, 128)\n\n        obs_embeds = obs_embeds.view(\n            num_steps * num_samplers, obs_embeds.shape[-1]\n        ).permute(\n            1, 0\n        )  # (-1, T*N)\n\n        visual_logits = torch.matmul(future_contexts_all, obs_embeds)\n        visual_log_probs = F.log_softmax(visual_logits, dim=1)  ## (k*T*N, T*N)\n\n        target = torch.zeros(\n            (self.planning_steps, num_steps, num_samplers),\n            dtype=torch.long,\n            device=beliefs.device,\n        )  # (k, T, N)\n        loss_mask = torch.zeros(\n            (self.planning_steps, num_steps, num_samplers), device=beliefs.device\n        )  # (k, T, N)\n\n        num_valid_before = 0\n        for j in range(num_samplers):\n            for i in range(num_steps):\n                index = i * num_samplers + j\n\n                if i == 0 or masks[i, j].item() == 0:\n                    num_valid_before = 0\n                    continue\n\n                num_valid_before += 1\n                for back in range(min(num_valid_before, self.planning_steps)):\n                    target[back, i - (back + 1), j] = index\n                    loss_mask[back, i - (back + 1), j] = 1.0\n\n        target = target.view(-1)  # (k*T*N,)\n\n        loss_value = self.cross_entropy_loss(visual_log_probs, target)\n        loss_value = loss_value.view(\n            self.planning_steps, num_steps, num_samplers, 1\n        )  # (k, T, N, 1)\n\n        loss_mask = loss_mask.unsqueeze(-1)  # (k, T, N, 1)\n        loss_valid_masks = loss_mask * _bernoulli_subsample_mask_like(\n            loss_mask, self.subsample_rate\n        )  # (k, T, N, 1)\n\n        num_valid_losses 
= torch.count_nonzero(loss_valid_masks)\n\n        avg_multi_class_loss = (loss_value * loss_valid_masks).sum() / torch.clamp(\n            num_valid_losses, min=1.0\n        )\n\n        return (\n            avg_multi_class_loss,\n            {\n                \"total\": cast(torch.Tensor, avg_multi_class_loss).item(),\n            },\n        )\n\n\n######## CPCA Softmax variants ######\n\n\nclass CPCA1SoftMaxLoss(CPCASoftMaxLoss):\n    UUID = \"cpcA_SOFTMAX_1\"\n\n    def __init__(self, subsample_rate: float = 1, *args, **kwargs):\n        super().__init__(\n            planning_steps=1, subsample_rate=subsample_rate, *args, **kwargs\n        )\n\n\nclass CPCA2SoftMaxLoss(CPCASoftMaxLoss):\n    UUID = \"cpcA_SOFTMAX_2\"\n\n    def __init__(self, subsample_rate: float = 1, *args, **kwargs):\n        super().__init__(\n            planning_steps=2, subsample_rate=subsample_rate, *args, **kwargs\n        )\n\n\nclass CPCA4SoftMaxLoss(CPCASoftMaxLoss):\n    UUID = \"cpcA_SOFTMAX_4\"\n\n    def __init__(self, subsample_rate: float = 1, *args, **kwargs):\n        super().__init__(\n            planning_steps=4, subsample_rate=subsample_rate, *args, **kwargs\n        )\n\n\nclass CPCA8SoftMaxLoss(CPCASoftMaxLoss):\n    UUID = \"cpcA_SOFTMAX_8\"\n\n    def __init__(self, subsample_rate: float = 1, *args, **kwargs):\n        super().__init__(\n            planning_steps=8, subsample_rate=subsample_rate, *args, **kwargs\n        )\n\n\nclass CPCA16SoftMaxLoss(CPCASoftMaxLoss):\n    UUID = \"cpcA_SOFTMAX_16\"\n\n    def __init__(self, subsample_rate: float = 1, *args, **kwargs):\n        super().__init__(\n            planning_steps=16, subsample_rate=subsample_rate, *args, **kwargs\n        )\n\n\n###########\n\n\nclass CPCA1Loss(CPCALoss):\n    UUID = \"CPCA_1\"\n\n    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):\n        super().__init__(\n            planning_steps=1, subsample_rate=subsample_rate, *args, **kwargs\n        )\n\n\nclass 
CPCA2Loss(CPCALoss):\n    UUID = \"CPCA_2\"\n\n    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):\n        super().__init__(\n            planning_steps=2, subsample_rate=subsample_rate, *args, **kwargs\n        )\n\n\nclass CPCA4Loss(CPCALoss):\n    UUID = \"CPCA_4\"\n\n    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):\n        super().__init__(\n            planning_steps=4, subsample_rate=subsample_rate, *args, **kwargs\n        )\n\n\nclass CPCA8Loss(CPCALoss):\n    UUID = \"CPCA_8\"\n\n    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):\n        super().__init__(\n            planning_steps=8, subsample_rate=subsample_rate, *args, **kwargs\n        )\n\n\nclass CPCA16Loss(CPCALoss):\n    UUID = \"CPCA_16\"\n\n    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):\n        super().__init__(\n            planning_steps=16, subsample_rate=subsample_rate, *args, **kwargs\n        )\n"
  },
  {
    "path": "allenact/embodiedai/mapping/__init__.py",
    "content": ""
  },
  {
    "path": "allenact/embodiedai/mapping/mapping_losses.py",
    "content": "import torch\nfrom torch.nn import functional as F\n\nfrom allenact.algorithms.onpolicy_sync.losses.abstract_loss import (\n    AbstractActorCriticLoss,\n)\nfrom allenact.algorithms.onpolicy_sync.policy import ObservationType\nfrom allenact.base_abstractions.distributions import CategoricalDistr\nfrom allenact.base_abstractions.misc import ActorCriticOutput\n\n\nclass BinnedPointCloudMapLoss(AbstractActorCriticLoss):\n    \"\"\"A (binary cross entropy) loss for training metric maps for free space\n    prediction.\"\"\"\n\n    def __init__(\n        self,\n        binned_pc_uuid: str,\n        map_logits_uuid: str,\n    ):\n        \"\"\"Initializer.\n\n        # Parameters\n        binned_pc_uuid : The uuid of a sensor returning\n            a dictionary with an \"egocentric_update\"\n            key with the same format as returned by\n            `allenact.embodied_ai.mapping_utils.map_builders.BinnedPointCloudMapBuilder`. Such a sensor\n            can be found in the `allenact_plugins` library: see\n            `allenact_plugins.ithor_plugin.ithor_sensors.BinnedPointCloudMapTHORSensor`.\n        map_logits_uuid : key used to index into `actor_critic_output.extras` (returned by the model)\n            whose value should be a tensor of the same shape as the tensor corresponding to the above\n            \"egocentric_update\" key.\n        \"\"\"\n        super().__init__()\n        self.binned_pc_uuid = binned_pc_uuid\n        self.map_logits_uuid = map_logits_uuid\n\n    def loss(  # type: ignore\n        self,\n        step_count: int,\n        batch: ObservationType,\n        actor_critic_output: ActorCriticOutput[CategoricalDistr],\n        *args,\n        **kwargs,\n    ):\n        ego_map_gt = batch[\"observations\"][self.binned_pc_uuid][\n            \"egocentric_update\"\n        ].float()\n        *_, h, w, c = ego_map_gt.shape\n        ego_map_gt = ego_map_gt.view(-1, h, w, c).permute(0, 3, 1, 2).contiguous()\n\n        ego_map_logits = 
actor_critic_output.extras[self.map_logits_uuid]\n        vision_range = ego_map_logits.shape[-1]\n        ego_map_logits = ego_map_logits.view(-1, c, vision_range, vision_range)\n\n        assert ego_map_gt.shape == ego_map_logits.shape\n\n        ego_map_gt_thresholded = (ego_map_gt > 0.5).float()\n        total_loss = F.binary_cross_entropy_with_logits(\n            ego_map_logits, ego_map_gt_thresholded\n        )\n\n        return (\n            total_loss,\n            {\"binned_pc_map_ce\": total_loss.item()},\n        )\n\n        # FOR DEBUGGING: Save all the ground-truth & predicted maps side by side\n        # import numpy as np\n        # import imageio\n        # for i in range(ego_map_gt_thresholded.shape[0]):\n        #     a = ego_map_gt_thresholded[i].permute(1, 2, 0).flip(0).detach().numpy()\n        #     b = torch.sigmoid(ego_map_logits)[i].permute(1, 2, 0).flip(0).detach().numpy()\n        #\n        #     imageio.imwrite(\n        #         f\"z_occupancy_maps/{i}.png\",\n        #         np.concatenate((a, 1 + 0 * a[:, :10], b), axis=1),\n        #     )\n\n\nclass SemanticMapFocalLoss(AbstractActorCriticLoss):\n    \"\"\"A (focal-loss based) loss for training metric maps for free space\n    prediction.\n\n    As semantic maps tend to be quite sparse this loss uses the focal\n    loss (https://arxiv.org/abs/1708.02002) rather than binary cross\n    entropy (BCE). 
If the `gamma` parameter is 0.0 then this is just the\n    normal BCE, larger values of `gamma` result in less and less emphasis\n    being paid to examples that are already well classified.\n    \"\"\"\n\n    def __init__(\n        self, semantic_map_uuid: str, map_logits_uuid: str, gamma: float = 2.0\n    ):\n        \"\"\"Initializer.\n\n        # Parameters\n        semantic_map_uuid : The uuid of a sensor returning\n            a dictionary with an \"egocentric_update\"\n            key with the same format as returned by\n            `allenact.embodiedai.mapping.mapping_utils.map_builders.SemanticMapBuilder`. Such a sensor\n            can be found in the `allenact_plugins` library: see\n            `allenact_plugins.ithor_plugin.ithor_sensors.SemanticMapTHORSensor`.\n        map_logits_uuid : key used to index into `actor_critic_output.extras` (returned by the model)\n            whose value should be a tensor of the same shape as the tensor corresponding to the above\n            \"egocentric_update\" key.\n        \"\"\"\n        super().__init__()\n        assert gamma >= 0, f\"`gamma` (=={gamma}) must be >= 0\"\n        self.semantic_map_uuid = semantic_map_uuid\n        self.map_logits_uuid = map_logits_uuid\n        self.gamma = gamma\n\n    def loss(  # type: ignore\n        self,\n        step_count: int,\n        batch: ObservationType,\n        actor_critic_output: ActorCriticOutput[CategoricalDistr],\n        *args,\n        **kwargs,\n    ):\n        ego_map_gt = batch[\"observations\"][self.semantic_map_uuid][\"egocentric_update\"]\n        ego_map_gt = (\n            ego_map_gt.view(-1, *ego_map_gt.shape[-3:]).permute(0, 3, 1, 2).contiguous()\n        )\n\n        ego_map_logits = actor_critic_output.extras[self.map_logits_uuid]\n        ego_map_logits = ego_map_logits.view(-1, *ego_map_logits.shape[-3:])\n\n        assert ego_map_gt.shape == ego_map_logits.shape\n\n        p = torch.sigmoid(ego_map_logits)\n        one_minus_p = 
torch.sigmoid(-ego_map_logits)\n\n        log_p = F.logsigmoid(ego_map_logits)\n        log_one_minus_p = F.logsigmoid(-ego_map_logits)\n\n        ego_map_gt = ego_map_gt.float()\n        total_loss = -(\n            ego_map_gt * (log_p * (one_minus_p**self.gamma))\n            + (1 - ego_map_gt) * (log_one_minus_p * (p**self.gamma))\n        ).mean()\n\n        return (\n            total_loss,\n            {\"sem_map_focal_loss\": total_loss.item()},\n        )\n\n        # FOR DEBUGGING: Save all the ground-truth & predicted maps side by side\n        # import numpy as np\n        # import imageio\n        # from allenact.embodiedai.mapping.mapping_utils.map_builders import SemanticMapBuilder\n        #\n        # print(\"\\n\" * 3)\n        # for i in range(ego_map_gt.shape[0]):\n        #     pred_sem_map = torch.sigmoid(ego_map_logits)[i].permute(1, 2, 0).flip(0).detach()\n        #     a = SemanticMapBuilder.randomly_color_semantic_map(ego_map_gt[i].permute(1, 2, 0).flip(0).detach())\n        #     b = SemanticMapBuilder.randomly_color_semantic_map(pred_sem_map)\n        #     imageio.imwrite(\n        #         f\"z_semantic_maps/{i}.png\",\n        #         np.concatenate((a, 255 + a[:, :10] * 0, b), axis=1),\n        #     )\n        #\n"
  },
  {
    "path": "allenact/embodiedai/mapping/mapping_models/__init__.py",
    "content": ""
  },
  {
    "path": "allenact/embodiedai/mapping/mapping_models/active_neural_slam.py",
    "content": "# MIT License\n#\n# Original Copyright (c) 2020 Devendra Chaplot\n#\n# Modified work Copyright (c) 2021 Allen Institute for Artificial Intelligence\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\nimport math\nfrom typing import Optional, Tuple, Dict, Any, cast\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torchvision.models as models\n\nfrom allenact.utils.model_utils import simple_conv_and_linear_weights_init\n\nDEGREES_TO_RADIANS = np.pi / 180.0\nRADIANS_TO_DEGREES = 180.0 / np.pi\n\n\ndef _inv_sigmoid(x: torch.Tensor):\n    return torch.log(x) - torch.log1p(-x)\n\n\nclass ActiveNeuralSLAM(nn.Module):\n    \"\"\"Active Neural SLAM module.\n\n    This is an implementation of the Active Neural SLAM module\n    from:\n    ```\n    Chaplot, D.S., Gandhi, D., Gupta, S., Gupta, A. 
and Salakhutdinov, R., 2020.\n    Learning To Explore Using Active Neural SLAM.\n    In International Conference on Learning Representations (ICLR).\n    ```\n    Note that this is purely the mapping component and does not include the planning\n    components from the above paper.\n\n    This implementation is adapted from `https://github.com/devendrachaplot/Neural-SLAM`,\n    we have extended this implementation to allow for an arbitrary number of output map\n    channels (enabling semantic mapping).\n\n    At a high level, this model takes as input RGB egocentric images and outputs metric\n    map tensors of shape (# channels) x height x width where height/width correspond to the\n    ground plane of the environment.\n    \"\"\"\n\n    def __init__(\n        self,\n        frame_height: int,\n        frame_width: int,\n        n_map_channels: int,\n        resolution_in_cm: int = 5,\n        map_size_in_cm: int = 2400,\n        vision_range_in_cm: int = 300,\n        use_pose_estimation: bool = False,\n        pretrained_resnet: bool = True,\n        freeze_resnet_batchnorm: bool = True,\n        use_resnet_layernorm: bool = False,\n    ):\n        \"\"\"Initialize an Active Neural SLAM module.\n\n        # Parameters\n\n        frame_height : The height of the RGB images given to this module on calls to `forward`.\n        frame_width : The width of the RGB images given to this module on calls to `forward`.\n        n_map_channels : The number of output channels in the output maps.\n        resolution_in_cm : The resolution of the output map, see `map_size_in_cm`.\n        map_size_in_cm : The height & width of the map in centimeters. The size of the map\n            tensor returned on calls to forward will be `map_size_in_cm/resolution_in_cm`. 
Note\n            that `map_size_in_cm` must be divisible by `resolution_in_cm`.\n        vision_range_in_cm : Given an RGB image input, this module will transform this image into\n            an \"egocentric map\" with height and width equaling `vision_range_in_cm/resolution_in_cm`.\n            This egocentric map corresponds to the area of the world directly in front of the agent.\n            This \"egocentric map\" will be rotated/translated into the allocentric reference frame and\n            used to update the larger, allocentric, map whose\n            height and width equal `map_size_in_cm/resolution_in_cm`. Thus this parameter controls\n            how much of the map will be updated on every step.\n        use_pose_estimation : Whether or not we should estimate the agent's change in position/rotation.\n            If `False`, you'll need to provide the ground truth changes in position/rotation.\n        pretrained_resnet : Whether or not to use ImageNet pre-trained model weights for the ResNet18\n            backbone.\n        freeze_resnet_batchnorm : Whether or not the batch normalization layers in the ResNet18 backbone\n            should be frozen and batchnorm updates disabled. 
You almost certainly want this to be `True`\n            as using batch normalization during RL training results in all sorts of issues unless you're\n            very careful.\n        use_resnet_layernorm : If you've enabled `freeze_resnet_batchnorm` (recommended) you'll likely want\n            to normalize the output from the ResNet18 model as we've found that these values can otherwise\n            grow quite large harming learning.\n        \"\"\"\n        super(ActiveNeuralSLAM, self).__init__()\n        self.frame_height = frame_height\n        self.frame_width = frame_width\n        self.n_map_channels = n_map_channels\n        self.resolution_in_cm = resolution_in_cm\n        self.map_size_in_cm = map_size_in_cm\n        self.input_channels = 3\n        self.vision_range_in_cm = vision_range_in_cm\n        self.dropout = 0.5\n        self.use_pose_estimation = use_pose_estimation\n        self.freeze_resnet_batchnorm = freeze_resnet_batchnorm\n\n        self.max_abs_map_logit_value = 20\n\n        # Visual Encoding\n        resnet = models.resnet18(pretrained=pretrained_resnet)\n        self.resnet_l5 = nn.Sequential(*list(resnet.children())[0:8])\n        self.conv = nn.Sequential(\n            *filter(bool, [nn.Conv2d(512, 64, (1, 1), stride=(1, 1)), nn.ReLU()])\n        )\n        self.bn_modules = [\n            module\n            for module in self.resnet_l5.modules()\n            if \"BatchNorm\" in type(module).__name__\n        ]\n        if freeze_resnet_batchnorm:\n            for bn in self.bn_modules:\n                bn.momentum = 0\n\n        # Layernorm (if requested)\n        self.use_resnet_layernorm = use_resnet_layernorm\n        if self.use_resnet_layernorm:\n            assert (\n                self.freeze_resnet_batchnorm\n            ), \"When using layernorm, we require that set `freeze_resnet_batchnorm` to True.\"\n            self.resnet_normalizer = nn.Sequential(\n                nn.Conv2d(512, 512, 1),\n                
nn.LayerNorm(\n                    normalized_shape=[512, 7, 7],\n                    elementwise_affine=True,\n                ),\n            )\n            self.resnet_normalizer.apply(simple_conv_and_linear_weights_init)\n        else:\n            self.resnet_normalizer = nn.Identity()\n\n        # convolution output size\n        input_test = torch.randn(\n            1, self.input_channels, self.frame_height, self.frame_width\n        )\n        # Have to explicitly call .forward to get past LGTM checks as it thinks nn.Sequential isn't callable\n        conv_output = self.conv.forward(self.resnet_l5.forward(input_test))\n\n        self.conv_output_size = conv_output.view(-1).size(0)\n\n        # projection layer\n        self.proj1 = nn.Linear(self.conv_output_size, 1024)\n        assert self.vision_range % 8 == 0\n        self.deconv_in_height = self.vision_range // 8\n        self.deconv_in_width = self.deconv_in_height\n        self.n_input_channels_for_deconv = 64\n        proj2_out_size = 64 * self.deconv_in_height * self.deconv_in_width\n        self.proj2 = nn.Linear(1024, proj2_out_size)\n\n        if self.dropout > 0:\n            self.dropout1 = nn.Dropout(self.dropout)\n            self.dropout2 = nn.Dropout(self.dropout)\n\n        # Deconv layers to predict map\n        self.deconv = nn.Sequential(\n            *filter(\n                bool,\n                [\n                    nn.ConvTranspose2d(\n                        self.n_input_channels_for_deconv,\n                        32,\n                        (4, 4),\n                        stride=(2, 2),\n                        padding=(1, 1),\n                    ),\n                    nn.ReLU(),\n                    nn.ConvTranspose2d(32, 16, (4, 4), stride=(2, 2), padding=(1, 1)),\n                    nn.ReLU(),\n                    nn.ConvTranspose2d(\n                        16, self.n_map_channels, (4, 4), stride=(2, 2), padding=(1, 1)\n                    ),\n                ],\n   
         )\n        )\n\n        # Pose Estimator\n        self.pose_conv = nn.Sequential(\n            nn.Conv2d(2 * self.n_map_channels, 64, (4, 4), stride=(2, 2)),\n            nn.ReLU(inplace=True),\n            nn.Conv2d(64, 32, (4, 4), stride=(2, 2)),\n            nn.ReLU(inplace=True),\n            nn.Conv2d(32, 16, (3, 3), stride=(1, 1)),\n            nn.ReLU(inplace=True),\n            nn.Flatten(),\n        )\n\n        self.pose_conv_output_dim = (\n            self.pose_conv.forward(\n                torch.zeros(\n                    1, 2 * self.n_map_channels, self.vision_range, self.vision_range\n                )\n            )\n            .view(-1)\n            .size(0)\n        )\n\n        # projection layer\n        self.pose_proj1 = nn.Linear(self.pose_conv_output_dim, 1024)\n        self.pose_proj2_x = nn.Linear(1024, 128)\n        self.pose_proj2_z = nn.Linear(1024, 128)\n        self.pose_proj2_o = nn.Linear(1024, 128)\n        self.pose_proj3_x = nn.Linear(128, 1)\n        self.pose_proj3_y = nn.Linear(128, 1)\n        self.pose_proj3_o = nn.Linear(128, 1)\n\n        if self.dropout > 0:\n            self.pose_dropout1 = nn.Dropout(self.dropout)\n\n        self.train()\n\n    @property\n    def device(self):\n        d = self.pose_proj1.weight.get_device()\n        if d < 0:\n            return torch.device(\"cpu\")\n        return torch.device(d)\n\n    def train(self, mode: bool = True):\n        super().train(mode=mode)\n        if mode and self.freeze_resnet_batchnorm:\n            for module in self.bn_modules:\n                module.eval()\n\n    @property\n    def map_size(self):\n        return self.map_size_in_cm // self.resolution_in_cm\n\n    @property\n    def vision_range(self):\n        return self.vision_range_in_cm // self.resolution_in_cm\n\n    def image_to_egocentric_map_logits(\n        self,\n        images: Optional[torch.Tensor],\n        resnet_image_features: Optional[torch.Tensor] = None,\n    ):\n        if 
resnet_image_features is None:\n            bs, _, _, _ = images.size()\n            resnet_image_features = self.resnet_normalizer(\n                self.resnet_l5(images[:, :3, :, :])\n            )\n        else:\n            bs = resnet_image_features.shape[0]\n\n        conv_output = self.conv(resnet_image_features)\n\n        proj1 = F.relu(self.proj1(conv_output.reshape(-1, self.conv_output_size)))\n        if self.dropout > 0:\n            proj1 = self.dropout1(proj1)\n        proj3 = F.relu(self.proj2(proj1))\n\n        deconv_input = proj3.view(\n            bs,\n            self.n_input_channels_for_deconv,\n            self.deconv_in_height,\n            self.deconv_in_width,\n        )\n        deconv_output = self.deconv(deconv_input)\n        return deconv_output\n\n    def allocentric_map_to_egocentric_view(\n        self, allocentric_map: torch.Tensor, xzr: torch.Tensor, padding_mode: str\n    ):\n        # Index the egocentric viewpoints at the given xzr locations\n        with torch.no_grad():\n            allocentric_map = allocentric_map.float()\n            xzr = xzr.float()\n\n            theta = xzr[:, 2].float() * float(np.pi / 180)\n\n            # Here form the rotation matrix\n            cos_theta = torch.cos(theta)\n            sin_theta = torch.sin(theta)\n            rot_mat = torch.stack(\n                (\n                    torch.stack((cos_theta, -sin_theta), -1),\n                    torch.stack((sin_theta, cos_theta), -1),\n                ),\n                1,\n            )\n\n            scaler = 2 * (100 / (self.resolution_in_cm * self.map_size))\n            offset_to_center_the_agent = scaler * xzr[:, :2].unsqueeze(-1) - 1\n\n            offset_to_top_of_image = rot_mat @ torch.FloatTensor([0, 1.0]).unsqueeze(\n                1\n            ).to(self.device)\n            rotation_and_translate_mat = torch.cat(\n                (\n                    rot_mat,\n                    offset_to_top_of_image + 
offset_to_center_the_agent,\n                ),\n                dim=-1,\n            )\n\n            ego_map = F.grid_sample(\n                allocentric_map,\n                F.affine_grid(\n                    rotation_and_translate_mat.to(self.device),\n                    allocentric_map.shape,\n                ),\n                padding_mode=padding_mode,\n                align_corners=False,\n            )\n\n            vr = self.vision_range\n            half_vr = vr // 2\n            center = self.map_size_in_cm // (2 * self.resolution_in_cm)\n            cropped = ego_map[:, :, :vr, (center - half_vr) : (center + half_vr)]\n            return cropped\n\n    def estimate_egocentric_dx_dz_dr(\n        self,\n        map_probs_egocentric: torch.Tensor,\n        last_map_probs_egocentric: torch.Tensor,\n    ):\n        assert last_map_probs_egocentric.shape == map_probs_egocentric.shape\n\n        pose_est_input = torch.cat(\n            (map_probs_egocentric.detach(), last_map_probs_egocentric.detach()), dim=1\n        )\n        pose_conv_output = self.pose_conv(pose_est_input)\n\n        proj1 = F.relu(self.pose_proj1(pose_conv_output))\n\n        if self.dropout > 0:\n            proj1 = self.pose_dropout1(proj1)\n\n        proj2_x = F.relu(self.pose_proj2_x(proj1))\n        pred_dx = self.pose_proj3_x(proj2_x)\n\n        proj2_z = F.relu(self.pose_proj2_z(proj1))\n        pred_dz = self.pose_proj3_y(proj2_z)\n\n        proj2_o = F.relu(self.pose_proj2_o(proj1))\n        pred_do = self.pose_proj3_o(proj2_o)\n\n        return torch.cat((pred_dx, pred_dz, pred_do), dim=1)\n\n    @staticmethod\n    def update_allocentric_xzrs_with_egocentric_movement(\n        last_xzrs_allocentric: torch.Tensor,\n        dx_dz_drs_egocentric: torch.Tensor,\n    ):\n        new_xzrs_allocentric = last_xzrs_allocentric.clone()\n\n        theta = new_xzrs_allocentric[:, 2] * DEGREES_TO_RADIANS\n        sin_theta = torch.sin(theta)\n        cos_theta = torch.cos(theta)\n    
    new_xzrs_allocentric[:, :2] += torch.matmul(\n            torch.stack([cos_theta, -sin_theta, sin_theta, cos_theta], dim=-1).view(\n                -1, 2, 2\n            ),\n            dx_dz_drs_egocentric[:, :2].unsqueeze(-1),\n        ).squeeze(-1)\n\n        new_xzrs_allocentric[:, 2] += dx_dz_drs_egocentric[:, 2]\n        new_xzrs_allocentric[:, 2] = (\n            torch.fmod(new_xzrs_allocentric[:, 2] - 180.0, 360.0) + 180.0\n        )\n        new_xzrs_allocentric[:, 2] = (\n            torch.fmod(new_xzrs_allocentric[:, 2] + 180.0, 360.0) - 180.0\n        )\n\n        return new_xzrs_allocentric\n\n    def forward(\n        self,\n        images: Optional[torch.Tensor],\n        last_map_probs_allocentric: Optional[torch.Tensor],\n        last_xzrs_allocentric: Optional[torch.Tensor],\n        dx_dz_drs_egocentric: Optional[torch.Tensor],\n        last_map_logits_egocentric: Optional[torch.Tensor],\n        return_allocentric_maps=True,\n        resnet_image_features: Optional[torch.Tensor] = None,\n    ) -> Dict[str, Any]:\n        \"\"\"Create allocentric/egocentric maps predictions given RGB image\n        inputs.\n\n        Here it is assumed that `last_xzrs_allocentric` has been re-centered so that (x, z) == (0,0)\n        corresponds to the top left of the returned map (with increasing x/z moving to the bottom right of the map).\n\n        Note that all maps are oriented so that:\n        * **Increasing x values** correspond to **increasing columns** in the map(s).\n        * **Increasing z values** correspond to **increasing rows** in the map(s).\n        Note that this may seem a bit weird as:\n        * \"north\" is pointing downwards in the map,\n        * if you picture yourself as the agent facing north (i.e. down) in the map, then moving to the right from\n            the agent's perspective will correspond to **increasing** which column the agent is at:\n        ```\n        agent facing downwards - - > (dir. 
to the right of the agent, i.e. moving right corresponds to +cols)\n            |\n            |\n            v (dir. agent faces, i.e. moving ahead corresponds to +rows)\n        ```\n            This may be the opposite of what you expect.\n\n        # Parameters\n        images : A (# batches) x 3 x height x width tensor of RGB images. These should be\n            normalized for use with a resnet model. See [here](https_DOC_COLON_//pytorch.org/vision/stable/models.html)\n            for information (see also the `use_resnet_normalization` parameter of the\n            `allenact.base_abstractions.sensor.RGBSensor` sensor).\n        last_map_probs_allocentric : A (# batches) x (map channels) x (map height) x (map width)\n            tensor representing the collection of allocentric maps to be updated.\n        last_xzrs_allocentric : A (# batches) x 3 tensor where `last_xzrs_allocentric[_DOC_COLON_, 0]`\n            are the agent's (allocentric) x-coordinates on the previous step,\n            `last_xzrs_allocentric[_DOC_COLON_, 1]` are the agent's (allocentric) z-coordinates from the previous\n            step, and `last_xzrs_allocentric[_DOC_COLON_, 2]` are the agent's rotations (allocentric, in degrees)\n            from the previous step.\n        dx_dz_drs_egocentric : A (# batches) x 3 tensor representing the agent's change in x (in meters), z (in meters),\n            and rotation (in degrees) from the previous step. Note that these changes are \"egocentric\" so that if the\n            agent moved 1 meter ahead from its perspective this should correspond to a dz of +1.0 regardless of\n            the agent's orientation (similarly moving right would result in a dx of +1.0). This\n            is ignored (and thus can be `None`) if you are using pose estimation\n            (i.e. 
`self.use_pose_estimation` is `True`) or if `return_allocentric_maps` is `False`.\n        last_map_logits_egocentric : The \"egocentric_update\" output when calling this function\n            on the last agent's step. I.e. this should be the egocentric map view of the agent\n            from the last step. This is used to compute the change in the agent's position/rotation.\n            This is ignored (and thus can be `None`) if you do not wish to estimate the agent's pose\n            (i.e. `self.use_pose_estimation` is `False`).\n        return_allocentric_maps : Whether or not to generate new allocentric maps given `last_map_probs_allocentric`\n            and the new map estimates. Creating these new allocentric maps is expensive so better avoided when\n            not needed.\n        resnet_image_features : Sometimes you may wish to compute the ResNet image features yourself for use\n            in another part of your model. Rather than having to recompute them multiple times, you can\n            instead compute them once and pass them into this forward call (in this case the input `images`\n            parameter is ignored). Note that if you're using the `self.resnet_l5` module to compute these\n            features, be sure to also normalize them with `self.resnet_normalizer` if you have opted to\n            `use_resnet_layernorm` when initializing this module).\n\n        # Returns\n        A dictionary with keys/values:\n        * \"egocentric_update\" - The egocentric map view for the given RGB image. This is what should\n            be used for computing losses in general.\n        * \"map_probs_allocentric_update_no_grad\" - The egocentric map view after it has been\n            rotated, translated, and moved into a full-sized allocentric map. 
This map has been\n            detached from the computation graph and so should not be used for gradient computations.\n            This will be `None` if `return_allocentric_maps` was `False`.\n        * \"map_probs_allocentric_no_grad\" - The newly updated allocentric map, this corresponds to\n            performing a pointwise maximum between `last_map_probs_allocentric` and the\n            above returned `map_probs_allocentric_update_no_grad`.\n            This will be `None` if `return_allocentric_maps` was `False`.\n        * \"dx_dz_dr_egocentric_preds\" - The predicted change in x, z, and rotation of the agent (from the\n            egocentric perspective of the agent).\n        *  \"xzr_allocentric_preds\" - The (predicted if `self.use_pose_estimation == True`) allocentric\n            (x, z) position and rotation of the agent. This will equal `None` if `self.use_pose_estimation == False`\n            and `dx_dz_drs_egocentric` is `None`.\n        \"\"\"\n        # TODO: For consistency we should update things so that:\n        #  \"Furthermore, the rotation component of `last_xzrs_allocentric` and `dx_dz_drs_egocentric`\n        #  should be specified in **degrees** with positive rotation corresponding to a **CLOCKWISE**\n        #  rotation (this is the default used by many game engines).\"\n        map_logits_egocentric = self.image_to_egocentric_map_logits(\n            images=images, resnet_image_features=resnet_image_features\n        )\n        map_probs_egocentric = torch.sigmoid(map_logits_egocentric)\n\n        dx_dz_dr_egocentric_preds = None\n        if last_map_logits_egocentric is not None:\n            dx_dz_dr_egocentric_preds = self.estimate_egocentric_dx_dz_dr(\n                map_probs_egocentric=map_probs_egocentric,\n                last_map_probs_egocentric=torch.sigmoid(last_map_logits_egocentric),\n            )\n\n        if self.use_pose_estimation:\n            updated_xzrs_allocentrc = (\n                
self.update_allocentric_xzrs_with_egocentric_movement(\n                    last_xzrs_allocentric=last_xzrs_allocentric,\n                    dx_dz_drs_egocentric=dx_dz_dr_egocentric_preds,\n                )\n            )\n        elif dx_dz_drs_egocentric is not None:\n            updated_xzrs_allocentrc = (\n                self.update_allocentric_xzrs_with_egocentric_movement(\n                    last_xzrs_allocentric=last_xzrs_allocentric,\n                    dx_dz_drs_egocentric=dx_dz_drs_egocentric,\n                )\n            )\n        else:\n            updated_xzrs_allocentrc = None\n\n        if return_allocentric_maps:\n            # Aggregate egocentric map prediction in the allocentric map\n            # using the predicted pose (if `self.use_pose_estimation`) or the ground\n            # truth pose (if not `self.use_pose_estimation`)\n            with torch.no_grad():\n                # Rotate and translate the egocentric map view, we do this grid sampling\n                # at the level of probabilities as bad results can occur at the logit level\n                full_size_allocentric_map_probs_update = (\n                    _move_egocentric_map_view_into_allocentric_position(\n                        map_probs_egocentric=map_probs_egocentric,\n                        xzrs_allocentric=updated_xzrs_allocentrc,\n                        allocentric_map_height_width=(self.map_size, self.map_size),\n                        resolution_in_cm=self.resolution_in_cm,\n                    )\n                )\n\n                map_probs_allocentric = torch.max(\n                    last_map_probs_allocentric, full_size_allocentric_map_probs_update\n                )\n        else:\n            full_size_allocentric_map_probs_update = None\n            map_probs_allocentric = None\n\n        return {\n            \"egocentric_update\": map_logits_egocentric,\n            \"map_probs_allocentric_update_no_grad\": 
full_size_allocentric_map_probs_update,\n            \"map_probs_allocentric_no_grad\": map_probs_allocentric,\n            \"dx_dz_dr_egocentric_preds\": dx_dz_dr_egocentric_preds,\n            \"xzr_allocentric_preds\": updated_xzrs_allocentrc,\n        }\n\n\ndef _move_egocentric_map_view_into_allocentric_position(\n    map_probs_egocentric: torch.Tensor,\n    xzrs_allocentric: torch.Tensor,\n    allocentric_map_height_width: Tuple[int, int],\n    resolution_in_cm: float,\n):\n    \"\"\"Translate/rotate an egocentric map view into an allocentric map.\n\n    Let's say you have a collection of egocentric maps in a tensor of shape\n    `(# batches) x (# channels) x (# ego rows) x (# ego columns)`\n    where these are \"egocentric\" as we assume the agent is always\n    at the center of the map and facing \"downwards\", namely\n    * **ahead** of the agent should correspond to **increasing rows** in the map(s).\n    * **right** of the agent should correspond to **increasing columns** in the map(s).\n    Note that the above is a bit weird as, if you picture yourself as the agent facing\n    downwards in the map, then moving to the right from the agent's perspective corresponds to\n    **increasing** the column index. Here's how things\n    should look if you plotted one of these egocentric maps:\n    ```\n    center of map - - > (dir. to the right of the agent, i.e. moving right corresponds to +cols)\n        |\n        |\n        v (dir. agent faces, i.e. 
moving ahead corresponds to +rows)\n    ```\n\n    This function is used to translate/rotate the above ego maps so that\n    they are in the right position/rotation in an allocentric map of size\n    `(# batches) x (# channels) x (# allocentric_map_height_width[0]) x (# allocentric_map_height_width[1])`.\n\n    Adapted from the get_grid function in https://github.com/devendrachaplot/Neural-SLAM.\n\n    # Parameters\n    map_probs_egocentric : Egocentric map views.\n    xzrs_allocentric : (# batches)x3 tensor with `xzrs_allocentric[:, 0]` being the x-coordinates (in meters),\n        `xzrs_allocentric[:, 1]` being the z-coordinates (in meters), and `xzrs_allocentric[:, 2]` being the rotation\n        (in degrees) of the agent in the allocentric reference frame. Here it is assumed that `xzrs_allocentric` has\n        been re-centered so that (x, z) == (0,0) corresponds to the top left of the returned map (with increasing\n        x/z moving to the bottom right of the map). Note that positive rotations are in the counterclockwise direction.\n    allocentric_map_height_width : Height/width of the allocentric map to be returned\n    resolution_in_cm : Resolution (in cm) of map to be returned (and of map_probs_egocentric). 
I.e.\n        `map_probs_egocentric[0,0,0:1,0:1]` should correspond to a `resolution_in_cm x resolution_in_cm`\n        square on the ground plane in the world.\n\n    # Returns\n    `(# batches) x (# channels) x (# allocentric_map_height_width[0]) x (# allocentric_map_height_width[1])`\n    tensor where the input `map_probs_egocentric` maps have been rotated/translated so that they\n    are in the positions specified by `xzrs_allocentric`.\n    \"\"\"\n    # TODO: For consistency we should update the rotations so they are in the clockwise direction.\n\n    # First we place the egocentric map view into the center\n    # of a map that has the same size as the allocentric map\n\n    nbatch, c, ego_h, ego_w = cast(\n        Tuple[int, int, int, int], map_probs_egocentric.shape\n    )\n    allo_h, allo_w = allocentric_map_height_width\n\n    max_view_range = math.sqrt((ego_w / 2.0) ** 2 + ego_h**2)\n    if min(allo_h, allo_w) / 2.0 < max_view_range:\n        raise NotImplementedError(\n            f\"The shape of your egocentric view (ego_h, ego_w)==({ego_h, ego_w})\"\n            f\" is too large relative the size of the allocentric map (allo_h, allo_w)==({allo_h}, {allo_w}).\"\n            f\" The height/width of your allocentric map should be at least {2 * max_view_range} to allow\"\n            f\" for no information to be lost when rotating the egocentric map.\"\n        )\n\n    full_size_ego_map_update_probs = map_probs_egocentric.new(\n        nbatch, c, *allocentric_map_height_width\n    ).fill_(0)\n\n    assert (ego_h % 2, ego_w % 2, allo_h % 2, allo_w % 2) == (\n        0,\n    ) * 4, \"All map heights/widths should be divisible by 2.\"\n\n    x1 = allo_w // 2 - ego_w // 2\n    x2 = x1 + ego_w\n    z1 = allo_h // 2\n    z2 = z1 + ego_h\n    full_size_ego_map_update_probs[:, :, z1:z2, x1:x2] = map_probs_egocentric\n\n    # Now we'll rotate and translate `full_size_ego_map_update_probs`\n    # so that the egocentric map view is positioned where it should be\n  
  # in the allocentric coordinate frame\n\n    # To do this we first need to rescale our allocentric xz coordinates\n    # so that the center of the map is (0,0) and the top left corner is (-1, -1)\n    # as this is what's expected by the `affine_grid` function below.\n    rescaled_xzrs_allocentric = xzrs_allocentric.clone().detach().float()\n    rescaled_xzrs_allocentric[:, :2] *= (\n        100.0 / resolution_in_cm\n    )  # Put x / z into map units rather than meters\n    rescaled_xzrs_allocentric[:, 0] /= allo_w / 2  # x corresponds to columns\n    rescaled_xzrs_allocentric[:, 1] /= allo_h / 2  # z corresponds to rows\n    rescaled_xzrs_allocentric[:, :2] -= 1.0  # Re-center\n\n    x = rescaled_xzrs_allocentric[:, 0]\n    z = rescaled_xzrs_allocentric[:, 1]\n    theta = (\n        -rescaled_xzrs_allocentric[:, 2] * DEGREES_TO_RADIANS\n    )  # Notice the negative sign\n\n    cos_theta = theta.cos()\n    sin_theta = theta.sin()\n    zeroes = torch.zeros_like(cos_theta)\n    ones = torch.ones_like(cos_theta)\n\n    theta11 = torch.stack([cos_theta, -sin_theta, zeroes], 1)\n    theta12 = torch.stack([sin_theta, cos_theta, zeroes], 1)\n    theta1 = torch.stack([theta11, theta12], 1)\n\n    theta21 = torch.stack([ones, zeroes, x], 1)\n    theta22 = torch.stack([zeroes, ones, z], 1)\n    theta2 = torch.stack([theta21, theta22], 1)\n\n    grid_size = [nbatch, c, allo_h, allo_w]\n    rot_grid = F.affine_grid(theta1, grid_size)\n    trans_grid = F.affine_grid(theta2, grid_size)\n\n    return F.grid_sample(\n        F.grid_sample(\n            full_size_ego_map_update_probs,\n            rot_grid,\n            padding_mode=\"zeros\",\n            align_corners=False,\n        ),\n        trans_grid,\n        padding_mode=\"zeros\",\n        align_corners=False,\n    )\n"
  },
  {
    "path": "allenact/embodiedai/mapping/mapping_utils/__init__.py",
    "content": ""
  },
  {
    "path": "allenact/embodiedai/mapping/mapping_utils/map_builders.py",
    "content": "# MIT License\n#\n# Original Copyright (c) 2020 Devendra Chaplot\n#\n# Modified work Copyright (c) 2021 Allen Institute for Artificial Intelligence\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\nimport random\nfrom typing import Optional, Sequence, Union, Dict\n\nimport cv2\nimport numpy as np\nimport torch\nimport torch.nn.functional as F\n\nfrom allenact.embodiedai.mapping.mapping_utils.point_cloud_utils import (\n    depth_frame_to_world_space_xyz,\n    project_point_cloud_to_map,\n)\n\n\nclass BinnedPointCloudMapBuilder(object):\n    \"\"\"Class used to iteratively construct a map of \"free space\" based on input\n    depth maps (i.e. pointclouds).\n\n    Adapted from https://github.com/devendrachaplot/Neural-SLAM\n\n    This class can be used to (iteratively) construct a metric map of free space in an environment as\n    an agent moves around. 
After every step the agent takes, you should call the `update` function and\n    pass the agent's egocentric depth image along with the agent's new position. This depth map will\n    be converted into a pointcloud, binned along the up/down axis, and then projected\n    onto a 3-dimensional tensor of shape (HxWxC) where HxW represents the ground plane\n    and where C equals the number of bins the up-down coordinate was binned into. This 3d map counts the\n    number of points in each bin. Thus a lack of points within a region can be used to infer that\n    that region is free space.\n\n    # Attributes\n\n    fov : FOV of the camera used to produce the depth images given when calling `update`.\n    vision_range_in_map_units : The maximum distance (in number of rows/columns) that will\n        be updated when calling `update`, points outside of this map vision range are ignored.\n    map_size_in_cm : Total map size in cm.\n    resolution_in_cm : Number of cm per row/column in the map.\n    height_bins : The bins used to bin the up-down coordinate (for us the y-coordinate). For example,\n        if `height_bins = [0.1, 1]` then\n        all y-values < 0.1 will be mapped to 0, all y values in [0.1, 1) will be mapped to 1, and\n        all y-values >= 1 will be mapped to 2.\n        **Importantly:** these y-values will first be recentered by the `min_xyz` value passed when\n        calling `reset(...)`.\n    device : A `torch.device` on which to run computations. 
If this device is a GPU you can potentially\n        obtain significant speed-ups.\n    \"\"\"\n\n    def __init__(\n        self,\n        fov: float,\n        vision_range_in_cm: int,\n        map_size_in_cm: int,\n        resolution_in_cm: int,\n        height_bins: Sequence[float],\n        return_egocentric_local_context: bool = False,\n        device: torch.device = torch.device(\"cpu\"),\n    ):\n        assert vision_range_in_cm % resolution_in_cm == 0\n\n        self.fov = fov\n        self.vision_range_in_map_units = vision_range_in_cm // resolution_in_cm\n        self.map_size_in_cm = map_size_in_cm\n        self.resolution_in_cm = resolution_in_cm\n        self.height_bins = height_bins\n        self.device = device\n        self.return_egocentric_local_context = return_egocentric_local_context\n\n        self.binned_point_cloud_map = np.zeros(\n            (\n                self.map_size_in_cm // self.resolution_in_cm,\n                self.map_size_in_cm // self.resolution_in_cm,\n                len(self.height_bins) + 1,\n            ),\n            dtype=np.float32,\n        )\n\n        self.min_xyz: Optional[np.ndarray] = None\n\n    def update(\n        self,\n        depth_frame: np.ndarray,\n        camera_xyz: np.ndarray,\n        camera_rotation: float,\n        camera_horizon: float,\n    ) -> Dict[str, np.ndarray]:\n        \"\"\"Updates the map with the input depth frame from the agent.\n\n        See the `allenact.embodiedai.mapping.mapping_utils.point_cloud_utils.project_point_cloud_to_map`\n        function for more information input parameter definitions. **We assume that the input\n        `depth_frame` has depths recorded in meters**.\n\n        # Returns\n        Let `map_size = self.map_size_in_cm // self.resolution_in_cm`. 
Returns a dictionary with the following key-value pairs:\n\n        * `\"egocentric_update\"` - A tensor of shape\n            `(vision_range_in_map_units)x(vision_range_in_map_units)x(len(self.height_bins) + 1)` corresponding\n            to the binned pointcloud after having been centered on the agent and rotated so that\n            points ahead of the agent correspond to larger row indices and points further to the right of the agent\n            correspond to larger column indices. Note that by \"centered\" we mean that one can picture\n             the agent as being positioned at (0, vision_range_in_map_units/2) and facing downward. Each entry in this tensor\n             is a count equaling the number of points in the pointcloud that, once binned, fell into this\n            entry. This is likely the output you want to use if you want to build a model to predict free space from an image.\n        * `\"allocentric_update\"` - A `(map_size)x(map_size)x(len(self.height_bins) + 1)` tensor corresponding\n            to `\"egocentric_update\"` but rotated to the world-space coordinates. 
This `allocentric_update`\n             is what is used to update the internally stored representation of the map.\n        *  `\"map\"` -  A `(map_size)x(map_size)x(len(self.height_bins) + 1)` tensor corresponding\n            to the sum of all `\"allocentric_update\"` values since the last `reset()`.\n        ```\n        \"\"\"\n        with torch.no_grad():\n            assert self.min_xyz is not None, \"Please call `reset` before `update`.\"\n\n            camera_xyz = (\n                torch.from_numpy(camera_xyz - self.min_xyz).float().to(self.device)\n            )\n\n            try:\n                depth_frame = torch.from_numpy(depth_frame).to(self.device)\n            except ValueError:\n                depth_frame = torch.from_numpy(depth_frame.copy()).to(self.device)\n\n            depth_frame[\n                depth_frame\n                > self.vision_range_in_map_units * self.resolution_in_cm / 100\n            ] = np.NaN\n\n            world_space_point_cloud = depth_frame_to_world_space_xyz(\n                depth_frame=depth_frame,\n                camera_world_xyz=camera_xyz,\n                rotation=camera_rotation,\n                horizon=camera_horizon,\n                fov=self.fov,\n            )\n\n            world_binned_map_update = project_point_cloud_to_map(\n                xyz_points=world_space_point_cloud,\n                bin_axis=\"y\",\n                bins=self.height_bins,\n                map_size=self.binned_point_cloud_map.shape[0],\n                resolution_in_cm=self.resolution_in_cm,\n                flip_row_col=True,\n            )\n\n            # Center the cloud on the agent\n            recentered_point_cloud = world_space_point_cloud - (\n                torch.FloatTensor([1.0, 0.0, 1.0]).to(self.device) * camera_xyz\n            ).reshape((1, 1, 3))\n            # Rotate the cloud so that positive-z is the direction the agent is looking\n            theta = (\n                np.pi * camera_rotation / 
180\n            )  # No negative since THOR rotations are already backwards\n            cos_theta = np.cos(theta)\n            sin_theta = np.sin(theta)\n            rotation_transform = torch.FloatTensor(\n                [\n                    [cos_theta, 0, -sin_theta],\n                    [0, 1, 0],  # unchanged\n                    [sin_theta, 0, cos_theta],\n                ]\n            ).to(self.device)\n            rotated_point_cloud = recentered_point_cloud @ rotation_transform.T\n            xoffset = (self.map_size_in_cm / 100) / 2\n            agent_centric_point_cloud = rotated_point_cloud + torch.FloatTensor(\n                [xoffset, 0, 0]\n            ).to(self.device)\n\n            allocentric_update_numpy = world_binned_map_update.cpu().numpy()\n            self.binned_point_cloud_map = (\n                self.binned_point_cloud_map + allocentric_update_numpy\n            )\n\n            agent_centric_binned_map = project_point_cloud_to_map(\n                xyz_points=agent_centric_point_cloud,\n                bin_axis=\"y\",\n                bins=self.height_bins,\n                map_size=self.binned_point_cloud_map.shape[0],\n                resolution_in_cm=self.resolution_in_cm,\n                flip_row_col=True,\n            )\n            vr = self.vision_range_in_map_units\n            vr_div_2 = self.vision_range_in_map_units // 2\n            width_div_2 = agent_centric_binned_map.shape[1] // 2\n            agent_centric_binned_map = agent_centric_binned_map[\n                :vr, (width_div_2 - vr_div_2) : (width_div_2 + vr_div_2), :\n            ]\n\n            to_return = {\n                \"egocentric_update\": agent_centric_binned_map.cpu().numpy(),\n                \"allocentric_update\": allocentric_update_numpy,\n                \"map\": self.binned_point_cloud_map,\n            }\n\n            if self.return_egocentric_local_context:\n                # See the update function of the semantic map sensor for in 
depth comments regarding the below\n                # Essentially we are simply rotating the full map into the orientation of the agent and then\n                # selecting a smaller region around the agent.\n                theta = -np.pi * camera_rotation / 180\n                cos_theta = np.cos(theta)\n                sin_theta = np.sin(theta)\n                rot_mat = torch.FloatTensor(\n                    [[cos_theta, -sin_theta], [sin_theta, cos_theta]]\n                ).to(self.device)\n\n                move_back_offset = (\n                    -0.5\n                    * (self.vision_range_in_map_units * self.resolution_in_cm / 100)\n                ) * (\n                    rot_mat\n                    @ torch.tensor(\n                        [0, 1], dtype=torch.float, device=self.device\n                    ).unsqueeze(-1)\n                )\n\n                map_size = self.binned_point_cloud_map.shape[0]\n                scaler = 2 * (100 / (self.resolution_in_cm * map_size))\n                offset_to_center_the_agent = (\n                    scaler\n                    * (\n                        torch.tensor(\n                            [\n                                camera_xyz[0],\n                                camera_xyz[2],\n                            ],\n                            dtype=torch.float,\n                            device=self.device,\n                        ).unsqueeze(-1)\n                        + move_back_offset\n                    )\n                    - 1\n                )\n                offset_to_top_of_image = rot_mat @ torch.FloatTensor(\n                    [0, 1.0]\n                ).unsqueeze(1).to(self.device)\n                rotation_and_translate_mat = torch.cat(\n                    (\n                        rot_mat,\n                        offset_to_top_of_image + offset_to_center_the_agent,\n                    ),\n                    dim=1,\n                )\n\n                
full_map_tensor = (\n                    torch.tensor(\n                        self.binned_point_cloud_map,\n                        dtype=torch.float,\n                        device=self.device,\n                    )\n                    .unsqueeze(0)\n                    .permute(0, 3, 1, 2)\n                )\n                full_ego_map = (\n                    F.grid_sample(\n                        full_map_tensor,\n                        F.affine_grid(\n                            rotation_and_translate_mat.to(self.device).unsqueeze(0),\n                            full_map_tensor.shape,\n                            align_corners=False,\n                        ),\n                        align_corners=False,\n                    )\n                    .squeeze(0)\n                    .permute(1, 2, 0)\n                )\n\n                egocentric_local_context = full_ego_map[\n                    :vr, (width_div_2 - vr_div_2) : (width_div_2 + vr_div_2), :\n                ]\n\n                to_return[\"egocentric_local_context\"] = (\n                    egocentric_local_context.cpu().numpy()\n                )\n\n            return to_return\n\n    def reset(self, min_xyz: np.ndarray):\n        \"\"\"Reset the map.\n\n        Resets the internally stored map.\n\n        # Parameters\n        min_xyz : An array of size (3,) corresponding to the minimum possible x, y, and z values that will be observed\n            as a point in a pointcloud when calling `.update(...)`. 
The (world-space) maps returned by calls to `update`\n            will have been normalized so the (0,0,:) entry corresponds to these minimum values.\n        \"\"\"\n        self.min_xyz = min_xyz\n        self.binned_point_cloud_map = np.zeros_like(self.binned_point_cloud_map)\n\n\nclass ObjectHull2d:\n    def __init__(\n        self,\n        object_id: str,\n        object_type: str,\n        hull_points: Union[np.ndarray, Sequence[Sequence[float]]],\n    ):\n        \"\"\"A class used to represent 2d convex hulls of objects when projected\n        to the ground plane.\n\n        # Parameters\n        object_id : A unique id for the object.\n        object_type : The type of the object.\n        hull_points : A Nx2 matrix with `hull_points[:, 0]` being the x coordinates and `hull_points[:, 1]` being\n            the `z` coordinates (this is using the Unity game engine conventions where the `y` axis is up/down).\n        \"\"\"\n        self.object_id = object_id\n        self.object_type = object_type\n        self.hull_points = (\n            hull_points\n            if isinstance(hull_points, np.ndarray)\n            else np.array(hull_points)\n        )\n\n\nclass SemanticMapBuilder(object):\n    \"\"\"Class used to iteratively construct a semantic map based on input depth\n    maps (i.e. pointclouds).\n\n    Adapted from https://github.com/devendrachaplot/Neural-SLAM\n\n    This class can be used to (iteratively) construct a semantic map of objects in the environment.\n\n    This map is similar to that generated by `BinnedPointCloudMapBuilder` (see its documentation for\n    more information) but the various channels correspond to different object types. Thus\n    if the `(i,j,k)` entry of a map generated by this function is `True`, this means that an\n    object of type `k` is present in position `i,j` in the map. 
In particular, by \"present\" we mean that,\n    after projecting the object to the ground plane and taking the convex hull of the resulting\n    2d object, a non-trivial portion of this convex hull overlaps the `i,j` position.\n\n    For attribute information, see the documentation of the `BinnedPointCloudMapBuilder` class. The\n    only attribute present in this class that is not present in `BinnedPointCloudMapBuilder` is\n    `ordered_object_types` which corresponds to a list of unique object types where\n    object type `ordered_object_types[i]` will correspond to the `i`th channel of the map\n    generated by this class.\n    \"\"\"\n\n    def __init__(\n        self,\n        fov: float,\n        vision_range_in_cm: int,\n        map_size_in_cm: int,\n        resolution_in_cm: int,\n        ordered_object_types: Sequence[str],\n        device: torch.device = torch.device(\"cpu\"),\n    ):\n        self.fov = fov\n        self.vision_range_in_map_units = vision_range_in_cm // resolution_in_cm\n        self.map_size_in_cm = map_size_in_cm\n        self.resolution_in_cm = resolution_in_cm\n        self.ordered_object_types = tuple(ordered_object_types)\n        self.device = device\n\n        self.object_type_to_index = {\n            ot: i for i, ot in enumerate(self.ordered_object_types)\n        }\n\n        self.ground_truth_semantic_map = np.zeros(\n            (\n                self.map_size_in_cm // self.resolution_in_cm,\n                self.map_size_in_cm // self.resolution_in_cm,\n                len(self.ordered_object_types),\n            ),\n            dtype=np.uint8,\n        )\n        self.explored_mask = np.zeros(\n            (\n                self.map_size_in_cm // self.resolution_in_cm,\n                self.map_size_in_cm // self.resolution_in_cm,\n                1,\n            ),\n            dtype=bool,\n        )\n\n        self.min_xyz: Optional[np.ndarray] = None\n\n    @staticmethod\n    def randomly_color_semantic_map(\n        
map: Union[np.ndarray, torch.Tensor], threshold: float = 0.5, seed: int = 1\n    ) -> np.ndarray:\n        if not isinstance(map, np.ndarray):\n            map = np.array(map)\n\n        rnd = random.Random(seed)\n        semantic_int_mat = (\n            (map >= threshold)\n            * np.array(list(range(1, map.shape[-1] + 1))).reshape((1, 1, -1))\n        ).max(-1)\n        # noinspection PyTypeChecker\n        return np.uint8(\n            np.array(\n                [(0, 0, 0)]\n                + [\n                    tuple(rnd.randint(0, 256) for _ in range(3))\n                    for _ in range(map.shape[-1])\n                ]\n            )[semantic_int_mat]\n        )\n\n    def _xzs_to_colrows(self, xzs: np.ndarray):\n        height, width, _ = self.ground_truth_semantic_map.shape\n        return np.clip(\n            np.int32(\n                (\n                    (100 / self.resolution_in_cm)\n                    * (xzs - np.array([[self.min_xyz[0], self.min_xyz[2]]]))\n                )\n            ),\n            a_min=0,\n            a_max=np.array(\n                [width - 1, height - 1]\n            ),  # width then height as we're returns cols then rows\n        )\n\n    def build_ground_truth_map(self, object_hulls: Sequence[ObjectHull2d]):\n        self.ground_truth_semantic_map.fill(0)\n\n        height, width, _ = self.ground_truth_semantic_map.shape\n        for object_hull in object_hulls:\n            ot = object_hull.object_type\n\n            if ot in self.object_type_to_index:\n                ind = self.object_type_to_index[ot]\n\n                self.ground_truth_semantic_map[:, :, ind : (ind + 1)] = (\n                    cv2.fillConvexPoly(\n                        img=np.array(\n                            self.ground_truth_semantic_map[:, :, ind : (ind + 1)],\n                            dtype=np.uint8,\n                        ),\n                        points=self._xzs_to_colrows(np.array(object_hull.hull_points)),\n     
                   color=255,\n                    )\n                )\n\n    def update(\n        self,\n        depth_frame: np.ndarray,\n        camera_xyz: np.ndarray,\n        camera_rotation: float,\n        camera_horizon: float,\n    ) -> Dict[str, np.ndarray]:\n        \"\"\"Updates the map with the input depth frame from the agent.\n\n        See the documentation for `BinnedPointCloudMapBuilder.update`,\n        the inputs and outputs are similar except that channels are used\n        to represent the presence/absence of objects of given types.\n        Unlike `BinnedPointCloudMapBuilder.update`, this function also\n        returns two masks with keys `\"egocentric_mask\"` and `\"mask\"`\n        that can be used to determine what portions of the map have been\n        observed by the agent so far in the egocentric and world-space\n        reference frames respectively.\n        \"\"\"\n        with torch.no_grad():\n            assert self.min_xyz is not None\n\n            camera_xyz = torch.from_numpy(camera_xyz - self.min_xyz).to(self.device)\n            map_size = self.ground_truth_semantic_map.shape[0]\n\n            depth_frame = torch.from_numpy(depth_frame).to(self.device)\n            depth_frame[\n                depth_frame\n                > self.vision_range_in_map_units * self.resolution_in_cm / 100\n            ] = np.NaN\n\n            world_space_point_cloud = depth_frame_to_world_space_xyz(\n                depth_frame=depth_frame,\n                camera_world_xyz=camera_xyz,\n                rotation=camera_rotation,\n                horizon=camera_horizon,\n                fov=self.fov,\n            )\n\n            world_newly_explored = (\n                project_point_cloud_to_map(\n                    xyz_points=world_space_point_cloud,\n                    bin_axis=\"y\",\n                    bins=[],\n                    map_size=map_size,\n                    resolution_in_cm=self.resolution_in_cm,\n                    
flip_row_col=True,\n                )\n                > 0.001\n            )\n            world_update_and_mask = torch.cat(\n                (\n                    torch.logical_and(\n                        torch.from_numpy(self.ground_truth_semantic_map).to(\n                            self.device\n                        ),\n                        world_newly_explored,\n                    ),\n                    world_newly_explored,\n                ),\n                dim=-1,\n            ).float()\n            world_update_and_mask_for_sample = world_update_and_mask.unsqueeze(\n                0\n            ).permute(0, 3, 1, 2)\n\n            # We now use grid sampling to rotate world_update_for_sample into the egocentric coordinate\n            # frame of the agent so that the agent's forward direction is downwards in the tensor\n            # (and it's right side is to the right in the image, this means that right/left\n            # when taking the perspective of the agent in the image). This convention aligns with\n            # what's expected by grid_sample where +x corresponds to +cols and +z corresponds to +rows.\n            # Here also the rows/cols have been normalized so that the center of the image is at (0,0)\n            # and the bottom right is at (1,1).\n\n            # Mentally you can think of the output from the F.affine_grid function as you wanting\n            # rotating/translating an axis-aligned square on the image-to-be-sampled and then\n            # copying whatever is in this square to a new image. Note that the translation always\n            # happens in the global reference frame after the rotation. We'll start by rotating\n            # the square so that the the agent's z direction is downwards in the image.\n            # Since the global axis of the map and the grid sampling are aligned, this requires\n            # rotating the square by the rotation of the agent. 
As rotation is negative the usual\n            # standard in THOR, we need to negate the rotation of the agent.\n            theta = -np.pi * camera_rotation / 180\n\n            # Here form the rotation matrix\n            cos_theta = np.cos(theta)\n            sin_theta = np.sin(theta)\n            rot_mat = torch.FloatTensor(\n                [[cos_theta, -sin_theta], [sin_theta, cos_theta]]\n            ).to(self.device)\n\n            # Now we need to figure out the translation. For an intuitive understanding, we break this\n            # translation into two different \"offsets\". The first offset centers the square on the\n            # agent's current location:\n            scaler = 2 * (100 / (self.resolution_in_cm * map_size))\n            offset_to_center_the_agent = (\n                scaler\n                * torch.FloatTensor([camera_xyz[0], camera_xyz[2]])\n                .unsqueeze(-1)\n                .to(self.device)\n                - 1\n            )\n            # The second offset moves the square in the direction of the agent's z direction\n            # so that the output image will have the agent's view starting directly at the\n            # top of the image.\n            offset_to_top_of_image = rot_mat @ torch.FloatTensor([0, 1.0]).unsqueeze(\n                1\n            ).to(self.device)\n            rotation_and_translate_mat = torch.cat(\n                (\n                    rot_mat,\n                    offset_to_top_of_image + offset_to_center_the_agent,\n                ),\n                dim=1,\n            )\n\n            ego_update_and_mask = F.grid_sample(\n                world_update_and_mask_for_sample.to(self.device),\n                F.affine_grid(\n                    rotation_and_translate_mat.to(self.device).unsqueeze(0),\n                    world_update_and_mask_for_sample.shape,\n                    align_corners=False,\n                ),\n                align_corners=False,\n            )\n\n            # 
All that's left now is to crop out the portion of the transformed tensor that we actually\n            # care about (i.e. the portion corresponding to the agent's `self.vision_range_in_map_units`.\n            vr = self.vision_range_in_map_units\n            half_vr = vr // 2\n            center = self.map_size_in_cm // (2 * self.resolution_in_cm)\n            cropped = ego_update_and_mask[\n                :, :, :vr, (center - half_vr) : (center + half_vr)\n            ]\n\n            np.logical_or(\n                self.explored_mask,\n                world_newly_explored.cpu().numpy(),\n                out=self.explored_mask,\n            )\n\n            return {\n                \"egocentric_update\": cropped[0, :-1].permute(1, 2, 0).cpu().numpy(),\n                \"egocentric_mask\": (cropped[0, -1:].view(vr, vr, 1) > 0.001)\n                .cpu()\n                .numpy(),\n                \"explored_mask\": np.array(self.explored_mask),\n                \"map\": np.logical_and(\n                    self.explored_mask, (self.ground_truth_semantic_map > 0)\n                ),\n            }\n\n    def reset(self, min_xyz: np.ndarray, object_hulls: Sequence[ObjectHull2d]):\n        \"\"\"Reset the map.\n\n        Resets the internally stored map.\n\n        # Parameters\n        min_xyz : An array of size (3,) corresponding to the minimum possible x, y, and z values that will be observed\n            as a point in a pointcloud when calling `.update(...)`. The (world-space) maps returned by calls to `update`\n            will have been normalized so the (0,0,:) entry corresponds to these minimum values.\n        object_hulls : The object hulls corresponding to objects in the scene. These will be used to\n            construct the map.\n        \"\"\"\n        self.min_xyz = min_xyz\n        self.build_ground_truth_map(object_hulls=object_hulls)\n"
  },
  {
    "path": "allenact/embodiedai/mapping/mapping_utils/point_cloud_utils.py",
    "content": "# MIT License\n#\n# Original Copyright (c) 2020 Devendra Chaplot\n#\n# Modified work Copyright (c) 2021 Allen Institute for Artificial Intelligence\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\nimport math\nfrom typing import Optional, Sequence, cast\n\nimport numpy as np\nimport torch\n\nfrom allenact_plugins.ithor_plugin.ithor_util import vertical_to_horizontal_fov\n\n\ndef camera_space_xyz_to_world_xyz(\n    camera_space_xyzs: torch.Tensor,\n    camera_world_xyz: torch.Tensor,\n    rotation: float,\n    horizon: float,\n) -> torch.Tensor:\n    \"\"\"Transforms xyz coordinates in the camera's coordinate frame to world-\n    space (global) xyz frame.\n\n    This code has been adapted from https://github.com/devendrachaplot/Neural-SLAM.\n\n    **IMPORTANT:** We use the conventions from the Unity game engine. 
In particular:\n\n    * A rotation of 0 corresponds to facing north.\n    * Positive rotations correspond to CLOCKWISE rotations. That is a rotation of 90 degrees corresponds\n        to facing east. **THIS IS THE OPPOSITE CONVENTION OF THE ONE GENERALLY USED IN MATHEMATICS.**\n    * When facing NORTH (rotation==0) moving ahead by 1 meter results in the z coordinate\n        increasing by 1. Moving to the right by 1 meter corresponds to increasing the x coordinate by 1.\n         Finally moving upwards by 1 meter corresponds to increasing the y coordinate by 1.\n         **Having x,z as the ground plane in this way is common in computer graphics but is different than\n         the usual mathematical convention of having z be \"up\".**\n    * The horizon corresponds to how far below the horizontal the camera is facing. I.e. a horizon\n        of 30 corresponds to the camera being angled downwards at an angle of 30 degrees.\n\n    # Parameters\n    camera_space_xyzs : A 3xN matrix of xyz coordinates in the camera's reference frame.\n        Here `x, y, z = camera_space_xyzs[:, i]` should equal the xyz coordinates for the ith point.\n    camera_world_xyz : The camera's xyz position in the world reference frame.\n    rotation : The world-space rotation (in degrees) of the camera.\n    horizon : The horizon (in degrees) of the camera.\n\n    # Returns\n    3xN tensor where entry [:, i] is the xyz world-space coordinate corresponding to the camera-space\n    coordinate camera_space_xyzs[:, i]\n    \"\"\"\n    # Adapted from https://github.com/devendrachaplot/Neural-SLAM.\n\n    # First compute the transformation that points undergo\n    # due to the camera's horizon\n    psi = -horizon * np.pi / 180\n    cos_psi = np.cos(psi)\n    sin_psi = np.sin(psi)\n    # fmt: off\n    horizon_transform = camera_space_xyzs.new(\n        [\n            [1, 0, 0], # unchanged\n            [0, cos_psi, sin_psi],\n            [0, -sin_psi, cos_psi,],\n        ],\n    )\n    # fmt: 
on\n\n    # Next compute the transformation that points undergo\n    # due to the agent's rotation about the y-axis\n    phi = -rotation * np.pi / 180\n    cos_phi = np.cos(phi)\n    sin_phi = np.sin(phi)\n    # fmt: off\n    rotation_transform = camera_space_xyzs.new(\n        [\n            [cos_phi, 0, -sin_phi],\n            [0, 1, 0], # unchanged\n            [sin_phi, 0, cos_phi],],\n    )\n    # fmt: on\n\n    # Apply the above transformations\n    view_points = (rotation_transform @ horizon_transform) @ camera_space_xyzs\n\n    # Translate the points w.r.t. the camera's position in world space.\n    world_points = view_points + camera_world_xyz[:, None]\n    return world_points\n\n\ndef depth_frame_to_camera_space_xyz(\n    depth_frame: torch.Tensor, mask: Optional[torch.Tensor], fov: float = 90\n) -> torch.Tensor:\n    \"\"\"Transforms a input depth map into a collection of xyz points (i.e. a\n    point cloud) in the camera's coordinate frame.\n\n    # Parameters\n    depth_frame : A square depth map, i.e. an MxM matrix with entry `depth_frame[i, j]` equaling\n        the distance from the camera to nearest surface at pixel (i,j).\n    mask : An optional boolean mask of the same size (MxM) as the input depth. Only values\n        where this mask are true will be included in the returned matrix of xyz coordinates. 
If\n        `None` then no pixels will be masked out (so the returned matrix of xyz points will have\n        dimension 3x(M*M)).\n    fov: The field of view of the camera.\n\n    # Returns\n\n    A 3xN matrix with entry [:, i] equalling the xyz coordinates (in the camera's coordinate\n    frame) of a point in the point cloud corresponding to the input depth frame.\n    \"\"\"\n    h, w = depth_frame.shape[:2]\n    if mask is None:\n        mask = torch.ones_like(depth_frame, dtype=torch.bool)\n\n    # pixel centers\n    camera_space_yx_offsets = (\n        torch.stack(torch.where(mask))\n        + 0.5  # Offset by 0.5 so that we are in the middle of the pixel\n    )\n\n    # Subtract center\n    camera_space_yx_offsets[:1] -= h / 2.0\n    camera_space_yx_offsets[1:] -= w / 2.0\n\n    # Make \"up\" in y be positive\n    camera_space_yx_offsets[0, :] *= -1\n\n    # Put points on the clipping plane\n    camera_space_yx_offsets[:1] *= (2.0 / h) * math.tan((fov / 2) / 180 * math.pi)\n    camera_space_yx_offsets[1:] *= (2.0 / w) * math.tan(\n        (vertical_to_horizontal_fov(fov, height=h, width=w) / 2) / 180 * math.pi\n    )\n\n    # noinspection PyArgumentList\n    camera_space_xyz = torch.cat(\n        [\n            camera_space_yx_offsets[1:, :],  # This is x\n            camera_space_yx_offsets[:1, :],  # This is y\n            torch.ones_like(camera_space_yx_offsets[:1, :]),\n        ],\n        axis=0,\n    )\n\n    return camera_space_xyz * depth_frame[mask][None, :]\n\n\ndef depth_frame_to_world_space_xyz(\n    depth_frame: torch.Tensor,\n    camera_world_xyz: torch.Tensor,\n    rotation: float,\n    horizon: float,\n    fov: float,\n):\n    \"\"\"Transforms an input depth map into a collection of xyz points (i.e. a\n    point cloud) in the world-space coordinate frame.\n\n    **IMPORTANT:** We use the conventions from the Unity game engine. 
In particular:\n\n    * A rotation of 0 corresponds to facing north.\n    * Positive rotations correspond to CLOCKWISE rotations. That is a rotation of 90 degrees corresponds\n        to facing east. **THIS IS THE OPPOSITE CONVENTION OF THE ONE GENERALLY USED IN MATHEMATICS.**\n    * When facing NORTH (rotation==0) moving ahead by 1 meter results in the z coordinate\n        increasing by 1. Moving to the right by 1 meter corresponds to increasing the x coordinate by 1.\n         Finally moving upwards by 1 meter corresponds to increasing the y coordinate by 1.\n         **Having x,z as the ground plane in this way is common in computer graphics but is different than\n         the usual mathematical convention of having z be \"up\".**\n    * The horizon corresponds to how far below the horizontal the camera is facing. I.e. a horizon\n        of 30 corresponds to the camera being angled downwards at an angle of 30 degrees.\n\n    # Parameters\n    depth_frame : A square depth map, i.e. an MxM matrix with entry `depth_frame[i, j]` equaling\n        the distance from the camera to nearest surface at pixel (i,j).\n    mask : An optional boolean mask of the same size (MxM) as the input depth. Only values\n        where this mask is true will be included in the returned matrix of xyz coordinates. 
If\n        `None` then no pixels will be masked out (so the returned matrix of xyz points will have\n        dimension 3x(M*M)).\n    camera_space_xyzs : A 3xN matrix of xyz coordinates in the camera's reference frame.\n        Here `x, y, z = camera_space_xyzs[:, i]` should equal the xyz coordinates for the ith point.\n    camera_world_xyz : The camera's xyz position in the world reference frame.\n    rotation : The world-space rotation (in degrees) of the camera.\n    horizon : The horizon (in degrees) of the camera.\n    fov: The field of view of the camera.\n\n    # Returns\n\n    A tensor of shape `(*depth_frame.shape, 3)` with entry [i, j, :] equalling the xyz coordinates (in the\n    world coordinate frame) of the point in the point cloud corresponding to pixel (i, j) of the input depth frame.\n    \"\"\"\n\n    camera_space_xyz = depth_frame_to_camera_space_xyz(\n        depth_frame=depth_frame, mask=None, fov=fov\n    )\n\n    world_points = camera_space_xyz_to_world_xyz(\n        camera_space_xyzs=camera_space_xyz,\n        camera_world_xyz=camera_world_xyz,\n        rotation=rotation,\n        horizon=horizon,\n    )\n\n    return world_points.view(3, *depth_frame.shape).permute(1, 2, 0)\n\n\ndef project_point_cloud_to_map(\n    xyz_points: torch.Tensor,\n    bin_axis: str,\n    bins: Sequence[float],\n    map_size: int,\n    resolution_in_cm: int,\n    flip_row_col: bool,\n):\n    \"\"\"Bins an input point cloud into a map tensor with the bins equaling the\n    channels.\n\n    This code has been adapted from https://github.com/devendrachaplot/Neural-SLAM.\n\n    # Parameters\n    xyz_points : (x,y,z) pointcloud(s) as a torch.Tensor of shape (... 
x height x width x 3).\n        All operations are vectorized across the `...` dimensions.\n    bin_axis : Either \"x\", \"y\", or \"z\", the axis which should be binned by the values in `bins`.\n        If you have generated your point clouds with any of the other functions in the `point_cloud_utils`\n        module you almost certainly want this to be \"y\" as this is the default upwards dimension.\n    bins: The values by which to bin along `bin_axis`, see the `bins` parameter of `np.digitize`\n        for more info.\n    map_size : The axes not specified by `bin_axis` will be divided by `resolution_in_cm / 100`\n        and then rounded to the nearest integer. They are then expected to have their values\n        within the interval [0, ..., map_size - 1].\n    resolution_in_cm: The resolution_in_cm, in cm, of the map output from this function. Every\n        grid square of the map corresponds to a (`resolution_in_cm`x`resolution_in_cm`) square\n        in space.\n    flip_row_col: Should the rows/cols of the map be flipped? See the 'Returns' section below for more\n        info.\n\n    # Returns\n    A collection of maps of shape (... 
x map_size x map_size x (len(bins)+1)), note that bin_axis\n    has been moved to the last index of this returned map, the other two axes stay in their original\n    order unless `flip_row_col` has been called in which case they are reversed (useful as often\n    rows should correspond to y or z instead of x).\n    \"\"\"\n    bin_dim = [\"x\", \"y\", \"z\"].index(bin_axis)\n\n    start_shape = xyz_points.shape\n    xyz_points = xyz_points.reshape([-1, *start_shape[-3:]])\n    num_clouds, h, w, _ = xyz_points.shape\n\n    if not flip_row_col:\n        new_order = [i for i in [0, 1, 2] if i != bin_dim] + [bin_dim]\n    else:\n        new_order = [i for i in [2, 1, 0] if i != bin_dim] + [bin_dim]\n\n    uvw_points = cast(\n        torch.Tensor, torch.stack([xyz_points[..., i] for i in new_order], dim=-1)\n    )\n\n    num_bins = len(bins) + 1\n\n    isnotnan = ~torch.isnan(xyz_points[..., 0])\n\n    uvw_points_binned: torch.Tensor = torch.cat(\n        (\n            torch.round(100 * uvw_points[..., :-1] / resolution_in_cm).long(),\n            torch.bucketize(\n                uvw_points[..., -1:].contiguous(), boundaries=uvw_points.new(bins)\n            ),\n        ),\n        dim=-1,\n    )\n\n    maxes = (\n        xyz_points.new()\n        .long()\n        .new([map_size, map_size, num_bins])\n        .reshape((1, 1, 1, 3))\n    )\n\n    isvalid = torch.logical_and(\n        torch.logical_and(\n            (uvw_points_binned >= 0).all(-1),\n            (uvw_points_binned < maxes).all(-1),\n        ),\n        isnotnan,\n    )\n\n    uvw_points_binned_with_index_mat = torch.cat(\n        (\n            torch.repeat_interleave(\n                torch.arange(0, num_clouds).to(xyz_points.device), h * w\n            ).reshape(-1, 1),\n            uvw_points_binned.reshape(-1, 3),\n        ),\n        dim=1,\n    )\n\n    uvw_points_binned_with_index_mat[~isvalid.reshape(-1), :] = 0\n    ind = (\n        uvw_points_binned_with_index_mat[:, 0] * (map_size * map_size 
* num_bins)\n        + uvw_points_binned_with_index_mat[:, 1] * (map_size * num_bins)\n        + uvw_points_binned_with_index_mat[:, 2] * num_bins\n        + uvw_points_binned_with_index_mat[:, 3]\n    )\n    ind[~isvalid.reshape(-1)] = 0\n    count = torch.bincount(\n        ind.view(-1),\n        isvalid.view(-1).long(),\n        minlength=num_clouds * map_size * map_size * num_bins,\n    )\n\n    return count.view(*start_shape[:-3], map_size, map_size, num_bins)\n\n\n################\n# FOR DEBUGGNG #\n################\n# The below functions are versions of the above which, because of their reliance on\n# numpy functions, cannot use GPU acceleration. These are possibly useful for debugging,\n# performance comparisons, or for validating that the above GPU variants work properly.\n\n\ndef _cpu_only_camera_space_xyz_to_world_xyz(\n    camera_space_xyzs: np.ndarray,\n    camera_world_xyz: np.ndarray,\n    rotation: float,\n    horizon: float,\n):\n    # Adapted from https://github.com/devendrachaplot/Neural-SLAM.\n\n    # view_position = 3, world_points = 3 x N\n    # NOTE: camera_position is not equal to agent_position!!\n\n    # First compute the transformation that points undergo\n    # due to the camera's horizon\n    psi = -horizon * np.pi / 180\n    cos_psi = np.cos(psi)\n    sin_psi = np.sin(psi)\n    # fmt: off\n    horizon_transform = np.array(\n        [\n            [1, 0, 0], # unchanged\n            [0, cos_psi, sin_psi],\n            [0, -sin_psi, cos_psi,],\n        ],\n        np.float64,\n    )\n    # fmt: on\n\n    # Next compute the transformation that points undergo\n    # due to the agent's rotation about the y-axis\n    phi = -rotation * np.pi / 180\n    cos_phi = np.cos(phi)\n    sin_phi = np.sin(phi)\n    # fmt: off\n    rotation_transform = np.array(\n        [\n            [cos_phi, 0, -sin_phi],\n            [0, 1, 0], # unchanged\n            [sin_phi, 0, cos_phi],],\n        np.float64,\n    )\n    # fmt: on\n\n    # Apply the above 
transformations\n    view_points = (rotation_transform @ horizon_transform) @ camera_space_xyzs\n\n    # Translate the points w.r.t. the camera's position in world space.\n    world_points = view_points + camera_world_xyz[:, None]\n    return world_points\n\n\ndef _cpu_only_depth_frame_to_camera_space_xyz(\n    depth_frame: np.ndarray, mask: Optional[np.ndarray], fov: float = 90\n):\n    \"\"\"\"\"\"\n    assert (\n        len(depth_frame.shape) == 2 and depth_frame.shape[0] == depth_frame.shape[1]\n    ), f\"depth has shape {depth_frame.shape}, we only support (N, N) shapes for now.\"\n\n    resolution = depth_frame.shape[0]\n    if mask is None:\n        mask = np.ones(depth_frame.shape, dtype=bool)\n\n    # pixel centers\n    camera_space_yx_offsets = (\n        np.stack(np.where(mask))\n        + 0.5  # Offset by 0.5 so that we are in the middle of the pixel\n    )\n\n    # Subtract center\n    camera_space_yx_offsets -= resolution / 2.0\n\n    # Make \"up\" in y be positive\n    camera_space_yx_offsets[0, :] *= -1\n\n    # Put points on the clipping plane\n    camera_space_yx_offsets *= (2.0 / resolution) * math.tan((fov / 2) / 180 * math.pi)\n\n    camera_space_xyz = np.concatenate(\n        [\n            camera_space_yx_offsets[1:, :],  # This is x\n            camera_space_yx_offsets[:1, :],  # This is y\n            np.ones_like(camera_space_yx_offsets[:1, :]),\n        ],\n        axis=0,\n    )\n\n    return camera_space_xyz * depth_frame[mask][None, :]\n\n\ndef _cpu_only_depth_frame_to_world_space_xyz(\n    depth_frame: np.ndarray,\n    camera_world_xyz: np.ndarray,\n    rotation: float,\n    horizon: float,\n    fov: float,\n):\n    camera_space_xyz = _cpu_only_depth_frame_to_camera_space_xyz(\n        depth_frame=depth_frame, mask=None, fov=fov\n    )\n\n    world_points = _cpu_only_camera_space_xyz_to_world_xyz(\n        camera_space_xyzs=camera_space_xyz,\n        camera_world_xyz=camera_world_xyz,\n        rotation=rotation,\n        
horizon=horizon,\n    )\n\n    return world_points.reshape((3, *depth_frame.shape)).transpose((1, 2, 0))\n\n\ndef _cpu_only_project_point_cloud_to_map(\n    xyz_points: np.ndarray,\n    bin_axis: str,\n    bins: Sequence[float],\n    map_size: int,\n    resolution_in_cm: int,\n    flip_row_col: bool,\n):\n    \"\"\"Bins points into bins.\n\n    Adapted from https://github.com/devendrachaplot/Neural-SLAM.\n\n    # Parameters\n    xyz_points : (x,y,z) point clouds as a np.ndarray of shape (... x height x width x 3). (x,y,z)\n        should be coordinates specified in meters.\n    bin_axis : Either \"x\", \"y\", or \"z\", the axis which should be binned by the values in `bins`.\n    bins: The values by which to bin along `bin_axis`, see the `bins` parameter of `np.digitize`\n        for more info.\n    map_size : The axes not specified by `bin_axis` will be divided by `resolution_in_cm / 100`\n        and then rounded to the nearest integer. They are then expected to have their values\n        within the interval [0, ..., map_size - 1].\n    resolution_in_cm: The resolution_in_cm, in cm, of the map output from this function. Every\n        grid square of the map corresponds to a (`resolution_in_cm`x`resolution_in_cm`) square\n        in space.\n    flip_row_col: Should the rows/cols of the map be flipped?\n\n    # Returns\n    A collection of maps of shape (... 
x map_size x map_size x (len(bins)+1)), note that bin_axis\n    has been moved to the last index of this returned map, the other two axes stay in their original\n    order unless `flip_row_col` has been called in which case they are reversed (useful if you give\n    points as often rows should correspond to y or z instead of x).\n    \"\"\"\n    bin_dim = [\"x\", \"y\", \"z\"].index(bin_axis)\n\n    start_shape = xyz_points.shape\n    xyz_points = xyz_points.reshape([-1, *start_shape[-3:]])\n    num_clouds, h, w, _ = xyz_points.shape\n\n    if not flip_row_col:\n        new_order = [i for i in [0, 1, 2] if i != bin_dim] + [bin_dim]\n    else:\n        new_order = [i for i in [2, 1, 0] if i != bin_dim] + [bin_dim]\n\n    uvw_points: np.ndarray = np.stack([xyz_points[..., i] for i in new_order], axis=-1)\n\n    num_bins = len(bins) + 1\n\n    isnotnan = ~np.isnan(xyz_points[..., 0])\n\n    uvw_points_binned = np.concatenate(\n        (\n            np.round(100 * uvw_points[..., :-1] / resolution_in_cm).astype(np.int32),\n            np.digitize(uvw_points[..., -1:], bins=bins).astype(np.int32),\n        ),\n        axis=-1,\n    )\n\n    maxes = np.array([map_size, map_size, num_bins]).reshape((1, 1, 1, 3))\n\n    isvalid = np.logical_and.reduce(\n        (\n            (uvw_points_binned >= 0).all(-1),\n            (uvw_points_binned < maxes).all(-1),\n            isnotnan,\n        )\n    )\n\n    uvw_points_binned_with_index_mat = np.concatenate(\n        (\n            np.repeat(np.arange(0, num_clouds), h * w).reshape(-1, 1),\n            uvw_points_binned.reshape(-1, 3),\n        ),\n        axis=1,\n    )\n\n    uvw_points_binned_with_index_mat[~isvalid.reshape(-1), :] = 0\n    ind = np.ravel_multi_index(\n        uvw_points_binned_with_index_mat.transpose(),\n        (num_clouds, map_size, map_size, num_bins),\n    )\n    ind[~isvalid.reshape(-1)] = 0\n    count = np.bincount(\n        ind.ravel(),\n        isvalid.ravel().astype(np.int32),\n        
minlength=num_clouds * map_size * map_size * num_bins,\n    )\n\n    return count.reshape([*start_shape[:-3], map_size, map_size, num_bins])\n"
  },
  {
    "path": "allenact/embodiedai/models/__init__.py",
    "content": ""
  },
  {
    "path": "allenact/embodiedai/models/aux_models.py",
    "content": "# Original work Copyright (c) Facebook, Inc. and its affiliates.\n# Modified work Copyright (c) Allen Institute for AI\n# This source code is licensed under the MIT license found in the\n# LICENSE file in the root directory of this source tree.\n\"\"\"Several of the models defined in this file are modified versions of those\nfound in https://github.com/joel99/habitat-pointnav-\naux/blob/master/habitat_baselines/\"\"\"\n\nimport torch\nimport torch.nn as nn\n\nfrom allenact.embodiedai.aux_losses.losses import (\n    InverseDynamicsLoss,\n    TemporalDistanceLoss,\n    CPCALoss,\n    CPCASoftMaxLoss,\n)\nfrom allenact.utils.model_utils import FeatureEmbedding\n\n\nclass AuxiliaryModel(nn.Module):\n    \"\"\"The class of defining the models for all kinds of self-supervised\n    auxiliary tasks.\"\"\"\n\n    def __init__(\n        self,\n        aux_uuid: str,\n        action_dim: int,\n        obs_embed_dim: int,\n        belief_dim: int,\n        action_embed_size: int = 4,\n        cpca_classifier_hidden_dim: int = 32,\n        cpca_softmax_dim: int = 128,\n    ):\n        super().__init__()\n        self.aux_uuid = aux_uuid\n        self.action_dim = action_dim\n        self.obs_embed_dim = obs_embed_dim\n        self.belief_dim = belief_dim\n        self.action_embed_size = action_embed_size\n        self.cpca_classifier_hidden_dim = cpca_classifier_hidden_dim\n        self.cpca_softmax_dim = cpca_softmax_dim\n\n        self.initialize_model_given_aux_uuid(self.aux_uuid)\n\n    def initialize_model_given_aux_uuid(self, aux_uuid: str):\n        if aux_uuid == InverseDynamicsLoss.UUID:\n            self.init_inverse_dynamics()\n        elif aux_uuid == TemporalDistanceLoss.UUID:\n            self.init_temporal_distance()\n        elif CPCALoss.UUID in aux_uuid:  # the CPCA family with various k\n            self.init_cpca()\n        elif CPCASoftMaxLoss.UUID in aux_uuid:\n            self.init_cpca_softmax()\n        else:\n            raise 
ValueError(\"Unknown Auxiliary Loss UUID\")\n\n    def init_inverse_dynamics(self):\n        self.decoder = nn.Linear(\n            2 * self.obs_embed_dim + self.belief_dim, self.action_dim\n        )\n\n    def init_temporal_distance(self):\n        self.decoder = nn.Linear(2 * self.obs_embed_dim + self.belief_dim, 1)\n\n    def init_cpca(self):\n        ## Auto-regressive model to predict future context\n        self.action_embedder = FeatureEmbedding(\n            self.action_dim + 1, self.action_embed_size\n        )\n        # NOTE: add extra 1 in embedding dict cuz we will pad zero actions?\n        self.context_model = nn.GRU(self.action_embed_size, self.belief_dim)\n\n        ## Classifier to estimate mutual information\n        self.classifier = nn.Sequential(\n            nn.Linear(\n                self.belief_dim + self.obs_embed_dim, self.cpca_classifier_hidden_dim\n            ),\n            nn.ReLU(),\n            nn.Linear(self.cpca_classifier_hidden_dim, 1),\n        )\n\n    def init_cpca_softmax(self):\n        # same as CPCA with extra MLP for contrastive losses.\n        ###\n        self.action_embedder = FeatureEmbedding(\n            self.action_dim + 1, self.action_embed_size\n        )\n        # NOTE: add extra 1 in embedding dict cuz we will pad zero actions?\n        self.context_model = nn.GRU(self.action_embed_size, self.belief_dim)\n\n        ## Classifier to estimate mutual information\n        self.visual_mlp = nn.Sequential(\n            nn.Linear(self.obs_embed_dim, self.cpca_classifier_hidden_dim),\n            nn.ReLU(),\n            nn.Linear(self.cpca_classifier_hidden_dim, self.cpca_softmax_dim),\n        )\n\n        self.belief_mlp = nn.Sequential(\n            nn.Linear(self.belief_dim, self.cpca_classifier_hidden_dim),\n            nn.ReLU(),\n            nn.Linear(self.cpca_classifier_hidden_dim, self.cpca_softmax_dim),\n        )\n\n    def forward(self, features: torch.FloatTensor):\n        if self.aux_uuid in 
[InverseDynamicsLoss.UUID, TemporalDistanceLoss.UUID]:\n            return self.decoder(features)\n        else:\n            raise NotImplementedError(\n                f\"Auxiliary model with UUID {self.aux_uuid} does not support `forward` call.\"\n            )\n"
  },
  {
    "path": "allenact/embodiedai/models/basic_models.py",
    "content": "\"\"\"Basic building block torch networks that can be used across a variety of\ntasks.\"\"\"\n\nfrom typing import (\n    Sequence,\n    Dict,\n    Union,\n    cast,\n    List,\n    Callable,\n    Optional,\n    Tuple,\n    Any,\n)\n\nimport gym\nimport numpy as np\nimport torch\nfrom gym.spaces.dict import Dict as SpaceDict\nimport torch.nn as nn\n\nfrom allenact.algorithms.onpolicy_sync.policy import ActorCriticModel, DistributionType\nfrom allenact.base_abstractions.distributions import CategoricalDistr, Distr\nfrom allenact.base_abstractions.misc import ActorCriticOutput, Memory\nfrom allenact.utils.model_utils import make_cnn, compute_cnn_output\nfrom allenact.utils.system import get_logger\n\n\nclass SimpleCNN(nn.Module):\n    \"\"\"A Simple N-Conv CNN followed by a fully connected layer. Takes in\n    observations (of type gym.spaces.dict) and produces an embedding of the\n    `rgb_uuid` and/or `depth_uuid` components.\n\n    # Attributes\n\n    observation_space : The observation_space of the agent, should have `rgb_uuid` or `depth_uuid` as\n        a component (otherwise it is a blind model).\n    output_size : The size of the embedding vector to produce.\n    \"\"\"\n\n    def __init__(\n        self,\n        observation_space: SpaceDict,\n        output_size: int,\n        rgb_uuid: Optional[str],\n        depth_uuid: Optional[str],\n        layer_channels: Sequence[int] = (32, 64, 32),\n        kernel_sizes: Sequence[Tuple[int, int]] = ((8, 8), (4, 4), (3, 3)),\n        layers_stride: Sequence[Tuple[int, int]] = ((4, 4), (2, 2), (1, 1)),\n        paddings: Sequence[Tuple[int, int]] = ((0, 0), (0, 0), (0, 0)),\n        dilations: Sequence[Tuple[int, int]] = ((1, 1), (1, 1), (1, 1)),\n        flatten: bool = True,\n        output_relu: bool = True,\n    ):\n        \"\"\"Initializer.\n\n        # Parameters\n\n        observation_space : See class attributes documentation.\n        output_size : See class attributes documentation.\n       
 \"\"\"\n        super().__init__()\n\n        self.rgb_uuid = rgb_uuid\n        if self.rgb_uuid is not None:\n            assert self.rgb_uuid in observation_space.spaces\n            self._n_input_rgb = observation_space.spaces[self.rgb_uuid].shape[2]\n            assert self._n_input_rgb >= 0\n        else:\n            self._n_input_rgb = 0\n\n        self.depth_uuid = depth_uuid\n        if self.depth_uuid is not None:\n            assert self.depth_uuid in observation_space.spaces\n            self._n_input_depth = observation_space.spaces[self.depth_uuid].shape[2]\n            assert self._n_input_depth >= 0\n        else:\n            self._n_input_depth = 0\n\n        if not self.is_blind:\n            # hyperparameters for layers\n            self._cnn_layers_channels = list(layer_channels)\n            self._cnn_layers_kernel_size = list(kernel_sizes)\n            self._cnn_layers_stride = list(layers_stride)\n            self._cnn_layers_paddings = list(paddings)\n            self._cnn_layers_dilations = list(dilations)\n\n            if self._n_input_rgb > 0:\n                input_rgb_cnn_dims = np.array(\n                    observation_space.spaces[self.rgb_uuid].shape[:2], dtype=np.float32\n                )\n                self.rgb_cnn = self.make_cnn_from_params(\n                    output_size=output_size,\n                    input_dims=input_rgb_cnn_dims,\n                    input_channels=self._n_input_rgb,\n                    flatten=flatten,\n                    output_relu=output_relu,\n                )\n\n            if self._n_input_depth > 0:\n                input_depth_cnn_dims = np.array(\n                    observation_space.spaces[self.depth_uuid].shape[:2],\n                    dtype=np.float32,\n                )\n                self.depth_cnn = self.make_cnn_from_params(\n                    output_size=output_size,\n                    input_dims=input_depth_cnn_dims,\n                    
input_channels=self._n_input_depth,\n                    flatten=flatten,\n                    output_relu=output_relu,\n                )\n\n    def make_cnn_from_params(\n        self,\n        output_size: int,\n        input_dims: np.ndarray,\n        input_channels: int,\n        flatten: bool,\n        output_relu: bool,\n    ) -> nn.Module:\n        output_dims = input_dims\n        for kernel_size, stride, padding, dilation in zip(\n            self._cnn_layers_kernel_size,\n            self._cnn_layers_stride,\n            self._cnn_layers_paddings,\n            self._cnn_layers_dilations,\n        ):\n            # noinspection PyUnboundLocalVariable\n            output_dims = self._conv_output_dim(\n                dimension=output_dims,\n                padding=np.array(padding, dtype=np.float32),\n                dilation=np.array(dilation, dtype=np.float32),\n                kernel_size=np.array(kernel_size, dtype=np.float32),\n                stride=np.array(stride, dtype=np.float32),\n            )\n\n        # noinspection PyUnboundLocalVariable\n        cnn = make_cnn(\n            input_channels=input_channels,\n            layer_channels=self._cnn_layers_channels,\n            kernel_sizes=self._cnn_layers_kernel_size,\n            strides=self._cnn_layers_stride,\n            paddings=self._cnn_layers_paddings,\n            dilations=self._cnn_layers_dilations,\n            output_height=output_dims[0],\n            output_width=output_dims[1],\n            output_channels=output_size,\n            flatten=flatten,\n            output_relu=output_relu,\n        )\n        self.layer_init(cnn)\n\n        return cnn\n\n    @staticmethod\n    def _conv_output_dim(\n        dimension: Sequence[int],\n        padding: Sequence[int],\n        dilation: Sequence[int],\n        kernel_size: Sequence[int],\n        stride: Sequence[int],\n    ) -> Tuple[int, ...]:\n        \"\"\"Calculates the output height and width based on the input height and\n      
  width to the convolution layer. For parameter definitions see.\n\n        [here](https://pytorch.org/docs/master/nn.html#torch.nn.Conv2d).\n\n        # Parameters\n\n        dimension : See above link.\n        padding : See above link.\n        dilation : See above link.\n        kernel_size : See above link.\n        stride : See above link.\n        \"\"\"\n        assert len(dimension) == 2\n        out_dimension = []\n        for i in range(len(dimension)):\n            out_dimension.append(\n                int(\n                    np.floor(\n                        (\n                            (\n                                dimension[i]\n                                + 2 * padding[i]\n                                - dilation[i] * (kernel_size[i] - 1)\n                                - 1\n                            )\n                            / stride[i]\n                        )\n                        + 1\n                    )\n                )\n            )\n        return tuple(out_dimension)\n\n    @staticmethod\n    def layer_init(cnn) -> None:\n        \"\"\"Initialize layer parameters using Kaiming normal.\"\"\"\n        for layer in cnn:\n            if isinstance(layer, (nn.Conv2d, nn.Linear)):\n                nn.init.kaiming_normal_(layer.weight, nn.init.calculate_gain(\"relu\"))\n                if layer.bias is not None:\n                    nn.init.constant_(layer.bias, val=0)\n\n    @property\n    def is_blind(self):\n        \"\"\"True if the observation space doesn't include `self.rgb_uuid` or\n        `self.depth_uuid`.\"\"\"\n        return self._n_input_rgb + self._n_input_depth == 0\n\n    def forward(self, observations: Dict[str, torch.Tensor]):  # type: ignore\n        if self.is_blind:\n            return None\n\n        def check_use_agent(new_setting):\n            if use_agent is not None:\n                assert (\n                    use_agent is new_setting\n                ), \"rgb and depth must both use 
an agent dim or none\"\n            return new_setting\n\n        cnn_output_list: List[torch.Tensor] = []\n        use_agent: Optional[bool] = None\n\n        if self.rgb_uuid is not None:\n            use_agent = check_use_agent(len(observations[self.rgb_uuid].shape) == 6)\n            cnn_output_list.append(\n                compute_cnn_output(self.rgb_cnn, observations[self.rgb_uuid])\n            )\n\n        if self.depth_uuid is not None:\n            use_agent = check_use_agent(len(observations[self.depth_uuid].shape) == 6)\n            cnn_output_list.append(\n                compute_cnn_output(self.depth_cnn, observations[self.depth_uuid])\n            )\n\n        if use_agent:\n            channels_dim = 3  # [step, sampler, agent, channel (, height, width)]\n        else:\n            channels_dim = 2  # [step, sampler, channel (, height, width)]\n\n        return torch.cat(cnn_output_list, dim=channels_dim)\n\n\nclass RNNStateEncoder(nn.Module):\n    \"\"\"A simple RNN-based model playing a role in many baseline embodied-\n    navigation agents.\n\n    See `seq_forward` for more details of how this model is used.\n    \"\"\"\n\n    def __init__(\n        self,\n        input_size: int,\n        hidden_size: int,\n        num_layers: int = 1,\n        rnn_type: str = \"GRU\",\n        trainable_masked_hidden_state: bool = False,\n    ):\n        \"\"\"An RNN for encoding the state in RL. Supports masking the hidden\n        state during various timesteps in the forward pass.\n\n        # Parameters\n\n        input_size : The input size of the RNN.\n        hidden_size : The hidden size.\n        num_layers : The number of recurrent layers.\n        rnn_type : The RNN cell type.  
Must be GRU or LSTM.\n        trainable_masked_hidden_state : If `True` the initial hidden state (used at the start of a Task)\n            is trainable (as opposed to being a vector of zeros).\n        \"\"\"\n\n        super().__init__()\n        self._num_recurrent_layers = num_layers\n        self._rnn_type = rnn_type\n\n        self.rnn = getattr(torch.nn, rnn_type)(\n            input_size=input_size, hidden_size=hidden_size, num_layers=num_layers\n        )\n\n        self.trainable_masked_hidden_state = trainable_masked_hidden_state\n        if trainable_masked_hidden_state:\n            self.init_hidden_state = nn.Parameter(\n                0.1 * torch.randn((num_layers, 1, hidden_size)), requires_grad=True\n            )\n\n        self.layer_init()\n\n    def layer_init(self):\n        \"\"\"Initialize the RNN parameters in the model.\"\"\"\n        for name, param in self.rnn.named_parameters():\n            if \"weight\" in name:\n                nn.init.orthogonal_(param)\n            elif \"bias\" in name:\n                nn.init.constant_(param, 0)\n\n    @property\n    def num_recurrent_layers(self) -> int:\n        \"\"\"The number of recurrent layers in the network.\"\"\"\n        return self._num_recurrent_layers * (2 if \"LSTM\" in self._rnn_type else 1)\n\n    def _pack_hidden(\n        self, hidden_states: Union[torch.FloatTensor, Sequence[torch.FloatTensor]]\n    ) -> torch.FloatTensor:\n        \"\"\"Stacks hidden states in an LSTM together (if using a GRU rather than\n        an LSTM this is just the identity).\n\n        # Parameters\n\n        hidden_states : The hidden states to (possibly) stack.\n        \"\"\"\n        if \"LSTM\" in self._rnn_type:\n            hidden_states = cast(\n                torch.FloatTensor,\n                torch.cat([hidden_states[0], hidden_states[1]], dim=0),\n            )\n        return cast(torch.FloatTensor, hidden_states)\n\n    def _unpack_hidden(\n        self, hidden_states: 
torch.FloatTensor\n    ) -> Union[torch.FloatTensor, Tuple[torch.FloatTensor, torch.FloatTensor]]:\n        \"\"\"Partial inverse of `_pack_hidden` (exact if there are 2 hidden\n        layers).\"\"\"\n        if \"LSTM\" in self._rnn_type:\n            new_hidden_states = (\n                hidden_states[0 : self._num_recurrent_layers],\n                hidden_states[self._num_recurrent_layers :],\n            )\n            return cast(Tuple[torch.FloatTensor, torch.FloatTensor], new_hidden_states)\n        return cast(torch.FloatTensor, hidden_states)\n\n    def _mask_hidden(\n        self,\n        hidden_states: Union[Tuple[torch.FloatTensor, ...], torch.FloatTensor],\n        masks: torch.FloatTensor,\n    ) -> Union[Tuple[torch.FloatTensor, ...], torch.FloatTensor]:\n        \"\"\"Mask input hidden states given `masks`. Useful when masks represent\n        steps on which a task has completed.\n\n        # Parameters\n\n        hidden_states : The hidden states.\n        masks : Masks to apply to hidden states (see seq_forward).\n\n        # Returns\n\n        Masked hidden states. 
Here masked hidden states will be replaced with\n        either all zeros (if `trainable_masked_hidden_state` was False) and will\n        otherwise be a learnable collection of parameters.\n        \"\"\"\n        if not self.trainable_masked_hidden_state:\n            if isinstance(hidden_states, tuple):\n                hidden_states = tuple(\n                    cast(torch.FloatTensor, v * masks) for v in hidden_states\n                )\n            else:\n                hidden_states = cast(torch.FloatTensor, masks * hidden_states)\n        else:\n            if isinstance(hidden_states, tuple):\n                # noinspection PyTypeChecker\n                hidden_states = tuple(\n                    v * masks  # type:ignore\n                    + (1.0 - masks) * (self.init_hidden_state.repeat(1, v.shape[1], 1))  # type: ignore\n                    for v in hidden_states  # type:ignore\n                )  # type: ignore\n            else:\n                # noinspection PyTypeChecker\n                hidden_states = masks * hidden_states + (1 - masks) * (  # type: ignore\n                    self.init_hidden_state.repeat(1, hidden_states.shape[1], 1)\n                )\n\n        return hidden_states\n\n    def single_forward(\n        self,\n        x: torch.FloatTensor,\n        hidden_states: torch.FloatTensor,\n        masks: torch.FloatTensor,\n    ) -> Tuple[\n        torch.FloatTensor, Union[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]\n    ]:\n        \"\"\"Forward for a single-step input.\"\"\"\n        (\n            x,\n            hidden_states,\n            masks,\n            mem_agent,\n            obs_agent,\n            nsteps,\n            nsamplers,\n            nagents,\n        ) = self.adapt_input(x, hidden_states, masks)\n\n        unpacked_hidden_states = self._unpack_hidden(hidden_states)\n\n        x, unpacked_hidden_states = self.rnn(\n            x,\n            self._mask_hidden(\n                unpacked_hidden_states, 
cast(torch.FloatTensor, masks[0].view(1, -1, 1))\n            ),\n        )\n\n        return self.adapt_result(\n            x,\n            self._pack_hidden(unpacked_hidden_states),\n            mem_agent,\n            obs_agent,\n            nsteps,\n            nsamplers,\n            nagents,\n        )\n\n    def adapt_input(\n        self,\n        x: torch.FloatTensor,\n        hidden_states: torch.FloatTensor,\n        masks: torch.FloatTensor,\n    ) -> Tuple[\n        torch.FloatTensor,\n        torch.FloatTensor,\n        torch.FloatTensor,\n        bool,\n        bool,\n        int,\n        int,\n        int,\n    ]:\n        nsteps, nsamplers = masks.shape[:2]\n\n        assert len(hidden_states.shape) in [\n            3,\n            4,\n        ], \"hidden_states must be [layer, sampler, hidden] or [layer, sampler, agent, hidden]\"\n\n        assert len(x.shape) in [\n            3,\n            4,\n        ], \"observations must be [step, sampler, data] or [step, sampler, agent, data]\"\n\n        nagents = 1\n        mem_agent: bool\n        if len(hidden_states.shape) == 4:  # [layer, sampler, agent, hidden]\n            mem_agent = True\n            nagents = hidden_states.shape[2]\n        else:  # [layer, sampler, hidden]\n            mem_agent = False\n\n        obs_agent: bool\n        if len(x.shape) == 4:  # [step, sampler, agent, dims]\n            obs_agent = True\n        else:  # [step, sampler, dims]\n            obs_agent = False\n\n        # Flatten (nsamplers, nagents)\n        x = x.view(nsteps, nsamplers * nagents, -1)  # type:ignore\n        masks = masks.expand(-1, -1, nagents).reshape(  # type:ignore\n            nsteps, nsamplers * nagents\n        )\n\n        # Flatten (nsamplers, nagents) and remove step dim\n        hidden_states = hidden_states.view(  # type:ignore\n            self.num_recurrent_layers, nsamplers * nagents, -1\n        )\n\n        # noinspection PyTypeChecker\n        return x, hidden_states, masks, 
mem_agent, obs_agent, nsteps, nsamplers, nagents\n\n    def adapt_result(\n        self,\n        outputs: torch.FloatTensor,\n        hidden_states: torch.FloatTensor,\n        mem_agent: bool,\n        obs_agent: bool,\n        nsteps: int,\n        nsamplers: int,\n        nagents: int,\n    ) -> Tuple[\n        torch.FloatTensor,\n        torch.FloatTensor,\n    ]:\n        output_dims = (nsteps, nsamplers) + ((nagents, -1) if obs_agent else (-1,))\n        hidden_dims = (self.num_recurrent_layers, nsamplers) + (\n            (nagents, -1) if mem_agent else (-1,)\n        )\n\n        outputs = cast(torch.FloatTensor, outputs.view(*output_dims))\n        hidden_states = cast(\n            torch.FloatTensor,\n            hidden_states.view(*hidden_dims),\n        )\n\n        return outputs, hidden_states\n\n    def seq_forward(  # type: ignore\n        self,\n        x: torch.FloatTensor,\n        hidden_states: torch.FloatTensor,\n        masks: torch.FloatTensor,\n    ) -> Tuple[\n        torch.FloatTensor, Union[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]\n    ]:\n        \"\"\"Forward for a sequence of length T.\n\n        # Parameters\n\n        x : (Steps, Samplers, Agents, -1) tensor.\n        hidden_states : The starting hidden states.\n        masks : A (Steps, Samplers, Agents) tensor.\n            The masks to be applied to hidden state at every timestep, equal to 0 whenever the previous step finalized\n            the task, 1 elsewhere.\n        \"\"\"\n        (\n            x,\n            hidden_states,\n            masks,\n            mem_agent,\n            obs_agent,\n            nsteps,\n            nsamplers,\n            nagents,\n        ) = self.adapt_input(x, hidden_states, masks)\n\n        # steps in sequence which have zero for any episode. 
Assume t=0 has\n        # a zero in it.\n        has_zeros = (masks[1:] == 0.0).any(dim=-1).nonzero().squeeze().cpu()\n        # +1 to correct the masks[1:]\n        if has_zeros.dim() == 0:\n            # handle scalar\n            has_zeros = [has_zeros.item() + 1]  # type: ignore\n        else:\n            has_zeros = (has_zeros + 1).numpy().tolist()\n        # add t=0 and t=T to the list\n        has_zeros = cast(List[int], [0] + has_zeros + [nsteps])\n\n        unpacked_hidden_states = self._unpack_hidden(\n            cast(torch.FloatTensor, hidden_states)\n        )\n\n        outputs = []\n        for i in range(len(has_zeros) - 1):\n            # process steps that don't have any zeros in masks together\n            start_idx = int(has_zeros[i])\n            end_idx = int(has_zeros[i + 1])\n\n            # noinspection PyTypeChecker\n            rnn_scores, unpacked_hidden_states = self.rnn(\n                x[start_idx:end_idx],\n                self._mask_hidden(\n                    unpacked_hidden_states,\n                    cast(torch.FloatTensor, masks[start_idx].view(1, -1, 1)),\n                ),\n            )\n\n            outputs.append(rnn_scores)\n\n        return self.adapt_result(\n            cast(torch.FloatTensor, torch.cat(outputs, dim=0)),\n            self._pack_hidden(unpacked_hidden_states),\n            mem_agent,\n            obs_agent,\n            nsteps,\n            nsamplers,\n            nagents,\n        )\n\n    def forward(  # type: ignore\n        self,\n        x: torch.FloatTensor,\n        hidden_states: torch.FloatTensor,\n        masks: torch.FloatTensor,\n    ) -> Tuple[\n        torch.FloatTensor, Union[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]\n    ]:\n        nsteps = masks.shape[0]\n        if nsteps == 1:\n            return self.single_forward(x, hidden_states, masks)\n        return self.seq_forward(x, hidden_states, masks)\n\n\nclass LinearActorCritic(ActorCriticModel[CategoricalDistr]):\n    def 
__init__(\n        self,\n        input_uuid: str,\n        action_space: gym.spaces.Discrete,\n        observation_space: SpaceDict,\n    ):\n        super().__init__(action_space=action_space, observation_space=observation_space)\n\n        assert (\n            input_uuid in observation_space.spaces\n        ), \"LinearActorCritic expects only a single observational input.\"\n        self.input_uuid = input_uuid\n\n        box_space: gym.spaces.Box = observation_space[self.input_uuid]\n        assert isinstance(box_space, gym.spaces.Box), (\n            \"LinearActorCritic requires that\"\n            \"observation space corresponding to the input uuid is a Box space.\"\n        )\n        assert len(box_space.shape) == 1\n        self.in_dim = box_space.shape[0]\n\n        self.linear = nn.Linear(self.in_dim, action_space.n + 1)\n\n        nn.init.orthogonal_(self.linear.weight)\n        nn.init.constant_(self.linear.bias, 0)\n\n    # noinspection PyMethodMayBeStatic\n    def _recurrent_memory_specification(self):\n        return None\n\n    def forward(self, observations, memory, prev_actions, masks):\n        out = self.linear(observations[self.input_uuid])\n\n        # noinspection PyArgumentList\n        return (\n            ActorCriticOutput(\n                # ensure [steps, samplers, ...]\n                distributions=CategoricalDistr(logits=out[..., :-1]),\n                # ensure [steps, samplers, flattened]\n                values=cast(torch.FloatTensor, out[..., -1:].view(*out.shape[:2], -1)),\n                extras={},\n            ),\n            None,\n        )\n\n\nclass RNNActorCritic(ActorCriticModel[Distr]):\n    def __init__(\n        self,\n        input_uuid: str,\n        action_space: gym.spaces.Discrete,\n        observation_space: SpaceDict,\n        hidden_size: int = 128,\n        num_layers: int = 1,\n        rnn_type: str = \"GRU\",\n        head_type: Callable[..., ActorCriticModel[Distr]] = LinearActorCritic,\n    ):\n        
super().__init__(action_space=action_space, observation_space=observation_space)\n        self.hidden_size = hidden_size\n        self.rnn_type = rnn_type\n\n        assert (\n            input_uuid in observation_space.spaces\n        ), \"LinearActorCritic expects only a single observational input.\"\n        self.input_uuid = input_uuid\n\n        box_space: gym.spaces.Box = observation_space[self.input_uuid]\n        assert isinstance(box_space, gym.spaces.Box), (\n            \"RNNActorCritic requires that\"\n            \"observation space corresponding to the input uuid is a Box space.\"\n        )\n        assert len(box_space.shape) == 1\n        self.in_dim = box_space.shape[0]\n\n        self.state_encoder = RNNStateEncoder(\n            input_size=self.in_dim,\n            hidden_size=hidden_size,\n            num_layers=num_layers,\n            rnn_type=rnn_type,\n            trainable_masked_hidden_state=True,\n        )\n\n        self.head_uuid = \"{}_{}\".format(\"rnn\", input_uuid)\n\n        self.ac_nonrecurrent_head: ActorCriticModel[Distr] = head_type(\n            input_uuid=self.head_uuid,\n            action_space=action_space,\n            observation_space=SpaceDict(\n                {\n                    self.head_uuid: gym.spaces.Box(\n                        low=np.float32(0.0), high=np.float32(1.0), shape=(hidden_size,)\n                    )\n                }\n            ),\n        )\n\n        self.memory_key = \"rnn\"\n\n    @property\n    def recurrent_hidden_state_size(self) -> int:\n        return self.hidden_size\n\n    @property\n    def num_recurrent_layers(self) -> int:\n        return self.state_encoder.num_recurrent_layers\n\n    def _recurrent_memory_specification(self):\n        return {\n            self.memory_key: (\n                (\n                    (\"layer\", self.num_recurrent_layers),\n                    (\"sampler\", None),\n                    (\"hidden\", self.recurrent_hidden_state_size),\n           
     ),\n                torch.float32,\n            )\n        }\n\n    def forward(  # type:ignore\n        self,\n        observations: Dict[str, Union[torch.FloatTensor, Dict[str, Any]]],\n        memory: Memory,\n        prev_actions: torch.Tensor,\n        masks: torch.FloatTensor,\n    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:\n\n        if self.memory_key not in memory:\n            get_logger().warning(\n                f\"Key {self.memory_key} not found in memory,\"\n                f\" initializing this as all zeros.\"\n            )\n\n            obs = observations[self.input_uuid]\n            memory.check_append(\n                key=self.memory_key,\n                tensor=obs.new(\n                    self.num_recurrent_layers,\n                    obs.shape[1],\n                    self.recurrent_hidden_state_size,\n                )\n                .float()\n                .zero_(),\n                sampler_dim=1,\n            )\n\n        rnn_out, mem_return = self.state_encoder(\n            x=observations[self.input_uuid],\n            hidden_states=memory.tensor(self.memory_key),\n            masks=masks,\n        )\n\n        # noinspection PyCallingNonCallable\n        out, _ = self.ac_nonrecurrent_head(\n            observations={self.head_uuid: rnn_out},\n            memory=None,\n            prev_actions=prev_actions,\n            masks=masks,\n        )\n\n        # noinspection PyArgumentList\n        return (\n            out,\n            memory.set_tensor(self.memory_key, mem_return),\n        )\n"
  },
  {
    "path": "allenact/embodiedai/models/fusion_models.py",
    "content": "# Original work Copyright (c) Facebook, Inc. and its affiliates.\n# Modified work Copyright (c) Allen Institute for AI\n# This source code is licensed under the MIT license found in the\n# LICENSE file in the root directory of this source tree.\n# Adapted from https://github.com/joel99/habitat-pointnav-aux/blob/master/habitat_baselines/\n\nimport math\nfrom typing import Tuple\n\nimport torch\nimport torch.nn as nn\n\n\nclass Fusion(nn.Module):\n    \"\"\"Base class of belief fusion model from Auxiliary Tasks Speed Up Learning\n    PointGoal Navigation (Ye, 2020) Child class should implement\n    `get_belief_weights` function to generate weights to fuse the beliefs from\n    all the auxiliary task into one.\"\"\"\n\n    def __init__(self, hidden_size, obs_embed_size, num_tasks):\n        super().__init__()\n        self.hidden_size = hidden_size  # H\n        self.obs_embed_size = obs_embed_size  # Z\n        self.num_tasks = num_tasks  # k\n\n    def forward(\n        self,\n        all_beliefs: torch.FloatTensor,  # (T, N, H, K)\n        obs_embeds: torch.FloatTensor,  # (T, N, Z)\n    ) -> Tuple[torch.FloatTensor, torch.FloatTensor]:  # (T, N, H), (T, N, K)\n\n        num_steps, num_samplers, _, _ = all_beliefs.shape\n        all_beliefs = all_beliefs.view(\n            num_steps * num_samplers, self.hidden_size, self.num_tasks\n        )\n        obs_embeds = obs_embeds.view(num_steps * num_samplers, -1)\n\n        weights = self.get_belief_weights(\n            all_beliefs=all_beliefs,\n            obs_embeds=obs_embeds,  # (T*N, H, K)  # (T*N, Z)\n        ).unsqueeze(\n            -1\n        )  # (T*N, K, 1)\n\n        beliefs = torch.bmm(all_beliefs, weights)  # (T*N, H, 1)\n\n        beliefs = beliefs.squeeze(-1).view(num_steps, num_samplers, self.hidden_size)\n        weights = weights.squeeze(-1).view(num_steps, num_samplers, self.num_tasks)\n\n        return beliefs, weights\n\n    def get_belief_weights(\n        self,\n        
all_beliefs: torch.FloatTensor,  # (T*N, H, K)\n        obs_embeds: torch.FloatTensor,  # (T*N, Z)\n    ) -> torch.FloatTensor:  # (T*N, K)\n        raise NotImplementedError()\n\n\nclass AverageFusion(Fusion):\n    UUID = \"avg\"\n\n    def get_belief_weights(\n        self,\n        all_beliefs: torch.FloatTensor,  # (T*N, H, K)\n        obs_embeds: torch.FloatTensor,  # (T*N, Z)\n    ) -> torch.FloatTensor:  # (T*N, K)\n\n        batch_size = all_beliefs.shape[0]\n        weights = torch.ones(batch_size, self.num_tasks).to(all_beliefs)\n        weights /= self.num_tasks\n        return weights\n\n\nclass SoftmaxFusion(Fusion):\n    \"\"\"Situational Fusion of Visual Representation for Visual Navigation\n    https://arxiv.org/abs/1908.09073.\"\"\"\n\n    UUID = \"smax\"\n\n    def __init__(self, hidden_size, obs_embed_size, num_tasks):\n        super().__init__(hidden_size, obs_embed_size, num_tasks)\n        # mapping from rnn input to task\n        # ignore beliefs\n        self.linear = nn.Linear(obs_embed_size, num_tasks)\n\n    def get_belief_weights(\n        self,\n        all_beliefs: torch.Tensor,  # (T*N, H, K)\n        obs_embeds: torch.Tensor,  # (T*N, Z)\n    ) -> torch.Tensor:  # (T*N, K)\n\n        scores = self.linear(obs_embeds)  # (T*N, K)\n        weights = torch.softmax(scores, dim=-1)\n        return weights\n\n\nclass AttentiveFusion(Fusion):\n    \"\"\"Attention is All You Need https://arxiv.org/abs/1706.03762 i.e. 
scaled\n    dot-product attention.\"\"\"\n\n    UUID = \"attn\"\n\n    def __init__(self, hidden_size, obs_embed_size, num_tasks):\n        super().__init__(hidden_size, obs_embed_size, num_tasks)\n        self.linear = nn.Linear(obs_embed_size, hidden_size)\n\n    def get_belief_weights(\n        self,\n        all_beliefs: torch.Tensor,  # (T*N, H, K)\n        obs_embeds: torch.Tensor,  # (T*N, Z)\n    ) -> torch.Tensor:  # (T*N, K)\n\n        queries = self.linear(obs_embeds).unsqueeze(1)  # (T*N, 1, H)\n        scores = torch.bmm(queries, all_beliefs).squeeze(1)  # (T*N, K)\n        weights = torch.softmax(\n            scores / math.sqrt(self.hidden_size), dim=-1\n        )  # (T*N, K)\n        return weights\n"
  },
  {
    "path": "allenact/embodiedai/models/resnet.py",
    "content": "# Original work Copyright (c) Facebook, Inc. and its affiliates.\n# Modified work Copyright (c) Allen Institute for AI\n# This source code is licensed under the MIT license found in the\n# LICENSE file in the root directory of this source tree.\n# Adapted from https://github.com/joel99/habitat-pointnav-aux/blob/master/habitat_baselines/\n\nfrom typing import Optional\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom gym.spaces.dict import Dict as SpaceDict\n\nfrom allenact.utils.model_utils import Flatten\nfrom allenact.utils.system import get_logger\n\n\ndef conv3x3(in_planes, out_planes, stride=1, groups=1):\n    \"\"\"3x3 convolution with padding.\"\"\"\n    return nn.Conv2d(\n        in_planes,\n        out_planes,\n        kernel_size=3,\n        stride=stride,\n        padding=1,\n        bias=False,\n        groups=groups,\n    )\n\n\ndef conv1x1(in_planes, out_planes, stride=1):\n    \"\"\"1x1 convolution.\"\"\"\n    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n    resneXt = False\n\n    def __init__(\n        self,\n        inplanes,\n        planes,\n        ngroups,\n        stride=1,\n        downsample=None,\n        cardinality=1,\n    ):\n        super(BasicBlock, self).__init__()\n        self.convs = nn.Sequential(\n            conv3x3(inplanes, planes, stride, groups=cardinality),\n            nn.GroupNorm(ngroups, planes),\n            nn.ReLU(True),\n            conv3x3(planes, planes, groups=cardinality),\n            nn.GroupNorm(ngroups, planes),\n        )\n        self.downsample = downsample\n        self.relu = nn.ReLU(True)\n\n    def forward(self, x):\n        residual = x\n\n        out = self.convs(x)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        return self.relu(out + residual)\n\n\ndef _build_bottleneck_branch(inplanes, planes, 
ngroups, stride, expansion, groups=1):\n    return nn.Sequential(\n        conv1x1(inplanes, planes),\n        nn.GroupNorm(ngroups, planes),\n        nn.ReLU(True),\n        conv3x3(planes, planes, stride, groups=groups),\n        nn.GroupNorm(ngroups, planes),\n        nn.ReLU(True),\n        conv1x1(planes, planes * expansion),\n        nn.GroupNorm(ngroups, planes * expansion),\n    )\n\n\nclass SE(nn.Module):\n    def __init__(self, planes, r=16):\n        super().__init__()\n        self.squeeze = nn.AdaptiveAvgPool2d(1)\n        self.excite = nn.Sequential(\n            nn.Linear(planes, int(planes / r)),\n            nn.ReLU(True),\n            nn.Linear(int(planes / r), planes),\n            nn.Sigmoid(),\n        )\n\n    def forward(self, x):\n        b, c, _, _ = x.size()\n        x = self.squeeze(x)\n        x = x.view(b, c)\n        x = self.excite(x)\n\n        return x.view(b, c, 1, 1)\n\n\ndef _build_se_branch(planes, r=16):\n    return SE(planes, r)\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n    resneXt = False\n\n    def __init__(\n        self,\n        inplanes,\n        planes,\n        ngroups,\n        stride=1,\n        downsample=None,\n        cardinality=1,\n    ):\n        super().__init__()\n        self.convs = _build_bottleneck_branch(\n            inplanes,\n            planes,\n            ngroups,\n            stride,\n            self.expansion,\n            groups=cardinality,\n        )\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n\n    def _impl(self, x):\n        identity = x\n\n        out = self.convs(x)\n\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        return self.relu(out + identity)\n\n    def forward(self, x):\n        return self._impl(x)\n\n\nclass SEBottleneck(Bottleneck):\n    def __init__(\n        self,\n        inplanes,\n        planes,\n        ngroups,\n        stride=1,\n        downsample=None,\n        
cardinality=1,\n    ):\n        super().__init__(inplanes, planes, ngroups, stride, downsample, cardinality)\n\n        self.se = _build_se_branch(planes * self.expansion)\n\n    def _impl(self, x):\n        identity = x\n\n        out = self.convs(x)\n        out = self.se(out) * out\n\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        return self.relu(out + identity)\n\n\nclass SEResNeXtBottleneck(SEBottleneck):\n    expansion = 2\n    resneXt = True\n\n\nclass ResNeXtBottleneck(Bottleneck):\n    expansion = 2\n    resneXt = True\n\n\nclass GroupNormResNet(nn.Module):\n    def __init__(self, in_channels, base_planes, ngroups, block, layers, cardinality=1):\n        super(GroupNormResNet, self).__init__()\n        self.conv1 = nn.Sequential(\n            nn.Conv2d(\n                in_channels,\n                base_planes,\n                kernel_size=7,\n                stride=2,\n                padding=3,\n                bias=False,\n            ),\n            nn.GroupNorm(ngroups, base_planes),\n            nn.ReLU(True),\n        )\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.cardinality = cardinality\n\n        self.inplanes = base_planes\n        if block.resneXt:\n            base_planes *= 2\n\n        self.layer1 = self._make_layer(block, ngroups, base_planes, layers[0])\n        self.layer2 = self._make_layer(\n            block, ngroups, base_planes * 2, layers[1], stride=2\n        )\n        self.layer3 = self._make_layer(\n            block, ngroups, base_planes * 2 * 2, layers[2], stride=2\n        )\n        self.layer4 = self._make_layer(\n            block, ngroups, base_planes * 2 * 2 * 2, layers[3], stride=2\n        )\n\n        self.final_channels = self.inplanes\n        self.final_spatial_compress = 1.0 / (2**5)\n\n    def _make_layer(self, block, ngroups, planes, blocks, stride=1):\n        downsample = None\n        if stride != 1 or self.inplanes != 
planes * block.expansion:\n            downsample = nn.Sequential(\n                conv1x1(self.inplanes, planes * block.expansion, stride),\n                nn.GroupNorm(ngroups, planes * block.expansion),\n            )\n\n        layers = [\n            block(\n                self.inplanes,\n                planes,\n                ngroups,\n                stride,\n                downsample,\n                cardinality=self.cardinality,\n            )\n        ]\n        self.inplanes = planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(block(self.inplanes, planes, ngroups))\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.maxpool(x)\n\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n\n        return x\n\n\ndef gnresnet18(in_channels, base_planes, ngroups):\n    model = GroupNormResNet(in_channels, base_planes, ngroups, BasicBlock, [2, 2, 2, 2])\n\n    return model\n\n\ndef gnresnet50(in_channels, base_planes, ngroups):\n    model = GroupNormResNet(in_channels, base_planes, ngroups, Bottleneck, [3, 4, 6, 3])\n\n    return model\n\n\ndef gnresneXt50(in_channels, base_planes, ngroups):\n    model = GroupNormResNet(\n        in_channels,\n        base_planes,\n        ngroups,\n        ResNeXtBottleneck,\n        [3, 4, 6, 3],\n        cardinality=int(base_planes / 2),\n    )\n\n    return model\n\n\ndef se_gnresnet50(in_channels, base_planes, ngroups):\n    model = GroupNormResNet(\n        in_channels, base_planes, ngroups, SEBottleneck, [3, 4, 6, 3]\n    )\n\n    return model\n\n\ndef se_gnresneXt50(in_channels, base_planes, ngroups):\n    model = GroupNormResNet(\n        in_channels,\n        base_planes,\n        ngroups,\n        SEResNeXtBottleneck,\n        [3, 4, 6, 3],\n        cardinality=int(base_planes / 2),\n    )\n\n    return model\n\n\ndef se_gnresneXt101(in_channels, 
base_planes, ngroups):\n    model = GroupNormResNet(\n        in_channels,\n        base_planes,\n        ngroups,\n        SEResNeXtBottleneck,\n        [3, 4, 23, 3],\n        cardinality=int(base_planes / 2),\n    )\n\n    return model\n\n\nclass GroupNormResNetEncoder(nn.Module):\n    def __init__(\n        self,\n        observation_space: SpaceDict,\n        rgb_uuid: Optional[str],\n        depth_uuid: Optional[str],\n        output_size: int,\n        baseplanes=32,\n        ngroups=32,\n        make_backbone=None,\n    ):\n        super().__init__()\n\n        self._inputs = []\n\n        self.rgb_uuid = rgb_uuid\n        if self.rgb_uuid is not None:\n            assert self.rgb_uuid in observation_space.spaces\n            self._n_input_rgb = observation_space.spaces[self.rgb_uuid].shape[2]\n            assert self._n_input_rgb >= 0\n            self._inputs.append(self.rgb_uuid)\n        else:\n            self._n_input_rgb = 0\n\n        self.depth_uuid = depth_uuid\n        if self.depth_uuid is not None:\n            assert self.depth_uuid in observation_space.spaces\n            self._n_input_depth = observation_space.spaces[self.depth_uuid].shape[2]\n            assert self._n_input_depth >= 0\n            self._inputs.append(self.depth_uuid)\n        else:\n            self._n_input_depth = 0\n\n        if not self.is_blind:\n            spatial_size = (\n                observation_space.spaces[self._inputs[0]].shape[0] // 2\n            )  # H (=W) / 2\n\n            # RGBD into one model\n            input_channels = self._n_input_rgb + self._n_input_depth  # C\n\n            self.backbone = make_backbone(input_channels, baseplanes, ngroups)\n\n            final_spatial = int(\n                np.ceil(spatial_size * self.backbone.final_spatial_compress)\n            )  # fix bug in habitat that uses int()\n            after_compression_flat_size = 2048\n            num_compression_channels = int(\n                
round(after_compression_flat_size / (final_spatial**2))\n            )\n            self.compression = nn.Sequential(\n                nn.Conv2d(\n                    self.backbone.final_channels,\n                    num_compression_channels,\n                    kernel_size=3,\n                    padding=1,\n                    bias=False,\n                ),\n                nn.GroupNorm(1, num_compression_channels),\n                nn.ReLU(True),\n            )\n\n            self.output_shape = (\n                num_compression_channels,\n                final_spatial,\n                final_spatial,\n            )\n\n            self.head = nn.Sequential(\n                Flatten(),\n                nn.Linear(np.prod(self.output_shape), output_size),\n                nn.ReLU(True),\n            )\n\n            self.layer_init()\n\n    @property\n    def is_blind(self):\n        return self._n_input_rgb + self._n_input_depth == 0\n\n    def layer_init(self):\n        for layer in self.modules():\n            if isinstance(layer, (nn.Conv2d, nn.Linear)):\n                nn.init.kaiming_normal_(layer.weight, nn.init.calculate_gain(\"relu\"))\n                if layer.bias is not None:\n                    nn.init.constant_(layer.bias, val=0)\n        get_logger().debug(\"Initializing resnet encoder\")\n\n    def forward(self, observations):\n        if self.is_blind:\n            return None\n\n        # TODO: the reshape follows compute_cnn_output()\n        # but it's hard to make the forward as a nn.Module as cnn param\n        nagents: Optional[int] = None\n        nsteps: Optional[int] = None\n        nsamplers: Optional[int] = None\n        assert len(self._inputs) > 0\n\n        cnn_input = []\n        for mode in self._inputs:\n            mode_obs = observations[mode]\n            assert len(mode_obs.shape) in [\n                5,\n                6,\n            ], \"CNN input must have shape [STEP, SAMPLER, (AGENT,) dim1, dim2, dim3]\"\n         
   if len(mode_obs.shape) == 6:\n                nsteps, nsamplers, nagents = mode_obs.shape[:3]\n            else:\n                nsteps, nsamplers = mode_obs.shape[:2]\n            # Make FLAT_BATCH = nsteps * nsamplers (* nagents)\n            mode_obs = mode_obs.view(\n                (-1,) + mode_obs.shape[2 + int(nagents is not None) :]\n            )\n            # permute tensor to dimension [BATCH x CHANNEL x HEIGHT X WIDTH]\n            mode_obs = mode_obs.permute(0, 3, 1, 2)\n            cnn_input.append(mode_obs)\n\n        x = torch.cat(cnn_input, dim=1)\n        x = F.avg_pool2d(x, 2)  # 2x downsampling\n\n        x = self.backbone(x)  # (256, 4, 4)\n        x = self.compression(x)  # (128, 4, 4)\n        x = self.head(x)  # (2048) -> (hidden_size)\n\n        if nagents is not None:\n            x = x.reshape(\n                (\n                    nsteps,\n                    nsamplers,\n                    nagents,\n                )\n                + x.shape[1:]\n            )\n        else:\n            x = x.reshape(\n                (\n                    nsteps,\n                    nsamplers,\n                )\n                + x.shape[1:]\n            )\n\n        return x\n"
  },
  {
    "path": "allenact/embodiedai/models/visual_nav_models.py",
    "content": "from collections import OrderedDict\nfrom typing import Tuple, Dict, Optional, List, Sequence\nfrom typing import TypeVar\n\nimport gym\nimport torch\nimport torch.nn as nn\nfrom gym.spaces.dict import Dict as SpaceDict\n\nfrom allenact.algorithms.onpolicy_sync.policy import (\n    ActorCriticModel,\n    LinearCriticHead,\n    LinearActorHead,\n    ObservationType,\n    DistributionType,\n)\nfrom allenact.base_abstractions.distributions import CategoricalDistr\nfrom allenact.base_abstractions.misc import ActorCriticOutput, Memory\nfrom allenact.embodiedai.aux_losses.losses import MultiAuxTaskNegEntropyLoss\nfrom allenact.embodiedai.models.aux_models import AuxiliaryModel\nfrom allenact.embodiedai.models.basic_models import RNNStateEncoder\nfrom allenact.embodiedai.models.fusion_models import Fusion\nfrom allenact.utils.model_utils import FeatureEmbedding\nfrom allenact.utils.system import get_logger\n\nFusionType = TypeVar(\"FusionType\", bound=Fusion)\n\n\nclass VisualNavActorCritic(ActorCriticModel[CategoricalDistr]):\n    \"\"\"Base class of visual navigation / manipulation (or broadly, embodied AI)\n    model.\n\n    `forward_encoder` function requires implementation.\n    \"\"\"\n\n    action_space: gym.spaces.Discrete\n\n    def __init__(\n        self,\n        action_space: gym.spaces.Discrete,\n        observation_space: SpaceDict,\n        hidden_size=512,\n        multiple_beliefs=False,\n        beliefs_fusion: Optional[FusionType] = None,\n        auxiliary_uuids: Optional[List[str]] = None,\n        auxiliary_model_class=AuxiliaryModel,\n    ):\n        super().__init__(action_space=action_space, observation_space=observation_space)\n        self._hidden_size = hidden_size\n        assert multiple_beliefs == (beliefs_fusion is not None)\n        self.multiple_beliefs = multiple_beliefs\n        self.beliefs_fusion = beliefs_fusion\n        self.auxiliary_uuids = auxiliary_uuids\n        if isinstance(self.auxiliary_uuids, list) and 
len(self.auxiliary_uuids) == 0:\n            self.auxiliary_uuids = None\n\n        # Define the placeholders in init function\n        self.state_encoders: Optional[nn.ModuleDict] = None\n        self.aux_models: Optional[nn.ModuleDict] = None\n        self.actor: Optional[LinearActorHead] = None\n        self.critic: Optional[LinearCriticHead] = None\n        self.prev_action_embedder: Optional[FeatureEmbedding] = None\n\n        self.fusion_model: Optional[nn.Module] = None\n        self.belief_names: Optional[Sequence[str]] = None\n        self.auxiliary_model_class = auxiliary_model_class\n\n    def create_state_encoders(\n        self,\n        obs_embed_size: int,\n        prev_action_embed_size: int,\n        num_rnn_layers: int,\n        rnn_type: str,\n        add_prev_actions: bool,\n        add_prev_action_null_token: bool,\n        trainable_masked_hidden_state=False,\n    ):\n        rnn_input_size = obs_embed_size\n        self.prev_action_embedder = FeatureEmbedding(\n            input_size=int(add_prev_action_null_token) + self.action_space.n,\n            output_size=prev_action_embed_size if add_prev_actions else 0,\n        )\n        if add_prev_actions:\n            rnn_input_size += prev_action_embed_size\n\n        state_encoders = OrderedDict()  # perserve insertion order in py3.6\n        if self.multiple_beliefs:  # multiple belief model\n            for aux_uuid in self.auxiliary_uuids:\n                state_encoders[aux_uuid] = RNNStateEncoder(\n                    rnn_input_size,\n                    self._hidden_size,\n                    num_layers=num_rnn_layers,\n                    rnn_type=rnn_type,\n                    trainable_masked_hidden_state=trainable_masked_hidden_state,\n                )\n            # create fusion model\n            self.fusion_model = self.beliefs_fusion(\n                hidden_size=self._hidden_size,\n                obs_embed_size=obs_embed_size,\n                
num_tasks=len(self.auxiliary_uuids),\n            )\n\n        else:  # single belief model\n            state_encoders[\"single_belief\"] = RNNStateEncoder(\n                rnn_input_size,\n                self._hidden_size,\n                num_layers=num_rnn_layers,\n                rnn_type=rnn_type,\n                trainable_masked_hidden_state=trainable_masked_hidden_state,\n            )\n\n        self.state_encoders = nn.ModuleDict(state_encoders)\n\n        self.belief_names = list(self.state_encoders.keys())\n\n        get_logger().info(\n            \"there are {} belief models: {}\".format(\n                len(self.belief_names), self.belief_names\n            )\n        )\n\n    def load_state_dict(self, state_dict, **kwargs):\n        new_state_dict = OrderedDict()\n        for key in state_dict.keys():\n            if \"state_encoder.\" in key:  # old key name\n                new_key = key.replace(\"state_encoder.\", \"state_encoders.single_belief.\")\n            elif \"goal_visual_encoder.embed_class\" in key:\n                new_key = key.replace(\n                    \"goal_visual_encoder.embed_class\", \"goal_visual_encoder.embed_goal\"\n                )\n            else:\n                new_key = key\n            new_state_dict[new_key] = state_dict[key]\n\n        return super().load_state_dict(new_state_dict, **kwargs)  # compatible in keys\n\n    def create_actorcritic_head(self):\n        self.actor = LinearActorHead(self._hidden_size, self.action_space.n)\n        self.critic = LinearCriticHead(self._hidden_size)\n\n    def create_aux_models(self, obs_embed_size: int, action_embed_size: int):\n        if self.auxiliary_uuids is None:\n            return\n        aux_models = OrderedDict()\n        for aux_uuid in self.auxiliary_uuids:\n            aux_models[aux_uuid] = self.auxiliary_model_class(\n                aux_uuid=aux_uuid,\n                action_dim=self.action_space.n,\n                obs_embed_dim=obs_embed_size,\n   
             belief_dim=self._hidden_size,\n                action_embed_size=action_embed_size,\n            )\n\n        self.aux_models = nn.ModuleDict(aux_models)\n\n    @property\n    def num_recurrent_layers(self):\n        \"\"\"Number of recurrent hidden layers.\"\"\"\n        return list(self.state_encoders.values())[0].num_recurrent_layers\n\n    @property\n    def recurrent_hidden_state_size(self):\n        \"\"\"The recurrent hidden state size of a single model.\"\"\"\n        return self._hidden_size\n\n    def _recurrent_memory_specification(self):\n        return {\n            memory_key: (\n                (\n                    (\"layer\", self.num_recurrent_layers),\n                    (\"sampler\", None),\n                    (\"hidden\", self.recurrent_hidden_state_size),\n                ),\n                torch.float32,\n            )\n            for memory_key in self.belief_names\n        }\n\n    def forward_encoder(self, observations: ObservationType) -> torch.FloatTensor:\n        raise NotImplementedError(\"Obs Encoder Not Implemented\")\n\n    def fuse_beliefs(\n        self,\n        beliefs_dict: Dict[str, torch.FloatTensor],\n        obs_embeds: torch.FloatTensor,\n    ) -> Tuple[torch.FloatTensor, Optional[torch.FloatTensor]]:\n        all_beliefs = torch.stack(list(beliefs_dict.values()), dim=-1)  # (T, N, H, k)\n\n        if self.multiple_beliefs:  # call the fusion model\n            return self.fusion_model(all_beliefs=all_beliefs, obs_embeds=obs_embeds)\n        # single belief\n        beliefs = all_beliefs.squeeze(-1)  # (T,N,H)\n        return beliefs, None\n\n    def forward(  # type:ignore\n        self,\n        observations: ObservationType,\n        memory: Memory,\n        prev_actions: torch.Tensor,\n        masks: torch.FloatTensor,\n    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:\n        \"\"\"Processes input batched observations to produce new actor and critic\n        values. 
Processes input batched observations (along with prior hidden\n        states, previous actions, and masks denoting which recurrent hidden\n        states should be masked) and returns an `ActorCriticOutput` object\n        containing the model's policy (distribution over actions) and\n        evaluation of the current state (value).\n\n        # Parameters\n        observations : Batched input observations.\n        memory : `Memory` containing the hidden states from initial timepoints.\n        prev_actions : Tensor of previous actions taken.\n        masks : Masks applied to hidden states. See `RNNStateEncoder`.\n        # Returns\n        Tuple of the `ActorCriticOutput` and recurrent hidden state.\n        \"\"\"\n\n        # 1.1 use perception model (i.e. encoder) to get observation embeddings\n        obs_embeds = self.forward_encoder(observations)\n\n        # 1.2 use embedding model to get prev_action embeddings\n        if self.prev_action_embedder.input_size == self.action_space.n + 1:\n            # In this case we have a unique embedding for the start of an episode\n            prev_actions_embeds = self.prev_action_embedder(\n                torch.where(\n                    condition=0 != masks.view(*prev_actions.shape),\n                    input=prev_actions + 1,\n                    other=torch.zeros_like(prev_actions),\n                )\n            )\n        else:\n            prev_actions_embeds = self.prev_action_embedder(prev_actions)\n        joint_embeds = torch.cat((obs_embeds, prev_actions_embeds), dim=-1)  # (T, N, *)\n\n        # 2. use RNNs to get single/multiple beliefs\n        beliefs_dict = {}\n        for key, model in self.state_encoders.items():\n            beliefs_dict[key], rnn_hidden_states = model(\n                joint_embeds, memory.tensor(key), masks\n            )\n            memory.set_tensor(key, rnn_hidden_states)  # update memory here\n\n        # 3. 
fuse beliefs for multiple belief models\n        beliefs, task_weights = self.fuse_beliefs(\n            beliefs_dict, obs_embeds\n        )  # fused beliefs\n\n        # 4. prepare output\n        extras = (\n            {\n                aux_uuid: {\n                    \"beliefs\": (\n                        beliefs_dict[aux_uuid] if self.multiple_beliefs else beliefs\n                    ),\n                    \"obs_embeds\": obs_embeds,\n                    \"aux_model\": (\n                        self.aux_models[aux_uuid]\n                        if aux_uuid in self.aux_models\n                        else None\n                    ),\n                }\n                for aux_uuid in self.auxiliary_uuids\n            }\n            if self.auxiliary_uuids is not None\n            else {}\n        )\n\n        if self.multiple_beliefs:\n            extras[MultiAuxTaskNegEntropyLoss.UUID] = task_weights\n\n        actor_critic_output = ActorCriticOutput(\n            distributions=self.actor(beliefs),\n            values=self.critic(beliefs),\n            extras=extras,\n        )\n\n        return actor_critic_output, memory\n"
  },
  {
    "path": "allenact/embodiedai/preprocessors/__init__.py",
    "content": ""
  },
  {
    "path": "allenact/embodiedai/preprocessors/resnet.py",
    "content": "from typing import List, Callable, Optional, Any, cast, Dict\n\nimport gym\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom torchvision import models\n\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.utils.misc_utils import prepare_locals_for_super\n\n\nclass ResNetEmbedder(nn.Module):\n    def __init__(self, resnet, pool=True):\n        super().__init__()\n        self.model = resnet\n        self.pool = pool\n        self.eval()\n\n    def forward(self, x):\n        with torch.no_grad():\n            x = self.model.conv1(x)\n            x = self.model.bn1(x)\n            x = self.model.relu(x)\n            x = self.model.maxpool(x)\n\n            x = self.model.layer1(x)\n            x = self.model.layer2(x)\n            x = self.model.layer3(x)\n            x = self.model.layer4(x)\n\n            if not self.pool:\n                return x\n            else:\n                x = self.model.avgpool(x)\n                x = torch.flatten(x, 1)\n                return x\n\n\nclass ResNetPreprocessor(Preprocessor):\n    \"\"\"Preprocess RGB or depth image using a ResNet model.\"\"\"\n\n    def __init__(\n        self,\n        input_uuids: List[str],\n        output_uuid: str,\n        input_height: int,\n        input_width: int,\n        output_height: int,\n        output_width: int,\n        output_dims: int,\n        pool: bool,\n        torchvision_resnet_model: Callable[..., models.ResNet] = models.resnet18,\n        device: Optional[torch.device] = None,\n        device_ids: Optional[List[torch.device]] = None,\n        **kwargs: Any,\n    ):\n        def f(x, k):\n            assert k in x, \"{} must be set in ResNetPreprocessor\".format(k)\n            return x[k]\n\n        def optf(x, k, default):\n            return x[k] if k in x else default\n\n        self.input_height = input_height\n        self.input_width = input_width\n        self.output_height = output_height\n        
self.output_width = output_width\n        self.output_dims = output_dims\n        self.pool = pool\n        self.make_model = torchvision_resnet_model\n\n        self.device = torch.device(\"cpu\") if device is None else device\n        self.device_ids = device_ids or cast(\n            List[torch.device], list(range(torch.cuda.device_count()))\n        )\n\n        self._resnet: Optional[ResNetEmbedder] = None\n\n        low = -np.inf\n        high = np.inf\n        shape = (self.output_dims, self.output_height, self.output_width)\n\n        assert (\n            len(input_uuids) == 1\n        ), \"resnet preprocessor can only consume one observation type\"\n\n        observation_space = gym.spaces.Box(low=low, high=high, shape=shape)\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    @property\n    def resnet(self) -> ResNetEmbedder:\n        if self._resnet is None:\n            self._resnet = ResNetEmbedder(\n                self.make_model(pretrained=True).to(self.device), pool=self.pool\n            )\n        return self._resnet\n\n    def to(self, device: torch.device) -> \"ResNetPreprocessor\":\n        self._resnet = self.resnet.to(device)\n        self.device = device\n        return self\n\n    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:\n        x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2)  # bhwc -> bchw\n        # If the input is depth, repeat it across all 3 channels\n        if x.shape[1] == 1:\n            x = x.repeat(1, 3, 1, 1)\n        return self.resnet(x.to(self.device))\n"
  },
  {
    "path": "allenact/embodiedai/sensors/__init__.py",
    "content": ""
  },
  {
    "path": "allenact/embodiedai/sensors/vision_sensors.py",
    "content": "from abc import abstractmethod, ABC\nfrom typing import Optional, Tuple, Any, cast, Union, Sequence\n\nimport PIL\nimport gym\nimport numpy as np\nfrom torchvision import transforms\n\nfrom allenact.base_abstractions.misc import EnvType\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.base_abstractions.task import SubTaskType\nfrom allenact.utils.misc_utils import prepare_locals_for_super\nfrom allenact.utils.tensor_utils import ScaleBothSides\n\nIMAGENET_RGB_MEANS: Tuple[float, float, float] = (0.485, 0.456, 0.406)\nIMAGENET_RGB_STDS: Tuple[float, float, float] = (0.229, 0.224, 0.225)\n\n\nclass VisionSensor(Sensor[EnvType, SubTaskType]):\n    def __init__(\n        self,\n        mean: Union[Sequence[float], np.ndarray, None] = None,\n        stdev: Union[Sequence[float], np.ndarray, None] = None,\n        height: Optional[int] = None,\n        width: Optional[int] = None,\n        uuid: str = \"vision\",\n        output_shape: Optional[Tuple[int, ...]] = None,\n        output_channels: Optional[int] = None,\n        unnormalized_infimum: float = -np.inf,\n        unnormalized_supremum: float = np.inf,\n        scale_first: bool = True,\n        **kwargs: Any\n    ):\n        \"\"\"Initializer.\n\n        # Parameters\n\n        mean : The images will be normalized with the given mean\n        stdev : The images will be normalized with the given standard deviations.\n        height : If it's a non-negative integer and `width` is also non-negative integer, the image returned from the\n                environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.\n        width : If it's a non-negative integer and `height` is also non-negative integer, the image returned from the\n                environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.\n        uuid : The universally unique identifier for the sensor.\n        output_shape : Optional 
observation space shape (alternative to `output_channels`).\n        output_channels : Optional observation space number of channels (alternative to `output_shape`).\n        unnormalized_infimum : Lower limit(s) for the observation space range.\n        unnormalized_supremum : Upper limit(s) for the observation space range.\n        scale_first : Whether to scale image before normalization (if needed).\n        kwargs : Extra kwargs. Currently unused.\n        \"\"\"\n\n        self._norm_means = np.array(mean) if mean is not None else None\n        self._norm_sds = np.array(stdev) if stdev is not None else None\n\n        assert (self._norm_means is None) == (self._norm_sds is None), (\n            \"In VisionSensor's config, \"\n            \"either both mean/stdev must be None or neither.\"\n        )\n        self._should_normalize = self._norm_means is not None\n\n        self._height = height\n        self._width = width\n        assert (self._width is None) == (self._height is None), (\n            \"In VisionSensor's config, \"\n            \"either both height/width must be None or neither.\"\n        )\n\n        self._scale_first = scale_first\n\n        self.scaler: Optional[ScaleBothSides] = None\n        if self._width is not None:\n            self.scaler = ScaleBothSides(\n                width=cast(int, self._width), height=cast(int, self._height)\n            )\n\n        self.to_pil = transforms.ToPILImage()  # assumes mode=\"RGB\" for 3 channels\n\n        self._observation_space = self._make_observation_space(\n            output_shape=output_shape,\n            output_channels=output_channels,\n            unnormalized_infimum=unnormalized_infimum,\n            unnormalized_supremum=unnormalized_supremum,\n        )\n\n        assert int(PIL.__version__.split(\".\")[0]) != 7, (\n            \"We found that Pillow version >=7.* has broken scaling,\"\n            \" please downgrade to version 6.2.1 or upgrade to >=8.0.0\"\n        )\n\n        
observation_space = self._get_observation_space()\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def _make_observation_space(\n        self,\n        output_shape: Optional[Tuple[int, ...]],\n        output_channels: Optional[int],\n        unnormalized_infimum: float,\n        unnormalized_supremum: float,\n    ) -> gym.spaces.Box:\n        assert output_shape is None or output_channels is None, (\n            \"In VisionSensor's config, \"\n            \"only one of output_shape and output_channels can be not None.\"\n        )\n\n        shape: Optional[Tuple[int, ...]] = None\n        if output_shape is not None:\n            shape = output_shape\n        elif self._height is not None and output_channels is not None:\n            shape = (\n                cast(int, self._height),\n                cast(int, self._width),\n                cast(int, output_channels),\n            )\n\n        if not self._should_normalize or shape is None or len(shape) == 1:\n            return gym.spaces.Box(\n                low=np.float32(unnormalized_infimum),\n                high=np.float32(unnormalized_supremum),\n                shape=shape,\n            )\n        else:\n            out_shape = shape[:-1] + (1,)\n            low = np.tile(\n                (unnormalized_infimum - cast(np.ndarray, self._norm_means))\n                / cast(np.ndarray, self._norm_sds),\n                out_shape,\n            )\n            high = np.tile(\n                (unnormalized_supremum - cast(np.ndarray, self._norm_means))\n                / cast(np.ndarray, self._norm_sds),\n                out_shape,\n            )\n            return gym.spaces.Box(low=np.float32(low), high=np.float32(high))\n\n    def _get_observation_space(self):\n        return self._observation_space\n\n    @property\n    def height(self) -> Optional[int]:\n        \"\"\"Height that input image will be rescale to have.\n\n        # Returns\n\n        The height as a non-negative 
integer or `None` if no rescaling is done.\n        \"\"\"\n        return self._height\n\n    @property\n    def width(self) -> Optional[int]:\n        \"\"\"Width that input image will be rescale to have.\n\n        # Returns\n\n        The width as a non-negative integer or `None` if no rescaling is done.\n        \"\"\"\n        return self._width\n\n    @abstractmethod\n    def frame_from_env(self, env: EnvType, task: Optional[SubTaskType]) -> np.ndarray:\n        raise NotImplementedError\n\n    def process_img(self, img: np.ndarray):\n        assert (\n            np.issubdtype(img.dtype, np.float32)\n            and (len(img.shape) == 2 or img.shape[-1] == 1)\n        ) or (img.shape[-1] == 3 and np.issubdtype(img.dtype, np.uint8)), (\n            \"Input frame must either have 3 channels and be of\"\n            \" type np.uint8 or have one channel and be of type np.float32\"\n        )\n\n        if (\n            self._scale_first\n            and self.scaler is not None\n            and img.shape[:2] != (self._height, self._width)\n        ):\n            img = np.array(self.scaler(self.to_pil(img)), dtype=img.dtype)  # hwc\n        elif np.issubdtype(img.dtype, np.float32):\n            img = img.copy()\n\n        assert img.dtype in [np.uint8, np.float32]\n\n        if np.issubdtype(img.dtype, np.uint8):\n            img = img.astype(np.float32) / 255.0\n\n        if self._should_normalize:\n            img -= self._norm_means\n            img /= self._norm_sds\n\n        if (\n            (not self._scale_first)\n            and self.scaler is not None\n            and img.shape[:2] != (self._height, self._width)\n        ):\n            img = np.array(self.scaler(self.to_pil(img)), dtype=np.float32)  # hwc\n\n        return img\n\n    def get_observation(\n        self, env: EnvType, task: Optional[SubTaskType], *args: Any, **kwargs: Any\n    ) -> Any:\n        return self.process_img(self.frame_from_env(env=env, task=task))\n\n\nclass 
RGBSensor(VisionSensor[EnvType, SubTaskType], ABC):\n    def __init__(\n        self,\n        use_resnet_normalization: bool = False,\n        mean: Optional[Union[np.ndarray, Sequence[float]]] = IMAGENET_RGB_MEANS,\n        stdev: Optional[Union[np.ndarray, Sequence[float]]] = IMAGENET_RGB_STDS,\n        height: Optional[int] = None,\n        width: Optional[int] = None,\n        uuid: str = \"rgb\",\n        output_shape: Optional[Tuple[int, ...]] = None,\n        output_channels: int = 3,\n        unnormalized_infimum: float = 0.0,\n        unnormalized_supremum: float = 1.0,\n        scale_first: bool = True,\n        **kwargs: Any\n    ):\n        \"\"\"Initializer.\n\n        # Parameters\n\n        use_resnet_normalization : Whether to apply image normalization with the given `mean` and `stdev`.\n        mean : The images will be normalized with the given mean if `use_resnet_normalization` is True (default\n               `[0.485, 0.456, 0.406]`, i.e. the standard resnet normalization mean).\n        stdev : The images will be normalized with the given standard deviation if `use_resnet_normalization` is True\n                (default `[0.229, 0.224, 0.225]`, i.e. 
the standard resnet normalization standard deviation).\n        height: If it's a non-negative integer and `width` is also non-negative integer, the image returned from the\n                environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.\n        width: If it's a non-negative integer and `height` is also non-negative integer, the image returned from the\n                environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.\n        uuid: The universally unique identifier for the sensor.\n        output_shape: Optional observation space shape (alternative to `output_channels`).\n        output_channels: Optional observation space number of channels (alternative to `output_shape`).\n        unnormalized_infimum: Lower limit(s) for the observation space range.\n        unnormalized_supremum: Upper limit(s) for the observation space range.\n        scale_first: Whether to scale image before normalization (if needed).\n        kwargs : Extra kwargs. 
Currently unused.\n        \"\"\"\n\n        if not use_resnet_normalization:\n            mean, stdev = None, None\n\n        if isinstance(mean, tuple):\n            mean = np.array(mean, dtype=np.float32).reshape((1, 1, len(mean)))\n        if isinstance(stdev, tuple):\n            stdev = np.array(stdev, dtype=np.float32).reshape((1, 1, len(stdev)))\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n\nclass DepthSensor(VisionSensor[EnvType, SubTaskType], ABC):\n    def __init__(\n        self,\n        use_normalization: bool = False,\n        mean: Optional[Union[np.ndarray, float]] = 0.5,\n        stdev: Optional[Union[np.ndarray, float]] = 0.25,\n        height: Optional[int] = None,\n        width: Optional[int] = None,\n        uuid: str = \"depth\",\n        output_shape: Optional[Tuple[int, ...]] = None,\n        output_channels: int = 1,\n        unnormalized_infimum: float = 0.0,\n        unnormalized_supremum: float = 5.0,\n        scale_first: bool = True,\n        **kwargs: Any\n    ):\n        \"\"\"Initializer.\n\n        # Parameters\n\n        config : If `config[\"use_normalization\"]` is `True` then the depth images will be normalized\n            with mean 0.5 and standard deviation 0.25. 
If both `config[\"height\"]` and `config[\"width\"]` are\n            non-negative integers then the depth image returned from the environment will be rescaled to have shape\n            (config[\"height\"], config[\"width\"]) using bilinear sampling.\n        use_normalization : Whether to apply image normalization with the given `mean` and `stdev`.\n        mean : The images will be normalized with the given mean if `use_normalization` is True (default 0.5).\n        stdev : The images will be normalized with the given standard deviation if `use_normalization` is True\n                (default 0.25).\n        height: If it's a non-negative integer and `width` is also non-negative integer, the image returned from the\n                environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.\n        width: If it's a non-negative integer and `height` is also non-negative integer, the image returned from the\n                environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.\n        uuid: The universally unique identifier for the sensor.\n        output_shape: Optional observation space shape (alternative to `output_channels`).\n        output_channels: Optional observation space number of channels (alternative to `output_shape`).\n        unnormalized_infimum: Lower limit(s) for the observation space range.\n        unnormalized_supremum: Upper limit(s) for the observation space range.\n        scale_first: Whether to scale image before normalization (if needed).\n        kwargs : Extra kwargs. 
Currently unused.\n        \"\"\"\n\n        if not use_normalization:\n            mean, stdev = None, None\n\n        if isinstance(mean, float):\n            mean = np.array(mean, dtype=np.float32).reshape(1, 1)\n        if isinstance(stdev, float):\n            stdev = np.array(stdev, dtype=np.float32).reshape(1, 1)\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def get_observation(  # type: ignore\n        self, env: EnvType, task: Optional[SubTaskType], *args: Any, **kwargs: Any\n    ) -> Any:\n        depth = super().get_observation(env, task, *args, **kwargs)\n        depth = np.expand_dims(depth, 2)\n\n        return depth\n"
  },
  {
    "path": "allenact/embodiedai/storage/__init__.py",
    "content": ""
  },
  {
    "path": "allenact/embodiedai/storage/vdr_storage.py",
    "content": "import math\nimport random\nfrom collections import defaultdict\nfrom typing import Union, Tuple, Optional, Dict, Callable, cast, Sequence\n\nimport torch\nimport torch.nn.functional as F\n\nfrom allenact.algorithms.onpolicy_sync.policy import ObservationType\nfrom allenact.algorithms.onpolicy_sync.storage import (\n    MiniBatchStorageMixin,\n    ExperienceStorage,\n)\nfrom allenact.base_abstractions.misc import (\n    GenericAbstractLoss,\n    ModelType,\n    Memory,\n    LossOutput,\n)\nfrom allenact.utils.misc_utils import unzip, partition_sequence\n\n\ndef _index_recursive(d: Dict, key: Union[str, Tuple[str, ...]]):\n    if isinstance(key, str):\n        return d[key]\n    for k in key:\n        d = d[k]\n    return d\n\n\nclass InverseDynamicsVDRLoss(GenericAbstractLoss):\n    def __init__(\n        self,\n        compute_action_logits_fn: Callable,\n        img0_key: str,\n        img1_key: str,\n        action_key: str,\n    ):\n        self.compute_action_logits_fn = compute_action_logits_fn\n        self.img0_key = img0_key\n        self.img1_key = img1_key\n        self.action_key = action_key\n\n    def loss(\n        self,\n        *,\n        model: ModelType,\n        batch: ObservationType,\n        batch_memory: Memory,\n        stream_memory: Memory,\n    ) -> LossOutput:\n        action_logits = self.compute_action_logits_fn(\n            model=model,\n            img0=batch[self.img0_key],\n            img1=batch[self.img1_key],\n        )\n        loss = F.cross_entropy(action_logits, target=batch[self.action_key])\n        return LossOutput(\n            value=loss,\n            info={\"cross_entropy\": loss.item()},\n            per_epoch_info={},\n            batch_memory=batch_memory,\n            stream_memory=stream_memory,\n            bsize=int(batch[self.img0_key].shape[0]),\n        )\n\n\nclass DiscreteVisualDynamicsReplayStorage(ExperienceStorage, MiniBatchStorageMixin):\n    def __init__(\n        self,\n        
image_uuid: Union[str, Tuple[str, ...]],\n        action_success_uuid: Optional[Union[str, Tuple[str, ...]]],\n        nactions: int,\n        num_to_store_per_action: int,\n        max_to_save_per_episode: int,\n        target_batch_size: int,\n        extra_targets: Optional[Sequence] = None,\n    ):\n        self.image_uuid = image_uuid\n        self.action_success_uuid = action_success_uuid\n        self.nactions = nactions\n        self.num_to_store_per_action = num_to_store_per_action\n        self.max_to_save_per_episode = max_to_save_per_episode\n        self.target_batch_size = target_batch_size\n        self.extra_targets = extra_targets if extra_targets is not None else []\n\n        self._prev_imgs: Optional[torch.Tensor] = None\n\n        self.action_to_saved_transitions = {i: [] for i in range(nactions)}\n        self.action_to_num_seen = {i: 0 for i in range(nactions)}\n        self.task_sampler_to_actions_already_sampled = defaultdict(lambda: set())\n\n        self.device = torch.device(\"cpu\")\n\n        self._total_samples_returned_in_batches = 0\n\n    @property\n    def total_experiences(self):\n        return self._total_samples_returned_in_batches\n\n    def set_partition(self, index: int, num_parts: int):\n        self.num_to_store_per_action = math.ceil(\n            self.num_to_store_per_action / num_parts\n        )\n        self.target_batch_size = math.ceil(self.target_batch_size / num_parts)\n\n    def initialize(self, *, observations: ObservationType, **kwargs):\n        self._prev_imgs = None\n        self.add(observations=observations, actions=None, masks=None)\n\n    def batched_experience_generator(self, num_mini_batch: int):\n        triples = [\n            (i0, a, i1)\n            for a, v in self.action_to_saved_transitions.items()\n            for (i0, i1) in v\n        ]\n        random.shuffle(triples)\n\n        if len(triples) == 0:\n            return\n\n        parts = partition_sequence(\n            triples, 
math.ceil(len(triples) / self.target_batch_size)\n        )\n        for part in parts:\n            img0s, actions, img1s = unzip(part, n=3)\n\n            img0 = torch.stack([i0.to(self.device) for i0 in img0s], 0)\n            action = torch.tensor(actions, device=self.device)\n            img1 = torch.stack([i1.to(self.device) for i1 in img1s], 0)\n\n            self._total_samples_returned_in_batches += img0.shape[0]\n            yield {\"img0\": img0, \"action\": action, \"img1\": img1}\n\n    def add(\n        self,\n        *,\n        observations: ObservationType,\n        actions: Optional[torch.Tensor],\n        masks: Optional[torch.Tensor],\n        **kwargs,\n    ):\n        cur_imgs = cast(\n            torch.Tensor, _index_recursive(d=observations, key=self.image_uuid).cpu()\n        )\n\n        if self._prev_imgs is not None:\n            actions = actions.view(-1).cpu().numpy()\n            masks = masks.view(-1).cpu().numpy()\n\n            if self.action_success_uuid is not None:\n                action_successes = (\n                    observations[self.action_success_uuid].cpu().view(-1).numpy()\n                )\n            else:\n                action_successes = [True] * actions.shape[0]\n\n            extra = {}\n            for et in self.extra_targets:\n                extra[et] = observations[et][0].cpu().numpy()\n\n            nsamplers = actions.shape[0]\n            assert nsamplers == masks.shape[0]\n\n            for i, (a, m, action_success) in enumerate(\n                zip(actions, masks, action_successes)\n            ):\n                actions_already_sampled_in_ep = (\n                    self.task_sampler_to_actions_already_sampled[i]\n                )\n\n                if (\n                    m != 0\n                    and action_success\n                    and (\n                        len(actions_already_sampled_in_ep)\n                        <= self.max_to_save_per_episode\n                    )\n         
           and a not in actions_already_sampled_in_ep\n                ):  # Not the start of a new episode/task -> self._prev_imgs[i] corresponds to cur_imgs[i]\n                    saved_transitions = self.action_to_saved_transitions[a]\n\n                    if len(saved_transitions) < self.num_to_store_per_action:\n                        saved_transitions.append((self._prev_imgs[i], cur_imgs[i]))\n                    else:\n                        saved_transitions[\n                            random.randint(0, len(saved_transitions) - 1)\n                        ] = (\n                            self._prev_imgs[i],\n                            cur_imgs[i],\n                        )\n\n                    # Reservoir sampling transitions\n                    # a = int(a)\n                    # saved_transitions = self.action_to_saved_transitions[a]\n                    # num_seen = self.action_to_num_seen[a]\n                    # if num_seen < self.triples_to_save_per_action:\n                    #     saved_transitions.append((self._prev_imgs[i], cur_imgs[i]))\n                    # else:\n                    #     index = random.randint(0, num_seen)\n                    #     if index < self.triples_to_save_per_action:\n                    #         saved_transitions[index] = (self._prev_imgs[i], cur_imgs[i])\n\n                    actions_already_sampled_in_ep.add(a)\n                    self.action_to_num_seen[a] += 1\n                else:\n                    actions_already_sampled_in_ep.clear()\n\n        self._prev_imgs = cur_imgs\n\n    def before_updates(self, **kwargs):\n        pass\n\n    def after_updates(self, **kwargs):\n        pass\n\n    def to(self, device: torch.device):\n        self.device = device\n"
  },
  {
    "path": "allenact/main.py",
    "content": "\"\"\"Entry point to training/validating/testing for a user given experiment\nname.\"\"\"\n\nimport os\n\nif \"CUDA_DEVICE_ORDER\" not in os.environ:\n    # Necessary to order GPUs correctly in some cases\n    os.environ[\"CUDA_DEVICE_ORDER\"] = \"PCI_BUS_ID\"\n\nimport argparse\nimport ast\nimport importlib\nimport inspect\nimport json\nfrom typing import Dict, List, Optional, Tuple, Type\n\nfrom setproctitle import setproctitle as ptitle\n\nfrom allenact import __version__\nfrom allenact.algorithms.onpolicy_sync.runner import (\n    CONFIG_KWARGS_STR,\n    OnPolicyRunner,\n    SaveDirFormat,\n)\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig\nfrom allenact.utils.system import HUMAN_LOG_LEVELS, get_logger, init_logging\n\n\ndef get_argument_parser():\n    \"\"\"Creates the argument parser.\"\"\"\n\n    # noinspection PyTypeChecker\n    parser = argparse.ArgumentParser(\n        description=\"allenact\",\n        formatter_class=argparse.ArgumentDefaultsHelpFormatter,\n    )\n\n    parser.add_argument(\n        \"experiment\",\n        type=str,\n        help=\"the path to experiment config file relative to the 'experiment_base' directory\"\n        \" (see the `--experiment_base` flag).\",\n    )\n\n    parser.add_argument(\n        \"--eval\",\n        dest=\"eval\",\n        action=\"store_true\",\n        required=False,\n        help=\"if you pass the `--eval` flag, AllenAct will run inference on your experiment configuration.\"\n        \" You will need to specify which experiment checkpoints to run evaluation using the `--checkpoint`\"\n        \" flag.\",\n    )\n    parser.set_defaults(eval=False)\n\n    parser.add_argument(\n        \"--config_kwargs\",\n        type=str,\n        default=None,\n        required=False,\n        help=\"sometimes it is useful to be able to pass additional key-word arguments\"\n        \" to `__init__` when initializing an experiment configuration. 
This flag can be used\"\n        \" to pass such key-word arguments by specifying them with json, e.g.\"\n        '\\n\\t--config_kwargs \\'{\"gpu_id\": 0, \"my_important_variable\": [1,2,3]}\\''\n        \"\\nTo see which arguments are supported for your experiment see the experiment\"\n        \" config's `__init__` function. If the value passed to this function is a file path\"\n        \" then we will try to load this file path as a json object and use this json object\"\n        \" as key-word arguments.\",\n    )\n\n    parser.add_argument(\n        \"--extra_tag\",\n        type=str,\n        default=\"\",\n        required=False,\n        help=\"Add an extra tag to the experiment when trying out new ideas (will be used\"\n        \" as a subdirectory of the tensorboard path so you will be able to\"\n        \" search tensorboard logs using this extra tag). This can also be used to add an extra\"\n        \" organization when running evaluation (e.g. `--extra_tag running_eval_on_great_idea_12`)\",\n    )\n\n    parser.add_argument(\n        \"-o\",\n        \"--output_dir\",\n        required=False,\n        type=str,\n        default=\"experiment_output\",\n        help=\"experiment output folder\",\n    )\n\n    parser.add_argument(\n        \"--save_dir_fmt\",\n        required=False,\n        type=lambda s: SaveDirFormat[s.upper()],\n        default=\"flat\",\n        help=\"The file structure to use when saving results from allenact.\"\n        \" See documentation of `SaveDirFormat` for more details.\"\n        \" Allowed values are ('flat' and 'nested'). 
Default: 'flat'.\",\n    )\n\n    parser.add_argument(\n        \"-s\",\n        \"--seed\",\n        required=False,\n        default=None,\n        type=int,\n        help=\"random seed\",\n    )\n    parser.add_argument(\n        \"-b\",\n        \"--experiment_base\",\n        required=False,\n        default=os.getcwd(),\n        type=str,\n        help=\"experiment configuration base folder (default: working directory)\",\n    )\n    parser.add_argument(\n        \"-c\",\n        \"--checkpoint\",\n        required=False,\n        default=None,\n        type=str,\n        help=\"optional checkpoint file name to resume training on or run testing with. When testing (see the `--eval` flag) this\"\n        \" argument can be used very flexibly as:\"\n        \"\\n(1) the path to a particular individual checkpoint file,\"\n        \"\\n(2) the path to a directory of checkpoint files all of which you'd like to be evaluated\"\n        \" (checkpoints are expected to have a `.pt` file extension),\"\n        '\\n(3) a \"glob\" pattern (https://tldp.org/LDP/abs/html/globbingref.html) that will be expanded'\n        \" using python's `glob.glob` function and should return a collection of checkpoint files.\"\n        \"\\nIf you'd like to only evaluate a subset of the checkpoints specified by the above directory/glob\"\n        \" (e.g. 
every checkpoint saved after 5mil steps) you'll likely want to use the `--approx_ckpt_step_interval`\"\n        \" flag.\",\n    )\n    parser.add_argument(\n        \"--infer_output_dir\",\n        dest=\"infer_output_dir\",\n        action=\"store_true\",\n        required=False,\n        help=\"applied when evaluating checkpoint(s) in nested save_dir_fmt: if specified, the output dir will be inferred from checkpoint path.\",\n    )\n    parser.add_argument(\n        \"--approx_ckpt_step_interval\",\n        required=False,\n        default=None,\n        type=float,\n        help=\"if running tests on a collection of checkpoints (see the `--checkpoint` flag) this argument can be\"\n        \" used to skip checkpoints. In particular, if this value is specified and equals `n` then we will\"\n        \" only evaluate checkpoints whose step count is closest to each of `0*n`, `1*n`, `2*n`, `3*n`, ... \"\n        \" n * ceil(max training steps in ckpts / n). Note that 'closest to' is important here as AllenAct does\"\n        \" not generally save checkpoints at exact intervals (doing so would result in performance degradation\"\n        \" in distributed training).\",\n    )\n    parser.add_argument(\n        \"-r\",\n        \"--restart_pipeline\",\n        dest=\"restart_pipeline\",\n        action=\"store_true\",\n        required=False,\n        help=\"for training, if checkpoint is specified, DO NOT continue the training pipeline from where\"\n        \" training had previously ended. 
Instead restart the training pipeline from scratch but\"\n        \" with the model weights from the checkpoint.\",\n    )\n    parser.set_defaults(restart_pipeline=False)\n\n    parser.add_argument(\n        \"-d\",\n        \"--deterministic_cudnn\",\n        dest=\"deterministic_cudnn\",\n        action=\"store_true\",\n        required=False,\n        help=\"sets CuDNN to deterministic mode\",\n    )\n    parser.set_defaults(deterministic_cudnn=False)\n\n    parser.add_argument(\n        \"-m\",\n        \"--max_sampler_processes_per_worker\",\n        required=False,\n        default=None,\n        type=int,\n        help=\"maximal number of sampler processes to spawn for each worker\",\n    )\n\n    parser.add_argument(\n        \"-e\",\n        \"--deterministic_agents\",\n        dest=\"deterministic_agents\",\n        action=\"store_true\",\n        required=False,\n        help=\"enable deterministic agents (i.e. always taking the mode action) during validation/testing\",\n    )\n    parser.set_defaults(deterministic_agents=False)\n\n    parser.add_argument(\n        \"-l\",\n        \"--log_level\",\n        default=\"info\",\n        type=str,\n        required=False,\n        help=\"sets the log_level. 
it must be one of {}.\".format(\n            \", \".join(HUMAN_LOG_LEVELS)\n        ),\n    )\n\n    parser.add_argument(\n        \"-i\",\n        \"--disable_tensorboard\",\n        dest=\"disable_tensorboard\",\n        action=\"store_true\",\n        required=False,\n        help=\"disable tensorboard logging\",\n    )\n    parser.set_defaults(disable_tensorboard=False)\n\n    parser.add_argument(\n        \"-a\",\n        \"--disable_config_saving\",\n        dest=\"disable_config_saving\",\n        action=\"store_true\",\n        required=False,\n        help=\"disable saving the used config in the output directory\",\n    )\n    parser.set_defaults(disable_config_saving=False)\n\n    parser.add_argument(\n        \"--collect_valid_results\",\n        dest=\"collect_valid_results\",\n        action=\"store_true\",\n        required=False,\n        help=\"enables returning and saving valid results during training\",\n    )\n    parser.set_defaults(collect_valid_results=False)\n\n    parser.add_argument(\n        \"--valid_on_initial_weights\",\n        dest=\"valid_on_initial_weights\",\n        action=\"store_true\",\n        required=False,\n        help=\"enables running validation on the model with initial weights\",\n    )\n    parser.set_defaults(valid_on_initial_weights=False)\n\n    parser.add_argument(\n        \"--test_expert\",\n        dest=\"test_expert\",\n        action=\"store_true\",\n        required=False,\n        help=\"use expert during test\",\n    )\n    parser.set_defaults(test_expert=False)\n\n    parser.add_argument(\n        \"--version\", action=\"version\", version=f\"allenact {__version__}\"\n    )\n\n    parser.add_argument(\n        \"--distributed_ip_and_port\",\n        dest=\"distributed_ip_and_port\",\n        required=False,\n        type=str,\n        default=\"127.0.0.1:0\",\n        help=\"IP address and port of listener for distributed process with rank 0.\"\n        \" Port number 0 lets runner choose a free port. 
For more details, please follow the\"\n        \" tutorial https://allenact.org/tutorials/distributed-objectnav-tutorial/.\",\n    )\n\n    parser.add_argument(\n        \"--machine_id\",\n        dest=\"machine_id\",\n        required=False,\n        type=int,\n        default=0,\n        help=\"ID for machine in distributed runs. For more details, please follow the\"\n        \" tutorial https://allenact.org/tutorials/distributed-objectnav-tutorial/\",\n    )\n\n    parser.add_argument(\n        \"--save_ckpt_at_every_host\",\n        dest=\"save_ckpt_at_every_host\",\n        action=\"store_true\",\n        required=False,\n        help=\"if you pass the `--save_ckpt_at_every_host` flag, AllenAct will save checkpoints at every host as\"\n        \" the training progresses in distributed training mode.\",\n    )\n    parser.set_defaults(save_ckpt_at_every_host=False)\n\n    parser.add_argument(\n        \"--callbacks\",\n        dest=\"callbacks\",\n        required=False,\n        type=str,\n        default=\"\",\n        help=\"Comma-separated list of files with Callback classes to use.\",\n    )\n\n    parser.add_argument(\n        \"--enable_crash_recovery\",\n        dest=\"enable_crash_recovery\",\n        default=False,\n        action=\"store_true\",\n        required=False,\n        help=\"Whether or not to try recovering when a task crashes (use at your own risk).\",\n    )\n\n    ### DEPRECATED FLAGS\n    parser.add_argument(\n        \"-t\",\n        \"--test_date\",\n        default=None,\n        type=str,\n        required=False,\n        help=\"`--test_date` has been deprecated. 
Please use `--eval` instead.\",\n    )\n    parser.add_argument(\n        \"--approx_ckpt_steps_count\",\n        required=False,\n        default=None,\n        type=float,\n        help=\"`--approx_ckpt_steps_count` has been deprecated.\"\n        \" Please specify the checkpoint directly using the '--checkpoint' flag.\",\n    )\n    parser.add_argument(\n        \"-k\",\n        \"--skip_checkpoints\",\n        required=False,\n        default=0,\n        type=int,\n        help=\"`--skip_checkpoints` has been deprecated. Please use `--approx_ckpt_steps_count` instead.\",\n    )\n    ### END DEPRECATED FLAGS\n\n    return parser\n\n\ndef get_args():\n    \"\"\"Creates the argument parser and parses any input arguments.\"\"\"\n\n    parser = get_argument_parser()\n    args = parser.parse_args()\n\n    # check for deprecated\n    deprecated_flags = [\"test_date\", \"skip_checkpoints\", \"approx_ckpt_steps_count\"]\n    for df in deprecated_flags:\n        df_info = parser._option_string_actions[f\"--{df}\"]\n        if getattr(args, df) is not df_info.default:\n            raise RuntimeError(df_info.help)\n\n    return args\n\n\ndef _config_source(config_type: Type) -> Dict[str, str]:\n    if config_type is ExperimentConfig:\n        return {}\n\n    try:\n        module_file_path = inspect.getfile(config_type)\n        module_dot_path = config_type.__module__\n        sources_dict = {module_file_path: module_dot_path}\n        for super_type in config_type.__bases__:\n            sources_dict.update(_config_source(super_type))\n\n        return sources_dict\n    except TypeError as _:\n        return {}\n\n\ndef find_sub_modules(path: str, module_list: Optional[List] = None):\n    if module_list is None:\n        module_list = []\n\n    path = os.path.abspath(path)\n    if path[-3:] == \".py\":\n        module_list.append(path)\n    elif os.path.isdir(path):\n        contents = os.listdir(path)\n        if any(key in contents for key in [\"__init__.py\", 
\"setup.py\"]):\n            new_paths = [os.path.join(path, f) for f in os.listdir(path)]\n            for new_path in new_paths:\n                find_sub_modules(new_path, module_list)\n    return module_list\n\n\ndef load_config(args) -> Tuple[ExperimentConfig, Dict[str, str]]:\n    assert os.path.exists(\n        args.experiment_base\n    ), \"The path '{}' does not seem to exist (your current working directory is '{}').\".format(\n        args.experiment_base, os.getcwd()\n    )\n    rel_base_dir = os.path.relpath(  # Normalizing string representation of path\n        os.path.abspath(args.experiment_base), os.getcwd()\n    )\n    rel_base_dot_path = rel_base_dir.replace(\"/\", \".\")\n    if rel_base_dot_path == \".\":\n        rel_base_dot_path = \"\"\n\n    exp_dot_path = args.experiment\n    if exp_dot_path[-3:] == \".py\":\n        exp_dot_path = exp_dot_path[:-3]\n    exp_dot_path = exp_dot_path.replace(\"/\", \".\")\n\n    module_path = (\n        f\"{rel_base_dot_path}.{exp_dot_path}\"\n        if len(rel_base_dot_path) != 0\n        else exp_dot_path\n    )\n\n    try:\n        importlib.invalidate_caches()\n        module = importlib.import_module(module_path)\n    except ModuleNotFoundError as e:\n        if not any(isinstance(arg, str) and module_path in arg for arg in e.args):\n            raise e\n        all_sub_modules = set(find_sub_modules(os.getcwd()))\n        desired_config_name = module_path.split(\".\")[-1]\n        relevant_submodules = [\n            sm for sm in all_sub_modules if desired_config_name in os.path.basename(sm)\n        ]\n        raise ModuleNotFoundError(\n            f\"Could not import experiment '{module_path}', are you sure this is the right path?\"\n            f\" Possibly relevant files include {relevant_submodules}.\"\n            f\" Note that the experiment must be reachable along your `PYTHONPATH`, it might\"\n            f\" be helpful for you to run `export PYTHONPATH=$PYTHONPATH:$PWD` in your\"\n           
 f\" project's top level directory.\"\n        ) from e\n\n    experiments = [\n        m[1]\n        for m in inspect.getmembers(module, inspect.isclass)\n        if m[1].__module__ == module.__name__ and issubclass(m[1], ExperimentConfig)\n    ]\n    assert (\n        len(experiments) == 1\n    ), \"Too many or too few experiments defined in {}\".format(module_path)\n\n    config_kwargs = {}\n    if args.config_kwargs is not None:\n        if os.path.exists(args.config_kwargs):\n            with open(args.config_kwargs, \"r\") as f:\n                config_kwargs = json.load(f)\n        else:\n            try:\n                config_kwargs = json.loads(args.config_kwargs)\n            except json.JSONDecodeError:\n                get_logger().warning(\n                    f\"The input for --config_kwargs ('{args.config_kwargs}')\"\n                    f\" does not appear to be valid json. Often this is due to\"\n                    f\" json requiring very specific syntax (e.g. double quoted strings)\"\n                    f\" we'll try to get around this by evaluating with `ast.literal_eval`\"\n                    f\" (a safer version of the standard `eval` function).\"\n                )\n                config_kwargs = ast.literal_eval(args.config_kwargs)\n\n        assert isinstance(\n            config_kwargs, Dict\n        ), \"`--config_kwargs` must be a json string (or a path to a .json file) that evaluates to a dictionary.\"\n\n    config = experiments[0](**config_kwargs)\n    sources = _config_source(config_type=experiments[0])\n    sources[CONFIG_KWARGS_STR] = json.dumps(config_kwargs)\n    return config, sources\n\n\ndef main():\n    args = get_args()\n\n    init_logging(args.log_level)\n\n    get_logger().info(\"Running with args {}\".format(args))\n\n    ptitle(\"Master: {}\".format(\"Training\" if args.eval is None else \"Evaluation\"))\n\n    cfg, srcs = load_config(args)\n\n    if not args.eval:\n        OnPolicyRunner(\n            config=cfg,\n 
           output_dir=args.output_dir,\n            save_dir_fmt=args.save_dir_fmt,\n            loaded_config_src_files=srcs,\n            seed=args.seed,\n            mode=\"train\",\n            deterministic_cudnn=args.deterministic_cudnn,\n            deterministic_agents=args.deterministic_agents,\n            extra_tag=args.extra_tag,\n            disable_tensorboard=args.disable_tensorboard,\n            disable_config_saving=args.disable_config_saving,\n            distributed_ip_and_port=args.distributed_ip_and_port,\n            machine_id=args.machine_id,\n            callbacks_paths=args.callbacks,\n        ).start_train(\n            checkpoint=args.checkpoint,\n            restart_pipeline=args.restart_pipeline,\n            max_sampler_processes_per_worker=args.max_sampler_processes_per_worker,\n            collect_valid_results=args.collect_valid_results,\n            valid_on_initial_weights=args.valid_on_initial_weights,\n            try_restart_after_task_error=args.enable_crash_recovery,\n            save_ckpt_at_every_host=save_ckpt_at_every_host,\n        )\n    else:\n        OnPolicyRunner(\n            config=cfg,\n            output_dir=args.output_dir,\n            save_dir_fmt=args.save_dir_fmt,\n            loaded_config_src_files=srcs,\n            seed=args.seed,\n            mode=\"test\",\n            deterministic_cudnn=args.deterministic_cudnn,\n            deterministic_agents=args.deterministic_agents,\n            extra_tag=args.extra_tag,\n            disable_tensorboard=args.disable_tensorboard,\n            disable_config_saving=args.disable_config_saving,\n            distributed_ip_and_port=args.distributed_ip_and_port,\n            machine_id=args.machine_id,\n            callbacks_paths=args.callbacks,\n        ).start_test(\n            checkpoint_path_dir_or_pattern=args.checkpoint,\n            infer_output_dir=args.infer_output_dir,\n            approx_ckpt_step_interval=args.approx_ckpt_step_interval,\n            
max_sampler_processes_per_worker=args.max_sampler_processes_per_worker,\n            inference_expert=args.test_expert,\n        )\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "allenact/setup.py",
    "content": "import os\nfrom pathlib import Path\n\nfrom setuptools import find_packages, setup\n\n\ndef parse_req_file(fname, initial=None):\n    \"\"\"Reads requires.txt file generated by setuptools and outputs a\n    new/updated dict of extras as keys and corresponding lists of dependencies\n    as values.\n\n    The input file's contents are similar to a `ConfigParser` file, e.g.\n    pkg_1\n    pkg_2\n    pkg_3\n\n    [extras1]\n    pkg_4\n    pkg_5\n\n    [extras2]\n    pkg_6\n    pkg_7\n    \"\"\"\n    reqs = {} if initial is None else initial\n    cline = None\n    with open(fname, \"r\") as f:\n        for line in f.readlines():\n            line = line[:-1].strip()\n            if len(line) == 0:\n                continue\n            if line[0] == \"[\":\n                # Add new key for current extras (if missing in dict)\n                cline = line[1:-1].strip()\n                if cline not in reqs:\n                    reqs[cline] = []\n            else:\n                # Only keep dependencies from extras\n                if cline is not None:\n                    reqs[cline].append(line)\n    return reqs\n\n\ndef get_version(fname):\n    \"\"\"Reads PKG-INFO file generated by setuptools and extracts the Version\n    number.\"\"\"\n    res = \"UNK\"\n    with open(fname, \"r\") as f:\n        for line in f.readlines():\n            line = line[:-1]\n            if line.startswith(\"Version:\"):\n                res = line.replace(\"Version:\", \"\").strip()\n                break\n    if res in [\"UNK\", \"\"]:\n        raise ValueError(f\"Missing Version number in {fname}\")\n    return res\n\n\ndef _do_setup():\n    base_dir = os.path.abspath(os.path.dirname(Path(__file__)))\n\n    if not os.path.exists(\n        os.path.join(base_dir, \"allenact.egg-info/dependency_links.txt\")\n    ):\n        # Build mode for sdist\n        os.chdir(os.path.join(base_dir, \"..\"))\n\n        with open(\".VERSION\", \"r\") as f:\n            __version__ = 
f.readline().strip()\n\n        # Extra dependencies for development (actually unnecessary)\n        extras = {\n            \"dev\": [\n                l.strip()\n                for l in open(\"dev_requirements.txt\", \"r\").readlines()\n                if l.strip() != \"\"\n            ]\n        }\n    else:\n        # Install mode from sdist\n        __version__ = get_version(os.path.join(base_dir, \"allenact.egg-info/PKG-INFO\"))\n        extras = parse_req_file(\n            os.path.join(base_dir, \"allenact.egg-info/requires.txt\")\n        )\n\n    setup(\n        name=\"allenact\",\n        version=__version__,\n        description=\"AllenAct framework\",\n        long_description=(\n            \"AllenAct is a modular and flexible learning framework designed with\"\n            \" a focus on the unique requirements of Embodied-AI research.\"\n        ),\n        classifiers=[\n            \"Intended Audience :: Science/Research\",\n            \"Development Status :: 3 - Alpha\",\n            \"License :: OSI Approved :: MIT License\",\n            \"Topic :: Scientific/Engineering :: Artificial Intelligence\",\n            \"Programming Language :: Python\",\n            \"Programming Language :: Python :: 3.6\",\n            \"Programming Language :: Python :: 3.7\",\n            \"Programming Language :: Python :: 3.8\",\n            \"Programming Language :: Python :: 3.9\",\n            \"Programming Language :: Python :: 3.10\",\n        ],\n        keywords=[\"reinforcement learning\", \"embodied-AI\", \"AI\", \"RL\", \"SLAM\"],\n        url=\"https://github.com/allenai/allenact\",\n        author=\"Allen Institute for Artificial Intelligence\",\n        author_email=\"lucaw@allenai.org\",\n        license=\"MIT\",\n        packages=find_packages(include=[\"allenact\", \"allenact.*\"]),\n        install_requires=[\n            \"gym==0.17.*\",  # Newer versions of gym are now broken with updates to setuptools\n            
\"torch>=1.6.0,!=1.8.0\",\n            \"torchvision>=0.7.0,<=0.16.2\",\n            \"tensorboardx>=2.1\",\n            \"setproctitle\",\n            \"moviepy>=1.0.3\",\n            \"filelock\",\n            \"numpy>=1.19.1\",\n            \"Pillow>=8.2.0,<10.3.0\",\n            \"matplotlib>=3.3.1\",\n            \"networkx\",\n            \"opencv-python\",\n            \"wheel>=0.36.2\",\n            \"attrs>=21.4.0\",\n            \"scipy>=1.5.4\",\n        ],\n        setup_requires=[\"pytest-runner\"],\n        tests_require=[\"pytest\", \"pytest-cov\", \"compress_pickle\"],\n        entry_points={\"console_scripts\": [\"allenact=allenact.main:main\"]},\n        extras_require=extras,\n    )\n\n\nif __name__ == \"__main__\":\n    _do_setup()\n"
  },
  {
    "path": "allenact/utils/__init__.py",
    "content": ""
  },
  {
    "path": "allenact/utils/cache_utils.py",
    "content": "import math\nfrom typing import Dict, Any, Union, Callable, Optional\n\nfrom allenact.utils.system import get_logger\n\n\ndef pos_to_str_for_cache(pos: Dict[str, float]) -> str:\n    return \"_\".join([str(pos[\"x\"]), str(pos[\"y\"]), str(pos[\"z\"])])\n\n\ndef str_to_pos_for_cache(s: str) -> Dict[str, float]:\n    split = s.split(\"_\")\n    return {\"x\": float(split[0]), \"y\": float(split[1]), \"z\": float(split[2])}\n\n\ndef get_distance(\n    cache: Dict[str, Any], pos: Dict[str, float], target: Dict[str, float]\n) -> float:\n    pos = {\n        \"x\": 0.25 * math.ceil(pos[\"x\"] / 0.25),\n        \"y\": pos[\"y\"],\n        \"z\": 0.25 * math.ceil(pos[\"z\"] / 0.25),\n    }\n    sp = _get_shortest_path_distance_from_cache(cache, pos, target)\n    if sp == -1.0:\n        pos = {\n            \"x\": 0.25 * math.floor(pos[\"x\"] / 0.25),\n            \"y\": pos[\"y\"],\n            \"z\": 0.25 * math.ceil(pos[\"z\"] / 0.25),\n        }\n        sp = _get_shortest_path_distance_from_cache(cache, pos, target)\n    if sp == -1.0:\n        pos = {\n            \"x\": 0.25 * math.ceil(pos[\"x\"] / 0.25),\n            \"y\": pos[\"y\"],\n            \"z\": 0.25 * math.floor(pos[\"z\"] / 0.25),\n        }\n        sp = _get_shortest_path_distance_from_cache(cache, pos, target)\n    if sp == -1.0:\n        pos = {\n            \"x\": 0.25 * math.floor(pos[\"x\"] / 0.25),\n            \"y\": pos[\"y\"],\n            \"z\": 0.25 * math.floor(pos[\"z\"] / 0.25),\n        }\n        sp = _get_shortest_path_distance_from_cache(cache, pos, target)\n    if sp == -1.0:\n        pos = find_nearest_point_in_cache(cache, pos)\n        sp = _get_shortest_path_distance_from_cache(cache, pos, target)\n    if sp == -1.0:\n        target = find_nearest_point_in_cache(cache, target)\n        sp = _get_shortest_path_distance_from_cache(cache, pos, target)\n    if sp == -1.0:\n        print(\"Your cache is incomplete!\")\n        exit()\n    return sp\n\n\ndef 
get_distance_to_object(\n    cache: Dict[str, Any], pos: Dict[str, float], target_class: str\n) -> float:\n\n    dists = []\n    weights = []\n    for rounder_func_0 in [math.ceil, math.floor]:\n        for rounder_func_1 in [math.ceil, math.floor]:\n            rounded_pos = {\n                \"x\": 0.25 * rounder_func_0(pos[\"x\"] / 0.25),\n                \"y\": pos[\"y\"],\n                \"z\": 0.25 * rounder_func_1(pos[\"z\"] / 0.25),\n            }\n            dist = _get_shortest_path_distance_to_object_from_cache(\n                cache, rounded_pos, target_class\n            )\n            if dist >= 0:\n                dists.append(dist)\n                weights.append(\n                    1.0\n                    / (\n                        math.sqrt(\n                            (pos[\"x\"] - rounded_pos[\"x\"]) ** 2\n                            + (pos[\"z\"] - rounded_pos[\"z\"]) ** 2\n                        )\n                        + 1e6\n                    )\n                )\n\n    if len(dists) == 0:\n        raise RuntimeError(\"Your cache is incomplete!\")\n\n    total_weight = sum(weights)\n    weights = [w / total_weight for w in weights]\n\n    return sum(d * w for d, w in zip(dists, weights))\n\n\ndef _get_shortest_path_distance_from_cache(\n    cache: Dict[str, Any], position: Dict[str, float], target: Dict[str, float]\n) -> float:\n    try:\n        return cache[pos_to_str_for_cache(position)][pos_to_str_for_cache(target)][\n            \"distance\"\n        ]\n    except KeyError:\n        return -1.0\n\n\ndef _get_shortest_path_distance_to_object_from_cache(\n    cache: Dict[str, Any], position: Dict[str, float], target_class: str\n) -> float:\n    try:\n        return cache[pos_to_str_for_cache(position)][target_class][\"distance\"]\n    except KeyError:\n        return -1.0\n\n\ndef find_nearest_point_in_cache(\n    cache: Dict[str, Any], point: Dict[str, float]\n) -> Dict[str, float]:\n    best_delta = float(\"inf\")\n    
closest_point: Dict[str, float] = {}\n    for p in cache:\n        pos = str_to_pos_for_cache(p)\n        delta = (\n            abs(point[\"x\"] - pos[\"x\"])\n            + abs(point[\"y\"] - pos[\"y\"])\n            + abs(point[\"z\"] - pos[\"z\"])\n        )\n        if delta < best_delta:\n            best_delta = delta\n            closest_point = pos\n    return closest_point\n\n\nclass DynamicDistanceCache(object):\n    def __init__(self, rounding: Optional[int] = None):\n        self.cache: Dict[str, Any] = {}\n        self.rounding = rounding\n        self.hits = 0\n        self.misses = 0\n        self.num_accesses = 0\n\n    def find_distance(\n        self,\n        scene_name: str,\n        position: Dict[str, Any],\n        target: Union[Dict[str, Any], str],\n        native_distance_function: Callable[\n            [Dict[str, Any], Union[Dict[str, Any], str]], float\n        ],\n    ) -> float:\n        # Convert the position to its rounded string representation\n        position_str = scene_name + self._pos_to_str(position)\n        # If the target is also a position, convert it to its rounded string representation\n        if isinstance(target, str):\n            target_str = target\n        else:\n            target_str = self._pos_to_str(target)\n\n        if position_str not in self.cache:\n            self.cache[position_str] = {}\n        if target_str not in self.cache[position_str]:\n            self.cache[position_str][target_str] = native_distance_function(\n                position, target\n            )\n            self.misses += 1\n        else:\n            self.hits += 1\n        self.num_accesses += 1\n        if self.num_accesses % 1000 == 0:\n            get_logger().debug(\"Cache Miss-Hit Ratio: %.4f\" % (self.misses / self.hits))\n        return self.cache[position_str][target_str]\n\n    def invalidate(self):\n        self.cache = []\n\n    def _pos_to_str(self, pos: Dict[str, Any]) -> str:\n        if self.rounding:\n         
   pos = {k: round(v, self.rounding) for k, v in pos.items()}\n        return str(pos)\n"
  },
  {
    "path": "allenact/utils/cacheless_frcnn.py",
    "content": "from typing import List, Any\n\nimport torch\nfrom torchvision.models.detection.backbone_utils import resnet_fpn_backbone\nfrom torchvision.models.detection.faster_rcnn import FasterRCNN\n\n# noinspection PyProtectedMember\nfrom torchvision.models.detection.faster_rcnn import model_urls\nfrom torchvision.models.detection.rpn import AnchorGenerator\nfrom torchvision.models.utils import load_state_dict_from_url\n\n\nclass CachelessAnchorGenerator(AnchorGenerator):\n    def forward(self, image_list: Any, feature_maps: Any):\n        grid_sizes = list([feature_map.shape[-2:] for feature_map in feature_maps])\n        image_size = image_list.tensors.shape[-2:]\n        strides = [\n            [int(image_size[0] / g[0]), int(image_size[1] / g[1])] for g in grid_sizes\n        ]\n        dtype, device = feature_maps[0].dtype, feature_maps[0].device\n        self.set_cell_anchors(dtype, device)\n        anchors_over_all_feature_maps = self.grid_anchors(grid_sizes, strides)\n        anchors = torch.jit.annotate(List[List[torch.Tensor]], [])  # type:ignore\n        for i, (image_height, image_width) in enumerate(image_list.image_sizes):\n            anchors_in_image = []\n            for anchors_per_feature_map in anchors_over_all_feature_maps:\n                anchors_in_image.append(anchors_per_feature_map)\n            anchors.append(anchors_in_image)\n        anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors]\n\n        return anchors\n\n\ndef fasterrcnn_resnet50_fpn(\n    pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, **kwargs\n):\n    if pretrained:\n        # no need to download the backbone if pretrained is set\n        pretrained_backbone = False\n    backbone = resnet_fpn_backbone(\"resnet50\", pretrained_backbone)\n\n    anchor_sizes = ((32,), (64,), (128,), (256,), (512,))\n    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)\n    rpn_anchor_generator = 
CachelessAnchorGenerator(anchor_sizes, aspect_ratios)\n    model = FasterRCNN(\n        backbone, num_classes, rpn_anchor_generator=rpn_anchor_generator, **kwargs\n    )\n\n    # min_size = 300\n    # max_size = 400\n    # anchor_sizes = ((12,), (24,), (48,), (96,), (192,))\n    # aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)\n    # rpn_anchor_generator = CachelessAnchorGenerator(\n    #     anchor_sizes, aspect_ratios\n    # )\n    # model = FasterRCNN(backbone, num_classes, rpn_anchor_generator=rpn_anchor_generator, min_size=min_size, max_size=max_size, **kwargs)\n\n    if pretrained:\n        state_dict = load_state_dict_from_url(\n            model_urls[\"fasterrcnn_resnet50_fpn_coco\"], progress=progress\n        )\n        model.load_state_dict(state_dict)\n    return model\n"
  },
  {
    "path": "allenact/utils/experiment_utils.py",
    "content": "\"\"\"Utility classes and functions for running and designing experiments.\"\"\"\n\nimport abc\nimport collections.abc\nimport copy\nimport numbers\nimport random\nfrom collections import OrderedDict, defaultdict\nfrom typing import (\n    Callable,\n    NamedTuple,\n    Dict,\n    Any,\n    Union,\n    Iterator,\n    Optional,\n    List,\n    cast,\n    Sequence,\n    TypeVar,\n    Generic,\n    Tuple,\n)\n\nimport attr\nimport numpy as np\nimport torch\nimport torch.optim as optim\nimport wandb\nimport shutil\n\nfrom allenact.algorithms.offpolicy_sync.losses.abstract_offpolicy_loss import Memory\nfrom allenact.algorithms.onpolicy_sync.losses.abstract_loss import (\n    AbstractActorCriticLoss,\n)\nfrom allenact.algorithms.onpolicy_sync.storage import (\n    ExperienceStorage,\n    RolloutStorage,\n    RolloutBlockStorage,\n)\nfrom allenact.base_abstractions.misc import Loss, GenericAbstractLoss\nfrom allenact.utils.misc_utils import prepare_locals_for_super\nfrom allenact.utils.system import get_logger\n\ntry:\n    # noinspection PyProtectedMember,PyUnresolvedReferences\n    from torch.optim.lr_scheduler import _LRScheduler\nexcept (ImportError, ModuleNotFoundError):\n    raise ImportError(\"`_LRScheduler` was not found in `torch.optim.lr_scheduler`\")\n\n_DEFAULT_ONPOLICY_UUID = \"onpolicy\"\n\n\ndef evenly_distribute_count_into_bins(count: int, nbins: int) -> List[int]:\n    \"\"\"Distribute a count into a number of bins.\n\n    # Parameters\n    count: A positive integer to be distributed, should be `>= nbins`.\n    nbins: The number of bins.\n\n    # Returns\n    A list of positive integers which sum to `count`. 
These values will be\n    as close to equal as possible (may differ by at most 1).\n    \"\"\"\n    assert count >= nbins, f\"count ({count}) < nbins ({nbins})\"\n    res = [0] * nbins\n    for it in range(count):\n        res[it % nbins] += 1\n    return res\n\n\ndef recursive_update(\n    original: Union[Dict, collections.abc.MutableMapping],\n    update: Union[Dict, collections.abc.MutableMapping],\n):\n    \"\"\"Recursively updates original dictionary with entries form update dict.\n\n    # Parameters\n\n    original : Original dictionary to be updated.\n    update : Dictionary with additional or replacement entries.\n\n    # Returns\n\n    Updated original dictionary.\n    \"\"\"\n    for k, v in update.items():\n        if isinstance(v, collections.abc.MutableMapping):\n            original[k] = recursive_update(original.get(k, {}), v)\n        else:\n            original[k] = v\n    return original\n\n\nToBuildType = TypeVar(\"ToBuildType\")\n\n\nclass Builder(tuple, Generic[ToBuildType]):\n    \"\"\"Used to instantiate a given class with (default) parameters.\n\n    Helper class that stores a class, default parameters for that\n    class, and key word arguments that (possibly) overwrite the defaults.\n    When calling this an object of the Builder class it generates\n    a class of type `class_type` with parameters specified by\n    the attributes `default` and `kwargs` (and possibly additional, overwriting,\n    keyword arguments).\n\n    # Attributes\n\n    class_type : The class to be instantiated when calling the object.\n    kwargs : Keyword arguments used to instantiate an object of type `class_type`.\n    default : Default parameters used when instantiating the class.\n    \"\"\"\n\n    class_type: ToBuildType\n    kwargs: Dict[str, Any]\n    default: Dict[str, Any]\n\n    # noinspection PyTypeChecker\n    def __new__(\n        cls,\n        class_type: ToBuildType,\n        kwargs: Optional[Dict[str, Any]] = None,\n        default: 
Optional[Dict[str, Any]] = None,\n    ):\n        \"\"\"Create a new Builder.\n\n        For parameter descriptions see the class documentation. Note\n        that `kwargs` and `default` can be None in which case they are\n        set to be empty dictionaries.\n        \"\"\"\n        self = tuple.__new__(\n            cls,\n            (\n                class_type,\n                kwargs if kwargs is not None else {},\n                default if default is not None else {},\n            ),\n        )\n        self.class_type = class_type\n        self.kwargs = self[1]\n        self.default = self[2]\n        return self\n\n    def __repr__(self) -> str:\n        return (\n            f\"Group(class_type={self.class_type},\"\n            f\" kwargs={self.kwargs},\"\n            f\" default={self.default})\"\n        )\n\n    def __call__(self, **kwargs) -> ToBuildType:\n        \"\"\"Build and return a new class.\n\n        # Parameters\n        kwargs : additional keyword arguments to use when instantiating\n            the object. 
These overwrite all arguments already in the `self.kwargs`\n            and `self.default` attributes.\n\n        # Returns\n\n        Class of type `self.class_type` with parameters\n        taken from `self.default`, `self.kwargs`, and\n        any keyword arguments additionally passed to `__call__`.\n        \"\"\"\n        allkwargs = copy.deepcopy(self.default)\n        recursive_update(allkwargs, self.kwargs)\n        recursive_update(allkwargs, kwargs)\n        return cast(Callable, self.class_type)(**allkwargs)\n\n\nclass ScalarMeanTracker(object):\n    \"\"\"Track a collection `scalar key -> mean` pairs.\"\"\"\n\n    def __init__(self) -> None:\n        self._sums: Dict[str, float] = OrderedDict()\n        self._counts: Dict[str, int] = OrderedDict()\n\n    def add_scalars(\n        self, scalars: Dict[str, Union[float, int]], n: Union[int, Dict[str, int]] = 1\n    ) -> None:\n        \"\"\"Add additional scalars to track.\n\n        # Parameters\n\n        scalars : A dictionary of `scalar key -> value` pairs.\n        \"\"\"\n        ndict = cast(\n            Dict[str, int], (n if isinstance(n, Dict) else defaultdict(lambda: n))  # type: ignore\n        )\n\n        for k in scalars:\n            if k not in self._sums:\n                self._sums[k] = ndict[k] * scalars[k]\n                self._counts[k] = ndict[k]\n            else:\n                self._sums[k] += ndict[k] * scalars[k]\n                self._counts[k] += ndict[k]\n\n    def pop_and_reset(self) -> Dict[str, float]:\n        \"\"\"Return tracked means and reset.\n\n        On resetting all previously tracked values are discarded.\n\n        # Returns\n\n        A dictionary of `scalar key -> current mean` pairs corresponding to those\n        values added with `add_scalars`.\n        \"\"\"\n        means = OrderedDict(\n            [(k, float(self._sums[k] / self._counts[k])) for k in self._sums]\n        )\n        self.reset()\n        return means\n\n    def reset(self):\n        
self._sums = OrderedDict()\n        self._counts = OrderedDict()\n\n    def sums(self):\n        return copy.copy(self._sums)\n\n    def counts(self) -> Dict[str, int]:\n        return copy.copy(self._counts)\n\n    def means(self) -> Dict[str, float]:\n        return OrderedDict(\n            [(k, float(self._sums[k] / self._counts[k])) for k in self._sums]\n        )\n\n    @property\n    def empty(self):\n        assert len(self._sums) == len(\n            self._counts\n        ), \"Mismatched length of _sums {} and _counts {}\".format(\n            len(self._sums), len(self._counts)\n        )\n        return len(self._sums) == 0\n\n\nclass LoggingPackage:\n    \"\"\"Data package used for logging.\"\"\"\n\n    def __init__(\n        self,\n        mode: str,\n        training_steps: Optional[int],\n        storage_uuid_to_total_experiences: Dict[str, int],\n        pipeline_stage: Optional[int] = None,\n        checkpoint_file_name: Optional[str] = None,\n    ) -> None:\n        self.mode = mode\n\n        self.training_steps: int = training_steps\n        self.storage_uuid_to_total_experiences: Dict[str, int] = (\n            storage_uuid_to_total_experiences\n        )\n        self.pipeline_stage = pipeline_stage\n\n        self.metrics_tracker = ScalarMeanTracker()\n        self.info_trackers: Dict[Tuple[str, str], ScalarMeanTracker] = {}\n\n        self.metric_dicts: List[Any] = []\n        self.viz_data: Optional[Dict[str, List[Dict[str, Any]]]] = None\n        self.checkpoint_file_name: Optional[str] = checkpoint_file_name\n        self.task_callback_data: List[Any] = []\n\n        self.num_empty_metrics_dicts_added: int = 0\n\n    @property\n    def num_non_empty_metrics_dicts_added(self) -> int:\n        return len(self.metric_dicts)\n\n    @staticmethod\n    def _metrics_dict_is_empty(\n        single_task_metrics_dict: Dict[str, Union[float, int]]\n    ) -> bool:\n        return (\n            len(single_task_metrics_dict) == 0\n            or (\n    
            len(single_task_metrics_dict) == 1\n                and \"task_info\" in single_task_metrics_dict\n            )\n            or (\n                \"success\" in single_task_metrics_dict\n                and single_task_metrics_dict[\"success\"] is None\n            )\n        )\n\n    def add_metrics_dict(\n        self, single_task_metrics_dict: Dict[str, Union[float, int]]\n    ) -> bool:\n        if self._metrics_dict_is_empty(single_task_metrics_dict):\n            self.num_empty_metrics_dicts_added += 1\n            return False\n\n        self.metric_dicts.append(single_task_metrics_dict)\n        self.metrics_tracker.add_scalars(\n            {k: v for k, v in single_task_metrics_dict.items() if k != \"task_info\"}\n        )\n        return True\n\n    def add_info_dict(\n        self,\n        info_dict: Dict[str, Union[int, float]],\n        n: int,\n        stage_component_uuid: str,\n        storage_uuid: str,\n    ):\n        key = (stage_component_uuid, storage_uuid)\n        if key not in self.info_trackers:\n            self.info_trackers[key] = ScalarMeanTracker()\n\n        assert n >= 0\n        self.info_trackers[key].add_scalars(scalars=info_dict, n=n)\n\n\nclass LinearDecay(object):\n    \"\"\"Linearly decay between two values over some number of steps.\n\n    Obtain the value corresponding to the `i`-th step by calling\n    an instance of this class with the value `i`.\n\n    # Parameters\n\n    steps : The number of steps over which to decay.\n    startp : The starting value.\n    endp : The ending value.\n    \"\"\"\n\n    def __init__(self, steps: int, startp: float = 1.0, endp: float = 0.0) -> None:\n        \"\"\"Initializer.\n\n        See class documentation for parameter definitions.\n        \"\"\"\n        self.steps = steps\n        self.startp = startp\n        self.endp = endp\n\n    def __call__(self, epoch: int) -> float:\n        \"\"\"Get the decayed value for `epoch` number of steps.\n\n        # Parameters\n\n 
       epoch : The number of steps.\n\n        # Returns\n\n        Decayed value for `epoch` number of steps.\n        \"\"\"\n        epoch = max(min(epoch, self.steps), 0)\n        return self.startp + (self.endp - self.startp) * (epoch / float(self.steps))\n\n\nclass MultiLinearDecay(object):\n    \"\"\"Container for multiple stages of LinearDecay.\n\n    Obtain the value corresponding to the `i`-th step by calling\n    an instance of this class with the value `i`.\n\n    # Parameters\n\n    stages: List of `LinearDecay` objects to be sequentially applied\n        for the number of steps in each stage.\n    \"\"\"\n\n    def __init__(self, stages: Sequence[LinearDecay]) -> None:\n        \"\"\"Initializer.\n\n        See class documentation for parameter definitions.\n        \"\"\"\n        self.stages = stages\n        self.steps = np.cumsum([stage.steps for stage in self.stages])\n        self.total_steps = self.steps[-1]\n        self.stage_idx = -1\n        self.min_steps = 0\n        self.max_steps = 0\n        self.stage = None\n\n    def __call__(self, epoch: int) -> float:\n        \"\"\"Get the decayed value factor for `epoch` number of steps.\n\n        # Parameters\n\n        epoch : The number of steps.\n\n        # Returns\n\n        Decayed value for `epoch` number of steps.\n        \"\"\"\n        epoch = max(min(epoch, self.total_steps), 0)\n\n        while epoch >= self.max_steps and self.max_steps < self.total_steps:\n            self.stage_idx += 1\n            assert self.stage_idx < len(self.stages)\n\n            self.min_steps = self.max_steps\n            self.max_steps = self.steps[self.stage_idx]\n            self.stage = self.stages[self.stage_idx]\n\n        return self.stage(epoch - self.min_steps)\n\n\n# noinspection PyTypeHints,PyUnresolvedReferences\ndef set_deterministic_cudnn() -> None:\n    \"\"\"Makes cudnn deterministic.\n\n    This may slow down computations.\n    \"\"\"\n    if torch.cuda.is_available():\n        
torch.backends.cudnn.deterministic = True  # type: ignore\n        torch.backends.cudnn.benchmark = False  # type: ignore\n\n\ndef set_seed(seed: Optional[int] = None) -> None:\n    \"\"\"Set seeds for multiple (cpu) sources of randomness.\n\n    Sets seeds for (cpu) `pytorch`, base `random`, and `numpy`.\n\n    # Parameters\n\n    seed : The seed to set. If set to None, keep using the current seed.\n    \"\"\"\n    if seed is None:\n        return\n\n    torch.manual_seed(seed)  # seeds the RNG for all devices (CPU and GPUs)\n    random.seed(seed)\n    np.random.seed(seed)\n\n\nclass EarlyStoppingCriterion(abc.ABC):\n    \"\"\"Abstract class for class who determines if training should stop early in\n    a particular pipeline stage.\"\"\"\n\n    @abc.abstractmethod\n    def __call__(\n        self,\n        stage_steps: int,\n        total_steps: int,\n        training_metrics: ScalarMeanTracker,\n    ) -> bool:\n        \"\"\"Returns `True` if training should be stopped early.\n\n        # Parameters\n\n        stage_steps: Total number of steps taken in the current pipeline stage.\n        total_steps: Total number of steps taken during training so far (includes steps\n            taken in prior pipeline stages).\n        training_metrics: Metrics recovered over some fixed number of steps\n            (see the `metric_accumulate_interval` attribute in the `TrainingPipeline` class)\n            training.\n        \"\"\"\n        raise NotImplementedError\n\n\nclass NeverEarlyStoppingCriterion(EarlyStoppingCriterion):\n    \"\"\"Implementation of `EarlyStoppingCriterion` which never stops early.\"\"\"\n\n    def __call__(\n        self,\n        stage_steps: int,\n        total_steps: int,\n        training_metrics: ScalarMeanTracker,\n    ) -> bool:\n        return False\n\n\nclass OffPolicyPipelineComponent(NamedTuple):\n    \"\"\"An off-policy component for a PipeLineStage.\n\n    # Attributes\n\n    data_iterator_builder: A function to instantiate a Data 
Iterator (with a __next__(self) method)\n    loss_names: list of unique names assigned to off-policy losses\n    updates: number of off-policy updates between on-policy rollout collections\n    loss_weights : A list of floating point numbers describing the relative weights\n        applied to the losses referenced by `loss_names`. Should be the same length\n        as `loss_names`. If this is `None`, all weights will be assumed to be one.\n    data_iterator_kwargs_generator: Optional generator of keyword arguments for data_iterator_builder (useful for\n        distributed training. It takes\n        a `cur_worker` int value,\n        a `rollouts_per_worker` list of number of samplers per training worker,\n        and an optional random `seed` shared by all workers, which can be None.\n    \"\"\"\n\n    data_iterator_builder: Callable[..., Iterator]\n    loss_names: List[str]\n    updates: int\n    loss_weights: Optional[Sequence[float]] = None\n    data_iterator_kwargs_generator: Callable[\n        [int, Sequence[int], Optional[int]], Dict\n    ] = lambda cur_worker, rollouts_per_worker, seed: {}\n\n\nclass TrainingSettings:\n    \"\"\"Class defining parameters used for training (within a stage or the\n    entire pipeline).\n\n    # Attributes\n\n    num_mini_batch : The number of mini-batches to break a rollout into.\n    update_repeats : The number of times we will cycle through the mini-batches corresponding\n        to a single rollout doing gradient updates.\n    max_grad_norm : The maximum \"inf\" norm of any gradient step (gradients are clipped to not exceed this).\n    num_steps : Total number of steps a single agent takes in a rollout.\n    gamma : Discount factor applied to rewards (should be in [0, 1]).\n    use_gae : Whether or not to use generalized advantage estimation (GAE).\n    gae_lambda : The additional parameter used in GAE.\n    advance_scene_rollout_period: Optional number of rollouts before enforcing an advance scene in all samplers.\n    
save_interval : The frequency with which to save (in total agent steps taken). If `None` then *no*\n        checkpoints will be saved. Otherwise, in addition to the checkpoints being saved every\n        `save_interval` steps, a checkpoint will *always* be saved at the end of each pipeline stage.\n        If `save_interval <= 0` then checkpoints will only be saved at the end of each pipeline stage.\n    metric_accumulate_interval : The frequency with which training/validation metrics are accumulated\n        (in total agent steps). Metrics accumulated in an interval are logged (if `should_log` is `True`)\n        and used by the stage's early stopping criterion (if any).\n    \"\"\"\n\n    num_mini_batch: Optional[int]\n    update_repeats: Optional[Union[int, Sequence[int]]]\n    max_grad_norm: Optional[float]\n    num_steps: Optional[int]\n    gamma: Optional[float]\n    use_gae: Optional[bool]\n    gae_lambda: Optional[float]\n    advance_scene_rollout_period: Optional[int]\n    save_interval: Optional[int]\n    metric_accumulate_interval: Optional[int]\n\n    # noinspection PyUnresolvedReferences\n    def __init__(\n        self,\n        num_mini_batch: Optional[int] = None,\n        update_repeats: Optional[int] = None,\n        max_grad_norm: Optional[float] = None,\n        num_steps: Optional[int] = None,\n        gamma: Optional[float] = None,\n        use_gae: Optional[bool] = None,\n        gae_lambda: Optional[float] = None,\n        advance_scene_rollout_period: Optional[int] = None,\n        save_interval: Optional[int] = None,\n        metric_accumulate_interval: Optional[int] = None,\n    ):\n        self._key_to_setting = prepare_locals_for_super(locals(), ignore_kwargs=True)\n        self._training_setting_keys = tuple(sorted(self._key_to_setting.keys()))\n\n        self._defaults: Optional[\"TrainingSettings\"] = None\n\n    def keys(self) -> Tuple[str, ...]:\n        return self._training_setting_keys\n\n    def has_key(self, key: str) -> 
bool:\n        return key in self._key_to_setting\n\n    def set_defaults(self, defaults: \"TrainingSettings\"):\n        assert self._defaults is None, \"Defaults can only be set once.\"\n        self._defaults = defaults\n\n    def __getattr__(self, item: str):\n        if item in self._key_to_setting:\n            val = self._key_to_setting[item]\n            if val is None and self._defaults is not None:\n                val = getattr(self._defaults, item)\n            return val\n        else:\n            super(TrainingSettings, self).__getattribute__(item)\n\n\n@attr.s(kw_only=True)\nclass StageComponent:\n    \"\"\"A custom component for a PipelineStage, possibly including overrides to\n    the `TrainingSettings` from the `TrainingPipeline` and `PipelineStage`.\n\n    # Attributes\n\n    uuid: the name of this component\n    storage_uuid: the name of the `ExperienceStorage` that will be used with this component.\n    loss_names: list of unique names assigned to off-policy losses\n    training_settings: Instance of `TrainingSettings`\n    loss_weights : A list of floating point numbers describing the relative weights\n        applied to the losses referenced by `loss_names`. Should be the same length\n        as `loss_names`. 
If this is `None`, all weights will be assumed to be one.\n    \"\"\"\n\n    uuid: str = attr.ib()\n    storage_uuid: str = attr.ib()\n    loss_names: Sequence[str] = attr.ib()\n    training_settings: TrainingSettings = attr.ib(\n        default=attr.Factory(TrainingSettings)\n    )\n\n    @training_settings.validator\n    def _validate_training_settings(self, attribute, value: TrainingSettings):\n        must_be_none = [\n            \"num_steps\",\n            \"gamma\",\n            \"use_gae\",\n            \"gae_lambda\",\n            \"advance_scene_rollout_period\",\n            \"save_interval\",\n            \"metric_accumulate_interval\",\n        ]\n        for key in must_be_none:\n            assert getattr(value, key) is None, (\n                f\"`{key}` must be `None` in `TrainingSettings` passed to\"\n                f\" `StageComponent` (as such values will be ignored). Pass such\"\n                f\" settings to the `PipelineStage` or `TrainingPipeline` objects instead.\",\n            )\n\n\nclass PipelineStage:\n    \"\"\"A single stage in a training pipeline, possibly including overrides to\n    the global `TrainingSettings` in `TrainingPipeline`.\n\n    # Attributes\n\n    loss_names : A collection of unique names assigned to losses. These will\n        reference the `Loss` objects in a `TrainingPipeline` instance.\n    max_stage_steps : Either the total number of steps agents should take in this stage or\n        a Callable object (e.g. a function)\n    loss_weights : A list of floating point numbers describing the relative weights\n        applied to the losses referenced by `loss_names`. Should be the same length\n        as `loss_names`. 
If this is `None`, all weights will be assumed to be one.\n    teacher_forcing : If applicable, defines the probability an agent will take the\n        expert action (as opposed to its own sampled action) at a given time point.\n    early_stopping_criterion: An `EarlyStoppingCriterion` object which determines if\n        training in this stage should be stopped early. If `None` then no early stopping\n        occurs. If `early_stopping_criterion` is not `None` then we do not guarantee\n        reproducibility when restarting a model from a checkpoint (as the\n        `EarlyStoppingCriterion` object may store internal state which is not\n        saved in the checkpoint). Currently, AllenAct only supports using early stopping\n        criterion when **not** using distributed training.\n    training_settings: Instance of `TrainingSettings`.\n    training_settings_kwargs: For backwards compatibility: arguments to instantiate TrainingSettings when\n     `training_settings` is `None`.\n    \"\"\"\n\n    def __init__(\n        self,\n        *,  # Disables positional arguments. 
Please provide arguments as keyword arguments.\n        max_stage_steps: Union[int, Callable],\n        loss_names: List[str],\n        loss_weights: Optional[Sequence[float]] = None,\n        teacher_forcing: Optional[Callable[[int], float]] = None,\n        stage_components: Optional[Sequence[StageComponent]] = None,\n        early_stopping_criterion: Optional[EarlyStoppingCriterion] = None,\n        training_settings: Optional[TrainingSettings] = None,\n        callback_to_change_engine_attributes: Optional[Dict[str, Any]] = None,\n        **training_settings_kwargs,\n    ):\n        self.callback_to_change_engine_attributes = callback_to_change_engine_attributes\n\n        # Populate TrainingSettings members\n        # THIS MUST COME FIRST IN `__init__` as otherwise `__getattr__` will loop infinitely.\n        assert training_settings is None or len(training_settings_kwargs) == 0\n        if training_settings is None:\n            training_settings = TrainingSettings(**training_settings_kwargs)\n        self.training_settings = training_settings\n        assert self.training_settings.update_repeats is None or isinstance(\n            self.training_settings.update_repeats, numbers.Integral\n        ), (\n            \"`training_settings` passed to `PipelineStage` must have `training_settings.update_repeats`\"\n            \" equal to `None` or an integer. 
If you'd like to specify per-loss `update_repeats` then please\"\n            \" do so in the training settings of a `StageComponent`.\"\n        )\n\n        self.loss_names = loss_names\n        self.max_stage_steps = max_stage_steps\n\n        self.loss_weights = (\n            [1.0] * len(loss_names) if loss_weights is None else loss_weights\n        )\n        assert len(self.loss_weights) == len(self.loss_names)\n\n        self.teacher_forcing = teacher_forcing\n\n        self.early_stopping_criterion = early_stopping_criterion\n\n        self.steps_taken_in_stage: int = 0\n        self.rollout_count = 0\n        self.early_stopping_criterion_met = False\n\n        self.uuid_to_loss_weight: Dict[str, float] = {\n            loss_uuid: loss_weight\n            for loss_uuid, loss_weight in zip(loss_names, self.loss_weights)\n        }\n\n        self._stage_components: List[StageComponent] = []\n        self.uuid_to_stage_component: Dict[str, StageComponent] = {}\n        self.storage_uuid_to_steps_taken_in_stage: Dict[str, int] = {}\n        self.stage_component_uuid_to_stream_memory: Dict[str, Memory] = {}\n\n        if stage_components is not None:\n            for stage_component in stage_components:\n                self.add_stage_component(stage_component)\n\n        # Sanity check\n        for key in training_settings.keys():\n            assert not hasattr(\n                self, key\n            ), f\"`{key}` should be defined in `TrainingSettings`, not in `PipelineStage`.\"\n\n    def reset(self):\n        self.steps_taken_in_stage: int = 0\n        self.rollout_count = 0\n        self.early_stopping_criterion_met = False\n\n        for k in self.storage_uuid_to_steps_taken_in_stage:\n            self.storage_uuid_to_steps_taken_in_stage[k] = 0\n\n        for memory in self.stage_component_uuid_to_stream_memory.values():\n            memory.clear()\n\n    # TODO: Replace Any with the correct type\n    def change_engine_attributes(self, engine: 
Any):\n        if self.callback_to_change_engine_attributes is not None:\n            for key, value in self.callback_to_change_engine_attributes.items():\n                # check if the engine has the attribute\n                assert hasattr(engine, key)\n\n                func = value[\"func\"]\n                args = value[\"args\"]\n                setattr(engine, key, func(engine, **args))\n\n    @property\n    def stage_components(self) -> Tuple[StageComponent]:\n        return tuple(self._stage_components)\n\n    def add_stage_component(self, stage_component: StageComponent):\n        assert stage_component.uuid not in self.uuid_to_stage_component\n\n        # Setting default training settings for the `stage_component`\n        sc_ts = stage_component.training_settings\n        sc_ts.set_defaults(self.training_settings)\n\n        # Handling the case where different losses should be updated different\n        # numbers of times\n        stage_update_repeats = self.training_settings.update_repeats\n        if stage_update_repeats is not None and sc_ts.update_repeats is None:\n            loss_to_update_repeats = dict(zip(self.loss_names, stage_update_repeats))\n            if isinstance(stage_update_repeats, Sequence):\n                sc_ts.update_repeats = [\n                    loss_to_update_repeats[uuid] for uuid in stage_component.loss_names\n                ]\n            else:\n                sc_ts.update_repeats = stage_update_repeats\n\n        self._stage_components.append(stage_component)\n        self.uuid_to_stage_component[stage_component.uuid] = stage_component\n\n        if (\n            stage_component.storage_uuid\n            not in self.storage_uuid_to_steps_taken_in_stage\n        ):\n            self.storage_uuid_to_steps_taken_in_stage[stage_component.storage_uuid] = 0\n        else:\n            raise NotImplementedError(\n                \"Cannot have multiple stage components which\"\n                f\" use the same storage 
(reused storage uuid: '{stage_component.storage_uuid}'.\"\n            )\n\n        self.stage_component_uuid_to_stream_memory[stage_component.uuid] = Memory()\n\n    def __setattr__(self, key: str, value: Any):\n        if key not in [\n            \"training_settings\",\n            \"callback_to_change_engine_attributes\",\n        ] and self.training_settings.has_key(key):\n            raise NotImplementedError(\n                f\"Cannot set {key} in {self.__name__}, update the\"\n                f\" `training_settings` attribute of {self.__name__} instead.\"\n            )\n        else:\n            return super(PipelineStage, self).__setattr__(key, value)\n\n    @property\n    def is_complete(self):\n        return (\n            self.early_stopping_criterion_met\n            or self.steps_taken_in_stage >= self.max_stage_steps\n        )\n\n\nclass TrainingPipeline:\n    \"\"\"Class defining the stages (and global training settings) in a training\n    pipeline.\n\n    The training pipeline can be used as an iterator to go through the pipeline\n    stages in, for instance, a loop.\n\n    # Parameters\n\n    named_losses : Dictionary mapping the name of a loss to either an instantiation\n        of that loss or a `Builder` that, when called, will return that loss.\n    pipeline_stages : A list of PipelineStages. Each of these define how the agent\n        will be trained and are executed sequentially.\n    optimizer_builder : Builder object to instantiate the optimizer to use during training.\n    named_storages: Map of storage names to corresponding `ExperienceStorage` instances or `Builder` objects.\n        If this is `None` (or does not contain a value of (sub)type `RolloutStorage`) then a new\n        `Builder[RolloutBlockStorage]` will be created and added by default.\n    rollout_storage_uuid: Optional name of `RolloutStorage`, if `None` given, it will be assigned to the\n    `ExperienceStorage` of subclass `RolloutStorage` in `named_storages`. 
Note that this assumes that there\n    is only a single `RolloutStorage` object in the values of `named_storages`.\n    should_log: `True` if metrics accumulated during training should be logged to the console as well\n        as to a tensorboard file.\n    lr_scheduler_builder : Optional builder object to instantiate the learning rate scheduler used\n        through the pipeline.\n    training_settings: Instance of `TrainingSettings`\n    training_settings_kwargs: For backwards compatibility: arguments to instantiate TrainingSettings when\n        `training_settings` is `None`.\n    \"\"\"\n\n    # noinspection PyUnresolvedReferences\n    def __init__(\n        self,\n        *,\n        named_losses: Dict[str, Union[Loss, Builder[Loss]]],\n        pipeline_stages: List[PipelineStage],\n        optimizer_builder: Builder[optim.Optimizer],  # type: ignore\n        named_storages: Optional[\n            Dict[str, Union[ExperienceStorage, Builder[ExperienceStorage]]]\n        ] = None,\n        rollout_storage_uuid: Optional[str] = None,\n        should_log: bool = True,\n        lr_scheduler_builder: Optional[Builder[_LRScheduler]] = None,  # type: ignore\n        training_settings: Optional[TrainingSettings] = None,\n        valid_pipeline_stage: Optional[PipelineStage] = None,\n        test_pipeline_stage: Optional[PipelineStage] = None,\n        **training_settings_kwargs,\n    ):\n        \"\"\"Initializer.\n\n        See class docstring for parameter definitions.\n        \"\"\"\n\n        # Populate TrainingSettings members\n        assert training_settings is None or len(training_settings_kwargs) == 0\n        if training_settings is None:\n            training_settings = TrainingSettings(**training_settings_kwargs)\n        self.training_settings = training_settings\n\n        assert self.training_settings.update_repeats is None or isinstance(\n            self.training_settings.update_repeats, numbers.Integral\n        ), (\n            
\"`training_settings` passed to `TrainingPipeline` must have `training_settings.update_repeats`\"\n            \" equal to `None` or an integer. If you'd like to specify per-loss `update_repeats` then please\"\n            \" do so in the training settings of a `StageComponent`.\"\n        )\n        self.training_settings = training_settings\n\n        self.optimizer_builder = optimizer_builder\n        self.lr_scheduler_builder = lr_scheduler_builder\n\n        self._named_losses = named_losses\n        self._named_storages = self._initialize_named_storages(\n            named_storages=named_storages\n        )\n        self.rollout_storage_uuid = self._initialize_rollout_storage_uuid(\n            rollout_storage_uuid\n        )\n\n        if self.rollout_storage_uuid is None:\n            get_logger().warning(\n                f\"No rollout storage was specified in the TrainingPipeline. This need not be an issue\"\n                f\" if you are performing off-policy training but, otherwise, please ensure you have\"\n                f\" defined a rollout storage in the `named_storages` argument of the TrainingPipeline.\"\n            )\n\n        self.should_log = should_log\n\n        self.pipeline_stages = pipeline_stages\n\n        def if_none_then_empty_stage(stage: Optional[PipelineStage]) -> PipelineStage:\n            return (\n                stage\n                if stage is not None\n                else PipelineStage(max_stage_steps=-1, loss_names=[])\n            )\n\n        self.valid_pipeline_stage = if_none_then_empty_stage(valid_pipeline_stage)\n        self.test_pipeline_stage = if_none_then_empty_stage(test_pipeline_stage)\n\n        assert (\n            len(self.pipeline_stages) == len(set(id(ps) for ps in pipeline_stages))\n            and self.valid_pipeline_stage not in self.pipeline_stages\n            and self.test_pipeline_stage not in self.pipeline_stages\n        ), (\n            \"Duplicate `PipelineStage` object instances found 
in the pipeline stages input\"\n            \" to `TrainingPipeline`. `PipelineStage` objects are not immutable, if you'd\"\n            \" like to have multiple pipeline stages of the same type, please instantiate\"\n            \" multiple separate instances.\"\n        )\n\n        self._ensure_pipeline_stages_all_have_at_least_one_stage_component()\n\n        self._current_stage: Optional[PipelineStage] = None\n        self.rollout_count = 0\n        self._refresh_current_stage(force_stage_search_from_start=True)\n\n    def _initialize_rollout_storage_uuid(\n        self, rollout_storage_uuid: Optional[str]\n    ) -> str:\n        if rollout_storage_uuid is None:\n            rollout_storage_uuids = self._get_uuids_of_rollout_storages(\n                self._named_storages\n            )\n            assert len(rollout_storage_uuids) <= 1, (\n                f\"`rollout_storage_uuid` cannot be automatically inferred as there are multiple storages defined\"\n                f\" (ids: {rollout_storage_uuids}) of type `RolloutStorage`.\"\n            )\n            rollout_storage_uuid = next(iter(rollout_storage_uuids), None)\n        assert (\n            rollout_storage_uuid is None or rollout_storage_uuid in self._named_storages\n        )\n        return rollout_storage_uuid\n\n    def _ensure_pipeline_stages_all_have_at_least_one_stage_component(self):\n        rollout_storages_uuids = self._get_uuids_of_rollout_storages(\n            self._named_storages\n        )\n\n        named_pipeline_stages = {\n            f\"{i}th\": ps for i, ps in enumerate(self.pipeline_stages)\n        }\n\n        named_pipeline_stages[\"valid\"] = self.valid_pipeline_stage\n        named_pipeline_stages[\"test\"] = self.test_pipeline_stage\n\n        for stage_name, stage in named_pipeline_stages.items():\n            # Forward default `TrainingSettings` to all `PipelineStage`s settings:\n            stage.training_settings.set_defaults(defaults=self.training_settings)\n\n    
        if len(stage.stage_components) == 0:\n                assert len(rollout_storages_uuids) <= 1, (\n                    f\"In {stage_name} pipeline stage: you have several storages specified ({rollout_storages_uuids}) which\"\n                    f\" are subclasses of `RolloutStorage`. This is only allowed when stage components are explicitly\"\n                    f\" defined in every `PipelineStage` instance. You have `PipelineStage`s for which stage components\"\n                    f\" are not specified.\"\n                )\n                if len(rollout_storages_uuids) > 0:\n                    stage.add_stage_component(\n                        StageComponent(\n                            uuid=rollout_storages_uuids[0],\n                            storage_uuid=rollout_storages_uuids[0],\n                            loss_names=stage.loss_names,\n                            training_settings=TrainingSettings(),\n                        )\n                    )\n\n            for sc in stage.stage_components:\n                assert sc.storage_uuid in self._named_storages, (\n                    f\"In {stage_name} pipeline stage: storage with name '{sc.storage_uuid}' not found in collection of\"\n                    f\" defined storages names: {list(self._named_storages.keys())}\"\n                )\n\n            if (\n                self.rollout_storage_uuid is not None\n                and self.rollout_storage_uuid\n                not in stage.storage_uuid_to_steps_taken_in_stage\n            ):\n                stage.storage_uuid_to_steps_taken_in_stage[\n                    self.rollout_storage_uuid\n                ] = 0\n\n    @classmethod\n    def _get_uuids_of_rollout_storages(\n        cls,\n        named_storages: Dict[str, Union[Builder[ExperienceStorage], ExperienceStorage]],\n    ) -> List[str]:\n        return [\n            uuid\n            for uuid, storage in named_storages.items()\n            if isinstance(storage, 
RolloutStorage)\n            or (\n                isinstance(storage, Builder)\n                and issubclass(storage.class_type, RolloutStorage)\n            )\n        ]\n\n    @classmethod\n    def _initialize_named_storages(\n        cls,\n        named_storages: Optional[\n            Dict[str, Union[Builder[ExperienceStorage], ExperienceStorage]]\n        ],\n    ) -> Dict[str, Union[Builder[ExperienceStorage], ExperienceStorage]]:\n        named_storages = {} if named_storages is None else {**named_storages}\n\n        rollout_storages_uuids = cls._get_uuids_of_rollout_storages(named_storages)\n        if len(named_storages) == 0:\n            assert (\n                _DEFAULT_ONPOLICY_UUID not in named_storages\n            ), f\"Storage uuid '{_DEFAULT_ONPOLICY_UUID}' is reserved, please pick a different uuid.\"\n            named_storages[_DEFAULT_ONPOLICY_UUID] = Builder(RolloutBlockStorage)\n            rollout_storages_uuids.append(_DEFAULT_ONPOLICY_UUID)\n        return named_storages\n\n    def _refresh_current_stage(\n        self, force_stage_search_from_start: bool = False\n    ) -> Optional[PipelineStage]:\n        if force_stage_search_from_start:\n            self._current_stage = None\n\n        if self._current_stage is None or self._current_stage.is_complete:\n            if self._current_stage is None:\n                start_index = 0\n            else:\n                start_index = self.pipeline_stages.index(self._current_stage) + 1\n\n            self._current_stage = None\n            for ps in self.pipeline_stages[start_index:]:\n                if not ps.is_complete:\n                    self._current_stage = ps\n                    break\n        return self._current_stage\n\n    @property\n    def total_steps(self) -> int:\n        return sum(ps.steps_taken_in_stage for ps in self.pipeline_stages)\n\n    @property\n    def storage_uuid_to_total_experiences(self) -> Dict[str, int]:\n        totals = {k: 0 for k in 
self._named_storages}\n        for ps in self.pipeline_stages:\n            for k in ps.storage_uuid_to_steps_taken_in_stage:\n                totals[k] += ps.storage_uuid_to_steps_taken_in_stage[k]\n\n        for k in totals:\n            split = k.split(\"__\")\n            if len(split) == 2 and split[1] in [\"valid\", \"test\"]:\n                assert totals[k] == 0, (\n                    \"Total experiences should be 0 for validation/test storages, i.e.\"\n                    \" storages who have `__valid` or `__test` as their suffix. These storages\"\n                    \" will copy their `total_experiences` from the corresponding training\"\n                    \" storage i.e.:\\n\"\n                    \" 1. the storage without the above suffix if it exists, else\\n\"\n                    \" 2. the total number of steps.\"\n                )\n                totals[k] = totals.get(split[0], self.total_steps)\n\n        return totals\n\n    @property\n    def current_stage(self) -> Optional[PipelineStage]:\n        return self._current_stage\n\n    @property\n    def current_stage_index(self) -> Optional[int]:\n        if self.current_stage is None:\n            return None\n        return self.pipeline_stages.index(self.current_stage)\n\n    def before_rollout(self, train_metrics: Optional[ScalarMeanTracker] = None) -> bool:\n        if (\n            train_metrics is not None\n            and self.current_stage.early_stopping_criterion is not None\n        ):\n            self.current_stage.early_stopping_criterion_met = (\n                self.current_stage.early_stopping_criterion(\n                    stage_steps=self.current_stage.steps_taken_in_stage,\n                    total_steps=self.total_steps,\n                    training_metrics=train_metrics,\n                )\n            )\n        if self.current_stage.early_stopping_criterion_met:\n            get_logger().debug(\n                f\"Early stopping criterion met after 
{self.total_steps} total steps \"\n                f\"({self.current_stage.steps_taken_in_stage} in current stage, stage index {self.current_stage_index}).\"\n            )\n        return self.current_stage is not self._refresh_current_stage(\n            force_stage_search_from_start=False\n        )\n\n    def restart_pipeline(self):\n        for ps in self.pipeline_stages:\n            ps.reset()\n\n        if self.valid_pipeline_stage:\n            self.valid_pipeline_stage.reset()\n\n        if self.test_pipeline_stage:\n            self.test_pipeline_stage.reset()\n\n        self._current_stage = None\n        self._refresh_current_stage(force_stage_search_from_start=True)\n\n    def state_dict(self):\n        return dict(\n            stage_info_list=[\n                {\n                    \"early_stopping_criterion_met\": ps.early_stopping_criterion_met,\n                    \"steps_taken_in_stage\": ps.steps_taken_in_stage,\n                    \"storage_uuid_to_steps_taken_in_stage\": ps.storage_uuid_to_steps_taken_in_stage,\n                    \"rollout_count\": ps.rollout_count,\n                }\n                for ps in self.pipeline_stages\n            ],\n            rollout_count=self.rollout_count,\n        )\n\n    def load_state_dict(self, state_dict: Dict[str, Any]):\n        if \"off_policy_epochs\" in state_dict:\n            get_logger().warning(\n                \"Loaded state dict was saved using an older version of AllenAct.\"\n                \" If you are attempting to restart training for a model that had an off-policy component, be aware\"\n                \" that logging for the off-policy component will not behave as it previously did.\"\n                \" Additionally, while the total step count will remain accurate, step counts\"\n                \" associated with losses will be reset to step 0.\"\n            )\n\n        for ps, stage_info in zip(self.pipeline_stages, state_dict[\"stage_info_list\"]):\n            
ps.early_stopping_criterion_met = stage_info[\"early_stopping_criterion_met\"]\n            ps.steps_taken_in_stage = stage_info[\"steps_taken_in_stage\"]\n\n            if \"storage_uuid_to_steps_taken_in_stage\" in stage_info:\n                ps.storage_uuid_to_steps_taken_in_stage = stage_info[\n                    \"storage_uuid_to_steps_taken_in_stage\"\n                ]\n                ps.rollout_count = stage_info[\"rollout_count\"]\n\n        self.rollout_count = state_dict[\"rollout_count\"]\n\n        self._refresh_current_stage(force_stage_search_from_start=True)\n\n    @property\n    def rollout_storage(self) -> Optional[RolloutStorage]:\n        if self.rollout_storage_uuid is None:\n            return None\n\n        rs = self._named_storages[self.rollout_storage_uuid]\n        if isinstance(rs, Builder):\n            rs = rs()\n            self._named_storages[self.rollout_storage_uuid] = rs\n\n        return cast(RolloutStorage, rs)\n\n    def get_stage_storage(\n        self, stage: PipelineStage\n    ) -> \"OrderedDict[str, ExperienceStorage]\":\n        storage_uuids_for_current_stage_set = set(\n            sc.storage_uuid for sc in stage.stage_components\n        )\n\n        # Always include self.rollout_storage_uuid in the current stage storage (when the uuid is defined)\n        if self.rollout_storage_uuid is not None:\n            storage_uuids_for_current_stage_set.add(self.rollout_storage_uuid)\n\n        storage_uuids_for_current_stage = sorted(\n            list(storage_uuids_for_current_stage_set)\n        )\n\n        for storage_uuid in storage_uuids_for_current_stage:\n            if isinstance(self._named_storages[storage_uuid], Builder):\n                self._named_storages[storage_uuid] = cast(\n                    Builder[\"ExperienceStorage\"],\n                    self._named_storages[storage_uuid],\n                )()\n\n        return OrderedDict(\n            (k, self._named_storages[k]) for k in 
storage_uuids_for_current_stage\n        )\n\n    @property\n    def current_stage_storage(self) -> \"OrderedDict[str, ExperienceStorage]\":\n        return self.get_stage_storage(self.current_stage)\n\n    def get_loss(self, uuid: str):\n        if isinstance(self._named_losses[uuid], Builder):\n            self._named_losses[uuid] = cast(\n                Builder[Union[\"AbstractActorCriticLoss\", \"GenericAbstractLoss\"]],\n                self._named_losses[uuid],\n            )()\n        return self._named_losses[uuid]\n\n    @property\n    def current_stage_losses(\n        self,\n    ) -> Dict[str, Union[AbstractActorCriticLoss, GenericAbstractLoss]]:\n        for loss_name in self.current_stage.loss_names:\n            if isinstance(self._named_losses[loss_name], Builder):\n                self._named_losses[loss_name] = cast(\n                    Builder[Union[\"AbstractActorCriticLoss\", \"GenericAbstractLoss\"]],\n                    self._named_losses[loss_name],\n                )()\n\n        return {\n            loss_name: cast(\n                Union[AbstractActorCriticLoss, GenericAbstractLoss],\n                self._named_losses[loss_name],\n            )\n            for loss_name in self.current_stage.loss_names\n        }\n\n\ndef download_checkpoint_from_wandb(\n    checkpoint_path_dir_or_pattern, all_ckpt_dir, only_allow_one_ckpt=False\n):\n    api = wandb.Api()\n    run_token = checkpoint_path_dir_or_pattern.split(\"//\")[1]\n    ckpt_steps = checkpoint_path_dir_or_pattern.split(\"//\")[2:]\n    if ckpt_steps[-1] == \"\":\n        ckpt_steps = ckpt_steps[:-1]\n    if not only_allow_one_ckpt:\n        ckpts_paths = []\n        for steps in ckpt_steps:\n            ckpt_fn = \"{}-step-{}:latest\".format(run_token, steps)\n            artifact = api.artifact(ckpt_fn)\n            _ = artifact.download(all_ckpt_dir)\n            ckpt_dir = \"{}/ckpt-{}.pt\".format(all_ckpt_dir, steps)\n            
shutil.move(\"{}/ckpt.pt\".format(all_ckpt_dir), ckpt_dir)\n            ckpts_paths.append(ckpt_dir)\n        return ckpts_paths\n    else:\n        assert len(ckpt_steps) == 1\n        step = ckpt_steps[0]\n        ckpt_fn = \"{}-step-{}:latest\".format(run_token, step)\n        artifact = api.artifact(ckpt_fn)\n        _ = artifact.download(all_ckpt_dir)\n        ckpt_dir = \"{}/ckpt-{}.pt\".format(all_ckpt_dir, step)\n        shutil.move(\"{}/ckpt.pt\".format(all_ckpt_dir), ckpt_dir)\n        return ckpt_dir\n"
  },
  {
    "path": "allenact/utils/inference.py",
    "content": "from typing import Optional, cast, Tuple, Any, Dict\n\nimport attr\nimport torch\n\nfrom allenact.algorithms.onpolicy_sync.policy import ActorCriticModel\nfrom allenact.algorithms.onpolicy_sync.storage import RolloutStorage\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams\nfrom allenact.base_abstractions.misc import (\n    Memory,\n    ObservationType,\n    ActorCriticOutput,\n    DistributionType,\n)\nfrom allenact.base_abstractions.preprocessor import SensorPreprocessorGraph\nfrom allenact.utils import spaces_utils as su\nfrom allenact.utils.tensor_utils import batch_observations\n\n\n@attr.s(kw_only=True)\nclass InferenceAgent:\n    actor_critic: ActorCriticModel = attr.ib()\n    rollout_storage: RolloutStorage = attr.ib()\n    device: torch.device = attr.ib()\n    sensor_preprocessor_graph: Optional[SensorPreprocessorGraph] = attr.ib()\n    steps_before_rollout_refresh: int = attr.ib(default=128)\n    memory: Optional[Memory] = attr.ib(default=None)\n    steps_taken_in_task: int = attr.ib(default=0)\n    last_action_flat: Optional = attr.ib(default=None)\n    has_initialized: Optional = attr.ib(default=False)\n\n    def __attrs_post_init__(self):\n        self.actor_critic.eval()\n        self.actor_critic.to(device=self.device)\n        if self.memory is not None:\n            self.memory.to(device=self.device)\n        if self.sensor_preprocessor_graph is not None:\n            self.sensor_preprocessor_graph.to(self.device)\n\n        self.rollout_storage.to(self.device)\n        self.rollout_storage.set_partition(index=0, num_parts=1)\n\n    @classmethod\n    def from_experiment_config(\n        cls,\n        exp_config: ExperimentConfig,\n        device: torch.device,\n        checkpoint_path: Optional[str] = None,\n        model_state_dict: Optional[Dict[str, Any]] = None,\n        mode: str = \"test\",\n    ):\n        assert (\n            checkpoint_path is None or model_state_dict is None\n       
 ), \"Cannot have `checkpoint_path` and `model_state_dict` both non-None.\"\n        rollout_storage = exp_config.training_pipeline().rollout_storage\n\n        machine_params = exp_config.machine_params(mode)\n        if not isinstance(machine_params, MachineParams):\n            machine_params = MachineParams(**machine_params)\n\n        sensor_preprocessor_graph = machine_params.sensor_preprocessor_graph\n\n        actor_critic = cast(\n            ActorCriticModel,\n            exp_config.create_model(\n                sensor_preprocessor_graph=sensor_preprocessor_graph\n            ),\n        )\n\n        if checkpoint_path is not None:\n            actor_critic.load_state_dict(\n                torch.load(checkpoint_path, map_location=\"cpu\")[\"model_state_dict\"]\n            )\n        elif model_state_dict is not None:\n            actor_critic.load_state_dict(\n                model_state_dict\n                if \"model_state_dict\" not in model_state_dict\n                else model_state_dict[\"model_state_dict\"]\n            )\n\n        return cls(\n            actor_critic=actor_critic,\n            rollout_storage=rollout_storage,\n            device=device,\n            sensor_preprocessor_graph=sensor_preprocessor_graph,\n        )\n\n    def reset(self):\n        if self.has_initialized:\n            self.rollout_storage.after_updates()\n        self.steps_taken_in_task = 0\n        self.memory = None\n\n    def act(self, observations: ObservationType):\n        # Batch of size 1\n        obs_batch = batch_observations([observations], device=self.device)\n        if self.sensor_preprocessor_graph is not None:\n            obs_batch = self.sensor_preprocessor_graph.get_observations(obs_batch)\n\n        if self.steps_taken_in_task == 0:\n            self.has_initialized = True\n            self.rollout_storage.initialize(\n                observations=obs_batch,\n                num_samplers=1,\n                
recurrent_memory_specification=self.actor_critic.recurrent_memory_specification,\n                action_space=self.actor_critic.action_space,\n            )\n            self.rollout_storage.after_updates()\n        else:\n            dummy_val = torch.zeros((1, 1), device=self.device)  # Unused dummy value\n            self.rollout_storage.add(\n                observations=obs_batch,\n                memory=self.memory,\n                actions=self.last_action_flat[0],\n                action_log_probs=dummy_val,\n                value_preds=dummy_val,\n                rewards=dummy_val,\n                masks=torch.ones(\n                    (1, 1), device=self.device\n                ),  # Always == 1 as we're in a single task until `reset`\n            )\n\n        agent_input = self.rollout_storage.agent_input_for_next_step()\n\n        actor_critic_output, self.memory = cast(\n            Tuple[ActorCriticOutput[DistributionType], Optional[Memory]],\n            self.actor_critic(**agent_input),\n        )\n\n        action = actor_critic_output.distributions.sample()\n        self.last_action_flat = su.flatten(self.actor_critic.action_space, action)\n\n        self.steps_taken_in_task += 1\n\n        if self.steps_taken_in_task % self.steps_before_rollout_refresh == 0:\n            self.rollout_storage.after_updates()\n\n        return su.action_list(self.actor_critic.action_space, self.last_action_flat)[0]\n"
  },
  {
    "path": "allenact/utils/misc_utils.py",
    "content": "import copy\nimport functools\nimport hashlib\nimport inspect\nimport json\nimport math\nimport os\nimport pdb\nimport random\nimport subprocess\nimport sys\nimport urllib\nimport urllib.request\nfrom collections import Counter\nfrom contextlib import contextmanager\nfrom typing import Sequence, List, Optional, Tuple, Hashable\n\nimport filelock\nimport numpy as np\nimport torch\nfrom scipy.special import comb\n\nfrom allenact.utils.system import get_logger\n\nTABLEAU10_RGB = (\n    (31, 119, 180),\n    (255, 127, 14),\n    (44, 160, 44),\n    (214, 39, 40),\n    (148, 103, 189),\n    (140, 86, 75),\n    (227, 119, 194),\n    (127, 127, 127),\n    (188, 189, 34),\n    (23, 190, 207),\n)\n\n\ndef multiprocessing_safe_download_file_from_url(url: str, save_path: str):\n    with filelock.FileLock(save_path + \".lock\"):\n        if not os.path.isfile(save_path):\n            get_logger().info(f\"Downloading file from {url} to {save_path}.\")\n            urllib.request.urlretrieve(\n                url,\n                save_path,\n            )\n        else:\n            get_logger().debug(f\"{save_path} exists - skipping download.\")\n\n\ndef experimental_api(to_decorate):\n    \"\"\"Decorate a function to note that it is part of the experimental API.\"\"\"\n\n    have_warned = [False]\n    name = f\"{inspect.getmodule(to_decorate).__name__}.{to_decorate.__qualname__}\"\n    if to_decorate.__name__ == \"__init__\":\n        name = name.replace(\".__init__\", \"\")\n\n    @functools.wraps(to_decorate)\n    def decorated(*args, **kwargs):\n        if not have_warned[0]:\n            get_logger().warning(\n                f\"'{name}' is a part of AllenAct's experimental API.\"\n                f\" This means: (1) there are likely bugs present and (2)\"\n                f\" we may remove/change this functionality without warning.\"\n                f\" USE AT YOUR OWN RISK.\",\n            )\n            have_warned[0] = True\n        return 
to_decorate(*args, **kwargs)\n\n    return decorated\n\n\ndef deprecated(to_decorate):\n    \"\"\"Decorate a function to note that it has been deprecated.\"\"\"\n\n    have_warned = [False]\n    name = f\"{inspect.getmodule(to_decorate).__name__}.{to_decorate.__qualname__}\"\n    if to_decorate.__name__ == \"__init__\":\n        name = name.replace(\".__init__\", \"\")\n\n    @functools.wraps(to_decorate)\n    def decorated(*args, **kwargs):\n        if not have_warned[0]:\n            get_logger().warning(\n                f\"'{name}' has been deprecated and will soon be removed from AllenAct's API.\"\n                f\" Please discontinue your use of this function.\",\n            )\n            have_warned[0] = True\n        return to_decorate(*args, **kwargs)\n\n    return decorated\n\n\nclass NumpyJSONEncoder(json.JSONEncoder):\n    \"\"\"JSON encoder for numpy objects.\n\n    Based off the stackoverflow answer by Jie Yang here: https://stackoverflow.com/a/57915246.\n    The license for this code is [BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/).\n    \"\"\"\n\n    def default(self, obj):\n        if isinstance(obj, np.void):\n            return None\n        elif isinstance(obj, np.bool_):\n            return bool(obj)\n        elif isinstance(obj, np.integer):\n            return int(obj)\n        elif isinstance(obj, np.floating):\n            return float(obj)\n        elif isinstance(obj, np.ndarray):\n            return obj.tolist()\n        else:\n            return super(NumpyJSONEncoder, self).default(obj)\n\n\n@contextmanager\ndef tensor_print_options(**print_opts):\n    torch_print_opts = copy.deepcopy(torch._tensor_str.PRINT_OPTS)\n    np_print_opts = np.get_printoptions()\n    try:\n        torch.set_printoptions(**print_opts)\n        np.set_printoptions(**print_opts)\n        yield None\n    finally:\n        torch.set_printoptions(**{k: getattr(torch_print_opts, k) for k in print_opts})\n        
np.set_printoptions(**np_print_opts)\n\n\ndef md5_hash_str_as_int(to_hash: str):\n    return int(\n        hashlib.md5(to_hash.encode()).hexdigest(),\n        16,\n    )\n\n\ndef get_git_diff_of_project() -> Tuple[str, str]:\n    short_sha = (\n        subprocess.check_output([\"git\", \"describe\", \"--always\"]).decode(\"utf-8\").strip()\n    )\n    diff = subprocess.check_output([\"git\", \"diff\", short_sha]).decode(\"utf-8\")\n    return short_sha, diff\n\n\nclass HashableDict(dict):\n    \"\"\"A dictionary which is hashable so long as all of its values are\n    hashable.\n\n    A HashableDict object will allow setting / deleting of items until\n    the first time that `__hash__()` is called on it after which\n    attempts to set or delete items will throw `RuntimeError`\n    exceptions.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n\n        self._hash_has_been_called = False\n\n    def __key(self):\n        return tuple((k, self[k]) for k in sorted(self))\n\n    def __hash__(self):\n        self._hash_has_been_called = True\n        return hash(self.__key())\n\n    def __eq__(self, other):\n        return self.__key() == other.__key()\n\n    def __setitem__(self, *args, **kwargs):\n        if not self._hash_has_been_called:\n            return super(HashableDict, self).__setitem__(*args, **kwargs)\n        raise RuntimeError(\"Cannot set item in HashableDict after having called hash.\")\n\n    def __delitem__(self, *args, **kwargs):\n        if not self._hash_has_been_called:\n            return super(HashableDict, self).__delitem__(*args, **kwargs)\n        raise RuntimeError(\n            \"Cannot delete item in HashableDict after having called hash.\"\n        )\n\n\ndef partition_sequence(seq: Sequence, parts: int) -> List:\n    assert 0 < parts, f\"parts [{parts}] must be greater > 0\"\n    assert parts <= len(seq), f\"parts [{parts}] > len(seq) [{len(seq)}]\"\n    n = len(seq)\n\n    quotient = n // 
parts\n    remainder = n % parts\n    counts = [quotient + (i < remainder) for i in range(parts)]\n    inds = np.cumsum([0] + counts)\n    return [seq[ind0:ind1] for ind0, ind1 in zip(inds[:-1], inds[1:])]\n\n\ndef unzip(seq: Sequence[Tuple], n: Optional[int]):\n    \"\"\"Undoes a `zip` operation.\n\n    # Parameters\n\n    seq: The sequence of tuples that should be unzipped\n    n: The number of items in each tuple. This is an optional value but is necessary if\n       `len(seq) == 0` (as there is no other way to infer how many empty lists were zipped together\n        in this case) and can otherwise be used to error check.\n\n    # Returns\n\n    A tuple (of length `n` if `n` is given) of lists where the ith list contains all\n    the ith elements from the tuples in the input `seq`.\n    \"\"\"\n    assert n is not None or len(seq) != 0\n    if n is None:\n        n = len(seq[0])\n    lists = [[] for _ in range(n)]\n\n    for t in seq:\n        assert len(t) == n\n        for i in range(n):\n            lists[i].append(t[i])\n    return lists\n\n\ndef uninterleave(seq: Sequence, parts: int) -> List:\n    assert 0 < parts <= len(seq)\n    n = len(seq)\n\n    quotient = n // parts\n\n    return [\n        [seq[i + j * parts] for j in range(quotient + 1) if i + j * parts < len(seq)]\n        for i in range(parts)\n    ]\n\n\n@functools.lru_cache(10000)\ndef cached_comb(n: int, m: int):\n    return comb(n, m)\n\n\ndef expected_max_of_subset_statistic(vals: List[float], m: int):\n    n = len(vals)\n    assert m <= n\n\n    vals_and_counts = list(Counter([round(val, 8) for val in vals]).items())\n    vals_and_counts.sort()\n\n    count_so_far = 0\n    logdenom = math.log(comb(n, m))\n\n    expected_max = 0.0\n    for val, num_occurances_of_val in vals_and_counts:\n        count_so_far += num_occurances_of_val\n        if count_so_far < m:\n            continue\n\n        count_where_max = 0\n        for i in range(1, min(num_occurances_of_val, m) + 1):\n            
count_where_max += cached_comb(num_occurances_of_val, i) * cached_comb(\n                count_so_far - num_occurances_of_val, m - i\n            )\n\n        expected_max += val * math.exp(math.log(count_where_max) - logdenom)\n\n    return expected_max\n\n\ndef bootstrap_max_of_subset_statistic(\n    vals: List[float], m: int, reps=1000, seed: Optional[int] = None\n):\n    rstate = None\n    if seed is not None:\n        rstate = random.getstate()\n        random.seed(seed)\n    results = []\n    for _ in range(reps):\n        results.append(\n            expected_max_of_subset_statistic(random.choices(vals, k=len(vals)), m)\n        )\n\n    if seed is not None:\n        random.setstate(rstate)\n    return results\n\n\ndef rand_float(low: float, high: float, shape):\n    assert low <= high\n    try:\n        return np.random.rand(*shape) * (high - low) + low\n    except TypeError as _:\n        return np.random.rand(shape) * (high - low) + low\n\n\ndef all_unique(seq: Sequence[Hashable]):\n    seen = set()\n    for s in seq:\n        if s in seen:\n            return False\n        seen.add(s)\n    return True\n\n\ndef all_equal(s: Sequence):\n    if len(s) <= 1:\n        return True\n    return all(s[0] == ss for ss in s[1:])\n\n\ndef prepare_locals_for_super(\n    local_vars, args_name=\"args\", kwargs_name=\"kwargs\", ignore_kwargs=False\n):\n    assert (\n        args_name not in local_vars\n    ), \"`prepare_locals_for_super` does not support {}.\".format(args_name)\n    new_locals = {k: v for k, v in local_vars.items() if k != \"self\" and \"__\" not in k}\n    if kwargs_name in new_locals:\n        if ignore_kwargs:\n            new_locals.pop(kwargs_name)\n        else:\n            kwargs = new_locals.pop(kwargs_name)\n            kwargs.update(new_locals)\n            new_locals = kwargs\n    return new_locals\n\n\ndef partition_limits(num_items: int, num_parts: int):\n    return (\n        np.round(np.linspace(0, num_items, num_parts + 1, 
endpoint=True))\n        .astype(np.int32)\n        .tolist()\n    )\n\n\ndef str2bool(v: str):\n    v = v.lower().strip()\n    if v in (\"yes\", \"true\", \"t\", \"y\", \"1\"):\n        return True\n    elif v in (\"no\", \"false\", \"f\", \"n\", \"0\"):\n        return False\n    else:\n        raise ValueError(f\"{v} cannot be converted to a bool\")\n\n\nclass ForkedPdb(pdb.Pdb):\n    \"\"\"A Pdb subclass that may be used from a forked multiprocessing child.\"\"\"\n\n    def interaction(self, *args, **kwargs):\n        _stdin = sys.stdin\n        try:\n            sys.stdin = open(\"/dev/stdin\")\n            pdb.Pdb.interaction(self, *args, **kwargs)\n        finally:\n            sys.stdin = _stdin\n"
  },
  {
    "path": "allenact/utils/model_utils.py",
    "content": "\"\"\"Functions used to initialize and manipulate pytorch models.\"\"\"\n\nimport hashlib\nfrom typing import Sequence, Tuple, Union, Optional, Dict, Any, Callable\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\n\nfrom allenact.utils.misc_utils import md5_hash_str_as_int\n\n\ndef md5_hash_of_state_dict(state_dict: Dict[str, Any]):\n    hashables = []\n    for piece in sorted(state_dict.items()):\n        if isinstance(piece[1], (np.ndarray, torch.Tensor, nn.Parameter)):\n            hashables.append(piece[0])\n            if not isinstance(piece[1], np.ndarray):\n                p1 = piece[1].data.cpu().numpy()\n            else:\n                p1 = piece[1]\n            hashables.append(\n                int(\n                    hashlib.md5(p1.tobytes()).hexdigest(),\n                    16,\n                )\n            )\n        else:\n            hashables.append(md5_hash_str_as_int(str(piece)))\n\n    return md5_hash_str_as_int(str(hashables))\n\n\nclass Flatten(nn.Module):\n    \"\"\"Flatten input tensor so that it is of shape (FLATTENED_BATCH x -1).\"\"\"\n\n    # noinspection PyMethodMayBeStatic\n    def forward(self, x):\n        \"\"\"Flatten input tensor.\n\n        # Parameters\n        x : Tensor of size (FLATTENED_BATCH x ...) to flatten to size (FLATTENED_BATCH x -1)\n        # Returns\n        Flattened tensor.\n        \"\"\"\n        return x.reshape(x.size(0), -1)\n\n\ndef init_linear_layer(\n    module: nn.Linear, weight_init: Callable, bias_init: Callable, gain=1\n):\n    \"\"\"Initialize a torch.nn.Linear layer.\n\n    # Parameters\n\n    module : A torch linear layer.\n    weight_init : Function used to initialize the weight parameters of the linear layer. Should take the weight data\n        tensor and gain as input.\n    bias_init : Function used to initialize the bias parameters of the linear layer. 
Should take the bias data\n        tensor and gain as input.\n    gain : The gain to apply.\n\n    # Returns\n\n    The initialized linear layer.\n    \"\"\"\n    weight_init(module.weight.data, gain=gain)\n    bias_init(module.bias.data)\n    return module\n\n\ndef grad_norm(parameters, norm_type=2):\n    if isinstance(parameters, torch.Tensor):\n        parameters = [parameters]\n    parameters = list(filter(lambda p: p.grad is not None, parameters))\n    norm_type = float(norm_type)\n    if norm_type == \"inf\":\n        total_norm = max(p.grad.data.abs().max() for p in parameters)\n    else:\n        total_norm = 0\n        for p in parameters:\n            param_norm = p.grad.data.norm(norm_type)\n            total_norm += param_norm.item() ** norm_type\n        total_norm = total_norm ** (1.0 / norm_type)\n    return total_norm\n\n\ndef make_cnn(\n    input_channels: int,\n    layer_channels: Sequence[int],\n    kernel_sizes: Sequence[Union[int, Tuple[int, int]]],\n    strides: Sequence[Union[int, Tuple[int, int]]],\n    paddings: Sequence[Union[int, Tuple[int, int]]],\n    dilations: Sequence[Union[int, Tuple[int, int]]],\n    output_height: int,\n    output_width: int,\n    output_channels: int,\n    flatten: bool = True,\n    output_relu: bool = True,\n) -> nn.Module:\n    assert (\n        len(layer_channels)\n        == len(kernel_sizes)\n        == len(strides)\n        == len(paddings)\n        == len(dilations)\n    ), \"Mismatched sizes: layers {} kernels {} strides {} paddings {} dilations {}\".format(\n        layer_channels, kernel_sizes, strides, paddings, dilations\n    )\n\n    net = nn.Sequential()\n\n    input_channels_list = [input_channels] + list(layer_channels)\n\n    for it, current_channels in enumerate(layer_channels):\n        net.add_module(\n            \"conv_{}\".format(it),\n            nn.Conv2d(\n                in_channels=input_channels_list[it],\n                out_channels=current_channels,\n                
kernel_size=kernel_sizes[it],\n                stride=strides[it],\n                padding=paddings[it],\n                dilation=dilations[it],\n            ),\n        )\n        if it < len(layer_channels) - 1:\n            net.add_module(\"relu_{}\".format(it), nn.ReLU(inplace=True))\n\n    if flatten:\n        net.add_module(\"flatten\", Flatten())\n        net.add_module(\n            \"fc\",\n            nn.Linear(\n                layer_channels[-1] * output_width * output_height, output_channels\n            ),\n        )\n    if output_relu:\n        net.add_module(\"out_relu\", nn.ReLU(True))\n\n    return net\n\n\ndef compute_cnn_output(\n    cnn: nn.Module,\n    cnn_input: torch.Tensor,\n    permute_order: Optional[Tuple[int, ...]] = (\n        0,  # FLAT_BATCH (flattening steps, samplers and agents)\n        3,  # CHANNEL\n        1,  # ROW\n        2,  # COL\n    ),  # from [FLAT_BATCH x ROW x COL x CHANNEL] flattened input\n):\n    \"\"\"Computes CNN outputs for given inputs.\n\n    # Parameters\n\n    cnn : A torch CNN.\n    cnn_input: A torch Tensor with inputs.\n    permute_order: A permutation Tuple to provide PyTorch dimension order, default (0, 3, 1, 2), where 0 corresponds to\n                   the flattened batch dimensions (combining step, sampler and agent)\n\n    # Returns\n\n    CNN output with dimensions [STEP, SAMPLER, AGENT, CHANNEL, (HEIGHT, WIDTH)].\n    \"\"\"\n    nsteps: int\n    nsamplers: int\n    nagents: int\n\n    assert len(cnn_input.shape) in [\n        5,\n        6,\n    ], \"CNN input must have shape [STEP, SAMPLER, (AGENT,) dim1, dim2, dim3]\"\n\n    nagents: Optional[int] = None\n    if len(cnn_input.shape) == 6:\n        nsteps, nsamplers, nagents = cnn_input.shape[:3]\n    else:\n        nsteps, nsamplers = cnn_input.shape[:2]\n\n    # Make FLAT_BATCH = nsteps * nsamplers (* nagents)\n    cnn_input = cnn_input.view((-1,) + cnn_input.shape[2 + int(nagents is not None) :])\n\n    if permute_order is not None:\n     
   cnn_input = cnn_input.permute(*permute_order)\n    cnn_output = cnn(cnn_input)\n\n    if nagents is not None:\n        cnn_output = cnn_output.reshape(\n            (\n                nsteps,\n                nsamplers,\n                nagents,\n            )\n            + cnn_output.shape[1:]\n        )\n    else:\n        cnn_output = cnn_output.reshape(\n            (\n                nsteps,\n                nsamplers,\n            )\n            + cnn_output.shape[1:]\n        )\n\n    return cnn_output\n\n\ndef simple_conv_and_linear_weights_init(m):\n    if type(m) in [\n        nn.Conv1d,\n        nn.Conv2d,\n        nn.Conv3d,\n        nn.ConvTranspose1d,\n        nn.ConvTranspose2d,\n        nn.ConvTranspose3d,\n    ]:\n        weight_shape = list(m.weight.data.size())\n        fan_in = np.prod(weight_shape[1:4])\n        fan_out = np.prod(weight_shape[2:4]) * weight_shape[0]\n        w_bound = np.sqrt(6.0 / (fan_in + fan_out))\n        m.weight.data.uniform_(-w_bound, w_bound)\n        if m.bias is not None:\n            m.bias.data.fill_(0)\n    elif type(m) == nn.Linear:\n        simple_linear_weights_init(m)\n\n\ndef simple_linear_weights_init(m):\n    if type(m) == nn.Linear:\n        weight_shape = list(m.weight.data.size())\n        fan_in = weight_shape[1]\n        fan_out = weight_shape[0]\n        w_bound = np.sqrt(6.0 / (fan_in + fan_out))\n        m.weight.data.uniform_(-w_bound, w_bound)\n        if m.bias is not None:\n            m.bias.data.fill_(0)\n\n\nclass FeatureEmbedding(nn.Module):\n    \"\"\"A wrapper of nn.Embedding but support zero output Used for extracting\n    features for actions/rewards.\"\"\"\n\n    def __init__(self, input_size, output_size):\n        super().__init__()\n        self.input_size = input_size\n        self.output_size = output_size\n        if self.output_size != 0:\n            self.fc = nn.Embedding(input_size, output_size)\n        else:  # automatically be moved to a device\n            
self.null_embedding: torch.Tensor\n            self.register_buffer(\n                \"null_embedding\",\n                torch.zeros(\n                    0,\n                ),\n                persistent=False,\n            )\n\n    def forward(self, inputs):\n        if self.output_size != 0:\n            return self.fc(inputs)\n        else:\n            return self.null_embedding\n"
  },
  {
    "path": "allenact/utils/multi_agent_viz_utils.py",
    "content": "from typing import Sequence, Any\n\nimport numpy as np\nfrom matplotlib import pyplot as plt, markers\nfrom matplotlib.collections import LineCollection\n\nfrom allenact.utils.viz_utils import TrajectoryViz\n\n\nclass MultiTrajectoryViz(TrajectoryViz):\n    def __init__(\n        self,\n        path_to_trajectory_prefix: Sequence[str] = (\"task_info\", \"followed_path\"),\n        agent_suffixes: Sequence[str] = (\"1\", \"2\"),\n        label: str = \"trajectories\",\n        trajectory_plt_colormaps: Sequence[str] = (\"cool\", \"spring\"),\n        marker_plt_colors: Sequence[Any] = (\"blue\", \"orange\"),\n        axes_equal: bool = True,\n        **other_base_kwargs,\n    ):\n        super().__init__(label=label, **other_base_kwargs)\n\n        self.path_to_trajectory_prefix = list(path_to_trajectory_prefix)\n        self.agent_suffixes = list(agent_suffixes)\n        self.trajectory_plt_colormaps = list(trajectory_plt_colormaps)\n        self.marker_plt_colors = marker_plt_colors\n        self.axes_equal = axes_equal\n\n    def make_fig(self, episode, episode_id):\n        # From https://nbviewer.jupyter.org/github/dpsanders/matplotlib-examples/blob/master/colorline.ipynb\n        def colorline(\n            x,\n            y,\n            z=None,\n            cmap=plt.get_cmap(\"cool\"),\n            norm=plt.Normalize(0.0, 1.0),\n            linewidth=2,\n            alpha=1.0,\n            zorder=1,\n        ):\n            \"\"\"Plot a colored line with coordinates x and y.\n\n            Optionally specify colors in the array z\n\n            Optionally specify a colormap, a norm function and a line width.\n            \"\"\"\n\n            def make_segments(x, y):\n                \"\"\"Create list of line segments from x and y coordinates, in\n                the correct format for LineCollection:\n\n                an array of the form  numlines x (points per line) x 2\n                (x and y) array\n                \"\"\"\n            
    points = np.array([x, y]).T.reshape(-1, 1, 2)\n                segments = np.concatenate([points[:-1], points[1:]], axis=1)\n                return segments\n\n            # Default colors equally spaced on [0,1]:\n            if z is None:\n                z = np.linspace(0.0, 1.0, len(x))\n\n            # Special case if a single number:\n            if not hasattr(\n                z, \"__iter__\"\n            ):  # to check for numerical input -- this is a hack\n                z = np.array([z])\n\n            z = np.asarray(z)\n\n            segments = make_segments(x, y)\n\n            lc = LineCollection(\n                segments,\n                array=z,\n                cmap=cmap,\n                norm=norm,\n                linewidth=linewidth,\n                alpha=alpha,\n                zorder=zorder,\n            )\n\n            ax = plt.gca()\n            ax.add_collection(lc)\n\n            return lc\n\n        fig, ax = plt.subplots(figsize=self.figsize)\n        for agent, cmap, marker_color in zip(\n            self.agent_suffixes, self.trajectory_plt_colormaps, self.marker_plt_colors\n        ):\n            path = self.path_to_trajectory_prefix[:]\n            path[-1] = path[-1] + agent\n            trajectory = self._access(episode, path)\n\n            x, y = [], []\n            for xy in trajectory:\n                x.append(float(self._access(xy, self.x)))\n                y.append(float(self._access(xy, self.y)))\n\n            colorline(x, y, zorder=1, cmap=cmap)\n\n            start_marker = markers.MarkerStyle(marker=self.start_marker_shape)\n            if self.path_to_rot_degrees is not None:\n                rot_degrees = float(\n                    self._access(trajectory[0], self.path_to_rot_degrees)\n                )\n                if self.adapt_rotation is not None:\n                    rot_degrees = self.adapt_rotation(rot_degrees)\n                start_marker._transform = start_marker.get_transform().rotate_deg(\n  
                  rot_degrees\n                )\n\n            ax.scatter(\n                [x[0]],\n                [y[0]],\n                marker=start_marker,\n                zorder=2,\n                s=self.start_marker_scale,\n                color=marker_color,\n            )\n            ax.scatter(\n                [x[-1]], [y[-1]], marker=\"s\", color=marker_color\n            )  # stop (square)\n\n        if self.axes_equal:\n            ax.set_aspect(\"equal\", \"box\")\n        ax.set_title(episode_id, fontsize=self.fontsize)\n        ax.tick_params(axis=\"x\", labelsize=self.fontsize)\n        ax.tick_params(axis=\"y\", labelsize=self.fontsize)\n\n        return fig\n"
  },
  {
    "path": "allenact/utils/spaces_utils.py",
    "content": "# Original work Copyright (c) 2016 OpenAI (https://openai.com).\n# Modified work Copyright (c) Allen Institute for AI\n# This source code is licensed under the MIT license found in the\n# LICENSE file in the root directory of this source tree.\n\nfrom typing import Union, Tuple, List, cast, Iterable, Callable\nfrom collections import OrderedDict\n\nimport numpy as np\nimport torch\nfrom gym import spaces as gym\n\nActionType = Union[torch.Tensor, OrderedDict, Tuple, int]\n\n\ndef flatdim(space):\n    \"\"\"Return the number of dimensions a flattened equivalent of this space\n    would have.\n\n    Accepts a space and returns an integer. Raises\n    ``NotImplementedError`` if the space is not defined in\n    ``gym.spaces``.\n    \"\"\"\n    if isinstance(space, gym.Box):\n        return int(np.prod(space.shape))\n    elif isinstance(space, gym.Discrete):\n        return 1  # we do not expand to one-hot\n    elif isinstance(space, gym.Tuple):\n        return int(sum([flatdim(s) for s in space.spaces]))\n    elif isinstance(space, gym.Dict):\n        return int(sum([flatdim(s) for s in space.spaces.values()]))\n    elif isinstance(space, gym.MultiBinary):\n        return int(space.n)\n    elif isinstance(space, gym.MultiDiscrete):\n        return int(np.prod(space.shape))\n    else:\n        raise NotImplementedError\n\n\ndef flatten(space, torch_x):\n    \"\"\"Flatten data points from a space.\"\"\"\n    if isinstance(space, gym.Box):\n        if len(space.shape) > 0:\n            return torch_x.view(torch_x.shape[: -len(space.shape)] + (-1,))\n        else:\n            return torch_x.view(torch_x.shape + (-1,))\n    elif isinstance(space, gym.Discrete):\n        # Assume tensor input does NOT contain a dimension for action\n        if isinstance(torch_x, torch.Tensor):\n            return torch_x.unsqueeze(-1)\n        else:\n            return torch.tensor(torch_x).view(1)\n    elif isinstance(space, gym.Tuple):\n        return torch.cat(\n         
   [flatten(s, x_part) for x_part, s in zip(torch_x, space.spaces)], dim=-1\n        )\n    elif isinstance(space, gym.Dict):\n        return torch.cat(\n            [flatten(s, torch_x[key]) for key, s in space.spaces.items()], dim=-1\n        )\n    elif isinstance(space, gym.MultiBinary):\n        return torch_x.view(torch_x.shape[: -len(space.shape)] + (-1,))\n    elif isinstance(space, gym.MultiDiscrete):\n        return torch_x.view(torch_x.shape[: -len(space.shape)] + (-1,))\n    else:\n        raise NotImplementedError\n\n\ndef unflatten(space, torch_x):\n    \"\"\"Unflatten a concatenated data points tensor from a space.\"\"\"\n    if isinstance(space, gym.Box):\n        return torch_x.view(torch_x.shape[:-1] + space.shape).float()\n    elif isinstance(space, gym.Discrete):\n        res = torch_x.view(torch_x.shape[:-1] + space.shape).long()\n        return res if len(res.shape) > 0 else res.item()\n    elif isinstance(space, gym.Tuple):\n        dims = [flatdim(s) for s in space.spaces]\n        list_flattened = torch.split(torch_x, dims, dim=-1)\n        list_unflattened = [\n            unflatten(s, flattened)\n            for flattened, s in zip(list_flattened, space.spaces)\n        ]\n        return tuple(list_unflattened)\n    elif isinstance(space, gym.Dict):\n        dims = [flatdim(s) for s in space.spaces.values()]\n        list_flattened = torch.split(torch_x, dims, dim=-1)\n        list_unflattened = [\n            (key, unflatten(s, flattened))\n            for flattened, (key, s) in zip(list_flattened, space.spaces.items())\n        ]\n        return OrderedDict(list_unflattened)\n    elif isinstance(space, gym.MultiBinary):\n        return torch_x.view(torch_x.shape[:-1] + space.shape).byte()\n    elif isinstance(space, gym.MultiDiscrete):\n        return torch_x.view(torch_x.shape[:-1] + space.shape).long()\n    else:\n        raise NotImplementedError\n\n\ndef torch_point(space, np_x):\n    \"\"\"Convert numpy space point into 
torch.\"\"\"\n    if isinstance(space, gym.Box):\n        return torch.from_numpy(np_x)\n    elif isinstance(space, gym.Discrete):\n        return np_x\n    elif isinstance(space, gym.Tuple):\n        return tuple([torch_point(s, x_part) for x_part, s in zip(np_x, space.spaces)])\n    elif isinstance(space, gym.Dict):\n        return OrderedDict(\n            [(key, torch_point(s, np_x[key])) for key, s in space.spaces.items()]\n        )\n    elif isinstance(space, gym.MultiBinary):\n        return torch.from_numpy(np_x)\n    elif isinstance(space, gym.MultiDiscrete):\n        return torch.from_numpy(np.asarray(np_x))\n    else:\n        raise NotImplementedError\n\n\ndef numpy_point(\n    space: gym.Space, torch_x: Union[int, torch.Tensor, OrderedDict, Tuple]\n):\n    \"\"\"Convert torch space point into numpy.\"\"\"\n    if isinstance(space, gym.Box):\n        return cast(torch.Tensor, torch_x).cpu().numpy()\n    elif isinstance(space, gym.Discrete):\n        return torch_x\n    elif isinstance(space, gym.Tuple):\n        return tuple(\n            [\n                numpy_point(s, x_part)\n                for x_part, s in zip(cast(Iterable, torch_x), space.spaces)\n            ]\n        )\n    elif isinstance(space, gym.Dict):\n        return OrderedDict(\n            [\n                (key, numpy_point(s, cast(torch.Tensor, torch_x)[key]))\n                for key, s in space.spaces.items()\n            ]\n        )\n    elif isinstance(space, gym.MultiBinary):\n        return cast(torch.Tensor, torch_x).cpu().numpy()\n    elif isinstance(space, gym.MultiDiscrete):\n        return cast(torch.Tensor, torch_x).cpu().numpy()\n    else:\n        raise NotImplementedError\n\n\ndef flatten_space(space: gym.Space):\n    if isinstance(space, gym.Box):\n        return gym.Box(space.low.flatten(), space.high.flatten())\n    if isinstance(space, gym.Discrete):\n        return gym.Box(low=0, high=space.n, shape=(1,))\n    if isinstance(space, gym.Tuple):\n        space 
= [flatten_space(s) for s in space.spaces]\n        return gym.Box(\n            low=np.concatenate([s.low for s in space]),\n            high=np.concatenate([s.high for s in space]),\n        )\n    if isinstance(space, gym.Dict):\n        space = [flatten_space(s) for s in space.spaces.values()]\n        return gym.Box(\n            low=np.concatenate([s.low for s in space]),\n            high=np.concatenate([s.high for s in space]),\n        )\n    if isinstance(space, gym.MultiBinary):\n        return gym.Box(low=0, high=1, shape=(space.n,))\n    if isinstance(space, gym.MultiDiscrete):\n        return gym.Box(\n            low=np.zeros_like(space.nvec),\n            high=space.nvec,\n        )\n    raise NotImplementedError\n\n\ndef policy_space(\n    action_space: gym.Space,\n    box_space_to_policy: Callable[[gym.Box], gym.Space] = None,\n) -> gym.Space:\n    if isinstance(action_space, gym.Box):\n        if box_space_to_policy is None:\n            # policy = mean (default)\n            return action_space\n        else:\n            return box_space_to_policy(action_space)\n    if isinstance(action_space, gym.Discrete):\n        # policy = prob of each option\n        return gym.Box(\n            low=np.float32(0.0), high=np.float32(1.0), shape=(action_space.n,)\n        )\n    if isinstance(action_space, gym.Tuple):\n        # policy = tuple of sub-policies\n        spaces = [policy_space(s, box_space_to_policy) for s in action_space.spaces]\n        return gym.Tuple(spaces)\n    if isinstance(action_space, gym.Dict):\n        # policy = dict of sub-policies\n        spaces = [\n            (\n                name,\n                policy_space(s, box_space_to_policy),\n            )\n            for name, s in action_space.spaces.items()\n        ]\n        return gym.Dict(spaces)\n    if isinstance(action_space, gym.MultiBinary):\n        # policy = prob of 0, 1 in each entry\n        return gym.Box(\n            low=np.float32(0.0), 
high=np.float32(1.0), shape=(action_space.n, 2)\n        )\n    if isinstance(action_space, gym.MultiDiscrete):\n        # policy = Tuple of prob of each option for each discrete\n        return gym.Tuple(\n            [\n                gym.Box(low=np.float32(0.0), high=np.float32(1.0), shape=(n,))\n                for n in action_space.nvec\n            ]\n        )\n    raise NotImplementedError\n\n\ndef action_list(\n    action_space: gym.Space, flat_actions: torch.Tensor\n) -> List[ActionType]:\n    \"\"\"Convert flattened actions to list.\n\n    Assumes `flat_actions` are of shape `[step, sampler, flatdim]`.\n    \"\"\"\n\n    def tolist(action):\n        if isinstance(action, torch.Tensor):\n            return action.tolist()\n        if isinstance(action, Tuple):\n            actions = [tolist(ac) for ac in action]\n            return tuple(actions)\n        if isinstance(action, OrderedDict):\n            actions = [(key, tolist(action[key])) for key in action.keys()]\n            return OrderedDict(actions)\n        # else, it's a scalar\n        return action\n\n    return [tolist(unflatten(action_space, ac)) for ac in flat_actions[0]]\n"
  },
  {
    "path": "allenact/utils/system.py",
    "content": "import io\nimport logging\nimport os\nimport socket\nimport sys\nfrom contextlib import closing\nfrom typing import cast, Optional, Tuple\n\nfrom torch import multiprocessing as mp\n\nfrom allenact._constants import ALLENACT_INSTALL_DIR\n\nHUMAN_LOG_LEVELS: Tuple[str, ...] = (\"debug\", \"info\", \"warning\", \"error\", \"none\")\n\"\"\"\nAvailable log levels: \"debug\", \"info\", \"warning\", \"error\", \"none\"\n\"\"\"\n\n_LOGGER: Optional[logging.Logger] = None\n\n\nclass ColoredFormatter(logging.Formatter):\n    \"\"\"Format a log string with colors.\n\n    This implementation taken (with modifications) from\n    https://stackoverflow.com/a/384125.\n    \"\"\"\n\n    BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)\n\n    RESET_SEQ = \"\\033[0m\"\n    COLOR_SEQ = \"\\033[1;%dm\"\n    BOLD_SEQ = \"\\033[1m\"\n\n    COLORS = {\n        \"WARNING\": YELLOW,\n        \"INFO\": GREEN,\n        \"DEBUG\": BLUE,\n        \"ERROR\": RED,\n        \"CRITICAL\": MAGENTA,\n    }\n\n    def __init__(self, fmt: str, datefmt: Optional[str] = None, use_color=True):\n        super().__init__(fmt=fmt, datefmt=datefmt)\n        self.use_color = use_color\n\n    def format(self, record: logging.LogRecord) -> str:\n        levelname = record.levelname\n        if self.use_color and levelname in self.COLORS:\n            levelname_with_color = (\n                self.COLOR_SEQ % (30 + self.COLORS[levelname])\n                + levelname\n                + self.RESET_SEQ\n            )\n            record.levelname = levelname_with_color\n            formated_record = logging.Formatter.format(self, record)\n            record.levelname = (\n                levelname  # Resetting levelname as `record` might be used elsewhere\n            )\n            return formated_record\n        else:\n            return logging.Formatter.format(self, record)\n\n\ndef get_logger() -> logging.Logger:\n    \"\"\"Get a `logging.Logger` to stderr. 
It can be called whenever we wish to\n    log some message. Messages can get mixed-up\n    (https://docs.python.org/3.6/library/multiprocessing.html#logging), but it\n    works well in most cases.\n\n    # Returns\n\n    logger: the `logging.Logger` object\n    \"\"\"\n    if _new_logger():\n        if mp.current_process().name == \"MainProcess\":\n            _new_logger(logging.DEBUG)\n        _set_log_formatter()\n    return _LOGGER\n\n\ndef _human_log_level_to_int(human_log_level):\n\n    human_log_level = human_log_level.lower().strip()\n    assert human_log_level in HUMAN_LOG_LEVELS, \"unknown human_log_level {}\".format(\n        human_log_level\n    )\n\n    if human_log_level == \"debug\":\n        log_level = logging.DEBUG\n    elif human_log_level == \"info\":\n        log_level = logging.INFO\n    elif human_log_level == \"warning\":\n        log_level = logging.WARNING\n    elif human_log_level == \"error\":\n        log_level = logging.ERROR\n    elif human_log_level == \"none\":\n        log_level = logging.CRITICAL + 1\n    else:\n        raise NotImplementedError(f\"Unknown log level {human_log_level}.\")\n    return log_level\n\n\ndef init_logging(human_log_level: str = \"info\") -> None:\n    \"\"\"Init the `logging.Logger`.\n\n    It should be called only once in the app (e.g. in `main`). It sets\n    the log_level to one of `HUMAN_LOG_LEVELS`. And sets up a handler\n    for stderr. 
The logging level is propagated to all subprocesses.\n    \"\"\"\n    _new_logger(_human_log_level_to_int(human_log_level))\n    _set_log_formatter()\n\n\ndef update_log_level(logger, human_log_level: str):\n    logger.setLevel(_human_log_level_to_int(human_log_level))\n\n\ndef find_free_port(address: str = \"127.0.0.1\") -> int:\n    \"\"\"Finds a free port for distributed training.\n\n    # Returns\n\n    port: port number that can be used to listen\n    \"\"\"\n    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:\n        s.bind((address, 0))\n        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)\n        port = s.getsockname()[1]\n    return port\n\n\ndef _new_logger(log_level: Optional[int] = None):\n    global _LOGGER\n    if _LOGGER is None:\n        _LOGGER = mp.get_logger()\n        if log_level is not None:\n            get_logger().setLevel(log_level)\n        return True\n    if log_level is not None:\n        get_logger().setLevel(log_level)\n    return False\n\n\ndef _set_log_formatter():\n    assert _LOGGER is not None\n\n    if _LOGGER.getEffectiveLevel() <= logging.CRITICAL:\n        add_style_to_logs = True  # In case someone wants to turn this off manually.\n\n        if add_style_to_logs:\n            default_format = \"$BOLD[%(asctime)s$RESET %(levelname)s$BOLD:]$RESET %(message)s\\t[%(filename)s: %(lineno)d]\"\n            default_format = default_format.replace(\n                \"$BOLD\", ColoredFormatter.BOLD_SEQ\n            ).replace(\"$RESET\", ColoredFormatter.RESET_SEQ)\n        else:\n            default_format = (\n                \"%(asctime)s %(levelname)s: %(message)s\\t[%(filename)s: %(lineno)d]\"\n            )\n        short_date_format = \"%m/%d %H:%M:%S\"\n        log_format = \"default\"\n\n        if log_format == \"default\":\n            fmt = default_format\n            datefmt = short_date_format\n        elif log_format == \"defaultMilliseconds\":\n            fmt = default_format\n       
     datefmt = None\n        else:\n            fmt = log_format\n            datefmt = short_date_format\n\n        if add_style_to_logs:\n            formatter = ColoredFormatter(\n                fmt=fmt,\n                datefmt=datefmt,\n            )\n        else:\n            formatter = logging.Formatter(fmt=fmt, datefmt=datefmt)\n\n        ch = logging.StreamHandler()\n        ch.setFormatter(formatter)\n        ch.addFilter(cast(logging.Filter, _AllenActMessageFilter(os.getcwd())))\n        _LOGGER.addHandler(ch)\n\n        sys.excepthook = _excepthook\n        sys.stdout = cast(io.TextIOWrapper, _StreamToLogger())\n\n    return _LOGGER\n\n\nclass _StreamToLogger:\n    def __init__(self):\n        self.linebuf = \"\"\n\n    def write(self, buf):\n        temp_linebuf = self.linebuf + buf\n        self.linebuf = \"\"\n        for line in temp_linebuf.splitlines(True):\n            if line[-1] == \"\\n\":\n                cast(logging.Logger, _LOGGER).info(line.rstrip())\n            else:\n                self.linebuf += line\n\n    def flush(self):\n        if self.linebuf != \"\":\n            cast(logging.Logger, _LOGGER).info(self.linebuf.rstrip())\n        self.linebuf = \"\"\n\n\ndef _excepthook(*args):\n    # noinspection PyTypeChecker\n    get_logger().error(msg=\"Uncaught exception:\", exc_info=args)\n\n\nclass _AllenActMessageFilter:\n    def __init__(self, working_directory: str):\n        self.working_directory = working_directory\n\n    # noinspection PyMethodMayBeStatic\n    def filter(self, record):\n        # TODO: Does this work when pip-installing AllenAct?\n        return int(\n            self.working_directory in record.pathname\n            or ALLENACT_INSTALL_DIR in record.pathname\n            or \"main\" in record.pathname\n        )\n\n\nclass ImportChecker:\n    def __init__(self, msg=None):\n        self.msg = msg\n\n    def __enter__(self):\n        pass\n\n    def __exit__(self, exc_type, value, traceback):\n        if 
exc_type == ModuleNotFoundError and self.msg is not None:\n            value.msg += self.msg\n        return exc_type is None\n"
  },
  {
    "path": "allenact/utils/tensor_utils.py",
    "content": "\"\"\"Functions used to manipulate pytorch tensors and numpy arrays.\"\"\"\n\nimport numbers\nimport os\nimport tempfile\nfrom collections import defaultdict\nfrom typing import List, Dict, Optional, DefaultDict, Union, Any, cast\n\nimport PIL\nimport numpy as np\nimport torch\nfrom PIL import Image\nfrom moviepy import editor as mpy\nfrom moviepy.editor import concatenate_videoclips\nfrom tensorboardX import SummaryWriter as TBXSummaryWriter, summary as tbxsummary\nfrom tensorboardX.proto.summary_pb2 import Summary as TBXSummary\n\n# noinspection PyProtectedMember\nfrom tensorboardX.utils import _prepare_video as tbx_prepare_video\nfrom tensorboardX.x2num import make_np as tbxmake_np\n\nfrom allenact.utils.system import get_logger\n\n\ndef to_device_recursively(\n    input: Any, device: Union[str, torch.device, int], inplace: bool = True\n):\n    \"\"\"Recursively places tensors on the appropriate device.\"\"\"\n    if input is None:\n        return input\n    elif isinstance(input, torch.Tensor):\n        return input.to(device)  # type: ignore\n    elif isinstance(input, tuple):\n        return tuple(\n            to_device_recursively(input=subinput, device=device, inplace=inplace)\n            for subinput in input\n        )\n    elif isinstance(input, list):\n        if inplace:\n            for i in range(len(input)):\n                input[i] = to_device_recursively(\n                    input=input[i], device=device, inplace=inplace\n                )\n            return input\n        else:\n            return [\n                to_device_recursively(input=subpart, device=device, inplace=inplace)\n                for subpart in input\n            ]\n    elif isinstance(input, dict):\n        if inplace:\n            for key in input:\n                input[key] = to_device_recursively(\n                    input=input[key], device=device, inplace=inplace\n                )\n            return input\n        else:\n            return {\n   
             k: to_device_recursively(input=input[k], device=device, inplace=inplace)\n                for k in input\n            }\n    elif isinstance(input, set):\n        if inplace:\n            for element in list(input):\n                input.remove(element)\n                input.add(\n                    to_device_recursively(element, device=device, inplace=inplace)\n                )\n        else:\n            return set(\n                to_device_recursively(k, device=device, inplace=inplace) for k in input\n            )\n    elif isinstance(input, np.ndarray) or np.isscalar(input) or isinstance(input, str):\n        return input\n    elif hasattr(input, \"to\"):\n        # noinspection PyCallingNonCallable\n        return input.to(device=device, inplace=inplace)\n    else:\n        raise NotImplementedError(\n            \"Sorry, value of type {} is not supported.\".format(type(input))\n        )\n\n\ndef detach_recursively(input: Any, inplace=True):\n    \"\"\"Recursively detaches tensors in some data structure from their\n    computation graph.\"\"\"\n    if input is None:\n        return input\n    elif isinstance(input, torch.Tensor):\n        return input.detach()\n    elif isinstance(input, tuple):\n        return tuple(\n            detach_recursively(input=subinput, inplace=inplace) for subinput in input\n        )\n    elif isinstance(input, list):\n        if inplace:\n            for i in range(len(input)):\n                input[i] = detach_recursively(input[i], inplace=inplace)\n            return input\n        else:\n            return [\n                detach_recursively(input=subinput, inplace=inplace)\n                for subinput in input\n            ]\n    elif isinstance(input, dict):\n        if inplace:\n            for key in input:\n                input[key] = detach_recursively(input[key], inplace=inplace)\n            return input\n        else:\n            return {k: detach_recursively(input[k], inplace=inplace) for 
k in input}\n    elif isinstance(input, set):\n        if inplace:\n            for element in list(input):\n                input.remove(element)\n                input.add(detach_recursively(element, inplace=inplace))\n        else:\n            return set(detach_recursively(k, inplace=inplace) for k in input)\n    elif isinstance(input, np.ndarray) or np.isscalar(input) or isinstance(input, str):\n        return input\n    elif hasattr(input, \"detach_recursively\"):\n        # noinspection PyCallingNonCallable\n        return input.detach_recursively(inplace=inplace)\n    else:\n        raise NotImplementedError(\n            \"Sorry, hidden state of type {} is not supported.\".format(type(input))\n        )\n\n\ndef batch_observations(\n    observations: List[Dict], device: Optional[torch.device] = None\n) -> Dict[str, Union[Dict, torch.Tensor]]:\n    \"\"\"Transpose a batch of observation dicts to a dict of batched\n    observations.\n\n    # Arguments\n\n    observations :  List of dicts of observations.\n    device : The torch.device to put the resulting tensors on.\n        Will not move the tensors if None.\n\n    # Returns\n\n    Transposed dict of lists of observations.\n    \"\"\"\n\n    def dict_from_observation(\n        observation: Dict[str, Any]\n    ) -> Dict[str, Union[Dict, List]]:\n        batch_dict: DefaultDict = defaultdict(list)\n\n        for sensor in observation:\n            if isinstance(observation[sensor], Dict):\n                batch_dict[sensor] = dict_from_observation(observation[sensor])\n            else:\n                batch_dict[sensor].append(to_tensor(observation[sensor]))\n\n        return batch_dict\n\n    def fill_dict_from_observations(\n        input_batch: Any, observation: Dict[str, Any]\n    ) -> None:\n        for sensor in observation:\n            if isinstance(observation[sensor], Dict):\n                fill_dict_from_observations(input_batch[sensor], observation[sensor])\n            else:\n                
input_batch[sensor].append(to_tensor(observation[sensor]))\n\n    def dict_to_batch(input_batch: Any) -> None:\n        for sensor in input_batch:\n            if isinstance(input_batch[sensor], Dict):\n                dict_to_batch(input_batch[sensor])\n            else:\n                input_batch[sensor] = torch.stack(\n                    [batch.to(device=device) for batch in input_batch[sensor]], dim=0\n                )\n\n    if len(observations) == 0:\n        return cast(Dict[str, Union[Dict, torch.Tensor]], observations)\n\n    batch = dict_from_observation(observations[0])\n\n    for obs in observations[1:]:\n        fill_dict_from_observations(batch, obs)\n\n    dict_to_batch(batch)\n\n    return cast(Dict[str, Union[Dict, torch.Tensor]], batch)\n\n\ndef to_tensor(v) -> torch.Tensor:\n    \"\"\"Return a torch.Tensor version of the input.\n\n    # Parameters\n\n    v : Input values that can be coerced into being a tensor.\n\n    # Returns\n\n    A tensor version of the input.\n    \"\"\"\n    if torch.is_tensor(v):\n        return v\n    elif isinstance(v, np.ndarray):\n        return torch.from_numpy(v)\n    else:\n        return torch.tensor(\n            v, dtype=torch.int64 if isinstance(v, numbers.Integral) else torch.float\n        )\n\n\ndef tile_images(images: List[np.ndarray]) -> np.ndarray:\n    \"\"\"Tile multiple images into single image.\n\n    # Parameters\n\n    images : list of images where each image has dimension\n        (height x width x channels)\n\n    # Returns\n\n    Tiled image (new_height x width x channels).\n    \"\"\"\n    assert len(images) > 0, \"empty list of images\"\n    np_images = np.asarray(images)\n    n_images, height, width, n_channels = np_images.shape\n    new_height = int(np.ceil(np.sqrt(n_images)))\n    new_width = int(np.ceil(float(n_images) / new_height))\n    # pad with empty images to complete the rectangle\n    np_images = np.array(\n        images + [images[0] * 0 for _ in range(n_images, new_height * 
new_width)]\n    )\n    # img_HWhwc\n    out_image = np_images.reshape((new_height, new_width, height, width, n_channels))\n    # img_HhWwc\n    out_image = out_image.transpose(0, 2, 1, 3, 4)\n    # img_Hh_Ww_c\n    out_image = out_image.reshape((new_height * height, new_width * width, n_channels))\n    return out_image\n\n\nclass SummaryWriter(TBXSummaryWriter):\n    @staticmethod\n    def _video(tag, vid):\n        # noinspection PyProtectedMember\n        tag = tbxsummary._clean_tag(tag)\n        return TBXSummary(value=[TBXSummary.Value(tag=tag, image=vid)])\n\n    def add_vid(self, tag, vid, global_step=None, walltime=None):\n        self._get_file_writer().add_summary(\n            self._video(tag, vid), global_step, walltime\n        )\n\n    def add_image(\n        self, tag, img_tensor, global_step=None, walltime=None, dataformats=\"CHW\"\n    ):\n        self._get_file_writer().add_summary(\n            image(tag, img_tensor, dataformats=dataformats), global_step, walltime\n        )\n\n\ndef image(tag, tensor, rescale=1, dataformats=\"CHW\"):\n    \"\"\"Outputs a `Summary` protocol buffer with images. The summary has up to\n    `max_images` summary values containing images. The images are built from\n    `tensor` which must be 3-D with shape `[height, width, channels]` and where\n    `channels` can be:\n\n    *  1: `tensor` is interpreted as Grayscale.\n    *  3: `tensor` is interpreted as RGB.\n    *  4: `tensor` is interpreted as RGBA.\n\n    # Parameters\n    tag: A name for the generated node. 
Will also serve as a series name in\n        TensorBoard.\n    tensor: A 3-D `uint8` or `float32` `Tensor` of shape `[height, width,\n        channels]` where `channels` is 1, 3, or 4.\n        'tensor' can either have values in [0, 1] (float32) or [0, 255] (uint8).\n        The image() function will scale the image values to [0, 255] by applying\n        a scale factor of either 1 (uint8) or 255 (float32).\n    rescale: The scale.\n    dataformats: Input image shape format.\n\n\n    # Returns\n      A scalar `Tensor` of type `string`. The serialized `Summary` protocol\n      buffer.\n    \"\"\"\n    # noinspection PyProtectedMember\n    tag = tbxsummary._clean_tag(tag)\n    tensor = tbxmake_np(tensor)\n    tensor = convert_to_HWC(tensor, dataformats)\n    # Do not assume that user passes in values in [0, 255], use data type to detect\n    if tensor.dtype != np.uint8:\n        tensor = (tensor * 255.0).astype(np.uint8)\n\n    img = tbxsummary.make_image(tensor, rescale=rescale)\n    return TBXSummary(value=[TBXSummary.Value(tag=tag, image=img)])\n\n\ndef convert_to_HWC(tensor, input_format):  # tensor: numpy array\n    assert len(set(input_format)) == len(\n        input_format\n    ), \"You can not use the same dimension shordhand twice. \\\n        input_format: {}\".format(\n        input_format\n    )\n    assert len(tensor.shape) == len(\n        input_format\n    ), \"size of input tensor and input format are different. 
\\\n        tensor shape: {}, input_format: {}\".format(\n        tensor.shape, input_format\n    )\n    input_format = input_format.upper()\n\n    if len(input_format) == 4:\n        index = [input_format.find(c) for c in \"NCHW\"]\n        tensor_NCHW = tensor.transpose(index)\n        tensor_CHW = make_grid(tensor_NCHW)\n        # noinspection PyTypeChecker\n        return tensor_CHW.transpose(1, 2, 0)\n\n    if len(input_format) == 3:\n        index = [input_format.find(c) for c in \"HWC\"]\n        tensor_HWC = tensor.transpose(index)\n        if tensor_HWC.shape[2] == 1:\n            tensor_HWC = np.concatenate([tensor_HWC, tensor_HWC, tensor_HWC], 2)\n        return tensor_HWC\n\n    if len(input_format) == 2:\n        index = [input_format.find(c) for c in \"HW\"]\n        tensor = tensor.transpose(index)\n        tensor = np.stack([tensor, tensor, tensor], 2)\n        return tensor\n\n\ndef make_grid(I, ncols=8):\n    # I: N1HW or N3HW\n\n    assert isinstance(I, np.ndarray), \"plugin error, should pass numpy array here\"\n    if I.shape[1] == 1:\n        I = np.concatenate([I, I, I], 1)\n    assert I.ndim == 4 and I.shape[1] == 3 or I.shape[1] == 4\n    nimg = I.shape[0]\n    H = I.shape[2]\n    W = I.shape[3]\n    ncols = min(nimg, ncols)\n    nrows = int(np.ceil(float(nimg) / ncols))\n    canvas = np.zeros((I.shape[1], H * nrows, W * ncols), dtype=I.dtype)\n    i = 0\n    for y in range(nrows):\n        for x in range(ncols):\n            if i >= nimg:\n                break\n            canvas[:, y * H : (y + 1) * H, x * W : (x + 1) * W] = I[i]\n            i = i + 1\n    return canvas\n\n\ndef tensor_to_video(tensor, fps=4):\n    tensor = tbxmake_np(tensor)\n    tensor = tbx_prepare_video(tensor)\n    # If user passes in uint8, then we don't need to rescale by 255\n    if tensor.dtype != np.uint8:\n        tensor = (tensor * 255.0).astype(np.uint8)\n\n    return tbxsummary.make_video(tensor, fps)\n\n\ndef tensor_to_clip(tensor, fps=4):\n    tensor = 
tbxmake_np(tensor)\n    tensor = tbx_prepare_video(tensor)\n    # If user passes in uint8, then we don't need to rescale by 255\n    if tensor.dtype != np.uint8:\n        tensor = (tensor * 255.0).astype(np.uint8)\n\n    t, h, w, c = tensor.shape\n\n    clip = mpy.ImageSequenceClip(list(tensor), fps=fps)\n\n    return clip, (h, w, c)\n\n\ndef clips_to_video(clips, h, w, c):\n    # encode sequence of images into gif string\n    clip = concatenate_videoclips(clips)\n\n    filename = tempfile.NamedTemporaryFile(suffix=\".gif\", delete=False).name\n\n    # moviepy >= 1.0.0 use logger=None to suppress output.\n    try:\n        clip.write_gif(filename, verbose=False, logger=None)\n    except TypeError:\n        get_logger().warning(\n            \"Upgrade to moviepy >= 1.0.0 to suppress the progress bar.\"\n        )\n        clip.write_gif(filename, verbose=False)\n\n    with open(filename, \"rb\") as f:\n        tensor_string = f.read()\n\n    try:\n        os.remove(filename)\n    except OSError:\n        get_logger().warning(\"The temporary file used by moviepy cannot be deleted.\")\n\n    return TBXSummary.Image(\n        height=h, width=w, colorspace=c, encoded_image_string=tensor_string\n    )\n\n\ndef process_video(render, max_clip_len=500, max_video_len=-1, fps=4):\n    output = []\n    hwc = None\n    if len(render) > 0:\n        if len(render) > max_video_len > 0:\n            get_logger().warning(\n                \"Clipping video to first {} frames out of {} original frames\".format(\n                    max_video_len, len(render)\n                )\n            )\n            render = render[:max_video_len]\n        for clipstart in range(0, len(render), max_clip_len):\n            clip = render[clipstart : clipstart + max_clip_len]\n            try:\n                current = np.stack(clip, axis=0)  # T, H, W, C\n                current = current.transpose((0, 3, 1, 2))  # T, C, H, W\n                current = np.expand_dims(current, axis=0)  # 1, T, C, 
H, W\n                current, cur_hwc = tensor_to_clip(current, fps=fps)\n\n                if hwc is None:\n                    hwc = cur_hwc\n                else:\n                    assert (\n                        hwc == cur_hwc\n                    ), \"Inconsistent clip shape: previous {} current {}\".format(\n                        hwc, cur_hwc\n                    )\n\n                output.append(current)\n            except MemoryError:\n                get_logger().error(\n                    \"Skipping video due to memory error with clip of length {}\".format(\n                        len(clip)\n                    )\n                )\n                return None\n    else:\n        get_logger().warning(\"Calling process_video with 0 frames\")\n        return None\n\n    assert len(output) > 0, \"No clips to concatenate\"\n    assert hwc is not None, \"No tensor dims assigned\"\n\n    try:\n        result = clips_to_video(output, *hwc)\n    except MemoryError:\n        get_logger().error(\"Skipping video due to memory error calling clips_to_video\")\n        result = None\n\n    return result\n\n\nclass ScaleBothSides(object):\n    \"\"\"Rescales the input PIL.Image to the given 'width' and `height`.\n\n    Attributes\n        width: new width\n        height: new height\n        interpolation: Default: PIL.Image.BILINEAR\n    \"\"\"\n\n    def __init__(self, width: int, height: int, interpolation=Image.BILINEAR):\n        self.width = width\n        self.height = height\n        self.interpolation = interpolation\n\n    def __call__(self, img: PIL.Image) -> PIL.Image:\n        return img.resize((self.width, self.height), self.interpolation)\n"
  },
  {
    "path": "allenact/utils/viz_utils.py",
    "content": "import abc\nimport json\nimport os\nimport sys\nfrom collections import defaultdict\nfrom typing import (\n    Dict,\n    Any,\n    Union,\n    Optional,\n    List,\n    Tuple,\n    Sequence,\n    Callable,\n    cast,\n    Set,\n)\n\nimport numpy as np\n\nfrom allenact.utils.experiment_utils import Builder\nfrom allenact.utils.tensor_utils import SummaryWriter, tile_images, process_video\n\ntry:\n    # Tensorflow not installed for testing\n    from tensorflow.core.util import event_pb2\n    from tensorflow.python.lib.io import tf_record\n\n    _TF_AVAILABLE = True\nexcept ImportError as _:\n    event_pb2 = None\n    tf_record = None\n\n    _TF_AVAILABLE = False\n\nimport matplotlib\n\ntry:\n    # When debugging we don't want to use the interactive version of matplotlib\n    # as it causes all sorts of problems.\n\n    # noinspection PyPackageRequirements\n    import pydevd\n\n    matplotlib.use(\"agg\")\nexcept ImportError as _:\n    pass\n\nimport matplotlib.pyplot as plt\nimport matplotlib.markers as markers\nimport cv2\n\nfrom allenact.utils.system import get_logger\n\n\nclass AbstractViz:\n    def __init__(\n        self,\n        label: Optional[str] = None,\n        vector_task_sources: Sequence[Tuple[str, Dict[str, Any]]] = (),\n        rollout_sources: Sequence[Union[str, Sequence[str]]] = (),\n        actor_critic_source: bool = False,\n        **kwargs,  # accepts `max_episodes_in_group`\n    ):\n        self.label = label\n        self.vector_task_sources = list(vector_task_sources)\n        self.rollout_sources = [\n            [entry] if isinstance(entry, str) else list(entry)\n            for entry in rollout_sources\n        ]\n        self.actor_critic_source = actor_critic_source\n\n        self.mode: Optional[str] = None\n        self.path_to_id: Optional[Sequence[str]] = None\n        self.episode_ids: Optional[List[Sequence[str]]] = None\n\n        if \"max_episodes_in_group\" in kwargs:\n            self.max_episodes_in_group = 
kwargs[\"max_episodes_in_group\"]\n            self.assigned_max_eps_in_group = True\n        else:\n            self.max_episodes_in_group = 8\n            self.assigned_max_eps_in_group = False\n\n    @staticmethod\n    def _source_to_str(source, is_vector_task):\n        source_type = \"vector_task\" if is_vector_task else \"rollout_or_actor_critic\"\n        return \"{}__{}\".format(\n            source_type,\n            \"__{}_sep__\".format(source_type).join([\"{}\".format(s) for s in source]),\n        )\n\n    @staticmethod\n    def _access(dictionary, path):\n        path = path[::-1]\n        while len(path) > 0:\n            dictionary = dictionary[path.pop()]\n        return dictionary\n\n    def _auto_viz_order(self, task_outputs):\n        if task_outputs is None:\n            return None, None\n\n        all_episodes = {\n            self._access(episode, self.path_to_id): episode for episode in task_outputs\n        }\n\n        if self.episode_ids is None:\n            all_episode_keys = list(all_episodes.keys())\n            viz_order = []\n            for page_start in range(\n                0, len(all_episode_keys), self.max_episodes_in_group\n            ):\n                viz_order.append(\n                    all_episode_keys[\n                        page_start : page_start + self.max_episodes_in_group\n                    ]\n                )\n            get_logger().debug(\"visualizing with order {}\".format(viz_order))\n        else:\n            viz_order = self.episode_ids\n\n        return viz_order, all_episodes\n\n    def _setup(\n        self,\n        mode: str,\n        path_to_id: Sequence[str],\n        episode_ids: Optional[Sequence[Union[Sequence[str], str]]],\n        max_episodes_in_group: int,\n        force: bool = False,\n    ):\n        self.mode = mode\n        self.path_to_id = list(path_to_id)\n        if (self.episode_ids is None or force) and episode_ids is not None:\n            self.episode_ids = (\n           
     list(episode_ids)\n                if not isinstance(episode_ids[0], str)\n                else [list(cast(List[str], episode_ids))]\n            )\n        if not self.assigned_max_eps_in_group or force:\n            self.max_episodes_in_group = max_episodes_in_group\n\n    @abc.abstractmethod\n    def log(\n        self,\n        log_writer: SummaryWriter,\n        task_outputs: Optional[List[Any]],\n        render: Optional[Dict[str, List[Dict[str, Any]]]],\n        num_steps: int,\n    ):\n        raise NotImplementedError()\n\n\nclass TrajectoryViz(AbstractViz):\n    def __init__(\n        self,\n        path_to_trajectory: Sequence[str] = (\"task_info\", \"followed_path\"),\n        path_to_target_location: Optional[Sequence[str]] = (\n            \"task_info\",\n            \"target_position\",\n        ),\n        path_to_x: Sequence[str] = (\"x\",),\n        path_to_y: Sequence[str] = (\"z\",),\n        path_to_rot_degrees: Optional[Sequence[str]] = (\"rotation\", \"y\"),\n        adapt_rotation: Optional[Callable[[float], float]] = None,\n        label: str = \"trajectory\",\n        figsize: Tuple[float, float] = (2, 2),\n        fontsize: float = 5,\n        start_marker_shape: str = r\"$\\spadesuit$\",\n        start_marker_scale: int = 100,\n        **other_base_kwargs,\n    ):\n        super().__init__(label, **other_base_kwargs)\n        self.path_to_trajectory = list(path_to_trajectory)\n        self.path_to_target_location = (\n            list(path_to_target_location)\n            if path_to_target_location is not None\n            else None\n        )\n        self.adapt_rotation = adapt_rotation\n        self.x = list(path_to_x)\n        self.y = list(path_to_y)\n        self.path_to_rot_degrees = (\n            list(path_to_rot_degrees) if path_to_rot_degrees is not None else None\n        )\n        self.figsize = figsize\n        self.fontsize = fontsize\n        self.start_marker_shape = start_marker_shape\n        
self.start_marker_scale = start_marker_scale\n\n    def log(\n        self,\n        log_writer: SummaryWriter,\n        task_outputs: Optional[List[Any]],\n        render: Optional[Dict[str, List[Dict[str, Any]]]],\n        num_steps: int,\n    ):\n        viz_order, all_episodes = self._auto_viz_order(task_outputs)\n        if viz_order is None:\n            get_logger().debug(\"trajectory viz returning without visualizing\")\n            return\n\n        for page, current_ids in enumerate(viz_order):\n            figs = []\n            for episode_id in current_ids:\n                # assert episode_id in all_episodes\n                if episode_id not in all_episodes:\n                    get_logger().warning(\n                        \"skipping viz for missing episode {}\".format(episode_id)\n                    )\n                    continue\n                figs.append(self.make_fig(all_episodes[episode_id], episode_id))\n            if len(figs) == 0:\n                continue\n            log_writer.add_figure(\n                \"{}/{}_group{}\".format(self.mode, self.label, page),\n                figs,\n                global_step=num_steps,\n            )\n            plt.close(\n                \"all\"\n            )  # close all current figures (SummaryWriter already closes all figures we log)\n\n    def make_fig(self, episode, episode_id):\n        # From https://nbviewer.jupyter.org/github/dpsanders/matplotlib-examples/blob/master/colorline.ipynb\n        def colorline(\n            x,\n            y,\n            z=None,\n            cmap=plt.get_cmap(\"cool\"),\n            norm=plt.Normalize(0.0, 1.0),\n            linewidth=2,\n            alpha=1.0,\n            zorder=1,\n        ):\n            \"\"\"Plot a colored line with coordinates x and y.\n\n            Optionally specify colors in the array z\n\n            Optionally specify a colormap, a norm function and a line width.\n            \"\"\"\n\n            def make_segments(x, y):\n  
              \"\"\"Create list of line segments from x and y coordinates, in\n                the correct format for LineCollection:\n\n                an array of the form  numlines x (points per line) x 2\n                (x and y) array\n                \"\"\"\n                points = np.array([x, y]).T.reshape(-1, 1, 2)\n                segments = np.concatenate([points[:-1], points[1:]], axis=1)\n                return segments\n\n            # Default colors equally spaced on [0,1]:\n            if z is None:\n                z = np.linspace(0.0, 1.0, len(x))\n\n            # Special case if a single number:\n            if not hasattr(\n                z, \"__iter__\"\n            ):  # to check for numerical input -- this is a hack\n                z = np.array([z])\n\n            z = np.asarray(z)\n\n            segments = make_segments(x, y)\n            lc = matplotlib.collections.LineCollection(\n                segments,\n                array=z,\n                cmap=cmap,\n                norm=norm,\n                linewidth=linewidth,\n                alpha=alpha,\n                zorder=zorder,\n            )\n\n            ax = plt.gca()\n            ax.add_collection(lc)\n\n            return lc\n\n        trajectory = self._access(episode, self.path_to_trajectory)\n\n        x, y = [], []\n        for xy in trajectory:\n            x.append(float(self._access(xy, self.x)))\n            y.append(float(self._access(xy, self.y)))\n\n        fig, ax = plt.subplots(figsize=self.figsize)\n        colorline(x, y, zorder=1)\n\n        start_marker = markers.MarkerStyle(marker=self.start_marker_shape)\n        if self.path_to_rot_degrees is not None:\n            rot_degrees = float(self._access(trajectory[0], self.path_to_rot_degrees))\n            if self.adapt_rotation is not None:\n                rot_degrees = self.adapt_rotation(rot_degrees)\n            start_marker._transform = start_marker.get_transform().rotate_deg(\n                
rot_degrees\n            )\n\n        ax.scatter(\n            [x[0]], [y[0]], marker=start_marker, zorder=2, s=self.start_marker_scale\n        )\n        ax.scatter([x[-1]], [y[-1]], marker=\"s\")  # stop\n\n        if self.path_to_target_location is not None:\n            target = self._access(episode, self.path_to_target_location)\n            ax.scatter(\n                [float(self._access(target, self.x))],\n                [float(self._access(target, self.y))],\n                marker=\"*\",\n            )\n\n        ax.set_title(episode_id, fontsize=self.fontsize)\n        ax.tick_params(axis=\"x\", labelsize=self.fontsize)\n        ax.tick_params(axis=\"y\", labelsize=self.fontsize)\n\n        return fig\n\n\nclass AgentViewViz(AbstractViz):\n    def __init__(\n        self,\n        label: str = \"agent_view\",\n        max_clip_length: int = 100,  # control memory used when converting groups of images into clips\n        max_video_length: int = -1,  # no limit, if > 0, limit the maximum video length (discard last frames)\n        vector_task_source: Tuple[str, Dict[str, Any]] = (\n            \"render\",\n            {\"mode\": \"raw_rgb_list\"},\n        ),\n        episode_ids: Optional[Sequence[Union[Sequence[str], str]]] = None,\n        fps: int = 4,\n        max_render_size: int = 400,\n        **other_base_kwargs,\n    ):\n        super().__init__(\n            label,\n            vector_task_sources=[vector_task_source],\n            **other_base_kwargs,\n        )\n        self.max_clip_length = max_clip_length\n        self.max_video_length = max_video_length\n        self.fps = fps\n        self.max_render_size = max_render_size\n\n        self.episode_ids = (\n            (\n                list(episode_ids)\n                if not isinstance(episode_ids[0], str)\n                else [list(cast(List[str], episode_ids))]\n            )\n            if episode_ids is not None\n            else None\n        )\n\n    def log(\n        self,\n  
      log_writer: SummaryWriter,\n        task_outputs: Optional[List[Any]],\n        render: Optional[Dict[str, List[Dict[str, Any]]]],\n        num_steps: int,\n    ):\n        if render is None:\n            return\n\n        datum_id = self._source_to_str(self.vector_task_sources[0], is_vector_task=True)\n\n        viz_order, _ = self._auto_viz_order(task_outputs)\n        if viz_order is None:\n            get_logger().debug(\"agent view viz returning without visualizing\")\n            return\n\n        for page, current_ids in enumerate(viz_order):\n            images = []  # list of lists of rgb frames\n            for episode_id in current_ids:\n                # assert episode_id in render\n                if episode_id not in render:\n                    get_logger().warning(\n                        \"skipping viz for missing episode {}\".format(episode_id)\n                    )\n                    continue\n                images.append(\n                    [\n                        self._overlay_label(step[datum_id], episode_id)\n                        for step in render[episode_id]\n                    ]\n                )\n            if len(images) == 0:\n                continue\n            vid = self.make_vid(images)\n            if vid is not None:\n                log_writer.add_vid(\n                    f\"{self.mode}/{self.label}_group{page}\",\n                    vid,\n                    global_step=num_steps,\n                )\n\n    @staticmethod\n    def _overlay_label(\n        img,\n        text,\n        pos=(0, 0),\n        bg_color=(255, 255, 255),\n        fg_color=(0, 0, 0),\n        scale=0.4,\n        thickness=1,\n        margin=2,\n        font_face=cv2.FONT_HERSHEY_SIMPLEX,\n    ):\n        txt_size = cv2.getTextSize(text, font_face, scale, thickness)\n\n        end_x = pos[0] + txt_size[0][0] + margin\n        end_y = pos[1]\n\n        pos = (pos[0], pos[1] + txt_size[0][1] + margin)\n\n        cv2.rectangle(img, 
pos, (end_x, end_y), bg_color, cv2.FILLED)\n        cv2.putText(\n            img=img,\n            text=text,\n            org=pos,\n            fontFace=font_face,\n            fontScale=scale,\n            color=fg_color,\n            thickness=thickness,\n            lineType=cv2.LINE_AA,\n        )\n        return img\n\n    def make_vid(self, images):\n        max_length = max([len(ep) for ep in images])\n\n        if max_length == 0:\n            return None\n\n        valid_im = None\n        for ep in images:\n            if len(ep) > 0:\n                valid_im = ep[0]\n                break\n\n        frames = []\n        for it in range(max_length):\n            current_images = []\n            for ep in images:\n                if it < len(ep):\n                    current_images.append(ep[it])\n                else:\n                    if it == 0:\n                        current_images.append(np.zeros_like(valid_im))\n                    else:\n                        gray = ep[-1].copy()\n                        gray[:, :, 0] = gray[:, :, 2] = gray[:, :, 1]\n                        current_images.append(gray)\n            frames.append(tile_images(current_images))\n\n        return process_video(\n            frames, self.max_clip_length, self.max_video_length, fps=self.fps\n        )\n\n\nclass AbstractTensorViz(AbstractViz):\n    def __init__(\n        self,\n        rollout_source: Union[str, Sequence[str]],\n        label: Optional[str] = None,\n        figsize: Tuple[float, float] = (3, 3),\n        **other_base_kwargs,\n    ):\n        if label is None:\n            if isinstance(rollout_source, str):\n                label = rollout_source[:]\n            else:\n                label = \"/\".join(rollout_source)\n\n        super().__init__(label, rollout_sources=[rollout_source], **other_base_kwargs)\n\n        self.figsize = figsize\n        self.datum_id = self._source_to_str(\n            self.rollout_sources[0], is_vector_task=False\n   
     )\n\n    def log(\n        self,\n        log_writer: SummaryWriter,\n        task_outputs: Optional[List[Any]],\n        render: Optional[Dict[str, List[Dict[str, Any]]]],\n        num_steps: int,\n    ):\n        if render is None:\n            return\n\n        viz_order, _ = self._auto_viz_order(task_outputs)\n        if viz_order is None:\n            get_logger().debug(\"tensor viz returning without visualizing\")\n            return\n\n        for page, current_ids in enumerate(viz_order):\n            figs = []\n            for episode_id in current_ids:\n                if episode_id not in render or len(render[episode_id]) == 0:\n                    get_logger().warning(\n                        \"skipping viz for missing or 0-length episode {}\".format(\n                            episode_id\n                        )\n                    )\n                    continue\n                episode_src = [\n                    step[self.datum_id]\n                    for step in render[episode_id]\n                    if self.datum_id in step\n                ]\n                if len(episode_src) > 0:\n                    # If the last episode for an inference worker is of length 1, there's no captured rollout sources\n                    figs.append(self.make_fig(episode_src, episode_id))\n            if len(figs) == 0:\n                continue\n            log_writer.add_figure(\n                \"{}/{}_group{}\".format(self.mode, self.label, page),\n                figs,\n                global_step=num_steps,\n            )\n            plt.close(\n                \"all\"\n            )  # close all current figures (SummaryWriter already closes all figures we log)\n\n    @abc.abstractmethod\n    def make_fig(\n        self, episode_src: Sequence[np.ndarray], episode_id: str\n    ) -> matplotlib.figure.Figure:\n        raise NotImplementedError()\n\n\nclass TensorViz1D(AbstractTensorViz):\n    def __init__(\n        self,\n        rollout_source: 
Union[str, Sequence[str]] = \"action_log_probs\",\n        label: Optional[str] = None,\n        figsize: Tuple[float, float] = (3, 3),\n        **other_base_kwargs,\n    ):\n        super().__init__(rollout_source, label, figsize, **other_base_kwargs)\n\n    def make_fig(self, episode_src, episode_id):\n        assert episode_src[0].size == 1\n\n        # Concatenate along step axis (0)\n        seq = np.concatenate(episode_src, axis=0).squeeze()  # remove all singleton dims\n\n        fig, ax = plt.subplots(figsize=self.figsize)\n        ax.plot(seq)\n        ax.set_title(episode_id)\n\n        ax.set_aspect(\"auto\")\n        plt.tight_layout()\n\n        return fig\n\n\nclass TensorViz2D(AbstractTensorViz):\n    def __init__(\n        self,\n        rollout_source: Union[str, Sequence[str]] = (\"memory_first_last\", \"rnn\"),\n        label: Optional[str] = None,\n        figsize: Tuple[float, float] = (10, 10),\n        fontsize: float = 5,\n        **other_base_kwargs,\n    ):\n        super().__init__(rollout_source, label, figsize, **other_base_kwargs)\n        self.fontsize = fontsize\n\n    def make_fig(self, episode_src, episode_id):\n        # Concatenate along step axis (0)\n        seq = np.concatenate(\n            episode_src, axis=0\n        ).squeeze()  # remove num_layers if it's equal to 1, else die\n        assert len(seq.shape) == 2, \"No support for higher-dimensions\"\n\n        # get_logger().debug(\"basic {} h render {}\".format(episode_id, seq[:10, 0]))\n\n        fig, ax = plt.subplots(figsize=self.figsize)\n        ax.matshow(seq)\n\n        ax.set_xlabel(episode_id, fontsize=self.fontsize)\n        ax.tick_params(axis=\"x\", labelsize=self.fontsize)\n        ax.tick_params(axis=\"y\", labelsize=self.fontsize)\n        ax.tick_params(bottom=False)\n\n        ax.set_aspect(\"auto\")\n        plt.tight_layout()\n\n        return fig\n\n\nclass ActorViz(AbstractViz):\n    def __init__(\n        self,\n        label: str = 
\"action_probs\",\n        action_names_path: Optional[Sequence[str]] = (\"task_info\", \"action_names\"),\n        figsize: Tuple[float, float] = (1, 5),\n        fontsize: float = 5,\n        **other_base_kwargs,\n    ):\n        super().__init__(label, actor_critic_source=True, **other_base_kwargs)\n        self.action_names_path: Optional[Sequence[str]] = (\n            list(action_names_path) if action_names_path is not None else None\n        )\n        self.figsize = figsize\n        self.fontsize = fontsize\n        self.action_names: Optional[List[str]] = None\n\n    def log(\n        self,\n        log_writer: SummaryWriter,\n        task_outputs: Optional[List[Any]],\n        render: Optional[Dict[str, List[Dict[str, Any]]]],\n        num_steps: int,\n    ):\n        if render is None:\n            return\n\n        if (\n            self.action_names is None\n            and task_outputs is not None\n            and len(task_outputs) > 0\n            and self.action_names_path is not None\n        ):\n            self.action_names = list(\n                self._access(task_outputs[0], self.action_names_path)\n            )\n\n        viz_order, _ = self._auto_viz_order(task_outputs)\n        if viz_order is None:\n            get_logger().debug(\"actor viz returning without visualizing\")\n            return\n\n        for page, current_ids in enumerate(viz_order):\n            figs = []\n            for episode_id in current_ids:\n                # assert episode_id in render\n                if episode_id not in render:\n                    get_logger().warning(\n                        \"skipping viz for missing episode {}\".format(episode_id)\n                    )\n                    continue\n                episode_src = [\n                    step[\"actor_probs\"]\n                    for step in render[episode_id]\n                    if \"actor_probs\" in step\n                ]\n                assert len(episode_src) == 
len(render[episode_id])\n                figs.append(self.make_fig(episode_src, episode_id))\n            if len(figs) == 0:\n                continue\n            log_writer.add_figure(\n                \"{}/{}_group{}\".format(self.mode, self.label, page),\n                figs,\n                global_step=num_steps,\n            )\n            plt.close(\n                \"all\"\n            )  # close all current figures (SummaryWriter already closes all figures we log)\n\n    def make_fig(self, episode_src, episode_id):\n        # Concatenate along step axis (0, reused from kept sampler axis)\n        mat = np.concatenate(episode_src, axis=0)\n\n        fig, ax = plt.subplots(figsize=self.figsize)\n        ax.matshow(mat)\n\n        if self.action_names is not None:\n            assert len(self.action_names) == mat.shape[-1]\n            ax.set_xticklabels([\"\"] + self.action_names, rotation=\"vertical\")\n\n        ax.set_xlabel(episode_id, fontsize=self.fontsize)\n        ax.tick_params(axis=\"x\", labelsize=self.fontsize)\n        ax.tick_params(axis=\"y\", labelsize=self.fontsize)\n        ax.tick_params(bottom=False)\n\n        # Gridlines based on minor ticks\n        ax.set_yticks(np.arange(-0.5, mat.shape[0], 1), minor=True)\n        ax.set_xticks(np.arange(-0.5, mat.shape[1], 1), minor=True)\n        ax.grid(which=\"minor\", color=\"w\", linestyle=\"-\", linewidth=0.05)\n        ax.tick_params(\n            axis=\"both\", which=\"minor\", left=False, top=False, right=False, bottom=False\n        )\n\n        ax.set_aspect(\"auto\")\n        plt.tight_layout()\n        return fig\n\n\nclass VizSuite(AbstractViz):\n    def __init__(\n        self,\n        episode_ids: Optional[Sequence[Union[Sequence[str], str]]] = None,\n        path_to_id: Sequence[str] = (\"task_info\", \"id\"),\n        mode: str = \"valid\",\n        force_episodes_and_max_episodes_in_group: bool = False,\n        max_episodes_in_group: int = 8,\n        *viz,\n        
**kw_viz,\n    ):\n        super().__init__(max_episodes_in_group=max_episodes_in_group)\n        self._setup(\n            mode=mode,\n            path_to_id=path_to_id,\n            episode_ids=episode_ids,\n            max_episodes_in_group=max_episodes_in_group,\n        )\n        self.force_episodes_and_max_episodes_in_group = (\n            force_episodes_and_max_episodes_in_group\n        )\n\n        self.all_episode_ids = self._episodes_set()\n\n        self.viz = [\n            v() if isinstance(v, Builder) else v\n            for v in viz\n            if isinstance(v, Builder) or isinstance(v, AbstractViz)\n        ] + [\n            v() if isinstance(v, Builder) else v\n            for k, v in kw_viz.items()\n            if isinstance(v, Builder) or isinstance(v, AbstractViz)\n        ]\n\n        self.max_render_size: Optional[int] = None\n\n        (\n            self.rollout_sources,\n            self.vector_task_sources,\n            self.actor_critic_source,\n        ) = self._setup_sources()\n\n        self.data: Dict[str, List[Dict]] = (\n            {}\n        )  # dict of episode id to list of dicts with collected data\n        self.last_it2epid: List[str] = []\n\n    def _setup_sources(self):\n        rollout_sources, vector_task_sources = [], []\n        labels = []\n        actor_critic_source = False\n        new_episodes = []\n        for v in self.viz:\n            labels.append(v.label)\n            rollout_sources += v.rollout_sources\n            vector_task_sources += v.vector_task_sources\n            actor_critic_source |= v.actor_critic_source\n\n            if (\n                v.episode_ids is not None\n                and not self.force_episodes_and_max_episodes_in_group\n            ):\n                cur_episodes = self._episodes_set(v.episode_ids)\n                for ep in cur_episodes:\n                    if (\n                        self.all_episode_ids is not None\n                        and ep not in 
self.all_episode_ids\n                    ):\n                        new_episodes.append(ep)\n                        get_logger().info(\n                            \"Added new episode {} from {}\".format(ep, v.label)\n                        )\n\n            v._setup(\n                mode=self.mode,\n                path_to_id=self.path_to_id,\n                episode_ids=self.episode_ids,\n                max_episodes_in_group=self.max_episodes_in_group,\n                force=self.force_episodes_and_max_episodes_in_group,\n            )\n\n            if isinstance(v, AgentViewViz):\n                self.max_render_size = v.max_render_size\n\n        get_logger().info(\"Logging labels {}\".format(labels))\n\n        if len(new_episodes) > 0:\n            get_logger().info(\"Added new episodes {}\".format(new_episodes))\n            self.episode_ids.append(new_episodes)  # new group with all added episodes\n            self.all_episode_ids = self._episodes_set()\n\n        rol_flat = {json.dumps(src, sort_keys=True): src for src in rollout_sources}\n        vt_flat = {json.dumps(src, sort_keys=True): src for src in vector_task_sources}\n\n        rol_keys = list(set(rol_flat.keys()))\n        vt_keys = list(set(vt_flat.keys()))\n\n        return (\n            [rol_flat[k] for k in rol_keys],\n            [vt_flat[k] for k in vt_keys],\n            actor_critic_source,\n        )\n\n    def _episodes_set(self, episode_list=None) -> Optional[Set[str]]:\n        source = self.episode_ids if episode_list is None else episode_list\n        if source is None:\n            return None\n\n        all_episode_ids: List[str] = []\n        for group in source:\n            all_episode_ids += group\n        return set(all_episode_ids)\n\n    def empty(self):\n        return len(self.data) == 0\n\n    def _update(self, collected_data):\n        for epid in collected_data:\n            assert epid in self.data\n            
self.data[epid][-1].update(collected_data[epid])\n\n    def _append(self, vector_task_data):\n        for epid in vector_task_data:\n            if epid in self.data:\n                self.data[epid].append(vector_task_data[epid])\n            else:\n                self.data[epid] = [vector_task_data[epid]]\n\n    def _collect_actor_critic(self, actor_critic):\n        actor_critic_data = {\n            epid: dict()\n            for epid in self.last_it2epid\n            if self.all_episode_ids is None or epid in self.all_episode_ids\n        }\n        if len(actor_critic_data) > 0 and actor_critic is not None:\n            if self.actor_critic_source:\n                # TODO this code only supports Discrete action spaces!\n                probs = (\n                    actor_critic.distributions.probs\n                )  # step (=1) x sampler x agent (=1) x action\n                values = actor_critic.values  # step x sampler x agent x 1\n                for it, epid in enumerate(self.last_it2epid):\n                    if epid in actor_critic_data:\n                        # Select current episode (sampler axis will be reused as step axis)\n                        prob = (\n                            # probs.narrow(dim=0, start=it, length=1)  # works for sampler x action\n                            probs.narrow(\n                                dim=1, start=it, length=1\n                            )  # step x sampler x agent x action -> step x 1 x agent x action\n                            .squeeze(\n                                0\n                            )  # step x 1 x agent x action -> 1 x agent x action\n                            # .squeeze(-2)  # 1 x agent x action -> 1 x action\n                            .to(\"cpu\")\n                            .detach()\n                            .numpy()\n                        )\n                        assert \"actor_probs\" not in actor_critic_data[epid]\n                        
actor_critic_data[epid][\"actor_probs\"] = prob\n                        val = (\n                            # values.narrow(dim=0, start=it, length=1)  # works for sampler x 1\n                            values.narrow(\n                                dim=1, start=it, length=1\n                            )  # step x sampler x agent x 1 -> step x 1 x agent x 1\n                            .squeeze(0)  # step x 1 x agent x 1 -> 1 x agent x 1\n                            # .squeeze(-2)  # 1 x agent x 1 -> 1 x 1\n                            .to(\"cpu\")\n                            .detach()\n                            .numpy()\n                        )\n                        assert \"critic_value\" not in actor_critic_data[epid]\n                        actor_critic_data[epid][\"critic_value\"] = val\n\n        self._update(actor_critic_data)\n\n    def _collect_rollout(self, rollout, alive):\n        alive_set = set(alive)\n        assert len(alive_set) == len(alive)\n        alive_it2epid = [\n            epid for it, epid in enumerate(self.last_it2epid) if it in alive_set\n        ]\n        rollout_data = {\n            epid: dict()\n            for epid in alive_it2epid\n            if self.all_episode_ids is None or epid in self.all_episode_ids\n        }\n        if len(rollout_data) > 0 and rollout is not None:\n            for source in self.rollout_sources:\n                datum_id = self._source_to_str(source, is_vector_task=False)\n\n                storage, path = source[0], source[1:]\n\n                # Access storage\n                res = getattr(rollout, storage)\n                episode_dim = rollout.dim_names.index(\"sampler\")\n\n                # Access sub-storage if path not empty\n                if len(path) > 0:\n                    if storage == \"memory_first_last\":\n                        storage = \"memory\"\n\n                    flattened_name = rollout.unflattened_to_flattened[storage][\n                        
tuple(path)\n                    ]\n                    # for path_step in path:\n                    #     res = res[path_step]\n                    res = res[flattened_name]\n                    res, episode_dim = res\n\n                if rollout.step > 0:\n                    if rollout.step > res.shape[0]:\n                        # e.g. rnn with only latest memory saved\n                        rollout_step = res.shape[0] - 1\n                    else:\n                        rollout_step = rollout.step - 1\n                else:\n                    if rollout.num_steps - 1 < res.shape[0]:\n                        rollout_step = rollout.num_steps - 1\n                    else:\n                        # e.g. rnn with only latest memory saved\n                        rollout_step = res.shape[0] - 1\n\n                # Select latest step\n                res = res.narrow(\n                    dim=0,\n                    start=rollout_step,\n                    length=1,  # step dimension\n                )  # 1 x ... x sampler x ...\n\n                # get_logger().debug(\"basic collect h {}\".format(res[..., 0]))\n\n                for it, epid in enumerate(alive_it2epid):\n                    if epid in rollout_data:\n                        # Select current episode and remove episode/sampler axis\n                        datum = (\n                            res.narrow(dim=episode_dim, start=it, length=1)\n                            .squeeze(axis=episode_dim)\n                            .to(\"cpu\")\n                            .detach()\n                            .numpy()\n                        )  # 1 x ... 
(no sampler dim)\n                        # get_logger().debug(\"basic collect ep {} h {}\".format(epid, res[..., 0]))\n                        assert datum_id not in rollout_data[epid]\n                        rollout_data[epid][\n                            datum_id\n                        ] = datum.copy()  # copy needed when running on CPU!\n\n        self._update(rollout_data)\n\n    def _collect_vector_task(self, vector_task):\n        it2epid = [\n            self._access(info, self.path_to_id[1:])\n            for info in vector_task.attr(\"task_info\")\n        ]\n        # get_logger().debug(\"basic epids {}\".format(it2epid))\n\n        def limit_spatial_res(data: np.ndarray, max_size=400):\n            if data.shape[0] <= max_size and data.shape[1] <= max_size:\n                return data\n            else:\n                f = float(max_size) / max(data.shape[0], data.shape[1])\n                size = (int(data.shape[1] * f), int(data.shape[0] * f))\n                return cv2.resize(data, size, 0, 0, interpolation=cv2.INTER_AREA)\n\n        vector_task_data = {\n            epid: dict()\n            for epid in it2epid\n            if self.all_episode_ids is None or epid in self.all_episode_ids\n        }\n        if len(vector_task_data) > 0:\n            for (\n                source\n            ) in self.vector_task_sources:  # these are observations for next step!\n                datum_id = self._source_to_str(source, is_vector_task=True)\n                method, kwargs = source\n                res = getattr(vector_task, method)(**kwargs)\n                if not isinstance(res, Sequence):\n                    assert len(it2epid) == 1\n                    res = [res]\n                if method == \"render\":\n                    res = [limit_spatial_res(r, self.max_render_size) for r in res]\n                assert len(res) == len(it2epid)\n                for datum, epid in zip(res, it2epid):\n                    if epid in vector_task_data:\n 
                       assert datum_id not in vector_task_data[epid]\n                        vector_task_data[epid][datum_id] = datum\n\n        self._append(vector_task_data)\n\n        return it2epid\n\n    # to be called by engine\n    def collect(self, vector_task=None, alive=None, rollout=None, actor_critic=None):\n        if actor_critic is not None:\n            # in phase with last_it2epid\n            try:\n                self._collect_actor_critic(actor_critic)\n            except (AssertionError, RuntimeError):\n                get_logger().debug(\n                    msg=f\"Failed collect (actor_critic) for viz due to exception:\",\n                    exc_info=sys.exc_info(),\n                )\n                get_logger().error(f\"Failed collect (actor_critic) for viz\")\n\n        if alive is not None and rollout is not None:\n            # in phase with last_it2epid that stay alive\n            try:\n                self._collect_rollout(rollout=rollout, alive=alive)\n            except (AssertionError, RuntimeError):\n                get_logger().debug(\n                    msg=f\"Failed collect (rollout) for viz due to exception:\",\n                    exc_info=sys.exc_info(),\n                )\n                get_logger().error(f\"Failed collect (rollout) for viz\")\n\n        # Always call this one last!\n        if vector_task is not None:\n            # in phase with identifiers of current episodes from vector_task\n            try:\n                self.last_it2epid = self._collect_vector_task(vector_task)\n            except (AssertionError, RuntimeError):\n                get_logger().debug(\n                    msg=f\"Failed collect (vector_task) for viz due to exception:\",\n                    exc_info=sys.exc_info(),\n                )\n                get_logger().error(f\"Failed collect (vector_task) for viz\")\n\n    def read_and_reset(self) -> Dict[str, List[Dict[str, Any]]]:\n        res = self.data\n        self.data = {}\n  
      # get_logger().debug(\"Returning episodes {}\".format(list(res.keys())))\n        return res\n\n    # to be called by logger\n    def log(\n        self,\n        log_writer: SummaryWriter,\n        task_outputs: Optional[List[Any]],\n        render: Optional[Dict[str, List[Dict[str, Any]]]],\n        num_steps: int,\n    ):\n        for v in self.viz:\n            try:\n                v.log(log_writer, task_outputs, render, num_steps)\n            except (AssertionError, RuntimeError):\n                get_logger().debug(\n                    msg=f\"Dropped {v.label} viz due to exception:\",\n                    exc_info=sys.exc_info(),\n                )\n                get_logger().error(f\"Dropped {v.label} viz\")\n\n\nclass TensorboardSummarizer:\n    \"\"\"Assumption: tensorboard tags/labels include a valid/test/train substr indicating the data modality\"\"\"\n\n    def __init__(\n        self,\n        experiment_to_train_events_paths_map: Dict[str, Sequence[str]],\n        experiment_to_test_events_paths_map: Dict[str, Sequence[str]],\n        eval_min_mega_steps: Optional[Sequence[float]] = None,\n        tensorboard_tags_to_labels_map: Optional[Dict[str, str]] = None,\n        tensorboard_output_summary_folder: str = \"tensorboard_plotter_output\",\n    ):\n        if not _TF_AVAILABLE:\n            raise ImportError(\n                \"Please install tensorflow e.g. 
with `pip install tensorflow` to enable TensorboardSummarizer\"\n            )\n\n        self.experiment_to_train_events_paths_map = experiment_to_train_events_paths_map\n        self.experiment_to_test_events_paths_map = experiment_to_test_events_paths_map\n        train_experiments = set(list(experiment_to_train_events_paths_map.keys()))\n        test_experiments = set(list(experiment_to_test_events_paths_map.keys()))\n        assert (train_experiments - test_experiments) in [\n            set(),\n            train_experiments,\n        ], (\n            f\"`experiment_to_test_events_paths_map` must have identical keys (experiment names) to those\"\n            f\" in `experiment_to_train_events_paths_map`, or be empty.\"\n            f\" Got {train_experiments} train keys and {test_experiments} test keys.\"\n        )\n\n        self.eval_min_mega_steps = eval_min_mega_steps\n        self.tensorboard_tags_to_labels_map = tensorboard_tags_to_labels_map\n        if self.tensorboard_tags_to_labels_map is not None:\n            for tag, label in self.tensorboard_tags_to_labels_map.items():\n                assert (\"valid\" in label) + (\"train\" in label) + (\n                    \"test\" in label\n                ) == 1, (\n                    f\"One (and only one) of {'train', 'valid', 'test'} must be part of the label for\"\n                    f\" tag {tag} ({label} given).\"\n                )\n        self.tensorboard_output_summary_folder = tensorboard_output_summary_folder\n\n        self.train_data = self._read_tensorflow_experiment_events(\n            self.experiment_to_train_events_paths_map\n        )\n        self.test_data = self._read_tensorflow_experiment_events(\n            self.experiment_to_test_events_paths_map\n        )\n\n    def _read_tensorflow_experiment_events(\n        self, experiment_to_events_paths_map, skip_map=False\n    ):\n        def my_summary_iterator(path):\n            try:\n                for r in 
tf_record.tf_record_iterator(path):\n                    yield event_pb2.Event.FromString(r)\n            except IOError:\n                get_logger().debug(f\"IOError for path {path}\")\n                return None\n\n        collected_data = {}\n        for experiment_name, path_list in experiment_to_events_paths_map.items():\n            experiment_data = defaultdict(list)\n            for filename_path in path_list:\n                for event in my_summary_iterator(filename_path):\n                    if event is None:\n                        break\n                    for value in event.summary.value:\n                        if self.tensorboard_tags_to_labels_map is None or skip_map:\n                            label = value.tag\n                        elif value.tag in self.tensorboard_tags_to_labels_map:\n                            label = self.tensorboard_tags_to_labels_map[value.tag]\n                        else:\n                            continue\n                        experiment_data[label].append(\n                            dict(\n                                score=value.simple_value,\n                                time=event.wall_time,\n                                steps=event.step,\n                            )\n                        )\n            collected_data[experiment_name] = experiment_data\n\n        return collected_data\n\n    def _eval_vs_train_time_steps(self, eval_data, train_data):\n        min_mega_steps = self.eval_min_mega_steps\n        if min_mega_steps is None:\n            min_mega_steps = [(item[\"steps\"] - 1) / 1e6 for item in eval_data]\n\n        scores, times, steps = [], [], []\n\n        i, t, last_i = 0, 0, -1\n        while len(times) < len(min_mega_steps):\n            while eval_data[i][\"steps\"] / min_mega_steps[len(times)] / 1e6 < 1:\n                i += 1\n            while train_data[t][\"steps\"] / min_mega_steps[len(times)] / 1e6 < 1:\n                t += 1\n\n            # step might 
be missing in valid! (and would duplicate future value at previous steps!)\n            # solution: move forward last entry's time if no change in i (instead of new entry)\n            if i == last_i:\n                times[-1] = train_data[t][\"time\"]\n            else:\n                scores.append(eval_data[i][\"score\"])\n                times.append(train_data[t][\"time\"])\n                steps.append(eval_data[i][\"steps\"])\n\n            last_i = i\n\n        scores.insert(0, train_data[0][\"score\"])\n        times.insert(0, train_data[0][\"time\"])\n        steps.insert(0, 0)\n\n        return scores, times, steps\n\n    def _train_vs_time_steps(self, train_data):\n        last_eval_step = (\n            self.eval_min_mega_steps[-1] * 1e6\n            if self.eval_min_mega_steps is not None\n            else float(\"inf\")\n        )\n\n        scores = [train_data[0][\"score\"]]\n        times = [train_data[0][\"time\"]]\n        steps = [train_data[0][\"steps\"]]\n\n        t = 1\n        while steps[-1] < last_eval_step and t < len(train_data):\n            scores.append(train_data[t][\"score\"])\n            times.append(train_data[t][\"time\"])\n            steps.append(train_data[t][\"steps\"])\n            t += 1\n\n        return scores, times, steps\n\n    def make_tensorboard_summary(self):\n        all_experiments = list(self.experiment_to_train_events_paths_map.keys())\n\n        for experiment_name in all_experiments:\n            summary_writer = SummaryWriter(\n                os.path.join(self.tensorboard_output_summary_folder, experiment_name)\n            )\n\n            test_labels = (\n                sorted(list(self.test_data[experiment_name].keys()))\n                if len(self.test_data) > 0\n                else []\n            )\n            for test_label in test_labels:\n                train_label = test_label.replace(\"valid\", \"test\").replace(\n                    \"test\", \"train\"\n                )\n              
  if train_label not in self.train_data[experiment_name]:\n                    print(\n                        f\"Missing matching 'train' label {train_label} for eval label {test_label}. Skipping\"\n                    )\n                    continue\n                train_data = self.train_data[experiment_name][train_label]\n                test_data = self.test_data[experiment_name][test_label]\n                scores, times, steps = self._eval_vs_train_time_steps(\n                    test_data, train_data\n                )\n                for score, t, step in zip(scores, times, steps):\n                    summary_writer.add_scalar(\n                        test_label, score, global_step=step, walltime=t\n                    )\n\n            valid_labels = sorted(\n                [\n                    key\n                    for key in list(self.train_data[experiment_name].keys())\n                    if \"valid\" in key\n                ]\n            )\n            for valid_label in valid_labels:\n                train_label = valid_label.replace(\"valid\", \"train\")\n                assert (\n                    train_label in self.train_data[experiment_name]\n                ), f\"Missing matching 'train' label {train_label} for valid label {valid_label}\"\n                train_data = self.train_data[experiment_name][train_label]\n                valid_data = self.train_data[experiment_name][valid_label]\n                scores, times, steps = self._eval_vs_train_time_steps(\n                    valid_data, train_data\n                )\n                for score, t, step in zip(scores, times, steps):\n                    summary_writer.add_scalar(\n                        valid_label, score, global_step=step, walltime=t\n                    )\n\n            train_labels = sorted(\n                [\n                    key\n                    for key in list(self.train_data[experiment_name].keys())\n                    if \"train\" in key\n      
          ]\n            )\n            for train_label in train_labels:\n                scores, times, steps = self._train_vs_time_steps(\n                    self.train_data[experiment_name][train_label]\n                )\n                for score, t, step in zip(scores, times, steps):\n                    summary_writer.add_scalar(\n                        train_label, score, global_step=step, walltime=t\n                    )\n\n            summary_writer.close()\n"
  },
  {
    "path": "allenact_plugins/__init__.py",
    "content": "try:\n    # noinspection PyProtectedMember,PyUnresolvedReferences\n    from allenact_plugins._version import __version__\nexcept ModuleNotFoundError:\n    __version__ = None\n"
  },
  {
    "path": "allenact_plugins/babyai_plugin/__init__.py",
    "content": "from allenact.utils.system import ImportChecker\n\nwith ImportChecker(\n    \"\\n\\nPlease install babyai with:\\n\\n\"\n    \"pip install -e git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd#egg=babyai\\n\",\n):\n    # noinspection PyUnresolvedReferences\n    import babyai\n"
  },
  {
    "path": "allenact_plugins/babyai_plugin/babyai_constants.py",
    "content": "import os\nfrom pathlib import Path\n\nBABYAI_EXPERT_TRAJECTORIES_DIR = os.path.abspath(\n    os.path.join(os.path.dirname(Path(__file__)), \"data\", \"demos\")\n)\n"
  },
  {
    "path": "allenact_plugins/babyai_plugin/babyai_models.py",
    "content": "from typing import Dict, Optional, List, cast, Tuple, Any\n\nimport babyai.model\nimport babyai.rl\nimport gym\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom gym.spaces.dict import Dict as SpaceDict\n\nfrom allenact.algorithms.onpolicy_sync.policy import (\n    ActorCriticModel,\n    ObservationType,\n    Memory,\n    DistributionType,\n)\nfrom allenact.base_abstractions.distributions import CategoricalDistr\nfrom allenact.base_abstractions.misc import ActorCriticOutput\n\n\nclass BabyAIACModelWrapped(babyai.model.ACModel):\n    def __init__(\n        self,\n        obs_space: Dict[str, int],\n        action_space: gym.spaces.Discrete,\n        image_dim=128,\n        memory_dim=128,\n        instr_dim=128,\n        use_instr=False,\n        lang_model=\"gru\",\n        use_memory=False,\n        arch=\"cnn1\",\n        aux_info=None,\n        include_auxiliary_head: bool = False,\n    ):\n        self.use_cnn2 = arch == \"cnn2\"\n        super().__init__(\n            obs_space=obs_space,\n            action_space=action_space,\n            image_dim=image_dim,\n            memory_dim=memory_dim,\n            instr_dim=instr_dim,\n            use_instr=use_instr,\n            lang_model=lang_model,\n            use_memory=use_memory,\n            arch=\"cnn1\" if self.use_cnn2 else arch,\n            aux_info=aux_info,\n        )\n\n        self.semantic_embedding = None\n        if self.use_cnn2:\n            self.semantic_embedding = nn.Embedding(33, embedding_dim=8)\n            self.image_conv = nn.Sequential(\n                nn.Conv2d(in_channels=24, out_channels=16, kernel_size=(2, 2)),\n                *self.image_conv[1:]  # type:ignore\n            )\n            self.image_conv[0].apply(babyai.model.initialize_parameters)\n\n        self.include_auxiliary_head = include_auxiliary_head\n        if self.use_memory and self.lang_model == \"gru\":\n            self.memory_rnn = 
nn.LSTM(self.image_dim, self.memory_dim)\n\n        if self.include_auxiliary_head:\n            self.aux = nn.Sequential(\n                nn.Linear(self.memory_dim, 64),\n                nn.Tanh(),\n                nn.Linear(64, action_space.n),\n            )\n            self.aux.apply(babyai.model.initialize_parameters)\n\n        self.train()\n\n    def forward_once(self, obs, memory, instr_embedding=None):\n        \"\"\"Copied (with minor modifications) from\n        `babyai.model.ACModel.forward(...)`.\"\"\"\n        if self.use_instr and instr_embedding is None:\n            instr_embedding = self._get_instr_embedding(obs.instr)\n        if self.use_instr and self.lang_model == \"attgru\":\n            # outputs: B x L x D\n            # memory: B x M\n            mask = (obs.instr != 0).float()\n            # The mask tensor has the same length as obs.instr, and\n            # thus can be both shorter and longer than instr_embedding.\n            # It can be longer if instr_embedding is computed\n            # for a subbatch of obs.instr.\n            # It can be shorter if obs.instr is a subbatch of\n            # the batch that instr_embeddings was computed for.\n            # Here, we make sure that mask and instr_embeddings\n            # have equal length along dimension 1.\n            mask = mask[:, : instr_embedding.shape[1]]\n            instr_embedding = instr_embedding[:, : mask.shape[1]]\n\n            keys = self.memory2key(memory)\n            pre_softmax = (keys[:, None, :] * instr_embedding).sum(2) + 1000 * mask\n            attention = F.softmax(pre_softmax, dim=1)\n            instr_embedding = (instr_embedding * attention[:, :, None]).sum(1)\n\n        x = torch.transpose(torch.transpose(obs.image, 1, 3), 2, 3)\n\n        if self.arch.startswith(\"expert_filmcnn\"):\n            x = self.image_conv(x)\n            for controler in self.controllers:\n                x = controler(x, instr_embedding)\n            x = 
F.relu(self.film_pool(x))\n        else:\n            x = self.image_conv(x.contiguous())\n\n        x = x.reshape(x.shape[0], -1)\n\n        if self.use_memory:\n            hidden = (\n                memory[:, : self.semi_memory_size],\n                memory[:, self.semi_memory_size :],\n            )\n            hidden = self.memory_rnn(x, hidden)\n            embedding = hidden[0]\n            memory = torch.cat(hidden, dim=1)  # type: ignore\n        else:\n            embedding = x\n\n        if self.use_instr and not \"filmcnn\" in self.arch:\n            embedding = torch.cat((embedding, instr_embedding), dim=1)\n\n        if hasattr(self, \"aux_info\") and self.aux_info:\n            extra_predictions = {\n                info: self.extra_heads[info](embedding) for info in self.extra_heads\n            }\n        else:\n            extra_predictions = dict()\n\n        return {\n            \"embedding\": embedding,\n            \"memory\": memory,\n            \"extra_predictions\": extra_predictions,\n        }\n\n    def forward_loop(\n        self,\n        observations: ObservationType,\n        recurrent_hidden_states: torch.FloatTensor,\n        prev_actions: torch.Tensor,\n        masks: torch.FloatTensor,\n    ):\n        results = []\n        images = cast(torch.FloatTensor, observations[\"minigrid_ego_image\"]).float()\n        instrs: Optional[torch.Tensor] = None\n        if \"minigrid_mission\" in observations:\n            instrs = cast(torch.Tensor, observations[\"minigrid_mission\"])\n\n        _, nsamplers, _ = recurrent_hidden_states.shape\n        rollouts_len = images.shape[0] // nsamplers\n        obs = babyai.rl.DictList()\n\n        images = images.view(rollouts_len, nsamplers, *images.shape[1:])\n        masks = masks.view(rollouts_len, nsamplers, *masks.shape[1:])  # type:ignore\n\n        # needs_reset = (masks != 1.0).view(nrollouts, -1).any(-1)\n        if instrs is not None:\n            instrs = instrs.view(rollouts_len, 
nsamplers, instrs.shape[-1])\n\n        needs_instr_reset_mask = masks != 1.0\n        needs_instr_reset_mask[0] = 1\n        needs_instr_reset_mask = needs_instr_reset_mask.squeeze(-1)\n        instr_embeddings: Optional[torch.Tensor] = None\n        if self.use_instr:\n            instr_reset_multi_inds = list(\n                (int(a), int(b))\n                for a, b in zip(*np.where(needs_instr_reset_mask.cpu().numpy()))\n            )\n            time_ind_to_which_need_instr_reset: List[List] = [\n                [] for _ in range(rollouts_len)\n            ]\n            reset_multi_ind_to_index = {\n                mi: i for i, mi in enumerate(instr_reset_multi_inds)\n            }\n            for a, b in instr_reset_multi_inds:\n                time_ind_to_which_need_instr_reset[a].append(b)\n\n            unique_instr_embeddings = self._get_instr_embedding(\n                instrs[needs_instr_reset_mask]\n            )\n\n            instr_embeddings_list = [unique_instr_embeddings[:nsamplers]]\n            current_instr_embeddings_list = list(instr_embeddings_list[-1])\n\n            for time_ind in range(1, rollouts_len):\n                if len(time_ind_to_which_need_instr_reset[time_ind]) == 0:\n                    instr_embeddings_list.append(instr_embeddings_list[-1])\n                else:\n                    for sampler_needing_reset_ind in time_ind_to_which_need_instr_reset[\n                        time_ind\n                    ]:\n                        current_instr_embeddings_list[sampler_needing_reset_ind] = (\n                            unique_instr_embeddings[\n                                reset_multi_ind_to_index[\n                                    (time_ind, sampler_needing_reset_ind)\n                                ]\n                            ]\n                        )\n\n                    instr_embeddings_list.append(\n                        torch.stack(current_instr_embeddings_list, dim=0)\n                    
)\n\n            instr_embeddings = torch.stack(instr_embeddings_list, dim=0)\n\n        assert recurrent_hidden_states.shape[0] == 1\n        memory = recurrent_hidden_states[0]\n        # instr_embedding: Optional[torch.Tensor] = None\n        for i in range(rollouts_len):\n            obs.image = images[i]\n            if \"minigrid_mission\" in observations:\n                obs.instr = instrs[i]\n\n            # reset = needs_reset[i].item()\n            # if self.baby_ai_model.use_instr and (reset or i == 0):\n            #     instr_embedding = self.baby_ai_model._get_instr_embedding(obs.instr)\n\n            results.append(\n                self.forward_once(\n                    obs, memory=memory * masks[i], instr_embedding=instr_embeddings[i]\n                )\n            )\n            memory = results[-1][\"memory\"]\n\n        embedding = torch.cat([r[\"embedding\"] for r in results], dim=0)\n\n        extra_predictions_list = [r[\"extra_predictions\"] for r in results]\n        extra_predictions = {\n            key: torch.cat([ep[key] for ep in extra_predictions_list], dim=0)\n            for key in extra_predictions_list[0]\n        }\n        return (\n            ActorCriticOutput(\n                distributions=CategoricalDistr(\n                    logits=self.actor(embedding),\n                ),\n                values=self.critic(embedding),\n                extras=(\n                    extra_predictions\n                    if not self.include_auxiliary_head\n                    else {\n                        **extra_predictions,\n                        \"auxiliary_distributions\": cast(\n                            Any, CategoricalDistr(logits=self.aux(embedding))\n                        ),\n                    }\n                ),\n            ),\n            torch.stack([r[\"memory\"] for r in results], dim=0),\n        )\n\n    # noinspection PyMethodOverriding\n    def forward(\n        self,\n        observations: 
ObservationType,\n        recurrent_hidden_states: torch.FloatTensor,\n        prev_actions: torch.Tensor,\n        masks: torch.FloatTensor,\n    ):\n        (\n            observations,\n            recurrent_hidden_states,\n            prev_actions,\n            masks,\n            num_steps,\n            num_samplers,\n            num_agents,\n            num_layers,\n        ) = self.adapt_inputs(\n            observations, recurrent_hidden_states, prev_actions, masks\n        )\n\n        if self.lang_model != \"gru\":\n            ac_output, hidden_states = self.forward_loop(\n                observations=observations,\n                recurrent_hidden_states=recurrent_hidden_states,\n                prev_actions=prev_actions,\n                masks=masks,  # type: ignore\n            )\n\n            return self.adapt_result(\n                ac_output,\n                hidden_states[-1:],\n                num_steps,\n                num_samplers,\n                num_agents,\n                num_layers,\n                observations,\n            )\n\n        assert recurrent_hidden_states.shape[0] == 1\n\n        images = cast(torch.FloatTensor, observations[\"minigrid_ego_image\"])\n        if self.use_cnn2:\n            images_shape = images.shape\n            # noinspection PyArgumentList\n            images = images + torch.LongTensor([0, 11, 22]).view(  # type:ignore\n                1, 1, 1, 3\n            ).to(images.device)\n            images = self.semantic_embedding(images).view(  # type:ignore\n                *images_shape[:3], 24\n            )\n        images = images.permute(0, 3, 1, 2).float()  # type:ignore\n\n        _, nsamplers, _ = recurrent_hidden_states.shape\n        rollouts_len = images.shape[0] // nsamplers\n\n        masks = cast(\n            torch.FloatTensor, masks.view(rollouts_len, nsamplers, *masks.shape[1:])\n        )\n        instrs: Optional[torch.Tensor] = None\n        if \"minigrid_mission\" in observations and 
self.use_instr:\n            instrs = cast(torch.FloatTensor, observations[\"minigrid_mission\"])\n            instrs = instrs.view(rollouts_len, nsamplers, instrs.shape[-1])\n\n        needs_instr_reset_mask = masks != 1.0\n        needs_instr_reset_mask[0] = 1\n        needs_instr_reset_mask = needs_instr_reset_mask.squeeze(-1)\n        blocking_inds: List[int] = np.where(\n            needs_instr_reset_mask.view(rollouts_len, -1).any(-1).cpu().numpy()\n        )[0].tolist()\n        blocking_inds.append(rollouts_len)\n\n        instr_embeddings: Optional[torch.Tensor] = None\n        if self.use_instr:\n            instr_reset_multi_inds = list(\n                (int(a), int(b))\n                for a, b in zip(*np.where(needs_instr_reset_mask.cpu().numpy()))\n            )\n            time_ind_to_which_need_instr_reset: List[List] = [\n                [] for _ in range(rollouts_len)\n            ]\n            reset_multi_ind_to_index = {\n                mi: i for i, mi in enumerate(instr_reset_multi_inds)\n            }\n            for a, b in instr_reset_multi_inds:\n                time_ind_to_which_need_instr_reset[a].append(b)\n\n            unique_instr_embeddings = self._get_instr_embedding(\n                instrs[needs_instr_reset_mask]\n            )\n\n            instr_embeddings_list = [unique_instr_embeddings[:nsamplers]]\n            current_instr_embeddings_list = list(instr_embeddings_list[-1])\n\n            for time_ind in range(1, rollouts_len):\n                if len(time_ind_to_which_need_instr_reset[time_ind]) == 0:\n                    instr_embeddings_list.append(instr_embeddings_list[-1])\n                else:\n                    for sampler_needing_reset_ind in time_ind_to_which_need_instr_reset[\n                        time_ind\n                    ]:\n                        current_instr_embeddings_list[sampler_needing_reset_ind] = (\n                            unique_instr_embeddings[\n                                
reset_multi_ind_to_index[\n                                    (time_ind, sampler_needing_reset_ind)\n                                ]\n                            ]\n                        )\n\n                    instr_embeddings_list.append(\n                        torch.stack(current_instr_embeddings_list, dim=0)\n                    )\n\n            instr_embeddings = torch.stack(instr_embeddings_list, dim=0)\n\n        # The following code can be used to compute the instr_embeddings in another way\n        # and thus verify that the above logic is (more likely to be) correct\n        # needs_instr_reset_mask = (masks != 1.0)\n        # needs_instr_reset_mask[0] *= 0\n        # needs_instr_reset_inds = needs_instr_reset_mask.view(nrollouts, -1).any(-1).cpu().numpy()\n        #\n        # # Get inds where a new task has started\n        # blocking_inds: List[int] = np.where(needs_instr_reset_inds)[0].tolist()\n        # blocking_inds.append(needs_instr_reset_inds.shape[0])\n        # if nrollouts != 1:\n        #     pdb.set_trace()\n        # if blocking_inds[0] != 0:\n        #     blocking_inds.insert(0, 0)\n        # if self.use_instr:\n        #     instr_embeddings_list = []\n        #     for ind0, ind1 in zip(blocking_inds[:-1], blocking_inds[1:]):\n        #         instr_embeddings_list.append(\n        #             self._get_instr_embedding(instrs[ind0])\n        #             .unsqueeze(0)\n        #             .repeat(ind1 - ind0, 1, 1)\n        #         )\n        #     tmp_instr_embeddings = torch.cat(instr_embeddings_list, dim=0)\n        # assert (instr_embeddings - tmp_instr_embeddings).abs().max().item() < 1e-6\n\n        # Embed images\n        # images = images.view(nrollouts, nsamplers, *images.shape[1:])\n        image_embeddings = self.image_conv(images)\n        if self.arch.startswith(\"expert_filmcnn\"):\n            instr_embeddings_flatter = instr_embeddings.view(\n                -1, *instr_embeddings.shape[2:]\n            
)\n            for controller in self.controllers:\n                image_embeddings = controller(\n                    image_embeddings, instr_embeddings_flatter\n                )\n            image_embeddings = F.relu(self.film_pool(image_embeddings))\n\n        image_embeddings = image_embeddings.view(rollouts_len, nsamplers, -1)\n\n        if self.use_instr and self.lang_model == \"attgru\":\n            raise NotImplementedError(\"Currently attgru is not implemented.\")\n\n        memory = None\n        if self.use_memory:\n            assert recurrent_hidden_states.shape[0] == 1\n            hidden = (\n                recurrent_hidden_states[:, :, : self.semi_memory_size],\n                recurrent_hidden_states[:, :, self.semi_memory_size :],\n            )\n            embeddings_list = []\n            for ind0, ind1 in zip(blocking_inds[:-1], blocking_inds[1:]):\n                hidden = (hidden[0] * masks[ind0], hidden[1] * masks[ind0])\n                rnn_out, hidden = self.memory_rnn(image_embeddings[ind0:ind1], hidden)\n                embeddings_list.append(rnn_out)\n\n            # embedding = hidden[0]\n            embedding = torch.cat(embeddings_list, dim=0)\n            memory = torch.cat(hidden, dim=-1)\n        else:\n            embedding = image_embeddings\n\n        if self.use_instr and not \"filmcnn\" in self.arch:\n            embedding = torch.cat((embedding, instr_embeddings), dim=-1)\n\n        if hasattr(self, \"aux_info\") and self.aux_info:\n            extra_predictions = {\n                info: self.extra_heads[info](embedding) for info in self.extra_heads\n            }\n        else:\n            extra_predictions = dict()\n\n        embedding = embedding.view(rollouts_len * nsamplers, -1)\n\n        ac_output = ActorCriticOutput(\n            distributions=CategoricalDistr(\n                logits=self.actor(embedding),\n            ),\n            values=self.critic(embedding),\n            extras=(\n                
extra_predictions\n                if not self.include_auxiliary_head\n                else {\n                    **extra_predictions,\n                    \"auxiliary_distributions\": CategoricalDistr(\n                        logits=self.aux(embedding)\n                    ),\n                }\n            ),\n        )\n        hidden_states = memory\n\n        return self.adapt_result(\n            ac_output,\n            hidden_states,\n            num_steps,\n            num_samplers,\n            num_agents,\n            num_layers,\n            observations,\n        )\n\n    @staticmethod\n    def adapt_inputs(  # type: ignore\n        observations: ObservationType,\n        recurrent_hidden_states: torch.FloatTensor,\n        prev_actions: torch.Tensor,\n        masks: torch.FloatTensor,\n    ):\n        # INPUTS\n        # observations are of shape [num_steps, num_samplers, ...]\n        # recurrent_hidden_states are of shape [num_layers, num_samplers, (num_agents,) num_dims]\n        # prev_actions are of shape [num_steps, num_samplers, ...]\n        # masks are of shape [num_steps, num_samplers, 1]\n        # num_agents is assumed to be 1\n\n        num_steps, num_samplers = masks.shape[:2]\n        num_layers = recurrent_hidden_states.shape[0]\n        num_agents = 1\n\n        # Flatten all observation batch dims\n        def recursively_adapt_observations(obs):\n            for entry in obs:\n                if isinstance(obs[entry], Dict):\n                    recursively_adapt_observations(obs[entry])\n                else:\n                    assert isinstance(obs[entry], torch.Tensor)\n                    if entry in [\"minigrid_ego_image\", \"minigrid_mission\"]:\n                        final_dims = obs[entry].shape[2:]\n                        obs[entry] = obs[entry].view(\n                            num_steps * num_samplers, *final_dims\n                        )\n\n        # Old-style inputs need to be\n        # observations [num_steps 
* num_samplers, ...]\n        # recurrent_hidden_states [num_layers, num_samplers (* num_agents), num_dims]\n        # prev_actions [num_steps * num_samplers, -1]\n        # masks [num_steps * num_samplers, 1]\n\n        recursively_adapt_observations(observations)\n        recurrent_hidden_states = cast(\n            torch.FloatTensor,\n            recurrent_hidden_states.view(num_layers, num_samplers * num_agents, -1),\n        )\n        if prev_actions is not None:\n            prev_actions = prev_actions.view(  # type:ignore\n                num_steps * num_samplers, -1\n            )\n        masks = masks.view(num_steps * num_samplers, 1)  # type:ignore\n\n        return (\n            observations,\n            recurrent_hidden_states,\n            prev_actions,\n            masks,\n            num_steps,\n            num_samplers,\n            num_agents,\n            num_layers,\n        )\n\n    @staticmethod\n    def adapt_result(ac_output, hidden_states, num_steps, num_samplers, num_agents, num_layers, observations):  # type: ignore\n        distributions = CategoricalDistr(\n            logits=ac_output.distributions.logits.view(num_steps, num_samplers, -1),\n        )\n        values = ac_output.values.view(num_steps, num_samplers, num_agents)\n        extras = ac_output.extras  # ignore shape\n        # TODO confirm the shape of the auxiliary distribution is the same as the actor's\n        if \"auxiliary_distributions\" in extras:\n            extras[\"auxiliary_distributions\"] = CategoricalDistr(\n                logits=extras[\"auxiliary_distributions\"].logits.view(\n                    num_steps, num_samplers, -1  # assume single-agent\n                ),\n            )\n\n        hidden_states = hidden_states.view(num_layers, num_samplers * num_agents, -1)\n\n        # Unflatten all observation batch dims\n        def recursively_adapt_observations(obs):\n            for entry in obs:\n                if isinstance(obs[entry], Dict):\n        
            recursively_adapt_observations(obs[entry])\n                else:\n                    assert isinstance(obs[entry], torch.Tensor)\n                    if entry in [\"minigrid_ego_image\", \"minigrid_mission\"]:\n                        final_dims = obs[entry].shape[\n                            1:\n                        ]  # assumes no agents dim in observations!\n                        obs[entry] = obs[entry].view(\n                            num_steps, num_samplers * num_agents, *final_dims\n                        )\n\n        recursively_adapt_observations(observations)\n\n        return (\n            ActorCriticOutput(\n                distributions=distributions, values=values, extras=extras\n            ),\n            hidden_states,\n        )\n\n\nclass BabyAIRecurrentACModel(ActorCriticModel[CategoricalDistr]):\n    def __init__(\n        self,\n        action_space: gym.spaces.Discrete,\n        observation_space: SpaceDict,\n        image_dim=128,\n        memory_dim=128,\n        instr_dim=128,\n        use_instr=False,\n        lang_model=\"gru\",\n        use_memory=False,\n        arch=\"cnn1\",\n        aux_info=None,\n        include_auxiliary_head: bool = False,\n    ):\n        super().__init__(action_space=action_space, observation_space=observation_space)\n\n        assert \"minigrid_ego_image\" in observation_space.spaces\n        assert not use_instr or \"minigrid_mission\" in observation_space.spaces\n\n        self.memory_dim = memory_dim\n        self.include_auxiliary_head = include_auxiliary_head\n\n        self.baby_ai_model = BabyAIACModelWrapped(\n            obs_space={\n                \"image\": 7 * 7 * 3,\n                \"instr\": 100,\n            },\n            action_space=action_space,\n            image_dim=image_dim,\n            memory_dim=memory_dim,\n            instr_dim=instr_dim,\n            use_instr=use_instr,\n            lang_model=lang_model,\n            use_memory=use_memory,\n            
arch=arch,\n            aux_info=aux_info,\n            include_auxiliary_head=self.include_auxiliary_head,\n        )\n        self.memory_key = \"rnn\"\n\n    @property\n    def recurrent_hidden_state_size(self) -> int:\n        return 2 * self.memory_dim\n\n    @property\n    def num_recurrent_layers(self):\n        return 1\n\n    def _recurrent_memory_specification(self):\n        return {\n            self.memory_key: (\n                (\n                    (\"layer\", self.num_recurrent_layers),\n                    (\"sampler\", None),\n                    (\"hidden\", self.recurrent_hidden_state_size),\n                ),\n                torch.float32,\n            )\n        }\n\n    def forward(  # type:ignore\n        self,\n        observations: ObservationType,\n        memory: Memory,\n        prev_actions: torch.Tensor,\n        masks: torch.FloatTensor,\n    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:\n        out, recurrent_hidden_states = self.baby_ai_model.forward(\n            observations=observations,\n            recurrent_hidden_states=cast(\n                torch.FloatTensor, memory.tensor(self.memory_key)\n            ),\n            prev_actions=prev_actions,\n            masks=masks,\n        )\n        return out, memory.set_tensor(self.memory_key, recurrent_hidden_states)\n"
  },
  {
    "path": "allenact_plugins/babyai_plugin/babyai_tasks.py",
    "content": "import random\nimport signal\nfrom typing import Tuple, Any, List, Dict, Optional, Union, Callable\n\nimport babyai\nimport babyai.bot\nimport gym\nimport numpy as np\nfrom gym.utils import seeding\nfrom gym_minigrid.minigrid import MiniGridEnv\n\nfrom allenact.base_abstractions.misc import RLStepResult\nfrom allenact.base_abstractions.sensor import Sensor, SensorSuite\nfrom allenact.base_abstractions.task import Task, TaskSampler\nfrom allenact.utils.system import get_logger\n\n\nclass BabyAITask(Task[MiniGridEnv]):\n    def __init__(\n        self,\n        env: MiniGridEnv,\n        sensors: Union[SensorSuite, List[Sensor]],\n        task_info: Dict[str, Any],\n        expert_view_size: int = 7,\n        expert_can_see_through_walls: bool = False,\n        **kwargs,\n    ):\n        super().__init__(\n            env=env,\n            sensors=sensors,\n            task_info=task_info,\n            max_steps=env.max_steps,\n            **kwargs,\n        )\n        self._was_successful: bool = False\n        self.bot: Optional[babyai.bot.Bot] = None\n        self._bot_died = False\n        self.expert_view_size = expert_view_size\n        self.expert_can_see_through_walls = expert_can_see_through_walls\n        self._last_action: Optional[int] = None\n\n        env.max_steps = env.max_steps + 1\n\n    @property\n    def action_space(self) -> gym.spaces.Discrete:\n        return self.env.action_space\n\n    def render(self, mode: str = \"rgb\", *args, **kwargs) -> np.ndarray:\n        return self.env.render(mode=mode)\n\n    def _step(self, action: int) -> RLStepResult:\n        assert isinstance(action, int)\n\n        minigrid_obs, reward, done, info = self.env.step(action=action)\n        self._last_action = action\n\n        self._was_successful = done and reward > 0\n\n        return RLStepResult(\n            observation=self.get_observations(minigrid_output_obs=minigrid_obs),\n            reward=reward,\n            done=self.is_done(),\n    
        info=info,\n        )\n\n    def get_observations(\n        self, *args, minigrid_output_obs: Optional[Dict[str, Any]] = None, **kwargs\n    ) -> Any:\n        return self.sensor_suite.get_observations(\n            env=self.env, task=self, minigrid_output_obs=minigrid_output_obs\n        )\n\n    def reached_terminal_state(self) -> bool:\n        return self._was_successful\n\n    @classmethod\n    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:\n        return tuple(\n            x\n            for x, _ in sorted(\n                [(str(a), a.value) for a in MiniGridEnv.Actions], key=lambda x: x[1]\n            )\n        )\n\n    def close(self) -> None:\n        pass\n\n    def _expert_timeout_hander(self, signum, frame):\n        raise TimeoutError\n\n    def query_expert(self, **kwargs) -> Tuple[Any, bool]:\n        see_through_walls = self.env.see_through_walls\n        agent_view_size = self.env.agent_view_size\n\n        if self._bot_died:\n            return 0, False\n\n        try:\n            self.env.agent_view_size = self.expert_view_size\n            self.env.expert_can_see_through_walls = self.expert_can_see_through_walls\n\n            if self.bot is None:\n                self.bot = babyai.bot.Bot(self.env)\n\n            signal.signal(signal.SIGALRM, self._expert_timeout_hander)\n            signal.alarm(kwargs.get(\"timeout\", 4 if self.num_steps_taken() == 0 else 2))\n            return self.bot.replan(self._last_action), True\n        except TimeoutError as _:\n            self._bot_died = True\n            return 0, False\n        finally:\n            signal.alarm(0)\n            self.env.see_through_walls = see_through_walls\n            self.env.agent_view_size = agent_view_size\n\n    def metrics(self) -> Dict[str, Any]:\n        metrics = {\n            **super(BabyAITask, self).metrics(),\n            \"success\": 1.0 * (self.reached_terminal_state()),\n        }\n        return metrics\n\n\nclass 
BabyAITaskSampler(TaskSampler):\n    def __init__(\n        self,\n        env_builder: Union[str, Callable[..., MiniGridEnv]],\n        sensors: Union[SensorSuite, List[Sensor]],\n        max_tasks: Optional[int] = None,\n        num_unique_seeds: Optional[int] = None,\n        task_seeds_list: Optional[List[int]] = None,\n        deterministic_sampling: bool = False,\n        extra_task_kwargs: Optional[Dict] = None,\n        **kwargs,\n    ):\n        super(BabyAITaskSampler, self).__init__()\n        self.sensors = (\n            SensorSuite(sensors) if not isinstance(sensors, SensorSuite) else sensors\n        )\n        self.max_tasks = max_tasks\n        self.num_unique_seeds = num_unique_seeds\n        self.deterministic_sampling = deterministic_sampling\n        self.extra_task_kwargs = (\n            extra_task_kwargs if extra_task_kwargs is not None else {}\n        )\n\n        self._last_env_seed: Optional[int] = None\n        self._last_task: Optional[BabyAITask] = None\n\n        assert (self.num_unique_seeds is None) or (\n            0 < self.num_unique_seeds\n        ), \"`num_unique_seeds` must be a positive integer.\"\n\n        self.num_unique_seeds = num_unique_seeds\n        self.task_seeds_list = task_seeds_list\n        if self.task_seeds_list is not None:\n            if self.num_unique_seeds is not None:\n                assert self.num_unique_seeds == len(\n                    self.task_seeds_list\n                ), \"`num_unique_seeds` must equal the length of `task_seeds_list` if both specified.\"\n            self.num_unique_seeds = len(self.task_seeds_list)\n        elif self.num_unique_seeds is not None:\n            self.task_seeds_list = list(range(self.num_unique_seeds))\n\n        if (not deterministic_sampling) and self.max_tasks:\n            get_logger().warning(\n                \"`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,\"\n                \" this might be a mistake when running 
testing.\"\n            )\n\n        if isinstance(env_builder, str):\n            self.env = gym.make(env_builder)\n        else:\n            self.env = env_builder()\n\n        self.np_seeded_random_gen, _ = seeding.np_random(random.randint(0, 2**31 - 1))\n        self.num_tasks_generated = 0\n\n    @property\n    def length(self) -> Union[int, float]:\n        return (\n            float(\"inf\")\n            if self.max_tasks is None\n            else self.max_tasks - self.num_tasks_generated\n        )\n\n    @property\n    def total_unique(self) -> Optional[Union[int, float]]:\n        return None if self.num_unique_seeds is None else self.num_unique_seeds\n\n    @property\n    def last_sampled_task(self) -> Optional[Task]:\n        raise NotImplementedError\n\n    def next_task(self, force_advance_scene: bool = False) -> Optional[BabyAITask]:\n        if self.length <= 0:\n            return None\n\n        if self.num_unique_seeds is not None:\n            if self.deterministic_sampling:\n                self._last_env_seed = self.task_seeds_list[\n                    self.num_tasks_generated % len(self.task_seeds_list)\n                ]\n            else:\n                self._last_env_seed = self.np_seeded_random_gen.choice(\n                    self.task_seeds_list\n                )\n        else:\n            self._last_env_seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)\n\n        self.env.seed(self._last_env_seed)\n        self.env.saved_seed = self._last_env_seed\n        self.env.reset()\n\n        self.num_tasks_generated += 1\n        self._last_task = BabyAITask(env=self.env, sensors=self.sensors, task_info={})\n        return self._last_task\n\n    def close(self) -> None:\n        self.env.close()\n\n    @property\n    def all_observation_spaces_equal(self) -> bool:\n        return True\n\n    def reset(self) -> None:\n        self.num_tasks_generated = 0\n        self.env.reset()\n\n    def set_seed(self, seed: int) -> None:\n       
 self.np_seeded_random_gen, _ = seeding.np_random(seed)\n"
  },
  {
    "path": "allenact_plugins/babyai_plugin/configs/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/babyai_plugin/data/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/babyai_plugin/extra_environment.yml",
    "content": "dependencies:\n  - networkx\n  - pip\n  - pip:\n      - \"--editable=git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd#egg=babyai\"\n"
  },
  {
    "path": "allenact_plugins/babyai_plugin/extra_requirements.txt",
    "content": "babyai @ git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd\nnetworkx"
  },
  {
    "path": "allenact_plugins/babyai_plugin/scripts/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/babyai_plugin/scripts/download_babyai_expert_demos.py",
    "content": "import argparse\nimport os\nimport platform\n\nfrom allenact_plugins.babyai_plugin.babyai_constants import (\n    BABYAI_EXPERT_TRAJECTORIES_DIR,\n)\n\nLEVEL_TO_TRAIN_VALID_IDS = {\n    \"BossLevel\": (\n        \"1DkVVpIEVtpyo1LxOXQL_bVyjFCTO3cHD\",\n        \"1ccEFA_n5RT4SWD0Wa_qO65z2HACJBace\",\n    ),\n    \"GoToObjMaze\": (\n        \"1P1CuMbGDJtZit1f-8hmd-HwweXZMj77T\",\n        \"1MVlVsIpJUZ0vjrYGXY6Ku4m4vBxtWjRZ\",\n    ),\n    \"GoTo\": (\"1ABR1q-TClgjSlbhVdVJjzOBpTmTtlTN1\", \"13DlEx5woi31MIs_dzyLxfi7dPe1g59l2\"),\n    \"GoToLocal\": (\n        \"1U8YWdd3viN2lxOP5BByNUZRPVDKVvDAN\",\n        \"1Esy-J0t8eJUg6_RT8F4kkegHYDWwqmSl\",\n    ),\n}\n\n\ndef get_args():\n    \"\"\"Creates the argument parser and parses input arguments.\"\"\"\n\n    # noinspection PyTypeChecker\n    parser = argparse.ArgumentParser(\n        description=\"download_babyai_expert_demos\",\n        formatter_class=argparse.ArgumentDefaultsHelpFormatter,\n    )\n\n    parser.add_argument(\n        \"dataset\",\n        nargs=\"?\",\n        default=\"all\",\n        help=\"dataset name (one of {}, or all)\".format(\n            \", \".join(LEVEL_TO_TRAIN_VALID_IDS.keys())\n        ),\n    )\n\n    return parser.parse_args()\n\n\nif __name__ == \"__main__\":\n    args = get_args()\n\n    if platform.system() == \"Linux\":\n        download_template = \"\"\"wget --load-cookies /tmp/cookies.txt \"https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id={}' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\\\1\\\\n/p')&id={}\" -O {}\"\"\"\n    elif platform.system() == \"Darwin\":\n        download_template = \"\"\"wget --load-cookies /tmp/cookies.txt \"https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id={}' 
-O- | gsed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\\\1\\\\n/p')&id={}\" -O {}\"\"\"\n    else:\n        raise NotImplementedError(\"{} is not supported\".format(platform.system()))\n\n    try:\n        os.makedirs(BABYAI_EXPERT_TRAJECTORIES_DIR, exist_ok=True)\n\n        if args.dataset == \"all\":\n            id_items = LEVEL_TO_TRAIN_VALID_IDS\n        else:\n            assert (\n                args.dataset in LEVEL_TO_TRAIN_VALID_IDS\n            ), \"Only {} are valid datasets\".format(\n                \", \".join(LEVEL_TO_TRAIN_VALID_IDS.keys())\n            )\n            id_items = {args.dataset: LEVEL_TO_TRAIN_VALID_IDS[args.dataset]}\n\n        for level_name, (train_id, valid_id) in id_items.items():\n            train_path = os.path.join(\n                BABYAI_EXPERT_TRAJECTORIES_DIR, \"BabyAI-{}-v0.pkl\".format(level_name)\n            )\n            if os.path.exists(train_path):\n                print(\"{} already exists, skipping...\".format(train_path))\n            else:\n                os.system(download_template.format(train_id, train_id, train_path))\n                print(\"Demos saved to {}.\".format(train_path))\n\n            valid_path = os.path.join(\n                BABYAI_EXPERT_TRAJECTORIES_DIR,\n                \"BabyAI-{}-v0_valid.pkl\".format(level_name),\n            )\n            if os.path.exists(valid_path):\n                print(\"{} already exists, skipping...\".format(valid_path))\n            else:\n                os.system(download_template.format(valid_id, valid_id, valid_path))\n                print(\"Demos saved to {}.\".format(valid_path))\n    except Exception as _:\n        raise Exception(\n            \"Failed to download babyai demos. Make sure you have the appropriate command line\"\n            \" tools installed for your platform. For MacOS you'll need to install `gsed` and `gwget (the gnu version\"\n            \" of sed) using homebrew or some other method.\"\n        )\n"
  },
  {
    "path": "allenact_plugins/babyai_plugin/scripts/get_instr_length_percentiles.py",
    "content": "import glob\nimport os\n\nimport babyai\nimport numpy as np\n\nfrom allenact_plugins.babyai_plugin.babyai_constants import (\n    BABYAI_EXPERT_TRAJECTORIES_DIR,\n)\n\n# Boss level\n# [(50, 11.0), (90, 22.0), (99, 32.0), (99.9, 38.0), (99.99, 43.0)]\n\nif __name__ == \"__main__\":\n    # level = \"BossLevel\"\n    level = \"GoToLocal\"\n    files = glob.glob(\n        os.path.join(BABYAI_EXPERT_TRAJECTORIES_DIR, \"*{}-v0.pkl\".format(level))\n    )\n    assert len(files) == 1\n\n    demos = babyai.utils.load_demos(files[0])\n\n    percentiles = [50, 90, 99, 99.9, 99.99, 100]\n    print(\n        list(\n            zip(\n                percentiles,\n                np.percentile([len(d[0].split(\" \")) for d in demos], percentiles),\n            )\n        )\n    )\n"
  },
  {
    "path": "allenact_plugins/babyai_plugin/scripts/truncate_expert_demos.py",
    "content": "import glob\nimport os\n\nimport babyai\n\nfrom allenact_plugins.babyai_plugin.babyai_constants import (\n    BABYAI_EXPERT_TRAJECTORIES_DIR,\n)\n\n\ndef make_small_demos(dir: str):\n    for file_path in glob.glob(os.path.join(dir, \"*.pkl\")):\n        if \"valid\" not in file_path and \"small\" not in file_path:\n            new_file_path = file_path.replace(\".pkl\", \"-small.pkl\")\n            if os.path.exists(new_file_path):\n                continue\n            print(\n                \"Saving small version of {} to {}...\".format(\n                    os.path.basename(file_path), new_file_path\n                )\n            )\n            babyai.utils.save_demos(\n                babyai.utils.load_demos(file_path)[:1000], new_file_path\n            )\n            print(\"Done.\")\n\n\nif __name__ == \"__main__\":\n    make_small_demos(BABYAI_EXPERT_TRAJECTORIES_DIR)\n"
  },
  {
    "path": "allenact_plugins/clip_plugin/__init__.py",
    "content": "from allenact.utils.system import ImportChecker\n\nwith ImportChecker(\n    \"Cannot `import clip`. Please install clip from the openai/CLIP git repository:\"\n    \"\\n`pip install git+https://github.com/openai/CLIP.git@b46f5ac7587d2e1862f8b7b1573179d80dcdd620`\"\n):\n    # noinspection PyUnresolvedReferences\n    import clip\n"
  },
  {
    "path": "allenact_plugins/clip_plugin/clip_preprocessors.py",
    "content": "from typing import List, Optional, Any, cast, Dict, Tuple\n\nimport clip\nimport gym\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom clip.model import CLIP\n\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.utils.misc_utils import prepare_locals_for_super\n\n\nclass ClipResNetEmbedder(nn.Module):\n    def __init__(self, resnet: CLIP, pool=True, pooling_type=\"avg\"):\n        super().__init__()\n        self.model = resnet\n        self.pool = pool\n        self.pooling_type = pooling_type\n\n        if not pool:\n            self.model.visual.attnpool = nn.Identity()\n        elif self.pooling_type == \"attn\":\n            pass\n        elif self.pooling_type == \"avg\":\n            self.model.visual.attnpool = nn.Sequential(\n                nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten(start_dim=-3, end_dim=-1)\n            )\n        else:\n            raise NotImplementedError(\"`pooling_type` must be 'avg' or 'attn'.\")\n\n        self.eval()\n\n    def forward(self, x):\n        with torch.no_grad():\n            return self.model.visual(x)\n\n\nclass ClipResNetPreprocessor(Preprocessor):\n    \"\"\"Preprocess RGB or depth image using a ResNet model with CLIP model\n    weights.\"\"\"\n\n    CLIP_RGB_MEANS = (0.48145466, 0.4578275, 0.40821073)\n    CLIP_RGB_STDS = (0.26862954, 0.26130258, 0.27577711)\n\n    def __init__(\n        self,\n        rgb_input_uuid: str,\n        clip_model_type: str,\n        pool: bool,\n        device: Optional[torch.device] = None,\n        device_ids: Optional[List[torch.device]] = None,\n        input_img_height_width: Tuple[int, int] = (224, 224),\n        chunk_size: Optional[int] = None,\n        **kwargs: Any,\n    ):\n        assert clip_model_type in clip.available_models()\n        assert pool == False or input_img_height_width == (224, 224)\n        assert all(iis % 32 == 0 for iis in input_img_height_width)\n\n        output_height_width = tuple(iis // 32 
for iis in input_img_height_width)\n        if clip_model_type == \"RN50\":\n            output_shape = (2048,) + output_height_width\n        elif clip_model_type == \"RN50x16\":\n            output_shape = (3072,) + output_height_width\n        else:\n            raise NotImplementedError(\n                f\"Currently `clip_model_type` must be one of 'RN50' or 'RN50x16'\"\n            )\n\n        if pool:\n            output_shape = output_shape[:1]\n\n        self.clip_model_type = clip_model_type\n\n        self.pool = pool\n\n        self.device = torch.device(\"cpu\") if device is None else device\n        self.device_ids = device_ids or cast(\n            List[torch.device], list(range(torch.cuda.device_count()))\n        )\n        self._resnet: Optional[ClipResNetEmbedder] = None\n\n        self.chunk_size = chunk_size\n\n        low = -np.inf\n        high = np.inf\n        shape = output_shape\n\n        input_uuids = [rgb_input_uuid]\n        assert (\n            len(input_uuids) == 1\n        ), \"resnet preprocessor can only consume one observation type\"\n\n        observation_space = gym.spaces.Box(low=low, high=high, shape=shape)\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    @property\n    def resnet(self) -> ClipResNetEmbedder:\n        if self._resnet is None:\n            self._resnet = ClipResNetEmbedder(\n                clip.load(self.clip_model_type, device=self.device)[0], pool=self.pool\n            ).to(self.device)\n            for module in self._resnet.modules():\n                if \"BatchNorm\" in type(module).__name__:\n                    module.momentum = 0.0\n            self._resnet.eval()\n        return self._resnet\n\n    def to(self, device: torch.device) -> \"ClipResNetPreprocessor\":\n        self._resnet = self.resnet.to(device)\n        self.device = device\n        return self\n\n    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:\n        x = 
obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2)  # bhwc -> bchw\n        # If the input is depth, repeat it across all 3 channels\n        if x.shape[1] == 1:\n            x = x.repeat(1, 3, 1, 1)\n\n        n = x.shape[0]\n        if self.chunk_size is not None and x.shape[0] > self.chunk_size:\n            processed_chunks = []\n            for idx in range(0, n, self.chunk_size):\n                processed_chunks.append(\n                    self.resnet(x[idx : min(idx + self.chunk_size, n)]).float()\n                )\n            x = torch.cat(processed_chunks, dim=0)\n        else:\n            x = self.resnet(x).float()\n        return x\n\n\nclass ClipViTEmbedder(nn.Module):\n    def __init__(self, model: CLIP, class_emb_only: bool = False):\n        super().__init__()\n        self.model = model\n        self.model.visual.transformer.resblocks = nn.Sequential(\n            *list(self.model.visual.transformer.resblocks)[:-1]\n        )\n        self.class_emb_only = class_emb_only\n\n        self.eval()\n\n    def forward(self, x):\n        m = self.model.visual\n        with torch.no_grad():\n            x = m.conv1(x)  # shape = [*, width, grid, grid]\n            x = x.reshape(x.shape[0], x.shape[1], -1)  # shape = [*, width, grid ** 2]\n            x = x.permute(0, 2, 1)  # shape = [*, grid ** 2, width]\n            x = torch.cat(\n                [\n                    m.class_embedding.to(x.dtype)\n                    + torch.zeros(\n                        x.shape[0], 1, x.shape[-1], dtype=x.dtype, device=x.device\n                    ),\n                    x,\n                ],\n                dim=1,\n            )  # shape = [*, grid ** 2 + 1, width]\n            x = x + m.positional_embedding.to(x.dtype)\n            x = m.ln_pre(x)\n\n            x = x.permute(1, 0, 2)  # NLD -> LND\n            x = m.transformer(x)\n            x = x.permute(1, 0, 2)  # LND -> NLD\n\n            if self.class_emb_only:\n                return 
x[:, 0, :]\n            else:\n                return x\n\n\nclass ClipViTPreprocessor(Preprocessor):\n    \"\"\"Preprocess RGB or depth image using a ViT model with CLIP model\n    weights.\"\"\"\n\n    CLIP_RGB_MEANS = (0.48145466, 0.4578275, 0.40821073)\n    CLIP_RGB_STDS = (0.26862954, 0.26130258, 0.27577711)\n\n    def __init__(\n        self,\n        rgb_input_uuid: str,\n        clip_model_type: str,\n        class_emb_only: bool,\n        device: Optional[torch.device] = None,\n        device_ids: Optional[List[torch.device]] = None,\n        **kwargs: Any,\n    ):\n        assert clip_model_type in clip.available_models()\n\n        if clip_model_type == \"ViT-B/32\":\n            output_shape = (7 * 7 + 1, 768)\n        elif clip_model_type == \"ViT-B/16\":\n            output_shape = (14 * 14 + 1, 768)\n        elif clip_model_type == \"ViT-L/14\":\n            output_shape = (16 * 16 + 1, 1024)\n        else:\n            raise NotImplementedError(\n                f\"Currently `clip_model_type` must be one of 'ViT-B/32', 'ViT-B/16', or 'ViT-L/14'\"\n            )\n\n        if class_emb_only:\n            output_shape = output_shape[1:]\n\n        self.clip_model_type = clip_model_type\n\n        self.class_emb_only = class_emb_only\n\n        self.device = torch.device(\"cpu\") if device is None else device\n        self.device_ids = device_ids or cast(\n            List[torch.device], list(range(torch.cuda.device_count()))\n        )\n        self._vit: Optional[ClipViTEmbedder] = None\n\n        low = -np.inf\n        high = np.inf\n        shape = output_shape\n\n        input_uuids = [rgb_input_uuid]\n        assert (\n            len(input_uuids) == 1\n        ), \"ViT preprocessor can only consume one observation type\"\n\n        observation_space = gym.spaces.Box(low=low, high=high, shape=shape)\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    @property\n    def vit(self) -> ClipViTEmbedder:\n        if self._vit 
is None:\n            self._vit = ClipViTEmbedder(\n                model=clip.load(self.clip_model_type, device=self.device)[0],\n                class_emb_only=self.class_emb_only,\n            ).to(self.device)\n            for module in self._vit.modules():\n                if \"BatchNorm\" in type(module).__name__:\n                    module.momentum = 0.0\n            self._vit.eval()\n        return self._vit\n\n    def to(self, device: torch.device) -> \"ClipViTPreprocessor\":\n        self._vit = self.vit.to(device)\n        self.device = device\n        return self\n\n    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:\n        x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2)  # bhwc -> bchw\n        # If the input is depth, repeat it across all 3 channels\n        if x.shape[1] == 1:\n            x = x.repeat(1, 3, 1, 1)\n        x = self.vit(x).float()\n        return x\n"
  },
  {
    "path": "allenact_plugins/clip_plugin/extra_environment.yml",
    "content": "channels:\n  - pytorch\n  - defaults\n  - conda-forge\ndependencies:\n  - pytorch>=1.7.1\n  - torchvision\n  - pip:\n      - ftfy\n      - regex\n      - tqdm\n      - \"--editable=git+https://github.com/openai/CLIP.git@e184f608c5d5e58165682f7c332c3a8b4c1545f2#egg=clip\"\n"
  },
  {
    "path": "allenact_plugins/clip_plugin/extra_requirements.txt",
    "content": "torch>=1.7.1\ntorchvision\nftfy\nregex\ntqdm\nclip @ git+https://github.com/openai/clip@e184f608c5d5e58165682f7c332c3a8b4c1545f2#egg=clip\n"
  },
  {
    "path": "allenact_plugins/gym_plugin/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/gym_plugin/extra_environment.yml",
    "content": "channels:\n  - defaults\n  - conda-forge\ndependencies:\n  - gym-box2d>=0.17.0,<0.20.0\n"
  },
  {
    "path": "allenact_plugins/gym_plugin/extra_requirements.txt",
    "content": "gym[box2d]>=0.17.0,<0.20.0\n"
  },
  {
    "path": "allenact_plugins/gym_plugin/gym_distributions.py",
    "content": "import torch\n\nfrom allenact.base_abstractions.distributions import Distr\n\n\nclass GaussianDistr(torch.distributions.Normal, Distr):\n    \"\"\"PyTorch's Normal distribution with a `mode` method.\"\"\"\n\n    def mode(self) -> torch.FloatTensor:\n        return super().mean\n"
  },
  {
    "path": "allenact_plugins/gym_plugin/gym_environment.py",
    "content": "from typing import Optional\n\nimport gym\nimport numpy as np\n\n\nclass GymEnvironment(gym.Wrapper):\n    \"\"\"gym.Wrapper with minimal bookkeeping (initial observation).\"\"\"\n\n    def __init__(self, gym_env_name: str):\n        super().__init__(gym.make(gym_env_name))\n        self._initial_observation: Optional[np.ndarray] = None\n        self.reset()  # generate initial observation\n\n    def reset(self) -> np.ndarray:\n        self._initial_observation = self.env.reset()\n        return self._initial_observation\n\n    @property\n    def initial_observation(self) -> np.ndarray:\n        assert (\n            self._initial_observation is not None\n        ), \"Attempted to read initial_observation without calling reset()\"\n        res = self._initial_observation\n        self._initial_observation = None\n        return res\n"
  },
  {
    "path": "allenact_plugins/gym_plugin/gym_models.py",
    "content": "from typing import Dict, Union, Optional, Tuple, Any, Sequence, cast\n\nimport gym\nimport torch\nimport torch.nn as nn\n\nfrom allenact.algorithms.onpolicy_sync.policy import (\n    ActorCriticModel,\n    DistributionType,\n)\nfrom allenact.base_abstractions.misc import ActorCriticOutput, Memory\nfrom allenact_plugins.gym_plugin.gym_distributions import GaussianDistr\n\n\nclass MemorylessActorCritic(ActorCriticModel[GaussianDistr]):\n    \"\"\"ActorCriticModel for gym tasks with continuous control in the range [-1,\n    1].\"\"\"\n\n    def __init__(\n        self,\n        input_uuid: str,\n        action_space: gym.spaces.Box,\n        observation_space: gym.spaces.Dict,\n        action_std: float = 0.5,\n        mlp_hidden_dims: Sequence[int] = (64, 32),\n    ):\n        super().__init__(action_space, observation_space)\n\n        self.input_uuid = input_uuid\n        assert len(observation_space[self.input_uuid].shape) == 1\n        state_dim = observation_space[self.input_uuid].shape[0]\n        assert len(action_space.shape) == 1\n        action_dim = action_space.shape[0]\n\n        mlp_hidden_dims = (state_dim,) + tuple(mlp_hidden_dims)\n\n        # action mean range -1 to 1\n        self.actor = nn.Sequential(\n            *self.make_mlp_hidden(nn.Tanh, *mlp_hidden_dims),\n            nn.Linear(32, action_dim),\n            nn.Tanh(),\n        )\n\n        # critic\n        self.critic = nn.Sequential(\n            *self.make_mlp_hidden(nn.Tanh, *mlp_hidden_dims),\n            nn.Linear(32, 1),\n        )\n\n        # maximum standard deviation\n        self.register_buffer(\n            \"action_std\",\n            torch.tensor([action_std] * action_dim).view(1, 1, -1),\n            persistent=False,\n        )\n\n    @staticmethod\n    def make_mlp_hidden(nl, *dims):\n        res = []\n        for it, dim in enumerate(dims[:-1]):\n            res.append(\n                nn.Linear(dim, dims[it + 1]),\n            )\n            
res.append(nl())\n        return res\n\n    def _recurrent_memory_specification(self):\n        return None\n\n    def forward(  # type:ignore\n        self,\n        observations: Dict[str, Union[torch.FloatTensor, Dict[str, Any]]],\n        memory: Memory,\n        prev_actions: Any,\n        masks: torch.FloatTensor,\n    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:\n        means = self.actor(observations[self.input_uuid])\n        values = self.critic(observations[self.input_uuid])\n\n        return (\n            ActorCriticOutput(\n                cast(DistributionType, GaussianDistr(loc=means, scale=self.action_std)),\n                values,\n                {},\n            ),\n            None,  # no Memory\n        )\n"
  },
  {
    "path": "allenact_plugins/gym_plugin/gym_sensors.py",
    "content": "from typing import Optional, Any\n\nimport gym\nimport numpy as np\n\nfrom allenact.base_abstractions.sensor import Sensor, prepare_locals_for_super\nfrom allenact.base_abstractions.task import Task, SubTaskType\nfrom allenact_plugins.gym_plugin.gym_environment import GymEnvironment\n\n\nclass GymBox2DSensor(Sensor[gym.Env, Task[gym.Env]]):\n    \"\"\"Wrapper for gym Box2D tasks' observations.\"\"\"\n\n    def __init__(\n        self,\n        gym_env_name: str = \"LunarLanderContinuous-v2\",\n        uuid: str = \"gym_box2d_sensor\",\n        **kwargs: Any\n    ):\n        self.gym_env_name = gym_env_name\n\n        observation_space = self._get_observation_space()\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def _get_observation_space(self) -> gym.Space:\n        if self.gym_env_name in [\"LunarLanderContinuous-v2\", \"LunarLander-v2\"]:\n            return gym.spaces.Box(-np.inf, np.inf, shape=(8,), dtype=np.float32)\n        elif self.gym_env_name in [\"BipedalWalker-v2\", \"BipedalWalkerHardcore-v2\"]:\n            high = np.array([np.inf] * 24)\n            return gym.spaces.Box(-high, high, dtype=np.float32)\n        elif self.gym_env_name == \"CarRacing-v0\":\n            state_w, state_h = 96, 96\n            return gym.spaces.Box(\n                low=0, high=255, shape=(state_h, state_w, 3), dtype=np.uint8\n            )\n        raise NotImplementedError()\n\n    def get_observation(\n        self,\n        env: GymEnvironment,\n        task: Optional[SubTaskType],\n        *args,\n        gym_obs: Optional[np.ndarray] = None,\n        **kwargs: Any\n    ) -> np.ndarray:\n        if gym_obs is not None:\n            return gym_obs\n        else:\n            return env.initial_observation\n\n\nclass GymMuJoCoSensor(Sensor[gym.Env, Task[gym.Env]]):\n    \"\"\"Wrapper for gym MuJoCo and Robotics tasks observations.\"\"\"\n\n    def __init__(self, gym_env_name: str, uuid: str, **kwargs: Any):\n        
self.gym_env_name = gym_env_name\n\n        observation_space = self._get_observation_space()\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def _get_observation_space(self) -> gym.Space:\n        # observation space for gym MuJoCo\n        if self.gym_env_name == \"InvertedPendulum-v2\":\n            return gym.spaces.Box(-np.inf, np.inf, shape=(4,), dtype=\"float32\")\n        elif self.gym_env_name == \"Ant-v2\":\n            return gym.spaces.Box(-np.inf, np.inf, shape=(111,), dtype=\"float32\")\n        elif self.gym_env_name in [\"Reacher-v2\", \"Hopper-v2\"]:\n            return gym.spaces.Box(-np.inf, np.inf, shape=(11,), dtype=\"float32\")\n        elif self.gym_env_name == \"InvertedDoublePendulum-v2\":\n            return gym.spaces.Box(-np.inf, np.inf, (11,), \"float32\")\n        elif self.gym_env_name in [\"HumanoidStandup-v2\", \"Humanoid-v2\"]:\n            return gym.spaces.Box(-np.inf, np.inf, (376,), \"float32\")\n        elif self.gym_env_name in [\"HalfCheetah-v2\", \"Walker2d-v2\"]:\n            return gym.spaces.Box(-np.inf, np.inf, (17,), \"float32\")\n        elif self.gym_env_name == \"Swimmer-v2\":\n            return gym.spaces.Box(-np.inf, np.inf, (8,), \"float32\")\n        # TODO observation space for gym Robotics\n        elif self.gym_env_name == \"HandManipulateBlock-v0\":\n            return gym.spaces.Dict(\n                dict(\n                    desired_goal=gym.spaces.Box(\n                        -np.inf, np.inf, shape=(7,), dtype=\"float32\"\n                    ),\n                    achieved_goal=gym.spaces.Box(\n                        -np.inf, np.inf, shape=(7,), dtype=\"float32\"\n                    ),\n                    observation=gym.spaces.Box(\n                        -np.inf, np.inf, shape=(61,), dtype=\"float32\"\n                    ),\n                )\n            )\n        else:\n            raise NotImplementedError\n\n    def get_observation(\n        self,\n        env: 
GymEnvironment,\n        task: Optional[SubTaskType],\n        *args,\n        gym_obs: Optional[np.ndarray] = None,\n        **kwargs: Any\n    ) -> np.ndarray:\n        if gym_obs is not None:\n            return np.array(gym_obs, dtype=np.float32)  # coerce to be float32\n        else:\n            return np.array(env.initial_observation, dtype=np.float32)\n"
  },
  {
    "path": "allenact_plugins/gym_plugin/gym_tasks.py",
    "content": "import random\nfrom typing import Any, List, Dict, Optional, Union, Callable, Sequence, Tuple\n\nimport gym\nimport numpy as np\nfrom gym.utils import seeding\n\nfrom allenact.base_abstractions.misc import RLStepResult\nfrom allenact.base_abstractions.sensor import Sensor, SensorSuite\nfrom allenact.base_abstractions.task import Task, TaskSampler\nfrom allenact.utils.experiment_utils import set_seed\nfrom allenact.utils.system import get_logger\nfrom allenact_plugins.gym_plugin.gym_environment import GymEnvironment\nfrom allenact_plugins.gym_plugin.gym_sensors import GymBox2DSensor, GymMuJoCoSensor\n\n\nclass GymTask(Task[gym.Env]):\n    \"\"\"Abstract gym task.\n\n    Subclasses need to implement `class_action_names` and `_step`.\n    \"\"\"\n\n    def __init__(\n        self,\n        env: GymEnvironment,\n        sensors: Union[SensorSuite, List[Sensor]],\n        task_info: Dict[str, Any],\n        **kwargs,\n    ):\n        max_steps = env.spec.max_episode_steps\n        super().__init__(\n            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs\n        )\n        self._gym_done = False\n        self.task_name: str = self.env.spec.id\n\n    @property\n    def action_space(self) -> gym.spaces.Space:\n        return self.env.action_space\n\n    def render(self, mode: str = \"rgb\", *args, **kwargs) -> np.ndarray:\n        if mode == \"rgb\":\n            mode = \"rgb_array\"\n        return self.env.render(mode=mode)\n\n    def get_observations(\n        self, *args, gym_obs: Optional[Dict[str, Any]] = None, **kwargs\n    ) -> Any:\n        return self.sensor_suite.get_observations(\n            env=self.env, task=self, gym_obs=gym_obs\n        )\n\n    def reached_terminal_state(self) -> bool:\n        return self._gym_done\n\n    def close(self) -> None:\n        pass\n\n    def metrics(self) -> Dict[str, Any]:\n        # noinspection PyUnresolvedReferences,PyCallingNonCallable\n        env_metrics = 
self.env.metrics() if hasattr(self.env, \"metrics\") else {}\n        return {\n            **super().metrics(),\n            **{k: float(v) for k, v in env_metrics.items()},\n            \"success\": int(\n                self.env.was_successful\n                if hasattr(self.env, \"was_successful\")\n                else self.cumulative_reward > 0\n            ),\n        }\n\n\nclass GymContinuousTask(GymTask):\n    \"\"\"Task for a continuous-control gym Box2D & MuJoCo Env; it allows\n    interfacing allenact with gym tasks.\"\"\"\n\n    @classmethod\n    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:\n        return tuple()\n\n    def _step(self, action: Sequence[float]) -> RLStepResult:\n        action = np.array(action)\n\n        gym_obs, reward, self._gym_done, info = self.env.step(action=action)\n\n        return RLStepResult(\n            observation=self.get_observations(gym_obs=gym_obs),\n            reward=reward,\n            done=self.is_done(),\n            info=info,\n        )\n\n\ndef default_task_selector(env_name: str) -> type:\n    \"\"\"Helper function for `GymTaskSampler`.\"\"\"\n    if env_name in [\n        # Box2d Env\n        \"CarRacing-v0\",\n        \"LunarLanderContinuous-v2\",\n        \"BipedalWalker-v2\",\n        \"BipedalWalkerHardcore-v2\",\n        # MuJoCo Env\n        \"InvertedPendulum-v2\",\n        \"Ant-v2\",\n        \"InvertedDoublePendulum-v2\",\n        \"Humanoid-v2\",\n        \"Reacher-v2\",\n        \"Hopper-v2\",\n        \"HalfCheetah-v2\",\n        \"Swimmer-v2\",\n        \"Walker2d-v2\",\n    ]:\n        return GymContinuousTask\n    raise NotImplementedError()\n\n\ndef sensor_selector(env_name: str) -> Sensor:\n    \"\"\"Helper function for `GymTaskSampler`.\"\"\"\n    if env_name in [\n        \"CarRacing-v0\",\n        \"LunarLanderContinuous-v2\",\n        \"BipedalWalker-v2\",\n        \"BipedalWalkerHardcore-v2\",\n        \"LunarLander-v2\",\n    ]:\n        return 
GymBox2DSensor(env_name)\n    elif env_name in [\n        \"InvertedPendulum-v2\",\n        \"Ant-v2\",\n        \"InvertedDoublePendulum-v2\",\n        \"Humanoid-v2\",\n        \"Reacher-v2\",\n        \"Hopper-v2\",\n        \"HalfCheetah-v2\",\n        \"Swimmer-v2\",\n        \"Walker2d-v2\",\n    ]:\n        return GymMuJoCoSensor(gym_env_name=env_name, uuid=\"gym_mujoco_data\")\n    raise NotImplementedError()\n\n\nclass GymTaskSampler(TaskSampler):\n    \"\"\"TaskSampler for gym environments.\"\"\"\n\n    def __init__(\n        self,\n        gym_env_type: str = \"LunarLanderContinuous-v2\",\n        sensors: Optional[Union[SensorSuite, List[Sensor]]] = None,\n        max_tasks: Optional[int] = None,\n        num_unique_seeds: Optional[int] = None,\n        task_seeds_list: Optional[List[int]] = None,\n        deterministic_sampling: bool = False,\n        task_selector: Callable[[str], type] = default_task_selector,\n        repeat_failed_task_for_min_steps: int = 0,\n        extra_task_kwargs: Optional[Dict] = None,\n        seed: Optional[int] = None,\n        **kwargs,\n    ):\n        super().__init__()\n\n        self.gym_env_type = gym_env_type\n\n        self.sensors: SensorSuite\n        if sensors is None:\n            self.sensors = SensorSuite([sensor_selector(self.gym_env_type)])\n        else:\n            self.sensors = (\n                SensorSuite(sensors)\n                if not isinstance(sensors, SensorSuite)\n                else sensors\n            )\n\n        self.max_tasks = max_tasks\n        self.num_unique_seeds = num_unique_seeds\n        self.deterministic_sampling = deterministic_sampling\n        self.repeat_failed_task_for_min_steps = repeat_failed_task_for_min_steps\n        self.extra_task_kwargs = (\n            extra_task_kwargs if extra_task_kwargs is not None else {}\n        )\n\n        self._last_env_seed: Optional[int] = None\n        self._last_task: Optional[GymTask] = None\n        
self._number_of_steps_taken_with_task_seed = 0\n\n        assert (not deterministic_sampling) or repeat_failed_task_for_min_steps <= 0, (\n            \"If `deterministic_sampling` is True then we require\"\n            \" `repeat_failed_task_for_min_steps <= 0`\"\n        )\n        assert (self.num_unique_seeds is None) or (\n            0 < self.num_unique_seeds\n        ), \"`num_unique_seeds` must be a positive integer.\"\n\n        self.num_unique_seeds = num_unique_seeds\n        self.task_seeds_list = task_seeds_list\n        if self.task_seeds_list is not None:\n            if self.num_unique_seeds is not None:\n                assert self.num_unique_seeds == len(\n                    self.task_seeds_list\n                ), \"`num_unique_seeds` must equal the length of `task_seeds_list` if both specified.\"\n            self.num_unique_seeds = len(self.task_seeds_list)\n        elif self.num_unique_seeds is not None:\n            self.task_seeds_list = list(range(self.num_unique_seeds))\n        if num_unique_seeds is not None and repeat_failed_task_for_min_steps > 0:\n            raise NotImplementedError(\n                \"`repeat_failed_task_for_min_steps` must be <=0 if number\"\n                \" of unique seeds is not None.\"\n            )\n\n        assert (not deterministic_sampling) or (\n            self.num_unique_seeds is not None\n        ), \"Cannot use deterministic sampling when `num_unique_seeds` is `None`.\"\n\n        if (not deterministic_sampling) and self.max_tasks:\n            get_logger().warning(\n                \"`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,\"\n                \" this might be a mistake when running testing.\"\n            )\n\n        if seed is not None:\n            self.set_seed(seed)\n        else:\n            self.np_seeded_random_gen, _ = seeding.np_random(\n                random.randint(0, 2**31 - 1)\n            )\n\n        self.num_tasks_generated = 0\n        
self.task_type = task_selector(self.gym_env_type)\n        self.env: GymEnvironment = GymEnvironment(self.gym_env_type)\n\n    @property\n    def length(self) -> Union[int, float]:\n        return (\n            float(\"inf\")\n            if self.max_tasks is None\n            else self.max_tasks - self.num_tasks_generated\n        )\n\n    @property\n    def total_unique(self) -> Optional[Union[int, float]]:\n        return None if self.num_unique_seeds is None else self.num_unique_seeds\n\n    @property\n    def last_sampled_task(self) -> Optional[Task]:\n        raise NotImplementedError\n\n    def next_task(self, force_advance_scene: bool = False) -> Optional[GymTask]:\n        if self.length <= 0:\n            return None\n\n        repeating = False\n        if self.num_unique_seeds is not None:\n            if self.deterministic_sampling:\n                self._last_env_seed = self.task_seeds_list[\n                    self.num_tasks_generated % len(self.task_seeds_list)\n                ]\n            else:\n                self._last_env_seed = self.np_seeded_random_gen.choice(\n                    self.task_seeds_list\n                )\n        else:\n            if self._last_task is not None:\n                self._number_of_steps_taken_with_task_seed += (\n                    self._last_task.num_steps_taken()\n                )\n\n            if (\n                self._last_env_seed is not None\n                and self._number_of_steps_taken_with_task_seed\n                < self.repeat_failed_task_for_min_steps\n                and self._last_task.cumulative_reward == 0\n            ):\n                repeating = True\n            else:\n                self._number_of_steps_taken_with_task_seed = 0\n                self._last_env_seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)\n\n        task_has_same_seed_reset = hasattr(self.env, \"same_seed_reset\")\n\n        if repeating and task_has_same_seed_reset:\n            # noinspection 
PyUnresolvedReferences\n            self.env.same_seed_reset()\n        else:\n            self.env.seed(self._last_env_seed)\n            self.env.saved_seed = self._last_env_seed\n            self.env.reset()\n\n        self.num_tasks_generated += 1\n\n        task_info = {\"id\": \"random%d\" % random.randint(0, 2**63 - 1)}\n\n        self._last_task = self.task_type(\n            **dict(env=self.env, sensors=self.sensors, task_info=task_info),\n            **self.extra_task_kwargs,\n        )\n\n        return self._last_task\n\n    def close(self) -> None:\n        self.env.close()\n\n    @property\n    def all_observation_spaces_equal(self) -> bool:\n        return True\n\n    def reset(self) -> None:\n        self.num_tasks_generated = 0\n        self.env.reset()\n\n    def set_seed(self, seed: int) -> None:\n        self.np_seeded_random_gen, _ = seeding.np_random(seed)\n        if seed is not None:\n            set_seed(seed)\n"
  },
  {
    "path": "allenact_plugins/habitat_plugin/__init__.py",
    "content": "from allenact.utils.system import ImportChecker\n\nwith ImportChecker(\n    \"\\n\\nPlease install habitat following\\n\\n\"\n    \"https://allenact.org/installation/installation-framework/#installation-of-habitat\\n\\n\"\n):\n    import habitat\n    import habitat_sim\n"
  },
  {
    "path": "allenact_plugins/habitat_plugin/data/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/habitat_plugin/extra_environment.yml",
    "content": "channels:\n  - defaults\n  - conda-forge\n  - aihabitat\ndependencies:\n  - habitat-sim=0.1.5\n  - numba\n  - pip\n  - pip:\n      - \"--editable=git+https://github.com/Lucaweihs/habitat-lab.git@99124c785bd5ca51e321ea20462f71071cd43ae2#egg=habitat\"\n      - numpy-quaternion\n      - pyquaternion>=0.9.9\n"
  },
  {
    "path": "allenact_plugins/habitat_plugin/extra_environment_headless.yml",
    "content": "channels:\n  - defaults\n  - conda-forge\n  - aihabitat\ndependencies:\n  - habitat-sim=0.1.5\n  - headless\n  - numba\n  - pip\n  - pip:\n      - \"--editable=git+https://github.com/Lucaweihs/habitat-lab.git@99124c785bd5ca51e321ea20462f71071cd43ae2#egg=habitat\"\n      - numpy-quaternion\n      - pyquaternion>=0.9.9\n"
  },
  {
    "path": "allenact_plugins/habitat_plugin/extra_requirements.txt",
    "content": "habitat @ git+https://github.com/facebookresearch/habitat-lab.git@33654923dc733f5fcea23aea6391034c3f694a67\nnumpy-quaternion\npyquaternion>=0.9.9\nnumba\n"
  },
  {
    "path": "allenact_plugins/habitat_plugin/habitat_constants.py",
    "content": "import os\n\nHABITAT_BASE = os.getenv(\n    \"HABITAT_BASE_DIR\",\n    default=os.path.join(os.getcwd(), \"external_projects\", \"habitat-lab\"),\n)\nHABITAT_DATA_BASE = os.path.join(\n    os.getcwd(),\n    \"data\",\n)\n\nif (not os.path.exists(HABITAT_BASE)) or (not os.path.exists(HABITAT_DATA_BASE)):\n    raise ImportError(\n        \"In order to run properly the Habitat environment makes several assumptions about the file structure of\"\n        \" the local system. The file structure of the current environment does not seem to respect this required\"\n        \" file structure. Please see https://allenact.org/installation/installation-framework/#installation-of-habitat\"\n        \" for details as to how to set up your local environment to make it possible to use the habitat plugin of\"\n        \" AllenAct.\"\n    )\n\nHABITAT_DATASETS_DIR = os.path.join(HABITAT_DATA_BASE, \"datasets\")\nHABITAT_SCENE_DATASETS_DIR = os.path.join(HABITAT_DATA_BASE, \"scene_datasets\")\nHABITAT_CONFIGS_DIR = os.path.join(HABITAT_BASE, \"configs\")\n\nTESTED_HABITAT_COMMIT = \"33654923dc733f5fcea23aea6391034c3f694a67\"\n\nMOVE_AHEAD = \"MOVE_FORWARD\"\nROTATE_LEFT = \"TURN_LEFT\"\nROTATE_RIGHT = \"TURN_RIGHT\"\nLOOK_DOWN = \"LOOK_DOWN\"\nLOOK_UP = \"LOOK_UP\"\nEND = \"STOP\"\n"
  },
  {
    "path": "allenact_plugins/habitat_plugin/habitat_environment.py",
    "content": "\"\"\"A wrapper for interacting with the Habitat environment.\"\"\"\n\nimport os\nfrom typing import Dict, Union, List, Optional\n\nimport numpy as np\n\nimport habitat\nfrom allenact.utils.cache_utils import DynamicDistanceCache\nfrom allenact.utils.system import get_logger\nfrom habitat.config import Config\nfrom habitat.core.dataset import Dataset\nfrom habitat.core.simulator import Observations, AgentState, ShortestPathPoint\nfrom habitat.tasks.nav.nav import NavigationEpisode as HabitatNavigationEpisode\n\n\nclass HabitatEnvironment:\n    def __init__(self, config: Config, dataset: Dataset, verbose: bool = False) -> None:\n        self.env = habitat.Env(config=config, dataset=dataset)\n\n        if not verbose:\n            os.environ[\"GLOG_minloglevel\"] = \"2\"\n            os.environ[\"MAGNUM_LOG\"] = \"quiet\"\n\n        # Set the target to a random goal from the provided list for this episode\n        self.goal_index = 0\n        self.last_geodesic_distance = None\n        self.distance_cache = DynamicDistanceCache(rounding=1)\n        self._current_frame: Optional[np.ndarray] = None\n\n    @property\n    def scene_name(self) -> str:\n        return self.env.current_episode.scene_id\n\n    @property\n    def current_frame(self) -> np.ndarray:\n        assert self._current_frame is not None\n        return self._current_frame\n\n    def step(self, action_dict: Dict[str, Union[str, int]]) -> Observations:\n        obs = self.env.step(action_dict[\"action\"])\n        self._current_frame = obs\n        return obs\n\n    def get_location(self) -> Optional[np.ndarray]:\n        return self.env.sim.get_agent_state().position\n\n    def get_rotation(self) -> Optional[List[float]]:\n        return self.env.sim.get_agent_state().rotation\n\n    def get_shortest_path(\n        self,\n        source_state: AgentState,\n        target_state: AgentState,\n    ) -> List[ShortestPathPoint]:\n        return 
self.env.sim.action_space_shortest_path(source_state, [target_state])\n\n    def get_current_episode(self) -> HabitatNavigationEpisode:\n        return self.env.current_episode  # type: ignore\n\n    # noinspection PyMethodMayBeStatic\n    def start(self):\n        get_logger().debug(\"No need to start a habitat_plugin env\")\n\n    def stop(self):\n        self.env.close()\n\n    def reset(self):\n        self._current_frame = self.env.reset()\n\n    @property\n    def last_action_success(self) -> bool:\n        # For now we can not have failure of actions\n        return True\n\n    @property\n    def num_episodes(self) -> int:\n        ep_iterator = self.env.episode_iterator\n        assert isinstance(ep_iterator, habitat.core.dataset.EpisodeIterator)\n        return len(ep_iterator.episodes)\n"
  },
  {
    "path": "allenact_plugins/habitat_plugin/habitat_preprocessors.py",
    "content": "\n"
  },
  {
    "path": "allenact_plugins/habitat_plugin/habitat_sensors.py",
    "content": "from typing import Any, Optional, Tuple, TYPE_CHECKING\n\nimport gym\nimport numpy as np\nfrom pyquaternion import Quaternion\n\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.base_abstractions.task import Task\nfrom allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor\nfrom allenact.utils.misc_utils import prepare_locals_for_super\nfrom allenact_plugins.habitat_plugin.habitat_environment import HabitatEnvironment\n\nif TYPE_CHECKING:\n    from allenact_plugins.habitat_plugin.habitat_tasks import PointNavTask, ObjectNavTask  # type: ignore\n\n\nclass RGBSensorHabitat(RGBSensor[HabitatEnvironment, Task[HabitatEnvironment]]):\n    # For backwards compatibility\n    def __init__(\n        self,\n        use_resnet_normalization: bool = False,\n        mean: Optional[np.ndarray] = np.array(\n            [[[0.485, 0.456, 0.406]]], dtype=np.float32\n        ),\n        stdev: Optional[np.ndarray] = np.array(\n            [[[0.229, 0.224, 0.225]]], dtype=np.float32\n        ),\n        height: Optional[int] = None,\n        width: Optional[int] = None,\n        uuid: str = \"rgb\",\n        output_shape: Optional[Tuple[int, ...]] = None,\n        output_channels: int = 3,\n        unnormalized_infimum: float = 0.0,\n        unnormalized_supremum: float = 1.0,\n        scale_first: bool = True,\n        **kwargs: Any\n    ):\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def frame_from_env(\n        self, env: HabitatEnvironment, task: Optional[Task[HabitatEnvironment]]\n    ) -> np.ndarray:\n        return env.current_frame[\"rgb\"].copy()\n\n\nclass DepthSensorHabitat(DepthSensor[HabitatEnvironment, Task[HabitatEnvironment]]):\n    # For backwards compatibility\n    def __init__(\n        self,\n        use_resnet_normalization: Optional[bool] = None,\n        use_normalization: Optional[bool] = None,\n        mean: Optional[np.ndarray] = np.array([[0.5]], dtype=np.float32),\n        stdev: 
Optional[np.ndarray] = np.array([[0.25]], dtype=np.float32),\n        height: Optional[int] = None,\n        width: Optional[int] = None,\n        uuid: str = \"depth\",\n        output_shape: Optional[Tuple[int, ...]] = None,\n        output_channels: int = 1,\n        unnormalized_infimum: float = 0.0,\n        unnormalized_supremum: float = 5.0,\n        scale_first: bool = False,\n        **kwargs: Any\n    ):\n        # Give priority to use_normalization, but use_resnet_normalization for backward compat. if not set\n        if use_resnet_normalization is not None and use_normalization is None:\n            use_normalization = use_resnet_normalization\n        elif use_normalization is None:\n            use_normalization = False\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def frame_from_env(\n        self, env: HabitatEnvironment, task: Optional[Task[HabitatEnvironment]]\n    ) -> np.ndarray:\n        return env.current_frame[\"depth\"].copy()\n\n\nclass TargetCoordinatesSensorHabitat(Sensor[HabitatEnvironment, \"PointNavTask\"]):\n    def __init__(\n        self, coordinate_dims: int, uuid: str = \"target_coordinates_ind\", **kwargs: Any\n    ):\n        self.coordinate_dims = coordinate_dims\n\n        observation_space = self._get_observation_space()\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def _get_observation_space(self):\n        # Distance is a non-negative real and angle is normalized to the range (-Pi, Pi] or [-Pi, Pi)\n        return gym.spaces.Box(\n            np.float32(-3.15), np.float32(1000), shape=(self.coordinate_dims,)\n        )\n\n    def get_observation(\n        self,\n        env: HabitatEnvironment,\n        task: Optional[\"PointNavTask\"],\n        *args: Any,\n        **kwargs: Any\n    ) -> Any:\n        frame = env.current_frame\n        goal = frame[\"pointgoal_with_gps_compass\"]\n        return goal\n\n\nclass TargetObjectSensorHabitat(Sensor[HabitatEnvironment, 
\"ObjectNavTask\"]):\n    def __init__(self, num_objects: int, uuid: str = \"target_object_id\", **kwargs: Any):\n        observation_space = self._get_observation_space(num_objects)\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    @staticmethod\n    def _get_observation_space(num_objects: int):\n        return gym.spaces.Discrete(num_objects)\n\n    def get_observation(\n        self,\n        env: HabitatEnvironment,\n        task: Optional[\"ObjectNavTask\"],\n        *args: Any,\n        **kwargs: Any\n    ) -> Any:\n        frame = env.current_frame\n        goal = frame[\"objectgoal\"][0]\n        return goal\n\n\nclass AgentCoordinatesSensorHabitat(Sensor[HabitatEnvironment, \"PointNavTask\"]):\n    def __init__(self, uuid: str = \"agent_position_and_rotation\", **kwargs: Any):\n        observation_space = self._get_observation_space()\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    @staticmethod\n    def _get_observation_space():\n        return gym.spaces.Box(np.float32(-1000), np.float32(1000), shape=(4,))\n\n    @staticmethod\n    def get_observation(\n        env: HabitatEnvironment,\n        task: Optional[\"PointNavTask\"],\n        *args: Any,\n        **kwargs: Any\n    ) -> Any:\n        position = env.env.sim.get_agent_state().position\n        quaternion = Quaternion(env.env.sim.get_agent_state().rotation.components)\n        return np.array([position[0], position[1], position[2], quaternion.radians])\n"
  },
  {
    "path": "allenact_plugins/habitat_plugin/habitat_task_samplers.py",
    "content": "from typing import List, Optional, Union, Callable, Any, Dict, Type\n\nimport gym\n\nimport habitat\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.utils.experiment_utils import Builder\nfrom allenact_plugins.habitat_plugin.habitat_environment import HabitatEnvironment\nfrom allenact_plugins.habitat_plugin.habitat_tasks import PointNavTask, ObjectNavTask  # type: ignore\nfrom habitat.config import Config\n\n\nclass PointNavTaskSampler(TaskSampler):\n    def __init__(\n        self,\n        env_config: Config,\n        sensors: List[Sensor],\n        max_steps: int,\n        action_space: gym.Space,\n        distance_to_goal: float,\n        filter_dataset_func: Optional[\n            Callable[[habitat.Dataset], habitat.Dataset]\n        ] = None,\n        **task_init_kwargs,\n    ) -> None:\n        self.grid_size = 0.25\n        self.env: Optional[HabitatEnvironment] = None\n        self.max_tasks: Optional[int] = None\n        self.reset_tasks: Optional[int] = None\n        self.sensors = sensors\n        self.max_steps = max_steps\n        self._action_space = action_space\n        self.env_config = env_config\n        self.distance_to_goal = distance_to_goal\n        self.seed: Optional[int] = None\n        self.filter_dataset_func = filter_dataset_func\n\n        self._last_sampled_task: Optional[PointNavTask] = None\n\n        self.task_init_kwargs = task_init_kwargs\n\n    def _create_environment(self) -> HabitatEnvironment:\n        dataset = habitat.make_dataset(\n            self.env_config.DATASET.TYPE, config=self.env_config.DATASET\n        )\n        if len(dataset.episodes) == 0:\n            raise RuntimeError(\"Empty input dataset.\")\n\n        if self.filter_dataset_func is not None:\n            dataset = self.filter_dataset_func(dataset)\n            if len(dataset.episodes) == 0:\n                raise RuntimeError(\"Empty dataset after 
filtering.\")\n\n        env = HabitatEnvironment(config=self.env_config, dataset=dataset)\n        self.max_tasks = None if self.env_config.MODE == \"train\" else env.num_episodes\n        self.reset_tasks = self.max_tasks\n        return env\n\n    @property\n    def length(self) -> Union[int, float]:\n        \"\"\"\n        @return: Number of total tasks remaining that can be sampled. Can be float('inf').\n        \"\"\"\n        return float(\"inf\") if self.max_tasks is None else self.max_tasks\n\n    @property\n    def total_unique(self) -> Union[int, float, None]:\n        return self.env.num_episodes\n\n    @property\n    def last_sampled_task(self) -> Optional[PointNavTask]:\n        return self._last_sampled_task\n\n    def close(self) -> None:\n        if self.env is not None:\n            self.env.stop()\n\n    @property\n    def all_observation_spaces_equal(self) -> bool:\n        \"\"\"\n        @return: True if all Tasks that can be sampled by this sampler have the\n            same observation space. 
Otherwise False.\n        \"\"\"\n        return True\n\n    def next_task(self, force_advance_scene=False) -> Optional[PointNavTask]:\n        if self.max_tasks is not None and self.max_tasks <= 0:\n            return None\n\n        if self.env is not None:\n            self.env.reset()\n        else:\n            self.env = self._create_environment()\n            self.env.reset()\n        ep_info = self.env.get_current_episode()\n        assert len(ep_info.goals) == 1\n        target = ep_info.goals[0].position\n\n        task_info = {\n            \"target\": target,\n            \"distance_to_goal\": self.distance_to_goal,\n            \"episode_id\": ep_info.episode_id,\n            \"scene_id\": ep_info.scene_id.split(\"/\")[-1],\n            **ep_info.info,\n        }\n\n        self._last_sampled_task = PointNavTask(\n            env=self.env,\n            sensors=self.sensors,\n            task_info=task_info,\n            max_steps=self.max_steps,\n            action_space=self._action_space,\n            **self.task_init_kwargs,\n        )\n\n        if self.max_tasks is not None:\n            self.max_tasks -= 1\n\n        return self._last_sampled_task\n\n    def reset(self):\n        self.max_tasks = self.reset_tasks\n\n    def set_seed(self, seed: int):\n        self.seed = seed\n        if seed is not None:\n            self.env.env.seed(seed)\n\n\nclass ObjectNavTaskSampler(TaskSampler):\n    def __init__(\n        self,\n        env_config: Config,\n        sensors: List[Sensor],\n        max_steps: int,\n        action_space: gym.Space,\n        filter_dataset_func: Optional[\n            Callable[[habitat.Dataset], habitat.Dataset]\n        ] = None,\n        task_kwargs: Dict[str, Any] = None,\n        objectnav_task_type: Union[\n            Type[ObjectNavTask], Builder[ObjectNavTask]\n        ] = ObjectNavTask,\n        **kwargs,\n    ) -> None:\n        self.grid_size = 0.25\n        self.env: Optional[HabitatEnvironment] = None\n        
self.max_tasks: Optional[int] = None\n        self.reset_tasks: Optional[int] = None\n        self.sensors = sensors\n        self.max_steps = max_steps\n        self._action_space = action_space\n        self.env_config = env_config\n        self.seed: Optional[int] = None\n        self.filter_dataset_func = filter_dataset_func\n        self.objectnav_task_type = objectnav_task_type\n\n        self.task_kwargs = {} if task_kwargs is None else task_kwargs\n        self._last_sampled_task: Optional[ObjectNavTask] = None\n\n    def _create_environment(self) -> HabitatEnvironment:\n        dataset = habitat.make_dataset(\n            self.env_config.DATASET.TYPE, config=self.env_config.DATASET\n        )\n\n        if self.filter_dataset_func is not None:\n            dataset = self.filter_dataset_func(dataset)\n            if len(dataset.episodes) == 0:\n                raise RuntimeError(\"Empty dataset after filtering.\")\n\n        env = HabitatEnvironment(config=self.env_config, dataset=dataset)\n        self.max_tasks = (\n            None if self.env_config.MODE == \"train\" else env.num_episodes\n        )  # mp3d objectnav val -> 2184\n        self.reset_tasks = self.max_tasks\n        return env\n\n    @property\n    def length(self) -> Union[int, float]:\n        \"\"\"\n        @return: Number of total tasks remaining that can be sampled. 
Can be float('inf').\n        \"\"\"\n        return float(\"inf\") if self.max_tasks is None else self.max_tasks\n\n    @property\n    def total_unique(self) -> Union[int, float, None]:\n        return self.env.num_episodes\n\n    @property\n    def last_sampled_task(self) -> Optional[ObjectNavTask]:\n        return self._last_sampled_task\n\n    def close(self) -> None:\n        if self.env is not None:\n            self.env.stop()\n\n    @property\n    def all_observation_spaces_equal(self) -> bool:\n        \"\"\"\n        @return: True if all Tasks that can be sampled by this sampler have the\n            same observation space. Otherwise False.\n        \"\"\"\n        return True\n\n    def next_task(self, force_advance_scene=False) -> Optional[ObjectNavTask]:\n        if self.max_tasks is not None and self.max_tasks <= 0:\n            return None\n\n        if self.env is not None:\n            if force_advance_scene:\n                self.env.env._episode_iterator._forced_scene_switch()\n                self.env.env._episode_iterator._set_shuffle_intervals()\n            self.env.reset()\n        else:\n            self.env = self._create_environment()\n            self.env.reset()\n        ep_info = self.env.get_current_episode()\n\n        target_categories = {g.object_category for g in ep_info.goals}\n        assert len(target_categories) == 1\n\n        target_category = list(target_categories)[0]\n\n        task_info = {\n            \"target_category\": target_category,\n            \"episode_id\": ep_info.episode_id,\n            \"scene_id\": ep_info.scene_id.split(\"/\")[-1],\n            **ep_info.info,\n        }\n\n        self._last_sampled_task = self.objectnav_task_type(\n            env=self.env,\n            sensors=self.sensors,\n            task_info=task_info,\n            max_steps=self.max_steps,\n            action_space=self._action_space,\n            **self.task_kwargs,\n        )\n\n        if self.max_tasks is not None:\n        
    self.max_tasks -= 1\n\n        return self._last_sampled_task\n\n    def reset(self):\n        self.max_tasks = self.reset_tasks\n\n    def set_seed(self, seed: int):\n        self.seed = seed\n        if seed is not None:\n            self.env.env.seed(seed)\n"
  },
  {
    "path": "allenact_plugins/habitat_plugin/habitat_tasks.py",
    "content": "from abc import ABC\nfrom typing import Tuple, List, Dict, Any, Optional, Union, Sequence, cast\n\nimport gym\nimport numpy as np\nfrom habitat.sims.habitat_simulator.actions import HabitatSimActions\nfrom habitat.sims.habitat_simulator.habitat_simulator import HabitatSim\nfrom habitat.tasks.nav.shortest_path_follower import ShortestPathFollower\n\nfrom allenact.base_abstractions.misc import RLStepResult\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.base_abstractions.task import Task\nfrom allenact.utils.system import get_logger\nfrom allenact_plugins.habitat_plugin.habitat_constants import (\n    MOVE_AHEAD,\n    ROTATE_LEFT,\n    ROTATE_RIGHT,\n    END,\n    LOOK_UP,\n    LOOK_DOWN,\n)\nfrom allenact_plugins.habitat_plugin.habitat_environment import HabitatEnvironment\nfrom allenact_plugins.habitat_plugin.habitat_sensors import (\n    AgentCoordinatesSensorHabitat,\n)\n\n\nclass HabitatTask(Task[HabitatEnvironment], ABC):\n    def __init__(\n        self,\n        env: HabitatEnvironment,\n        sensors: List[Sensor],\n        task_info: Dict[str, Any],\n        max_steps: int,\n        **kwargs,\n    ) -> None:\n        super().__init__(\n            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs\n        )\n\n        self._last_action: Optional[str] = None\n        self._last_action_ind: Optional[int] = None\n        self._last_action_success: Optional[bool] = None\n        self._actions_taken: List[str] = []\n        self._positions = []\n        pos = self.get_agent_position_and_rotation()\n        self._positions.append(\n            {\"x\": pos[0], \"y\": pos[1], \"z\": pos[2], \"rotation\": pos[3]}\n        )\n        ep = self.env.get_current_episode()\n        # Extract the scene name from the scene path and append the episode id to generate\n        # a globally unique episode_id\n        self._episode_id = ep.scene_id.split(\"/\")[-1][:-4] + \"_\" + ep.episode_id\n\n    def 
get_agent_position_and_rotation(self):\n        return AgentCoordinatesSensorHabitat.get_observation(self.env, self)\n\n    @property\n    def last_action(self):\n        return self._last_action\n\n    @last_action.setter\n    def last_action(self, value: str):\n        self._last_action = value\n\n    @property\n    def last_action_success(self):\n        return self._last_action_success\n\n    @last_action_success.setter\n    def last_action_success(self, value: Optional[bool]):\n        self._last_action_success = value\n\n    def render(self, mode: str = \"rgb\", *args, **kwargs) -> np.ndarray:\n        if mode == \"rgb\":\n            return self.env.current_frame[\"rgb\"]\n        elif mode == \"depth\":\n            return self.env.current_frame[\"depth\"]\n        else:\n            raise NotImplementedError()\n\n\nclass PointNavTask(Task[HabitatEnvironment]):\n    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, END)\n\n    def __init__(\n        self,\n        env: HabitatEnvironment,\n        sensors: List[Sensor],\n        task_info: Dict[str, Any],\n        max_steps: int,\n        failed_end_reward: float = 0.0,\n        **kwargs,\n    ) -> None:\n        super().__init__(\n            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs\n        )\n        self._took_end_action: bool = False\n        self._success: Optional[bool] = False\n        self._subsampled_locations_from_which_obj_visible = None\n\n        # Get the geodesic distance to target from the environment and make sure it is\n        # a valid value\n        self.last_geodesic_distance = self.current_geodesic_dist_to_target()\n        self.start_distance = self.last_geodesic_distance\n        assert self.last_geodesic_distance is not None\n\n        # noinspection PyProtectedMember\n        self._shortest_path_follower = ShortestPathFollower(\n            cast(HabitatSim, env.env.sim), env.env._config.TASK.SUCCESS_DISTANCE, False\n        )\n        
self._shortest_path_follower.mode = \"geodesic_path\"\n\n        self._rewards: List[float] = []\n        self._metrics = None\n        self.failed_end_reward = failed_end_reward\n\n    def current_geodesic_dist_to_target(self) -> Optional[float]:\n        metrics = self.env.env.get_metrics()\n        if metrics[\"distance_to_goal\"] is None:\n            habitat_env = self.env.env\n            habitat_env.task.measurements.update_measures(\n                episode=habitat_env.current_episode, action=None, task=habitat_env.task\n            )\n            metrics = self.env.env.get_metrics()\n\n        return metrics[\"distance_to_goal\"]\n\n    @property\n    def action_space(self):\n        return gym.spaces.Discrete(len(self._actions))\n\n    def reached_terminal_state(self) -> bool:\n        return self.env.env.episode_over\n\n    @classmethod\n    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:\n        return cls._actions\n\n    def close(self) -> None:\n        self.env.stop()\n\n    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:\n        assert isinstance(action, int)\n        action = cast(int, action)\n\n        action_str = self.class_action_names()[action]\n\n        self.env.step({\"action\": action_str})\n\n        if action_str == END:\n            self._took_end_action = True\n            self._success = self._is_goal_in_range()\n            self.last_action_success = self._success\n        else:\n            self.last_action_success = self.env.last_action_success\n\n        step_result = RLStepResult(\n            observation=self.get_observations(),\n            reward=self.judge(),\n            done=self.is_done(),\n            info={\"last_action_success\": self.last_action_success},\n        )\n        return step_result\n\n    def render(self, mode: str = \"rgb\", *args, **kwargs) -> np.ndarray:\n        assert mode in [\"rgb\", \"depth\"], \"only rgb and depth rendering is implemented\"\n        return 
self.env.current_frame[\"rgb\"]\n\n    def _is_goal_in_range(self) -> bool:\n        return (\n            self.current_geodesic_dist_to_target() <= self.task_info[\"distance_to_goal\"]\n        )\n\n    def judge(self) -> float:\n        reward = -0.01\n\n        new_geodesic_distance = self.current_geodesic_dist_to_target()\n        if self.last_geodesic_distance is None:\n            self.last_geodesic_distance = new_geodesic_distance\n\n        if self.last_geodesic_distance is not None:\n            if (\n                new_geodesic_distance is None\n                or new_geodesic_distance in [float(\"-inf\"), float(\"inf\")]\n                or np.isnan(new_geodesic_distance)\n            ):\n                new_geodesic_distance = self.last_geodesic_distance\n            delta_distance_reward = self.last_geodesic_distance - new_geodesic_distance\n            reward += delta_distance_reward\n            self.last_geodesic_distance = new_geodesic_distance\n\n            if self.is_done():\n                reward += 10.0 if self._success else self.failed_end_reward\n        else:\n            get_logger().warning(\"Could not get geodesic distance from habitat env.\")\n\n        self._rewards.append(float(reward))\n\n        return float(reward)\n\n    def metrics(self) -> Dict[str, Any]:\n        if not self.is_done():\n            return {}\n\n        _metrics = self.env.env.get_metrics()\n        metrics = {\n            **super(PointNavTask, self).metrics(),\n            \"success\": 1 * self._success,\n            \"ep_length\": self.num_steps_taken(),\n            \"reward\": np.sum(self._rewards),\n            \"spl\": _metrics[\"spl\"] if _metrics[\"spl\"] is not None else 0.0,\n            \"dist_to_target\": self.current_geodesic_dist_to_target(),\n        }\n        self._rewards = []\n        return metrics\n\n    def query_expert(self, **kwargs) -> Tuple[int, bool]:\n        if self._is_goal_in_range():\n            return 
self.class_action_names().index(END), True\n\n        target = self.task_info[\"target\"]\n        habitat_action = self._shortest_path_follower.get_next_action(target)\n        if habitat_action == HabitatSimActions.MOVE_FORWARD:\n            return self.class_action_names().index(MOVE_AHEAD), True\n        elif habitat_action == HabitatSimActions.TURN_LEFT:\n            return self.class_action_names().index(ROTATE_LEFT), True\n        elif habitat_action == HabitatSimActions.TURN_RIGHT:\n            return self.class_action_names().index(ROTATE_RIGHT), True\n        else:\n            return 0, False\n\n\nclass ObjectNavTask(HabitatTask):\n    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, END, LOOK_UP, LOOK_DOWN)\n\n    def __init__(\n        self,\n        env: HabitatEnvironment,\n        sensors: List[Sensor],\n        task_info: Dict[str, Any],\n        max_steps: int,\n        look_constraints: Optional[Tuple[int, int]] = None,\n        **kwargs,\n    ) -> None:\n        super().__init__(\n            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs\n        )\n        self.look_constraints = look_constraints\n        self._look_state = 0\n\n        self._took_end_action: bool = False\n        self._success: Optional[bool] = False\n        self._subsampled_locations_from_which_obj_visible = None\n\n        # Get the geodesic distance to target from the environment and make sure it is\n        # a valid value\n        self.last_geodesic_distance = self.current_geodesic_dist_to_target()\n        assert not (\n            self.last_geodesic_distance is None\n            or self.last_geodesic_distance in [float(\"-inf\"), float(\"inf\")]\n            or np.isnan(self.last_geodesic_distance)\n        ), \"Bad geodesic distance\"\n        self._min_distance_to_goal = self.last_geodesic_distance\n        self._num_invalid_actions = 0\n\n        # noinspection PyProtectedMember\n        self._shortest_path_follower = 
ShortestPathFollower(\n            env.env.sim, env.env._config.TASK.SUCCESS.SUCCESS_DISTANCE, False\n        )\n        self._shortest_path_follower.mode = \"geodesic_path\"\n\n        self._rewards: List[float] = []\n        self._metrics = None\n        self.task_info[\"episode_id\"] = self._episode_id\n\n    @property\n    def action_space(self):\n        return gym.spaces.Discrete(len(self._actions))\n\n    def reached_terminal_state(self) -> bool:\n        return self.env.env.episode_over\n\n    @classmethod\n    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:\n        return cls._actions\n\n    def action_names(self, **kwargs) -> Tuple[str, ...]:\n        return self._actions\n\n    def close(self) -> None:\n        self.env.stop()\n\n    def current_geodesic_dist_to_target(self) -> Optional[float]:\n        metrics = self.env.env.get_metrics()\n        if metrics[\"distance_to_goal\"] is None:\n            habitat_env = self.env.env\n            habitat_env.task.measurements.update_measures(\n                episode=habitat_env.current_episode, action=None, task=habitat_env.task\n            )\n            metrics = self.env.env.get_metrics()\n\n        return metrics[\"distance_to_goal\"]\n\n    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:\n        assert isinstance(action, int)\n        action = cast(int, action)\n\n        old_pos = self.get_agent_position_and_rotation()\n\n        action_str = self.action_names()[action]\n        self._actions_taken.append(action_str)\n\n        skip_action = False\n        if self.look_constraints is not None:\n            max_look_up, max_look_down = self.look_constraints\n\n            if action_str == LOOK_UP:\n                num_look_ups = self._look_state\n                # assert num_look_ups <= max_look_up\n                skip_action = num_look_ups >= max_look_up\n                self._look_state += 1\n\n            if action_str == LOOK_DOWN:\n                num_look_downs = 
-self._look_state\n                # assert num_look_downs <= max_look_down\n                skip_action = num_look_downs >= max_look_down\n                self._look_state -= 1\n\n            self._look_state = min(max(self._look_state, -max_look_down), max_look_up)\n\n        if not skip_action:\n            self.env.step({\"action\": action_str})\n\n        if action_str == END:\n            self._took_end_action = True\n            self._success = self._is_goal_in_range()\n            self.last_action_success = self._success\n        else:\n            self.last_action_success = self.env.last_action_success\n\n        step_result = RLStepResult(\n            observation=self.get_observations(),\n            reward=self.judge(),\n            done=self.is_done(),\n            info={\"last_action_success\": self.last_action_success},\n        )\n        new_pos = self.get_agent_position_and_rotation()\n        if np.all(old_pos == new_pos):\n            self._num_invalid_actions += 1\n\n        pos = self.get_agent_position_and_rotation()\n        self._positions.append(\n            {\"x\": pos[0], \"y\": pos[1], \"z\": pos[2], \"rotation\": pos[3]}\n        )\n\n        return step_result\n\n    def render(self, mode: str = \"rgb\", *args, **kwargs) -> np.ndarray:\n        assert mode in [\"rgb\", \"depth\"], \"only rgb and depth rendering is implemented\"\n        return self.env.current_frame[\"rgb\"]\n\n    def _is_goal_in_range(self) -> bool:\n        # The habitat simulator will return an SPL value of 0.0 whenever the goal is not in range\n        return bool(self.env.env.get_metrics()[\"spl\"])\n\n    def judge(self) -> float:\n        # Set default reward\n        reward = -0.01\n\n        # Get geodesic distance reward\n        new_geodesic_distance = self.current_geodesic_dist_to_target()\n        self._min_distance_to_goal = min(\n            new_geodesic_distance, self._min_distance_to_goal\n        )\n        if (\n            new_geodesic_distance 
is None\n            or new_geodesic_distance in [float(\"-inf\"), float(\"inf\")]\n            or np.isnan(new_geodesic_distance)\n        ):\n            new_geodesic_distance = self.last_geodesic_distance\n        delta_distance_reward = self.last_geodesic_distance - new_geodesic_distance\n        reward += delta_distance_reward\n\n        if self._took_end_action:\n            reward += 10.0 if self._success else 0.0\n\n        # Get success reward\n        self._rewards.append(float(reward))\n        self.last_geodesic_distance = new_geodesic_distance\n\n        return float(reward)\n\n    def metrics(self) -> Dict[str, Any]:\n        self.task_info[\"taken_actions\"] = self._actions_taken\n        self.task_info[\"action_names\"] = self.action_names()\n        self.task_info[\"followed_path\"] = self._positions\n        if not self.is_done():\n            return {}\n        else:\n            _metrics = self.env.env.get_metrics()\n            metrics = {\n                \"success\": self._success,\n                \"ep_length\": self.num_steps_taken(),\n                \"total_reward\": np.sum(self._rewards),\n                \"spl\": _metrics[\"spl\"] if _metrics[\"spl\"] is not None else 0.0,\n                \"min_distance_to_target\": self._min_distance_to_goal,\n                \"num_invalid_actions\": self._num_invalid_actions,\n                \"task_info\": self.task_info,\n            }\n            self._rewards = []\n            return metrics\n\n    def query_expert(self, **kwargs) -> Tuple[int, bool]:\n        if self._is_goal_in_range():\n            return self.class_action_names().index(END), True\n\n        target = self.task_info[\"target\"]\n        action = self._shortest_path_follower.get_next_action(target)\n        return action, action is not None\n"
  },
  {
    "path": "allenact_plugins/habitat_plugin/habitat_utils.py",
    "content": "import os\nfrom typing import List\n\nimport habitat\nfrom allenact_plugins.habitat_plugin.habitat_constants import (\n    HABITAT_BASE,\n    HABITAT_CONFIGS_DIR,\n)\nfrom habitat import Config\n\n\ndef construct_env_configs(\n    config: Config,\n    allow_scene_repeat: bool = False,\n) -> List[Config]:\n    \"\"\"Create list of Habitat Configs for training on multiple processes To\n    allow better performance, dataset are split into small ones for each\n    individual env, grouped by scenes.\n\n    # Parameters\n\n    config : configs that contain num_processes as well as information\n             necessary to create individual environments.\n    allow_scene_repeat: if `True` and the number of distinct scenes\n        in the dataset is less than the total number of processes this will\n        result in scenes being repeated across processes. If `False`, then\n        if the total number of processes is greater than the number of scenes,\n        this will result in a RuntimeError exception being raised.\n\n    # Returns\n\n    List of Configs, one for each process.\n    \"\"\"\n\n    config.freeze()\n    num_processes = config.NUM_PROCESSES\n    configs = []\n    dataset = habitat.make_dataset(config.DATASET.TYPE)\n    scenes = dataset.get_scenes_to_load(config.DATASET)\n\n    if len(scenes) > 0:\n        if len(scenes) < num_processes:\n            if not allow_scene_repeat:\n                raise RuntimeError(\n                    \"reduce the number of processes as there aren't enough number of scenes.\"\n                )\n            else:\n                scenes = (scenes * (1 + (num_processes // len(scenes))))[:num_processes]\n\n    scene_splits: List[List] = [[] for _ in range(num_processes)]\n    for idx, scene in enumerate(scenes):\n        scene_splits[idx % len(scene_splits)].append(scene)\n\n    assert sum(map(len, scene_splits)) == len(scenes)\n\n    for i in range(num_processes):\n\n        task_config = config.clone()\n        
task_config.defrost()\n        if len(scenes) > 0:\n            task_config.DATASET.CONTENT_SCENES = scene_splits[i]\n\n        if len(config.SIMULATOR_GPU_IDS) == 0:\n            task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = -1\n        else:\n            task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = (\n                config.SIMULATOR_GPU_IDS[i % len(config.SIMULATOR_GPU_IDS)]\n            )\n\n        task_config.freeze()\n\n        configs.append(task_config.clone())\n\n    return configs\n\n\ndef construct_env_configs_mp3d(config: Config) -> List[Config]:\n    r\"\"\"Create list of Habitat Configs for training on multiple processes\n    To allow better performance, dataset are split into small ones for\n    each individual env, grouped by scenes.\n    Args:\n        config: configs that contain num_processes as well as information\n        necessary to create individual environments.\n    Returns:\n        List of Configs, one for each process\n    \"\"\"\n\n    config.freeze()\n    num_processes = config.NUM_PROCESSES\n    configs = []\n    # dataset = habitat.make_dataset(config.DATASET.TYPE)\n    # scenes = dataset.get_scenes_to_load(config.DATASET)\n\n    if num_processes == 1:\n        scene_splits = [[\"pRbA3pwrgk9\"]]\n    else:\n        small = [\n            \"rPc6DW4iMge\",\n            \"e9zR4mvMWw7\",\n            \"uNb9QFRL6hY\",\n            \"qoiz87JEwZ2\",\n            \"sKLMLpTHeUy\",\n            \"s8pcmisQ38h\",\n            \"759xd9YjKW5\",\n            \"XcA2TqTSSAj\",\n            \"SN83YJsR3w2\",\n            \"8WUmhLawc2A\",\n            \"JeFG25nYj2p\",\n            \"17DRP5sb8fy\",\n            \"Uxmj2M2itWa\",\n            \"XcA2TqTSSAj\",\n            \"SN83YJsR3w2\",\n            \"8WUmhLawc2A\",\n            \"JeFG25nYj2p\",\n            \"17DRP5sb8fy\",\n            \"Uxmj2M2itWa\",\n            \"D7N2EKCX4Sj\",\n            \"b8cTxDM8gDG\",\n            \"sT4fr6TAbpF\",\n            \"S9hNv5qa7GM\",\n            
\"82sE5b5pLXE\",\n            \"pRbA3pwrgk9\",\n            \"aayBHfsNo7d\",\n            \"cV4RVeZvu5T\",\n            \"i5noydFURQK\",\n            \"YmJkqBEsHnH\",\n            \"jh4fc5c5qoQ\",\n            \"VVfe2KiqLaN\",\n            \"29hnd4uzFmX\",\n            \"Pm6F8kyY3z2\",\n            \"JF19kD82Mey\",\n            \"GdvgFV5R1Z5\",\n            \"HxpKQynjfin\",\n            \"vyrNrziPKCB\",\n        ]\n        med = [\n            \"V2XKFyX4ASd\",\n            \"VFuaQ6m2Qom\",\n            \"ZMojNkEp431\",\n            \"5LpN3gDmAk7\",\n            \"r47D5H71a5s\",\n            \"ULsKaCPVFJR\",\n            \"E9uDoFAP3SH\",\n            \"kEZ7cmS4wCh\",\n            \"ac26ZMwG7aT\",\n            \"dhjEzFoUFzH\",\n            \"mJXqzFtmKg4\",\n            \"p5wJjkQkbXX\",\n            \"Vvot9Ly1tCj\",\n            \"EDJbREhghzL\",\n            \"VzqfbhrpDEA\",\n            \"7y3sRwLe3Va\",\n        ]\n\n        scene_splits = [[] for _ in range(config.NUM_PROCESSES)]\n        distribute(\n            small,\n            scene_splits,\n            num_gpus=8,\n            procs_per_gpu=3,\n            proc_offset=1,\n            scenes_per_process=2,\n        )\n        distribute(\n            med,\n            scene_splits,\n            num_gpus=8,\n            procs_per_gpu=3,\n            proc_offset=0,\n            scenes_per_process=1,\n        )\n\n        # gpu0 = [['pRbA3pwrgk9', '82sE5b5pLXE', 'S9hNv5qa7GM'],\n        #         ['Uxmj2M2itWa', '17DRP5sb8fy', 'JeFG25nYj2p'],\n        #         ['5q7pvUzZiYa', '759xd9YjKW5', 's8pcmisQ38h'],\n        #         ['e9zR4mvMWw7', 'rPc6DW4iMge', 'vyrNrziPKCB']]\n        # gpu1 = [['sT4fr6TAbpF', 'b8cTxDM8gDG', 'D7N2EKCX4Sj'],\n        #         ['8WUmhLawc2A', 'SN83YJsR3w2', 'XcA2TqTSSAj'],\n        #         ['sKLMLpTHeUy', 'qoiz87JEwZ2', 'uNb9QFRL6hY'],\n        #         ['V2XKFyX4ASd', 'VFuaQ6m2Qom', 'ZMojNkEp431']]\n        # gpu2 = [['5LpN3gDmAk7', 'r47D5H71a5s', 'ULsKaCPVFJR', 'E9uDoFAP3SH'],\n  
      #         ['VVfe2KiqLaN', 'jh4fc5c5qoQ', 'YmJkqBEsHnH'],  # small\n        #         ['i5noydFURQK', 'cV4RVeZvu5T', 'aayBHfsNo7d']]  # small\n        # gpu3 = [['kEZ7cmS4wCh', 'ac26ZMwG7aT', 'dhjEzFoUFzH'],\n        #         ['mJXqzFtmKg4', 'p5wJjkQkbXX', 'Vvot9Ly1tCj']]\n        # gpu4 = [['EDJbREhghzL', 'VzqfbhrpDEA', '7y3sRwLe3Va'],\n        #         ['ur6pFq6Qu1A', 'PX4nDJXEHrG', 'PuKPg4mmafe']]\n        # gpu5 = [['r1Q1Z4BcV1o', 'gTV8FGcVJC9', '1pXnuDYAj8r'],\n        #         ['JF19kD82Mey', 'Pm6F8kyY3z2', '29hnd4uzFmX']]  # small\n        # gpu6 = [['VLzqgDo317F', '1LXtFkjw3qL'],\n        #         ['HxpKQynjfin', 'gZ6f7yhEvPG', 'GdvgFV5R1Z5']]  # small\n        # gpu7 = [['D7G3Y4RVNrH', 'B6ByNegPMKs']]\n        #\n        # scene_splits = gpu0 + gpu1 + gpu2 + gpu3 + gpu4 + gpu5 + gpu6 + gpu7\n\n    for i in range(num_processes):\n\n        task_config = config.clone()\n        task_config.defrost()\n        task_config.DATASET.CONTENT_SCENES = scene_splits[i]\n\n        task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = config.SIMULATOR_GPU_IDS[\n            i % len(config.SIMULATOR_GPU_IDS)\n        ]\n\n        task_config.freeze()\n\n        configs.append(task_config.clone())\n\n    return configs\n\n\ndef distribute(\n    data: List[str],\n    scene_splits: List[List],\n    num_gpus=8,\n    procs_per_gpu=4,\n    proc_offset=0,\n    scenes_per_process=1,\n) -> None:\n    for idx, scene in enumerate(data):\n        i = (idx // num_gpus) % scenes_per_process\n        j = idx % num_gpus\n        scene_splits[j * procs_per_gpu + i + proc_offset].append(scene)\n\n\ndef get_habitat_config(path: str):\n    assert (\n        path[-4:].lower() == \".yml\" or path[-5:].lower() == \".yaml\"\n    ), f\"path ({path}) must be a .yml or .yaml file.\"\n\n    if not os.path.isabs(path):\n        candidate_paths = [\n            os.path.join(d, path)\n            for d in [os.getcwd(), HABITAT_BASE, HABITAT_CONFIGS_DIR]\n        ]\n        success = False\n    
    for candidate_path in candidate_paths:\n            if os.path.exists(candidate_path):\n                success = True\n                path = candidate_path\n                break\n\n        if not success:\n            raise FileExistsError(\n                f\"Could not find config file with given relative path {path}. Tried the following possible absolute\"\n                f\" paths {candidate_paths}.\"\n            )\n    elif not os.path.exists(path):\n        raise FileExistsError(f\"Could not find config file with given path {path}.\")\n\n    return habitat.get_config(path)\n"
  },
  {
    "path": "allenact_plugins/habitat_plugin/scripts/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/habitat_plugin/scripts/agent_demo.py",
    "content": "import os\n\nimport cv2\nimport habitat\nfrom pyquaternion import Quaternion\n\nfrom allenact_plugins.habitat_plugin.habitat_constants import (\n    HABITAT_CONFIGS_DIR,\n    HABITAT_DATASETS_DIR,\n    HABITAT_SCENE_DATASETS_DIR,\n)\nfrom allenact_plugins.habitat_plugin.habitat_utils import get_habitat_config\n\nFORWARD_KEY = \"w\"\nLEFT_KEY = \"a\"\nRIGHT_KEY = \"d\"\nFINISH = \"f\"\n\n\ndef transform_rgb_bgr(image):\n    return image[:, :, [2, 1, 0]]\n\n\ndef agent_demo():\n    config = get_habitat_config(\n        os.path.join(HABITAT_CONFIGS_DIR, \"tasks/pointnav.yaml\")\n    )\n    config.defrost()\n    config.DATASET.DATA_PATH = os.path.join(\n        HABITAT_DATASETS_DIR, \"pointnav/gibson/v1/train/train.json.gz\"\n    )\n    config.DATASET.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR\n\n    config.DATASET.CONTENT_SCENES = [\"Adrian\"]\n\n    config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = 0\n\n    config.freeze()\n    env = habitat.Env(config=config)\n\n    print(\"Environment creation successful\")\n    observations = env.reset()\n    cv2.imshow(\"RGB\", transform_rgb_bgr(observations[\"rgb\"]))\n\n    print(\"Agent stepping around inside environment.\")\n\n    count_steps = 0\n    action = None\n    while not env.episode_over:\n        keystroke = cv2.waitKey(0)\n\n        if keystroke == ord(FORWARD_KEY):\n            action = 1\n            print(\"action: FORWARD\")\n        elif keystroke == ord(LEFT_KEY):\n            action = 2\n            print(\"action: LEFT\")\n        elif keystroke == ord(RIGHT_KEY):\n            action = 3\n            print(\"action: RIGHT\")\n        elif keystroke == ord(FINISH):\n            action = 0\n            print(\"action: FINISH\")\n        else:\n            print(\"INVALID KEY\")\n            continue\n\n        observations = env.step(action)\n        count_steps += 1\n\n        print(\"Position:\", env.sim.get_agent_state().position)\n        print(\"Quaternions:\", 
env.sim.get_agent_state().rotation)\n        quat = Quaternion(env.sim.get_agent_state().rotation.components)\n        print(quat.radians)\n        cv2.imshow(\"RGB\", transform_rgb_bgr(observations[\"rgb\"]))\n\n    print(\"Episode finished after {} steps.\".format(count_steps))\n\n    if action == habitat.SimulatorActions.STOP and observations[\"pointgoal\"][0] < 0.2:\n        print(\"you successfully navigated to destination point\")\n    else:\n        print(\"your navigation was unsuccessful\")\n\n\nif __name__ == \"__main__\":\n    agent_demo()\n"
  },
  {
    "path": "allenact_plugins/habitat_plugin/scripts/make_map.py",
    "content": "import os\n\nimport habitat\nimport numpy as np\nfrom tqdm import tqdm\n\nfrom allenact_plugins.habitat_plugin.habitat_constants import (\n    HABITAT_CONFIGS_DIR,\n    HABITAT_DATA_BASE,\n    HABITAT_SCENE_DATASETS_DIR,\n    HABITAT_DATASETS_DIR,\n)\nfrom allenact_plugins.habitat_plugin.habitat_utils import get_habitat_config\n\nmap_resolution = 0.05\nmap_size = 960\n\n\ndef make_map(env, scene):\n    vacancy_map = np.zeros([map_size, map_size], dtype=bool)\n    for i in tqdm(range(map_size)):\n        for j in range(map_size):\n            x = (i - map_size // 2) * map_resolution\n            z = (j - map_size // 2) * map_resolution\n            vacancy_map[j, i] = env.sim.is_navigable([x, 0.0, z])\n\n    np.save(\n        os.path.join(HABITAT_DATA_BASE, \"map_data/pointnav/v1/gibson/data/\" + scene),\n        vacancy_map,\n    )\n\n\ndef generate_maps():\n    config = get_habitat_config(\n        os.path.join(HABITAT_CONFIGS_DIR, \"tasks/pointnav.yaml\")\n    )\n    config.defrost()\n    config.DATASET.DATA_PATH = os.path.join(\n        HABITAT_DATASETS_DIR, \"pointnav/gibson/v1/train/train.json.gz\"\n    )\n    config.DATASET.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR\n    config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = 0\n    config.freeze()\n\n    dataset = habitat.make_dataset(config.DATASET.TYPE)\n    scenes = dataset.get_scenes_to_load(config.DATASET)\n\n    for scene in scenes:\n        print(\"Making environment for:\", scene)\n        config.defrost()\n        config.DATASET.CONTENT_SCENES = [scene]\n        config.freeze()\n        env = habitat.Env(config=config)\n        make_map(env, scene)\n        env.close()\n\n\nif __name__ == \"__main__\":\n    generate_maps()\n"
  },
  {
    "path": "allenact_plugins/ithor_plugin/__init__.py",
    "content": "from allenact.utils.system import ImportChecker\n\nwith ImportChecker(\n    \"Cannot `import ai2thor`, please install `ai2thor` (`pip install ai2thor`).\"\n):\n    # noinspection PyUnresolvedReferences\n    import ai2thor\n"
  },
  {
    "path": "allenact_plugins/ithor_plugin/extra_environment.yml",
    "content": "channels:\n  - defaults\n  - conda-forge\ndependencies:\n  - ai2thor>=2.5.3\n  - numba\n  - pip\n  - colour\n  - packaging\n  - pip:\n      - numpy-quaternion\n      - pyquaternion>=0.9.9\n      - python-xlib\n"
  },
  {
    "path": "allenact_plugins/ithor_plugin/extra_requirements.txt",
    "content": "ai2thor>=2.5.3\nnumpy-quaternion\npyquaternion>=0.9.9\ncolour\nnumba\npackaging\npython-xlib\n"
  },
  {
    "path": "allenact_plugins/ithor_plugin/ithor_constants.py",
    "content": "\"\"\"Common constants used when training agents to complete tasks in iTHOR, the\ninteractive version of AI2-THOR.\"\"\"\n\nfrom collections import OrderedDict\nfrom typing import Set, Dict\n\nMOVE_AHEAD = \"MoveAhead\"\nROTATE_LEFT = \"RotateLeft\"\nROTATE_RIGHT = \"RotateRight\"\nLOOK_DOWN = \"LookDown\"\nLOOK_UP = \"LookUp\"\nEND = \"End\"\n\nVISIBILITY_DISTANCE = 1.25\nFOV = 90.0\n\nORDERED_SCENE_TYPES = (\"kitchens\", \"livingrooms\", \"bedrooms\", \"bathrooms\")\n\nNUM_SCENE_TYPES = len(ORDERED_SCENE_TYPES)\n\n\ndef make_scene_name(type_ind, scene_num):\n    if type_ind == 1:\n        return \"FloorPlan\" + str(scene_num) + \"_physics\"\n    elif scene_num < 10:\n        return \"FloorPlan\" + str(type_ind) + \"0\" + str(scene_num) + \"_physics\"\n    else:\n        return \"FloorPlan\" + str(type_ind) + str(scene_num) + \"_physics\"\n\n\nSCENES_TYPE_TO_SCENE_NAMES = OrderedDict(\n    [\n        (\n            ORDERED_SCENE_TYPES[type_ind - 1],\n            tuple(\n                make_scene_name(type_ind=type_ind, scene_num=scene_num)\n                for scene_num in range(1, 31)\n            ),\n        )\n        for type_ind in range(1, NUM_SCENE_TYPES + 1)\n    ]\n)\n\nSCENES_TYPE_TO_TRAIN_SCENE_NAMES = OrderedDict(\n    (key, scenes[:20]) for key, scenes in SCENES_TYPE_TO_SCENE_NAMES.items()\n)\n\nSCENES_TYPE_TO_VALID_SCENE_NAMES = OrderedDict(\n    (key, scenes[20:25]) for key, scenes in SCENES_TYPE_TO_SCENE_NAMES.items()\n)\n\nSCENES_TYPE_TO_TEST_SCENE_NAMES = OrderedDict(\n    (key, scenes[25:30]) for key, scenes in SCENES_TYPE_TO_SCENE_NAMES.items()\n)\n\nALL_SCENE_NAMES = sum(SCENES_TYPE_TO_SCENE_NAMES.values(), tuple())\n\nTRAIN_SCENE_NAMES = sum(\n    (scenes for scenes in SCENES_TYPE_TO_TRAIN_SCENE_NAMES.values()), tuple()\n)\n\nVALID_SCENE_NAMES = sum(\n    (scenes for scenes in SCENES_TYPE_TO_VALID_SCENE_NAMES.values()), tuple()\n)\nTEST_SCENE_NAMES = sum(\n    (scenes for scenes in SCENES_TYPE_TO_TEST_SCENE_NAMES.values()), 
tuple()\n)\n\nTRAIN_SCENE_NAMES_SET = set(TRAIN_SCENE_NAMES)\nVALID_SCENE_NAMES_SET = set(VALID_SCENE_NAMES)\nTEST_SCENE_NAMES_SET = set(TEST_SCENE_NAMES)\n\n_object_type_and_location_tsv = \"\"\"\nAlarmClock\tbedrooms\nApple\tkitchens\nArmChair\tlivingrooms,bedrooms\nBaseballBat\tbedrooms\nBasketBall\tbedrooms\nBathtub\tbathrooms\nBathtubBasin\tbathrooms\nBed\tbedrooms\nBlinds\tkitchens,bedrooms\nBook\tkitchens,livingrooms,bedrooms\nBoots\tlivingrooms,bedrooms\nBottle\tkitchens\nBowl\tkitchens,livingrooms,bedrooms\nBox\tlivingrooms,bedrooms\nBread\tkitchens\nButterKnife\tkitchens\nCabinet\tkitchens,livingrooms,bedrooms,bathrooms\nCandle\tlivingrooms,bathrooms\nCart\tbathrooms\nCD\tbedrooms\nCellPhone\tkitchens,livingrooms,bedrooms\nChair\tkitchens,livingrooms,bedrooms\nCloth\tbedrooms,bathrooms\nCoffeeMachine\tkitchens\nCoffeeTable\tlivingrooms,bedrooms\nCounterTop\tkitchens,livingrooms,bedrooms,bathrooms\nCreditCard\tkitchens,livingrooms,bedrooms\nCup\tkitchens\nCurtains\tkitchens,livingrooms,bedrooms\nDesk\tbedrooms\nDeskLamp\tlivingrooms,bedrooms\nDiningTable\tkitchens,livingrooms,bedrooms\nDishSponge\tkitchens,bathrooms\nDrawer\tkitchens,livingrooms,bedrooms,bathrooms\nDresser\tlivingrooms,bedrooms,bathrooms\nEgg\tkitchens\nFaucet\tkitchens,bathrooms\nFloorLamp\tlivingrooms,bedrooms\nFootstool\tbedrooms\nFork\tkitchens\nFridge\tkitchens\nGarbageCan\tkitchens,livingrooms,bedrooms,bathrooms\nHandTowel\tbathrooms\nHandTowelHolder\tbathrooms\nHousePlant\tkitchens,livingrooms,bedrooms,bathrooms\nKettle\tkitchens\nKeyChain\tlivingrooms,bedrooms\nKnife\tkitchens\nLadle\tkitchens\nLaptop\tkitchens,livingrooms,bedrooms\nLaundryHamper\tbedrooms\nLaundryHamperLid\tbedrooms\nLettuce\tkitchens\nLightSwitch\tkitchens,livingrooms,bedrooms,bathrooms\nMicrowave\tkitchens\nMirror\tkitchens,livingrooms,bedrooms,bathrooms\nMug\tkitchens,bedrooms\nNewspaper\tlivingrooms\nOttoman\tlivingrooms,bedrooms\nPainting\tkitchens,livingrooms,bedrooms,bathrooms\nPan\tkitchens\nPaperTowel\tkit
chens,bathrooms\nPen\tkitchens,livingrooms,bedrooms\nPencil\tkitchens,livingrooms,bedrooms\nPepperShaker\tkitchens\nPillow\tlivingrooms,bedrooms\nPlate\tkitchens,livingrooms\nPlunger\tbathrooms\nPoster\tbedrooms\nPot\tkitchens\nPotato\tkitchens\nRemoteControl\tlivingrooms,bedrooms\nSafe\tkitchens,livingrooms,bedrooms\nSaltShaker\tkitchens\nScrubBrush\tbathrooms\nShelf\tkitchens,livingrooms,bedrooms,bathrooms\nShowerCurtain\tbathrooms\nShowerDoor\tbathrooms\nShowerGlass\tbathrooms\nShowerHead\tbathrooms\nSideTable\tlivingrooms,bedrooms\nSink\tkitchens,bathrooms\nSinkBasin\tkitchens,bathrooms\nSoapBar\tbathrooms\nSoapBottle\tkitchens,bathrooms\nSofa\tlivingrooms,bedrooms\nSpatula\tkitchens\nSpoon\tkitchens\nSprayBottle\tbathrooms\nStatue\tkitchens,livingrooms,bedrooms\nStoveBurner\tkitchens\nStoveKnob\tkitchens\nTeddyBear\tbedrooms\nTelevision\tlivingrooms,bedrooms\nTennisRacket\tbedrooms\nTissueBox\tlivingrooms,bedrooms,bathrooms\nToaster\tkitchens\nToilet\tbathrooms\nToiletPaper\tbathrooms\nToiletPaperHanger\tbathrooms\nTomato\tkitchens\nTowel\tbathrooms\nTowelHolder\tbathrooms\nTVStand\tlivingrooms\nVase\tkitchens,livingrooms,bedrooms\nWatch\tlivingrooms,bedrooms\nWateringCan\tlivingrooms\nWindow\tkitchens,livingrooms,bedrooms,bathrooms\nWineBottle\tkitchens\n\"\"\"\n\nOBJECT_TYPE_TO_SCENE_TYPES = OrderedDict()\nfor ot_tab_scene_types in _object_type_and_location_tsv.split(\"\\n\"):\n    if ot_tab_scene_types != \"\":\n        ot, scene_types_csv = ot_tab_scene_types.split(\"\\t\")\n        OBJECT_TYPE_TO_SCENE_TYPES[ot] = tuple(sorted(scene_types_csv.split(\",\")))\n\nSCENE_TYPE_TO_OBJECT_TYPES: Dict[str, Set[str]] = OrderedDict(\n    ((k, set()) for k in ORDERED_SCENE_TYPES)\n)\nfor ot_tab_scene_types in _object_type_and_location_tsv.split(\"\\n\"):\n    if ot_tab_scene_types != \"\":\n        ot, scene_types_csv = ot_tab_scene_types.split(\"\\t\")\n        for scene_type in scene_types_csv.split(\",\"):\n            
SCENE_TYPE_TO_OBJECT_TYPES[scene_type].add(ot)\n"
  },
  {
    "path": "allenact_plugins/ithor_plugin/ithor_environment.py",
    "content": "\"\"\"A wrapper for engaging with the THOR environment.\"\"\"\n\nimport copy\nimport functools\nimport math\nimport random\nfrom typing import Tuple, Dict, List, Set, Union, Any, Optional, Mapping, cast\n\nimport ai2thor.server\nimport networkx as nx\nimport numpy as np\nfrom ai2thor.controller import Controller\nfrom scipy.spatial.transform import Rotation\n\nfrom allenact.utils.system import get_logger\nfrom allenact_plugins.ithor_plugin.ithor_constants import VISIBILITY_DISTANCE, FOV\nfrom allenact_plugins.ithor_plugin.ithor_util import round_to_factor\n\n\nclass IThorEnvironment(object):\n    \"\"\"Wrapper for the ai2thor controller providing additional functionality\n    and bookkeeping.\n\n    See [here](https://ai2thor.allenai.org/documentation/installation) for comprehensive\n     documentation on AI2-THOR.\n\n    # Attributes\n\n    controller : The ai2thor controller.\n    \"\"\"\n\n    def __init__(\n        self,\n        x_display: Optional[str] = None,\n        docker_enabled: bool = False,\n        local_thor_build: Optional[str] = None,\n        visibility_distance: float = VISIBILITY_DISTANCE,\n        fov: float = FOV,\n        player_screen_width: int = 300,\n        player_screen_height: int = 300,\n        quality: str = \"Very Low\",\n        restrict_to_initially_reachable_points: bool = False,\n        make_agents_visible: bool = True,\n        object_open_speed: float = 1.0,\n        simplify_physics: bool = False,\n    ) -> None:\n        \"\"\"Initializer.\n\n        # Parameters\n\n        x_display : The x display into which to launch ai2thor (possibly necessarily if you are running on a server\n            without an attached display).\n        docker_enabled : Whether or not to run thor in a docker container (useful on a server without an attached\n            display so that you don't have to start an x display).\n        local_thor_build : The path to a local build of ai2thor. 
This is probably not necessary for your use case\n            and can be safely ignored.\n        visibility_distance : The distance (in meters) at which objects, in the viewport of the agent,\n            are considered visible by ai2thor and will have their \"visible\" flag be set to `True` in the metadata.\n        fov : The agent's camera's field of view.\n        player_screen_width : The width resolution (in pixels) of the images returned by ai2thor.\n        player_screen_height : The height resolution (in pixels) of the images returned by ai2thor.\n        quality : The quality at which to render. Possible quality settings can be found in\n            `ai2thor._quality_settings.QUALITY_SETTINGS`.\n        restrict_to_initially_reachable_points : Whether or not to restrict the agent to locations in ai2thor\n            that were found to be (initially) reachable by the agent (i.e. reachable by the agent after resetting\n            the scene). This can be useful if you want to ensure there are only a fixed set of locations where the\n            agent can go.\n        make_agents_visible : Whether or not the agent should be visible. Most noticeable when there are multiple agents\n            or when quality settings are high so that the agent casts a shadow.\n        object_open_speed : How quickly objects should be opened. High speeds mean faster simulation but also mean\n            that opening objects have a lot of kinetic energy and can, possibly, knock other objects away.\n        simplify_physics : Whether or not to simplify physics when applicable. 
Currently this only simplies object\n            interactions when opening drawers (when simplified, objects within a drawer do not slide around on\n            their own when the drawer is opened or closed, instead they are effectively glued down).\n        \"\"\"\n\n        self._start_player_screen_width = player_screen_width\n        self._start_player_screen_height = player_screen_height\n        self._local_thor_build = local_thor_build\n        self.x_display = x_display\n        self.controller: Optional[Controller] = None\n        self._started = False\n        self._quality = quality\n\n        self._initially_reachable_points: Optional[List[Dict]] = None\n        self._initially_reachable_points_set: Optional[Set[Tuple[float, float]]] = None\n        self._move_mag: Optional[float] = None\n        self._grid_size: Optional[float] = None\n        self._visibility_distance = visibility_distance\n        self._fov = fov\n        self.restrict_to_initially_reachable_points = (\n            restrict_to_initially_reachable_points\n        )\n        self.make_agents_visible = make_agents_visible\n        self.object_open_speed = object_open_speed\n        self._always_return_visible_range = False\n        self.simplify_physics = simplify_physics\n\n        self.start(None)\n        # noinspection PyTypeHints\n        self.controller.docker_enabled = docker_enabled  # type: ignore\n\n    @property\n    def scene_name(self) -> str:\n        \"\"\"Current ai2thor scene.\"\"\"\n        return self.controller.last_event.metadata[\"sceneName\"]\n\n    @property\n    def current_frame(self) -> np.ndarray:\n        \"\"\"Returns rgb image corresponding to the agent's egocentric view.\"\"\"\n        return self.controller.last_event.frame\n\n    @property\n    def last_event(self) -> ai2thor.server.Event:\n        \"\"\"Last event returned by the controller.\"\"\"\n        return self.controller.last_event\n\n    @property\n    def started(self) -> bool:\n        
\"\"\"Has the ai2thor controller been started.\"\"\"\n        return self._started\n\n    @property\n    def last_action(self) -> str:\n        \"\"\"Last action, as a string, taken by the agent.\"\"\"\n        return self.controller.last_event.metadata[\"lastAction\"]\n\n    @last_action.setter\n    def last_action(self, value: str) -> None:\n        \"\"\"Set the last action taken by the agent.\n\n        Doing this is rewriting history, be careful.\n        \"\"\"\n        self.controller.last_event.metadata[\"lastAction\"] = value\n\n    @property\n    def last_action_success(self) -> bool:\n        \"\"\"Was the last action taken by the agent a success?\"\"\"\n        return self.controller.last_event.metadata[\"lastActionSuccess\"]\n\n    @last_action_success.setter\n    def last_action_success(self, value: bool) -> None:\n        \"\"\"Set whether or not the last action taken by the agent was a success.\n\n        Doing this is rewriting history, be careful.\n        \"\"\"\n        self.controller.last_event.metadata[\"lastActionSuccess\"] = value\n\n    @property\n    def last_action_return(self) -> Any:\n        \"\"\"Get the value returned by the last action (if applicable).\n\n        For an example of an action that returns a value, see\n        `\"GetReachablePositions\"`.\n        \"\"\"\n        return self.controller.last_event.metadata[\"actionReturn\"]\n\n    @last_action_return.setter\n    def last_action_return(self, value: Any) -> None:\n        \"\"\"Set the value returned by the last action.\n\n        Doing this is rewriting history, be careful.\n        \"\"\"\n        self.controller.last_event.metadata[\"actionReturn\"] = value\n\n    def start(\n        self,\n        scene_name: Optional[str],\n        move_mag: float = 0.25,\n        **kwargs,\n    ) -> None:\n        \"\"\"Starts the ai2thor controller if it was previously stopped.\n\n        After starting, `reset` will be called with the scene name and move magnitude.\n\n        # 
Parameters\n\n        scene_name : The scene to load.\n        move_mag : The amount of distance the agent moves in a single `MoveAhead` step.\n        kwargs : additional kwargs, passed to reset.\n        \"\"\"\n        if self._started:\n            raise RuntimeError(\n                \"Trying to start the environment but it is already started.\"\n            )\n\n        # noinspection PyUnresolvedReferences\n        self.controller = Controller(\n            x_display=self.x_display,\n            width=self._start_player_screen_width,\n            height=self._start_player_screen_height,\n            local_executable_path=self._local_thor_build,\n            quality=self._quality,\n            server_class=ai2thor.fifo_server.FifoServer,\n        )\n\n        if (\n            self._start_player_screen_height,\n            self._start_player_screen_width,\n        ) != self.current_frame.shape[:2]:\n            self.controller.step(\n                {\n                    \"action\": \"ChangeResolution\",\n                    \"x\": self._start_player_screen_width,\n                    \"y\": self._start_player_screen_height,\n                }\n            )\n\n        self._started = True\n        self.reset(scene_name=scene_name, move_mag=move_mag, **kwargs)\n\n    def stop(self) -> None:\n        \"\"\"Stops the ai2thor controller.\"\"\"\n        try:\n            self.controller.stop()\n        except Exception as e:\n            get_logger().warning(str(e))\n        finally:\n            self._started = False\n\n    def reset(\n        self,\n        scene_name: Optional[str],\n        move_mag: float = 0.25,\n        **kwargs,\n    ):\n        \"\"\"Resets the ai2thor in a new scene.\n\n        Resets ai2thor into a new scene and initializes the scene/agents with\n        prespecified settings (e.g. 
move magnitude).\n\n        # Parameters\n\n        scene_name : The scene to load.\n        move_mag : The amount of distance the agent moves in a single `MoveAhead` step.\n        kwargs : additional kwargs, passed to the controller \"Initialize\" action.\n        \"\"\"\n        self._move_mag = move_mag\n        self._grid_size = self._move_mag\n\n        if scene_name is None:\n            scene_name = self.controller.last_event.metadata[\"sceneName\"]\n        self.controller.reset(scene_name)\n\n        self.controller.step(\n            {\n                \"action\": \"Initialize\",\n                \"gridSize\": self._grid_size,\n                \"visibilityDistance\": self._visibility_distance,\n                \"fieldOfView\": self._fov,\n                \"makeAgentsVisible\": self.make_agents_visible,\n                \"alwaysReturnVisibleRange\": self._always_return_visible_range,\n                **kwargs,\n            }\n        )\n\n        if self.object_open_speed != 1.0:\n            self.controller.step(\n                {\"action\": \"ChangeOpenSpeed\", \"x\": self.object_open_speed}\n            )\n\n        self._initially_reachable_points = None\n        self._initially_reachable_points_set = None\n        self.controller.step({\"action\": \"GetReachablePositions\"})\n        if not self.controller.last_event.metadata[\"lastActionSuccess\"]:\n            get_logger().warning(\n                \"Error when getting reachable points: {}\".format(\n                    self.controller.last_event.metadata[\"errorMessage\"]\n                )\n            )\n        self._initially_reachable_points = self.last_action_return\n\n    def teleport_agent_to(\n        self,\n        x: float,\n        y: float,\n        z: float,\n        rotation: float,\n        horizon: float,\n        standing: Optional[bool] = None,\n        force_action: bool = False,\n        only_initially_reachable: Optional[bool] = None,\n        verbose=True,\n        
ignore_y_diffs=False,\n    ) -> None:\n        \"\"\"Helper function teleporting the agent to a given location.\"\"\"\n        if standing is None:\n            standing = self.last_event.metadata.get(\n                \"isStanding\", self.last_event.metadata[\"agent\"].get(\"isStanding\")\n            )\n        original_location = self.get_agent_location()\n        target = {\"x\": x, \"y\": y, \"z\": z}\n        if only_initially_reachable is None:\n            only_initially_reachable = self.restrict_to_initially_reachable_points\n        if only_initially_reachable:\n            reachable_points = self.initially_reachable_points\n            reachable = False\n            for p in reachable_points:\n                if self.position_dist(target, p, ignore_y=ignore_y_diffs) < 0.01:\n                    reachable = True\n                    break\n            if not reachable:\n                self.last_action = \"TeleportFull\"\n                self.last_event.metadata[\"errorMessage\"] = (\n                    \"Target position was not initially reachable.\"\n                )\n                self.last_action_success = False\n                return\n        self.controller.step(\n            dict(\n                action=\"TeleportFull\",\n                x=x,\n                y=y,\n                z=z,\n                rotation={\"x\": 0.0, \"y\": rotation, \"z\": 0.0},\n                horizon=horizon,\n                standing=standing,\n                forceAction=force_action,\n            )\n        )\n        if not self.last_action_success:\n            agent_location = self.get_agent_location()\n            rot_diff = (\n                agent_location[\"rotation\"] - original_location[\"rotation\"]\n            ) % 360\n            new_old_dist = self.position_dist(\n                original_location, agent_location, ignore_y=ignore_y_diffs\n            )\n            if (\n                self.position_dist(\n                    original_location, 
agent_location, ignore_y=ignore_y_diffs\n                )\n                > 1e-2\n                or min(rot_diff, 360 - rot_diff) > 1\n            ):\n                get_logger().warning(\n                    \"Teleportation FAILED but agent still moved (position_dist {}, rot diff {})\"\n                    \" (\\nprevious location\\n{}\\ncurrent_location\\n{}\\n)\".format(\n                        new_old_dist, rot_diff, original_location, agent_location\n                    )\n                )\n            return\n\n        if force_action:\n            assert self.last_action_success\n            return\n\n        agent_location = self.get_agent_location()\n        rot_diff = (agent_location[\"rotation\"] - rotation) % 360\n        if (\n            self.position_dist(agent_location, target, ignore_y=ignore_y_diffs) > 1e-2\n            or min(rot_diff, 360 - rot_diff) > 1\n        ):\n            if only_initially_reachable:\n                self._snap_agent_to_initially_reachable(verbose=False)\n            if verbose:\n                get_logger().warning(\n                    \"Teleportation did not place agent\"\n                    \" precisely where desired in scene {}\"\n                    \" (\\ndesired\\n{}\\nactual\\n{}\\n)\"\n                    \" perhaps due to grid snapping.\"\n                    \" Action is considered failed but agent may have moved.\".format(\n                        self.scene_name,\n                        {\n                            \"x\": x,\n                            \"y\": y,\n                            \"z\": z,\n                            \"rotation\": rotation,\n                            \"standing\": standing,\n                            \"horizon\": horizon,\n                        },\n                        agent_location,\n                    )\n                )\n            self.last_action_success = False\n        return\n\n    def random_reachable_state(self, seed: int = None) -> Dict:\n       
 \"\"\"Returns a random reachable location in the scene.\"\"\"\n        if seed is not None:\n            random.seed(seed)\n        xyz = random.choice(self.currently_reachable_points)\n        rotation = random.choice([0, 90, 180, 270])\n        horizon = random.choice([0, 30, 60, 330])\n        state = copy.copy(xyz)\n        state[\"rotation\"] = rotation\n        state[\"horizon\"] = horizon\n        return state\n\n    def randomize_agent_location(\n        self, seed: int = None, partial_position: Optional[Dict[str, float]] = None\n    ) -> Dict:\n        \"\"\"Teleports the agent to a random reachable location in the scene.\"\"\"\n        if partial_position is None:\n            partial_position = {}\n        k = 0\n        state: Optional[Dict] = None\n\n        while k == 0 or (not self.last_action_success and k < 10):\n            state = self.random_reachable_state(seed=seed)\n            self.teleport_agent_to(**{**state, **partial_position})\n            k += 1\n\n        if not self.last_action_success:\n            get_logger().warning(\n                (\n                    \"Randomize agent location in scene {}\"\n                    \" with seed {} and partial position {} failed in \"\n                    \"10 attempts. 
Forcing the action.\"\n                ).format(self.scene_name, seed, partial_position)\n            )\n            self.teleport_agent_to(**{**state, **partial_position}, force_action=True)  # type: ignore\n            assert self.last_action_success\n\n        assert state is not None\n        return state\n\n    def object_pixels_in_frame(\n        self, object_id: str, hide_all: bool = True, hide_transparent: bool = False\n    ) -> np.ndarray:\n        \"\"\"Return a mask for a given object in the agent's current view.\n\n        # Parameters\n\n        object_id : The id of the object.\n        hide_all : Whether or not to hide all other objects in the scene before getting the mask.\n        hide_transparent : Whether or not partially transparent objects are considered to occlude the object.\n\n        # Returns\n\n        A numpy array of the mask.\n        \"\"\"\n\n        # Emphasizing an object turns it magenta and hides all other objects\n        # from view, we can find where the hand object is on the screen by\n        # emphasizing it and then scanning across the image for the magenta pixels.\n        if hide_all:\n            self.step({\"action\": \"EmphasizeObject\", \"objectId\": object_id})\n        else:\n            self.step({\"action\": \"MaskObject\", \"objectId\": object_id})\n            if hide_transparent:\n                self.step({\"action\": \"HideTranslucentObjects\"})\n        # noinspection PyShadowingBuiltins\n        filter = np.array([[[255, 0, 255]]])\n        object_pixels = 1 * np.all(self.current_frame == filter, axis=2)\n        if hide_all:\n            self.step({\"action\": \"UnemphasizeAll\"})\n        else:\n            self.step({\"action\": \"UnmaskObject\", \"objectId\": object_id})\n            if hide_transparent:\n                self.step({\"action\": \"UnhideAllObjects\"})\n        return object_pixels\n\n    def object_pixels_on_grid(\n        self,\n        object_id: str,\n        grid_shape: Tuple[int, 
int],\n        hide_all: bool = True,\n        hide_transparent: bool = False,\n    ) -> np.ndarray:\n        \"\"\"Like `object_pixels_in_frame` but counts object pixels in a\n        partitioning of the image.\"\"\"\n\n        def partition(n, num_parts):\n            m = n // num_parts\n            parts = [m] * num_parts\n            num_extra = n % num_parts\n            for k in range(num_extra):\n                parts[k] += 1\n            return parts\n\n        object_pixels = self.object_pixels_in_frame(\n            object_id=object_id, hide_all=hide_all, hide_transparent=hide_transparent\n        )\n\n        # Divide the current frame into a grid and count the number\n        # of hand object pixels in each of the grid squares\n        sums_in_blocks: List[List] = []\n        frame_shape = self.current_frame.shape[:2]\n        row_inds = np.cumsum([0] + partition(frame_shape[0], grid_shape[0]))\n        col_inds = np.cumsum([0] + partition(frame_shape[1], grid_shape[1]))\n        for i in range(len(row_inds) - 1):\n            sums_in_blocks.append([])\n            for j in range(len(col_inds) - 1):\n                sums_in_blocks[i].append(\n                    np.sum(\n                        object_pixels[\n                            row_inds[i] : row_inds[i + 1], col_inds[j] : col_inds[j + 1]\n                        ]\n                    )\n                )\n        return np.array(sums_in_blocks, dtype=np.float32)\n\n    def object_in_hand(self):\n        \"\"\"Object metadata for the object in the agent's hand.\"\"\"\n        inv_objs = self.last_event.metadata[\"inventoryObjects\"]\n        if len(inv_objs) == 0:\n            return None\n        elif len(inv_objs) == 1:\n            return self.get_object_by_id(\n                self.last_event.metadata[\"inventoryObjects\"][0][\"objectId\"]\n            )\n        else:\n            raise AttributeError(\"Must be <= 1 inventory objects.\")\n\n    @property\n    def 
initially_reachable_points(self) -> List[Dict[str, float]]:\n        \"\"\"List of {\"x\": x, \"y\": y, \"z\": z} locations in the scene that were\n        reachable after initially resetting.\"\"\"\n        assert self._initially_reachable_points is not None\n        return copy.deepcopy(self._initially_reachable_points)  # type:ignore\n\n    @property\n    def initially_reachable_points_set(self) -> Set[Tuple[float, float]]:\n        \"\"\"Set of (x,z) locations in the scene that were reachable after\n        initially resetting.\"\"\"\n        if self._initially_reachable_points_set is None:\n            self._initially_reachable_points_set = set()\n            for p in self.initially_reachable_points:\n                self._initially_reachable_points_set.add(\n                    self._agent_location_to_tuple(p)\n                )\n\n        return self._initially_reachable_points_set\n\n    @property\n    def currently_reachable_points(self) -> List[Dict[str, float]]:\n        \"\"\"List of {\"x\": x, \"y\": y, \"z\": z} locations in the scene that are\n        currently reachable.\"\"\"\n        self.step({\"action\": \"GetReachablePositions\"})\n        return self.last_event.metadata[\"actionReturn\"]  # type:ignore\n\n    def get_agent_location(self) -> Dict[str, Union[float, bool]]:\n        \"\"\"Gets agent's location.\"\"\"\n        metadata = self.controller.last_event.metadata\n        location = {\n            \"x\": metadata[\"agent\"][\"position\"][\"x\"],\n            \"y\": metadata[\"agent\"][\"position\"][\"y\"],\n            \"z\": metadata[\"agent\"][\"position\"][\"z\"],\n            \"rotation\": metadata[\"agent\"][\"rotation\"][\"y\"],\n            \"horizon\": metadata[\"agent\"][\"cameraHorizon\"],\n            \"standing\": metadata.get(\"isStanding\", metadata[\"agent\"].get(\"isStanding\")),\n        }\n        return location\n\n    @staticmethod\n    def _agent_location_to_tuple(p: Dict[str, float]) -> Tuple[float, float]:\n        
return round(p[\"x\"], 2), round(p[\"z\"], 2)\n\n    def _snap_agent_to_initially_reachable(self, verbose=True):\n        agent_location = self.get_agent_location()\n\n        end_location_tuple = self._agent_location_to_tuple(agent_location)\n        if end_location_tuple in self.initially_reachable_points_set:\n            return\n\n        agent_x = agent_location[\"x\"]\n        agent_z = agent_location[\"z\"]\n\n        closest_reachable_points = list(self.initially_reachable_points_set)\n        closest_reachable_points = sorted(\n            closest_reachable_points,\n            key=lambda xz: abs(xz[0] - agent_x) + abs(xz[1] - agent_z),\n        )\n\n        # In rare cases end_location_tuple might be not considered to be in self.initially_reachable_points_set\n        # even when it is, here we check for such cases.\n        if (\n            math.sqrt(\n                (\n                    (\n                        np.array(closest_reachable_points[0])\n                        - np.array(end_location_tuple)\n                    )\n                    ** 2\n                ).sum()\n            )\n            < 1e-6\n        ):\n            return\n\n        saved_last_action = self.last_action\n        saved_last_action_success = self.last_action_success\n        saved_last_action_return = self.last_action_return\n        saved_error_message = self.last_event.metadata[\"errorMessage\"]\n\n        # Thor behaves weirdly when the agent gets off of the grid and you\n        # try to teleport the agent back to the closest grid location. 
To\n        # get around this we first teleport the agent to random location\n        # and then back to where it should be.\n        for point in self.initially_reachable_points:\n            if abs(agent_x - point[\"x\"]) > 0.1 or abs(agent_z - point[\"z\"]) > 0.1:\n                self.teleport_agent_to(\n                    rotation=0,\n                    horizon=30,\n                    **point,\n                    only_initially_reachable=False,\n                    verbose=False,\n                )\n                if self.last_action_success:\n                    break\n\n        for p in closest_reachable_points:\n            self.teleport_agent_to(\n                **{**agent_location, \"x\": p[0], \"z\": p[1]},\n                only_initially_reachable=False,\n                verbose=False,\n            )\n            if self.last_action_success:\n                break\n\n        teleport_forced = False\n        if not self.last_action_success:\n            self.teleport_agent_to(\n                **{\n                    **agent_location,\n                    \"x\": closest_reachable_points[0][0],\n                    \"z\": closest_reachable_points[0][1],\n                },\n                force_action=True,\n                only_initially_reachable=False,\n                verbose=False,\n            )\n            teleport_forced = True\n\n        self.last_action = saved_last_action\n        self.last_action_success = saved_last_action_success\n        self.last_action_return = saved_last_action_return\n        self.last_event.metadata[\"errorMessage\"] = saved_error_message\n        new_agent_location = self.get_agent_location()\n        if verbose:\n            get_logger().warning(\n                (\n                    \"In {}, at location (x,z)=({},{}) which is not in the set \"\n                    \"of initially reachable points;\"\n                    \" attempting to correct this: agent teleported to (x,z)=({},{}).\\n\"\n                
    \"Teleportation {} forced.\"\n                ).format(\n                    self.scene_name,\n                    agent_x,\n                    agent_z,\n                    new_agent_location[\"x\"],\n                    new_agent_location[\"z\"],\n                    \"was\" if teleport_forced else \"wasn't\",\n                )\n            )\n\n    def step(\n        self,\n        action_dict: Optional[Dict[str, Union[str, int, float, Dict]]] = None,\n        **kwargs: Union[str, int, float, Dict],\n    ) -> ai2thor.server.Event:\n        \"\"\"Take a step in the ai2thor environment.\"\"\"\n        if action_dict is None:\n            action_dict = dict()\n        action_dict.update(kwargs)\n\n        action = cast(str, action_dict[\"action\"])\n\n        skip_render = \"renderImage\" in action_dict and not action_dict[\"renderImage\"]\n        last_frame: Optional[np.ndarray] = None\n        if skip_render:\n            last_frame = self.current_frame\n\n        if self.simplify_physics:\n            action_dict[\"simplifyPhysics\"] = True\n\n        if \"Move\" in action and \"Hand\" not in action:  # type: ignore\n            action_dict = {\n                **action_dict,\n                \"moveMagnitude\": self._move_mag,\n            }  # type: ignore\n            start_location = self.get_agent_location()\n            sr = self.controller.step(action_dict)\n\n            if self.restrict_to_initially_reachable_points:\n                end_location_tuple = self._agent_location_to_tuple(\n                    self.get_agent_location()\n                )\n                if end_location_tuple not in self.initially_reachable_points_set:\n                    self.teleport_agent_to(**start_location, force_action=True)  # type: ignore\n                    self.last_action = action\n                    self.last_action_success = False\n                    self.last_event.metadata[\"errorMessage\"] = (\n                        \"Moved to location outside of 
initially reachable points.\"\n                    )\n        elif \"RandomizeHideSeekObjects\" in action:\n            last_position = self.get_agent_location()\n            self.controller.step(action_dict)\n            metadata = self.last_event.metadata\n            if self.position_dist(last_position, self.get_agent_location()) > 0.001:\n                self.teleport_agent_to(**last_position, force_action=True)  # type: ignore\n                get_logger().warning(\n                    \"In scene {}, after randomization of hide and seek objects, agent moved.\".format(\n                        self.scene_name\n                    )\n                )\n\n            sr = self.controller.step({\"action\": \"GetReachablePositions\"})\n            self._initially_reachable_points = self.controller.last_event.metadata[\n                \"actionReturn\"\n            ]\n            self._initially_reachable_points_set = None\n            self.last_action = action\n            self.last_action_success = metadata[\"lastActionSuccess\"]\n            self.controller.last_event.metadata[\"actionReturn\"] = []\n        elif \"RotateUniverse\" in action:\n            sr = self.controller.step(action_dict)\n            metadata = self.last_event.metadata\n\n            if metadata[\"lastActionSuccess\"]:\n                sr = self.controller.step({\"action\": \"GetReachablePositions\"})\n                self._initially_reachable_points = self.controller.last_event.metadata[\n                    \"actionReturn\"\n                ]\n                self._initially_reachable_points_set = None\n                self.last_action = action\n                self.last_action_success = metadata[\"lastActionSuccess\"]\n                self.controller.last_event.metadata[\"actionReturn\"] = []\n        else:\n            sr = self.controller.step(action_dict)\n\n        if self.restrict_to_initially_reachable_points:\n            self._snap_agent_to_initially_reachable()\n\n        if 
skip_render:\n            assert last_frame is not None\n            self.last_event.frame = last_frame\n\n        return sr\n\n    @staticmethod\n    def position_dist(\n        p0: Mapping[str, Any],\n        p1: Mapping[str, Any],\n        ignore_y: bool = False,\n        l1_dist: bool = False,\n    ) -> float:\n        \"\"\"Distance between two points of the form {\"x\": x, \"y\":y, \"z\":z\"}.\"\"\"\n        if l1_dist:\n            return (\n                abs(p0[\"x\"] - p1[\"x\"])\n                + (0 if ignore_y else abs(p0[\"y\"] - p1[\"y\"]))\n                + abs(p0[\"z\"] - p1[\"z\"])\n            )\n        else:\n            return math.sqrt(\n                (p0[\"x\"] - p1[\"x\"]) ** 2\n                + (0 if ignore_y else (p0[\"y\"] - p1[\"y\"]) ** 2)\n                + (p0[\"z\"] - p1[\"z\"]) ** 2\n            )\n\n    @staticmethod\n    def rotation_dist(a: Dict[str, float], b: Dict[str, float]):\n        \"\"\"Distance between rotations.\"\"\"\n\n        def deg_dist(d0: float, d1: float):\n            dist = (d0 - d1) % 360\n            return min(dist, 360 - dist)\n\n        return sum(deg_dist(a[k], b[k]) for k in [\"x\", \"y\", \"z\"])\n\n    @staticmethod\n    def angle_between_rotations(a: Dict[str, float], b: Dict[str, float]):\n        return np.abs(\n            (180 / (2 * math.pi))\n            * (\n                Rotation.from_euler(\"xyz\", [a[k] for k in \"xyz\"], degrees=True)\n                * Rotation.from_euler(\"xyz\", [b[k] for k in \"xyz\"], degrees=True).inv()\n            ).as_rotvec()\n        ).sum()\n\n    def closest_object_with_properties(\n        self, properties: Dict[str, Any]\n    ) -> Optional[Dict[str, Any]]:\n        \"\"\"Find the object closest to the agent that has the given\n        properties.\"\"\"\n        agent_pos = self.controller.last_event.metadata[\"agent\"][\"position\"]\n        min_dist = float(\"inf\")\n        closest = None\n        for o in self.all_objects():\n            
satisfies_all = True\n            for k, v in properties.items():\n                if o[k] != v:\n                    satisfies_all = False\n                    break\n            if satisfies_all:\n                d = self.position_dist(agent_pos, o[\"position\"])\n                if d < min_dist:\n                    min_dist = d\n                    closest = o\n        return closest\n\n    def closest_visible_object_of_type(\n        self, object_type: str\n    ) -> Optional[Dict[str, Any]]:\n        \"\"\"Find the object closest to the agent that is visible and has the\n        given type.\"\"\"\n        properties = {\"visible\": True, \"objectType\": object_type}\n        return self.closest_object_with_properties(properties)\n\n    def closest_object_of_type(self, object_type: str) -> Optional[Dict[str, Any]]:\n        \"\"\"Find the object closest to the agent that has the given type.\"\"\"\n        properties = {\"objectType\": object_type}\n        return self.closest_object_with_properties(properties)\n\n    def closest_reachable_point_to_position(\n        self, position: Dict[str, float]\n    ) -> Tuple[Dict[str, float], float]:\n        \"\"\"Of all reachable positions, find the one that is closest to the\n        given location.\"\"\"\n        target = np.array([position[\"x\"], position[\"z\"]])\n        min_dist = float(\"inf\")\n        closest_point = None\n        for pt in self.initially_reachable_points:\n            dist = np.linalg.norm(target - np.array([pt[\"x\"], pt[\"z\"]]))\n            if dist < min_dist:\n                closest_point = pt\n                min_dist = dist\n                if min_dist < 1e-3:\n                    break\n        assert closest_point is not None\n        return closest_point, min_dist\n\n    @staticmethod\n    def _angle_from_to(a_from: float, a_to: float) -> float:\n        a_from = a_from % 360\n        a_to = a_to % 360\n        min_rot = min(a_from, a_to)\n        max_rot = max(a_from, a_to)\n      
  rot_across_0 = (360 - max_rot) + min_rot\n        rot_not_across_0 = max_rot - min_rot\n        rot_err = min(rot_across_0, rot_not_across_0)\n        if rot_across_0 == rot_err:\n            rot_err *= -1 if a_to > a_from else 1\n        else:\n            rot_err *= 1 if a_to > a_from else -1\n        return rot_err\n\n    def agent_xz_to_scene_xz(self, agent_xz: Dict[str, float]) -> Dict[str, float]:\n        agent_pos = self.get_agent_location()\n\n        x_rel_agent = agent_xz[\"x\"]\n        z_rel_agent = agent_xz[\"z\"]\n        scene_x = agent_pos[\"x\"]\n        scene_z = agent_pos[\"z\"]\n        rotation = agent_pos[\"rotation\"]\n        if abs(rotation) < 1e-5:\n            scene_x += x_rel_agent\n            scene_z += z_rel_agent\n        elif abs(rotation - 90) < 1e-5:\n            scene_x += z_rel_agent\n            scene_z += -x_rel_agent\n        elif abs(rotation - 180) < 1e-5:\n            scene_x += -x_rel_agent\n            scene_z += -z_rel_agent\n        elif abs(rotation - 270) < 1e-5:\n            scene_x += -z_rel_agent\n            scene_z += x_rel_agent\n        else:\n            raise Exception(\"Rotation must be one of 0, 90, 180, or 270.\")\n\n        return {\"x\": scene_x, \"z\": scene_z}\n\n    def scene_xz_to_agent_xz(self, scene_xz: Dict[str, float]) -> Dict[str, float]:\n        agent_pos = self.get_agent_location()\n        x_err = scene_xz[\"x\"] - agent_pos[\"x\"]\n        z_err = scene_xz[\"z\"] - agent_pos[\"z\"]\n\n        rotation = agent_pos[\"rotation\"]\n        if abs(rotation) < 1e-5:\n            agent_x = x_err\n            agent_z = z_err\n        elif abs(rotation - 90) < 1e-5:\n            agent_x = -z_err\n            agent_z = x_err\n        elif abs(rotation - 180) < 1e-5:\n            agent_x = -x_err\n            agent_z = -z_err\n        elif abs(rotation - 270) < 1e-5:\n            agent_x = z_err\n            agent_z = -x_err\n        else:\n            raise Exception(\"Rotation must be one of 0, 
90, 180, or 270.\")\n\n        return {\"x\": agent_x, \"z\": agent_z}\n\n    def all_objects(self) -> List[Dict[str, Any]]:\n        \"\"\"Return all object metadata.\"\"\"\n        return self.controller.last_event.metadata[\"objects\"]\n\n    def all_objects_with_properties(\n        self, properties: Dict[str, Any]\n    ) -> List[Dict[str, Any]]:\n        \"\"\"Find all objects with the given properties.\"\"\"\n        objects = []\n        for o in self.all_objects():\n            satisfies_all = True\n            for k, v in properties.items():\n                if o[k] != v:\n                    satisfies_all = False\n                    break\n            if satisfies_all:\n                objects.append(o)\n        return objects\n\n    def visible_objects(self) -> List[Dict[str, Any]]:\n        \"\"\"Return all visible objects.\"\"\"\n        return self.all_objects_with_properties({\"visible\": True})\n\n    def get_object_by_id(self, object_id: str) -> Optional[Dict[str, Any]]:\n        for o in self.last_event.metadata[\"objects\"]:\n            if o[\"objectId\"] == object_id:\n                return o\n        return None\n\n    ###\n    # Following is used for computing shortest paths between states\n    ###\n    _CACHED_GRAPHS: Dict[str, nx.DiGraph] = {}\n\n    GRAPH_ACTIONS_SET = {\"LookUp\", \"LookDown\", \"RotateLeft\", \"RotateRight\", \"MoveAhead\"}\n\n    def reachable_points_with_rotations_and_horizons(self):\n        self.controller.step({\"action\": \"GetReachablePositions\"})\n        assert self.last_action_success\n\n        points_slim = self.last_event.metadata[\"actionReturn\"]\n\n        points = []\n        for r in [0, 90, 180, 270]:\n            for horizon in [-30, 0, 30, 60]:\n                for p in points_slim:\n                    p = copy.copy(p)\n                    p[\"rotation\"] = r\n                    p[\"horizon\"] = horizon\n                    points.append(p)\n        return points\n\n    @staticmethod\n    def 
location_for_key(key, y_value=0.0):\n        x, z, rot, hor = key\n        loc = dict(x=x, y=y_value, z=z, rotation=rot, horizon=hor)\n        return loc\n\n    @staticmethod\n    def get_key(input_dict: Dict[str, Any]) -> Tuple[float, float, int, int]:\n        if \"x\" in input_dict:\n            x = input_dict[\"x\"]\n            z = input_dict[\"z\"]\n            rot = input_dict[\"rotation\"]\n            hor = input_dict[\"horizon\"]\n        else:\n            x = input_dict[\"position\"][\"x\"]\n            z = input_dict[\"position\"][\"z\"]\n            rot = input_dict[\"rotation\"][\"y\"]\n            hor = input_dict[\"cameraHorizon\"]\n\n        return (\n            round(x, 2),\n            round(z, 2),\n            round_to_factor(rot, 90) % 360,\n            round_to_factor(hor, 30) % 360,\n        )\n\n    def update_graph_with_failed_action(self, failed_action: str):\n        if (\n            self.scene_name not in self._CACHED_GRAPHS\n            or failed_action not in self.GRAPH_ACTIONS_SET\n        ):\n            return\n\n        source_key = self.get_key(self.last_event.metadata[\"agent\"])\n        self._check_contains_key(source_key)\n\n        edge_dict = self.graph[source_key]\n        to_remove_key = None\n        for target_key in self.graph[source_key]:\n            if edge_dict[target_key][\"action\"] == failed_action:\n                to_remove_key = target_key\n                break\n        if to_remove_key is not None:\n            self.graph.remove_edge(source_key, to_remove_key)\n\n    def _add_from_to_edge(\n        self,\n        g: nx.DiGraph,\n        s: Tuple[float, float, int, int],\n        t: Tuple[float, float, int, int],\n    ):\n        def ae(x, y):\n            return abs(x - y) < 0.001\n\n        s_x, s_z, s_rot, s_hor = s\n        t_x, t_z, t_rot, t_hor = t\n\n        dist = round(math.sqrt((s_x - t_x) ** 2 + (s_z - t_z) ** 2), 2)\n        angle_dist = (round_to_factor(t_rot - s_rot, 90) % 360) // 90\n        
horz_dist = (round_to_factor(t_hor - s_hor, 30) % 360) // 30\n\n        # If source and target differ by more than one action, continue\n        if sum(x != 0 for x in [dist, angle_dist, horz_dist]) != 1:\n            return\n\n        grid_size = self._grid_size\n        action = None\n        if angle_dist != 0:\n            if angle_dist == 1:\n                action = \"RotateRight\"\n            elif angle_dist == 3:\n                action = \"RotateLeft\"\n\n        elif horz_dist != 0:\n            if horz_dist == 11:\n                action = \"LookUp\"\n            elif horz_dist == 1:\n                action = \"LookDown\"\n        elif ae(dist, grid_size):\n            if (\n                (s_rot == 0 and ae(t_z - s_z, grid_size))\n                or (s_rot == 90 and ae(t_x - s_x, grid_size))\n                or (s_rot == 180 and ae(t_z - s_z, -grid_size))\n                or (s_rot == 270 and ae(t_x - s_x, -grid_size))\n            ):\n                g.add_edge(s, t, action=\"MoveAhead\")\n\n        if action is not None:\n            g.add_edge(s, t, action=action)\n\n    @functools.lru_cache(1)\n    def possible_neighbor_offsets(self) -> Tuple[Tuple[float, float, int, int], ...]:\n        grid_size = round(self._grid_size, 2)\n        offsets = []\n        for rot_diff in [-90, 0, 90]:\n            for horz_diff in [-30, 0, 30, 60]:\n                for x_diff in [-grid_size, 0, grid_size]:\n                    for z_diff in [-grid_size, 0, grid_size]:\n                        if (rot_diff != 0) + (horz_diff != 0) + (x_diff != 0) + (\n                            z_diff != 0\n                        ) == 1:\n                            offsets.append((x_diff, z_diff, rot_diff, horz_diff))\n        return tuple(offsets)\n\n    def _add_node_to_graph(self, graph: nx.DiGraph, s: Tuple[float, float, int, int]):\n        if s in graph:\n            return\n\n        existing_nodes = set(graph.nodes())\n        graph.add_node(s)\n\n        for o in 
self.possible_neighbor_offsets():\n            t = (s[0] + o[0], s[1] + o[1], s[2] + o[2], s[3] + o[3])\n            if t in existing_nodes:\n                self._add_from_to_edge(graph, s, t)\n                self._add_from_to_edge(graph, t, s)\n\n    @property\n    def graph(self):\n        if self.scene_name not in self._CACHED_GRAPHS:\n            g = nx.DiGraph()\n            points = self.reachable_points_with_rotations_and_horizons()\n            for p in points:\n                self._add_node_to_graph(g, self.get_key(p))\n\n            self._CACHED_GRAPHS[self.scene_name] = g\n        return self._CACHED_GRAPHS[self.scene_name]\n\n    @graph.setter\n    def graph(self, g):\n        self._CACHED_GRAPHS[self.scene_name] = g\n\n    def _check_contains_key(self, key: Tuple[float, float, int, int], add_if_not=True):\n        if key not in self.graph:\n            get_logger().warning(\n                \"{} was not in the graph for scene {}.\".format(key, self.scene_name)\n            )\n            if add_if_not:\n                self._add_node_to_graph(self.graph, key)\n\n    def shortest_state_path(self, source_state_key, goal_state_key):\n        self._check_contains_key(source_state_key)\n        self._check_contains_key(goal_state_key)\n        # noinspection PyBroadException\n        try:\n            path = nx.shortest_path(self.graph, source_state_key, goal_state_key)\n            return path\n        except Exception as _:\n            return None\n\n    def action_transitioning_between_keys(self, s, t):\n        self._check_contains_key(s)\n        self._check_contains_key(t)\n        if self.graph.has_edge(s, t):\n            return self.graph.get_edge_data(s, t)[\"action\"]\n        else:\n            return None\n\n    def shortest_path_next_state(self, source_state_key, goal_state_key):\n        self._check_contains_key(source_state_key)\n        self._check_contains_key(goal_state_key)\n        if source_state_key == goal_state_key:\n            
raise RuntimeError(\"called next state on the same source and goal state\")\n        state_path = self.shortest_state_path(source_state_key, goal_state_key)\n        return state_path[1]\n\n    def shortest_path_next_action(self, source_state_key, goal_state_key):\n        self._check_contains_key(source_state_key)\n        self._check_contains_key(goal_state_key)\n\n        next_state_key = self.shortest_path_next_state(source_state_key, goal_state_key)\n        return self.graph.get_edge_data(source_state_key, next_state_key)[\"action\"]\n\n    def shortest_path_length(self, source_state_key, goal_state_key):\n        self._check_contains_key(source_state_key)\n        self._check_contains_key(goal_state_key)\n        try:\n            return nx.shortest_path_length(self.graph, source_state_key, goal_state_key)\n        except nx.NetworkXNoPath as _:\n            return float(\"inf\")\n"
  },
  {
    "path": "allenact_plugins/ithor_plugin/ithor_sensors.py",
    "content": "import copy\nfrom functools import reduce\nfrom typing import Any, Dict, Optional, Union, Sequence\n\nimport ai2thor.controller\nimport gym\nimport gym.spaces\nimport numpy as np\nimport torch\n\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.base_abstractions.task import Task\nfrom allenact.embodiedai.mapping.mapping_utils.map_builders import (\n    BinnedPointCloudMapBuilder,\n    SemanticMapBuilder,\n    ObjectHull2d,\n)\nfrom allenact.embodiedai.sensors.vision_sensors import RGBSensor\nfrom allenact.utils.misc_utils import prepare_locals_for_super\nfrom allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment\nfrom allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask\nfrom allenact_plugins.ithor_plugin.ithor_util import include_object_data\nfrom allenact_plugins.robothor_plugin.robothor_environment import RoboThorEnvironment\nfrom allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask, ObjectNavTask\n\nTHOR_ENV_TYPE = Union[\n    ai2thor.controller.Controller, IThorEnvironment, RoboThorEnvironment\n]\nTHOR_TASK_TYPE = Union[\n    Task[ai2thor.controller.Controller],\n    Task[IThorEnvironment],\n    Task[RoboThorEnvironment],\n]\n\n\nclass RGBSensorThor(RGBSensor[THOR_ENV_TYPE, THOR_TASK_TYPE]):\n    \"\"\"Sensor for RGB images in THOR.\n\n    Returns from a running IThorEnvironment instance, the current RGB\n    frame corresponding to the agent's egocentric view.\n    \"\"\"\n\n    def frame_from_env(\n        self,\n        env: THOR_ENV_TYPE,\n        task: Optional[THOR_TASK_TYPE],\n    ) -> np.ndarray:  # type:ignore\n        if isinstance(env, ai2thor.controller.Controller):\n            return env.last_event.frame.copy()\n        else:\n            return env.current_frame.copy()\n\n\nclass GoalObjectTypeThorSensor(Sensor):\n    def __init__(\n        self,\n        object_types: Sequence[str],\n        target_to_detector_map: Optional[Dict[str, str]] = None,\n        
detector_types: Optional[Sequence[str]] = None,\n        uuid: str = \"goal_object_type_ind\",\n        **kwargs: Any,\n    ):\n        self.ordered_object_types = list(object_types)\n        assert self.ordered_object_types == sorted(\n            self.ordered_object_types\n        ), \"object types input to goal object type sensor must be ordered\"\n\n        self.target_to_detector_map = target_to_detector_map\n\n        if target_to_detector_map is None:\n            self.object_type_to_ind = {\n                ot: i for i, ot in enumerate(self.ordered_object_types)\n            }\n        else:\n            assert (\n                detector_types is not None\n            ), \"Missing detector_types for map {}\".format(target_to_detector_map)\n            self.target_to_detector = target_to_detector_map\n            self.detector_types = detector_types\n\n            detector_index = {ot: i for i, ot in enumerate(self.detector_types)}\n            self.object_type_to_ind = {\n                ot: detector_index[self.target_to_detector[ot]]\n                for ot in self.ordered_object_types\n            }\n\n        observation_space = self._get_observation_space()\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def _get_observation_space(self):\n        if self.target_to_detector_map is None:\n            return gym.spaces.Discrete(len(self.ordered_object_types))\n        else:\n            return gym.spaces.Discrete(len(self.detector_types))\n\n    def get_observation(\n        self,\n        env: IThorEnvironment,\n        task: Optional[ObjectNaviThorGridTask],\n        *args: Any,\n        **kwargs: Any,\n    ) -> Any:\n        return self.object_type_to_ind[task.task_info[\"object_type\"]]\n\n\nclass TakeEndActionThorNavSensor(\n    Sensor[\n        Union[RoboThorEnvironment, IThorEnvironment],\n        Union[ObjectNaviThorGridTask, ObjectNavTask, PointNavTask],\n    ]\n):\n    def __init__(self, nactions: int, uuid: str, 
**kwargs: Any) -> None:\n        self.nactions = nactions\n\n        observation_space = self._get_observation_space()\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def _get_observation_space(self) -> gym.spaces.Discrete:\n        \"\"\"The observation space.\n\n        Equals `gym.spaces.Discrete(2)` where a 0 indicates that the agent\n        **should not** take the `End` action and a 1 indicates that the agent\n        **should** take the end action.\n        \"\"\"\n        return gym.spaces.Discrete(2)\n\n    def get_observation(  # type:ignore\n        self,\n        env: IThorEnvironment,\n        task: Union[ObjectNaviThorGridTask, ObjectNavTask, PointNavTask],\n        *args,\n        **kwargs,\n    ) -> np.ndarray:\n        if isinstance(task, ObjectNaviThorGridTask):\n            should_end = task.is_goal_object_visible()\n        elif isinstance(task, ObjectNavTask):\n            should_end = task._is_goal_in_range()\n        elif isinstance(task, PointNavTask):\n            should_end = task._is_goal_in_range()\n        else:\n            raise NotImplementedError\n\n        if should_end is None:\n            should_end = False\n        return np.array([1 * should_end], dtype=np.int64)\n\n\nclass RelativePositionChangeTHORSensor(\n    Sensor[RoboThorEnvironment, Task[RoboThorEnvironment]]\n):\n    def __init__(self, uuid: str = \"rel_position_change\", **kwargs: Any):\n        observation_space = gym.spaces.Dict(\n            {\n                \"last_allocentric_position\": gym.spaces.Box(\n                    low=np.array([-np.inf, -np.inf, 0], dtype=np.float32),\n                    high=np.array([np.inf, np.inf, 360], dtype=np.float32),\n                    shape=(3,),\n                    dtype=np.float32,\n                ),\n                \"dx_dz_dr\": gym.spaces.Box(\n                    low=np.array([-np.inf, -np.inf, -360], dtype=np.float32),\n                    high=np.array([-np.inf, -np.inf, 360], 
dtype=np.float32),\n                    shape=(3,),\n                    dtype=np.float32,\n                ),\n            }\n        )\n        super().__init__(**prepare_locals_for_super(locals()))\n\n        self.last_xzr: Optional[np.ndarray] = None\n\n    @staticmethod\n    def get_relative_position_change(from_xzr: np.ndarray, to_xzr: np.ndarray):\n        dx_dz_dr = to_xzr - from_xzr\n\n        # Transform dx, dz (in global coordinates) into the relative coordinates\n        # given by rotation r0=from_xzr[-1]. This requires rotating everything so that\n        # r0 is facing in the positive z direction. Since thor rotations are the negative\n        # of the usual rotation direction, this means we want to rotate by r0 degrees.\n        theta = np.pi * from_xzr[-1] / 180\n        cos_theta = np.cos(theta)\n        sin_theta = np.sin(theta)\n\n        dx_dz_dr = (\n            np.array(\n                [\n                    [cos_theta, -sin_theta, 0],\n                    [sin_theta, cos_theta, 0],\n                    [0, 0, 1],  # Don't change dr\n                ]\n            )\n            @ dx_dz_dr.reshape(-1, 1)\n        ).reshape(-1)\n\n        dx_dz_dr[-1] = dx_dz_dr[-1] % 360\n        return dx_dz_dr\n\n    def get_observation(\n        self,\n        env: RoboThorEnvironment,\n        task: Optional[Task[RoboThorEnvironment]],\n        *args: Any,\n        **kwargs: Any,\n    ) -> Any:\n\n        if task.num_steps_taken() == 0:\n            p = env.controller.last_event.metadata[\"agent\"][\"position\"]\n            r = env.controller.last_event.metadata[\"agent\"][\"rotation\"][\"y\"]\n            self.last_xzr = np.array([p[\"x\"], p[\"z\"], r % 360])\n\n        p = env.controller.last_event.metadata[\"agent\"][\"position\"]\n        r = env.controller.last_event.metadata[\"agent\"][\"rotation\"][\"y\"]\n        current_xzr = np.array([p[\"x\"], p[\"z\"], r % 360])\n\n        dx_dz_dr = self.get_relative_position_change(\n            
from_xzr=self.last_xzr, to_xzr=current_xzr\n        )\n\n        to_return = {\"last_allocentric_position\": self.last_xzr, \"dx_dz_dr\": dx_dz_dr}\n\n        self.last_xzr = current_xzr\n\n        return to_return\n\n\nclass ReachableBoundsTHORSensor(Sensor[RoboThorEnvironment, Task[RoboThorEnvironment]]):\n    def __init__(self, margin: float, uuid: str = \"scene_bounds\", **kwargs: Any):\n        observation_space = gym.spaces.Dict(\n            {\n                \"x_range\": gym.spaces.Box(\n                    low=np.array([-np.inf, -np.inf], dtype=np.float32),\n                    high=np.array([np.inf, np.inf], dtype=np.float32),\n                    shape=(2,),\n                    dtype=np.float32,\n                ),\n                \"z_range\": gym.spaces.Box(\n                    low=np.array([-np.inf, -np.inf], dtype=np.float32),\n                    high=np.array([np.inf, np.inf], dtype=np.float32),\n                    shape=(2,),\n                    dtype=np.float32,\n                ),\n            }\n        )\n        super().__init__(**prepare_locals_for_super(locals()))\n\n        self.margin = margin\n        self._bounds_cache = {}\n\n    @staticmethod\n    def get_bounds(\n        controller: ai2thor.controller.Controller,\n        margin: float,\n    ) -> Dict[str, np.ndarray]:\n        positions = controller.step(\"GetReachablePositions\").metadata[\"actionReturn\"]\n        min_x = min(p[\"x\"] for p in positions)\n        max_x = max(p[\"x\"] for p in positions)\n        min_z = min(p[\"z\"] for p in positions)\n        max_z = max(p[\"z\"] for p in positions)\n\n        return {\n            \"x_range\": np.array([min_x - margin, max_x + margin]),\n            \"z_range\": np.array([min_z - margin, max_z + margin]),\n        }\n\n    def get_observation(\n        self,\n        env: RoboThorEnvironment,\n        task: Optional[Task[RoboThorEnvironment]],\n        *args: Any,\n        **kwargs: Any,\n    ) -> Any:\n        if 
isinstance(env, ai2thor.controller.Controller):\n            controller = env\n        else:\n            controller = env.controller\n\n        scene_name = controller.last_event.metadata[\"sceneName\"]\n        if scene_name not in self._bounds_cache:\n            self._bounds_cache[scene_name] = self.get_bounds(\n                controller=controller, margin=self.margin\n            )\n\n        return copy.deepcopy(self._bounds_cache[scene_name])\n\n\nclass SceneBoundsTHORSensor(Sensor[RoboThorEnvironment, Task[RoboThorEnvironment]]):\n    def __init__(self, uuid: str = \"scene_bounds\", **kwargs: Any):\n        observation_space = gym.spaces.Dict(\n            {\n                \"x_range\": gym.spaces.Box(\n                    low=np.array([-np.inf, -np.inf]),\n                    high=np.array([np.inf, np.inf]),\n                    shape=(2,),\n                    dtype=np.float32,\n                ),\n                \"z_range\": gym.spaces.Box(\n                    low=np.array([-np.inf, -np.inf]),\n                    high=np.array([np.inf, np.inf]),\n                    shape=(2,),\n                    dtype=np.float32,\n                ),\n            }\n        )\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def get_observation(\n        self,\n        env: RoboThorEnvironment,\n        task: Optional[Task[RoboThorEnvironment]],\n        *args: Any,\n        **kwargs: Any,\n    ) -> Any:\n        scene_bounds = env.controller.last_event.metadata[\"sceneBounds\"]\n        center = scene_bounds[\"center\"]\n        size = scene_bounds[\"size\"]\n\n        return {\n            \"x_range\": np.array(\n                [center[\"x\"] - size[\"x\"] / 2, center[\"x\"] + size[\"x\"] / 2]\n            ),\n            \"z_range\": np.array(\n                [center[\"z\"] - size[\"z\"] / 2, center[\"z\"] + size[\"z\"] / 2]\n            ),\n        }\n\n\nclass BinnedPointCloudMapTHORSensor(\n    Sensor[RoboThorEnvironment, 
Task[RoboThorEnvironment]]\n):\n    observation_space = gym.spaces.Dict\n\n    def __init__(\n        self,\n        fov: Optional[float],\n        vision_range_in_cm: int,\n        map_size_in_cm: int,\n        resolution_in_cm: int,\n        map_range_sensor: Sensor,\n        return_egocentric_local_context: bool = False,\n        height_bins: Sequence[float] = (0.02, 2),\n        ego_only: bool = True,\n        exclude_agent: bool = False,\n        uuid: str = \"binned_pc_map\",\n        device: torch.device = torch.device(\"cpu\"),\n        **kwargs: Any,\n    ):\n        self.fov = fov\n        self.vision_range_in_cm = vision_range_in_cm\n        self.map_size_in_cm = map_size_in_cm\n        self.resolution_in_cm = resolution_in_cm\n        self.height_bins = height_bins\n        self.ego_only = ego_only\n        self.return_egocentric_local_context = return_egocentric_local_context\n        self.exclude_agent = exclude_agent\n\n        self.binned_pc_map_builder = BinnedPointCloudMapBuilder(\n            fov=fov,\n            vision_range_in_cm=vision_range_in_cm,\n            map_size_in_cm=map_size_in_cm,\n            resolution_in_cm=resolution_in_cm,\n            height_bins=height_bins,\n            return_egocentric_local_context=return_egocentric_local_context,\n        )\n        self.device = device\n\n        big_map_space = gym.spaces.Box(\n            low=0,\n            high=np.inf,\n            shape=self.binned_pc_map_builder.binned_point_cloud_map.shape,\n            dtype=np.float32,\n        )\n        local_map_space = gym.spaces.Box(\n            low=0,\n            high=np.inf,\n            shape=(self.binned_pc_map_builder.vision_range_in_map_units,) * 2\n            + self.binned_pc_map_builder.binned_point_cloud_map.shape[-1:],\n            dtype=np.float32,\n        )\n\n        space_dict = {\n            \"egocentric_update\": local_map_space,\n        }\n        if self.return_egocentric_local_context:\n            space_dict = 
{\n                \"egocentric_local_context\": copy.deepcopy(local_map_space),\n            }\n        if not ego_only:\n            space_dict[\"allocentric_update\"] = copy.deepcopy(big_map_space)\n            space_dict[\"map\"] = copy.deepcopy(big_map_space)\n\n        observation_space = gym.spaces.Dict(space_dict)\n        super().__init__(**prepare_locals_for_super(locals()))\n\n        self.map_range_sensor = map_range_sensor\n\n    @property\n    def device(self):\n        return self.binned_pc_map_builder.device\n\n    @device.setter\n    def device(self, val: torch.device):\n        self.binned_pc_map_builder.device = torch.device(val)\n\n    def get_observation(\n        self,\n        env: RoboThorEnvironment,\n        task: Optional[Task[RoboThorEnvironment]],\n        *args: Any,\n        **kwargs: Any,\n    ) -> Any:\n        if isinstance(env, ai2thor.controller.Controller):\n            controller = env\n        else:\n            controller = env.controller\n\n        e = controller.last_event\n        metadata = e.metadata\n\n        if task.num_steps_taken() == 0:\n            xz_ranges_dict = self.map_range_sensor.get_observation(env=env, task=task)\n            if self.fov is None:\n                self.binned_pc_map_builder.fov = e.metadata[\"fov\"]\n            self.binned_pc_map_builder.reset(\n                min_xyz=np.array(\n                    [\n                        xz_ranges_dict[\"x_range\"][0],\n                        0,  # TODO: Should y be different per scene?\n                        xz_ranges_dict[\"z_range\"][0],\n                    ]\n                )\n            )\n\n        depth_frame = e.depth_frame\n\n        if self.exclude_agent:\n            depth_frame = depth_frame.copy()\n            assert len(e.instance_masks) > 0\n            depth_frame[~reduce(np.logical_or, e.instance_masks.values())] = np.nan\n\n        map_dict = self.binned_pc_map_builder.update(\n            depth_frame=depth_frame,\n            
camera_xyz=np.array(\n                [metadata[\"cameraPosition\"][k] for k in [\"x\", \"y\", \"z\"]]\n            ),\n            camera_rotation=metadata[\"agent\"][\"rotation\"][\"y\"],\n            camera_horizon=metadata[\"agent\"][\"cameraHorizon\"],\n        )\n        return {k: map_dict[k] for k in self.observation_space.spaces.keys()}\n\n\nclass SemanticMapTHORSensor(Sensor[RoboThorEnvironment, Task[RoboThorEnvironment]]):\n    observation_space = gym.spaces.Dict\n\n    def __init__(\n        self,\n        fov: float,\n        vision_range_in_cm: int,\n        map_size_in_cm: int,\n        resolution_in_cm: int,\n        ordered_object_types: Sequence[str],\n        map_range_sensor: Sensor,\n        ego_only: bool = True,\n        uuid: str = \"semantic_map\",\n        device: torch.device = torch.device(\"cpu\"),\n        **kwargs: Any,\n    ):\n        self.fov = fov\n        self.vision_range_in_cm = vision_range_in_cm\n        self.map_size_in_cm = map_size_in_cm\n        self.resolution_in_cm = resolution_in_cm\n        self.ordered_object_types = ordered_object_types\n        self.map_range_sensor = map_range_sensor\n        self.ego_only = ego_only\n\n        self.semantic_map_builder = SemanticMapBuilder(\n            fov=fov,\n            vision_range_in_cm=vision_range_in_cm,\n            map_size_in_cm=map_size_in_cm,\n            resolution_in_cm=resolution_in_cm,\n            ordered_object_types=ordered_object_types,\n            device=device,\n        )\n\n        def get_map_space(nchannels: int, size: int):\n            return gym.spaces.Box(\n                low=0,\n                high=1,\n                shape=(size, size, nchannels),\n                dtype=np.bool_,\n            )\n\n        n = len(self.ordered_object_types)\n        small = self.vision_range_in_cm // self.resolution_in_cm\n        big = self.semantic_map_builder.ground_truth_semantic_map.shape[0]\n\n        space_dict = {\n            \"egocentric_update\": 
get_map_space(\n                nchannels=n,\n                size=small,\n            ),\n            \"egocentric_mask\": get_map_space(\n                nchannels=1,\n                size=small,\n            ),\n        }\n        if not ego_only:\n            space_dict[\"explored_mask\"] = get_map_space(\n                nchannels=1,\n                size=big,\n            )\n            space_dict[\"map\"] = get_map_space(\n                nchannels=n,\n                size=big,\n            )\n\n        observation_space = gym.spaces.Dict(space_dict)\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    @property\n    def device(self):\n        return self.semantic_map_builder.device\n\n    @device.setter\n    def device(self, val: torch.device):\n        self.semantic_map_builder.device = torch.device(val)\n\n    def get_observation(\n        self,\n        env: RoboThorEnvironment,\n        task: Optional[Task[RoboThorEnvironment]],\n        *args: Any,\n        **kwargs: Any,\n    ) -> Any:\n        with include_object_data(env.controller):\n            last_event = env.controller.last_event\n            metadata = last_event.metadata\n\n            if task.num_steps_taken() == 0:\n                env.controller.step(\n                    \"Get2DSemanticHulls\", objectTypes=self.ordered_object_types\n                )\n                assert env.last_event.metadata[\n                    \"lastActionSuccess\"\n                ], f\"Get2DSemanticHulls failed with error '{env.last_event.metadata['lastActionSuccess']}'\"\n\n                object_id_to_hull = env.controller.last_event.metadata[\"actionReturn\"]\n\n                xz_ranges_dict = self.map_range_sensor.get_observation(\n                    env=env, task=task\n                )\n\n                self.semantic_map_builder.reset(\n                    min_xyz=np.array(\n                        [\n                            xz_ranges_dict[\"x_range\"][0],\n                        
    0,  # TODO: Should y be different per scene?\n                            xz_ranges_dict[\"z_range\"][0],\n                        ]\n                    ),\n                    object_hulls=[\n                        ObjectHull2d(\n                            object_id=o[\"objectId\"],\n                            object_type=o[\"objectType\"],\n                            hull_points=object_id_to_hull[o[\"objectId\"]],\n                        )\n                        for o in metadata[\"objects\"]\n                        if o[\"objectId\"] in object_id_to_hull\n                    ],\n                )\n\n            map_dict = self.semantic_map_builder.update(\n                depth_frame=last_event.depth_frame,\n                camera_xyz=np.array(\n                    [metadata[\"cameraPosition\"][k] for k in [\"x\", \"y\", \"z\"]]\n                ),\n                camera_rotation=metadata[\"agent\"][\"rotation\"][\"y\"],\n                camera_horizon=metadata[\"agent\"][\"cameraHorizon\"],\n            )\n            return {\n                k: map_dict[k] > 0.001 if map_dict[k].dtype != np.bool_ else map_dict[k]\n                for k in self.observation_space.spaces.keys()\n            }\n"
  },
  {
    "path": "allenact_plugins/ithor_plugin/ithor_task_samplers.py",
    "content": "import copy\nimport random\nfrom typing import List, Dict, Optional, Any, Union, cast\n\nimport gym\n\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.utils.experiment_utils import set_deterministic_cudnn, set_seed\nfrom allenact.utils.system import get_logger\nfrom allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment\nfrom allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask\n\n\nclass ObjectNavTaskSampler(TaskSampler):\n    def __init__(\n        self,\n        scenes: List[str],\n        object_types: str,\n        sensors: List[Sensor],\n        max_steps: int,\n        env_args: Dict[str, Any],\n        action_space: gym.Space,\n        scene_period: Optional[Union[int, str]] = None,\n        max_tasks: Optional[int] = None,\n        seed: Optional[int] = None,\n        deterministic_cudnn: bool = False,\n        **kwargs,\n    ) -> None:\n        self.env_args = env_args\n        self.scenes = scenes\n        self.object_types = object_types\n        self.grid_size = 0.25\n        self.env: Optional[IThorEnvironment] = None\n        self.sensors = sensors\n        self.max_steps = max_steps\n        self._action_space = action_space\n\n        self.scene_counter: Optional[int] = None\n        self.scene_order: Optional[List[str]] = None\n        self.scene_id: Optional[int] = None\n        self.scene_period: Optional[Union[str, int]] = (\n            scene_period  # default makes a random choice\n        )\n        self.max_tasks: Optional[int] = None\n        self.reset_tasks = max_tasks\n\n        self._last_sampled_task: Optional[ObjectNaviThorGridTask] = None\n\n        self.seed: Optional[int] = None\n        self.set_seed(seed)\n\n        if deterministic_cudnn:\n            set_deterministic_cudnn()\n\n        self.reset()\n\n    def _create_environment(self) -> IThorEnvironment:\n        env = IThorEnvironment(\n           
 make_agents_visible=False,\n            object_open_speed=0.05,\n            restrict_to_initially_reachable_points=True,\n            **self.env_args,\n        )\n        return env\n\n    @property\n    def length(self) -> Union[int, float]:\n        \"\"\"Length.\n\n        # Returns\n\n        Number of total tasks remaining that can be sampled. Can be float('inf').\n        \"\"\"\n        return float(\"inf\") if self.max_tasks is None else self.max_tasks\n\n    @property\n    def total_unique(self) -> Optional[Union[int, float]]:\n        return None\n\n    @property\n    def last_sampled_task(self) -> Optional[ObjectNaviThorGridTask]:\n        return self._last_sampled_task\n\n    def close(self) -> None:\n        if self.env is not None:\n            self.env.stop()\n\n    @property\n    def all_observation_spaces_equal(self) -> bool:\n        \"\"\"Check if observation spaces equal.\n\n        # Returns\n\n        True if all Tasks that can be sampled by this sampler have the\n            same observation space. 
Otherwise False.\n        \"\"\"\n        return True\n\n    def sample_scene(self, force_advance_scene: bool):\n        if force_advance_scene:\n            if self.scene_period != \"manual\":\n                get_logger().warning(\n                    \"When sampling scene, have `force_advance_scene == True`\"\n                    \"but `self.scene_period` is not equal to 'manual',\"\n                    \"this may cause unexpected behavior.\"\n                )\n            self.scene_id = (1 + self.scene_id) % len(self.scenes)\n            if self.scene_id == 0:\n                random.shuffle(self.scene_order)\n\n        if self.scene_period is None:\n            # Random scene\n            self.scene_id = random.randint(0, len(self.scenes) - 1)\n        elif self.scene_period == \"manual\":\n            pass\n        elif self.scene_counter >= cast(int, self.scene_period):\n            if self.scene_id == len(self.scene_order) - 1:\n                # Randomize scene order for next iteration\n                random.shuffle(self.scene_order)\n                # Move to next scene\n                self.scene_id = 0\n            else:\n                # Move to next scene\n                self.scene_id += 1\n            # Reset scene counter\n            self.scene_counter = 1\n        elif isinstance(self.scene_period, int):\n            # Stay in current scene\n            self.scene_counter += 1\n        else:\n            raise NotImplementedError(\n                \"Invalid scene_period {}\".format(self.scene_period)\n            )\n\n        if self.max_tasks is not None:\n            self.max_tasks -= 1\n\n        return self.scenes[int(self.scene_order[self.scene_id])]\n\n    def next_task(\n        self, force_advance_scene: bool = False\n    ) -> Optional[ObjectNaviThorGridTask]:\n        if self.max_tasks is not None and self.max_tasks <= 0:\n            return None\n\n        scene = self.sample_scene(force_advance_scene)\n\n        if self.env is not 
None:\n            if scene.replace(\"_physics\", \"\") != self.env.scene_name.replace(\n                \"_physics\", \"\"\n            ):\n                self.env.reset(scene)\n        else:\n            self.env = self._create_environment()\n            self.env.reset(scene_name=scene)\n\n        pose = self.env.randomize_agent_location()\n\n        object_types_in_scene = set(\n            [o[\"objectType\"] for o in self.env.last_event.metadata[\"objects\"]]\n        )\n\n        task_info: Dict[str, Any] = {}\n        for ot in random.sample(self.object_types, len(self.object_types)):\n            if ot in object_types_in_scene:\n                task_info[\"object_type\"] = ot\n                break\n\n        if len(task_info) == 0:\n            get_logger().warning(\n                \"Scene {} does not contain any\"\n                \" objects of any of the types {}.\".format(scene, self.object_types)\n            )\n\n        task_info[\"start_pose\"] = copy.copy(pose)\n        task_info[\"id\"] = (\n            f\"{scene}__{'_'.join(list(map(str, self.env.get_key(pose))))}__{task_info['object_type']}\"\n        )\n\n        self._last_sampled_task = ObjectNaviThorGridTask(\n            env=self.env,\n            sensors=self.sensors,\n            task_info=task_info,\n            max_steps=self.max_steps,\n            action_space=self._action_space,\n        )\n        return self._last_sampled_task\n\n    def reset(self):\n        self.scene_counter = 0\n        self.scene_order = list(range(len(self.scenes)))\n        random.shuffle(self.scene_order)\n        self.scene_id = 0\n        self.max_tasks = self.reset_tasks\n\n    def set_seed(self, seed: int):\n        self.seed = seed\n        if seed is not None:\n            set_seed(seed)\n"
  },
  {
    "path": "allenact_plugins/ithor_plugin/ithor_tasks.py",
    "content": "import random\nfrom typing import Dict, Tuple, List, Any, Optional, Union, Sequence, cast\n\nimport gym\nimport numpy as np\n\nfrom allenact.base_abstractions.misc import RLStepResult\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.base_abstractions.task import Task\nfrom allenact.utils.system import get_logger\nfrom allenact_plugins.ithor_plugin.ithor_constants import (\n    MOVE_AHEAD,\n    ROTATE_LEFT,\n    ROTATE_RIGHT,\n    LOOK_DOWN,\n    LOOK_UP,\n    END,\n)\n\nfrom allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment\nfrom allenact_plugins.ithor_plugin.ithor_util import round_to_factor\n\n\nclass ObjectNaviThorGridTask(Task[IThorEnvironment]):\n    \"\"\"Defines the object navigation task in AI2-THOR.\n\n    In object navigation an agent is randomly initialized into an AI2-THOR scene and must\n    find an object of a given type (e.g. tomato, television, etc). An object is considered\n    found if the agent takes an `End` action and the object is visible to the agent (see\n    [here](https://ai2thor.allenai.org/documentation/concepts) for a definition of visibility\n    in AI2-THOR).\n\n    The actions available to an agent in this task are:\n\n    1. Move ahead\n        * Moves agent ahead by 0.25 meters.\n    1. Rotate left / rotate right\n        * Rotates the agent by 90 degrees counter-clockwise / clockwise.\n    1. Look down / look up\n        * Changes agent view angle by 30 degrees up or down. An agent cannot look more than 30\n          degrees above horizontal or less than 60 degrees below horizontal.\n    1. End\n        * Ends the task and the agent receives a positive reward if the object type is visible to the agent,\n        otherwise it receives a negative reward.\n\n    # Attributes\n\n    env : The ai2thor environment.\n    sensor_suite: Collection of sensors formed from the `sensors` argument in the initializer.\n    task_info : The task info. 
Must contain a field \"object_type\" that specifies, as a string,\n        the goal object type.\n    max_steps : The maximum number of steps an agent can take in the task before it is considered failed.\n    observation_space: The observation space returned on each step from the sensors.\n    \"\"\"\n\n    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, LOOK_DOWN, LOOK_UP, END)\n\n    _CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE: Dict[\n        Tuple[str, str], List[Tuple[float, float, int, int]]\n    ] = {}\n\n    def __init__(\n        self,\n        env: IThorEnvironment,\n        sensors: List[Sensor],\n        task_info: Dict[str, Any],\n        max_steps: int,\n        **kwargs,\n    ) -> None:\n        \"\"\"Initializer.\n\n        See class documentation for parameter definitions.\n        \"\"\"\n        super().__init__(\n            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs\n        )\n        self._took_end_action: bool = False\n        self._success: Optional[bool] = False\n        self._subsampled_locations_from_which_obj_visible: Optional[\n            List[Tuple[float, float, int, int]]\n        ] = None\n\n        self.task_info[\"followed_path\"] = [self.env.get_agent_location()]\n        self.task_info[\"action_names\"] = self.class_action_names()\n\n    @property\n    def action_space(self):\n        return gym.spaces.Discrete(len(self._actions))\n\n    def reached_terminal_state(self) -> bool:\n        return self._took_end_action\n\n    @classmethod\n    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:\n        return cls._actions\n\n    def close(self) -> None:\n        self.env.stop()\n\n    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:\n        assert isinstance(action, int)\n        action = cast(int, action)\n\n        action_str = self.class_action_names()[action]\n\n        if action_str == END:\n            self._took_end_action = True\n            self._success 
= self.is_goal_object_visible()\n            self.last_action_success = self._success\n        else:\n            self.env.step({\"action\": action_str})\n            self.last_action_success = self.env.last_action_success\n\n            if (\n                not self.last_action_success\n            ) and self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE is not None:\n                self.env.update_graph_with_failed_action(failed_action=action_str)\n\n            self.task_info[\"followed_path\"].append(self.env.get_agent_location())\n\n        step_result = RLStepResult(\n            observation=self.get_observations(),\n            reward=self.judge(),\n            done=self.is_done(),\n            info={\"last_action_success\": self.last_action_success},\n        )\n        return step_result\n\n    def render(self, mode: str = \"rgb\", *args, **kwargs) -> np.ndarray:\n        assert mode == \"rgb\", \"only rgb rendering is implemented\"\n        return self.env.current_frame\n\n    def is_goal_object_visible(self) -> bool:\n        \"\"\"Is the goal object currently visible?\"\"\"\n        return any(\n            o[\"objectType\"] == self.task_info[\"object_type\"]\n            for o in self.env.visible_objects()\n        )\n\n    def judge(self) -> float:\n        \"\"\"Compute the reward after having taken a step.\"\"\"\n        reward = -0.01\n\n        if not self.last_action_success:\n            reward += -0.03\n\n        if self._took_end_action:\n            reward += 1.0 if self._success else -1.0\n\n        return float(reward)\n\n    def metrics(self) -> Dict[str, Any]:\n        if not self.is_done():\n            return {}\n        else:\n            return {\n                \"success\": self._success,\n                **super(ObjectNaviThorGridTask, self).metrics(),\n            }\n\n    def query_expert(self, **kwargs) -> Tuple[int, bool]:\n        target = self.task_info[\"object_type\"]\n\n        if self.is_goal_object_visible():\n        
    return self.class_action_names().index(END), True\n        else:\n            key = (self.env.scene_name, target)\n            if self._subsampled_locations_from_which_obj_visible is None:\n                if key not in self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE:\n                    obj_ids: List[str] = []\n                    obj_ids.extend(\n                        o[\"objectId\"]\n                        for o in self.env.last_event.metadata[\"objects\"]\n                        if o[\"objectType\"] == target\n                    )\n\n                    assert len(obj_ids) != 0, \"No objects to get an expert path to.\"\n\n                    locations_from_which_object_is_visible: List[\n                        Tuple[float, float, int, int]\n                    ] = []\n                    y = self.env.last_event.metadata[\"agent\"][\"position\"][\"y\"]\n                    positions_to_check_interactionable_from = [\n                        {\"x\": x, \"y\": y, \"z\": z}\n                        for x, z in set((x, z) for x, z, _, _ in self.env.graph.nodes)\n                    ]\n                    for obj_id in set(obj_ids):\n                        self.env.controller.step(\n                            {\n                                \"action\": \"PositionsFromWhichItemIsInteractable\",\n                                \"objectId\": obj_id,\n                                \"positions\": positions_to_check_interactionable_from,\n                            }\n                        )\n                        assert (\n                            self.env.last_action_success\n                        ), \"Could not get positions from which item was interactable.\"\n\n                        returned = self.env.last_event.metadata[\"actionReturn\"]\n                        locations_from_which_object_is_visible.extend(\n                            (\n                                round(x, 2),\n                                round(z, 2),\n  
                              round_to_factor(rot, 90) % 360,\n                                round_to_factor(hor, 30) % 360,\n                            )\n                            for x, z, rot, hor, standing in zip(\n                                returned[\"x\"],\n                                returned[\"z\"],\n                                returned[\"rotation\"],\n                                returned[\"horizon\"],\n                                returned[\"standing\"],\n                            )\n                            if standing == 1\n                        )\n\n                    self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE[key] = (\n                        locations_from_which_object_is_visible\n                    )\n\n                self._subsampled_locations_from_which_obj_visible = (\n                    self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE[key]\n                )\n                if len(self._subsampled_locations_from_which_obj_visible) > 5:\n                    self._subsampled_locations_from_which_obj_visible = random.sample(\n                        self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE[key], 5\n                    )\n\n            current_loc_key = self.env.get_key(self.env.last_event.metadata[\"agent\"])\n            paths = []\n\n            for goal_key in self._subsampled_locations_from_which_obj_visible:\n                path = self.env.shortest_state_path(\n                    source_state_key=current_loc_key, goal_state_key=goal_key\n                )\n                if path is not None:\n                    paths.append(path)\n            if len(paths) == 0:\n                return 0, False\n\n            shortest_path_ind = int(np.argmin([len(p) for p in paths]))\n\n            if len(paths[shortest_path_ind]) == 1:\n                get_logger().warning(\n                    \"Shortest path computations suggest we are at the target but episode does not think so.\"\n           
     )\n                return 0, False\n\n            next_key_on_shortest_path = paths[shortest_path_ind][1]\n            return (\n                self.class_action_names().index(\n                    self.env.action_transitioning_between_keys(\n                        current_loc_key, next_key_on_shortest_path\n                    )\n                ),\n                True,\n            )\n"
  },
  {
    "path": "allenact_plugins/ithor_plugin/ithor_util.py",
    "content": "import glob\nimport math\nimport os\nimport platform\nimport traceback\nimport warnings\nfrom contextlib import contextmanager\nfrom typing import Sequence\n\nimport Xlib\nimport Xlib.display\nimport ai2thor.controller\n\n\n@contextmanager\ndef include_object_data(controller: ai2thor.controller.Controller):\n    needs_reset = len(controller.last_event.metadata[\"objects\"]) == 0\n    try:\n        if needs_reset:\n            controller.step(\"ResetObjectFilter\")\n            assert controller.last_event.metadata[\"lastActionSuccess\"]\n        yield None\n    finally:\n        if needs_reset:\n            controller.step(\"SetObjectFilter\", objectIds=[])\n            assert controller.last_event.metadata[\"lastActionSuccess\"]\n\n\ndef vertical_to_horizontal_fov(\n    vertical_fov_in_degrees: float, height: float, width: float\n):\n    assert 0 < vertical_fov_in_degrees < 180\n    aspect_ratio = width / height\n    vertical_fov_in_rads = (math.pi / 180) * vertical_fov_in_degrees\n    return (\n        (180 / math.pi)\n        * math.atan(math.tan(vertical_fov_in_rads * 0.5) * aspect_ratio)\n        * 2\n    )\n\n\ndef horizontal_to_vertical_fov(\n    horizontal_fov_in_degrees: float, height: float, width: float\n):\n    return vertical_to_horizontal_fov(\n        vertical_fov_in_degrees=horizontal_fov_in_degrees,\n        height=width,\n        width=height,\n    )\n\n\ndef round_to_factor(num: float, base: int) -> int:\n    \"\"\"Rounds floating point number to the nearest integer multiple of the\n    given base. 
E.g., for floating number 90.1 and integer base 45, the result\n    is 90.\n\n    # Attributes\n\n    num : floating point number to be rounded.\n    base: integer base\n    \"\"\"\n    return round(num / base) * base\n\n\ndef get_open_x_displays(throw_error_if_empty: bool = False) -> Sequence[str]:\n    assert platform.system() == \"Linux\", \"Can only get X-displays for Linux systems.\"\n\n    displays = []\n\n    open_display_strs = [\n        os.path.basename(s)[1:] for s in glob.glob(\"/tmp/.X11-unix/X*\")\n    ]\n\n    for open_display_str in sorted(open_display_strs):\n        try:\n            open_display_str = str(int(open_display_str))\n            display = Xlib.display.Display(f\":{open_display_str}\")\n        except Exception:\n            warnings.warn(\n                f\"Encountered error when attempting to open display :{open_display_str},\"\n                f\" error message:\\n{traceback.format_exc()}\"\n            )\n            continue\n\n        displays.extend(\n            [f\"{open_display_str}.{i}\" for i in range(display.screen_count())]\n        )\n\n    if throw_error_if_empty and len(displays) == 0:\n        raise IOError(\n            \"Could not find any open X-displays on which to run AI2-THOR processes. \"\n            \" Please see the AI2-THOR installation instructions at\"\n            \" https://allenact.org/installation/installation-framework/#installation-of-ithor-ithor-plugin\"\n            \" for information as to how to start such displays.\"\n        )\n\n    return displays\n"
  },
  {
    "path": "allenact_plugins/ithor_plugin/ithor_viz.py",
    "content": "import copy\nimport json\nimport math\nimport os\nfrom typing import Tuple, Sequence, Union, Dict, Optional, Any, cast, Generator, List\n\nimport colour as col\nimport cv2\nimport numpy as np\nfrom PIL import Image, ImageDraw\nfrom ai2thor.controller import Controller\nfrom matplotlib import pyplot as plt\nfrom matplotlib.figure import Figure\n\nfrom allenact.utils.system import get_logger\nfrom allenact.utils.viz_utils import TrajectoryViz\n\nITHOR_VIZ_CACHED_TOPDOWN_VIEWS_DIR = os.path.join(\n    os.path.expanduser(\"~\"), \".allenact\", \"ithor\", \"top_down_viz_cache\"\n)\n\n\nclass ThorPositionTo2DFrameTranslator(object):\n    def __init__(\n        self,\n        frame_shape_rows_cols: Tuple[int, int],\n        cam_position: Sequence[float],\n        orth_size: float,\n    ):\n        self.frame_shape = frame_shape_rows_cols\n        self.lower_left = np.array((cam_position[0], cam_position[2])) - orth_size\n        self.span = 2 * orth_size\n\n    def __call__(self, position: Sequence[float]):\n        if len(position) == 3:\n            x, _, z = position\n        else:\n            x, z = position\n\n        camera_position = (np.array((x, z)) - self.lower_left) / self.span\n        return np.array(\n            (\n                round(self.frame_shape[0] * (1.0 - camera_position[1])),\n                round(self.frame_shape[1] * camera_position[0]),\n            ),\n            dtype=int,\n        )\n\n\nclass ThorViz(TrajectoryViz):\n    def __init__(\n        self,\n        path_to_trajectory: Sequence[str] = (\"task_info\", \"followed_path\"),\n        label: str = \"thor_trajectory\",\n        figsize: Tuple[float, float] = (8, 8),  # width, height\n        fontsize: float = 10,\n        scenes: Union[Tuple[str, int, int], Sequence[Tuple[str, int, int]]] = (\n            (\"FloorPlan{}_physics\", 1, 30),\n            (\"FloorPlan{}_physics\", 201, 230),\n            (\"FloorPlan{}_physics\", 301, 330),\n            
(\"FloorPlan{}_physics\", 401, 430),\n        ),\n        viz_rows_cols: Tuple[int, int] = (448, 448),\n        single_color: bool = False,\n        view_triangle_only_on_last: bool = True,\n        disable_view_triangle: bool = False,\n        line_opacity: float = 1.0,\n        path_to_rot_degrees: Sequence[str] = (\"rotation\",),\n        **kwargs,\n    ):\n        super().__init__(\n            path_to_trajectory=path_to_trajectory,\n            label=label,\n            figsize=figsize,\n            fontsize=fontsize,\n            path_to_rot_degrees=path_to_rot_degrees,\n            **kwargs,\n        )\n\n        if isinstance(scenes[0], str):\n            scenes = [cast(Tuple[str, int, int], scenes)]  # make it list of tuples\n        self.scenes = cast(List[Tuple[str, int, int]], scenes)\n\n        self.room_path = ITHOR_VIZ_CACHED_TOPDOWN_VIEWS_DIR\n        os.makedirs(self.room_path, exist_ok=True)\n\n        self.viz_rows_cols = viz_rows_cols\n        self.single_color = single_color\n        self.view_triangle_only_on_last = view_triangle_only_on_last\n        self.disable_view_triangle = disable_view_triangle\n        self.line_opacity = line_opacity\n\n        # Only needed for rendering\n        self.map_data: Optional[Dict[str, Any]] = None\n        self.thor_top_downs: Optional[Dict[str, np.ndarray]] = None\n\n        self.controller: Optional[Controller] = None\n\n    def init_top_down_render(self):\n        self.map_data = self.get_translator()\n        self.thor_top_downs = self.make_top_down_views()\n\n        # No controller needed after this point\n        if self.controller is not None:\n            self.controller.stop()\n            self.controller = None\n\n    @staticmethod\n    def iterate_scenes(\n        all_scenes: Sequence[Tuple[str, int, int]]\n    ) -> Generator[str, None, None]:\n        for scenes in all_scenes:\n            for wall in range(scenes[1], scenes[2] + 1):\n                roomname = scenes[0].format(wall)\n        
        yield roomname\n\n    def cached_map_data_path(self, roomname: str) -> str:\n        return os.path.join(self.room_path, \"map_data__{}.json\".format(roomname))\n\n    def get_translator(self) -> Dict[str, Any]:\n        # roomname = list(ThorViz.iterate_scenes(self.scenes))[0]\n        all_map_data = {}\n        for roomname in ThorViz.iterate_scenes(self.scenes):\n            json_file = self.cached_map_data_path(roomname)\n            if not os.path.exists(json_file):\n                self.make_controller()\n                self.controller.reset(roomname)\n                map_data = self.get_agent_map_data()\n                get_logger().info(\"Dumping {}\".format(json_file))\n                with open(json_file, \"w\") as f:\n                    json.dump(map_data, f, indent=4, sort_keys=True)\n            else:\n                with open(json_file, \"r\") as f:\n                    map_data = json.load(f)\n\n            pos_translator = ThorPositionTo2DFrameTranslator(\n                self.viz_rows_cols,\n                self.position_to_tuple(map_data[\"cam_position\"]),\n                map_data[\"cam_orth_size\"],\n            )\n            map_data[\"pos_translator\"] = pos_translator\n            all_map_data[roomname] = map_data\n\n        get_logger().debug(\"Using map_data {}\".format(all_map_data))\n        return all_map_data\n\n    def cached_image_path(self, roomname: str) -> str:\n        return os.path.join(\n            self.room_path, \"{}__r{}_c{}.png\".format(roomname, *self.viz_rows_cols)\n        )\n\n    def make_top_down_views(self) -> Dict[str, np.ndarray]:\n        top_downs = {}\n        for roomname in self.iterate_scenes(self.scenes):\n            fname = self.cached_image_path(roomname)\n            if not os.path.exists(fname):\n                self.make_controller()\n                self.dump_top_down_view(roomname, fname)\n            top_downs[roomname] = cv2.imread(fname)\n\n        return top_downs\n\n    def 
crop_viz_image(self, viz_image: np.ndarray) -> np.ndarray:\n        y_min = int(self.viz_rows_cols[0] * 0)\n        y_max = int(self.viz_rows_cols[0] * 1)\n        # But it covers approximately the entire width:\n        x_min = 0\n        x_max = self.viz_rows_cols[1]\n        cropped_viz_image = viz_image[y_min:y_max, x_min:x_max, :]\n        return cropped_viz_image\n\n    def make_controller(self):\n        if self.controller is None:\n            self.controller = Controller()\n\n            self.controller.step({\"action\": \"ChangeQuality\", \"quality\": \"Very High\"})\n            self.controller.step(\n                {\n                    \"action\": \"ChangeResolution\",\n                    \"x\": self.viz_rows_cols[1],\n                    \"y\": self.viz_rows_cols[0],\n                }\n            )\n\n    def get_agent_map_data(self):\n        self.controller.step({\"action\": \"ToggleMapView\"})\n        cam_position = self.controller.last_event.metadata[\"cameraPosition\"]\n        cam_orth_size = self.controller.last_event.metadata[\"cameraOrthSize\"]\n        to_return = {\n            \"cam_position\": cam_position,\n            \"cam_orth_size\": cam_orth_size,\n        }\n        self.controller.step({\"action\": \"ToggleMapView\"})\n        return to_return\n\n    @staticmethod\n    def position_to_tuple(position: Dict[str, float]) -> Tuple[float, float, float]:\n        return position[\"x\"], position[\"y\"], position[\"z\"]\n\n    @staticmethod\n    def add_lines_to_map(\n        ps: Sequence[Any],\n        frame: np.ndarray,\n        pos_translator: ThorPositionTo2DFrameTranslator,\n        opacity: float,\n        color: Optional[Tuple[int, ...]] = None,\n    ) -> np.ndarray:\n        if len(ps) <= 1:\n            return frame\n        if color is None:\n            color = (255, 0, 0)\n\n        img1 = Image.fromarray(frame.astype(\"uint8\"), \"RGB\").convert(\"RGBA\")\n        img2 = Image.new(\"RGBA\", frame.shape[:-1])  # Use 
RGBA\n\n        opacity = int(round(255 * opacity))  # Define transparency for the triangle.\n        draw = ImageDraw.Draw(img2)\n        for i in range(len(ps) - 1):\n            draw.line(\n                tuple(reversed(pos_translator(ps[i])))\n                + tuple(reversed(pos_translator(ps[i + 1]))),\n                fill=color + (opacity,),\n                width=int(frame.shape[0] / 100),\n            )\n\n        img = Image.alpha_composite(img1, img2)\n        return np.array(img.convert(\"RGB\"))\n\n    @staticmethod\n    def add_line_to_map(\n        p0: Any,\n        p1: Any,\n        frame: np.ndarray,\n        pos_translator: ThorPositionTo2DFrameTranslator,\n        opacity: float,\n        color: Optional[Tuple[int, ...]] = None,\n    ) -> np.ndarray:\n        if p0 == p1:\n            return frame\n        if color is None:\n            color = (255, 0, 0)\n\n        img1 = Image.fromarray(frame.astype(\"uint8\"), \"RGB\").convert(\"RGBA\")\n        img2 = Image.new(\"RGBA\", frame.shape[:-1])  # Use RGBA\n\n        opacity = int(round(255 * opacity))  # Define transparency for the triangle.\n        draw = ImageDraw.Draw(img2)\n        draw.line(\n            tuple(reversed(pos_translator(p0))) + tuple(reversed(pos_translator(p1))),\n            fill=color + (opacity,),\n            width=int(frame.shape[0] / 100),\n        )\n\n        img = Image.alpha_composite(img1, img2)\n        return np.array(img.convert(\"RGB\"))\n\n    @staticmethod\n    def add_agent_view_triangle(\n        position: Any,\n        rotation: float,\n        frame: np.ndarray,\n        pos_translator: ThorPositionTo2DFrameTranslator,\n        scale: float = 1.0,\n        opacity: float = 0.1,\n    ) -> np.ndarray:\n        p0 = np.array((position[0], position[2]))\n        p1 = copy.copy(p0)\n        p2 = copy.copy(p0)\n\n        theta = -2 * math.pi * (rotation / 360.0)\n        rotation_mat = np.array(\n            [[math.cos(theta), -math.sin(theta)], 
[math.sin(theta), math.cos(theta)]]\n        )\n        offset1 = scale * np.array([-1 / 2.0, 1])\n        offset2 = scale * np.array([1 / 2.0, 1])\n\n        p1 += np.matmul(rotation_mat, offset1)\n        p2 += np.matmul(rotation_mat, offset2)\n\n        img1 = Image.fromarray(frame.astype(\"uint8\"), \"RGB\").convert(\"RGBA\")\n        img2 = Image.new(\"RGBA\", frame.shape[:-1])  # Use RGBA\n\n        opacity = int(round(255 * opacity))  # Define transparency for the triangle.\n        points = [tuple(reversed(pos_translator(p))) for p in [p0, p1, p2]]\n        draw = ImageDraw.Draw(img2)\n        draw.polygon(points, fill=(255, 255, 255, opacity))\n\n        img = Image.alpha_composite(img1, img2)\n        return np.array(img.convert(\"RGB\"))\n\n    @staticmethod\n    def visualize_agent_path(\n        positions: Sequence[Any],\n        frame: np.ndarray,\n        pos_translator: ThorPositionTo2DFrameTranslator,\n        single_color: bool = False,\n        view_triangle_only_on_last: bool = False,\n        disable_view_triangle: bool = False,\n        line_opacity: float = 1.0,\n        trajectory_start_end_color_str: Tuple[str, str] = (\"red\", \"green\"),\n    ) -> np.ndarray:\n        if single_color:\n            frame = ThorViz.add_lines_to_map(\n                list(map(ThorViz.position_to_tuple, positions)),\n                frame,\n                pos_translator,\n                line_opacity,\n                tuple(\n                    map(\n                        lambda x: int(round(255 * x)),\n                        col.Color(trajectory_start_end_color_str[0]).rgb,\n                    )\n                ),\n            )\n        else:\n            if len(positions) > 1:\n                colors = list(\n                    col.Color(trajectory_start_end_color_str[0]).range_to(\n                        col.Color(trajectory_start_end_color_str[1]), len(positions) - 1\n                    )\n                )\n                for i in 
range(len(positions) - 1):\n                    frame = ThorViz.add_line_to_map(\n                        ThorViz.position_to_tuple(positions[i]),\n                        ThorViz.position_to_tuple(positions[i + 1]),\n                        frame,\n                        pos_translator,\n                        opacity=line_opacity,\n                        color=tuple(map(lambda x: int(round(255 * x)), colors[i].rgb)),\n                    )\n\n        if view_triangle_only_on_last:\n            positions = [positions[-1]]\n        if disable_view_triangle:\n            positions = []\n        for position in positions:\n            frame = ThorViz.add_agent_view_triangle(\n                ThorViz.position_to_tuple(position),\n                rotation=position[\"rotation\"],\n                frame=frame,\n                pos_translator=pos_translator,\n                opacity=0.05 + view_triangle_only_on_last * 0.2,\n            )\n        return frame\n\n    def dump_top_down_view(self, room_name: str, image_path: str):\n        get_logger().debug(\"Dumping {}\".format(image_path))\n\n        self.controller.reset(room_name)\n        self.controller.step(\n            {\"action\": \"Initialize\", \"gridSize\": 0.1, \"makeAgentsVisible\": False}\n        )\n        self.controller.step({\"action\": \"ToggleMapView\"})\n        top_down_view = self.controller.last_event.cv2img\n\n        cv2.imwrite(image_path, top_down_view)\n\n    def make_fig(self, episode: Any, episode_id: str) -> Figure:\n        trajectory: Sequence[Dict[str, Any]] = self._access(\n            episode, self.path_to_trajectory\n        )\n\n        if self.thor_top_downs is None:\n            self.init_top_down_render()\n\n        roomname = \"_\".join(episode_id.split(\"_\")[:2])\n\n        im = self.visualize_agent_path(\n            trajectory,\n            self.thor_top_downs[roomname],\n            self.map_data[roomname][\"pos_translator\"],\n            
single_color=self.single_color,\n            view_triangle_only_on_last=self.view_triangle_only_on_last,\n            disable_view_triangle=self.disable_view_triangle,\n            line_opacity=self.line_opacity,\n        )\n\n        fig, ax = plt.subplots(figsize=self.figsize)\n        ax.set_title(episode_id, fontsize=self.fontsize)\n        ax.imshow(self.crop_viz_image(im)[:, :, ::-1])\n        ax.axis(\"off\")\n\n        return fig\n\n\nclass ThorMultiViz(ThorViz):\n    def __init__(\n        self,\n        path_to_trajectory_prefix: Sequence[str] = (\"task_info\", \"followed_path\"),\n        agent_suffixes: Sequence[str] = (\"1\", \"2\"),\n        label: str = \"thor_trajectories\",\n        trajectory_start_end_color_strs: Sequence[Tuple[str, str]] = (\n            (\"red\", \"green\"),\n            (\"cyan\", \"purple\"),\n        ),\n        **kwargs,\n    ):\n        super().__init__(label=label, **kwargs)\n\n        self.path_to_trajectory_prefix = list(path_to_trajectory_prefix)\n        self.agent_suffixes = list(agent_suffixes)\n        self.trajectory_start_end_color_strs = list(trajectory_start_end_color_strs)\n\n    def make_fig(self, episode: Any, episode_id: str) -> Figure:\n        if self.thor_top_downs is None:\n            self.init_top_down_render()\n\n        roomname = \"_\".join(episode_id.split(\"_\")[:2])\n        im = self.thor_top_downs[roomname]\n\n        for agent, start_end_color in zip(\n            self.agent_suffixes, self.trajectory_start_end_color_strs\n        ):\n            path = self.path_to_trajectory_prefix[:]\n            path[-1] = path[-1] + agent\n            trajectory = self._access(episode, path)\n\n            im = self.visualize_agent_path(\n                trajectory,\n                im,\n                self.map_data[roomname][\"pos_translator\"],\n                single_color=self.single_color,\n                view_triangle_only_on_last=self.view_triangle_only_on_last,\n                
disable_view_triangle=self.disable_view_triangle,\n                line_opacity=self.line_opacity,\n                trajectory_start_end_color_str=start_end_color,\n            )\n\n        fig, ax = plt.subplots(figsize=self.figsize)\n        ax.set_title(episode_id, fontsize=self.fontsize)\n        ax.imshow(self.crop_viz_image(im)[:, :, ::-1])\n        ax.axis(\"off\")\n\n        return fig\n"
  },
  {
    "path": "allenact_plugins/ithor_plugin/scripts/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/ithor_plugin/scripts/make_objectnav_debug_dataset.py",
    "content": "import os\n\nfrom allenact_plugins.robothor_plugin.scripts.make_objectnav_debug_dataset import (\n    create_debug_dataset_from_train_dataset,\n)\n\nif __name__ == \"__main__\":\n    CURRENT_PATH = os.getcwd()\n\n    SCENE = \"FloorPlan1\"\n    TARGET = \"Apple\"\n    EPISODES = [0, 7, 11, 12]\n    BASE_OUT = os.path.join(CURRENT_PATH, \"datasets\", \"ithor-objectnav\", \"debug\")\n\n    create_debug_dataset_from_train_dataset(\n        scene=SCENE,\n        target_object_type=TARGET,\n        episodes_subset=EPISODES,\n        train_dataset_path=os.path.join(\n            CURRENT_PATH, \"datasets\", \"ithor-objectnav\", \"train\"\n        ),\n        base_debug_output_path=BASE_OUT,\n    )\n"
  },
  {
    "path": "allenact_plugins/ithor_plugin/scripts/make_pointnav_debug_dataset.py",
    "content": "import os\n\nfrom allenact_plugins.robothor_plugin.scripts.make_objectnav_debug_dataset import (\n    create_debug_dataset_from_train_dataset,\n)\n\nif __name__ == \"__main__\":\n    CURRENT_PATH = os.getcwd()\n    SCENE = \"FloorPlan1\"\n    EPISODES = [0, 7, 11, 12]\n    BASE_OUT = os.path.join(CURRENT_PATH, \"datasets\", \"ithor-pointnav\", \"debug\")\n\n    create_debug_dataset_from_train_dataset(\n        scene=SCENE,\n        target_object_type=None,\n        episodes_subset=EPISODES,\n        train_dataset_path=os.path.join(\n            CURRENT_PATH, \"datasets\", \"ithor-pointnav\", \"train\"\n        ),\n        base_debug_output_path=BASE_OUT,\n    )\n"
  },
  {
    "path": "allenact_plugins/lighthouse_plugin/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/lighthouse_plugin/configs/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/lighthouse_plugin/data/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/lighthouse_plugin/extra_environment.yml",
    "content": "dependencies:\n  - patsy>=0.5.1\n  - pip\n  - pip:\n      - gym-minigrid>=1.0.1\n"
  },
  {
    "path": "allenact_plugins/lighthouse_plugin/extra_requirements.txt",
    "content": "patsy>=0.5.1\ngym-minigrid>=1.0.1\n"
  },
  {
    "path": "allenact_plugins/lighthouse_plugin/lighthouse_environment.py",
    "content": "import copy\nimport curses\nimport itertools\nimport time\nfrom functools import lru_cache\nfrom typing import Optional, Tuple, Any, List, Union, cast\n\nimport numpy as np\nfrom gym.utils import seeding\nfrom gym_minigrid import minigrid\n\nEMPTY = 0\nGOAL = 1\nWRONG_CORNER = 2\nWALL = 3\n\n\n@lru_cache(1000)\ndef _get_world_corners(world_dim: int, world_radius: int):\n    if world_radius == 0:\n        return ((0,) * world_dim,)\n\n    def combination_to_vec(comb) -> Tuple[int, ...]:\n        vec = [world_radius] * world_dim\n        for k in comb:\n            vec[k] *= -1\n        return tuple(vec)\n\n    return tuple(\n        sorted(\n            combination_to_vec(comb)\n            for i in range(world_dim + 1)\n            for comb in itertools.combinations(list(range(world_dim)), i)\n        )\n    )\n\n\n@lru_cache(1000)\ndef _base_world_tensor(world_dim: int, world_radius: int):\n    tensor = np.full((2 * world_radius + 1,) * world_dim, fill_value=EMPTY)\n\n    slices: List[Union[slice, int]] = [slice(0, 2 * world_radius + 1)] * world_dim\n    for i in range(world_dim):\n        tmp_slices = [*slices]\n        tmp_slices[i] = 0\n        tensor[tuple(tmp_slices)] = WALL\n        tmp_slices[i] = 2 * world_radius\n        tensor[tuple(tmp_slices)] = WALL\n\n    for corner in _get_world_corners(world_dim=world_dim, world_radius=world_radius):\n        tensor[tuple([loc + world_radius for loc in corner])] = WRONG_CORNER\n\n    return tensor\n\n\nclass LightHouseEnvironment(object):\n    EMPTY = 0\n    GOAL = 1\n    WRONG_CORNER = 2\n    WALL = 3\n    SPACE_LEVELS = [EMPTY, GOAL, WRONG_CORNER, WALL]\n\n    def __init__(self, world_dim: int, world_radius: int, **kwargs):\n        self.world_dim = world_dim\n        self.world_radius = world_radius\n\n        self.world_corners = np.array(\n            _get_world_corners(world_dim=world_dim, world_radius=world_radius),\n            dtype=int,\n        )\n\n        self.curses_screen: 
Optional[Any] = None\n\n        self.world_tensor: np.ndarray = copy.deepcopy(\n            _base_world_tensor(world_radius=world_radius, world_dim=world_dim)\n        )\n        self.current_position = np.zeros(world_dim, dtype=int)\n        self.closest_distance_to_corners = np.full(\n            2**world_dim, fill_value=world_radius, dtype=int\n        )\n        self.positions: List[Tuple[int, ...]] = [tuple(self.current_position)]\n        self.goal_position: Optional[np.ndarray] = None\n        self.last_action: Optional[int] = None\n\n        self.seed: Optional[int] = None\n        self.np_seeded_random_gen: Optional[np.random.RandomState] = None\n        self.set_seed(seed=int(kwargs.get(\"seed\", np.random.randint(0, 2**31 - 1))))\n\n        self.random_reset()\n\n    def set_seed(self, seed: int):\n        # More information about why `np_seeded_random_gen` is used rather than just `np.random.seed`\n        # can be found at gym/utils/seeding.py\n        # There's literature indicating that having linear correlations between seeds of multiple\n        # PRNG's can correlate the outputs\n        self.seed = seed\n        self.np_seeded_random_gen, _ = cast(\n            Tuple[np.random.RandomState, Any], seeding.np_random(self.seed)\n        )\n\n    def random_reset(self, goal_position: Optional[bool] = None):\n        self.last_action = None\n        self.world_tensor = copy.deepcopy(\n            _base_world_tensor(world_radius=self.world_radius, world_dim=self.world_dim)\n        )\n        if goal_position is None:\n            self.goal_position = self.world_corners[\n                self.np_seeded_random_gen.randint(low=0, high=len(self.world_corners))\n            ]\n        self.world_tensor[\n            tuple(cast(np.ndarray, self.world_radius + self.goal_position))\n        ] = GOAL\n\n        if self.curses_screen is not None:\n            curses.nocbreak()\n            self.curses_screen.keypad(False)\n            curses.echo()\n            
curses.endwin()\n\n        self.curses_screen = None\n\n        self.current_position = np.zeros(self.world_dim, dtype=int)\n        self.closest_distance_to_corners = np.abs(\n            (self.world_corners - self.current_position.reshape(1, -1))\n        ).max(1)\n\n        self.positions = [tuple(self.current_position)]\n\n    def step(self, action: int) -> bool:\n        assert 0 <= action < 2 * self.world_dim\n        self.last_action = action\n\n        delta = -1 if action >= self.world_dim else 1\n        ind = action % self.world_dim\n        old = self.current_position[ind]\n        new = min(max(delta + old, -self.world_radius), self.world_radius)\n        if new == old:\n            self.positions.append(self.positions[-1])\n            return False\n        else:\n            self.current_position[ind] = new\n            self.closest_distance_to_corners = np.minimum(\n                np.abs((self.world_corners - self.current_position.reshape(1, -1))).max(\n                    1\n                ),\n                self.closest_distance_to_corners,\n            )\n            self.positions.append(tuple(self.current_position))\n            return True\n\n    def render(self, mode=\"array\", **kwargs):\n        if mode == \"array\":\n            arr = copy.deepcopy(self.world_tensor)\n            arr[tuple(self.world_radius + self.current_position)] = 9\n            return arr\n\n        elif mode == \"curses\":\n            if self.world_dim == 1:\n                space_list = [\"_\"] * (1 + 2 * self.world_radius)\n\n                goal_ind = self.goal_position[0] + self.world_radius\n                space_list[goal_ind] = \"G\"\n                space_list[2 * self.world_radius - goal_ind] = \"W\"\n                space_list[self.current_position[0] + self.world_radius] = \"X\"\n\n                to_print = \" \".join(space_list)\n\n                if self.curses_screen is None:\n                    self.curses_screen = curses.initscr()\n\n            
    self.curses_screen.addstr(0, 0, to_print)\n                if \"extra_text\" in kwargs:\n                    self.curses_screen.addstr(1, 0, kwargs[\"extra_text\"])\n                self.curses_screen.refresh()\n            elif self.world_dim == 2:\n                space_list = [\n                    [\"_\"] * (1 + 2 * self.world_radius)\n                    for _ in range(1 + 2 * self.world_radius)\n                ]\n\n                for row_ind in range(1 + 2 * self.world_radius):\n                    for col_ind in range(1 + 2 * self.world_radius):\n                        if self.world_tensor[row_ind][col_ind] == self.GOAL:\n                            space_list[row_ind][col_ind] = \"G\"\n\n                        if self.world_tensor[row_ind][col_ind] == self.WRONG_CORNER:\n                            space_list[row_ind][col_ind] = \"C\"\n\n                        if self.world_tensor[row_ind][col_ind] == self.WALL:\n                            space_list[row_ind][col_ind] = \"W\"\n\n                        if (\n                            (row_ind, col_ind)\n                            == self.world_radius + self.current_position\n                        ).all():\n                            space_list[row_ind][col_ind] = \"X\"\n\n                if self.curses_screen is None:\n                    self.curses_screen = curses.initscr()\n\n                for i, sl in enumerate(space_list):\n                    self.curses_screen.addstr(i, 0, \" \".join(sl))\n\n                self.curses_screen.addstr(len(space_list), 0, str(self.state()))\n                if \"extra_text\" in kwargs:\n                    self.curses_screen.addstr(\n                        len(space_list) + 1, 0, kwargs[\"extra_text\"]\n                    )\n\n                self.curses_screen.refresh()\n            else:\n                raise NotImplementedError(\"Cannot render worlds of > 2 dimensions.\")\n        elif mode == \"minigrid\":\n            height = width = 2 * 
self.world_radius + 2\n            grid = minigrid.Grid(width, height)\n\n            # Generate the surrounding walls\n            grid.horz_wall(0, 0)\n            grid.horz_wall(0, height - 1)\n            grid.vert_wall(0, 0)\n            grid.vert_wall(width - 1, 0)\n\n            # Place fake agent at the center\n            agent_pos = np.array(self.positions[-1]) + 1 + self.world_radius\n            # grid.set(*agent_pos, None)\n            agent = minigrid.Goal()\n            agent.color = \"red\"\n            grid.set(agent_pos[0], agent_pos[1], agent)\n            agent.init_pos = tuple(agent_pos)\n            agent.cur_pos = tuple(agent_pos)\n\n            goal_pos = self.goal_position + self.world_radius\n\n            goal = minigrid.Goal()\n            grid.set(goal_pos[0], goal_pos[1], goal)\n            goal.init_pos = tuple(goal_pos)\n            goal.cur_pos = tuple(goal_pos)\n\n            highlight_mask = np.zeros((height, width), dtype=bool)\n\n            minx, maxx = max(1, agent_pos[0] - 5), min(height - 1, agent_pos[0] + 5)\n            miny, maxy = max(1, agent_pos[1] - 5), min(height - 1, agent_pos[1] + 5)\n            highlight_mask[minx : (maxx + 1), miny : (maxy + 1)] = True\n\n            img = grid.render(\n                minigrid.TILE_PIXELS, agent_pos, None, highlight_mask=highlight_mask\n            )\n\n            return img\n\n        else:\n            raise NotImplementedError(\"Unknown render mode {}.\".format(mode))\n\n        time.sleep(0.0 if \"sleep_time\" not in kwargs else kwargs[\"sleep_time\"])\n\n    def close(self):\n        if self.curses_screen is not None:\n            curses.nocbreak()\n            self.curses_screen.keypad(False)\n            curses.echo()\n            curses.endwin()\n\n    @staticmethod\n    def optimal_ave_ep_length(world_dim: int, world_radius: int, view_radius: int):\n        if world_dim == 1:\n            max_steps_wrong_dir = max(world_radius - view_radius, 0)\n\n            return 
max_steps_wrong_dir + world_radius\n\n        elif world_dim == 2:\n            tau = 2 * (world_radius - view_radius)\n\n            average_steps_needed = 0.25 * (4 * 2 * view_radius + 10 * tau)\n\n            return average_steps_needed\n        else:\n            raise NotImplementedError(\n                \"`optimal_average_ep_length` is only implemented\"\n                \" for when the `world_dim` is 1 or 2 ({} given).\".format(world_dim)\n            )\n"
  },
  {
    "path": "allenact_plugins/lighthouse_plugin/lighthouse_models.py",
    "content": "from typing import Optional, Tuple, cast\n\nimport gym\nimport torch\nimport torch.nn as nn\nfrom gym.spaces.dict import Dict as SpaceDict\n\nfrom allenact.algorithms.onpolicy_sync.policy import (\n    ActorCriticModel,\n    Memory,\n    ObservationType,\n)\nfrom allenact.base_abstractions.distributions import CategoricalDistr\nfrom allenact.base_abstractions.misc import ActorCriticOutput, DistributionType\n\n\nclass LinearAdvisorActorCritic(ActorCriticModel[CategoricalDistr]):\n    def __init__(\n        self,\n        input_uuid: str,\n        action_space: gym.spaces.Discrete,\n        observation_space: SpaceDict,\n        ensure_same_init_aux_weights: bool = True,\n    ):\n        super().__init__(action_space=action_space, observation_space=observation_space)\n\n        assert (\n            input_uuid in observation_space.spaces\n        ), \"LinearActorCritic expects only a single observational input.\"\n        self.input_uuid = input_uuid\n\n        box_space: gym.spaces.Box = observation_space[self.input_uuid]\n        assert isinstance(box_space, gym.spaces.Box), (\n            \"LinearActorCritic requires that\"\n            \"observation space corresponding to the input key is a Box space.\"\n        )\n        assert len(box_space.shape) == 1\n        self.in_dim = box_space.shape[0]\n\n        self.num_actions = action_space.n\n        self.linear = nn.Linear(self.in_dim, 2 * self.num_actions + 1)\n\n        nn.init.orthogonal_(self.linear.weight)\n        if ensure_same_init_aux_weights:\n            # Ensure main actor / auxiliary actor start with the same weights\n            self.linear.weight.data[self.num_actions : -1, :] = self.linear.weight[\n                : self.num_actions, :\n            ]\n        nn.init.constant_(self.linear.bias, 0)\n\n    # noinspection PyMethodMayBeStatic\n    def _recurrent_memory_specification(self):\n        return None\n\n    def forward(  # type:ignore\n        self,\n        observations: 
ObservationType,\n        memory: Memory,\n        prev_actions: torch.Tensor,\n        masks: torch.FloatTensor,\n    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:\n        out = self.linear(cast(torch.Tensor, observations[self.input_uuid]))\n\n        main_logits = out[..., : self.num_actions]\n        aux_logits = out[..., self.num_actions : -1]\n        values = out[..., -1:]\n\n        # noinspection PyArgumentList\n        return (\n            ActorCriticOutput(\n                distributions=cast(\n                    DistributionType, CategoricalDistr(logits=main_logits)\n                ),  # step x sampler x ...\n                values=cast(\n                    torch.FloatTensor, values.view(values.shape[:2] + (-1,))\n                ),  # step x sampler x flattened\n                extras={\"auxiliary_distributions\": CategoricalDistr(logits=aux_logits)},\n            ),\n            None,\n        )\n"
  },
  {
    "path": "allenact_plugins/lighthouse_plugin/lighthouse_sensors.py",
    "content": "import itertools\nfrom typing import Any, Dict, Optional, Tuple, Sequence\n\nimport gym\nimport numpy as np\nimport pandas as pd\nimport patsy\n\nfrom allenact.base_abstractions.sensor import Sensor, prepare_locals_for_super\nfrom allenact.base_abstractions.task import Task\nfrom allenact_plugins.lighthouse_plugin.lighthouse_environment import (\n    LightHouseEnvironment,\n)\n\n\ndef get_corner_observation(\n    env: LightHouseEnvironment,\n    view_radius: int,\n    view_corner_offsets: Optional[np.array],\n):\n    if view_corner_offsets is None:\n        view_corner_offsets = view_radius * (2 * (env.world_corners > 0) - 1)\n\n    world_corners_offset = env.world_corners + env.world_radius\n    multidim_view_corner_indices = np.clip(\n        np.reshape(env.current_position, (1, -1))\n        + view_corner_offsets\n        + env.world_radius,\n        a_min=0,\n        a_max=2 * env.world_radius,\n    )\n    flat_view_corner_indices = np.ravel_multi_index(\n        np.transpose(multidim_view_corner_indices), env.world_tensor.shape\n    )\n    view_values = env.world_tensor.reshape(-1)[flat_view_corner_indices]\n\n    last_action = 2 * env.world_dim if env.last_action is None else env.last_action\n    on_border_bools = np.concatenate(\n        (\n            env.current_position == env.world_radius,\n            env.current_position == -env.world_radius,\n        ),\n        axis=0,\n    )\n\n    if last_action == 2 * env.world_dim or on_border_bools[last_action]:\n        on_border_value = last_action\n    elif on_border_bools.any():\n        on_border_value = np.argwhere(on_border_bools).reshape(-1)[0]\n    else:\n        on_border_value = 2 * env.world_dim\n\n    seen_mask = np.array(env.closest_distance_to_corners <= view_radius, dtype=int)\n    seen_corner_values = (\n        env.world_tensor.reshape(-1)[\n            np.ravel_multi_index(\n                np.transpose(world_corners_offset), env.world_tensor.shape\n            )\n        ]\n   
     * seen_mask\n    )\n\n    return np.concatenate(\n        (\n            seen_corner_values + view_values * (1 - seen_mask),\n            [on_border_value, last_action],\n        ),\n        axis=0,\n        out=np.zeros(\n            (seen_corner_values.shape[0] + 2,),\n            dtype=np.float32,\n        ),\n    )\n\n\nclass CornerSensor(Sensor[LightHouseEnvironment, Any]):\n    def __init__(\n        self,\n        view_radius: int,\n        world_dim: int,\n        uuid: str = \"corner_fixed_radius\",\n        **kwargs: Any\n    ):\n        self.view_radius = view_radius\n        self.world_dim = world_dim\n        self.view_corner_offsets: Optional[np.ndarray] = None\n\n        observation_space = self._get_observation_space()\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def _get_observation_space(self):\n        return gym.spaces.Box(\n            low=min(LightHouseEnvironment.SPACE_LEVELS),\n            high=max(LightHouseEnvironment.SPACE_LEVELS),\n            shape=(2**self.world_dim + 2,),\n            dtype=int,\n        )\n\n    def get_observation(\n        self,\n        env: LightHouseEnvironment,\n        task: Optional[Task],\n        *args: Any,\n        **kwargs: Any\n    ) -> Any:\n        if self.view_corner_offsets is None:\n            self.view_corner_offsets = self.view_radius * (\n                2 * (env.world_corners > 0) - 1\n            )\n\n        return get_corner_observation(\n            env=env,\n            view_radius=self.view_radius,\n            view_corner_offsets=self.view_corner_offsets,\n        )\n\n\nclass FactorialDesignCornerSensor(Sensor[LightHouseEnvironment, Any]):\n    _DESIGN_MAT_CACHE: Dict[Tuple, Any] = {}\n\n    def __init__(\n        self,\n        view_radius: int,\n        world_dim: int,\n        degree: int,\n        uuid: str = \"corner_fixed_radius_categorical\",\n        **kwargs: Any\n    ):\n        self.view_radius = view_radius\n        self.world_dim = 
world_dim\n        self.degree = degree\n\n        if self.world_dim > 2:\n            raise NotImplementedError(\n                \"When using the `FactorialDesignCornerSensor`,\"\n                \"`world_dim` must be <= 2 due to memory constraints.\"\n                \"In the current implementation, creating the design\"\n                \"matrix in the `world_dim == 3` case would require\"\n                \"instantiating a matrix of size ~ 3Mx3M (9 trillion entries).\"\n            )\n\n        self.view_corner_offsets: Optional[np.ndarray] = None\n        # self.world_corners_offset: Optional[List[typing.Tuple[int, ...]]] = None\n\n        self.corner_sensor = CornerSensor(self.view_radius, self.world_dim)\n\n        self.variables_and_levels = self._get_variables_and_levels(\n            world_dim=self.world_dim\n        )\n        self._design_mat_formula = self._create_formula(\n            variables_and_levels=self._get_variables_and_levels(\n                world_dim=self.world_dim\n            ),\n            degree=self.degree,\n        )\n        self.single_row_df = pd.DataFrame(\n            data=[[0] * len(self.variables_and_levels)],\n            columns=[x[0] for x in self.variables_and_levels],\n        )\n        self._view_tuple_to_design_array: Dict[Tuple[int, ...], np.ndarray] = {}\n\n        (\n            design_matrix,\n            tuple_to_ind,\n        ) = self._create_full_design_matrix_and_tuple_to_ind_dict(\n            variables_and_levels=tuple(self.variables_and_levels), degree=self.degree\n        )\n\n        self.design_matrix = design_matrix\n        self.tuple_to_ind = tuple_to_ind\n\n        observation_space = self._get_observation_space()\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def _get_observation_space(self):\n        return gym.spaces.Box(\n            low=min(LightHouseEnvironment.SPACE_LEVELS),\n            high=max(LightHouseEnvironment.SPACE_LEVELS),\n            shape=(\n            
    len(\n                    self.view_tuple_to_design_array(\n                        (0,) * len(self.variables_and_levels)\n                    )\n                ),\n            ),\n            dtype=int,\n        )\n\n    def view_tuple_to_design_array(self, view_tuple: Tuple):\n        return np.array(\n            self.design_matrix[self.tuple_to_ind[view_tuple], :], dtype=np.float32\n        )\n\n    @classmethod\n    def output_dim(cls, world_dim: int):\n        return ((3 if world_dim == 1 else 4) ** (2**world_dim)) * (\n            2 * world_dim + 1\n        ) ** 2\n\n    @classmethod\n    def _create_full_design_matrix_and_tuple_to_ind_dict(\n        cls, variables_and_levels: Sequence[Tuple[str, Sequence[int]]], degree: int\n    ):\n        variables_and_levels = tuple((x, tuple(y)) for x, y in variables_and_levels)\n        key = (variables_and_levels, degree)\n        if key not in cls._DESIGN_MAT_CACHE:\n            all_tuples = [\n                tuple(x)\n                for x in itertools.product(\n                    *[levels for _, levels in variables_and_levels]\n                )\n            ]\n\n            tuple_to_ind = {}\n            for i, t in enumerate(all_tuples):\n                tuple_to_ind[t] = i\n\n            df = pd.DataFrame(\n                data=all_tuples,\n                columns=[var_name for var_name, _ in variables_and_levels],\n            )\n\n            cls._DESIGN_MAT_CACHE[key] = (\n                np.array(\n                    1.0\n                    * patsy.dmatrix(\n                        cls._create_formula(\n                            variables_and_levels=variables_and_levels, degree=degree\n                        ),\n                        data=df,\n                    ),\n                    dtype=bool,\n                ),\n                tuple_to_ind,\n            )\n        return cls._DESIGN_MAT_CACHE[key]\n\n    @staticmethod\n    def _get_variables_and_levels(world_dim: int):\n        return 
(\n            [\n                (\"s{}\".format(i), list(range(3 if world_dim == 1 else 4)))\n                for i in range(2**world_dim)\n            ]\n            + [(\"b{}\".format(i), list(range(2 * world_dim + 1))) for i in range(1)]\n            + [(\"a{}\".format(i), list(range(2 * world_dim + 1))) for i in range(1)]\n        )\n\n    @classmethod\n    def _create_formula(\n        cls, variables_and_levels: Sequence[Tuple[str, Sequence[int]]], degree: int\n    ):\n        def make_categorial(var_name, levels):\n            return \"C({}, levels={})\".format(var_name, levels)\n\n        if degree == -1:\n            return \":\".join(\n                make_categorial(var_name, levels)\n                for var_name, levels in variables_and_levels\n            )\n        else:\n            return \"({})**{}\".format(\n                \"+\".join(\n                    make_categorial(var_name, levels)\n                    for var_name, levels in variables_and_levels\n                ),\n                degree,\n            )\n\n    def get_observation(\n        self,\n        env: LightHouseEnvironment,\n        task: Optional[Task],\n        *args: Any,\n        **kwargs: Any\n    ) -> Any:\n        kwargs[\"as_tuple\"] = True\n        view_array = self.corner_sensor.get_observation(env, task, *args, **kwargs)\n        return self.view_tuple_to_design_array(tuple(view_array))\n"
  },
  {
    "path": "allenact_plugins/lighthouse_plugin/lighthouse_tasks.py",
    "content": "import abc\nimport string\nfrom typing import List, Dict, Any, Optional, Tuple, Union, Sequence, cast\n\nimport gym\nimport numpy as np\nfrom gym.utils import seeding\n\nfrom allenact.base_abstractions.misc import RLStepResult\nfrom allenact.base_abstractions.sensor import Sensor, SensorSuite\nfrom allenact.base_abstractions.task import Task, TaskSampler\nfrom allenact.utils.experiment_utils import set_seed\nfrom allenact.utils.system import get_logger\nfrom allenact_plugins.lighthouse_plugin.lighthouse_environment import (\n    LightHouseEnvironment,\n)\nfrom allenact_plugins.lighthouse_plugin.lighthouse_sensors import get_corner_observation\n\nDISCOUNT_FACTOR = 0.99\nSTEP_PENALTY = -0.01\nFOUND_TARGET_REWARD = 1.0\n\n\nclass LightHouseTask(Task[LightHouseEnvironment], abc.ABC):\n    \"\"\"Defines an abstract embodied task in the light house gridworld.\n\n    # Attributes\n\n    env : The light house environment.\n    sensor_suite: Collection of sensors formed from the `sensors` argument in the initializer.\n    task_info : Dictionary of (k, v) pairs defining task goals and other task information.\n    max_steps : The maximum number of steps an agent can take an in the task before it is considered failed.\n    observation_space: The observation space returned on each step from the sensors.\n    \"\"\"\n\n    def __init__(\n        self,\n        env: LightHouseEnvironment,\n        sensors: Union[SensorSuite, List[Sensor]],\n        task_info: Dict[str, Any],\n        max_steps: int,\n        **kwargs,\n    ) -> None:\n        \"\"\"Initializer.\n\n        See class documentation for parameter definitions.\n        \"\"\"\n        super().__init__(\n            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs\n        )\n\n        self._last_action: Optional[int] = None\n\n    @property\n    def last_action(self) -> int:\n        return self._last_action\n\n    @last_action.setter\n    def last_action(self, value: 
int):\n        self._last_action = value\n\n    def step(self, action: Union[int, Sequence[int]]) -> RLStepResult:\n        assert isinstance(action, int)\n        action = cast(int, action)\n\n        self.last_action = action\n        return super(LightHouseTask, self).step(action=action)\n\n    def render(self, mode: str = \"array\", *args, **kwargs) -> np.ndarray:\n        if mode == \"array\":\n            return self.env.render(mode, **kwargs)\n        elif mode in [\"rgb\", \"rgb_array\", \"human\"]:\n            arr = self.env.render(\"array\", **kwargs)\n            colors = np.array(\n                [\n                    (31, 119, 180),\n                    (255, 127, 14),\n                    (44, 160, 44),\n                    (214, 39, 40),\n                    (148, 103, 189),\n                    (140, 86, 75),\n                    (227, 119, 194),\n                    (127, 127, 127),\n                    (188, 189, 34),\n                    (23, 190, 207),\n                ],\n                dtype=np.uint8,\n            )\n            return colors[arr]\n        else:\n            raise NotImplementedError(\"Render mode '{}' is not supported.\".format(mode))\n\n\nclass FindGoalLightHouseTask(LightHouseTask):\n    _CACHED_ACTION_NAMES: Dict[int, Tuple[str, ...]] = {}\n\n    def __init__(\n        self,\n        env: LightHouseEnvironment,\n        sensors: Union[SensorSuite, List[Sensor]],\n        task_info: Dict[str, Any],\n        max_steps: int,\n        **kwargs,\n    ):\n        super().__init__(env, sensors, task_info, max_steps, **kwargs)\n\n        self._found_target = False\n\n    @property\n    def action_space(self) -> gym.spaces.Discrete:\n        return gym.spaces.Discrete(2 * self.env.world_dim)\n\n    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:\n        assert isinstance(action, int)\n        action = cast(int, action)\n\n        self.env.step(action)\n        reward = STEP_PENALTY\n\n        if 
np.all(self.env.current_position == self.env.goal_position):\n            self._found_target = True\n            reward += FOUND_TARGET_REWARD\n        elif self.num_steps_taken() == self.max_steps - 1:\n            reward = STEP_PENALTY / (1 - DISCOUNT_FACTOR)\n\n        return RLStepResult(\n            observation=self.get_observations(),\n            reward=reward,\n            done=self.is_done(),\n            info=None,\n        )\n\n    def reached_terminal_state(self) -> bool:\n        return self._found_target\n\n    @classmethod\n    def class_action_names(cls, world_dim: int = 2, **kwargs) -> Tuple[str, ...]:\n        assert 1 <= world_dim <= 26, \"Too many dimensions.\"\n        if world_dim not in cls._CACHED_ACTION_NAMES:\n            action_names = [\n                \"{}(+1)\".format(string.ascii_lowercase[i] for i in range(world_dim))\n            ]\n            action_names.extend(\n                \"{}(-1)\".format(string.ascii_lowercase[i] for i in range(world_dim))\n            )\n            cls._CACHED_ACTION_NAMES[world_dim] = tuple(action_names)\n\n        return cls._CACHED_ACTION_NAMES[world_dim]\n\n    def action_names(self) -> Tuple[str, ...]:\n        return self.class_action_names(world_dim=self.env.world_dim)\n\n    def close(self) -> None:\n        pass\n\n    def query_expert(\n        self,\n        expert_view_radius: int,\n        return_policy: bool = False,\n        deterministic: bool = False,\n        **kwargs,\n    ) -> Tuple[Any, bool]:\n        view_tuple = get_corner_observation(\n            env=self.env,\n            view_radius=expert_view_radius,\n            view_corner_offsets=None,\n        )\n\n        goal = self.env.GOAL\n        wrong = self.env.WRONG_CORNER\n\n        if self.env.world_dim == 1:\n            left_view, right_view, hitting, last_action = view_tuple\n\n            left = 1\n            right = 0\n\n            expert_action: Optional[int] = None\n            policy: Optional[np.ndarray] = 
None\n\n            if left_view == goal:\n                expert_action = left\n            elif right_view == goal:\n                expert_action = right\n            elif hitting != 2 * self.env.world_dim:\n                expert_action = left if last_action == right else right\n            elif left_view == wrong:\n                expert_action = right\n            elif right_view == wrong:\n                expert_action = left\n            elif last_action == 2 * self.env.world_dim:\n                policy = np.array([0.5, 0.5])\n            else:\n                expert_action = last_action\n\n            if policy is None:\n                policy = np.array([expert_action == right, expert_action == left])\n\n        elif self.env.world_dim == 2:\n\n            tl, tr, bl, br, hitting, last_action = view_tuple\n\n            wall = self.env.WALL\n\n            d, r, u, l, none = 0, 1, 2, 3, 4\n\n            if tr == goal:\n                if hitting != r:\n                    expert_action = r\n                else:\n                    expert_action = u\n            elif br == goal:\n                if hitting != d:\n                    expert_action = d\n                else:\n                    expert_action = r\n            elif bl == goal:\n                if hitting != l:\n                    expert_action = l\n                else:\n                    expert_action = d\n            elif tl == goal:\n                if hitting != u:\n                    expert_action = u\n                else:\n                    expert_action = l\n\n            elif tr == wrong and not any(x == wrong for x in [br, bl, tl]):\n                expert_action = l\n            elif br == wrong and not any(x == wrong for x in [bl, tl, tr]):\n                expert_action = u\n            elif bl == wrong and not any(x == wrong for x in [tl, tr, br]):\n                expert_action = r\n            elif tl == wrong and not any(x == wrong for x in [tr, br, bl]):\n           
     expert_action = d\n\n            elif all(x == wrong for x in [tr, br]) and not any(\n                x == wrong for x in [bl, tl]\n            ):\n                expert_action = l\n            elif all(x == wrong for x in [br, bl]) and not any(\n                x == wrong for x in [tl, tr]\n            ):\n                expert_action = u\n\n            elif all(x == wrong for x in [bl, tl]) and not any(\n                x == wrong for x in [tr, br]\n            ):\n                expert_action = r\n            elif all(x == wrong for x in [tl, tr]) and not any(\n                x == wrong for x in [br, bl]\n            ):\n                expert_action = d\n\n            elif hitting != none and tr == br == bl == tl:\n                # Only possible if in 0 vis setting\n                if tr == self.env.WRONG_CORNER or last_action == hitting:\n                    if last_action == r:\n                        expert_action = u\n                    elif last_action == u:\n                        expert_action = l\n                    elif last_action == l:\n                        expert_action = d\n                    elif last_action == d:\n                        expert_action = r\n                    else:\n                        raise NotImplementedError()\n                else:\n                    expert_action = last_action\n\n            elif last_action == r and tr == wall:\n                expert_action = u\n\n            elif last_action == u and tl == wall:\n                expert_action = l\n\n            elif last_action == l and bl == wall:\n                expert_action = d\n\n            elif last_action == d and br == wall:\n                expert_action = r\n\n            elif last_action == none:\n                expert_action = r\n\n            else:\n                expert_action = last_action\n\n            policy = np.array(\n                [\n                    expert_action == d,\n                    expert_action == r,\n       
             expert_action == u,\n                    expert_action == l,\n                ]\n            )\n        else:\n            raise NotImplementedError(\"Can only query expert for world dims of 1 or 2.\")\n\n        if return_policy:\n            return policy, True\n        elif deterministic:\n            return int(np.argmax(policy)), True\n        else:\n            return (\n                int(np.argmax(np.random.multinomial(1, policy / (1.0 * policy.sum())))),\n                True,\n            )\n\n\nclass FindGoalLightHouseTaskSampler(TaskSampler):\n    def __init__(\n        self,\n        world_dim: int,\n        world_radius: int,\n        sensors: Union[SensorSuite, List[Sensor]],\n        max_steps: int,\n        max_tasks: Optional[int] = None,\n        num_unique_seeds: Optional[int] = None,\n        task_seeds_list: Optional[List[int]] = None,\n        deterministic_sampling: bool = False,\n        seed: Optional[int] = None,\n        **kwargs,\n    ):\n        self.env = LightHouseEnvironment(world_dim=world_dim, world_radius=world_radius)\n\n        self._last_sampled_task: Optional[FindGoalLightHouseTask] = None\n        self.sensors = (\n            SensorSuite(sensors) if not isinstance(sensors, SensorSuite) else sensors\n        )\n        self.max_steps = max_steps\n        self.max_tasks = max_tasks\n        self.num_tasks_generated = 0\n        self.deterministic_sampling = deterministic_sampling\n\n        self.num_unique_seeds = num_unique_seeds\n        self.task_seeds_list = task_seeds_list\n        assert (self.num_unique_seeds is None) or (\n            0 < self.num_unique_seeds\n        ), \"`num_unique_seeds` must be a positive integer.\"\n\n        self.num_unique_seeds = num_unique_seeds\n        self.task_seeds_list = task_seeds_list\n        if self.task_seeds_list is not None:\n            if self.num_unique_seeds is not None:\n                assert self.num_unique_seeds == len(\n                    
self.task_seeds_list\n                ), \"`num_unique_seeds` must equal the length of `task_seeds_list` if both specified.\"\n            self.num_unique_seeds = len(self.task_seeds_list)\n        elif self.num_unique_seeds is not None:\n            self.task_seeds_list = list(range(self.num_unique_seeds))\n\n        assert (not deterministic_sampling) or (\n            self.num_unique_seeds is not None\n        ), \"Cannot use deterministic sampling when `num_unique_seeds` is `None`.\"\n\n        if (not deterministic_sampling) and self.max_tasks:\n            get_logger().warning(\n                \"`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,\"\n                \" this might be a mistake when running testing.\"\n            )\n\n        self.seed: int = int(\n            seed if seed is not None else np.random.randint(0, 2**31 - 1)\n        )\n        self.np_seeded_random_gen: Optional[np.random.RandomState] = None\n        self.set_seed(self.seed)\n\n    @property\n    def world_dim(self):\n        return self.env.world_dim\n\n    @property\n    def world_radius(self):\n        return self.env.world_radius\n\n    @property\n    def length(self) -> Union[int, float]:\n        return (\n            float(\"inf\")\n            if self.max_tasks is None\n            else self.max_tasks - self.num_tasks_generated\n        )\n\n    @property\n    def total_unique(self) -> Optional[Union[int, float]]:\n        n = 2**self.world_dim\n        return n if self.num_unique_seeds is None else min(n, self.num_unique_seeds)\n\n    @property\n    def last_sampled_task(self) -> Optional[Task]:\n        return self._last_sampled_task\n\n    def next_task(self, force_advance_scene: bool = False) -> Optional[Task]:\n        if self.length <= 0:\n            return None\n\n        if self.num_unique_seeds is not None:\n            if self.deterministic_sampling:\n                seed = self.task_seeds_list[\n                    
self.num_tasks_generated % len(self.task_seeds_list)\n                ]\n            else:\n                seed = self.np_seeded_random_gen.choice(self.task_seeds_list)\n        else:\n            seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)\n\n        self.num_tasks_generated += 1\n\n        self.env.set_seed(seed)\n        self.env.random_reset()\n        return FindGoalLightHouseTask(\n            env=self.env, sensors=self.sensors, task_info={}, max_steps=self.max_steps\n        )\n\n    def close(self) -> None:\n        pass\n\n    @property\n    def all_observation_spaces_equal(self) -> bool:\n        return True\n\n    def reset(self) -> None:\n        self.num_tasks_generated = 0\n        self.set_seed(seed=self.seed)\n\n    def set_seed(self, seed: int) -> None:\n        set_seed(seed)\n        self.np_seeded_random_gen, _ = seeding.np_random(seed)\n        self.seed = seed\n"
  },
  {
    "path": "allenact_plugins/lighthouse_plugin/lighthouse_util.py",
    "content": "import numpy as np\n\nfrom allenact.utils.experiment_utils import EarlyStoppingCriterion, ScalarMeanTracker\n\n\nclass StopIfNearOptimal(EarlyStoppingCriterion):\n    def __init__(self, optimal: float, deviation: float, min_memory_size: int = 100):\n        self.optimal = optimal\n        self.deviation = deviation\n\n        self.current_pos = 0\n        self.has_filled = False\n        self.memory: np.ndarray = np.zeros(min_memory_size)\n\n    def __call__(\n        self,\n        stage_steps: int,\n        total_steps: int,\n        training_metrics: ScalarMeanTracker,\n    ) -> bool:\n        sums = training_metrics.sums()\n        counts = training_metrics.counts()\n\n        k = \"ep_length\"\n        if k in sums:\n            count = counts[k]\n            ep_length_ave = sums[k] / count\n\n            n = self.memory.shape[0]\n            if count >= n:\n                if count > n:\n                    # Increase memory size to fit all of the new values\n                    self.memory = np.full(count, fill_value=ep_length_ave)\n                else:\n                    # We have exactly as many values as the memory size,\n                    # simply set the whole memory to be equal to the new\n                    # average ep length.\n                    self.memory[:] = ep_length_ave\n                self.current_pos = 0\n                self.has_filled = True\n            else:\n                self.memory[self.current_pos : (self.current_pos + count)] = (\n                    ep_length_ave\n                )\n\n                if self.current_pos + count > n:\n                    self.has_filled = True\n                    self.current_pos = self.current_pos + count % n\n                    self.memory[: self.current_pos] = ep_length_ave\n\n        if not self.has_filled:\n            return False\n\n        return self.memory.mean() < self.optimal + self.deviation\n"
  },
  {
    "path": "allenact_plugins/lighthouse_plugin/scripts/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/manipulathor_plugin/__init__.py",
    "content": "from allenact.utils.system import ImportChecker\n\nwith ImportChecker(\n    \"Cannot `import ai2thor`, please install `ai2thor` (`pip install ai2thor`).\"\n):\n    # noinspection PyUnresolvedReferences\n    import ai2thor\n"
  },
  {
    "path": "allenact_plugins/manipulathor_plugin/arm_calculation_utils.py",
    "content": "\"\"\"Utility classes and functions for calculating the arm relative and absolute\nposition.\"\"\"\n\nfrom typing import Dict\n\nimport numpy as np\nimport torch\nfrom scipy.spatial.transform import Rotation as R\n\nfrom allenact.utils.system import get_logger\n\n\ndef state_dict_to_tensor(state: Dict):\n    result = []\n    if \"position\" in state:\n        result += [\n            state[\"position\"][\"x\"],\n            state[\"position\"][\"y\"],\n            state[\"position\"][\"z\"],\n        ]\n    if \"rotation\" in state:\n        result += [\n            state[\"rotation\"][\"x\"],\n            state[\"rotation\"][\"y\"],\n            state[\"rotation\"][\"z\"],\n        ]\n    return torch.Tensor(result)\n\n\ndef diff_position(state_goal, state_curr, absolute: bool = True):\n    p1 = state_goal[\"position\"]\n    p2 = state_curr[\"position\"]\n    if absolute:\n        result = {k: abs(p1[k] - p2[k]) for k in p1.keys()}\n    else:\n        result = {k: (p1[k] - p2[k]) for k in p1.keys()}\n    return result\n\n\ndef coord_system_transform(position: Dict, coord_system: str):\n    assert coord_system in [\n        \"xyz_unsigned\",\n        \"xyz_signed\",\n        \"polar_radian\",\n        \"polar_trigo\",\n    ]\n\n    if \"xyz\" in coord_system:\n        result = [\n            position[\"x\"],\n            position[\"y\"],\n            position[\"z\"],\n        ]\n        result = torch.Tensor(result)\n        if coord_system == \"xyz_unsigned\":\n            return torch.abs(result)\n        else:  # xyz_signed\n            return result\n\n    else:\n        hxy = np.hypot(position[\"x\"], position[\"y\"])\n        r = np.hypot(hxy, position[\"z\"])\n        el = np.arctan2(position[\"z\"], hxy)  # elevation angle: [-pi/2, pi/2]\n        az = np.arctan2(position[\"y\"], position[\"x\"])  # azimuthal angle: [-pi, pi]\n\n        if coord_system == \"polar_radian\":\n            result = [\n                r,\n                el / (0.5 
* np.pi),\n                az / np.pi,\n            ]  # normalize to [-1, 1]\n            return torch.Tensor(result)\n        else:  # polar_trigo\n            result = [\n                r,\n                np.cos(el),\n                np.sin(el),\n                np.cos(az),\n                np.sin(az),\n            ]\n            return torch.Tensor(result)\n\n\ndef position_rotation_to_matrix(position, rotation):\n    result = np.zeros((4, 4))\n    r = R.from_euler(\"xyz\", [rotation[\"x\"], rotation[\"y\"], rotation[\"z\"]], degrees=True)\n    result[:3, :3] = r.as_matrix()\n    result[3, 3] = 1\n    result[:3, 3] = [position[\"x\"], position[\"y\"], position[\"z\"]]\n    return result\n\n\ndef inverse_rot_trans_matrix(mat):\n    mat = np.linalg.inv(mat)\n    return mat\n\n\ndef matrix_to_position_rotation(matrix):\n    result = {\"position\": None, \"rotation\": None}\n    rotation = R.from_matrix(matrix[:3, :3]).as_euler(\"xyz\", degrees=True)\n    rotation_dict = {\"x\": rotation[0], \"y\": rotation[1], \"z\": rotation[2]}\n    result[\"rotation\"] = rotation_dict\n    position = matrix[:3, 3]\n    result[\"position\"] = {\"x\": position[0], \"y\": position[1], \"z\": position[2]}\n    return result\n\n\ndef find_closest_inverse(deg, use_cache):\n    if use_cache:\n        for k in _saved_inverse_rotation_mats.keys():\n            if abs(k - deg) < 5:\n                return _saved_inverse_rotation_mats[k]\n    # if it reaches here it means it had not calculated the degree before\n    rotation = R.from_euler(\"xyz\", [0, deg, 0], degrees=True)\n    result = rotation.as_matrix()\n    inverse = inverse_rot_trans_matrix(result)\n    if use_cache:\n        get_logger().warning(f\"Had to calculate the matrix for {deg}\")\n    return inverse\n\n\ndef calc_inverse(deg):\n    rotation = R.from_euler(\"xyz\", [0, deg, 0], degrees=True)\n    result = rotation.as_matrix()\n    inverse = inverse_rot_trans_matrix(result)\n    return 
inverse\n\n\n_saved_inverse_rotation_mats = {i: calc_inverse(i) for i in range(0, 360, 45)}\n_saved_inverse_rotation_mats[360] = _saved_inverse_rotation_mats[0]\n\n\ndef world_coords_to_agent_coords(world_obj, agent_state, use_cache=True):\n    position = agent_state[\"position\"]\n    rotation = agent_state[\"rotation\"]\n    agent_translation = [position[\"x\"], position[\"y\"], position[\"z\"]]\n    assert abs(rotation[\"x\"]) < 0.01 and abs(rotation[\"z\"]) < 0.01\n    inverse_agent_rotation = find_closest_inverse(rotation[\"y\"], use_cache=use_cache)\n    obj_matrix = position_rotation_to_matrix(\n        world_obj[\"position\"], world_obj[\"rotation\"]\n    )\n    obj_translation = np.matmul(\n        inverse_agent_rotation, (obj_matrix[:3, 3] - agent_translation)\n    )\n    # add rotation later\n    obj_matrix[:3, 3] = obj_translation\n    result = matrix_to_position_rotation(obj_matrix)\n    return result\n"
  },
  {
    "path": "allenact_plugins/manipulathor_plugin/armpointnav_constants.py",
    "content": "import json\nimport os\nfrom typing import Dict, Optional, Any\n\nfrom constants import ABS_PATH_OF_TOP_LEVEL_DIR\n\nTRAIN_OBJECTS = [\"Apple\", \"Bread\", \"Tomato\", \"Lettuce\", \"Pot\", \"Mug\"]\nTEST_OBJECTS = [\"Potato\", \"SoapBottle\", \"Pan\", \"Egg\", \"Spatula\", \"Cup\"]\nMOVE_ARM_CONSTANT = 0.05\nMOVE_ARM_HEIGHT_CONSTANT = MOVE_ARM_CONSTANT\nUNWANTED_MOVE_THR = 0.01\nDISTANCE_EPS = 1e-9\nDISTANCE_MAX = 10.0\n\ndataset_json_file = os.path.join(\n    ABS_PATH_OF_TOP_LEVEL_DIR, \"datasets\", \"apnd-dataset\", \"starting_pose.json\"\n)\n\n_ARM_START_POSITIONS: Optional[Dict[str, Any]] = None\n\n\ndef get_agent_start_positions():\n    global _ARM_START_POSITIONS\n    if _ARM_START_POSITIONS is None:\n        try:\n            with open(dataset_json_file) as f:\n                _ARM_START_POSITIONS = json.load(f)\n        except Exception:\n            raise Exception(f\"Dataset not found in {dataset_json_file}\")\n\n    return _ARM_START_POSITIONS\n"
  },
  {
    "path": "allenact_plugins/manipulathor_plugin/manipulathor_constants.py",
    "content": "\"\"\"Constant values and hyperparameters that are used by the environment.\"\"\"\n\nimport ai2thor.fifo_server\n\n\nARM_MIN_HEIGHT = 0.450998873\nARM_MAX_HEIGHT = 1.8009994\n\n\nADDITIONAL_ARM_ARGS = {\n    \"disableRendering\": True,\n    \"returnToStart\": True,\n    \"speed\": 1,\n}\n\nMOVE_AHEAD = \"MoveAheadContinuous\"\nMOVE_BACK = \"MoveBackContinuous\"\nROTATE_LEFT = \"RotateLeftContinuous\"\nROTATE_RIGHT = \"RotateRightContinuous\"\nMOVE_ARM_HEIGHT_P = \"MoveArmHeightP\"\nMOVE_ARM_HEIGHT_M = \"MoveArmHeightM\"\nMOVE_ARM_X_P = \"MoveArmXP\"\nMOVE_ARM_X_M = \"MoveArmXM\"\nMOVE_ARM_Y_P = \"MoveArmYP\"\nMOVE_ARM_Y_M = \"MoveArmYM\"\nMOVE_ARM_Z_P = \"MoveArmZP\"\nMOVE_ARM_Z_M = \"MoveArmZM\"\nROTATE_WRIST_PITCH_P = \"RotateArmWristPitchP\"\nROTATE_WRIST_PITCH_M = \"RotateArmWristPitchM\"\nROTATE_WRIST_YAW_P = \"RotateArmWristYawP\"\nROTATE_WRIST_YAW_M = \"RotateArmWristYawM\"\nROTATE_WRIST_ROLL_P = \"RotateArmWristRollP\"\nROTATE_WRIST_ROLL_M = \"RotateArmWristRollM\"\nROTATE_ELBOW_P = \"RotateArmElbowP\"\nROTATE_ELBOW_M = \"RotateArmElbowM\"\nLOOK_UP = \"LookUp\"\nLOOK_DOWN = \"LookDown\"\nPICKUP = \"PickUpMidLevel\"\nDROP = \"DropMidLevel\"\nDONE = \"DoneMidLevel\"\n\n\nENV_ARGS = dict(\n    gridSize=0.25,\n    width=224,\n    height=224,\n    visibilityDistance=1.0,\n    agentMode=\"arm\",\n    fieldOfView=100,\n    agentControllerType=\"mid-level\",\n    server_class=ai2thor.fifo_server.FifoServer,\n    useMassThreshold=True,\n    massThreshold=10,\n    autoSimulation=False,\n    autoSyncTransforms=True,\n)\n\nVALID_OBJECT_LIST = [\n    \"Knife\",\n    \"Bread\",\n    \"Fork\",\n    \"Potato\",\n    \"SoapBottle\",\n    \"Pan\",\n    \"Plate\",\n    \"Tomato\",\n    \"Egg\",\n    \"Pot\",\n    \"Spatula\",\n    \"Cup\",\n    \"Bowl\",\n    \"SaltShaker\",\n    \"PepperShaker\",\n    \"Lettuce\",\n    \"ButterKnife\",\n    \"Apple\",\n    \"DishSponge\",\n    \"Spoon\",\n    \"Mug\",\n]\n"
  },
  {
    "path": "allenact_plugins/manipulathor_plugin/manipulathor_environment.py",
    "content": "\"\"\"A wrapper for engaging with the ManipulaTHOR environment.\"\"\"\n\nimport copy\nimport math\nimport warnings\nfrom typing import Dict, Union, Any, Optional, cast\n\nimport ai2thor.server\nimport numpy as np\nfrom ai2thor.controller import Controller\n\nfrom allenact.utils.misc_utils import prepare_locals_for_super\nfrom allenact_plugins.ithor_plugin.ithor_constants import VISIBILITY_DISTANCE, FOV\nfrom allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment\nfrom allenact_plugins.manipulathor_plugin.armpointnav_constants import (\n    MOVE_ARM_HEIGHT_CONSTANT,\n    MOVE_ARM_CONSTANT,\n    UNWANTED_MOVE_THR,\n    DISTANCE_MAX,\n)\nfrom allenact_plugins.manipulathor_plugin.manipulathor_constants import (\n    ADDITIONAL_ARM_ARGS,\n    ARM_MIN_HEIGHT,\n    ARM_MAX_HEIGHT,\n)\nfrom allenact_plugins.manipulathor_plugin.manipulathor_constants import (\n    ROTATE_WRIST_PITCH_P,\n    ROTATE_WRIST_PITCH_M,\n    ROTATE_WRIST_YAW_P,\n    ROTATE_WRIST_YAW_M,\n    ROTATE_ELBOW_P,\n    ROTATE_ELBOW_M,\n    LOOK_UP,\n    LOOK_DOWN,\n    MOVE_AHEAD,\n    ROTATE_RIGHT,\n    ROTATE_LEFT,\n    PICKUP,\n    DONE,\n)\nfrom allenact_plugins.manipulathor_plugin.manipulathor_utils import (\n    reset_environment_and_additional_commands,\n)\n\n\ndef position_distance(s1, s2, filter_nan: bool = False):\n    position1 = s1[\"position\"]\n    position2 = s2[\"position\"]\n    dist = (\n        (position1[\"x\"] - position2[\"x\"]) ** 2\n        + (position1[\"y\"] - position2[\"y\"]) ** 2\n        + (position1[\"z\"] - position2[\"z\"]) ** 2\n    ) ** 0.5\n    if filter_nan:\n        dist = DISTANCE_MAX if math.isnan(dist) or dist > DISTANCE_MAX else dist\n    return dist\n\n\ndef rotation_distance(s1: Dict[str, Dict[str, float]], s2: Dict[str, Dict[str, float]]):\n    \"\"\"Distance between rotations.\"\"\"\n    rotation1 = s1[\"rotation\"]\n    rotation2 = s2[\"rotation\"]\n\n    def deg_dist(d0: float, d1: float):\n        dist = (d0 - d1) % 360\n     
   return min(dist, 360 - dist)\n\n    return sum(deg_dist(rotation1[k], rotation2[k]) for k in [\"x\", \"y\", \"z\"])\n\n\nclass ManipulaTHOREnvironment(IThorEnvironment):\n    \"\"\"Wrapper for the manipulathor controller providing arm functionality and\n    bookkeeping.\n\n    See [here](https://ai2thor.allenai.org/documentation/installation) for comprehensive\n     documentation on AI2-THOR.\n\n    # Attributes\n\n    controller : The ai2thor controller.\n    \"\"\"\n\n    def __init__(\n        self,\n        x_display: Optional[str] = None,\n        docker_enabled: bool = False,\n        local_thor_build: Optional[str] = None,\n        visibility_distance: float = VISIBILITY_DISTANCE,\n        fov: float = FOV,\n        player_screen_width: int = 224,\n        player_screen_height: int = 224,\n        quality: str = \"Very Low\",\n        restrict_to_initially_reachable_points: bool = False,\n        make_agents_visible: bool = True,\n        object_open_speed: float = 1.0,\n        simplify_physics: bool = False,\n        verbose: bool = False,\n        env_args=None,\n    ) -> None:\n        \"\"\"Initializer.\n\n        # Parameters\n\n        x_display : The x display into which to launch ai2thor (possibly necessarily if you are running on a server\n            without an attached display).\n        docker_enabled : Whether or not to run thor in a docker container (useful on a server without an attached\n            display so that you don't have to start an x display).\n        local_thor_build : The path to a local build of ai2thor. 
This is probably not necessary for your use case\n            and can be safely ignored.\n        visibility_distance : The distance (in meters) at which objects, in the viewport of the agent,\n            are considered visible by ai2thor and will have their \"visible\" flag be set to `True` in the metadata.\n        fov : The agent's camera's field of view.\n        width : The width resolution (in pixels) of the images returned by ai2thor.\n        height : The height resolution (in pixels) of the images returned by ai2thor.\n        quality : The quality at which to render. Possible quality settings can be found in\n            `ai2thor._quality_settings.QUALITY_SETTINGS`.\n        restrict_to_initially_reachable_points : Whether or not to restrict the agent to locations in ai2thor\n            that were found to be (initially) reachable by the agent (i.e. reachable by the agent after resetting\n            the scene). This can be useful if you want to ensure there are only a fixed set of locations where the\n            agent can go.\n        make_agents_visible : Whether or not the agent should be visible. Most noticable when there are multiple agents\n            or when quality settings are high so that the agent casts a shadow.\n        object_open_speed : How quickly objects should be opened. High speeds mean faster simulation but also mean\n            that opening objects have a lot of kinetic energy and can, possibly, knock other objects away.\n        simplify_physics : Whether or not to simplify physics when applicable. 
Currently this only simplies object\n            interactions when opening drawers (when simplified, objects within a drawer do not slide around on\n            their own when the drawer is opened or closed, instead they are effectively glued down).\n        \"\"\"\n        self._verbose = verbose\n        self.env_args = env_args\n        del verbose\n        del env_args\n        super(ManipulaTHOREnvironment, self).__init__(\n            **prepare_locals_for_super(locals())\n        )\n\n    def create_controller(self):\n        controller = Controller(**self.env_args)\n\n        return controller\n\n    def start(\n        self,\n        scene_name: Optional[str],\n        move_mag: float = 0.25,\n        **kwargs,\n    ) -> None:\n        \"\"\"Starts the ai2thor controller if it was previously stopped.\n\n        After starting, `reset` will be called with the scene name and move magnitude.\n\n        # Parameters\n\n        scene_name : The scene to load.\n        move_mag : The amount of distance the agent moves in a single `MoveAhead` step.\n        kwargs : additional kwargs, passed to reset.\n        \"\"\"\n        if self._started:\n            raise RuntimeError(\n                \"Trying to start the environment but it is already started.\"\n            )\n\n        self.controller = self.create_controller()\n\n        self._started = True\n        self.reset(scene_name=scene_name, move_mag=move_mag, **kwargs)\n\n    def reset(\n        self,\n        scene_name: Optional[str],\n        move_mag: float = 0.25,\n        **kwargs,\n    ):\n        self._move_mag = move_mag\n        self._grid_size = self._move_mag\n\n        if scene_name is None:\n            scene_name = self.controller.last_event.metadata[\"sceneName\"]\n        # self.reset_init_params()#**kwargs) removing this fixes one of the crashing problem\n\n        # to solve the crash issue\n        # TODO why do we still have this crashing problem?\n        try:\n            
reset_environment_and_additional_commands(self.controller, scene_name)\n        except Exception as e:\n            print(\"RESETTING THE SCENE,\", scene_name, \"because of\", str(e))\n            self.controller = ai2thor.controller.Controller(**self.env_args)\n            reset_environment_and_additional_commands(self.controller, scene_name)\n\n        if self.object_open_speed != 1.0:\n            self.controller.step(\n                {\"action\": \"ChangeOpenSpeed\", \"x\": self.object_open_speed}\n            )\n\n        self._initially_reachable_points = None\n        self._initially_reachable_points_set = None\n        self.controller.step({\"action\": \"GetReachablePositions\"})\n        if not self.controller.last_event.metadata[\"lastActionSuccess\"]:\n            warnings.warn(\n                \"Error when getting reachable points: {}\".format(\n                    self.controller.last_event.metadata[\"errorMessage\"]\n                )\n            )\n        self._initially_reachable_points = self.last_action_return\n\n        self.list_of_actions_so_far = []\n\n    def randomize_agent_location(\n        self, seed: int = None, partial_position: Optional[Dict[str, float]] = None\n    ) -> Dict:\n        raise NotImplementedError\n\n    def is_object_at_low_level_hand(self, object_id):\n        current_objects_in_hand = self.controller.last_event.metadata[\"arm\"][\n            \"heldObjects\"\n        ]\n        return object_id in current_objects_in_hand\n\n    def object_in_hand(self):\n        \"\"\"Object metadata for the object in the agent's hand.\"\"\"\n        inv_objs = self.last_event.metadata[\"inventoryObjects\"]\n        if len(inv_objs) == 0:\n            return None\n        elif len(inv_objs) == 1:\n            return self.get_object_by_id(\n                self.last_event.metadata[\"inventoryObjects\"][0][\"objectId\"]\n            )\n        else:\n            raise AttributeError(\"Must be <= 1 inventory objects.\")\n\n    
@classmethod\n    def correct_nan_inf(cls, flawed_dict, extra_tag=\"\"):\n        corrected_dict = copy.deepcopy(flawed_dict)\n        for k, v in corrected_dict.items():\n            if math.isnan(v) or math.isinf(v):\n                corrected_dict[k] = 0\n        return corrected_dict\n\n    def get_object_by_id(self, object_id: str) -> Optional[Dict[str, Any]]:\n        for o in self.last_event.metadata[\"objects\"]:\n            if o[\"objectId\"] == object_id:\n                o[\"position\"] = self.correct_nan_inf(o[\"position\"], \"obj id\")\n                return o\n        return None\n\n    def get_current_arm_state(self):\n        h_min = ARM_MIN_HEIGHT\n        h_max = ARM_MAX_HEIGHT\n        agent_base_location = 0.9009995460510254\n        event = self.controller.last_event\n        offset = event.metadata[\"agent\"][\"position\"][\"y\"] - agent_base_location\n        h_max += offset\n        h_min += offset\n        joints = event.metadata[\"arm\"][\"joints\"]\n        arm = joints[-1]\n        assert arm[\"name\"] == \"robot_arm_4_jnt\"\n        xyz_dict = copy.deepcopy(arm[\"rootRelativePosition\"])\n        height_arm = joints[0][\"position\"][\"y\"]\n        xyz_dict[\"h\"] = (height_arm - h_min) / (h_max - h_min)\n        xyz_dict = self.correct_nan_inf(xyz_dict, \"realtive hand\")\n        return xyz_dict\n\n    def get_absolute_hand_state(self):\n        event = self.controller.last_event\n        joints = event.metadata[\"arm\"][\"joints\"]\n        arm = copy.deepcopy(joints[-1])\n        assert arm[\"name\"] == \"robot_arm_4_jnt\"\n        xyz_dict = arm[\"position\"]\n        xyz_dict = self.correct_nan_inf(xyz_dict, \"absolute hand\")\n        return dict(position=xyz_dict, rotation={\"x\": 0, \"y\": 0, \"z\": 0})\n\n    def get_pickupable_objects(self):\n\n        event = self.controller.last_event\n        object_list = event.metadata[\"arm\"][\"pickupableObjects\"]\n\n        return object_list\n\n    def 
get_current_object_locations(self):\n        obj_loc_dict = {}\n        metadata = self.controller.last_event.metadata[\"objects\"]\n        for o in metadata:\n            obj_loc_dict[o[\"objectId\"]] = dict(\n                position=o[\"position\"],\n                rotation=o[\"rotation\"],\n                visible=o[\"visible\"],\n            )\n        return copy.deepcopy(obj_loc_dict)\n\n    def close_enough(self, current_obj_pose, init_obj_pose, threshold):\n        position_close = [\n            abs(current_obj_pose[\"position\"][k] - init_obj_pose[\"position\"][k])\n            <= threshold\n            for k in [\"x\", \"y\", \"z\"]\n        ]\n        position_is_close = sum(position_close) == 3\n        return position_is_close\n\n    def get_objects_moved(\n        self,\n        previous_object_locations,\n        current_object_locations,\n        target_object_id,\n        thres_dict: Optional[Dict] = None,\n    ):\n        moved_objects = []\n        scene_id = self.scene_name.split(\"_\")[0]\n\n        for object_id in current_object_locations.keys():\n            if object_id == target_object_id:\n                continue\n            if object_id not in previous_object_locations:\n                continue\n\n            threshold = UNWANTED_MOVE_THR\n            if thres_dict is not None:\n                threshold = max(threshold, thres_dict[scene_id + \"-\" + object_id])\n\n            if not self.close_enough(\n                current_object_locations[object_id],\n                previous_object_locations[object_id],\n                threshold=threshold,\n            ):\n                moved_objects.append(object_id)\n\n        return moved_objects\n\n    def get_objects_move_distance(\n        self,\n        initial_object_locations,\n        previous_object_locations,\n        current_object_locations,\n        target_object_id,\n        only_visible: bool = False,\n        thres_dict: Optional[Dict] = None,\n    ):\n        
moved_objects_position_distance = {}\n        scene_id = self.scene_name.split(\"_\")[0]\n\n        for object_id in current_object_locations.keys():\n            if object_id == target_object_id:\n                continue\n            if object_id not in previous_object_locations:\n                continue\n            if only_visible:\n                # current is visible\n                if not current_object_locations[object_id][\"visible\"]:\n                    continue\n\n            p_initial2current = position_distance(\n                current_object_locations[object_id],\n                initial_object_locations[object_id],\n                filter_nan=True,\n            )\n            p_initial2previous = position_distance(\n                previous_object_locations[object_id],\n                initial_object_locations[object_id],\n                filter_nan=True,\n            )\n\n            threshold = 0.0\n            if thres_dict is not None:\n                threshold = max(threshold, thres_dict[scene_id + \"-\" + object_id])\n\n            p_initial2current = max(0.0, p_initial2current - threshold)\n            p_initial2previous = max(0.0, p_initial2previous - threshold)\n\n            moved_objects_position_distance[object_id] = (\n                p_initial2current - p_initial2previous\n            )\n\n        return sum(moved_objects_position_distance.values())\n\n    def step(\n        self, action_dict: Dict[str, Union[str, int, float]]\n    ) -> ai2thor.server.Event:\n        \"\"\"Take a step in the ai2thor environment.\"\"\"\n        action = cast(str, action_dict[\"action\"])\n\n        skip_render = \"renderImage\" in action_dict and not action_dict[\"renderImage\"]\n        last_frame: Optional[np.ndarray] = None\n        if skip_render:\n            last_frame = self.current_frame\n\n        if self.simplify_physics:\n            action_dict[\"simplifyPhysics\"] = True\n        if action in [PICKUP, DONE]:\n            if action == 
PICKUP:\n                object_id = action_dict[\"object_id\"]\n                if not self.is_object_at_low_level_hand(object_id):\n                    pickupable_objects = self.get_pickupable_objects()\n                    #\n                    if object_id in pickupable_objects:\n                        # This version of the task is actually harder # consider making it easier, are we penalizing failed pickup? yes\n                        self.step(dict(action=\"PickupObject\"))\n                        #  we are doing an additional pass here, label is not right and if we fail we will do it twice\n                        object_inventory = self.controller.last_event.metadata[\"arm\"][\n                            \"heldObjects\"\n                        ]\n                        if (\n                            len(object_inventory) > 0\n                            and object_id not in object_inventory\n                        ):\n                            self.step(dict(action=\"ReleaseObject\"))\n            action_dict = {\"action\": \"Pass\"}\n\n        elif action in [MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT]:\n            copy_additions = copy.deepcopy(ADDITIONAL_ARM_ARGS)\n\n            action_dict = {**action_dict, **copy_additions}\n            if action in [MOVE_AHEAD]:\n                action_dict[\"action\"] = \"MoveAgent\"\n                action_dict[\"ahead\"] = 0.2\n\n            elif action in [ROTATE_RIGHT]:\n                action_dict[\"action\"] = \"RotateAgent\"\n                action_dict[\"degrees\"] = 45\n\n            elif action in [ROTATE_LEFT]:\n                action_dict[\"action\"] = \"RotateAgent\"\n                action_dict[\"degrees\"] = -45\n\n        elif \"MoveArm\" in action:\n            copy_additions = copy.deepcopy(ADDITIONAL_ARM_ARGS)\n            action_dict = {**action_dict, **copy_additions}\n            base_position = self.get_current_arm_state()\n            if \"MoveArmHeight\" in action:\n                
action_dict[\"action\"] = \"MoveArmBase\"\n\n                if action == \"MoveArmHeightP\":\n                    base_position[\"h\"] += MOVE_ARM_HEIGHT_CONSTANT\n                if action == \"MoveArmHeightM\":\n                    base_position[\n                        \"h\"\n                    ] -= MOVE_ARM_HEIGHT_CONSTANT  # height is pretty big!\n                action_dict[\"y\"] = base_position[\"h\"]\n            else:\n                action_dict[\"action\"] = \"MoveArm\"\n                if action == \"MoveArmXP\":\n                    base_position[\"x\"] += MOVE_ARM_CONSTANT\n                elif action == \"MoveArmXM\":\n                    base_position[\"x\"] -= MOVE_ARM_CONSTANT\n                elif action == \"MoveArmYP\":\n                    base_position[\"y\"] += MOVE_ARM_CONSTANT\n                elif action == \"MoveArmYM\":\n                    base_position[\"y\"] -= MOVE_ARM_CONSTANT\n                elif action == \"MoveArmZP\":\n                    base_position[\"z\"] += MOVE_ARM_CONSTANT\n                elif action == \"MoveArmZM\":\n                    base_position[\"z\"] -= MOVE_ARM_CONSTANT\n                action_dict[\"position\"] = {\n                    k: v for (k, v) in base_position.items() if k in [\"x\", \"y\", \"z\"]\n                }\n\n        elif \"RotateArm\" in action:\n            copy_additions = copy.deepcopy(ADDITIONAL_ARM_ARGS)\n            action_dict = {**action_dict, **copy_additions}\n\n            if action == ROTATE_WRIST_PITCH_P:\n                action_dict[\"action\"] = \"RotateWristRelative\"\n                action_dict[\"pitch\"] = 15\n            elif action == ROTATE_WRIST_PITCH_M:\n                action_dict[\"action\"] = \"RotateWristRelative\"\n                action_dict[\"pitch\"] = -15\n            elif action == ROTATE_WRIST_YAW_P:\n                action_dict[\"action\"] = \"RotateWristRelative\"\n                action_dict[\"yaw\"] = 15\n            elif action == 
ROTATE_WRIST_YAW_M:\n                action_dict[\"action\"] = \"RotateWristRelative\"\n                action_dict[\"yaw\"] = -15\n            elif action == ROTATE_ELBOW_P:\n                action_dict[\"action\"] = \"RotateElbowRelative\"\n                action_dict[\"degrees\"] = 15\n            elif action == ROTATE_ELBOW_M:\n                action_dict[\"action\"] = \"RotateElbowRelative\"\n                action_dict[\"degrees\"] = -15\n            else:\n                raise ValueError(\"invalid action \" + str(action))\n\n        elif action in [LOOK_UP, LOOK_DOWN]:\n            copy_additions = copy.deepcopy(ADDITIONAL_ARM_ARGS)\n            action_dict = {**action_dict, **copy_additions}\n            if action == LOOK_UP:\n                action_dict[\"action\"] = LOOK_UP\n            elif action == LOOK_DOWN:\n                action_dict[\"action\"] = LOOK_DOWN\n\n        # there exists other actions e.g. \"PlaceObjectAtPoint\"\n\n        sr = self.controller.step(action_dict)\n        self.list_of_actions_so_far.append(action_dict)\n\n        if self._verbose:\n            print(self.controller.last_event)\n\n        if self.restrict_to_initially_reachable_points:\n            self._snap_agent_to_initially_reachable()\n\n        if skip_render:\n            assert last_frame is not None\n            self.last_event.frame = last_frame\n\n        return sr\n"
  },
  {
    "path": "allenact_plugins/manipulathor_plugin/manipulathor_sensors.py",
    "content": "\"\"\"Utility classes and functions for sensory inputs used by the models.\"\"\"\n\nfrom typing import Any, Union, Optional\n\nimport gym\nimport numpy as np\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.embodiedai.sensors.vision_sensors import DepthSensor, RGBSensor\nfrom allenact.base_abstractions.task import Task\nfrom allenact.utils.misc_utils import prepare_locals_for_super\n\nfrom allenact_plugins.manipulathor_plugin.arm_calculation_utils import (\n    world_coords_to_agent_coords,\n    state_dict_to_tensor,\n    diff_position,\n    coord_system_transform,\n)\nfrom allenact_plugins.manipulathor_plugin.manipulathor_environment import (\n    ManipulaTHOREnvironment,\n)\n\n\nclass DepthSensorThor(\n    DepthSensor[\n        Union[ManipulaTHOREnvironment],\n        Union[Task[ManipulaTHOREnvironment]],\n    ]\n):\n    \"\"\"Sensor for Depth images in THOR.\n\n    Returns from a running ManipulaTHOREnvironment instance, the current\n    RGB frame corresponding to the agent's egocentric view.\n    \"\"\"\n\n    def frame_from_env(\n        self, env: ManipulaTHOREnvironment, task: Optional[Task]\n    ) -> np.ndarray:\n        return env.controller.last_event.depth_frame.copy()\n\n\nclass NoVisionSensorThor(\n    RGBSensor[\n        Union[ManipulaTHOREnvironment],\n        Union[Task[ManipulaTHOREnvironment]],\n    ]\n):\n    \"\"\"Sensor for RGB images in THOR.\n\n    Returns from a running ManipulaTHOREnvironment instance, the current\n    RGB frame corresponding to the agent's egocentric view.\n    \"\"\"\n\n    def frame_from_env(\n        self, env: ManipulaTHOREnvironment, task: Optional[Task]\n    ) -> np.ndarray:\n        return np.zeros_like(env.current_frame)\n\n\nclass AgentRelativeCurrentObjectStateThorSensor(Sensor):\n    def __init__(self, uuid: str = \"relative_current_obj_state\", **kwargs: Any):\n        observation_space = gym.spaces.Box(\n            low=-100, high=100, shape=(6,), dtype=np.float32\n        ) 
 # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def get_observation(\n        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any\n    ) -> Any:\n        object_id = task.task_info[\"objectId\"]\n        current_object_state = env.get_object_by_id(object_id)\n        relative_current_obj = world_coords_to_agent_coords(\n            current_object_state, env.controller.last_event.metadata[\"agent\"]\n        )\n        result = state_dict_to_tensor(\n            dict(\n                position=relative_current_obj[\"position\"],\n                rotation=relative_current_obj[\"rotation\"],\n            )\n        )\n        return result\n\n\nclass RelativeObjectToGoalSensor(Sensor):\n    def __init__(\n        self,\n        uuid: str = \"relative_obj_to_goal\",\n        coord_system: str = \"xyz_unsigned\",\n        **kwargs: Any\n    ):\n        assert coord_system in [\n            \"xyz_unsigned\",\n            \"xyz_signed\",\n            \"polar_radian\",\n            \"polar_trigo\",\n        ]\n        self.coord_system = coord_system\n        if coord_system == \"polar_trigo\":\n            obs_dim = 5\n        else:\n            obs_dim = 3\n        observation_space = gym.spaces.Box(\n            low=-100, high=100, shape=(obs_dim,), dtype=np.float32\n        )\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def get_observation(\n        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any\n    ) -> Any:\n        goal_obj_id = task.task_info[\"objectId\"]\n        object_info = env.get_object_by_id(goal_obj_id)\n        target_state = task.task_info[\"target_location\"]\n\n        agent_state = env.controller.last_event.metadata[\"agent\"]\n\n        relative_current_obj = world_coords_to_agent_coords(object_info, agent_state)\n        relative_goal_state = world_coords_to_agent_coords(target_state, 
agent_state)\n        relative_distance = diff_position(\n            relative_current_obj,\n            relative_goal_state,\n            absolute=False,\n        )\n\n        result = coord_system_transform(relative_distance, self.coord_system)\n        return result\n\n\nclass InitialObjectToGoalSensor(Sensor):\n    def __init__(self, uuid: str = \"initial_obj_to_goal\", **kwargs: Any):\n        # observation_space = gym.spaces.Discrete(len(self.detector_types))\n        observation_space = gym.spaces.Box(\n            low=-100, high=100, shape=(3,), dtype=np.float32\n        )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def get_observation(\n        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any\n    ) -> Any:\n        object_source_location = task.task_info[\"initial_object_location\"]\n        target_state = task.task_info[\"target_location\"]\n        agent_state = task.task_info[\"agent_initial_state\"]\n\n        relative_current_obj = world_coords_to_agent_coords(\n            object_source_location, agent_state\n        )\n        relative_goal_state = world_coords_to_agent_coords(target_state, agent_state)\n        relative_distance = diff_position(relative_current_obj, relative_goal_state)\n        result = state_dict_to_tensor(dict(position=relative_distance))\n        return result\n\n\nclass DistanceObjectToGoalSensor(Sensor):\n    def __init__(self, uuid: str = \"distance_obj_to_goal\", **kwargs: Any):\n        # observation_space = gym.spaces.Discrete(len(self.detector_types))\n        observation_space = gym.spaces.Box(\n            low=-100, high=100, shape=(3,), dtype=np.float32\n        )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def get_observation(\n        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any\n    ) -> Any:\n     
   goal_obj_id = task.task_info[\"objectId\"]\n        object_info = env.get_object_by_id(goal_obj_id)\n        target_state = task.task_info[\"target_location\"]\n\n        agent_state = env.controller.last_event.metadata[\"agent\"]\n\n        relative_current_obj = world_coords_to_agent_coords(object_info, agent_state)\n        relative_goal_state = world_coords_to_agent_coords(target_state, agent_state)\n        relative_distance = diff_position(relative_current_obj, relative_goal_state)\n        result = state_dict_to_tensor(dict(position=relative_distance))\n\n        result = ((result**2).sum() ** 0.5).view(1)\n        return result\n\n\nclass RelativeAgentArmToObjectSensor(Sensor):\n    def __init__(\n        self,\n        uuid: str = \"relative_agent_arm_to_obj\",\n        coord_system: str = \"xyz_unsigned\",\n        **kwargs: Any\n    ):\n        assert coord_system in [\n            \"xyz_unsigned\",\n            \"xyz_signed\",\n            \"polar_radian\",\n            \"polar_trigo\",\n        ]\n        self.coord_system = coord_system\n        if coord_system == \"polar_trigo\":\n            obs_dim = 5\n        else:\n            obs_dim = 3\n        observation_space = gym.spaces.Box(\n            low=-100, high=100, shape=(obs_dim,), dtype=np.float32\n        )\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def get_observation(\n        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any\n    ) -> Any:\n        goal_obj_id = task.task_info[\"objectId\"]\n        object_info = env.get_object_by_id(goal_obj_id)\n        hand_state = env.get_absolute_hand_state()\n\n        relative_goal_obj = world_coords_to_agent_coords(\n            object_info, env.controller.last_event.metadata[\"agent\"]\n        )\n        relative_hand_state = world_coords_to_agent_coords(\n            hand_state, env.controller.last_event.metadata[\"agent\"]\n        )\n        relative_distance = diff_position(\n            
relative_goal_obj,\n            relative_hand_state,\n            absolute=False,\n        )\n        result = coord_system_transform(relative_distance, self.coord_system)\n        return result\n\n\nclass InitialAgentArmToObjectSensor(Sensor):\n    def __init__(self, uuid: str = \"initial_agent_arm_to_obj\", **kwargs: Any):\n        observation_space = gym.spaces.Box(\n            low=-100, high=100, shape=(3,), dtype=np.float32\n        )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def get_observation(\n        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any\n    ) -> Any:\n\n        object_source_location = task.task_info[\"initial_object_location\"]\n        initial_hand_state = task.task_info[\"initial_hand_state\"]\n\n        relative_goal_obj = world_coords_to_agent_coords(\n            object_source_location, env.controller.last_event.metadata[\"agent\"]\n        )\n        relative_hand_state = world_coords_to_agent_coords(\n            initial_hand_state, env.controller.last_event.metadata[\"agent\"]\n        )\n        relative_distance = diff_position(relative_goal_obj, relative_hand_state)\n        result = state_dict_to_tensor(dict(position=relative_distance))\n\n        return result\n\n\nclass DistanceAgentArmToObjectSensor(Sensor):\n    def __init__(self, uuid: str = \"distance_agent_arm_to_obj\", **kwargs: Any):\n        observation_space = gym.spaces.Box(\n            low=-100, high=100, shape=(3,), dtype=np.float32\n        )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def get_observation(\n        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any\n    ) -> Any:\n        goal_obj_id = task.task_info[\"objectId\"]\n        object_info = env.get_object_by_id(goal_obj_id)\n        hand_state = env.get_absolute_hand_state()\n\n        
relative_goal_obj = world_coords_to_agent_coords(\n            object_info, env.controller.last_event.metadata[\"agent\"]\n        )\n        relative_hand_state = world_coords_to_agent_coords(\n            hand_state, env.controller.last_event.metadata[\"agent\"]\n        )\n        relative_distance = diff_position(relative_goal_obj, relative_hand_state)\n        result = state_dict_to_tensor(dict(position=relative_distance))\n\n        result = ((result**2).sum() ** 0.5).view(1)\n        return result\n\n\nclass PickedUpObjSensor(Sensor):\n    def __init__(self, uuid: str = \"pickedup_object\", **kwargs: Any):\n        observation_space = gym.spaces.Box(\n            low=0, high=1, shape=(1,), dtype=np.float32\n        )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def get_observation(\n        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any\n    ) -> Any:\n        return task.object_picked_up\n"
  },
  {
    "path": "allenact_plugins/manipulathor_plugin/manipulathor_task_samplers.py",
    "content": "\"\"\"Task Samplers for the task of ArmPointNav.\"\"\"\n\nimport json\nimport random\nfrom typing import List, Dict, Optional, Any, Union\n\nimport gym\n\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.base_abstractions.task import Task\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.utils.experiment_utils import set_deterministic_cudnn, set_seed\nfrom allenact_plugins.manipulathor_plugin.manipulathor_environment import (\n    ManipulaTHOREnvironment,\n)\nfrom allenact_plugins.manipulathor_plugin.manipulathor_tasks import (\n    AbstractPickUpDropOffTask,\n    ArmPointNavTask,\n    RotateArmPointNavTask,\n    CamRotateArmPointNavTask,\n    EasyArmPointNavTask,\n)\nfrom allenact_plugins.manipulathor_plugin.manipulathor_utils import (\n    transport_wrapper,\n    initialize_arm,\n)\nfrom allenact_plugins.manipulathor_plugin.manipulathor_viz import (\n    ImageVisualizer,\n    LoggerVisualizer,\n)\n\n\nclass AbstractMidLevelArmTaskSampler(TaskSampler):\n\n    _TASK_TYPE = Task\n\n    def __init__(\n        self,\n        scenes: List[str],\n        sensors: List[Sensor],\n        max_steps: int,\n        env_args: Dict[str, Any],\n        action_space: gym.Space,\n        rewards_config: Dict,\n        objects: List[str],\n        scene_period: Optional[Union[int, str]] = None,\n        max_tasks: Optional[int] = None,\n        num_task_per_scene: Optional[int] = None,\n        seed: Optional[int] = None,\n        deterministic_cudnn: bool = False,\n        fixed_tasks: Optional[List[Dict[str, Any]]] = None,\n        visualizers: Optional[List[LoggerVisualizer]] = None,\n        *args,\n        **kwargs\n    ) -> None:\n        self.rewards_config = rewards_config\n        self.env_args = env_args\n        self.scenes = scenes\n        self.grid_size = 0.25\n        self.env: Optional[ManipulaTHOREnvironment] = None\n        self.sensors = sensors\n        self.max_steps = max_steps\n        
self._action_space = action_space\n        self.objects = objects\n        self.num_task_per_scene = num_task_per_scene\n\n        self.scene_counter: Optional[int] = None\n        self.scene_order: Optional[List[str]] = None\n        self.scene_id: Optional[int] = None\n        self.scene_period: Optional[Union[str, int]] = (\n            scene_period  # default makes a random choice\n        )\n        self.max_tasks: Optional[int] = None\n        self.reset_tasks = max_tasks\n\n        self._last_sampled_task: Optional[Task] = None\n\n        self.seed: Optional[int] = None\n        self.set_seed(seed)\n\n        if deterministic_cudnn:\n            set_deterministic_cudnn()\n\n        self.reset()\n        self.visualizers = visualizers if visualizers is not None else []\n        self.sampler_mode = kwargs[\"sampler_mode\"]\n        self.cap_training = kwargs[\"cap_training\"]\n\n    def _create_environment(self, **kwargs) -> ManipulaTHOREnvironment:\n        env = ManipulaTHOREnvironment(\n            make_agents_visible=False,\n            object_open_speed=0.05,\n            env_args=self.env_args,\n        )\n\n        return env\n\n    @property\n    def last_sampled_task(self) -> Optional[Task]:\n        return self._last_sampled_task\n\n    def close(self) -> None:\n        if self.env is not None:\n            self.env.stop()\n\n    @property\n    def all_observation_spaces_equal(self) -> bool:\n        \"\"\"Check if observation spaces equal.\n\n        # Returns\n\n        True if all Tasks that can be sampled by this sampler have the\n            same observation space. 
Otherwise False.\n        \"\"\"\n        return True\n\n    def reset(self):\n        self.scene_counter = 0\n        self.scene_order = list(range(len(self.scenes)))\n        random.shuffle(self.scene_order)\n        self.scene_id = 0\n        self.sampler_index = 0\n\n        self.max_tasks = self.reset_tasks\n\n    def set_seed(self, seed: int):\n        self.seed = seed\n        if seed is not None:\n            set_seed(seed)\n\n\nclass SimpleArmPointNavGeneralSampler(AbstractMidLevelArmTaskSampler):\n\n    _TASK_TYPE = AbstractPickUpDropOffTask\n\n    def __init__(self, **kwargs) -> None:\n\n        super(SimpleArmPointNavGeneralSampler, self).__init__(**kwargs)\n        self.all_possible_points = []\n        for scene in self.scenes:\n            for object in self.objects:\n                valid_position_adr = \"datasets/apnd-dataset/valid_object_positions/valid_{}_positions_in_{}.json\".format(\n                    object, scene\n                )\n                try:\n                    with open(valid_position_adr) as f:\n                        data_points = json.load(f)\n                except Exception:\n                    print(\"Failed to load\", valid_position_adr)\n                    continue\n                visible_data = [\n                    data for data in data_points[scene] if data[\"visibility\"]\n                ]\n                self.all_possible_points += visible_data\n\n        self.countertop_object_to_data_id = self.calc_possible_trajectories(\n            self.all_possible_points\n        )\n\n        scene_names = set(\n            [\n                self.all_possible_points[counter[0]][\"scene_name\"]\n                for counter in self.countertop_object_to_data_id.values()\n                if len(counter) > 1\n            ]\n        )\n\n        if len(set(scene_names)) < len(self.scenes):\n            print(\"Not all scenes appear\")\n\n        print(\n            \"Len dataset\",\n            
len(self.all_possible_points),\n            \"total_remained\",\n            sum([len(v) for v in self.countertop_object_to_data_id.values()]),\n        )\n\n        if (\n            self.sampler_mode != \"train\"\n        ):  # Be aware that this totally overrides some stuff\n            self.deterministic_data_list = []\n            for scene in self.scenes:\n                for object in self.objects:\n                    valid_position_adr = \"datasets/apnd-dataset/deterministic_tasks/tasks_{}_positions_in_{}.json\".format(\n                        object, scene\n                    )\n                    try:\n                        with open(valid_position_adr) as f:\n                            data_points = json.load(f)\n                    except Exception:\n                        print(\"Failed to load\", valid_position_adr)\n                        continue\n                    visible_data = [\n                        dict(scene=scene, index=i, datapoint=data)\n                        for (i, data) in enumerate(data_points[scene])\n                    ]\n                    if self.num_task_per_scene is None:\n                        self.deterministic_data_list += visible_data\n                    else:  # select a small number of data points for fast evaluation\n                        self.deterministic_data_list += visible_data[\n                            : min(self.num_task_per_scene, len(visible_data))\n                        ]\n\n        if self.sampler_mode == \"test\":\n            random.shuffle(self.deterministic_data_list)\n            self.max_tasks = self.reset_tasks = len(self.deterministic_data_list)\n\n    def next_task(\n        self, force_advance_scene: bool = False\n    ) -> Optional[AbstractPickUpDropOffTask]:\n        if self.max_tasks is not None and self.max_tasks <= 0:\n            return None\n\n        if self.sampler_mode != \"train\" and self.length <= 0:\n            return None\n\n        source_data_point, 
target_data_point = self.get_source_target_indices()\n\n        scene = source_data_point[\"scene_name\"]\n\n        assert source_data_point[\"object_id\"] == target_data_point[\"object_id\"]\n        assert source_data_point[\"scene_name\"] == target_data_point[\"scene_name\"]\n\n        if self.env is None:\n            self.env = self._create_environment()\n\n        self.env.reset(\n            scene_name=scene, agentMode=\"arm\", agentControllerType=\"mid-level\"\n        )\n\n        initialize_arm(self.env.controller)\n\n        source_location = source_data_point\n        target_location = dict(\n            position=target_data_point[\"object_location\"],\n            rotation={\"x\": 0, \"y\": 0, \"z\": 0},\n        )\n\n        task_info = {\n            \"objectId\": source_location[\"object_id\"],\n            \"countertop_id\": source_location[\"countertop_id\"],\n            \"source_location\": source_location,\n            \"target_location\": target_location,\n        }\n\n        this_controller = self.env\n\n        transport_wrapper(\n            this_controller,\n            source_location[\"object_id\"],\n            source_location[\"object_location\"],\n        )\n        agent_state = source_location[\"agent_pose\"]\n\n        this_controller.step(\n            dict(\n                action=\"TeleportFull\",\n                standing=True,\n                x=agent_state[\"position\"][\"x\"],\n                y=agent_state[\"position\"][\"y\"],\n                z=agent_state[\"position\"][\"z\"],\n                rotation=dict(\n                    x=agent_state[\"rotation\"][\"x\"],\n                    y=agent_state[\"rotation\"][\"y\"],\n                    z=agent_state[\"rotation\"][\"z\"],\n                ),\n                horizon=agent_state[\"cameraHorizon\"],\n            )\n        )\n\n        should_visualize_goal_start = [\n            x for x in self.visualizers if issubclass(type(x), ImageVisualizer)\n        ]\n        
if len(should_visualize_goal_start) > 0:\n            task_info[\"visualization_source\"] = source_data_point\n            task_info[\"visualization_target\"] = target_data_point\n\n        self._last_sampled_task = self._TASK_TYPE(\n            env=self.env,\n            sensors=self.sensors,\n            task_info=task_info,\n            max_steps=self.max_steps,\n            action_space=self._action_space,\n            visualizers=self.visualizers,\n            reward_configs=self.rewards_config,\n        )\n\n        return self._last_sampled_task\n\n    @property\n    def total_unique(self) -> Optional[Union[int, float]]:\n        if self.sampler_mode == \"train\":\n            return None\n        else:\n            return min(self.max_tasks, len(self.deterministic_data_list))\n\n    @property\n    def length(self) -> Union[int, float]:\n        \"\"\"Length.\n\n        # Returns\n\n        Number of total tasks remaining that can be sampled. Can be float('inf').\n        \"\"\"\n        return (\n            self.total_unique - self.sampler_index\n            if self.sampler_mode != \"train\"\n            else (float(\"inf\") if self.max_tasks is None else self.max_tasks)\n        )\n\n    def get_source_target_indices(self):\n        if self.sampler_mode == \"train\":\n            valid_countertops = [\n                k for (k, v) in self.countertop_object_to_data_id.items() if len(v) > 1\n            ]\n            countertop_id = random.choice(valid_countertops)\n            indices = random.sample(self.countertop_object_to_data_id[countertop_id], 2)\n            result = (\n                self.all_possible_points[indices[0]],\n                self.all_possible_points[indices[1]],\n            )\n        else:\n            result = self.deterministic_data_list[self.sampler_index][\"datapoint\"]\n            self.sampler_index += 1\n\n        return result\n\n    def calc_possible_trajectories(self, all_possible_points):\n\n        object_to_data_id = 
{}\n\n        for i in range(len(all_possible_points)):\n            object_id = all_possible_points[i][\"object_id\"]\n            object_to_data_id.setdefault(object_id, [])\n            object_to_data_id[object_id].append(i)\n\n        return object_to_data_id\n\n\nclass ArmPointNavTaskSampler(SimpleArmPointNavGeneralSampler):\n    _TASK_TYPE = ArmPointNavTask\n\n    def __init__(self, **kwargs) -> None:\n\n        super(ArmPointNavTaskSampler, self).__init__(**kwargs)\n        possible_initial_locations = (\n            \"datasets/apnd-dataset/valid_agent_initial_locations.json\"\n        )\n        if self.sampler_mode == \"test\":\n            possible_initial_locations = (\n                \"datasets/apnd-dataset/deterministic_valid_agent_initial_locations.json\"\n            )\n        with open(possible_initial_locations) as f:\n            self.possible_agent_reachable_poses = json.load(f)\n\n    def next_task(\n        self, force_advance_scene: bool = False\n    ) -> Optional[AbstractPickUpDropOffTask]:\n        if self.max_tasks is not None and self.max_tasks <= 0:\n            return None\n\n        if self.sampler_mode != \"train\" and self.length <= 0:\n            return None\n\n        source_data_point, target_data_point = self.get_source_target_indices()\n\n        scene = source_data_point[\"scene_name\"]\n\n        assert source_data_point[\"object_id\"] == target_data_point[\"object_id\"]\n        assert source_data_point[\"scene_name\"] == target_data_point[\"scene_name\"]\n\n        if self.env is None:\n            self.env = self._create_environment()\n\n        self.env.reset(\n            scene_name=scene, agentMode=\"arm\", agentControllerType=\"mid-level\"\n        )\n\n        initialize_arm(self.env.controller)\n\n        source_location = source_data_point\n        target_location = dict(\n            position=target_data_point[\"object_location\"],\n            rotation={\"x\": 0, \"y\": 0, \"z\": 0},\n            
countertop_id=target_data_point[\"countertop_id\"],\n        )\n\n        this_controller = self.env\n\n        transport_wrapper(\n            this_controller,\n            source_location[\"object_id\"],\n            source_location[\"object_location\"],\n        )\n\n        agent_state = source_location[\n            \"initial_agent_pose\"\n        ]  # THe only line different from father\n\n        this_controller.step(\n            dict(\n                action=\"TeleportFull\",\n                standing=True,\n                x=agent_state[\"position\"][\"x\"],\n                y=agent_state[\"position\"][\"y\"],\n                z=agent_state[\"position\"][\"z\"],\n                rotation=dict(\n                    x=agent_state[\"rotation\"][\"x\"],\n                    y=agent_state[\"rotation\"][\"y\"],\n                    z=agent_state[\"rotation\"][\"z\"],\n                ),\n                horizon=agent_state[\"cameraHorizon\"],\n            )\n        )\n\n        should_visualize_goal_start = [\n            x for x in self.visualizers if issubclass(type(x), ImageVisualizer)\n        ]\n\n        initial_object_info = self.env.get_object_by_id(source_location[\"object_id\"])\n        initial_agent_location = self.env.controller.last_event.metadata[\"agent\"]\n        initial_hand_state = self.env.get_absolute_hand_state()\n\n        task_info = {\n            \"objectId\": source_location[\"object_id\"],\n            \"source_location\": source_location,  # used in analysis\n            \"target_location\": target_location,  # used in analysis\n            \"agent_initial_state\": initial_agent_location,  # not used\n            \"initial_object_location\": initial_object_info,  # not used\n            \"initial_hand_state\": initial_hand_state,\n        }\n\n        if len(should_visualize_goal_start) > 0:\n            task_info[\"visualization_source\"] = source_data_point\n            task_info[\"visualization_target\"] = target_data_point\n\n 
       self._last_sampled_task = self._TASK_TYPE(\n            env=self.env,\n            sensors=self.sensors,\n            task_info=task_info,\n            max_steps=self.max_steps,\n            action_space=self._action_space,\n            visualizers=self.visualizers,\n            reward_configs=self.rewards_config,\n        )\n\n        return self._last_sampled_task\n\n    def get_source_target_indices(self):\n        if self.sampler_mode == \"train\":\n            valid_countertops = [\n                k for (k, v) in self.countertop_object_to_data_id.items() if len(v) > 1\n            ]\n            countertop_id = random.choice(valid_countertops)\n            indices = random.sample(self.countertop_object_to_data_id[countertop_id], 2)\n            result = (\n                self.all_possible_points[indices[0]],\n                self.all_possible_points[indices[1]],\n            )\n            scene_name = result[0][\"scene_name\"]\n            selected_agent_init_loc = random.choice(\n                self.possible_agent_reachable_poses[scene_name]\n            )\n            initial_agent_pose = {\n                \"name\": \"agent\",\n                \"position\": {\n                    \"x\": selected_agent_init_loc[\"x\"],\n                    \"y\": selected_agent_init_loc[\"y\"],\n                    \"z\": selected_agent_init_loc[\"z\"],\n                },\n                \"rotation\": {\n                    \"x\": -0.0,\n                    \"y\": selected_agent_init_loc[\"rotation\"],\n                    \"z\": 0.0,\n                },\n                \"cameraHorizon\": selected_agent_init_loc[\"horizon\"],\n                \"isStanding\": True,\n            }\n            result[0][\"initial_agent_pose\"] = initial_agent_pose\n        else:  # agent init location needs to be fixed, therefore we load a fixed valid agent init that is previously randomized\n            result = self.deterministic_data_list[self.sampler_index][\"datapoint\"]\n   
         scene_name = self.deterministic_data_list[self.sampler_index][\"scene\"]\n            datapoint_original_index = self.deterministic_data_list[self.sampler_index][\n                \"index\"\n            ]\n            selected_agent_init_loc = self.possible_agent_reachable_poses[scene_name][\n                datapoint_original_index\n            ]\n            initial_agent_pose = {\n                \"name\": \"agent\",\n                \"position\": {\n                    \"x\": selected_agent_init_loc[\"x\"],\n                    \"y\": selected_agent_init_loc[\"y\"],\n                    \"z\": selected_agent_init_loc[\"z\"],\n                },\n                \"rotation\": {\n                    \"x\": -0.0,\n                    \"y\": selected_agent_init_loc[\"rotation\"],\n                    \"z\": 0.0,\n                },\n                \"cameraHorizon\": selected_agent_init_loc[\"horizon\"],\n                \"isStanding\": True,\n            }\n            result[0][\"initial_agent_pose\"] = initial_agent_pose\n            self.sampler_index += 1\n\n        return result\n\n\nclass RotateArmPointNavTaskSampler(ArmPointNavTaskSampler):\n    _TASK_TYPE = RotateArmPointNavTask\n\n\nclass CamRotateArmPointNavTaskSampler(ArmPointNavTaskSampler):\n    _TASK_TYPE = CamRotateArmPointNavTask\n\n\nclass EasyArmPointNavTaskSampler(ArmPointNavTaskSampler):\n    _TASK_TYPE = EasyArmPointNavTask\n\n\ndef get_all_tuples_from_list(list):\n    result = []\n    for first_ind in range(len(list) - 1):\n        for second_ind in range(first_ind + 1, len(list)):\n            result.append([list[first_ind], list[second_ind]])\n    return result\n"
  },
  {
    "path": "allenact_plugins/manipulathor_plugin/manipulathor_tasks.py",
    "content": "\"\"\"Task Definitions for the task of ArmPointNav.\"\"\"\n\nimport copy\nfrom typing import Dict, Tuple, List, Any, Optional\n\nimport gym\nimport numpy as np\n\nfrom allenact.base_abstractions.misc import RLStepResult\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.base_abstractions.task import Task\nfrom allenact_plugins.manipulathor_plugin.armpointnav_constants import (\n    MOVE_ARM_CONSTANT,\n    DISTANCE_EPS,\n)\nfrom allenact_plugins.manipulathor_plugin.manipulathor_constants import (\n    MOVE_ARM_HEIGHT_P,\n    MOVE_ARM_HEIGHT_M,\n    MOVE_ARM_X_P,\n    MOVE_ARM_X_M,\n    MOVE_ARM_Y_P,\n    MOVE_ARM_Y_M,\n    MOVE_ARM_Z_P,\n    MOVE_ARM_Z_M,\n    ROTATE_WRIST_PITCH_P,\n    ROTATE_WRIST_PITCH_M,\n    ROTATE_WRIST_YAW_P,\n    ROTATE_WRIST_YAW_M,\n    ROTATE_ELBOW_P,\n    ROTATE_ELBOW_M,\n    LOOK_UP,\n    LOOK_DOWN,\n    MOVE_AHEAD,\n    ROTATE_RIGHT,\n    ROTATE_LEFT,\n    PICKUP,\n    DONE,\n)\nfrom allenact_plugins.manipulathor_plugin.manipulathor_environment import (\n    ManipulaTHOREnvironment,\n    position_distance,\n)\nfrom allenact_plugins.manipulathor_plugin.manipulathor_viz import LoggerVisualizer\n\n\nclass AbstractPickUpDropOffTask(Task[ManipulaTHOREnvironment]):\n\n    _actions = (\n        MOVE_ARM_HEIGHT_P,\n        MOVE_ARM_HEIGHT_M,\n        MOVE_ARM_X_P,\n        MOVE_ARM_X_M,\n        MOVE_ARM_Y_P,\n        MOVE_ARM_Y_M,\n        MOVE_ARM_Z_P,\n        MOVE_ARM_Z_M,\n        MOVE_AHEAD,\n        ROTATE_RIGHT,\n        ROTATE_LEFT,\n    )\n\n    # The new commit of AI2THOR has an issue where objects vibrate a bit\n    # without any external force. To eliminate the vibration effect, we\n    # introduce _vibration_dist_dict (from an external csv file) when checking the disturbance.\n    # By default it is None, i.e. 
we assume there is no vibration.\n\n    _vibration_dist_dict: Optional[Dict] = None\n\n    def __init__(\n        self,\n        env: ManipulaTHOREnvironment,\n        sensors: List[Sensor],\n        task_info: Dict[str, Any],\n        max_steps: int,\n        visualizers: Optional[List[LoggerVisualizer]] = None,\n        **kwargs\n    ) -> None:\n        \"\"\"Initializer.\n\n        See class documentation for parameter definitions.\n        \"\"\"\n        super().__init__(\n            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs\n        )\n        self._took_end_action: bool = False\n        self._success: Optional[bool] = False\n        self._subsampled_locations_from_which_obj_visible: Optional[\n            List[Tuple[float, float, int, int]]\n        ] = None\n        self.visualizers = visualizers if visualizers is not None else []\n        self.start_visualize()\n        self.action_sequence_and_success = []\n        self._took_end_action: bool = False\n        self._success: Optional[bool] = False\n        self._subsampled_locations_from_which_obj_visible: Optional[\n            List[Tuple[float, float, int, int]]\n        ] = None\n\n        # in allenact initialization is with 0.2\n        self.last_obj_to_goal_distance = None\n        self.last_arm_to_obj_distance = None\n        self.object_picked_up = False\n        self.got_reward_for_pickup = False\n        self.reward_configs = kwargs[\"reward_configs\"]\n        self.initial_object_locations = self.env.get_current_object_locations()\n\n    @property\n    def action_space(self):\n        return gym.spaces.Discrete(len(self._actions))\n\n    def reached_terminal_state(self) -> bool:\n        return self._took_end_action\n\n    @classmethod\n    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:\n        return cls._actions\n\n    def close(self) -> None:\n        self.env.stop()\n\n    def obj_state_aproximity(self, s1, s2):\n        # KIANA ignore rotation 
for now\n        position1 = s1[\"position\"]\n        position2 = s2[\"position\"]\n        eps = MOVE_ARM_CONSTANT * 2\n        return (\n            abs(position1[\"x\"] - position2[\"x\"]) < eps\n            and abs(position1[\"y\"] - position2[\"y\"]) < eps\n            and abs(position1[\"z\"] - position2[\"z\"]) < eps\n        )\n\n    def start_visualize(self):\n        for visualizer in self.visualizers:\n            if not visualizer.is_empty():\n                print(\"OH NO VISUALIZER WAS NOT EMPTY\")\n                visualizer.finish_episode(self.env, self, self.task_info)\n                visualizer.finish_episode_metrics(self, self.task_info, None)\n            visualizer.log(self.env)\n\n    def visualize(self, action_str):\n\n        for vizualizer in self.visualizers:\n            vizualizer.log(self.env, action_str)\n\n    def finish_visualizer(self):\n\n        for visualizer in self.visualizers:\n            visualizer.finish_episode(self.env, self, self.task_info)\n\n    def finish_visualizer_metrics(self, metric_results):\n\n        for visualizer in self.visualizers:\n            visualizer.finish_episode_metrics(self, self.task_info, metric_results)\n\n    def render(self, mode: str = \"rgb\", *args, **kwargs) -> np.ndarray:\n        assert mode == \"rgb\", \"only rgb rendering is implemented\"\n        return self.env.current_frame\n\n    def calc_action_stat_metrics(self) -> Dict[str, Any]:\n        action_stat = {\"action_stat/\" + action_str: 0.0 for action_str in self._actions}\n        action_success_stat = {\n            \"action_success/\" + action_str: 0.0 for action_str in self._actions\n        }\n        action_success_stat[\"action_success/total\"] = 0.0\n\n        seq_len = len(self.action_sequence_and_success)\n        for action_name, action_success in self.action_sequence_and_success:\n            action_stat[\"action_stat/\" + action_name] += 1.0\n            action_success_stat[\n                
\"action_success/{}\".format(action_name)\n            ] += action_success\n            action_success_stat[\"action_success/total\"] += action_success\n\n        action_success_stat[\"action_success/total\"] /= seq_len\n\n        for action_name in self._actions:\n            action_success_stat[\"action_success/{}\".format(action_name)] /= max(\n                action_stat[\"action_stat/\" + action_name], 1.0\n            )\n            action_stat[\"action_stat/\" + action_name] /= seq_len\n\n        result = {**action_stat, **action_success_stat}\n\n        return result\n\n    def metrics(self) -> Dict[str, Any]:\n        result = super(AbstractPickUpDropOffTask, self).metrics()\n\n        if self.is_done():\n            result = {**result, **self.calc_action_stat_metrics()}\n\n            # 1. goal object metrics\n            final_obj_distance_from_goal = self.obj_distance_from_goal()\n            result[\"average/final_obj_distance_from_goal\"] = (\n                final_obj_distance_from_goal\n            )\n            final_arm_distance_from_obj = self.arm_distance_from_obj()\n            result[\"average/final_arm_distance_from_obj\"] = final_arm_distance_from_obj\n\n            final_obj_pickup = 1 if self.object_picked_up else 0\n            result[\"average/final_obj_pickup\"] = final_obj_pickup\n\n            original_distance = self.get_original_object_distance() + DISTANCE_EPS\n            result[\"average/original_distance\"] = original_distance\n\n            # this ratio can be more than 1\n            if self.object_picked_up:\n                ratio_distance_left = final_obj_distance_from_goal / original_distance\n                result[\"average/ratio_distance_left\"] = ratio_distance_left\n                result[\"average/eplen_pickup\"] = self.eplen_pickup\n\n            # 2. 
disturbance with other objects\n            current_object_locations = self.env.get_current_object_locations()\n            objects_moved = self.env.get_objects_moved(\n                self.initial_object_locations,\n                current_object_locations,\n                self.task_info[\"objectId\"],\n                self._vibration_dist_dict,\n            )\n            result[\"disturbance/objects_moved_num\"] = len(objects_moved)\n\n            # 3. conditioned on success\n            if self._success:\n                result[\"average/eplen_success\"] = result[\"ep_length\"]\n                result[\"average/success_wo_disturb\"] = len(objects_moved) == 0\n\n            else:\n                result[\"average/success_wo_disturb\"] = 0.0\n\n            result[\"success\"] = self._success\n\n            self.finish_visualizer_metrics(result)\n            self.finish_visualizer()\n            self.action_sequence_and_success = []\n\n        return result\n\n    def _step(self, action: int) -> RLStepResult:\n        raise Exception(\"Not implemented\")\n\n    def arm_distance_from_obj(self):\n        goal_obj_id = self.task_info[\"objectId\"]\n        object_info = self.env.get_object_by_id(goal_obj_id)\n        hand_state = self.env.get_absolute_hand_state()\n        return position_distance(object_info, hand_state)\n\n    def obj_distance_from_goal(self):\n        goal_obj_id = self.task_info[\"objectId\"]\n        object_info = self.env.get_object_by_id(goal_obj_id)\n        goal_state = self.task_info[\"target_location\"]\n        return position_distance(object_info, goal_state)\n\n    def get_original_object_distance(self):\n        goal_obj_id = self.task_info[\"objectId\"]\n        s_init = dict(position=self.task_info[\"source_location\"][\"object_location\"])\n        current_location = self.env.get_object_by_id(goal_obj_id)\n\n        original_object_distance = position_distance(s_init, current_location)\n        return original_object_distance\n\n   
 def judge(self) -> float:\n        \"\"\"Compute the reward after having taken a step.\"\"\"\n        raise Exception(\"Not implemented\")\n\n\nclass ArmPointNavTask(AbstractPickUpDropOffTask):\n    _actions = (\n        MOVE_ARM_HEIGHT_P,\n        MOVE_ARM_HEIGHT_M,\n        MOVE_ARM_X_P,\n        MOVE_ARM_X_M,\n        MOVE_ARM_Y_P,\n        MOVE_ARM_Y_M,\n        MOVE_ARM_Z_P,\n        MOVE_ARM_Z_M,\n        MOVE_AHEAD,\n        ROTATE_RIGHT,\n        ROTATE_LEFT,\n        PICKUP,\n        DONE,\n    )\n\n    def __init__(\n        self,\n        env: ManipulaTHOREnvironment,\n        sensors: List[Sensor],\n        task_info: Dict[str, Any],\n        max_steps: int,\n        visualizers: Optional[List[LoggerVisualizer]] = None,\n        **kwargs\n    ) -> None:\n        super().__init__(\n            env=env,\n            sensors=sensors,\n            task_info=task_info,\n            max_steps=max_steps,\n            visualizers=visualizers,\n            **kwargs\n        )\n        self.cumulated_disturb_distance_all = 0.0\n        self.cumulated_disturb_distance_visible = 0.0\n        # NOTE: visible distance can be negative, no determinitic relation with\n        #   all distance\n        self.previous_object_locations = copy.deepcopy(self.initial_object_locations)\n        self.current_penalized_distance = 0.0  # used in Sensor for auxiliary task\n\n    def metrics(self) -> Dict[str, Any]:\n        result = super(ArmPointNavTask, self).metrics()\n\n        if self.is_done():\n            # add disturbance distance metrics\n            result[\"disturbance/objects_moved_distance\"] = (\n                self.cumulated_disturb_distance_all\n            )\n            result[\"disturbance/objects_moved_distance_vis\"] = (\n                self.cumulated_disturb_distance_visible\n            )\n\n        return result\n\n    def visualize(self, **kwargs):\n\n        for vizualizer in self.visualizers:\n            vizualizer.log(self.env, **kwargs)\n\n    def 
_step(self, action: int) -> RLStepResult:\n\n        action_str = self.class_action_names()[action]\n\n        self._last_action_str = action_str\n        action_dict = {\"action\": action_str}\n        object_id = self.task_info[\"objectId\"]\n        if action_str == PICKUP:\n            action_dict = {**action_dict, \"object_id\": object_id}\n        self.env.step(action_dict)\n        self.last_action_success = self.env.last_action_success\n\n        last_action_name = self._last_action_str\n        last_action_success = float(self.last_action_success)\n        self.action_sequence_and_success.append((last_action_name, last_action_success))\n\n        # If the object has not been picked up yet and it was picked up in the previous step update parameters to integrate it into reward\n        if not self.object_picked_up:\n\n            if self.env.is_object_at_low_level_hand(object_id):\n                self.object_picked_up = True\n                self.eplen_pickup = (\n                    self._num_steps_taken + 1\n                )  # plus one because this step has not been counted yet\n\n        if action_str == DONE:\n            self._took_end_action = True\n            object_state = self.env.get_object_by_id(object_id)\n            goal_state = self.task_info[\"target_location\"]\n            goal_achieved = self.object_picked_up and self.obj_state_aproximity(\n                object_state, goal_state\n            )\n            self.last_action_success = goal_achieved\n            self._success = goal_achieved\n\n        step_result = RLStepResult(\n            observation=self.get_observations(),\n            reward=self.judge(),\n            done=self.is_done(),\n            info={\"last_action_success\": self.last_action_success},\n        )\n        return step_result\n\n    def judge(self) -> float:\n        \"\"\"Compute the reward after having taken a step.\"\"\"\n        reward = self.reward_configs[\"step_penalty\"]\n\n        if not 
self.last_action_success or (\n            self._last_action_str == PICKUP and not self.object_picked_up\n        ):\n            reward += self.reward_configs[\"failed_action_penalty\"]\n\n        if self._took_end_action:\n            reward += (\n                self.reward_configs[\"goal_success_reward\"]\n                if self._success\n                else self.reward_configs[\"failed_stop_reward\"]\n            )\n\n        # increase reward if object pickup and only do it once\n        if not self.got_reward_for_pickup and self.object_picked_up:\n            reward += self.reward_configs[\"pickup_success_reward\"]\n            self.got_reward_for_pickup = True\n\n        current_obj_to_arm_distance = self.arm_distance_from_obj()\n        if self.last_arm_to_obj_distance is None:\n            delta_arm_to_obj_distance_reward = 0\n        else:\n            delta_arm_to_obj_distance_reward = (\n                self.last_arm_to_obj_distance - current_obj_to_arm_distance\n            )\n        self.last_arm_to_obj_distance = current_obj_to_arm_distance\n        reward += delta_arm_to_obj_distance_reward\n\n        current_obj_to_goal_distance = self.obj_distance_from_goal()\n        if self.last_obj_to_goal_distance is None:\n            delta_obj_to_goal_distance_reward = 0\n        else:\n            delta_obj_to_goal_distance_reward = (\n                self.last_obj_to_goal_distance - current_obj_to_goal_distance\n            )\n        self.last_obj_to_goal_distance = current_obj_to_goal_distance\n        reward += delta_obj_to_goal_distance_reward\n\n        # add disturbance cost\n        ## here we measure disturbance by the sum of moving distance of all objects\n        ## note that collided object may move for a while wo external force due to inertia\n        ## and we may also consider mass\n        current_object_locations = self.env.get_current_object_locations()\n\n        disturb_distance_visible = self.env.get_objects_move_distance(\n         
   initial_object_locations=self.initial_object_locations,\n            previous_object_locations=self.previous_object_locations,\n            current_object_locations=current_object_locations,\n            target_object_id=self.task_info[\"objectId\"],\n            only_visible=True,\n            thres_dict=self._vibration_dist_dict,\n        )\n        disturb_distance_all = self.env.get_objects_move_distance(\n            initial_object_locations=self.initial_object_locations,\n            previous_object_locations=self.previous_object_locations,\n            current_object_locations=current_object_locations,\n            target_object_id=self.task_info[\"objectId\"],\n            only_visible=False,\n            thres_dict=self._vibration_dist_dict,\n        )\n\n        self.cumulated_disturb_distance_all += disturb_distance_all\n        self.cumulated_disturb_distance_visible += disturb_distance_visible\n\n        penalized_distance = (\n            disturb_distance_visible\n            if self.reward_configs[\"disturb_visible\"]\n            else disturb_distance_all\n        )\n        reward += self.reward_configs[\"disturb_penalty\"] * penalized_distance\n        self.current_penalized_distance = penalized_distance\n\n        self.previous_object_locations = current_object_locations\n\n        self.visualize(\n            action_str=self._last_action_str,\n            disturbance_str=str(round(penalized_distance, 4)),\n        )\n\n        return float(reward)\n\n\nclass RotateArmPointNavTask(ArmPointNavTask):\n    _actions = (\n        MOVE_ARM_HEIGHT_P,\n        MOVE_ARM_HEIGHT_M,\n        MOVE_ARM_X_P,\n        MOVE_ARM_X_M,\n        MOVE_ARM_Y_P,\n        MOVE_ARM_Y_M,\n        MOVE_ARM_Z_P,\n        MOVE_ARM_Z_M,\n        ROTATE_WRIST_PITCH_P,\n        ROTATE_WRIST_PITCH_M,\n        ROTATE_WRIST_YAW_P,\n        ROTATE_WRIST_YAW_M,\n        ROTATE_ELBOW_P,\n        ROTATE_ELBOW_M,\n        MOVE_AHEAD,\n        ROTATE_RIGHT,\n        ROTATE_LEFT,\n     
   PICKUP,\n        DONE,\n    )\n\n\nclass CamRotateArmPointNavTask(ArmPointNavTask):\n    _actions = (\n        MOVE_ARM_HEIGHT_P,\n        MOVE_ARM_HEIGHT_M,\n        MOVE_ARM_X_P,\n        MOVE_ARM_X_M,\n        MOVE_ARM_Y_P,\n        MOVE_ARM_Y_M,\n        MOVE_ARM_Z_P,\n        MOVE_ARM_Z_M,\n        ROTATE_WRIST_PITCH_P,\n        ROTATE_WRIST_PITCH_M,\n        ROTATE_WRIST_YAW_P,\n        ROTATE_WRIST_YAW_M,\n        ROTATE_ELBOW_P,\n        ROTATE_ELBOW_M,\n        LOOK_UP,\n        LOOK_DOWN,\n        MOVE_AHEAD,\n        ROTATE_RIGHT,\n        ROTATE_LEFT,\n        PICKUP,\n        DONE,\n    )\n\n\nclass EasyArmPointNavTask(ArmPointNavTask):\n    _actions = (\n        MOVE_ARM_HEIGHT_P,\n        MOVE_ARM_HEIGHT_M,\n        MOVE_ARM_X_P,\n        MOVE_ARM_X_M,\n        MOVE_ARM_Y_P,\n        MOVE_ARM_Y_M,\n        MOVE_ARM_Z_P,\n        MOVE_ARM_Z_M,\n        MOVE_AHEAD,\n        ROTATE_RIGHT,\n        ROTATE_LEFT,\n        # PICKUP,\n        # DONE,\n    )\n\n    def _step(self, action: int) -> RLStepResult:\n\n        action_str = self.class_action_names()[action]\n\n        self._last_action_str = action_str\n        action_dict = {\"action\": action_str}\n        object_id = self.task_info[\"objectId\"]\n        if action_str == PICKUP:\n            action_dict = {**action_dict, \"object_id\": object_id}\n        self.env.step(action_dict)\n        self.last_action_success = self.env.last_action_success\n\n        last_action_name = self._last_action_str\n        last_action_success = float(self.last_action_success)\n        self.action_sequence_and_success.append((last_action_name, last_action_success))\n        self.visualize(last_action_name)\n\n        # If the object has not been picked up yet and it was picked up in the previous step update parameters to integrate it into reward\n        if not self.object_picked_up:\n            if (\n                object_id\n                in 
self.env.controller.last_event.metadata[\"arm\"][\"pickupableObjects\"]\n            ):\n                self.env.step(dict(action=\"PickupObject\"))\n                #  we are doing an additional pass here, label is not right and if we fail we will do it twice\n                object_inventory = self.env.controller.last_event.metadata[\"arm\"][\n                    \"heldObjects\"\n                ]\n                if len(object_inventory) > 0 and object_id not in object_inventory:\n                    self.env.step(dict(action=\"ReleaseObject\"))\n\n            if self.env.is_object_at_low_level_hand(object_id):\n                self.object_picked_up = True\n                self.eplen_pickup = (\n                    self._num_steps_taken + 1\n                )  # plus one because this step has not been counted yet\n\n        if self.object_picked_up:\n\n            object_state = self.env.get_object_by_id(object_id)\n            goal_state = self.task_info[\"target_location\"]\n            goal_achieved = self.object_picked_up and self.obj_state_aproximity(\n                object_state, goal_state\n            )\n            if goal_achieved:\n                self._took_end_action = True\n                self.last_action_success = goal_achieved\n                self._success = goal_achieved\n\n        step_result = RLStepResult(\n            observation=self.get_observations(),\n            reward=self.judge(),\n            done=self.is_done(),\n            info={\"last_action_success\": self.last_action_success},\n        )\n        return step_result\n\n    # def judge(self) -> float: Seems like we are fine on this\n"
  },
  {
    "path": "allenact_plugins/manipulathor_plugin/manipulathor_utils.py",
    "content": "import ai2thor\n\nfrom allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment\nfrom allenact_plugins.manipulathor_plugin.armpointnav_constants import (\n    get_agent_start_positions,\n)\nfrom allenact_plugins.manipulathor_plugin.manipulathor_constants import (\n    ADDITIONAL_ARM_ARGS,\n)\n\n\ndef make_all_objects_unbreakable(controller):\n    all_breakable_objects = [\n        o[\"objectType\"]\n        for o in controller.last_event.metadata[\"objects\"]\n        if o[\"breakable\"] is True\n    ]\n    all_breakable_objects = set(all_breakable_objects)\n    for obj_type in all_breakable_objects:\n        controller.step(action=\"MakeObjectsOfTypeUnbreakable\", objectType=obj_type)\n\n\ndef reset_environment_and_additional_commands(controller, scene_name):\n    controller.reset(scene_name)\n    controller.step(action=\"MakeAllObjectsMoveable\")\n    controller.step(action=\"MakeObjectsStaticKinematicMassThreshold\")\n    make_all_objects_unbreakable(controller)\n    return\n\n\ndef transport_wrapper(controller, target_object, target_location):\n    transport_detail = dict(\n        action=\"PlaceObjectAtPoint\",\n        objectId=target_object,\n        position=target_location,\n        forceKinematic=True,\n    )\n    advance_detail = dict(action=\"AdvancePhysicsStep\", simSeconds=1.0)\n\n    if issubclass(type(controller), IThorEnvironment):\n        event = controller.step(transport_detail)\n        controller.step(advance_detail)\n    elif type(controller) == ai2thor.controller.Controller:\n        event = controller.step(**transport_detail)\n        controller.step(**advance_detail)\n    else:\n        raise NotImplementedError\n    return event\n\n\ndef initialize_arm(controller):\n    # for start arm from high up,\n    scene = controller.last_event.metadata[\"sceneName\"]\n    initial_pose = get_agent_start_positions()[scene]\n    event1 = controller.step(\n        dict(\n            action=\"TeleportFull\",\n            
standing=True,\n            x=initial_pose[\"x\"],\n            y=initial_pose[\"y\"],\n            z=initial_pose[\"z\"],\n            rotation=dict(x=0, y=initial_pose[\"rotation\"], z=0),\n            horizon=initial_pose[\"horizon\"],\n        )\n    )\n    event2 = controller.step(\n        dict(action=\"MoveArm\", position=dict(x=0.0, y=0, z=0.35), **ADDITIONAL_ARM_ARGS)\n    )\n    event3 = controller.step(dict(action=\"MoveArmBase\", y=0.8, **ADDITIONAL_ARM_ARGS))\n    return event1, event2, event3\n"
  },
  {
    "path": "allenact_plugins/manipulathor_plugin/manipulathor_viz.py",
    "content": "\"\"\"Utility functions and classes for visualization and logging.\"\"\"\n\nimport os\nfrom datetime import datetime\n\nimport cv2\nimport imageio\nimport matplotlib\nimport matplotlib.cm as cm\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom allenact_plugins.manipulathor_plugin.manipulathor_utils import initialize_arm\nfrom allenact_plugins.manipulathor_plugin.manipulathor_utils import (\n    reset_environment_and_additional_commands,\n    transport_wrapper,\n)\n\n\nclass LoggerVisualizer:\n    def __init__(self, exp_name=\"\", log_dir=\"\"):\n        if log_dir == \"\":\n            log_dir = self.__class__.__name__\n        if exp_name == \"\":\n            exp_name = \"NoNameExp\"\n        self.exp_name = exp_name\n        log_dir = os.path.join(\n            exp_name,\n            log_dir,\n        )\n        self.log_dir = log_dir\n        os.makedirs(self.log_dir, exist_ok=True)\n        self.log_queue = []\n        self.action_queue = []\n        self.logger_index = 0\n\n    def log(self, environment, action_str):\n        raise Exception(\"Not Implemented\")\n\n    def is_empty(self):\n        return len(self.log_queue) == 0\n\n    def finish_episode_metrics(self, episode_info, task_info, metric_results):\n        pass\n\n    def finish_episode(self, environment, episode_info, task_info):\n        pass\n\n\nclass TestMetricLogger(LoggerVisualizer):\n    def __init__(self, exp_name=\"\", log_dir=\"\", **kwargs):\n        super().__init__(exp_name=exp_name, log_dir=log_dir)\n        self.total_metric_dict = {}\n        log_file_name = os.path.join(self.log_dir, \"test_metric.txt\")\n        self.metric_log_file = open(log_file_name, \"w\")\n        self.disturbance_distance_queue = []\n\n    def average_dict(self):\n        result = {}\n        for k, v in self.total_metric_dict.items():\n            result[k] = sum(v) / len(v)\n        return result\n\n    def finish_episode_metrics(self, episode_info, task_info, 
metric_results=None):\n\n        if metric_results is None:\n            print(\"had to reset\")\n            self.action_queue = []\n            self.disturbance_distance_queue = []\n            return\n\n        for k in metric_results.keys():\n            if \"metric\" in k or k in [\"ep_length\", \"reward\", \"success\"]:\n                self.total_metric_dict.setdefault(k, [])\n                self.total_metric_dict[k].append(metric_results[k])\n        print(\n            \"total\",\n            len(self.total_metric_dict[\"success\"]),\n            \"average test metric\",\n            self.average_dict(),\n        )\n\n        # save the task info and all the action queue and results\n        log_dict = {\n            \"logger_number\": self.logger_index,\n            \"action_sequence\": self.action_queue,\n            \"disturbance_sequence\": self.disturbance_distance_queue,\n            \"task_info_metrics\": metric_results,\n        }\n        self.logger_index += 1\n        self.metric_log_file.write(str(log_dict))\n        self.metric_log_file.write(\"\\n\")\n        self.metric_log_file.flush()\n        print(\"Logging to\", self.metric_log_file.name)\n\n        self.action_queue = []\n        self.disturbance_distance_queue = []\n\n    def log(self, environment, action_str=\"\", disturbance_str=\"\"):\n        # We can add agent arm and state location if needed\n        self.action_queue.append(action_str)\n        self.disturbance_distance_queue.append(disturbance_str)\n\n\nclass BringObjImageVisualizer(LoggerVisualizer):\n    def finish_episode(self, environment, episode_info, task_info):\n        now = datetime.now()\n        time_to_write = now.strftime(\"%Y_%m_%d_%H_%M_%S_%f\")\n        time_to_write += \"log_ind_{}\".format(self.logger_index)\n        self.logger_index += 1\n        print(\"Loggigng\", time_to_write, \"len\", len(self.log_queue))\n\n        source_object_id = task_info[\"source_object_id\"]\n        goal_object_id = 
task_info[\"goal_object_id\"]\n        pickup_success = episode_info.object_picked_up\n        episode_success = episode_info._success\n\n        # Put back if you want the images\n        # for i, img in enumerate(self.log_queue):\n        #     image_dir = os.path.join(self.log_dir, time_to_write + '_seq{}.png'.format(str(i)))\n        #     cv2.imwrite(image_dir, img[:,:,[2,1,0]])\n\n        episode_success_offset = \"succ\" if episode_success else \"fail\"\n        pickup_success_offset = \"succ\" if pickup_success else \"fail\"\n\n        gif_name = (\n            time_to_write\n            + \"_from_\"\n            + source_object_id.split(\"|\")[0]\n            + \"_to_\"\n            + goal_object_id.split(\"|\")[0]\n            + \"_pickup_\"\n            + pickup_success_offset\n            + \"_episode_\"\n            + episode_success_offset\n            + \".gif\"\n        )\n        concat_all_images = np.expand_dims(np.stack(self.log_queue, axis=0), axis=1)\n        save_image_list_to_gif(concat_all_images, gif_name, self.log_dir)\n        this_controller = environment.controller\n        scene = this_controller.last_event.metadata[\"sceneName\"]\n        reset_environment_and_additional_commands(this_controller, scene)\n        self.log_start_goal(\n            environment,\n            task_info[\"visualization_source\"],\n            tag=\"start\",\n            img_adr=os.path.join(self.log_dir, time_to_write),\n        )\n        self.log_start_goal(\n            environment,\n            task_info[\"visualization_target\"],\n            tag=\"goal\",\n            img_adr=os.path.join(self.log_dir, time_to_write),\n        )\n\n        self.log_queue = []\n        self.action_queue = []\n\n    def log(self, environment, action_str):\n        image_tensor = environment.current_frame\n        self.action_queue.append(action_str)\n        self.log_queue.append(image_tensor)\n\n    def log_start_goal(self, env, task_info, tag, img_adr):\n        
object_location = task_info[\"object_location\"]\n        object_id = task_info[\"object_id\"]\n        agent_state = task_info[\"agent_pose\"]\n        this_controller = env.controller\n        # We should not reset here\n        # for start arm from high up as a cheating, this block is very important. never remove\n        event1, event2, event3 = initialize_arm(this_controller)\n        if not (\n            event1.metadata[\"lastActionSuccess\"]\n            and event2.metadata[\"lastActionSuccess\"]\n            and event3.metadata[\"lastActionSuccess\"]\n        ):\n            print(\"ERROR: ARM MOVEMENT FAILED in logging! SHOULD NEVER HAPPEN\")\n\n        event = transport_wrapper(this_controller, object_id, object_location)\n        if not event.metadata[\"lastActionSuccess\"]:\n            print(\"ERROR: oh no could not transport in logging\")\n\n        event = this_controller.step(\n            dict(\n                action=\"TeleportFull\",\n                standing=True,\n                x=agent_state[\"position\"][\"x\"],\n                y=agent_state[\"position\"][\"y\"],\n                z=agent_state[\"position\"][\"z\"],\n                rotation=dict(\n                    x=agent_state[\"rotation\"][\"x\"],\n                    y=agent_state[\"rotation\"][\"y\"],\n                    z=agent_state[\"rotation\"][\"z\"],\n                ),\n                horizon=agent_state[\"cameraHorizon\"],\n            )\n        )\n        if not event.metadata[\"lastActionSuccess\"]:\n            print(\"ERROR: oh no could not teleport in logging\")\n\n        image_tensor = this_controller.last_event.frame\n        image_dir = (\n            img_adr + \"_obj_\" + object_id.split(\"|\")[0] + \"_pickup_\" + tag + \".png\"\n        )\n        cv2.imwrite(image_dir, image_tensor[:, :, [2, 1, 0]])\n\n        # Saving the mask\n        target_object_id = task_info[\"object_id\"]\n        all_visible_masks = this_controller.last_event.instance_masks\n        
if target_object_id in all_visible_masks:\n            mask_frame = all_visible_masks[target_object_id]\n        else:\n            mask_frame = np.zeros(env.controller.last_event.frame[:, :, 0].shape)\n        mask_dir = (\n            img_adr + \"_obj_\" + object_id.split(\"|\")[0] + \"_pickup_\" + tag + \"_mask.png\"\n        )\n        cv2.imwrite(mask_dir, mask_frame.astype(float) * 255.0)\n\n\nclass ImageVisualizer(LoggerVisualizer):\n    def __init__(\n        self,\n        exp_name=\"\",\n        log_dir=\"\",\n        add_top_down_view: bool = False,\n        add_depth_map: bool = False,\n    ):\n        super().__init__(exp_name=exp_name, log_dir=log_dir)\n        self.add_top_down_view = add_top_down_view\n        self.add_depth_map = add_depth_map\n        if self.add_top_down_view:\n            self.top_down_queue = []\n        self.disturbance_distance_queue = []\n\n    def finish_episode(self, environment, episode_info, task_info):\n        time_to_write = \"log_ind_{:03d}\".format(self.logger_index)\n        self.logger_index += 1\n        print(\"Logging\", time_to_write, \"len\", len(self.log_queue))\n        object_id = task_info[\"objectId\"]\n        scene_name = task_info[\"source_location\"][\"scene_name\"]\n        source_countertop = task_info[\"source_location\"][\"countertop_id\"]\n        target_countertop = task_info[\"target_location\"][\"countertop_id\"]\n\n        pickup_success = episode_info.object_picked_up\n        episode_success = episode_info._success\n\n        # Put back if you want the images\n        # for i, img in enumerate(self.log_queue):\n        #     image_dir = os.path.join(self.log_dir, time_to_write + '_seq{}.png'.format(str(i)))\n        #     cv2.imwrite(image_dir, img[:,:,[2,1,0]])\n\n        episode_success_offset = \"succ\" if episode_success else \"fail\"\n        pickup_success_offset = \"succ\" if pickup_success else \"fail\"\n        gif_name = (\n            time_to_write\n            + \"_pickup_\"\n  
          + pickup_success_offset\n            + \"_episode_\"\n            + episode_success_offset\n            + \"_\"\n            + scene_name.split(\"_\")[0]\n            + \"_obj_\"\n            + object_id.split(\"|\")[0]\n            + \"_from_\"\n            + source_countertop.split(\"|\")[0]\n            + \"_to_\"\n            + target_countertop.split(\"|\")[0]\n            + \".gif\"\n        )\n\n        self.log_queue = put_annotation_on_image(\n            self.log_queue, self.disturbance_distance_queue\n        )\n\n        concat_all_images = np.expand_dims(np.stack(self.log_queue, axis=0), axis=1)\n        if self.add_top_down_view:\n            topdown_all_images = np.expand_dims(\n                np.stack(self.top_down_queue, axis=0), axis=1\n            )  # (T, 1, H, W, 3)\n            concat_all_images = np.concatenate(\n                [concat_all_images, topdown_all_images], axis=1\n            )  # (T, 2, H, W, 3)\n\n        save_image_list_to_gif(concat_all_images, gif_name, self.log_dir)\n\n        self.log_start_goal(\n            environment,\n            task_info[\"visualization_source\"],\n            tag=\"start\",\n            img_adr=os.path.join(self.log_dir, time_to_write),\n        )\n        self.log_start_goal(\n            environment,\n            task_info[\"visualization_target\"],\n            tag=\"goal\",\n            img_adr=os.path.join(self.log_dir, time_to_write),\n        )\n\n        self.log_queue = []\n        self.action_queue = []\n        self.disturbance_distance_queue = []\n        if self.add_top_down_view:\n            self.top_down_queue = []\n\n    def log(self, environment, action_str=\"\", disturbance_str=\"\"):\n        self.action_queue.append(action_str)\n        self.disturbance_distance_queue.append(disturbance_str)\n\n        image_tensor = environment.current_frame\n        self.log_queue.append(image_tensor)\n\n        if self.add_top_down_view:\n            # Reference: 
https://github.com/allenai/ai2thor/pull/814\n            event = environment.controller.step(action=\"GetMapViewCameraProperties\")\n            event = environment.controller.step(\n                action=\"AddThirdPartyCamera\", **event.metadata[\"actionReturn\"]\n            )\n            self.top_down_queue.append(event.third_party_camera_frames[0])\n\n    def log_start_goal(self, env, task_info, tag, img_adr):\n        object_location = task_info[\"object_location\"]\n        object_id = task_info[\"object_id\"]\n        agent_state = task_info[\"agent_pose\"]\n        this_controller = env.controller\n        scene = this_controller.last_event.metadata[\n            \"sceneName\"\n        ]  # maybe we need to reset env actually]\n        reset_environment_and_additional_commands(this_controller, scene)\n        # for start arm from high up as a cheating, this block is very important. never remove\n        event1, event2, event3 = initialize_arm(this_controller)\n        if not (\n            event1.metadata[\"lastActionSuccess\"]\n            and event2.metadata[\"lastActionSuccess\"]\n            and event3.metadata[\"lastActionSuccess\"]\n        ):\n            print(\"ERROR: ARM MOVEMENT FAILED in logging! 
SHOULD NEVER HAPPEN\")\n\n        event = transport_wrapper(this_controller, object_id, object_location)\n        if not event.metadata[\"lastActionSuccess\"]:\n            print(\"ERROR: oh no could not transport in logging\")\n\n        event = this_controller.step(\n            dict(\n                action=\"TeleportFull\",\n                standing=True,\n                x=agent_state[\"position\"][\"x\"],\n                y=agent_state[\"position\"][\"y\"],\n                z=agent_state[\"position\"][\"z\"],\n                rotation=dict(\n                    x=agent_state[\"rotation\"][\"x\"],\n                    y=agent_state[\"rotation\"][\"y\"],\n                    z=agent_state[\"rotation\"][\"z\"],\n                ),\n                horizon=agent_state[\"cameraHorizon\"],\n            )\n        )\n        if not event.metadata[\"lastActionSuccess\"]:\n            print(\"ERROR: oh no could not teleport in logging\")\n\n        image_tensor = this_controller.last_event.frame\n        image_dir = img_adr + \"_\" + tag + \".png\"\n        cv2.imwrite(image_dir, image_tensor[:, :, [2, 1, 0]])\n\n        if self.add_depth_map:\n            depth = this_controller.last_event.depth_frame.copy()  # (H, W)\n            depth[depth > 5.0] = 5.0\n            norm = matplotlib.colors.Normalize(vmin=depth.min(), vmax=depth.max())\n            rgb = cm.get_cmap(plt.get_cmap(\"viridis\"))(norm(depth))[:, :, :3]  # [0,1]\n            rgb = (rgb * 255).astype(np.uint8)\n\n            depth_dir = img_adr + \"_\" + tag + \"_depth.png\"\n            cv2.imwrite(depth_dir, rgb[:, :, [2, 1, 0]])\n\n\ndef save_image_list_to_gif(image_list, gif_name, gif_dir):\n    gif_adr = os.path.join(gif_dir, gif_name)\n\n    seq_len, cols, w, h, c = image_list.shape\n\n    pallet = np.zeros(\n        (seq_len, w, h * cols, c)\n    )  # to support multiple animations in one gif\n\n    for col_ind in range(cols):\n        pallet[:, :, col_ind * h : (col_ind + 1) * h, :] = 
image_list[:, col_ind]\n\n    if not os.path.exists(gif_dir):\n        os.makedirs(gif_dir)\n    imageio.mimsave(gif_adr, pallet.astype(np.uint8), format=\"GIF\", duration=1 / 5)\n    print(\"Saved result in \", gif_adr)\n\n\ndef put_annotation_on_image(images, annotations):\n    all_images = []\n    for img, annot in zip(images, annotations):\n        position = (10, 10)\n\n        from PIL import Image, ImageDraw\n\n        pil_img = Image.fromarray(img)\n        draw = ImageDraw.Draw(pil_img)\n        draw.text(position, annot, (0, 0, 0))\n        all_images.append(np.array(pil_img))\n\n    return all_images\n"
  },
  {
    "path": "allenact_plugins/minigrid_plugin/__init__.py",
    "content": "from allenact.utils.system import ImportChecker\n\nwith ImportChecker(\n    \"\\n\\nPlease install babyai with:\\n\\n\"\n    \"pip install -e git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd#egg=babyai\\n\",\n):\n    import babyai\n"
  },
  {
    "path": "allenact_plugins/minigrid_plugin/configs/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/minigrid_plugin/configs/minigrid_nomemory.py",
    "content": "\"\"\"Experiment Config for MiniGrid tutorial.\"\"\"\n\nimport gym\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact_plugins.minigrid_plugin.minigrid_models import MiniGridSimpleConv\nfrom allenact_plugins.minigrid_plugin.minigrid_tasks import MiniGridTask\nfrom projects.tutorials.minigrid_tutorial import MiniGridTutorialExperimentConfig\n\n\nclass MiniGridNoMemoryExperimentConfig(MiniGridTutorialExperimentConfig):\n    @classmethod\n    def tag(cls) -> str:\n        return \"MiniGridNoMemory\"\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        return MiniGridSimpleConv(\n            action_space=gym.spaces.Discrete(len(MiniGridTask.class_action_names())),\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            num_objects=cls.SENSORS[0].num_objects,\n            num_colors=cls.SENSORS[0].num_colors,\n            num_states=cls.SENSORS[0].num_states,\n        )\n"
  },
  {
    "path": "allenact_plugins/minigrid_plugin/data/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/minigrid_plugin/extra_environment.yml",
    "content": "dependencies:\n  - patsy>=0.5.1\n  - pip\n  - pip:\n      - gym-minigrid>=1.0.1\n      - pickle5\n"
  },
  {
    "path": "allenact_plugins/minigrid_plugin/extra_requirements.txt",
    "content": "patsy>=0.5.1\ngym-minigrid>=1.0.1\npickle5\n"
  },
  {
    "path": "allenact_plugins/minigrid_plugin/minigrid_environments.py",
    "content": "import copy\nfrom typing import Optional, Set\n\nimport numpy as np\nfrom gym import register\nfrom gym_minigrid.envs import CrossingEnv\nfrom gym_minigrid.minigrid import Lava, Wall\n\n\nclass FastCrossing(CrossingEnv):\n    \"\"\"Similar to `CrossingEnv`, but to support faster task sampling as per\n    `repeat_failed_task_for_min_steps` flag in MiniGridTaskSampler.\"\"\"\n\n    def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, seed=None):\n        self.init_agent_pos: Optional[np.ndarray] = None\n        self.init_agent_dir: Optional[int] = None\n        self.step_count: Optional[int] = None\n        super(FastCrossing, self).__init__(\n            size=size,\n            num_crossings=num_crossings,\n            obstacle_type=obstacle_type,\n            seed=seed,\n        )\n\n    def same_seed_reset(self):\n        assert self.init_agent_pos is not None\n\n        # Current position and direction of the agent\n        self.agent_pos = self.init_agent_pos\n        self.agent_dir = self.init_agent_dir\n\n        # Check that the agent doesn't overlap with an object\n        start_cell = self.grid.get(*self.agent_pos)\n        assert start_cell is None or start_cell.can_overlap()\n\n        assert self.carrying is None\n\n        # Step count since episode start\n        self.step_count = 0\n\n        # Return first observation\n        obs = self.gen_obs()\n        return obs\n\n    def reset(self, partial_reset: bool = False):\n        super(FastCrossing, self).reset()\n        self.init_agent_pos = copy.deepcopy(self.agent_pos)\n        self.init_agent_dir = self.agent_dir\n\n\nclass AskForHelpSimpleCrossing(CrossingEnv):\n    \"\"\"Corresponds to WC FAULTY SWITCH environment.\"\"\"\n\n    def __init__(\n        self,\n        size=9,\n        num_crossings=1,\n        obstacle_type=Wall,\n        seed=None,\n        exploration_reward: Optional[float] = None,\n        death_penalty: Optional[float] = None,\n        
toggle_is_permenant: bool = False,\n    ):\n        self.init_agent_pos: Optional[np.ndarray] = None\n        self.init_agent_dir: Optional[int] = None\n        self.should_reveal_image: bool = False\n        self.exploration_reward = exploration_reward\n        self.death_penalty = death_penalty\n\n        self.explored_points: Set = set()\n        self._was_successful = False\n        self.toggle_is_permanent = toggle_is_permenant\n\n        self.step_count: Optional[int] = None\n\n        super(AskForHelpSimpleCrossing, self).__init__(\n            size=size,\n            num_crossings=num_crossings,\n            obstacle_type=obstacle_type,\n            seed=seed,\n        )\n\n    @property\n    def was_successful(self) -> bool:\n        return self._was_successful\n\n    def gen_obs(self):\n        obs = super(AskForHelpSimpleCrossing, self).gen_obs()\n        if not self.should_reveal_image:\n            obs[\"image\"] *= 0\n        return obs\n\n    def metrics(self):\n        return {\n            \"explored_count\": len(self.explored_points),\n            \"final_distance\": float(\n                min(\n                    abs(x - (self.width - 2)) + abs(y - (self.height - 2))\n                    for x, y in self.explored_points\n                )\n            ),\n        }\n\n    def step(self, action: int):\n        \"\"\"Reveal the observation only if the `toggle` action is executed.\"\"\"\n        if action == self.actions.toggle:\n            self.should_reveal_image = True\n        else:\n            self.should_reveal_image = (\n                self.should_reveal_image and self.toggle_is_permanent\n            )\n\n        minigrid_obs, reward, done, info = super(AskForHelpSimpleCrossing, self).step(\n            action=action\n        )\n\n        assert not self._was_successful, \"Called step after done.\"\n        self._was_successful = self._was_successful or (reward > 0)\n\n        if (\n            done\n            and self.steps_remaining 
!= 0\n            and (not self._was_successful)\n            and self.death_penalty is not None\n        ):\n            reward += self.death_penalty\n\n        t = tuple(self.agent_pos)\n        if self.exploration_reward is not None:\n            if t not in self.explored_points:\n                reward += self.exploration_reward\n        self.explored_points.add(t)\n\n        return minigrid_obs, reward, done, info\n\n    def same_seed_reset(self):\n        assert self.init_agent_pos is not None\n        self._was_successful = False\n\n        # Current position and direction of the agent\n        self.agent_pos = self.init_agent_pos\n        self.agent_dir = self.init_agent_dir\n\n        self.explored_points.clear()\n        self.explored_points.add(tuple(self.agent_pos))\n        self.should_reveal_image = False\n\n        # Check that the agent doesn't overlap with an object\n        start_cell = self.grid.get(*self.agent_pos)\n        assert start_cell is None or start_cell.can_overlap()\n\n        assert self.carrying is None\n\n        # Step count since episode start\n        self.step_count = 0\n\n        # Return first observation\n        obs = self.gen_obs()\n        return obs\n\n    def reset(self, partial_reset: bool = False):\n        super(AskForHelpSimpleCrossing, self).reset()\n        self.explored_points.clear()\n        self.explored_points.add(tuple(self.agent_pos))\n        self.init_agent_pos = copy.deepcopy(self.agent_pos)\n        self.init_agent_dir = self.agent_dir\n        self._was_successful = False\n        self.should_reveal_image = False\n\n\nclass LavaCrossingS25N10(CrossingEnv):\n    def __init__(self):\n        super(LavaCrossingS25N10, self).__init__(size=25, num_crossings=10)\n\n\nclass LavaCrossingS15N7(CrossingEnv):\n    def __init__(self):\n        super(LavaCrossingS15N7, self).__init__(size=15, num_crossings=7)\n\n\nclass LavaCrossingS11N7(CrossingEnv):\n    def __init__(self):\n        super(LavaCrossingS11N7, 
self).__init__(size=9, num_crossings=4)\n\n\nregister(\n    id=\"MiniGrid-LavaCrossingS25N10-v0\",\n    entry_point=\"allenact_plugins.minigrid_plugin.minigrid_environments:LavaCrossingS25N10\",\n)\n\nregister(\n    id=\"MiniGrid-LavaCrossingS15N7-v0\",\n    entry_point=\"allenact_plugins.minigrid_plugin.minigrid_environments:LavaCrossingS15N7\",\n)\n\nregister(\n    id=\"MiniGrid-LavaCrossingS11N7-v0\",\n    entry_point=\"allenact_plugins.minigrid_plugin.minigrid_environments:LavaCrossingS11N7\",\n)\n"
  },
  {
    "path": "allenact_plugins/minigrid_plugin/minigrid_models.py",
    "content": "import abc\nfrom typing import Callable, Dict, Optional, Tuple, cast\n\nimport gym\nimport numpy as np\nimport torch\nfrom gym.spaces.dict import Dict as SpaceDict\nimport torch.nn as nn\n\nfrom allenact.algorithms.onpolicy_sync.policy import (\n    ActorCriticModel,\n    Memory,\n    DistributionType,\n    ActorCriticOutput,\n    ObservationType,\n)\nfrom allenact.base_abstractions.distributions import Distr, CategoricalDistr\nfrom allenact.embodiedai.models.basic_models import LinearActorCritic, RNNActorCritic\nfrom allenact.utils.misc_utils import prepare_locals_for_super\n\n\nclass MiniGridSimpleConvBase(ActorCriticModel[Distr], abc.ABC):\n    actor_critic: ActorCriticModel\n\n    def __init__(\n        self,\n        action_space: gym.spaces.Discrete,\n        observation_space: SpaceDict,\n        num_objects: int,\n        num_colors: int,\n        num_states: int,\n        object_embedding_dim: int = 8,\n        **kwargs,\n    ):\n        super().__init__(action_space=action_space, observation_space=observation_space)\n\n        self.num_objects = num_objects\n        self.object_embedding_dim = object_embedding_dim\n\n        vis_input_shape = observation_space[\"minigrid_ego_image\"].shape\n        agent_view_x, agent_view_y, view_channels = vis_input_shape\n        assert agent_view_x == agent_view_y\n        self.agent_view = agent_view_x\n        self.view_channels = view_channels\n\n        assert (np.array(vis_input_shape[:2]) >= 3).all(), (\n            \"MiniGridSimpleConvRNN requires\" \"that the input size be at least 3x3.\"\n        )\n\n        self.num_channels = 0\n\n        if self.num_objects > 0:\n            # Object embedding\n            self.object_embedding = nn.Embedding(\n                num_embeddings=num_objects, embedding_dim=self.object_embedding_dim\n            )\n            self.object_channel = self.num_channels\n            self.num_channels += 1\n\n        self.num_colors = num_colors\n        if 
self.num_colors > 0:\n            # Same dimensionality used for colors and states\n            self.color_embedding = nn.Embedding(\n                num_embeddings=num_colors, embedding_dim=self.object_embedding_dim\n            )\n            self.color_channel = self.num_channels\n            self.num_channels += 1\n\n        self.num_states = num_states\n        if self.num_states > 0:\n            self.state_embedding = nn.Embedding(\n                num_embeddings=num_states, embedding_dim=self.object_embedding_dim\n            )\n            self.state_channel = self.num_channels\n            self.num_channels += 1\n\n        assert self.num_channels == self.view_channels > 0\n\n        self.ac_key = \"enc\"\n        self.observations_for_ac: Dict[str, Optional[torch.Tensor]] = {\n            self.ac_key: None\n        }\n\n        self.num_agents = 1\n\n    def forward(  # type:ignore\n        self,\n        observations: ObservationType,\n        memory: Memory,\n        prev_actions: torch.Tensor,\n        masks: torch.FloatTensor,\n    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:\n        minigrid_ego_image = cast(torch.Tensor, observations[\"minigrid_ego_image\"])\n        use_agent = minigrid_ego_image.shape == 6\n        nrow, ncol, nchannels = minigrid_ego_image.shape[-3:]\n        nsteps, nsamplers, nagents = masks.shape[:3]\n\n        assert nrow == ncol == self.agent_view\n        assert nchannels == self.view_channels == self.num_channels\n\n        embed_list = []\n        if self.num_objects > 0:\n            ego_object_embeds = self.object_embedding(\n                minigrid_ego_image[..., self.object_channel].long()\n            )\n            embed_list.append(ego_object_embeds)\n        if self.num_colors > 0:\n            ego_color_embeds = self.color_embedding(\n                minigrid_ego_image[..., self.color_channel].long()\n            )\n            embed_list.append(ego_color_embeds)\n        if 
self.num_states > 0:\n            ego_state_embeds = self.state_embedding(\n                minigrid_ego_image[..., self.state_channel].long()\n            )\n            embed_list.append(ego_state_embeds)\n        ego_embeds = torch.cat(embed_list, dim=-1)\n\n        if use_agent:\n            self.observations_for_ac[self.ac_key] = ego_embeds.view(\n                nsteps, nsamplers, nagents, -1\n            )\n        else:\n            self.observations_for_ac[self.ac_key] = ego_embeds.view(\n                nsteps, nsamplers * nagents, -1\n            )\n\n        # noinspection PyCallingNonCallable\n        out, mem_return = self.actor_critic(\n            observations=self.observations_for_ac,\n            memory=memory,\n            prev_actions=prev_actions,\n            masks=masks,\n        )\n\n        self.observations_for_ac[self.ac_key] = None\n\n        return out, mem_return\n\n\nclass MiniGridSimpleConvRNN(MiniGridSimpleConvBase):\n    def __init__(\n        self,\n        action_space: gym.spaces.Discrete,\n        observation_space: SpaceDict,\n        num_objects: int,\n        num_colors: int,\n        num_states: int,\n        object_embedding_dim: int = 8,\n        hidden_size=512,\n        num_layers=1,\n        rnn_type=\"GRU\",\n        head_type: Callable[\n            ..., ActorCriticModel[CategoricalDistr]\n        ] = LinearActorCritic,\n        **kwargs,\n    ):\n        super().__init__(**prepare_locals_for_super(locals()))\n\n        self._hidden_size = hidden_size\n        agent_view_x, agent_view_y, view_channels = observation_space[\n            \"minigrid_ego_image\"\n        ].shape\n        self.actor_critic = RNNActorCritic(\n            input_uuid=self.ac_key,\n            action_space=action_space,\n            observation_space=SpaceDict(\n                {\n                    self.ac_key: gym.spaces.Box(\n                        low=np.float32(-1.0),\n                        high=np.float32(1.0),\n                      
  shape=(\n                            self.object_embedding_dim\n                            * agent_view_x\n                            * agent_view_y\n                            * view_channels,\n                        ),\n                    )\n                }\n            ),\n            hidden_size=hidden_size,\n            num_layers=num_layers,\n            rnn_type=rnn_type,\n            head_type=head_type,\n        )\n        self.memory_key = \"rnn\"\n\n        self.train()\n\n    @property\n    def num_recurrent_layers(self):\n        return self.actor_critic.num_recurrent_layers\n\n    @property\n    def recurrent_hidden_state_size(self):\n        return self._hidden_size\n\n    def _recurrent_memory_specification(self):\n        return {\n            self.memory_key: (\n                (\n                    (\"layer\", self.num_recurrent_layers),\n                    (\"sampler\", None),\n                    (\"hidden\", self.recurrent_hidden_state_size),\n                ),\n                torch.float32,\n            )\n        }\n\n\nclass MiniGridSimpleConv(MiniGridSimpleConvBase):\n    def __init__(\n        self,\n        action_space: gym.spaces.Discrete,\n        observation_space: SpaceDict,\n        num_objects: int,\n        num_colors: int,\n        num_states: int,\n        object_embedding_dim: int = 8,\n        **kwargs,\n    ):\n        super().__init__(**prepare_locals_for_super(locals()))\n\n        agent_view_x, agent_view_y, view_channels = observation_space[\n            \"minigrid_ego_image\"\n        ].shape\n        self.actor_critic = LinearActorCritic(\n            self.ac_key,\n            action_space=action_space,\n            observation_space=SpaceDict(\n                {\n                    self.ac_key: gym.spaces.Box(\n                        low=np.float32(-1.0),\n                        high=np.float32(1.0),\n                        shape=(\n                            self.object_embedding_dim\n               
             * agent_view_x\n                            * agent_view_y\n                            * view_channels,\n                        ),\n                    )\n                }\n            ),\n        )\n        self.memory_key = None\n\n        self.train()\n\n    @property\n    def num_recurrent_layers(self):\n        return 0\n\n    @property\n    def recurrent_hidden_state_size(self):\n        return 0\n\n    # noinspection PyMethodMayBeStatic\n    def _recurrent_memory_specification(self):\n        return None\n"
  },
  {
    "path": "allenact_plugins/minigrid_plugin/minigrid_offpolicy.py",
    "content": "import math\nimport queue\nimport random\nfrom collections import defaultdict\nfrom typing import Dict, Tuple, Any, cast, List, Union, Optional\n\nimport babyai\nimport blosc\nimport numpy as np\nimport pickle5 as pickle\nimport torch\nfrom gym_minigrid.minigrid import MiniGridEnv\n\nfrom allenact.algorithms.offpolicy_sync.losses.abstract_offpolicy_loss import Memory\nfrom allenact.algorithms.onpolicy_sync.policy import ObservationType\nfrom allenact.algorithms.onpolicy_sync.storage import (\n    ExperienceStorage,\n    StreamingStorageMixin,\n)\nfrom allenact.base_abstractions.misc import GenericAbstractLoss, LossOutput, ModelType\nfrom allenact.utils.misc_utils import partition_limits\nfrom allenact.utils.system import get_logger\nfrom allenact_plugins.minigrid_plugin.minigrid_sensors import MiniGridMissionSensor\n\n_DATASET_CACHE: Dict[str, Any] = {}\n\n\nclass MiniGridOffPolicyExpertCELoss(GenericAbstractLoss):\n    def __init__(self, total_episodes_in_epoch: Optional[int] = None):\n        super().__init__()\n        self.total_episodes_in_epoch = total_episodes_in_epoch\n\n    def loss(  # type: ignore\n        self,\n        *,  # No positional arguments\n        model: ModelType,\n        batch: ObservationType,\n        batch_memory: Memory,\n        stream_memory: Memory,\n    ) -> LossOutput:\n        rollout_len, nrollouts = cast(torch.Tensor, batch[\"minigrid_ego_image\"]).shape[\n            :2\n        ]\n\n        # Initialize Memory if empty\n        if len(stream_memory) == 0:\n            spec = model.recurrent_memory_specification\n            for key in spec:\n                dims_template, dtype = spec[key]\n                # get sampler_dim and all_dims from dims_template (and nrollouts)\n\n                dim_names = [d[0] for d in dims_template]\n                sampler_dim = dim_names.index(\"sampler\")\n\n                all_dims = [d[1] for d in dims_template]\n                all_dims[sampler_dim] = nrollouts\n\n         
       stream_memory.check_append(\n                    key=key,\n                    tensor=torch.zeros(\n                        *all_dims,\n                        dtype=dtype,\n                        device=cast(torch.Tensor, batch[\"minigrid_ego_image\"]).device,\n                    ),\n                    sampler_dim=sampler_dim,\n                )\n\n        # Forward data (through the actor and critic)\n        ac_out, stream_memory = model.forward(\n            observations=batch,\n            memory=stream_memory,\n            prev_actions=None,  # type:ignore\n            masks=cast(torch.FloatTensor, batch[\"masks\"]),\n        )\n\n        # Compute the loss from the actor's output and expert action\n        expert_ce_loss = -ac_out.distributions.log_prob(batch[\"expert_action\"]).mean()\n\n        info = {\"expert_ce\": expert_ce_loss.item()}\n\n        return LossOutput(\n            value=expert_ce_loss,\n            info=info,\n            per_epoch_info={},\n            batch_memory=batch_memory,\n            stream_memory=stream_memory,\n            bsize=rollout_len * nrollouts,\n        )\n\n\ndef transform_demos(demos):\n    # A modified version of babyai.utils.demos.transform_demos\n    # where we use pickle 5 instead of standard pickle\n    new_demos = []\n    for demo in demos:\n        new_demo = []\n\n        mission = demo[0]\n        all_images = demo[1]\n        directions = demo[2]\n        actions = demo[3]\n\n        # First decompress the pickle\n        pickled_array = blosc.blosc_extension.decompress(all_images, False)\n        # ... 
and unpickle\n        all_images = pickle.loads(pickled_array)\n\n        n_observations = all_images.shape[0]\n        assert (\n            len(directions) == len(actions) == n_observations\n        ), \"error transforming demos\"\n        for i in range(n_observations):\n            obs = {\n                \"image\": all_images[i],\n                \"direction\": directions[i],\n                \"mission\": mission,\n            }\n            action = actions[i]\n            done = i == n_observations - 1\n            new_demo.append((obs, action, done))\n        new_demos.append(new_demo)\n    return new_demos\n\n\nclass MiniGridExpertTrajectoryStorage(ExperienceStorage, StreamingStorageMixin):\n    def __init__(\n        self,\n        data_path: str,\n        num_samplers: int,\n        rollout_len: int,\n        instr_len: Optional[int],\n        restrict_max_steps_in_dataset: Optional[int] = None,\n        device: torch.device = torch.device(\"cpu\"),\n    ):\n        super(MiniGridExpertTrajectoryStorage, self).__init__()\n        self.data_path = data_path\n        self._data: Optional[\n            List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]\n        ] = None\n        self.restrict_max_steps_in_dataset = restrict_max_steps_in_dataset\n\n        self.original_num_samplers = num_samplers\n        self.num_samplers = num_samplers\n\n        self.rollout_len = rollout_len\n        self.instr_len = instr_len\n\n        self.current_worker = 0\n        self.num_workers = 1\n\n        self.minigrid_mission_sensor: Optional[MiniGridMissionSensor] = None\n        if instr_len is not None:\n            self.minigrid_mission_sensor = MiniGridMissionSensor(instr_len)\n\n        self.rollout_queues = []\n        self._remaining_inds = []\n        self.sampler_to_num_steps_in_queue = []\n        self._total_experiences = 0\n\n        self.device = device\n\n    @property\n    def data(self) -> List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]:\n    
    if self._data is None:\n            if self.data_path not in _DATASET_CACHE:\n                get_logger().info(\n                    f\"Loading minigrid dataset from {self.data_path} for first time...\"\n                )\n                _DATASET_CACHE[self.data_path] = babyai.utils.load_demos(self.data_path)\n                assert (\n                    _DATASET_CACHE[self.data_path] is not None\n                    and len(_DATASET_CACHE[self.data_path]) != 0\n                )\n                get_logger().info(\n                    \"Loading minigrid dataset complete, it contains {} trajectories\".format(\n                        len(_DATASET_CACHE[self.data_path])\n                    )\n                )\n            self._data = _DATASET_CACHE[self.data_path]\n\n            if self.restrict_max_steps_in_dataset is not None:\n                restricted_data = []\n                cur_len = 0\n                for i, d in enumerate(self._data):\n                    if cur_len >= self.restrict_max_steps_in_dataset:\n                        break\n                    restricted_data.append(d)\n                    cur_len += len(d[2])\n                self._data = restricted_data\n\n            parts = partition_limits(len(self._data), self.num_workers)\n            self._data = self._data[\n                parts[self.current_worker] : parts[self.current_worker + 1]\n            ]\n\n            self.rollout_queues = [queue.Queue() for _ in range(self.num_samplers)]\n            self.sampler_to_num_steps_in_queue = [0 for _ in range(self.num_samplers)]\n            for it, q in enumerate(self.rollout_queues):\n                self._fill_rollout_queue(q, it)\n\n        return self._data\n\n    def set_partition(self, index: int, num_parts: int):\n        self.current_worker = index\n        self.num_workers = num_parts\n\n        self.num_samplers = int(math.ceil(self.original_num_samplers / num_parts))\n\n        self._data = None\n\n        for q in 
self.rollout_queues:\n            try:\n                while True:\n                    q.get_nowait()\n            except queue.Empty:\n                pass\n        self.rollout_queues = []\n\n    def initialize(self, *, observations: ObservationType, **kwargs):\n        self.reset_stream()\n        assert len(self.data) != 0\n\n    def add(\n        self,\n        observations: ObservationType,\n        memory: Optional[Memory],\n        actions: torch.Tensor,\n        action_log_probs: torch.Tensor,\n        value_preds: torch.Tensor,\n        rewards: torch.Tensor,\n        masks: torch.Tensor,\n    ):\n        pass\n\n    def to(self, device: torch.device):\n        self.device = device\n\n    @property\n    def total_experiences(self) -> int:\n        return self._total_experiences\n\n    def reset_stream(self):\n        self.set_partition(index=self.current_worker, num_parts=self.num_workers)\n\n    def empty(self) -> bool:\n        return False\n\n    def _get_next_ind(self):\n        if len(self._remaining_inds) == 0:\n            self._remaining_inds = list(range(len(self.data)))\n            random.shuffle(self._remaining_inds)\n        return self._remaining_inds.pop()\n\n    def _fill_rollout_queue(self, q: queue.Queue, sampler: int):\n        assert q.empty()\n\n        while self.sampler_to_num_steps_in_queue[sampler] < self.rollout_len:\n            next_ind = self._get_next_ind()\n\n            for i, step in enumerate(transform_demos([self.data[next_ind]])[0]):\n                q.put((*step, i == 0))\n                self.sampler_to_num_steps_in_queue[sampler] += 1\n\n        return True\n\n    def get_data_for_rollout_ind(self, sampler_ind: int) -> Dict[str, np.ndarray]:\n        masks: List[bool] = []\n        minigrid_ego_image = []\n        minigrid_mission = []\n        expert_actions = []\n        q = self.rollout_queues[sampler_ind]\n        while len(masks) != self.rollout_len:\n            if q.empty():\n                assert 
self.sampler_to_num_steps_in_queue[sampler_ind] == 0\n                self._fill_rollout_queue(q, sampler_ind)\n\n            obs, expert_action, _, is_first_obs = cast(\n                Tuple[\n                    Dict[str, Union[np.array, int, str]],\n                    MiniGridEnv.Actions,\n                    bool,\n                    bool,\n                ],\n                q.get_nowait(),\n            )\n            self.sampler_to_num_steps_in_queue[sampler_ind] -= 1\n\n            masks.append(not is_first_obs)\n            minigrid_ego_image.append(obs[\"image\"])\n            if self.minigrid_mission_sensor is not None:\n                # noinspection PyTypeChecker\n                minigrid_mission.append(\n                    self.minigrid_mission_sensor.get_observation(\n                        env=None, task=None, minigrid_output_obs=obs\n                    )\n                )\n            expert_actions.append([expert_action])\n\n        to_return = {\n            \"masks\": torch.tensor(masks, device=self.device, dtype=torch.float32).view(\n                self.rollout_len, 1  # steps x mask\n            ),\n            \"minigrid_ego_image\": torch.stack(\n                [torch.tensor(img, device=self.device) for img in minigrid_ego_image],\n                dim=0,\n            ),  # steps x height x width x channels\n            \"expert_action\": torch.tensor(\n                expert_actions, device=self.device, dtype=torch.int64\n            ).view(\n                self.rollout_len  # steps\n            ),\n        }\n        if self.minigrid_mission_sensor is not None:\n            to_return[\"minigrid_mission\"] = torch.stack(\n                [torch.tensor(m, device=self.device) for m in minigrid_mission], dim=0\n            )  # steps x mission_dims\n        return to_return\n\n    def next_batch(self) -> Dict[str, torch.Tensor]:\n        all_data = defaultdict(lambda: [])\n        for rollout_ind in range(self.num_samplers):\n         
   data_for_ind = self.get_data_for_rollout_ind(sampler_ind=rollout_ind)\n            for key in data_for_ind:\n                all_data[key].append(data_for_ind[key])\n\n        self._total_experiences += self.num_samplers * self.rollout_len\n        return {\n            key: torch.stack(\n                all_data[key],\n                dim=1,\n            )  # new sampler dim\n            for key in all_data\n        }\n"
  },
  {
    "path": "allenact_plugins/minigrid_plugin/minigrid_sensors.py",
    "content": "from typing import Optional, Any, cast\n\nimport gym\nimport gym_minigrid.minigrid\nimport numpy as np\nimport torch\nfrom babyai.utils.format import InstructionsPreprocessor\nfrom gym_minigrid.minigrid import MiniGridEnv\n\nfrom allenact.base_abstractions.sensor import Sensor, prepare_locals_for_super\nfrom allenact.base_abstractions.task import Task, SubTaskType\n\n# fmt: off\nALL_VOCAB_TOKENS = [\n    \"a\", \"after\", \"and\", \"ball\", \"behind\", \"blue\", \"box\",\n    \"door\", \"front\", \"go\", \"green\", \"grey\", \"in\", \"key\",\n    \"left\", \"next\", \"of\", \"on\", \"open\", \"pick\", \"purple\",\n    \"put\", \"red\", \"right\", \"the\", \"then\", \"to\", \"up\", \"yellow\",\n    \"you\", \"your\",\n]\n# fmt: on\n\n\nclass EgocentricMiniGridSensor(Sensor[MiniGridEnv, Task[MiniGridEnv]]):\n    def __init__(\n        self,\n        agent_view_size: int,\n        view_channels: int = 1,\n        uuid: str = \"minigrid_ego_image\",\n        **kwargs: Any\n    ):\n        self.agent_view_size = agent_view_size\n        self.view_channels = view_channels\n        self.num_objects = (\n            cast(\n                int, max(map(abs, gym_minigrid.minigrid.OBJECT_TO_IDX.values()))  # type: ignore\n            )\n            + 1\n        )\n        self.num_colors = (\n            cast(int, max(map(abs, gym_minigrid.minigrid.COLOR_TO_IDX.values())))  # type: ignore\n            + 1\n        )\n        self.num_states = (\n            cast(int, max(map(abs, gym_minigrid.minigrid.STATE_TO_IDX.values())))  # type: ignore\n            + 1\n        )\n\n        observation_space = self._get_observation_space()\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def _get_observation_space(self) -> gym.Space:\n        return gym.spaces.Box(\n            low=0,\n            high=max(self.num_objects, self.num_colors, self.num_states) - 1,\n            shape=(self.agent_view_size, self.agent_view_size, self.view_channels),\n 
           dtype=int,\n        )\n\n    def get_observation(\n        self,\n        env: MiniGridEnv,\n        task: Optional[SubTaskType],\n        *args,\n        minigrid_output_obs: Optional[np.ndarray] = None,\n        **kwargs: Any\n    ) -> Any:\n        if minigrid_output_obs is not None and minigrid_output_obs[\"image\"].shape == (\n            self.agent_view_size,\n            self.agent_view_size,\n        ):\n            img = minigrid_output_obs[\"image\"][:, :, : self.view_channels]\n        else:\n            env.agent_view_size = self.agent_view_size\n            img = env.gen_obs()[\"image\"][:, :, : self.view_channels]\n\n        assert img.dtype == np.uint8\n        return img\n\n\nclass MiniGridMissionSensor(Sensor[MiniGridEnv, Task[MiniGridEnv]]):\n    def __init__(self, instr_len: int, uuid: str = \"minigrid_mission\", **kwargs: Any):\n\n        self.instr_preprocessor = InstructionsPreprocessor(\n            model_name=\"TMP_SENSOR\", load_vocab_from=None\n        )\n\n        # We initialize the vocabulary with a fixed collection of tokens\n        # and then ensure that the size cannot exceed this number. 
This\n        # guarantees that sensors on all processes will produce the same\n        # values.\n        for token in ALL_VOCAB_TOKENS:\n            _ = self.instr_preprocessor.vocab[token]\n        self.instr_preprocessor.vocab.max_size = len(ALL_VOCAB_TOKENS)\n\n        self.instr_len = instr_len\n\n        observation_space = self._get_observation_space()\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def _get_observation_space(self) -> gym.Space:\n        return gym.spaces.Box(\n            low=0,\n            high=self.instr_preprocessor.vocab.max_size,\n            shape=(self.instr_len,),\n            dtype=int,\n        )\n\n    def get_observation(\n        self,\n        env: MiniGridEnv,\n        task: Optional[SubTaskType],\n        *args,\n        minigrid_output_obs: Optional[np.ndarray] = None,\n        **kwargs: Any\n    ) -> Any:\n        if minigrid_output_obs is None:\n            minigrid_output_obs = env.gen_obs()\n\n        out = self.instr_preprocessor([minigrid_output_obs]).view(-1)\n\n        n: int = out.shape[0]\n        if n > self.instr_len:\n            out = out[: self.instr_len]\n        elif n < self.instr_len:\n            out = torch.nn.functional.pad(\n                input=out,\n                pad=[0, self.instr_len - n],\n                value=0,\n            )\n\n        return out.long().numpy()\n"
  },
  {
    "path": "allenact_plugins/minigrid_plugin/minigrid_tasks.py",
    "content": "import random\nfrom typing import Tuple, Any, List, Dict, Optional, Union, Callable, Sequence, cast\n\nimport gym\nimport networkx as nx\nimport numpy as np\nfrom gym.utils import seeding\nfrom gym_minigrid.envs import CrossingEnv\nfrom gym_minigrid.minigrid import (\n    DIR_TO_VEC,\n    IDX_TO_OBJECT,\n    MiniGridEnv,\n    OBJECT_TO_IDX,\n)\n\nfrom allenact.base_abstractions.misc import RLStepResult\nfrom allenact.base_abstractions.sensor import Sensor, SensorSuite\nfrom allenact.base_abstractions.task import Task, TaskSampler\nfrom allenact.utils.system import get_logger\nfrom allenact_plugins.minigrid_plugin.minigrid_environments import (\n    AskForHelpSimpleCrossing,\n)\n\n\nclass MiniGridTask(Task[CrossingEnv]):\n    _ACTION_NAMES: Tuple[str, ...] = (\"left\", \"right\", \"forward\")\n    _ACTION_IND_TO_MINIGRID_IND = tuple(\n        MiniGridEnv.Actions.__members__[name].value for name in _ACTION_NAMES\n    )\n    _CACHED_GRAPHS: Dict[str, nx.DiGraph] = {}\n    _NEIGHBOR_OFFSETS = tuple(\n        [\n            (-1, 0, 0),\n            (0, -1, 0),\n            (0, 0, -1),\n            (1, 0, 0),\n            (0, 1, 0),\n            (0, 0, 1),\n        ]\n    )\n\n    _XY_DIFF_TO_AGENT_DIR = {\n        tuple(vec): dir_ind for dir_ind, vec in enumerate(DIR_TO_VEC)\n    }\n\n    \"\"\" Task around a MiniGrid Env, allows interfacing allenact with\n    MiniGrid tasks. 
(currently focussed towards LavaCrossing)\n    \"\"\"\n\n    def __init__(\n        self,\n        env: Union[CrossingEnv],\n        sensors: Union[SensorSuite, List[Sensor]],\n        task_info: Dict[str, Any],\n        max_steps: int,\n        task_cache_uid: Optional[str] = None,\n        corrupt_expert_within_actions_of_goal: Optional[int] = None,\n        **kwargs,\n    ):\n        super().__init__(\n            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs\n        )\n        self._graph: Optional[nx.DiGraph] = None\n        self._minigrid_done = False\n        self._task_cache_uid = task_cache_uid\n        self.corrupt_expert_within_actions_of_goal = (\n            corrupt_expert_within_actions_of_goal\n        )\n        self.closest_agent_has_been_to_goal: Optional[float] = None\n\n    @property\n    def action_space(self) -> gym.spaces.Discrete:\n        return gym.spaces.Discrete(len(self._ACTION_NAMES))\n\n    def render(self, mode: str = \"rgb\", *args, **kwargs) -> np.ndarray:\n        return self.env.render(mode=mode)\n\n    def _step(self, action: int) -> RLStepResult:\n        assert isinstance(action, int)\n        action = cast(int, action)\n\n        minigrid_obs, reward, self._minigrid_done, info = self.env.step(\n            action=self._ACTION_IND_TO_MINIGRID_IND[action]\n        )\n\n        # self.env.render()\n\n        return RLStepResult(\n            observation=self.get_observations(minigrid_output_obs=minigrid_obs),\n            reward=reward,\n            done=self.is_done(),\n            info=info,\n        )\n\n    def get_observations(\n        self, *args, minigrid_output_obs: Optional[Dict[str, Any]] = None, **kwargs\n    ) -> Any:\n        return self.sensor_suite.get_observations(\n            env=self.env, task=self, minigrid_output_obs=minigrid_output_obs\n        )\n\n    def reached_terminal_state(self) -> bool:\n        return self._minigrid_done\n\n    @classmethod\n    def 
class_action_names(cls, **kwargs) -> Tuple[str, ...]:\n        return cls._ACTION_NAMES\n\n    def close(self) -> None:\n        pass\n\n    def metrics(self) -> Dict[str, Any]:\n        # noinspection PyUnresolvedReferences,PyCallingNonCallable\n        env_metrics = self.env.metrics() if hasattr(self.env, \"metrics\") else {}\n        return {\n            **super(MiniGridTask, self).metrics(),\n            **{k: float(v) for k, v in env_metrics.items()},\n            \"success\": int(\n                self.env.was_successful\n                if hasattr(self.env, \"was_successful\")\n                else self.cumulative_reward > 0\n            ),\n        }\n\n    @property\n    def graph_created(self):\n        return self._graph is not None\n\n    @property\n    def graph(self):\n        if self._graph is None:\n            if self._task_cache_uid is not None:\n                if self._task_cache_uid not in self._CACHED_GRAPHS:\n                    self._CACHED_GRAPHS[self._task_cache_uid] = self.generate_graph()\n                self._graph = self._CACHED_GRAPHS[self._task_cache_uid]\n            else:\n                self._graph = self.generate_graph()\n        return self._graph\n\n    @graph.setter\n    def graph(self, graph: nx.DiGraph):\n        self._graph = graph\n\n    @classmethod\n    def possible_neighbor_offsets(cls) -> Tuple[Tuple[int, int, int], ...]:\n        # Tuples of format:\n        # (X translation, Y translation, rotation by 90 degrees)\n        # A constant is returned, this function can be changed if anything\n        # more complex needs to be done.\n\n        # offsets_superset = itertools.product(\n        #     [-1, 0, 1], [-1, 0, 1], [-1, 0, 1]\n        # )\n        #\n        # valid_offsets = []\n        # for off in offsets_superset:\n        #     if (int(off[0] != 0) + int(off[1] != 0) + int(off[2] != 0)) == 1:\n        #         valid_offsets.append(off)\n        #\n        # return tuple(valid_offsets)\n\n        return 
cls._NEIGHBOR_OFFSETS\n\n    @classmethod\n    def _add_from_to_edge(\n        cls,\n        g: nx.DiGraph,\n        s: Tuple[int, int, int],\n        t: Tuple[int, int, int],\n    ):\n        \"\"\"Adds nodes and corresponding edges to existing nodes.\n        This approach avoids adding the same edge multiple times.\n        Pre-requisite knowledge about MiniGrid:\n        DIR_TO_VEC = [\n            # Pointing right (positive X)\n            np.array((1, 0)),\n            # Down (positive Y)\n            np.array((0, 1)),\n            # Pointing left (negative X)\n            np.array((-1, 0)),\n            # Up (negative Y)\n            np.array((0, -1)),\n        ]\n        or\n        AGENT_DIR_TO_STR = {\n            0: '>',\n            1: 'V',\n            2: '<',\n            3: '^'\n        }\n        This also implies turning right (clockwise) means:\n            agent_dir += 1\n        \"\"\"\n\n        s_x, s_y, s_rot = s\n        t_x, t_y, t_rot = t\n\n        x_diff = t_x - s_x\n        y_diff = t_y - s_y\n        angle_diff = (t_rot - s_rot) % 4\n\n        # If source and target differ by more than one action, continue\n        if (x_diff != 0) + (y_diff != 0) + (angle_diff != 0) != 1 or angle_diff == 2:\n            return\n\n        action = None\n        if angle_diff == 1:\n            action = \"right\"\n        elif angle_diff == 3:\n            action = \"left\"\n        elif cls._XY_DIFF_TO_AGENT_DIR[(x_diff, y_diff)] == s_rot:\n            # if translation is the same direction as source\n            # orientation, then it's a valid forward action\n            action = \"forward\"\n        else:\n            # This is when the source and target aren't one action\n            # apart, despite having dx=1 or dy=1\n            pass\n\n        if action is not None:\n            g.add_edge(s, t, action=action)\n\n    def _add_node_to_graph(\n        self,\n        graph: nx.DiGraph,\n        s: Tuple[int, int, int],\n        valid_node_types: 
Tuple[str, ...],\n        attr_dict: Dict[Any, Any] = None,\n        include_rotation_free_leaves: bool = False,\n    ):\n        if s in graph:\n            return\n        if attr_dict is None:\n            get_logger().warning(\"adding a node with neighbor checks and no attributes\")\n        graph.add_node(s, **attr_dict)\n\n        if include_rotation_free_leaves:\n            rot_free_leaf = (*s[:-1], None)\n            if rot_free_leaf not in graph:\n                graph.add_node(rot_free_leaf)\n            graph.add_edge(s, rot_free_leaf, action=\"NA\")\n\n        if attr_dict[\"type\"] in valid_node_types:\n            for o in self.possible_neighbor_offsets():\n                t = (s[0] + o[0], s[1] + o[1], (s[2] + o[2]) % 4)\n                if t in graph and graph.nodes[t][\"type\"] in valid_node_types:\n                    self._add_from_to_edge(graph, s, t)\n                    self._add_from_to_edge(graph, t, s)\n\n    def generate_graph(\n        self,\n    ) -> nx.DiGraph:\n        \"\"\"The generated graph is based on the fully observable grid (as the\n        expert sees it all).\n\n        env: environment to generate the graph over\n        \"\"\"\n\n        image = self.env.grid.encode()\n        width, height, _ = image.shape\n        graph = nx.DiGraph()\n\n        # In fully observable grid, there shouldn't be any \"unseen\"\n        # Currently dealing with \"empty\", \"wall\", \"goal\", \"lava\"\n\n        valid_object_ids = np.sort(\n            [OBJECT_TO_IDX[o] for o in [\"empty\", \"wall\", \"lava\", \"goal\"]]\n        )\n\n        assert np.all(np.union1d(image[:, :, 0], valid_object_ids) == valid_object_ids)\n\n        # Grid to nodes\n        for x in range(width):\n            for y in range(height):\n                for rotation in range(4):\n                    type, color, state = image[x, y]\n                    self._add_node_to_graph(\n                        graph,\n                        (x, y, rotation),\n              
          attr_dict={\n                            \"type\": IDX_TO_OBJECT[type],\n                            \"color\": color,\n                            \"state\": state,\n                        },\n                        valid_node_types=(\"empty\", \"goal\"),\n                    )\n                    if IDX_TO_OBJECT[type] == \"goal\":\n                        if not graph.has_node(\"unified_goal\"):\n                            graph.add_node(\"unified_goal\")\n                        graph.add_edge((x, y, rotation), \"unified_goal\")\n\n        return graph\n\n    def query_expert(self, **kwargs) -> Tuple[int, bool]:\n        if self._minigrid_done:\n            get_logger().warning(\"Episode is completed, but expert is still queried.\")\n            return -1, False\n\n        paths = []\n        agent_x, agent_y = self.env.agent_pos\n        agent_rot = self.env.agent_dir\n        source_state_key = (agent_x, agent_y, agent_rot)\n        assert source_state_key in self.graph\n\n        paths.append(nx.shortest_path(self.graph, source_state_key, \"unified_goal\"))\n\n        if len(paths) == 0:\n            return -1, False\n\n        shortest_path_ind = int(np.argmin([len(p) for p in paths]))\n\n        if self.closest_agent_has_been_to_goal is None:\n            self.closest_agent_has_been_to_goal = len(paths[shortest_path_ind]) - 1\n        else:\n            self.closest_agent_has_been_to_goal = min(\n                len(paths[shortest_path_ind]) - 1, self.closest_agent_has_been_to_goal\n            )\n\n        if (\n            self.corrupt_expert_within_actions_of_goal is not None\n            and self.corrupt_expert_within_actions_of_goal\n            >= self.closest_agent_has_been_to_goal\n        ):\n            return (\n                int(self.env.np_random.randint(0, len(self.class_action_names()))),\n                True,\n            )\n\n        if len(paths[shortest_path_ind]) == 2:\n            # Since \"unified_goal\" is 1 step 
away from actual goals\n            # if a path like [actual_goal, unified_goal] exists, then\n            # you are already at a goal.\n            get_logger().warning(\n                \"Shortest path computations suggest we are at\"\n                \" the target but episode does not think so.\"\n            )\n            return -1, False\n\n        next_key_on_shortest_path = paths[shortest_path_ind][1]\n        return (\n            self.class_action_names().index(\n                self.graph.get_edge_data(source_state_key, next_key_on_shortest_path)[\n                    \"action\"\n                ]\n            ),\n            True,\n        )\n\n\nclass AskForHelpSimpleCrossingTask(MiniGridTask):\n    _ACTION_NAMES = (\"left\", \"right\", \"forward\", \"toggle\")\n    _ACTION_IND_TO_MINIGRID_IND = tuple(\n        MiniGridEnv.Actions.__members__[name].value for name in _ACTION_NAMES\n    )\n    _CACHED_GRAPHS: Dict[str, nx.DiGraph] = {}\n\n    def __init__(\n        self,\n        env: AskForHelpSimpleCrossing,\n        sensors: Union[SensorSuite, List[Sensor]],\n        task_info: Dict[str, Any],\n        max_steps: int,\n        **kwargs,\n    ):\n        super().__init__(\n            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs\n        )\n\n        self.did_toggle: List[bool] = []\n\n    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:\n        assert isinstance(action, int)\n        action = cast(int, action)\n\n        self.did_toggle.append(self._ACTION_NAMES[action] == \"toggle\")\n        return super(AskForHelpSimpleCrossingTask, self)._step(action=action)\n\n    def metrics(self) -> Dict[str, Any]:\n        return {\n            **super(AskForHelpSimpleCrossingTask, self).metrics(),\n            \"toggle_percent\": float(\n                sum(self.did_toggle) / max(len(self.did_toggle), 1)\n            ),\n        }\n\n\nclass MiniGridTaskSampler(TaskSampler):\n    def __init__(\n        
self,\n        env_class: Callable[..., Union[MiniGridEnv]],\n        sensors: Union[SensorSuite, List[Sensor]],\n        env_info: Optional[Dict[str, Any]] = None,\n        max_tasks: Optional[int] = None,\n        num_unique_seeds: Optional[int] = None,\n        task_seeds_list: Optional[List[int]] = None,\n        deterministic_sampling: bool = False,\n        cache_graphs: Optional[bool] = False,\n        task_class: Callable[..., MiniGridTask] = MiniGridTask,\n        repeat_failed_task_for_min_steps: int = 0,\n        extra_task_kwargs: Optional[Dict] = None,\n        **kwargs,\n    ):\n        super(MiniGridTaskSampler, self).__init__()\n        self.sensors = (\n            SensorSuite(sensors) if not isinstance(sensors, SensorSuite) else sensors\n        )\n        self.max_tasks = max_tasks\n        self.num_unique_seeds = num_unique_seeds\n        self.cache_graphs = cache_graphs\n        self.deterministic_sampling = deterministic_sampling\n        self.repeat_failed_task_for_min_steps = repeat_failed_task_for_min_steps\n        self.extra_task_kwargs = (\n            extra_task_kwargs if extra_task_kwargs is not None else {}\n        )\n\n        self._last_env_seed: Optional[int] = None\n        self._last_task: Optional[MiniGridTask] = None\n        self._number_of_steps_taken_with_task_seed = 0\n\n        assert (not deterministic_sampling) or repeat_failed_task_for_min_steps <= 0, (\n            \"If `deterministic_sampling` is True then we require\"\n            \" `repeat_failed_task_for_min_steps <= 0`\"\n        )\n        assert (not self.cache_graphs) or self.num_unique_seeds is not None, (\n            \"When caching graphs you must specify\"\n            \" a number of unique tasks to sample from.\"\n        )\n        assert (self.num_unique_seeds is None) or (\n            0 < self.num_unique_seeds\n        ), \"`num_unique_seeds` must be a positive integer.\"\n\n        self.num_unique_seeds = num_unique_seeds\n        
self.task_seeds_list = task_seeds_list\n        if self.task_seeds_list is not None:\n            if self.num_unique_seeds is not None:\n                assert self.num_unique_seeds == len(\n                    self.task_seeds_list\n                ), \"`num_unique_seeds` must equal the length of `task_seeds_list` if both specified.\"\n            self.num_unique_seeds = len(self.task_seeds_list)\n        elif self.num_unique_seeds is not None:\n            self.task_seeds_list = list(range(self.num_unique_seeds))\n        if num_unique_seeds is not None and repeat_failed_task_for_min_steps > 0:\n            raise NotImplementedError(\n                \"`repeat_failed_task_for_min_steps` must be <=0 if number\"\n                \" of unique seeds is not None.\"\n            )\n\n        assert (\n            not self.cache_graphs\n        ) or self.num_unique_seeds <= 1000, \"Too many tasks (graphs) to cache\"\n        assert (not deterministic_sampling) or (\n            self.num_unique_seeds is not None\n        ), \"Cannot use deterministic sampling when `num_unique_seeds` is `None`.\"\n\n        if (not deterministic_sampling) and self.max_tasks:\n            get_logger().warning(\n                \"`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,\"\n                \" this might be a mistake when running testing.\"\n            )\n\n        self.env = env_class(**env_info)\n        self.task_class = task_class\n\n        self.np_seeded_random_gen, _ = seeding.np_random(random.randint(0, 2**31 - 1))\n\n        self.num_tasks_generated = 0\n\n    @property\n    def length(self) -> Union[int, float]:\n        return (\n            float(\"inf\")\n            if self.max_tasks is None\n            else self.max_tasks - self.num_tasks_generated\n        )\n\n    @property\n    def total_unique(self) -> Optional[Union[int, float]]:\n        return None if self.num_unique_seeds is None else self.num_unique_seeds\n\n    @property\n    def 
last_sampled_task(self) -> Optional[Task]:\n        raise NotImplementedError\n\n    def next_task(self, force_advance_scene: bool = False) -> Optional[MiniGridTask]:\n        if self.length <= 0:\n            return None\n\n        task_cache_uid = None\n        repeating = False\n        if self.num_unique_seeds is not None:\n            if self.deterministic_sampling:\n                self._last_env_seed = self.task_seeds_list[\n                    self.num_tasks_generated % len(self.task_seeds_list)\n                ]\n            else:\n                self._last_env_seed = self.np_seeded_random_gen.choice(\n                    self.task_seeds_list\n                )\n        else:\n            if self._last_task is not None:\n                self._number_of_steps_taken_with_task_seed += (\n                    self._last_task.num_steps_taken()\n                )\n\n            if (\n                self._last_env_seed is not None\n                and self._number_of_steps_taken_with_task_seed\n                < self.repeat_failed_task_for_min_steps\n                and self._last_task.cumulative_reward == 0\n            ):\n                repeating = True\n            else:\n                self._number_of_steps_taken_with_task_seed = 0\n                self._last_env_seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)\n\n        task_has_same_seed_reset = hasattr(self.env, \"same_seed_reset\")\n\n        if self.cache_graphs:\n            task_cache_uid = str(self._last_env_seed)\n\n        if repeating and task_has_same_seed_reset:\n            # noinspection PyUnresolvedReferences\n            self.env.same_seed_reset()\n        else:\n            self.env.seed(self._last_env_seed)\n            self.env.saved_seed = self._last_env_seed\n            self.env.reset()\n\n        self.num_tasks_generated += 1\n        task = self.task_class(\n            **dict(\n                env=self.env,\n                sensors=self.sensors,\n                
task_info={},\n                max_steps=self.env.max_steps,\n                task_cache_uid=task_cache_uid,\n            ),\n            **self.extra_task_kwargs,\n        )\n\n        if repeating and self._last_task.graph_created:\n            task.graph = self._last_task.graph\n\n        self._last_task = task\n        return task\n\n    def close(self) -> None:\n        self.env.close()\n\n    @property\n    def all_observation_spaces_equal(self) -> bool:\n        return True\n\n    def reset(self) -> None:\n        self.num_tasks_generated = 0\n        self.env.reset()\n\n    def set_seed(self, seed: int) -> None:\n        self.np_seeded_random_gen, _ = seeding.np_random(seed)\n"
  },
  {
    "path": "allenact_plugins/minigrid_plugin/scripts/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/navigation_plugin/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/navigation_plugin/objectnav/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/navigation_plugin/objectnav/models.py",
    "content": "\"\"\"Baseline models for use in the object navigation task.\n\nObject navigation is currently available as a Task in AI2-THOR and\nFacebook's Habitat.\n\"\"\"\n\nfrom typing import Optional, List, Dict, cast, Tuple, Sequence\n\nimport gym\nimport torch\nimport torch.nn as nn\nfrom gym.spaces import Dict as SpaceDict\n\nfrom allenact.algorithms.onpolicy_sync.policy import ObservationType\nfrom allenact.embodiedai.models import resnet as resnet\nfrom allenact.embodiedai.models.basic_models import SimpleCNN\nfrom allenact.embodiedai.models.visual_nav_models import (\n    VisualNavActorCritic,\n    FusionType,\n)\n\n\nclass CatObservations(nn.Module):\n    def __init__(self, ordered_uuids: Sequence[str], dim: int):\n        super().__init__()\n        assert len(ordered_uuids) != 0\n\n        self.ordered_uuids = ordered_uuids\n        self.dim = dim\n\n    def forward(self, observations: ObservationType):\n        if len(self.ordered_uuids) == 1:\n            return observations[self.ordered_uuids[0]]\n        return torch.cat(\n            [observations[uuid] for uuid in self.ordered_uuids], dim=self.dim\n        )\n\n\nclass ObjectNavActorCritic(VisualNavActorCritic):\n    \"\"\"Baseline recurrent actor critic model for object-navigation.\n\n    # Attributes\n    action_space : The space of actions available to the agent. Currently only discrete\n        actions are allowed (so this space will always be of type `gym.spaces.Discrete`).\n    observation_space : The observation space expected by the agent. This observation space\n        should include (optionally) 'rgb' images and 'depth' images and is required to\n        have a component corresponding to the goal `goal_sensor_uuid`.\n    goal_sensor_uuid : The uuid of the sensor of the goal object. 
See `GoalObjectTypeThorSensor`\n        as an example of such a sensor.\n    hidden_size : The hidden size of the GRU RNN.\n    object_type_embedding_dim: The dimensionality of the embedding corresponding to the goal\n        object type.\n    \"\"\"\n\n    def __init__(\n        self,\n        action_space: gym.spaces.Discrete,\n        observation_space: SpaceDict,\n        goal_sensor_uuid: str,\n        # RNN\n        hidden_size=512,\n        num_rnn_layers=1,\n        rnn_type=\"GRU\",\n        add_prev_actions=False,\n        add_prev_action_null_token=False,\n        action_embed_size=6,\n        # Aux loss\n        multiple_beliefs=False,\n        beliefs_fusion: Optional[FusionType] = None,\n        auxiliary_uuids: Optional[Sequence[str]] = None,\n        # below are custom params\n        rgb_uuid: Optional[str] = None,\n        depth_uuid: Optional[str] = None,\n        object_type_embedding_dim=8,\n        trainable_masked_hidden_state: bool = False,\n        # perception backbone params,\n        backbone=\"gnresnet18\",\n        resnet_baseplanes=32,\n    ):\n        \"\"\"Initializer.\n\n        See class documentation for parameter definitions.\n        \"\"\"\n        super().__init__(\n            action_space=action_space,\n            observation_space=observation_space,\n            hidden_size=hidden_size,\n            multiple_beliefs=multiple_beliefs,\n            beliefs_fusion=beliefs_fusion,\n            auxiliary_uuids=auxiliary_uuids,\n        )\n\n        self.rgb_uuid = rgb_uuid\n        self.depth_uuid = depth_uuid\n\n        self.goal_sensor_uuid = goal_sensor_uuid\n        self._n_object_types = self.observation_space.spaces[self.goal_sensor_uuid].n\n        self.object_type_embedding_size = object_type_embedding_dim\n\n        self.backbone = backbone\n        if backbone == \"simple_cnn\":\n            self.visual_encoder = SimpleCNN(\n                observation_space=observation_space,\n                
output_size=hidden_size,\n                rgb_uuid=rgb_uuid,\n                depth_uuid=depth_uuid,\n            )\n            self.visual_encoder_output_size = hidden_size\n            assert self.is_blind == self.visual_encoder.is_blind\n        elif backbone == \"gnresnet18\":  # resnet family\n            self.visual_encoder = resnet.GroupNormResNetEncoder(\n                observation_space=observation_space,\n                output_size=hidden_size,\n                rgb_uuid=rgb_uuid,\n                depth_uuid=depth_uuid,\n                baseplanes=resnet_baseplanes,\n                ngroups=resnet_baseplanes // 2,\n                make_backbone=getattr(resnet, backbone),\n            )\n            self.visual_encoder_output_size = hidden_size\n            assert self.is_blind == self.visual_encoder.is_blind\n        elif backbone in [\"identity\", \"projection\"]:\n            good_uuids = [\n                uuid for uuid in [self.rgb_uuid, self.depth_uuid] if uuid is not None\n            ]\n            cat_model = CatObservations(\n                ordered_uuids=good_uuids,\n                dim=-1,\n            )\n            after_cat_size = sum(\n                observation_space[uuid].shape[-1] for uuid in good_uuids\n            )\n            if backbone == \"identity\":\n                self.visual_encoder = cat_model\n                self.visual_encoder_output_size = after_cat_size\n            else:\n                self.visual_encoder = nn.Sequential(\n                    cat_model, nn.Linear(after_cat_size, hidden_size), nn.ReLU(True)\n                )\n                self.visual_encoder_output_size = hidden_size\n\n        else:\n            raise NotImplementedError\n\n        self.create_state_encoders(\n            obs_embed_size=self.goal_visual_encoder_output_dims,\n            num_rnn_layers=num_rnn_layers,\n            rnn_type=rnn_type,\n            add_prev_actions=add_prev_actions,\n            
add_prev_action_null_token=add_prev_action_null_token,\n            prev_action_embed_size=action_embed_size,\n            trainable_masked_hidden_state=trainable_masked_hidden_state,\n        )\n\n        self.create_actorcritic_head()\n\n        self.create_aux_models(\n            obs_embed_size=self.goal_visual_encoder_output_dims,\n            action_embed_size=action_embed_size,\n        )\n\n        self.object_type_embedding = nn.Embedding(\n            num_embeddings=self._n_object_types,\n            embedding_dim=object_type_embedding_dim,\n        )\n\n        self.train()\n\n    @property\n    def is_blind(self) -> bool:\n        \"\"\"True if the model is blind (e.g. neither 'depth' or 'rgb' is an\n        input observation type).\"\"\"\n        return self.rgb_uuid is None and self.depth_uuid is None\n\n    @property\n    def goal_visual_encoder_output_dims(self):\n        dims = self.object_type_embedding_size\n        if self.is_blind:\n            return dims\n        return dims + self.visual_encoder_output_size\n\n    def get_object_type_encoding(\n        self, observations: Dict[str, torch.Tensor]\n    ) -> torch.Tensor:\n        \"\"\"Get the object type encoding from input batched observations.\"\"\"\n        # noinspection PyTypeChecker\n        return self.object_type_embedding(  # type:ignore\n            observations[self.goal_sensor_uuid].to(torch.int64)\n        )\n\n    def forward_encoder(self, observations: ObservationType) -> torch.Tensor:\n        target_encoding = self.get_object_type_encoding(\n            cast(Dict[str, torch.Tensor], observations)\n        )\n        obs_embeds = [target_encoding]\n\n        if not self.is_blind:\n            perception_embed = self.visual_encoder(observations)\n            obs_embeds = [perception_embed] + obs_embeds\n\n        obs_embeds = torch.cat(obs_embeds, dim=-1)\n        return obs_embeds\n\n\nclass ResnetTensorNavActorCritic(VisualNavActorCritic):\n    def __init__(\n        # base 
params\n        self,\n        action_space: gym.spaces.Discrete,\n        observation_space: SpaceDict,\n        goal_sensor_uuid: str,\n        hidden_size=512,\n        num_rnn_layers=1,\n        rnn_type=\"GRU\",\n        add_prev_actions=False,\n        add_prev_action_null_token=False,\n        action_embed_size=6,\n        multiple_beliefs=False,\n        beliefs_fusion: Optional[FusionType] = None,\n        auxiliary_uuids: Optional[List[str]] = None,\n        # custom params\n        rgb_resnet_preprocessor_uuid: Optional[str] = None,\n        depth_resnet_preprocessor_uuid: Optional[str] = None,\n        goal_dims: int = 32,\n        resnet_compressor_hidden_out_dims: Tuple[int, int] = (128, 32),\n        combiner_hidden_out_dims: Tuple[int, int] = (128, 32),\n        **kwargs,\n    ):\n        super().__init__(\n            action_space=action_space,\n            observation_space=observation_space,\n            hidden_size=hidden_size,\n            multiple_beliefs=multiple_beliefs,\n            beliefs_fusion=beliefs_fusion,\n            auxiliary_uuids=auxiliary_uuids,\n            **kwargs,\n        )\n\n        if (\n            rgb_resnet_preprocessor_uuid is None\n            or depth_resnet_preprocessor_uuid is None\n        ):\n            resnet_preprocessor_uuid = (\n                rgb_resnet_preprocessor_uuid\n                if rgb_resnet_preprocessor_uuid is not None\n                else depth_resnet_preprocessor_uuid\n            )\n            self.goal_visual_encoder = ResnetTensorGoalEncoder(\n                self.observation_space,\n                goal_sensor_uuid,\n                resnet_preprocessor_uuid,\n                goal_dims,\n                resnet_compressor_hidden_out_dims,\n                combiner_hidden_out_dims,\n            )\n        else:\n            self.goal_visual_encoder = ResnetDualTensorGoalEncoder(  # type:ignore\n                self.observation_space,\n                goal_sensor_uuid,\n                
rgb_resnet_preprocessor_uuid,\n                depth_resnet_preprocessor_uuid,\n                goal_dims,\n                resnet_compressor_hidden_out_dims,\n                combiner_hidden_out_dims,\n            )\n\n        self.create_state_encoders(\n            obs_embed_size=self.goal_visual_encoder.output_dims,\n            num_rnn_layers=num_rnn_layers,\n            rnn_type=rnn_type,\n            add_prev_actions=add_prev_actions,\n            add_prev_action_null_token=add_prev_action_null_token,\n            prev_action_embed_size=action_embed_size,\n        )\n\n        self.create_actorcritic_head()\n\n        self.create_aux_models(\n            obs_embed_size=self.goal_visual_encoder.output_dims,\n            action_embed_size=action_embed_size,\n        )\n\n        self.train()\n\n    @property\n    def is_blind(self) -> bool:\n        \"\"\"True if the model is blind (e.g. neither 'depth' or 'rgb' is an\n        input observation type).\"\"\"\n        return self.goal_visual_encoder.is_blind\n\n    def forward_encoder(self, observations: ObservationType) -> torch.FloatTensor:\n        return self.goal_visual_encoder(observations)\n\n\nclass ResnetTensorGoalEncoder(nn.Module):\n    def __init__(\n        self,\n        observation_spaces: SpaceDict,\n        goal_sensor_uuid: str,\n        resnet_preprocessor_uuid: str,\n        goal_embed_dims: int = 32,\n        resnet_compressor_hidden_out_dims: Tuple[int, int] = (128, 32),\n        combiner_hidden_out_dims: Tuple[int, int] = (128, 32),\n    ) -> None:\n        super().__init__()\n        self.goal_uuid = goal_sensor_uuid\n        self.resnet_uuid = resnet_preprocessor_uuid\n        self.goal_embed_dims = goal_embed_dims\n        self.resnet_hid_out_dims = resnet_compressor_hidden_out_dims\n        self.combine_hid_out_dims = combiner_hidden_out_dims\n\n        self.goal_space = observation_spaces.spaces[self.goal_uuid]\n        if isinstance(self.goal_space, gym.spaces.Discrete):\n            
self.embed_goal = nn.Embedding(\n                num_embeddings=self.goal_space.n,\n                embedding_dim=self.goal_embed_dims,\n            )\n        elif isinstance(self.goal_space, gym.spaces.Box):\n            self.embed_goal = nn.Linear(self.goal_space.shape[-1], self.goal_embed_dims)\n        else:\n            raise NotImplementedError\n\n        self.blind = self.resnet_uuid not in observation_spaces.spaces\n        if not self.blind:\n            self.resnet_tensor_shape = observation_spaces.spaces[self.resnet_uuid].shape\n            self.resnet_compressor = nn.Sequential(\n                nn.Conv2d(self.resnet_tensor_shape[0], self.resnet_hid_out_dims[0], 1),\n                nn.ReLU(),\n                nn.Conv2d(*self.resnet_hid_out_dims[0:2], 1),\n                nn.ReLU(),\n            )\n            self.target_obs_combiner = nn.Sequential(\n                nn.Conv2d(\n                    self.resnet_hid_out_dims[1] + self.goal_embed_dims,\n                    self.combine_hid_out_dims[0],\n                    1,\n                ),\n                nn.ReLU(),\n                nn.Conv2d(*self.combine_hid_out_dims[0:2], 1),\n            )\n\n    @property\n    def is_blind(self):\n        return self.blind\n\n    @property\n    def output_dims(self):\n        if self.blind:\n            return self.goal_embed_dims\n        else:\n            return (\n                self.combine_hid_out_dims[-1]\n                * self.resnet_tensor_shape[1]\n                * self.resnet_tensor_shape[2]\n            )\n\n    def get_object_type_encoding(\n        self, observations: Dict[str, torch.FloatTensor]\n    ) -> torch.FloatTensor:\n        \"\"\"Get the object type encoding from input batched observations.\"\"\"\n        return cast(\n            torch.FloatTensor,\n            self.embed_goal(observations[self.goal_uuid].to(torch.int64)),\n        )\n\n    def compress_resnet(self, observations):\n        return 
self.resnet_compressor(observations[self.resnet_uuid])\n\n    def distribute_target(self, observations):\n        target_emb = self.embed_goal(observations[self.goal_uuid])\n        return target_emb.view(-1, self.goal_embed_dims, 1, 1).expand(\n            -1, -1, self.resnet_tensor_shape[-2], self.resnet_tensor_shape[-1]\n        )\n\n    def adapt_input(self, observations):\n        observations = {**observations}\n        resnet = observations[self.resnet_uuid]\n        goal = observations[self.goal_uuid]\n\n        use_agent = False\n        nagent = 1\n\n        if len(resnet.shape) == 6:\n            use_agent = True\n            nstep, nsampler, nagent = resnet.shape[:3]\n        else:\n            nstep, nsampler = resnet.shape[:2]\n\n        observations[self.resnet_uuid] = resnet.view(-1, *resnet.shape[-3:])\n        observations[self.goal_uuid] = goal.view(-1, goal.shape[-1])\n\n        return observations, use_agent, nstep, nsampler, nagent\n\n    @staticmethod\n    def adapt_output(x, use_agent, nstep, nsampler, nagent):\n        if use_agent:\n            return x.view(nstep, nsampler, nagent, -1)\n        return x.view(nstep, nsampler * nagent, -1)\n\n    def forward(self, observations):\n        observations, use_agent, nstep, nsampler, nagent = self.adapt_input(\n            observations\n        )\n\n        if self.blind:\n            return self.embed_goal(observations[self.goal_uuid])\n        embs = [\n            self.compress_resnet(observations),\n            self.distribute_target(observations),\n        ]\n        x = self.target_obs_combiner(\n            torch.cat(\n                embs,\n                dim=1,\n            )\n        )\n        x = x.reshape(x.size(0), -1)  # flatten\n\n        return self.adapt_output(x, use_agent, nstep, nsampler, nagent)\n\n\nclass ResnetDualTensorGoalEncoder(nn.Module):\n    def __init__(\n        self,\n        observation_spaces: SpaceDict,\n        goal_sensor_uuid: str,\n        
rgb_resnet_preprocessor_uuid: str,\n        depth_resnet_preprocessor_uuid: str,\n        goal_embed_dims: int = 32,\n        resnet_compressor_hidden_out_dims: Tuple[int, int] = (128, 32),\n        combiner_hidden_out_dims: Tuple[int, int] = (128, 32),\n    ) -> None:\n        super().__init__()\n        self.goal_uuid = goal_sensor_uuid\n        self.rgb_resnet_uuid = rgb_resnet_preprocessor_uuid\n        self.depth_resnet_uuid = depth_resnet_preprocessor_uuid\n        self.goal_embed_dims = goal_embed_dims\n        self.resnet_hid_out_dims = resnet_compressor_hidden_out_dims\n        self.combine_hid_out_dims = combiner_hidden_out_dims\n\n        self.goal_space = observation_spaces.spaces[self.goal_uuid]\n        if isinstance(self.goal_space, gym.spaces.Discrete):\n            self.embed_goal = nn.Embedding(\n                num_embeddings=self.goal_space.n,\n                embedding_dim=self.goal_embed_dims,\n            )\n        elif isinstance(self.goal_space, gym.spaces.Box):\n            self.embed_goal = nn.Linear(self.goal_space.shape[-1], self.goal_embed_dims)\n        else:\n            raise NotImplementedError\n\n        self.blind = (\n            self.rgb_resnet_uuid not in observation_spaces.spaces\n            or self.depth_resnet_uuid not in observation_spaces.spaces\n        )\n        if not self.blind:\n            self.resnet_tensor_shape = observation_spaces.spaces[\n                self.rgb_resnet_uuid\n            ].shape\n            self.rgb_resnet_compressor = nn.Sequential(\n                nn.Conv2d(self.resnet_tensor_shape[0], self.resnet_hid_out_dims[0], 1),\n                nn.ReLU(),\n                nn.Conv2d(*self.resnet_hid_out_dims[0:2], 1),\n                nn.ReLU(),\n            )\n            self.depth_resnet_compressor = nn.Sequential(\n                nn.Conv2d(self.resnet_tensor_shape[0], self.resnet_hid_out_dims[0], 1),\n                nn.ReLU(),\n                nn.Conv2d(*self.resnet_hid_out_dims[0:2], 1),\n   
             nn.ReLU(),\n            )\n            self.rgb_target_obs_combiner = nn.Sequential(\n                nn.Conv2d(\n                    self.resnet_hid_out_dims[1] + self.goal_embed_dims,\n                    self.combine_hid_out_dims[0],\n                    1,\n                ),\n                nn.ReLU(),\n                nn.Conv2d(*self.combine_hid_out_dims[0:2], 1),\n            )\n            self.depth_target_obs_combiner = nn.Sequential(\n                nn.Conv2d(\n                    self.resnet_hid_out_dims[1] + self.goal_embed_dims,\n                    self.combine_hid_out_dims[0],\n                    1,\n                ),\n                nn.ReLU(),\n                nn.Conv2d(*self.combine_hid_out_dims[0:2], 1),\n            )\n\n    @property\n    def is_blind(self):\n        return self.blind\n\n    @property\n    def output_dims(self):\n        if self.blind:\n            return self.goal_embed_dims\n        else:\n            return (\n                2\n                * self.combine_hid_out_dims[-1]\n                * self.resnet_tensor_shape[1]\n                * self.resnet_tensor_shape[2]\n            )\n\n    def get_object_type_encoding(\n        self, observations: Dict[str, torch.FloatTensor]\n    ) -> torch.FloatTensor:\n        \"\"\"Get the object type encoding from input batched observations.\"\"\"\n        return cast(\n            torch.FloatTensor,\n            self.embed_goal(observations[self.goal_uuid].to(torch.int64)),\n        )\n\n    def compress_rgb_resnet(self, observations):\n        return self.rgb_resnet_compressor(observations[self.rgb_resnet_uuid])\n\n    def compress_depth_resnet(self, observations):\n        return self.depth_resnet_compressor(observations[self.depth_resnet_uuid])\n\n    def distribute_target(self, observations):\n        target_emb = self.embed_goal(observations[self.goal_uuid])\n        return target_emb.view(-1, self.goal_embed_dims, 1, 1).expand(\n            -1, -1, 
self.resnet_tensor_shape[-2], self.resnet_tensor_shape[-1]\n        )\n\n    def adapt_input(self, observations):\n        rgb = observations[self.rgb_resnet_uuid]\n        depth = observations[self.depth_resnet_uuid]\n\n        use_agent = False\n        nagent = 1\n\n        if len(rgb.shape) == 6:\n            use_agent = True\n            nstep, nsampler, nagent = rgb.shape[:3]\n        else:\n            nstep, nsampler = rgb.shape[:2]\n\n        observations[self.rgb_resnet_uuid] = rgb.view(-1, *rgb.shape[-3:])\n        observations[self.depth_resnet_uuid] = depth.view(-1, *depth.shape[-3:])\n        observations[self.goal_uuid] = observations[self.goal_uuid].view(-1, 1)\n\n        return observations, use_agent, nstep, nsampler, nagent\n\n    @staticmethod\n    def adapt_output(x, use_agent, nstep, nsampler, nagent):\n        if use_agent:\n            return x.view(nstep, nsampler, nagent, -1)\n        return x.view(nstep, nsampler * nagent, -1)\n\n    def forward(self, observations):\n        observations, use_agent, nstep, nsampler, nagent = self.adapt_input(\n            observations\n        )\n\n        if self.blind:\n            return self.embed_goal(observations[self.goal_uuid])\n        rgb_embs = [\n            self.compress_rgb_resnet(observations),\n            self.distribute_target(observations),\n        ]\n        rgb_x = self.rgb_target_obs_combiner(\n            torch.cat(\n                rgb_embs,\n                dim=1,\n            )\n        )\n        depth_embs = [\n            self.compress_depth_resnet(observations),\n            self.distribute_target(observations),\n        ]\n        depth_x = self.depth_target_obs_combiner(\n            torch.cat(\n                depth_embs,\n                dim=1,\n            )\n        )\n        x = torch.cat([rgb_x, depth_x], dim=1)\n        x = x.reshape(x.shape[0], -1)  # flatten\n\n        return self.adapt_output(x, use_agent, nstep, nsampler, nagent)\n"
  },
  {
    "path": "allenact_plugins/navigation_plugin/pointnav/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/navigation_plugin/pointnav/models.py",
    "content": "\"\"\"Baseline models for use in the point navigation task.\n\nObject navigation is currently available as a Task in AI2-THOR and\nFacebook's Habitat.\n\"\"\"\n\nfrom typing import Optional, List, Union, Sequence\n\nimport gym\nimport torch\nimport torch.nn as nn\nfrom gym.spaces import Dict as SpaceDict\n\nfrom allenact.algorithms.onpolicy_sync.policy import ObservationType\nfrom allenact.embodiedai.models import resnet as resnet\nfrom allenact.embodiedai.models.basic_models import SimpleCNN\nfrom allenact.embodiedai.models.visual_nav_models import (\n    VisualNavActorCritic,\n    FusionType,\n)\n\n\nclass PointNavActorCritic(VisualNavActorCritic):\n    \"\"\"Use raw image as observation to the agent.\"\"\"\n\n    def __init__(\n        # base params\n        self,\n        action_space: gym.spaces.Discrete,\n        observation_space: SpaceDict,\n        goal_sensor_uuid: str,\n        hidden_size=512,\n        num_rnn_layers=1,\n        rnn_type=\"GRU\",\n        add_prev_actions=False,\n        add_prev_action_null_token=False,\n        action_embed_size=4,\n        multiple_beliefs=False,\n        beliefs_fusion: Optional[FusionType] = None,\n        auxiliary_uuids: Optional[Sequence[str]] = None,\n        # custom params\n        rgb_uuid: Optional[str] = None,\n        depth_uuid: Optional[str] = None,\n        embed_coordinates=False,\n        coordinate_embedding_dim=8,\n        coordinate_dims=2,\n        # perception backbone params,\n        backbone=\"gnresnet18\",\n        resnet_baseplanes=32,\n    ):\n        super().__init__(\n            action_space=action_space,\n            observation_space=observation_space,\n            hidden_size=hidden_size,\n            multiple_beliefs=multiple_beliefs,\n            beliefs_fusion=beliefs_fusion,\n            auxiliary_uuids=auxiliary_uuids,\n        )\n\n        self.goal_sensor_uuid = goal_sensor_uuid\n        self.embed_coordinates = embed_coordinates\n        if 
self.embed_coordinates:\n            self.coordinate_embedding_size = coordinate_embedding_dim\n        else:\n            self.coordinate_embedding_size = coordinate_dims\n\n        self.sensor_fusion = False\n        if rgb_uuid is not None and depth_uuid is not None:\n            self.sensor_fuser = nn.Linear(hidden_size * 2, hidden_size)\n            self.sensor_fusion = True\n\n        self.backbone = backbone\n        if backbone == \"simple_cnn\":\n            self.visual_encoder = SimpleCNN(\n                observation_space=observation_space,\n                output_size=hidden_size,\n                rgb_uuid=rgb_uuid,\n                depth_uuid=depth_uuid,\n            )\n        else:  # resnet family\n            self.visual_encoder = resnet.GroupNormResNetEncoder(\n                observation_space=observation_space,\n                output_size=hidden_size,\n                rgb_uuid=rgb_uuid,\n                depth_uuid=depth_uuid,\n                baseplanes=resnet_baseplanes,\n                ngroups=resnet_baseplanes // 2,\n                make_backbone=getattr(resnet, backbone),\n            )\n\n        if self.embed_coordinates:\n            self.coordinate_embedding = nn.Linear(\n                coordinate_dims, coordinate_embedding_dim\n            )\n\n        self.create_state_encoders(\n            obs_embed_size=self.goal_visual_encoder_output_dims,\n            num_rnn_layers=num_rnn_layers,\n            rnn_type=rnn_type,\n            add_prev_actions=add_prev_actions,\n            add_prev_action_null_token=add_prev_action_null_token,\n            prev_action_embed_size=action_embed_size,\n        )\n\n        self.create_actorcritic_head()\n\n        self.create_aux_models(\n            obs_embed_size=self.goal_visual_encoder_output_dims,\n            action_embed_size=action_embed_size,\n        )\n\n        self.train()\n\n    @property\n    def is_blind(self):\n        return self.visual_encoder.is_blind\n\n    @property\n    def 
goal_visual_encoder_output_dims(self):\n        dims = self.coordinate_embedding_size\n        if self.is_blind:\n            return dims\n        return dims + self.recurrent_hidden_state_size\n\n    def get_target_coordinates_encoding(self, observations):\n        if self.embed_coordinates:\n            return self.coordinate_embedding(\n                observations[self.goal_sensor_uuid].to(torch.float32)\n            )\n        else:\n            return observations[self.goal_sensor_uuid].to(torch.float32)\n\n    def forward_encoder(self, observations: ObservationType) -> torch.FloatTensor:\n        target_encoding = self.get_target_coordinates_encoding(observations)\n        obs_embeds: Union[torch.Tensor, List[torch.Tensor]]\n        obs_embeds = [target_encoding]\n\n        if not self.is_blind:\n            perception_embed = self.visual_encoder(observations)\n            if self.sensor_fusion:\n                perception_embed = self.sensor_fuser(perception_embed)\n            obs_embeds = [perception_embed] + obs_embeds\n\n        obs_embeds = torch.cat(obs_embeds, dim=-1)\n        return obs_embeds\n"
  },
  {
    "path": "allenact_plugins/robothor_plugin/__init__.py",
    "content": "from allenact.utils.system import ImportChecker\n\nwith ImportChecker(\n    \"Cannot `import ai2thor`, please install `ai2thor` (`pip install ai2thor`).\"\n):\n    # noinspection PyUnresolvedReferences\n    import ai2thor\n"
  },
  {
    "path": "allenact_plugins/robothor_plugin/configs/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/robothor_plugin/extra_environment.yml",
    "content": "channels:\n  - defaults\n  - conda-forge\ndependencies:\n  - ai2thor>=2.5.3\n  - numba\n  - pip\n  - colour\n  - packaging\n  - pip:\n      - numpy-quaternion\n      - pyquaternion>=0.9.9\n      - python-xlib\n"
  },
  {
    "path": "allenact_plugins/robothor_plugin/extra_requirements.txt",
    "content": "ai2thor>=2.5.3\nnumpy-quaternion\npyquaternion>=0.9.9\ncolour\nnumba\npackaging\npython-xlib\n"
  },
  {
    "path": "allenact_plugins/robothor_plugin/robothor_constants.py",
    "content": "MOVE_AHEAD = \"MoveAhead\"\nROTATE_LEFT = \"RotateLeft\"\nROTATE_RIGHT = \"RotateRight\"\nLOOK_DOWN = \"LookDown\"\nLOOK_UP = \"LookUp\"\nEND = \"End\"\nPASS = \"Pass\"\n"
  },
  {
    "path": "allenact_plugins/robothor_plugin/robothor_distributions.py",
    "content": "from typing import Tuple\n\nimport torch\n\nfrom allenact.base_abstractions.distributions import CategoricalDistr, Distr\n\n\nclass TupleCategoricalDistr(Distr):\n    def __init__(self, probs=None, logits=None, validate_args=None):\n        self.dists = CategoricalDistr(\n            probs=probs, logits=logits, validate_args=validate_args\n        )\n\n    def log_prob(self, actions: Tuple[torch.LongTensor, ...]) -> torch.FloatTensor:\n        # flattened output [steps, samplers, num_agents]\n        return self.dists.log_prob(torch.stack(actions, dim=-1))\n\n    def entropy(self) -> torch.FloatTensor:\n        # flattened output [steps, samplers, num_agents]\n        return self.dists.entropy()\n\n    def sample(self, sample_shape=torch.Size()) -> Tuple[torch.LongTensor, ...]:\n        # split and remove trailing singleton dim\n        res = self.dists.sample(sample_shape).split(1, dim=-1)\n        return tuple([r.view(r.shape[:2]) for r in res])\n\n    def mode(self) -> Tuple[torch.LongTensor, ...]:\n        # split and remove trailing singleton dim\n        res = self.dists.mode().split(1, dim=-1)\n        return tuple([r.view(r.shape[:2]) for r in res])\n"
  },
  {
    "path": "allenact_plugins/robothor_plugin/robothor_environment.py",
    "content": "import copy\nimport math\nimport random\nimport warnings\nfrom typing import Any, Optional, Dict, List, Union, Tuple, Collection\n\nimport ai2thor.server\nimport numpy as np\nfrom ai2thor.controller import Controller\nfrom ai2thor.fifo_server import FifoServer\nfrom ai2thor.util import metrics\n\nfrom allenact.utils.cache_utils import DynamicDistanceCache\nfrom allenact.utils.experiment_utils import recursive_update\nfrom allenact.utils.system import get_logger\n\n\nclass RoboThorEnvironment:\n    \"\"\"Wrapper for the robo2thor controller providing additional functionality\n    and bookkeeping.\n\n    See [here](https://ai2thor.allenai.org/robothor/documentation) for comprehensive\n     documentation on RoboTHOR.\n\n    # Attributes\n\n    controller : The AI2-THOR controller.\n    config : The AI2-THOR controller configuration\n    \"\"\"\n\n    def __init__(self, all_metadata_available: bool = True, **kwargs):\n        self.config = dict(\n            rotateStepDegrees=30.0,\n            visibilityDistance=1.0,\n            gridSize=0.25,\n            continuousMode=True,\n            snapToGrid=False,\n            agentMode=\"locobot\",\n            width=640,\n            height=480,\n            agentCount=1,\n            server_class=FifoServer,\n        )\n\n        if \"agentCount\" in kwargs:\n            assert kwargs[\"agentCount\"] > 0\n\n        kwargs[\"agentMode\"] = kwargs.get(\"agentMode\", \"locobot\")\n        if kwargs[\"agentMode\"] not in [\"bot\", \"locobot\"]:\n            warnings.warn(\n                f\"The RoboTHOR environment has not been tested using\"\n                f\" an agent of mode '{kwargs['agentMode']}'.\"\n            )\n\n        recursive_update(self.config, kwargs)\n        self.controller = Controller(\n            **self.config,\n        )\n\n        self.all_metadata_available = all_metadata_available\n\n        self.scene_to_reachable_positions: Optional[Dict[str, Any]] = None\n        
self.distance_cache: Optional[DynamicDistanceCache] = None\n\n        if self.all_metadata_available:\n            self.scene_to_reachable_positions = {\n                self.scene_name: copy.deepcopy(self.currently_reachable_points)\n            }\n            assert len(self.scene_to_reachable_positions[self.scene_name]) > 10\n\n            self.distance_cache = DynamicDistanceCache(rounding=1)\n\n        self.agent_count = self.config[\"agentCount\"]\n\n        self._extra_teleport_kwargs: Dict[str, Any] = (\n            {}\n        )  # Used for backwards compatibility with the teleport action\n\n    def initialize_grid_dimensions(\n        self, reachable_points: Collection[Dict[str, float]]\n    ) -> Tuple[int, int, int, int]:\n        \"\"\"Computes bounding box for reachable points quantized with the\n        current gridSize.\"\"\"\n        points = {\n            (\n                round(p[\"x\"] / self.config[\"gridSize\"]),\n                round(p[\"z\"] / self.config[\"gridSize\"]),\n            ): p\n            for p in reachable_points\n        }\n\n        assert len(reachable_points) == len(points)\n\n        xmin, xmax = min([p[0] for p in points]), max([p[0] for p in points])\n        zmin, zmax = min([p[1] for p in points]), max([p[1] for p in points])\n\n        return xmin, xmax, zmin, zmax\n\n    def set_object_filter(self, object_ids: List[str]):\n        self.controller.step(\"SetObjectFilter\", objectIds=object_ids, renderImage=False)\n\n    def reset_object_filter(self):\n        self.controller.step(\"ResetObjectFilter\", renderImage=False)\n\n    def path_from_point_to_object_type(\n        self, point: Dict[str, float], object_type: str, allowed_error: float\n    ) -> Optional[List[Dict[str, float]]]:\n        event = self.controller.step(\n            action=\"GetShortestPath\",\n            objectType=object_type,\n            position=point,\n            allowedError=allowed_error,\n        )\n        if 
event.metadata[\"lastActionSuccess\"]:\n            return event.metadata[\"actionReturn\"][\"corners\"]\n        else:\n            get_logger().debug(\n                \"Failed to find path for {} in {}. Start point {}, agent state {}.\".format(\n                    object_type,\n                    self.controller.last_event.metadata[\"sceneName\"],\n                    point,\n                    self.agent_state(),\n                )\n            )\n            return None\n\n    def distance_from_point_to_object_type(\n        self, point: Dict[str, float], object_type: str, allowed_error: float\n    ) -> float:\n        \"\"\"Minimal geodesic distance from a point to an object of the given\n        type.\n\n        It might return -1.0 for unreachable targets.\n        \"\"\"\n        path = self.path_from_point_to_object_type(point, object_type, allowed_error)\n        if path:\n            # Because `allowed_error != 0` means that the path returned above might not start\n            # at `point`, we explicitly add any offset there is.\n            s_dist = math.sqrt(\n                (point[\"x\"] - path[0][\"x\"]) ** 2 + (point[\"z\"] - path[0][\"z\"]) ** 2\n            )\n            return metrics.path_distance(path) + s_dist\n        return -1.0\n\n    def distance_to_object_type(self, object_type: str, agent_id: int = 0) -> float:\n        \"\"\"Minimal geodesic distance to object of given type from agent's\n        current location.\n\n        It might return -1.0 for unreachable targets.\n        \"\"\"\n        assert 0 <= agent_id < self.agent_count\n        assert (\n            self.all_metadata_available\n        ), \"`distance_to_object_type` cannot be called when `self.all_metadata_available` is `False`.\"\n\n        def retry_dist(position: Dict[str, float], object_type: str):\n            allowed_error = 0.05\n            debug_log = \"\"\n            d = -1.0\n            while allowed_error < 2.5:\n                d = 
self.distance_from_point_to_object_type(\n                    position, object_type, allowed_error\n                )\n                if d < 0:\n                    debug_log = (\n                        f\"In scene {self.scene_name}, could not find a path from {position} to {object_type} with\"\n                        f\" {allowed_error} error tolerance. Increasing this tolerance to\"\n                        f\" {2 * allowed_error} any trying again.\"\n                    )\n                    allowed_error *= 2\n                else:\n                    break\n            if d < 0:\n                get_logger().debug(\n                    f\"In scene {self.scene_name}, could not find a path from {position} to {object_type}\"\n                    f\" with {allowed_error} error tolerance. Returning a distance of -1.\"\n                )\n            elif debug_log != \"\":\n                get_logger().debug(debug_log)\n            return d\n\n        return self.distance_cache.find_distance(\n            self.scene_name,\n            self.controller.last_event.events[agent_id].metadata[\"agent\"][\"position\"],\n            object_type,\n            retry_dist,\n        )\n\n    def path_from_point_to_point(\n        self, position: Dict[str, float], target: Dict[str, float], allowedError: float\n    ) -> Optional[List[Dict[str, float]]]:\n        try:\n            return self.controller.step(\n                action=\"GetShortestPathToPoint\",\n                position=position,\n                target=target,\n                allowedError=allowedError,\n            ).metadata[\"actionReturn\"][\"corners\"]\n        except ValueError:\n            raise\n        except Exception:\n            get_logger().debug(\n                \"Failed to find path for {} in {}. 
Start point {}, agent state {}.\".format(\n                    target,\n                    self.controller.last_event.metadata[\"sceneName\"],\n                    position,\n                    self.agent_state(),\n                )\n            )\n            return None\n\n    def distance_from_point_to_point(\n        self, position: Dict[str, float], target: Dict[str, float], allowed_error: float\n    ) -> float:\n        path = self.path_from_point_to_point(position, target, allowed_error)\n        if path:\n            # Because `allowed_error != 0` means that the path returned above might not start\n            # or end exactly at the position/target points, we explicitly add any offset there is.\n            s_dist = math.sqrt(\n                (position[\"x\"] - path[0][\"x\"]) ** 2\n                + (position[\"z\"] - path[0][\"z\"]) ** 2\n            )\n            t_dist = math.sqrt(\n                (target[\"x\"] - path[-1][\"x\"]) ** 2 + (target[\"z\"] - path[-1][\"z\"]) ** 2\n            )\n            return metrics.path_distance(path) + s_dist + t_dist\n        return -1.0\n\n    def distance_to_point(self, target: Dict[str, float], agent_id: int = 0) -> float:\n        \"\"\"Minimal geodesic distance to end point from agent's current\n        location.\n\n        It might return -1.0 for unreachable targets.\n        \"\"\"\n        assert 0 <= agent_id < self.agent_count\n        assert (\n            self.all_metadata_available\n        ), \"`distance_to_object_type` cannot be called when `self.all_metadata_available` is `False`.\"\n\n        def retry_dist(position: Dict[str, float], target: Dict[str, float]):\n            allowed_error = 0.05\n            debug_log = \"\"\n            d = -1.0\n            while allowed_error < 2.5:\n                d = self.distance_from_point_to_point(position, target, allowed_error)\n                if d < 0:\n                    debug_log = (\n                        f\"In scene {self.scene_name}, could 
not find a path from {position} to {target} with\"\n                        f\" {allowed_error} error tolerance. Increasing this tolerance to\"\n                        f\" {2 * allowed_error} any trying again.\"\n                    )\n                    allowed_error *= 2\n                else:\n                    break\n            if d < 0:\n                get_logger().debug(\n                    f\"In scene {self.scene_name}, could not find a path from {position} to {target}\"\n                    f\" with {allowed_error} error tolerance. Returning a distance of -1.\"\n                )\n            elif debug_log != \"\":\n                get_logger().debug(debug_log)\n            return d\n\n        return self.distance_cache.find_distance(\n            self.scene_name,\n            self.controller.last_event.events[agent_id].metadata[\"agent\"][\"position\"],\n            target,\n            retry_dist,\n        )\n\n    def agent_state(self, agent_id: int = 0) -> Dict:\n        \"\"\"Return agent position, rotation and horizon.\"\"\"\n        assert 0 <= agent_id < self.agent_count\n\n        agent_meta = self.last_event.events[agent_id].metadata[\"agent\"]\n        return {\n            **{k: float(v) for k, v in agent_meta[\"position\"].items()},\n            \"rotation\": {k: float(v) for k, v in agent_meta[\"rotation\"].items()},\n            \"horizon\": round(float(agent_meta[\"cameraHorizon\"]), 1),\n        }\n\n    def teleport(\n        self,\n        pose: Dict[str, float],\n        rotation: Dict[str, float],\n        horizon: float = 0.0,\n        agent_id: int = 0,\n    ):\n        assert 0 <= agent_id < self.agent_count\n        try:\n            e = self.controller.step(\n                action=\"TeleportFull\",\n                x=pose[\"x\"],\n                y=pose[\"y\"],\n                z=pose[\"z\"],\n                rotation=rotation,\n                horizon=horizon,\n                agentId=agent_id,\n                
**self._extra_teleport_kwargs,\n            )\n        except ValueError as e:\n            if len(self._extra_teleport_kwargs) == 0:\n                self._extra_teleport_kwargs[\"standing\"] = True\n            else:\n                raise e\n            return self.teleport(\n                pose=pose, rotation=rotation, horizon=horizon, agent_id=agent_id\n            )\n        return e.metadata[\"lastActionSuccess\"]\n\n    def reset(\n        self, scene_name: str = None, filtered_objects: Optional[List[str]] = None\n    ) -> None:\n        \"\"\"Resets scene to a known initial state.\"\"\"\n        if scene_name is not None and scene_name != self.scene_name:\n            self.controller.reset(scene_name)\n            assert self.last_action_success, \"Could not reset to new scene\"\n\n            if (\n                self.all_metadata_available\n                and scene_name not in self.scene_to_reachable_positions\n            ):\n                self.scene_to_reachable_positions[scene_name] = copy.deepcopy(\n                    self.currently_reachable_points\n                )\n                assert len(self.scene_to_reachable_positions[scene_name]) > 10\n        if filtered_objects:\n            self.set_object_filter(filtered_objects)\n        else:\n            self.reset_object_filter()\n\n    def random_reachable_state(\n        self, seed: Optional[int] = None\n    ) -> Dict[str, Union[Dict[str, float], float]]:\n        \"\"\"Returns a random reachable location in the scene.\"\"\"\n        assert (\n            self.all_metadata_available\n        ), \"`random_reachable_state` cannot be called when `self.all_metadata_available` is `False`.\"\n\n        if seed is not None:\n            random.seed(seed)\n        # xyz = random.choice(self.currently_reachable_points)\n        assert len(self.scene_to_reachable_positions[self.scene_name]) > 10\n        xyz = copy.deepcopy(\n            
random.choice(self.scene_to_reachable_positions[self.scene_name])\n        )\n        rotation = random.choice(\n            np.arange(0.0, 360.0, self.config[\"rotateStepDegrees\"])\n        )\n        horizon = 0.0  # random.choice([0.0, 30.0, 330.0])\n        return {\n            **{k: float(v) for k, v in xyz.items()},\n            \"rotation\": {\"x\": 0.0, \"y\": float(rotation), \"z\": 0.0},\n            \"horizon\": float(horizon),\n        }\n\n    def randomize_agent_location(\n        self,\n        seed: int = None,\n        partial_position: Optional[Dict[str, float]] = None,\n        agent_id: int = 0,\n    ) -> Dict[str, Union[Dict[str, float], float]]:\n        \"\"\"Teleports the agent to a random reachable location in the scene.\"\"\"\n        assert 0 <= agent_id < self.agent_count\n\n        if partial_position is None:\n            partial_position = {}\n        k = 0\n        state: Optional[Dict] = None\n\n        while k == 0 or (not self.last_action_success and k < 10):\n            # self.reset()\n            state = {**self.random_reachable_state(seed=seed), **partial_position}\n            # get_logger().debug(\"picked target location {}\".format(state))\n            self.controller.step(\"TeleportFull\", **state, agentId=agent_id)\n            k += 1\n\n        if not self.last_action_success:\n            get_logger().warning(\n                (\n                    \"Randomize agent location in scene {} and current random state {}\"\n                    \" with seed {} and partial position {} failed in \"\n                    \"10 attempts. 
Forcing the action.\"\n                ).format(self.scene_name, state, seed, partial_position)\n            )\n            self.controller.step(\"TeleportFull\", **state, force_action=True, agentId=agent_id)  # type: ignore\n            assert self.last_action_success, \"Force action failed with {}\".format(state)\n\n        # get_logger().debug(\"location after teleport full {}\".format(self.agent_state()))\n        # self.controller.step(\"TeleportFull\", **self.agent_state())  # TODO only for debug\n        # get_logger().debug(\"location after re-teleport full {}\".format(self.agent_state()))\n\n        return self.agent_state(agent_id=agent_id)\n\n    def known_good_locations_list(self):\n        assert (\n            self.all_metadata_available\n        ), \"`known_good_locations_list` cannot be called when `self.all_metadata_available` is `False`.\"\n        return self.scene_to_reachable_positions[self.scene_name]\n\n    @property\n    def currently_reachable_points(self) -> List[Dict[str, float]]:\n        \"\"\"List of {\"x\": x, \"y\": y, \"z\": z} locations in the scene that are\n        currently reachable.\"\"\"\n        self.controller.step(action=\"GetReachablePositions\")\n        assert (\n            self.last_action_success\n        ), f\"Could not get reachable positions for reason {self.last_event.metadata['errorMessage']}.\"\n        return self.last_action_return\n\n    @property\n    def scene_name(self) -> str:\n        \"\"\"Current ai2thor scene.\"\"\"\n        return self.controller.last_event.metadata[\"sceneName\"].replace(\"_physics\", \"\")\n\n    @property\n    def current_frame(self) -> np.ndarray:\n        \"\"\"Returns rgb image corresponding to the agent's egocentric view.\"\"\"\n        return self.controller.last_event.frame\n\n    @property\n    def current_depth(self) -> np.ndarray:\n        \"\"\"Returns depth image corresponding to the agent's egocentric view.\"\"\"\n        return 
self.controller.last_event.depth_frame\n\n    @property\n    def current_frames(self) -> List[np.ndarray]:\n        \"\"\"Returns rgb images corresponding to the agents' egocentric views.\"\"\"\n        return [\n            self.controller.last_event.events[agent_id].frame\n            for agent_id in range(self.agent_count)\n        ]\n\n    @property\n    def current_depths(self) -> List[np.ndarray]:\n        \"\"\"Returns depth images corresponding to the agents' egocentric\n        views.\"\"\"\n        return [\n            self.controller.last_event.events[agent_id].depth_frame\n            for agent_id in range(self.agent_count)\n        ]\n\n    @property\n    def last_event(self) -> ai2thor.server.Event:\n        \"\"\"Last event returned by the controller.\"\"\"\n        return self.controller.last_event\n\n    @property\n    def last_action(self) -> str:\n        \"\"\"Last action, as a string, taken by the agent.\"\"\"\n        return self.controller.last_event.metadata[\"lastAction\"]\n\n    @property\n    def last_action_success(self) -> bool:\n        \"\"\"Was the last action taken by the agent a success?\"\"\"\n        return self.controller.last_event.metadata[\"lastActionSuccess\"]\n\n    @property\n    def last_action_return(self) -> Any:\n        \"\"\"Get the value returned by the last action (if applicable).\n\n        For an example of an action that returns a value, see\n        `\"GetReachablePositions\"`.\n        \"\"\"\n        return self.controller.last_event.metadata[\"actionReturn\"]\n\n    def step(\n        self,\n        action_dict: Optional[Dict[str, Union[str, int, float, Dict]]] = None,\n        **kwargs: Union[str, int, float, Dict],\n    ) -> ai2thor.server.Event:\n        \"\"\"Take a step in the ai2thor environment.\"\"\"\n        if action_dict is None:\n            action_dict = dict()\n        action_dict.update(kwargs)\n\n        return self.controller.step(**action_dict)\n\n    def stop(self):\n        \"\"\"Stops 
the ai2thor controller.\"\"\"\n        try:\n            self.controller.stop()\n        except Exception as e:\n            get_logger().warning(str(e))\n\n    def all_objects(self) -> List[Dict[str, Any]]:\n        \"\"\"Return all object metadata.\"\"\"\n        return self.controller.last_event.metadata[\"objects\"]\n\n    def all_objects_with_properties(\n        self, properties: Dict[str, Any]\n    ) -> List[Dict[str, Any]]:\n        \"\"\"Find all objects with the given properties.\"\"\"\n        objects = []\n        for o in self.all_objects():\n            satisfies_all = True\n            for k, v in properties.items():\n                if o[k] != v:\n                    satisfies_all = False\n                    break\n            if satisfies_all:\n                objects.append(o)\n        return objects\n\n    def visible_objects(self) -> List[Dict[str, Any]]:\n        \"\"\"Return all visible objects.\"\"\"\n        return self.all_objects_with_properties({\"visible\": True})\n"
  },
  {
    "path": "allenact_plugins/robothor_plugin/robothor_models.py",
    "content": "from typing import Tuple, Optional\n\nimport gym\nimport torch\nfrom gym.spaces import Dict as SpaceDict\n\nfrom allenact.algorithms.onpolicy_sync.policy import (\n    ActorCriticModel,\n    LinearActorCriticHead,\n    DistributionType,\n    Memory,\n    ObservationType,\n)\nfrom allenact.base_abstractions.misc import ActorCriticOutput\nfrom allenact.embodiedai.models.basic_models import RNNStateEncoder, SimpleCNN\nfrom allenact_plugins.robothor_plugin.robothor_distributions import (\n    TupleCategoricalDistr,\n)\n\n\nclass TupleLinearActorCriticHead(LinearActorCriticHead):\n    def forward(self, x):\n        out = self.actor_and_critic(x)\n\n        logits = out[..., :-1]\n        values = out[..., -1:]\n        # noinspection PyArgumentList\n        return (\n            TupleCategoricalDistr(logits=logits),  # [steps, samplers, ...]\n            values.view(*values.shape[:2], -1),  # [steps, samplers, flattened]\n        )\n\n\nclass NavToPartnerActorCriticSimpleConvRNN(ActorCriticModel[TupleCategoricalDistr]):\n    action_space: gym.spaces.Tuple\n\n    def __init__(\n        self,\n        action_space: gym.spaces.Tuple,\n        observation_space: SpaceDict,\n        rgb_uuid: Optional[str] = \"rgb\",\n        hidden_size=512,\n        num_rnn_layers=1,\n        rnn_type=\"GRU\",\n    ):\n        super().__init__(action_space=action_space, observation_space=observation_space)\n\n        self._hidden_size = hidden_size\n\n        self.rgb_uuid = rgb_uuid\n\n        self.visual_encoder = SimpleCNN(\n            observation_space=observation_space,\n            output_size=hidden_size,\n            rgb_uuid=self.rgb_uuid,\n            depth_uuid=None,\n        )\n\n        self.state_encoder = RNNStateEncoder(\n            0 if self.is_blind else self.recurrent_hidden_state_size,\n            self._hidden_size,\n            num_layers=num_rnn_layers,\n            rnn_type=rnn_type,\n        )\n\n        self.actor_critic = 
TupleLinearActorCriticHead(\n            self._hidden_size, action_space[0].n\n        )\n\n        self.train()\n\n    @property\n    def output_size(self):\n        return self._hidden_size\n\n    @property\n    def is_blind(self):\n        return self.visual_encoder.is_blind\n\n    @property\n    def num_recurrent_layers(self):\n        return self.state_encoder.num_recurrent_layers\n\n    @property\n    def recurrent_hidden_state_size(self):\n        return self._hidden_size\n\n    @property\n    def num_agents(self):\n        return len(self.action_space)\n\n    def _recurrent_memory_specification(self):\n        return dict(\n            rnn=(\n                (\n                    (\"layer\", self.num_recurrent_layers),\n                    (\"sampler\", None),\n                    (\"agent\", self.num_agents),\n                    (\"hidden\", self.recurrent_hidden_state_size),\n                ),\n                torch.float32,\n            )\n        )\n\n    def forward(  # type:ignore\n        self,\n        observations: ObservationType,\n        memory: Memory,\n        prev_actions: torch.Tensor,\n        masks: torch.FloatTensor,\n    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:\n        if not self.is_blind:\n            perception_embed = self.visual_encoder(observations)\n        else:\n            # TODO manage blindness for all agents simultaneously or separate?\n            raise NotImplementedError()\n\n        # TODO alternative where all agents consume all observations\n        x, rnn_hidden_states = self.state_encoder(\n            perception_embed, memory.tensor(\"rnn\"), masks\n        )\n\n        dists, vals = self.actor_critic(x)\n\n        return (\n            ActorCriticOutput(\n                distributions=dists,\n                values=vals,\n                extras={},\n            ),\n            memory.set_tensor(\"rnn\", rnn_hidden_states),\n        )\n"
  },
  {
    "path": "allenact_plugins/robothor_plugin/robothor_preprocessors.py",
    "content": "from collections import OrderedDict\nfrom typing import Dict, Any, Optional, List, cast\n\nimport gym\nimport numpy as np\nimport torch\nfrom gym.spaces.dict import Dict as SpaceDict\n\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.utils.cacheless_frcnn import fasterrcnn_resnet50_fpn\nfrom allenact.utils.misc_utils import prepare_locals_for_super\n\n\nclass BatchedFasterRCNN(torch.nn.Module):\n    # fmt: off\n    COCO_INSTANCE_CATEGORY_NAMES = [\n        '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',\n        'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',\n        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',\n        'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',\n        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',\n        'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',\n        'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',\n        'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',\n        'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',\n        'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',\n        'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',\n        'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'\n    ]\n    # fmt: on\n\n    def __init__(self, thres=0.12, maxdets=3, res=7):\n        super().__init__()\n        self.model = fasterrcnn_resnet50_fpn(pretrained=True)\n        self.eval()\n\n        self.min_score = thres\n        self.maxdets = maxdets\n        self.res = res\n\n    def detector_tensor(self, boxes, classes, scores, aspect_ratio=1.0):\n        res, maxdets = self.res, self.maxdets\n        bins = np.array(list(range(res 
+ 1)))[1:-1] / res\n\n        res_classes = torch.zeros(\n            res, res, maxdets, dtype=torch.int64\n        )  # 0 is background\n        res_boxes = -1 * torch.ones(\n            res, res, maxdets, 5\n        )  # regular range is [0, 1] (vert) or [0, aspect_ratio] (horiz)\n\n        temp = [[[] for _ in range(res)] for _ in range(res)]  # grid of arrays\n\n        # # TODO Debug\n        # print('NEW IMAGE')\n\n        for it in range(classes.shape[0]):\n            cx = (boxes[it, 0].item() + boxes[it, 2].item()) / 2\n            cy = (boxes[it, 1].item() + boxes[it, 3].item()) / 2\n\n            px = np.digitize(cx, bins=aspect_ratio * bins).item()\n            py = np.digitize(cy, bins=bins).item()\n\n            temp[py][px].append(\n                (\n                    scores[it][classes[it]].item(),  # prob\n                    (boxes[it, 2] - boxes[it, 0]).item() / aspect_ratio,  # width\n                    (boxes[it, 3] - boxes[it, 1]).item(),  # height\n                    boxes[it, 0].item() / aspect_ratio,  # x\n                    boxes[it, 1].item(),  # y\n                    classes[it].item(),  # class\n                )\n            )\n\n            # # TODO Debug:\n            # print(self.COCO_INSTANCE_CATEGORY_NAMES[classes[it].item()])\n\n        for py in range(res):\n            for px in range(res):\n                order = sorted(temp[py][px], reverse=True)[:maxdets]\n                for it, data in enumerate(order):\n                    res_classes[py, px, it] = data[-1]\n                    res_boxes[py, px, it, :] = torch.tensor(\n                        list(data[:-1])\n                    )  # prob, size, top left\n\n        res_classes = res_classes.permute(2, 0, 1).unsqueeze(0).contiguous()\n        res_boxes = (\n            res_boxes.view(res, res, -1).permute(2, 0, 1).unsqueeze(0).contiguous()\n        )\n\n        return res_classes, res_boxes\n\n    def forward(self, imbatch):\n        with torch.no_grad():\n         
   imglist = [im_in.squeeze(0) for im_in in imbatch.split(split_size=1, dim=0)]\n\n            # # TODO Debug\n            # import cv2\n            # for it, im_in in enumerate(imglist):\n            #     cvim = 255.0 * im_in.to('cpu').permute(1, 2, 0).numpy()[:, :, ::-1]\n            #     cv2.imwrite('test_highres{}.png'.format(it), cvim)\n\n            preds = self.model(imglist)\n\n            keeps = [\n                pred[\"scores\"] > self.min_score for pred in preds\n            ]  # already  after nms\n\n            # [0, 1] for rows, [0, aspect_ratio] for cols (im_in is C x H x W), with all images of same size (batch)\n            all_boxes = [\n                pred[\"boxes\"][keep] / imbatch.shape[-2]\n                for pred, keep in zip(preds, keeps)\n            ]\n            all_classes = [pred[\"labels\"][keep] for pred, keep in zip(preds, keeps)]\n            all_pred_scores = [pred[\"scores\"][keep] for pred, keep in zip(preds, keeps)]\n\n            # hack: fill in a full prob score (all classes, 0 score if undetected) for each box, for backwards compatibility\n            all_scores = [\n                torch.zeros(pred_scores.shape[0], 91, device=pred_scores.device)\n                for pred_scores in all_pred_scores\n            ]\n            all_scores = [\n                torch.where(\n                    torch.arange(91, device=pred_scores.device).unsqueeze(0)\n                    == merged_classes.unsqueeze(1),\n                    pred_scores.unsqueeze(1),\n                    scores,\n                )\n                for merged_classes, pred_scores, scores in zip(\n                    all_classes, all_pred_scores, all_scores\n                )\n            ]\n\n            all_classes_boxes = [\n                self.detector_tensor(\n                    boxes,\n                    classes,\n                    scores,\n                    aspect_ratio=imbatch.shape[-1] / imbatch.shape[-2],\n                )\n                for 
boxes, classes, scores in zip(all_boxes, all_classes, all_scores)\n            ]\n\n            classes = torch.cat(\n                [classes_boxes[0] for classes_boxes in all_classes_boxes], dim=0\n            ).to(imbatch.device)\n            boxes = torch.cat(\n                [classes_boxes[1] for classes_boxes in all_classes_boxes], dim=0\n            ).to(imbatch.device)\n\n        return classes, boxes\n\n\nclass FasterRCNNPreProcessorRoboThor(Preprocessor):\n    \"\"\"Preprocess RGB image using a ResNet model.\"\"\"\n\n    COCO_INSTANCE_CATEGORY_NAMES = BatchedFasterRCNN.COCO_INSTANCE_CATEGORY_NAMES\n\n    def __init__(\n        self,\n        input_uuids: List[str],\n        output_uuid: str,\n        input_height: int,\n        input_width: int,\n        max_dets: int,\n        detector_spatial_res: int,\n        detector_thres: float,\n        device: Optional[torch.device] = None,\n        device_ids: Optional[List[torch.device]] = None,\n        **kwargs: Any,\n    ):\n        self.input_height = input_height\n        self.input_width = input_width\n        self.max_dets = max_dets\n        self.detector_spatial_res = detector_spatial_res\n        self.detector_thres = detector_thres\n        self.device = torch.device(\"cpu\") if device is None else device\n        self.device_ids = device_ids or cast(\n            List[torch.device], list(range(torch.cuda.device_count()))\n        )\n\n        self.frcnn: BatchedFasterRCNN = BatchedFasterRCNN(\n            thres=self.detector_thres,\n            maxdets=self.max_dets,\n            res=self.detector_spatial_res,\n        )\n\n        spaces: OrderedDict[str, gym.Space] = OrderedDict()\n        shape = (self.max_dets, self.detector_spatial_res, self.detector_spatial_res)\n        spaces[\"frcnn_classes\"] = gym.spaces.Box(\n            low=0,  # 0 is bg\n            high=len(self.COCO_INSTANCE_CATEGORY_NAMES) - 1,\n            shape=shape,\n            dtype=np.int64,\n        )\n        shape = (\n   
         self.max_dets * 5,\n            self.detector_spatial_res,\n            self.detector_spatial_res,\n        )\n        spaces[\"frcnn_boxes\"] = gym.spaces.Box(low=-np.inf, high=np.inf, shape=shape)\n\n        assert (\n            len(input_uuids) == 1\n        ), \"fasterrcnn preprocessor can only consume one observation type\"\n\n        observation_space = SpaceDict(spaces=spaces)\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def to(self, device: torch.device) -> \"FasterRCNNPreProcessorRoboThor\":\n        self.frcnn = self.frcnn.to(device)\n        self.device = device\n        return self\n\n    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:\n        frames_tensor = (\n            obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2)\n        )  # bhwc -> bchw (unnormalized)\n        classes, boxes = self.frcnn(frames_tensor)\n\n        return {\"frcnn_classes\": classes, \"frcnn_boxes\": boxes}\n"
  },
  {
    "path": "allenact_plugins/robothor_plugin/robothor_sensors.py",
    "content": "from typing import Any, Tuple, Optional\n\nimport ai2thor.controller\nimport gym\nimport numpy as np\nimport quaternion  # noqa # pylint: disable=unused-import\n\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.base_abstractions.task import Task\nfrom allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor\nfrom allenact.utils.misc_utils import prepare_locals_for_super\nfrom allenact.utils.system import get_logger\nfrom allenact_plugins.ithor_plugin.ithor_sensors import (\n    RGBSensorThor,\n    THOR_ENV_TYPE,\n    THOR_TASK_TYPE,\n)\nfrom allenact_plugins.robothor_plugin.robothor_environment import RoboThorEnvironment\nfrom allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask\n\n\nclass RGBSensorRoboThor(RGBSensorThor):\n    \"\"\"Sensor for RGB images in RoboTHOR.\n\n    Returns from a running RoboThorEnvironment instance, the current RGB\n    frame corresponding to the agent's egocentric view.\n    \"\"\"\n\n    def __init__(self, *args: Any, **kwargs: Any):\n        get_logger().warning(\n            \"`RGBSensorRoboThor` is deprecated, use `RGBSensorThor` instead.\"\n        )\n        super().__init__(*args, **kwargs)\n\n\nclass RGBSensorMultiRoboThor(RGBSensor[RoboThorEnvironment, Task[RoboThorEnvironment]]):\n    \"\"\"Sensor for RGB images in RoboTHOR.\n\n    Returns from a running RoboThorEnvironment instance, the current RGB\n    frame corresponding to the agent's egocentric view.\n    \"\"\"\n\n    def __init__(self, agent_count: int = 2, **kwargs):\n        # TODO take all named args from superclass and pass with super().__init__(**prepare_locals_for_super(locals()))\n        super().__init__(**kwargs)\n        self.agent_count = agent_count\n        self.agent_id = 0\n\n    def frame_from_env(\n        self, env: RoboThorEnvironment, task: Optional[Task[RoboThorEnvironment]]\n    ) -> np.ndarray:\n        return env.current_frames[self.agent_id].copy()\n\n    def get_observation(\n   
     self,\n        env: RoboThorEnvironment,\n        task: Task[RoboThorEnvironment],\n        *args: Any,\n        **kwargs: Any\n    ) -> Any:\n        obs = []\n        for self.agent_id in range(self.agent_count):\n            obs.append(super().get_observation(env, task, *args, **kwargs))\n        return np.stack(obs, axis=0)  # agents x width x height x channels\n\n\nclass GPSCompassSensorRoboThor(Sensor[RoboThorEnvironment, PointNavTask]):\n    def __init__(self, uuid: str = \"target_coordinates_ind\", **kwargs: Any):\n        observation_space = self._get_observation_space()\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def _get_observation_space(self):\n        return gym.spaces.Box(\n            low=np.finfo(np.float32).min,\n            high=np.finfo(np.float32).max,\n            shape=(2,),\n            dtype=np.float32,\n        )\n\n    @staticmethod\n    def _compute_pointgoal(\n        source_position: np.ndarray,\n        source_rotation: np.quaternion,\n        goal_position: np.ndarray,\n    ):\n        direction_vector = goal_position - source_position\n        direction_vector_agent = GPSCompassSensorRoboThor.quaternion_rotate_vector(\n            source_rotation.inverse(), direction_vector\n        )\n\n        rho, phi = GPSCompassSensorRoboThor.cartesian_to_polar(\n            direction_vector_agent[2], -direction_vector_agent[0]\n        )\n        return np.array([rho, phi], dtype=np.float32)\n\n    @staticmethod\n    def quaternion_from_y_angle(angle: float) -> np.quaternion:\n        r\"\"\"Creates a quaternion from rotation angle around y axis\"\"\"\n        return GPSCompassSensorRoboThor.quaternion_from_coeff(\n            np.array(\n                [0.0, np.sin(np.pi * angle / 360.0), 0.0, np.cos(np.pi * angle / 360.0)]\n            )\n        )\n\n    @staticmethod\n    def quaternion_from_coeff(coeffs: np.ndarray) -> np.quaternion:\n        r\"\"\"Creates a quaternions from coeffs in [x, y, z, w] 
format\"\"\"\n        quat = np.quaternion(0, 0, 0, 0)\n        quat.real = coeffs[3]\n        quat.imag = coeffs[0:3]\n        return quat\n\n    @staticmethod\n    def cartesian_to_polar(x, y):\n        rho = np.sqrt(x**2 + y**2)\n        phi = np.arctan2(y, x)\n        return rho, phi\n\n    @staticmethod\n    def quaternion_rotate_vector(quat: np.quaternion, v: np.array) -> np.array:\n        r\"\"\"Rotates a vector by a quaternion\n        Args:\n            quat: The quaternion to rotate by\n            v: The vector to rotate\n        Returns:\n            np.array: The rotated vector\n        \"\"\"\n        vq = np.quaternion(0, 0, 0, 0)\n        vq.imag = v\n        return (quat * vq * quat.inverse()).imag\n\n    def get_observation(\n        self,\n        env: RoboThorEnvironment,\n        task: Optional[PointNavTask],\n        *args: Any,\n        **kwargs: Any\n    ) -> Any:\n\n        agent_state = env.agent_state()\n        agent_position = np.array([agent_state[k] for k in [\"x\", \"y\", \"z\"]])\n        rotation_world_agent = self.quaternion_from_y_angle(\n            agent_state[\"rotation\"][\"y\"]\n        )\n\n        goal_position = np.array([task.task_info[\"target\"][k] for k in [\"x\", \"y\", \"z\"]])\n\n        return self._compute_pointgoal(\n            agent_position, rotation_world_agent, goal_position\n        )\n\n\nclass DepthSensorThor(\n    DepthSensor[\n        THOR_ENV_TYPE,\n        THOR_TASK_TYPE,\n    ],\n):\n    def __init__(\n        self,\n        use_resnet_normalization: Optional[bool] = None,\n        use_normalization: Optional[bool] = None,\n        mean: Optional[np.ndarray] = np.array([[0.5]], dtype=np.float32),\n        stdev: Optional[np.ndarray] = np.array([[0.25]], dtype=np.float32),\n        height: Optional[int] = None,\n        width: Optional[int] = None,\n        uuid: str = \"depth\",\n        output_shape: Optional[Tuple[int, ...]] = None,\n        output_channels: int = 1,\n        
unnormalized_infimum: float = 0.0,\n        unnormalized_supremum: float = 5.0,\n        scale_first: bool = False,\n        **kwargs: Any\n    ):\n        # Give priority to use_normalization, but use_resnet_normalization for backward compat. if not set\n        if use_resnet_normalization is not None and use_normalization is None:\n            use_normalization = use_resnet_normalization\n        elif use_normalization is None:\n            use_normalization = False\n\n        super().__init__(**prepare_locals_for_super(locals()))\n\n    def frame_from_env(\n        self, env: THOR_ENV_TYPE, task: Optional[THOR_TASK_TYPE]\n    ) -> np.ndarray:\n        if not isinstance(env, ai2thor.controller.Controller):\n            return env.controller.last_event.depth_frame\n\n        return env.last_event.depth_frame\n\n\nclass DepthSensorRoboThor(DepthSensorThor):\n    # For backwards compatibility\n    def __init__(self, *args: Any, **kwargs: Any):\n        get_logger().warning(\n            \"`DepthSensorRoboThor` is deprecated, use `DepthSensorThor` instead.\"\n        )\n        super().__init__(*args, **kwargs)\n"
  },
  {
    "path": "allenact_plugins/robothor_plugin/robothor_task_samplers.py",
    "content": "import copy\nimport gzip\nimport json\nimport random\nfrom typing import List, Optional, Union, Dict, Any, cast, Tuple\n\nimport gym\n\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.utils.cache_utils import str_to_pos_for_cache\nfrom allenact.utils.experiment_utils import set_seed, set_deterministic_cudnn\nfrom allenact.utils.system import get_logger\nfrom allenact_plugins.robothor_plugin.robothor_environment import RoboThorEnvironment\nfrom allenact_plugins.robothor_plugin.robothor_tasks import (\n    ObjectNavTask,\n    PointNavTask,\n    NavToPartnerTask,\n)\n\n\nclass ObjectNavTaskSampler(TaskSampler):\n    def __init__(\n        self,\n        scenes: Union[List[str], str],\n        object_types: List[str],\n        sensors: List[Sensor],\n        max_steps: int,\n        env_args: Dict[str, Any],\n        action_space: gym.Space,\n        rewards_config: Dict,\n        scene_period: Optional[Union[int, str]] = None,\n        max_tasks: Optional[int] = None,\n        seed: Optional[int] = None,\n        deterministic_cudnn: bool = False,\n        allow_flipping: bool = False,\n        dataset_first: int = -1,\n        dataset_last: int = -1,\n        **kwargs,\n    ) -> None:\n        self.rewards_config = rewards_config\n        self.env_args = env_args\n        self.scenes = scenes\n        self.object_types = object_types\n        self.env: Optional[RoboThorEnvironment] = None\n        self.sensors = sensors\n        self.max_steps = max_steps\n        self._action_space = action_space\n        self.allow_flipping = allow_flipping\n\n        self.scenes_is_dataset = (dataset_first >= 0) or (dataset_last >= 0)\n\n        if not self.scenes_is_dataset:\n            assert isinstance(\n                self.scenes, List\n            ), \"When not using a dataset, scenes ({}) must be a list\".format(\n                self.scenes\n            )\n            
self.scene_counter: Optional[int] = None\n            self.scene_order: Optional[List[str]] = None\n            self.scene_id: Optional[int] = None\n            self.scene_period: Optional[Union[str, int]] = (\n                scene_period  # default makes a random choice\n            )\n            self.max_tasks: Optional[int] = None\n            self.reset_tasks = max_tasks\n        else:\n            assert isinstance(\n                self.scenes, str\n            ), \"When using a dataset, scenes ({}) must be a json file name string\".format(\n                self.scenes\n            )\n            with open(self.scenes, \"r\") as f:\n                self.dataset_episodes = json.load(f)\n                # get_logger().debug(\"Loaded {} object nav episodes\".format(len(self.dataset_episodes)))\n            self.dataset_first = dataset_first if dataset_first >= 0 else 0\n            self.dataset_last = (\n                dataset_last if dataset_last >= 0 else len(self.dataset_episodes) - 1\n            )\n            assert (\n                0 <= self.dataset_first <= self.dataset_last\n            ), \"dataset_last {} must be >= dataset_first {} >= 0\".format(\n                dataset_last, dataset_first\n            )\n            self.reset_tasks = self.dataset_last - self.dataset_first + 1\n            # get_logger().debug(\"{} tasks ({}, {}) in sampler\".format(self.reset_tasks, self.dataset_first, self.dataset_last))\n\n        self._last_sampled_task: Optional[ObjectNavTask] = None\n\n        self.seed: Optional[int] = None\n        self.set_seed(seed)\n\n        if deterministic_cudnn:\n            set_deterministic_cudnn()\n\n        self.reset()\n\n    def _create_environment(self) -> RoboThorEnvironment:\n        env = RoboThorEnvironment(**self.env_args)\n        return env\n\n    @property\n    def length(self) -> Union[int, float]:\n        \"\"\"Length.\n\n        # Returns\n\n        Number of total tasks remaining that can be sampled. 
Can be float('inf').\n        \"\"\"\n        return float(\"inf\") if self.max_tasks is None else self.max_tasks\n\n    @property\n    def total_unique(self) -> Optional[Union[int, float]]:\n        return self.reset_tasks\n\n    @property\n    def last_sampled_task(self) -> Optional[ObjectNavTask]:\n        return self._last_sampled_task\n\n    def close(self) -> None:\n        if self.env is not None:\n            self.env.stop()\n\n    @property\n    def all_observation_spaces_equal(self) -> bool:\n        \"\"\"Check if observation spaces equal.\n\n        # Returns\n\n        True if all Tasks that can be sampled by this sampler have the\n        same observation space. Otherwise False.\n        \"\"\"\n        return True\n\n    def sample_scene(self, force_advance_scene: bool):\n        if force_advance_scene:\n            if self.scene_period != \"manual\":\n                get_logger().warning(\n                    \"When sampling scene, have `force_advance_scene == True`\"\n                    \"but `self.scene_period` is not equal to 'manual',\"\n                    \"this may cause unexpected behavior.\"\n                )\n            self.scene_id = (1 + self.scene_id) % len(self.scenes)\n            if self.scene_id == 0:\n                random.shuffle(self.scene_order)\n\n        if self.scene_period is None:\n            # Random scene\n            self.scene_id = random.randint(0, len(self.scenes) - 1)\n        elif self.scene_period == \"manual\":\n            pass\n        elif self.scene_counter >= cast(int, self.scene_period):\n            if self.scene_id == len(self.scene_order) - 1:\n                # Randomize scene order for next iteration\n                random.shuffle(self.scene_order)\n                # Move to next scene\n                self.scene_id = 0\n            else:\n                # Move to next scene\n                self.scene_id += 1\n            # Reset scene counter\n            self.scene_counter = 1\n        elif 
isinstance(self.scene_period, int):\n            # Stay in current scene\n            self.scene_counter += 1\n        else:\n            raise NotImplementedError(\n                \"Invalid scene_period {}\".format(self.scene_period)\n            )\n\n        if self.max_tasks is not None:\n            self.max_tasks -= 1\n\n        return self.scenes[int(self.scene_order[self.scene_id])]\n\n    # def sample_episode(self, scene):\n    #     self.scene_counters[scene] = (self.scene_counters[scene] + 1) % len(self.scene_to_episodes[scene])\n    #     if self.scene_counters[scene] == 0:\n    #         random.shuffle(self.scene_to_episodes[scene])\n    #     return self.scene_to_episodes[scene][self.scene_counters[scene]]\n\n    def next_task(self, force_advance_scene: bool = False) -> Optional[ObjectNavTask]:\n        if self.max_tasks is not None and self.max_tasks <= 0:\n            # get_logger().debug(\"max_tasks {}\".format(self.max_tasks))\n            return None\n\n        if not self.scenes_is_dataset:\n            scene = self.sample_scene(force_advance_scene)\n\n            if self.env is not None:\n                if scene.replace(\"_physics\", \"\") != self.env.scene_name.replace(\n                    \"_physics\", \"\"\n                ):\n                    self.env.reset(scene)\n            else:\n                self.env = self._create_environment()\n                self.env.reset(scene_name=scene)\n\n            pose = self.env.randomize_agent_location()\n\n            object_types_in_scene = set(\n                [o[\"objectType\"] for o in self.env.last_event.metadata[\"objects\"]]\n            )\n\n            task_info = {\"scene\": scene}\n            for ot in random.sample(self.object_types, len(self.object_types)):\n                if ot in object_types_in_scene:\n                    task_info[\"object_type\"] = ot\n                    break\n\n            if len(task_info) == 0:\n                get_logger().warning(\n                    
\"Scene {} does not contain any\"\n                    \" objects of any of the types {}.\".format(scene, self.object_types)\n                )\n\n            task_info[\"initial_position\"] = {k: pose[k] for k in [\"x\", \"y\", \"z\"]}\n            task_info[\"initial_orientation\"] = cast(Dict[str, float], pose[\"rotation\"])[\n                \"y\"\n            ]\n        else:\n            assert self.max_tasks is not None\n            next_task_id = self.dataset_first + self.max_tasks - 1\n            # get_logger().debug(\"task {}\".format(next_task_id))\n            assert (\n                self.dataset_first <= next_task_id <= self.dataset_last\n            ), \"wrong task_id {} for min {} max {}\".format(\n                next_task_id, self.dataset_first, self.dataset_last\n            )\n            task_info = copy.deepcopy(self.dataset_episodes[next_task_id])\n\n            scene = task_info[\"scene\"]\n            if self.env is not None:\n                if scene.replace(\"_physics\", \"\") != self.env.scene_name.replace(\n                    \"_physics\", \"\"\n                ):\n                    self.env.reset(scene_name=scene)\n            else:\n                self.env = self._create_environment()\n                self.env.reset(scene_name=scene)\n\n            self.env.step(\n                {\n                    \"action\": \"TeleportFull\",\n                    **{k: float(v) for k, v in task_info[\"initial_position\"].items()},\n                    \"rotation\": {\n                        \"x\": 0.0,\n                        \"y\": float(task_info[\"initial_orientation\"]),\n                        \"z\": 0.0,\n                    },\n                    \"horizon\": 0.0,\n                    \"standing\": True,\n                }\n            )\n            assert self.env.last_action_success, \"Failed to reset agent for {}\".format(\n                task_info\n            )\n\n            self.max_tasks -= 1\n\n        # 
task_info[\"actions\"] = []  # TODO populated by Task(Generic[EnvType]).step(...) but unused\n\n        if self.allow_flipping and random.random() > 0.5:\n            task_info[\"mirrored\"] = True\n        else:\n            task_info[\"mirrored\"] = False\n\n        self._last_sampled_task = ObjectNavTask(\n            env=self.env,\n            sensors=self.sensors,\n            task_info=task_info,\n            max_steps=self.max_steps,\n            action_space=self._action_space,\n            reward_configs=self.rewards_config,\n        )\n        return self._last_sampled_task\n\n    def reset(self):\n        if not self.scenes_is_dataset:\n            self.scene_counter = 0\n            self.scene_order = list(range(len(self.scenes)))\n            random.shuffle(self.scene_order)\n            self.scene_id = 0\n        self.max_tasks = self.reset_tasks\n\n    def set_seed(self, seed: int):\n        self.seed = seed\n        if seed is not None:\n            set_seed(seed)\n\n\nclass ObjectNavDatasetTaskSampler(TaskSampler):\n    def __init__(\n        self,\n        scenes: List[str],\n        scene_directory: str,\n        sensors: List[Sensor],\n        max_steps: int,\n        env_args: Dict[str, Any],\n        action_space: gym.Space,\n        rewards_config: Dict,\n        seed: Optional[int] = None,\n        deterministic_cudnn: bool = False,\n        loop_dataset: bool = True,\n        allow_flipping=False,\n        env_class=RoboThorEnvironment,\n        randomize_materials_in_training: bool = False,\n        **kwargs,\n    ) -> None:\n        self.rewards_config = rewards_config\n        self.env_args = env_args\n        self.scenes = scenes\n        self.episodes = {\n            scene: ObjectNavDatasetTaskSampler.load_dataset(\n                scene, scene_directory + \"/episodes\"\n            )\n            for scene in scenes\n        }\n\n        # Only keep episodes containing desired objects\n        if \"object_types\" in kwargs:\n         
   self.episodes = {\n                scene: [\n                    ep for ep in episodes if ep[\"object_type\"] in kwargs[\"object_types\"]\n                ]\n                for scene, episodes in self.episodes.items()\n            }\n            self.episodes = {\n                scene: episodes\n                for scene, episodes in self.episodes.items()\n                if len(episodes) > 0\n            }\n            self.scenes = [scene for scene in self.scenes if scene in self.episodes]\n\n        self.env_class = env_class\n        self.object_types = [\n            ep[\"object_type\"] for scene in self.episodes for ep in self.episodes[scene]\n        ]\n        self.env: Optional[RoboThorEnvironment] = None\n        self.sensors = sensors\n        self.max_steps = max_steps\n        self._action_space = action_space\n        self.allow_flipping = allow_flipping\n        self.scene_counter: Optional[int] = None\n        self.scene_order: Optional[List[str]] = None\n        self.scene_id: Optional[int] = None\n        # get the total number of tasks assigned to this process\n        if loop_dataset:\n            self.max_tasks = None\n        else:\n            self.max_tasks = sum(len(self.episodes[scene]) for scene in self.episodes)\n        self.reset_tasks = self.max_tasks\n        self.scene_index = 0\n        self.episode_index = 0\n        self.randomize_materials_in_training = randomize_materials_in_training\n\n        self._last_sampled_task: Optional[ObjectNavTask] = None\n\n        self.seed: Optional[int] = None\n        self.set_seed(seed)\n\n        if deterministic_cudnn:\n            set_deterministic_cudnn()\n\n        self.reset()\n\n    def _create_environment(self) -> RoboThorEnvironment:\n        env = self.env_class(**self.env_args)\n        return env\n\n    @staticmethod\n    def load_dataset(scene: str, base_directory: str) -> List[Dict]:\n        filename = (\n            \"/\".join([base_directory, scene])\n            if 
base_directory[-1] != \"/\"\n            else \"\".join([base_directory, scene])\n        )\n        filename += \".json.gz\"\n        fin = gzip.GzipFile(filename, \"r\")\n        json_bytes = fin.read()\n        fin.close()\n        json_str = json_bytes.decode(\"utf-8\")\n        data = json.loads(json_str)\n        random.shuffle(data)\n        return data\n\n    @staticmethod\n    def load_distance_cache_from_file(scene: str, base_directory: str) -> Dict:\n        filename = (\n            \"/\".join([base_directory, scene])\n            if base_directory[-1] != \"/\"\n            else \"\".join([base_directory, scene])\n        )\n        filename += \".json.gz\"\n        fin = gzip.GzipFile(filename, \"r\")\n        json_bytes = fin.read()\n        fin.close()\n        json_str = json_bytes.decode(\"utf-8\")\n        data = json.loads(json_str)\n        return data\n\n    @property\n    def __len__(self) -> Union[int, float]:\n        \"\"\"Length.\n\n        # Returns\n\n        Number of total tasks remaining that can be sampled. Can be float('inf').\n        \"\"\"\n        return float(\"inf\") if self.max_tasks is None else self.max_tasks\n\n    @property\n    def total_unique(self) -> Optional[Union[int, float]]:\n        return self.reset_tasks\n\n    @property\n    def last_sampled_task(self) -> Optional[ObjectNavTask]:\n        return self._last_sampled_task\n\n    def close(self) -> None:\n        if self.env is not None:\n            self.env.stop()\n\n    @property\n    def all_observation_spaces_equal(self) -> bool:\n        \"\"\"Check if observation spaces equal.\n\n        # Returns\n\n        True if all Tasks that can be sampled by this sampler have the\n            same observation space. Otherwise False.\n        \"\"\"\n        return True\n\n    @property\n    def length(self) -> Union[int, float]:\n        \"\"\"Length.\n\n        # Returns\n\n        Number of total tasks remaining that can be sampled. 
Can be float('inf').\n        \"\"\"\n        return float(\"inf\") if self.max_tasks is None else self.max_tasks\n\n    def next_task(self, force_advance_scene: bool = False) -> Optional[ObjectNavTask]:\n        if self.max_tasks is not None and self.max_tasks <= 0:\n            return None\n\n        if self.episode_index >= len(self.episodes[self.scenes[self.scene_index]]):\n            self.scene_index = (self.scene_index + 1) % len(self.scenes)\n            # shuffle the new list of episodes to train on\n            random.shuffle(self.episodes[self.scenes[self.scene_index]])\n            self.episode_index = 0\n        scene = self.scenes[self.scene_index]\n        episode = self.episodes[scene][self.episode_index]\n        if self.env is None:\n            self.env = self._create_environment()\n\n        if scene.replace(\"_physics\", \"\") != self.env.scene_name.replace(\"_physics\", \"\"):\n            self.env.reset(scene_name=scene)\n        else:\n            self.env.reset_object_filter()\n\n        self.env.set_object_filter(\n            object_ids=[\n                o[\"objectId\"]\n                for o in self.env.last_event.metadata[\"objects\"]\n                if o[\"objectType\"] == episode[\"object_type\"]\n            ]\n        )\n\n        # only randomize materials in train scenes\n        were_materials_randomized = False\n        if self.randomize_materials_in_training:\n            if (\n                \"Train\" in scene\n                or int(scene.replace(\"FloorPlan\", \"\").replace(\"_physics\", \"\")) % 100\n                < 21\n            ):\n                were_materials_randomized = True\n                self.env.controller.step(action=\"RandomizeMaterials\")\n\n        task_info = {\n            \"scene\": scene,\n            \"object_type\": episode[\"object_type\"],\n            \"materials_randomized\": were_materials_randomized,\n        }\n        if len(task_info) == 0:\n            get_logger().warning(\n           
     \"Scene {} does not contain any\"\n                \" objects of any of the types {}.\".format(scene, self.object_types)\n            )\n        task_info[\"initial_position\"] = episode[\"initial_position\"]\n        task_info[\"initial_orientation\"] = episode[\"initial_orientation\"]\n        task_info[\"initial_horizon\"] = episode.get(\"initial_horizon\", 0)\n        task_info[\"distance_to_target\"] = episode.get(\"shortest_path_length\")\n        task_info[\"path_to_target\"] = episode.get(\"shortest_path\")\n        task_info[\"object_type\"] = episode[\"object_type\"]\n        task_info[\"id\"] = episode[\"id\"]\n        if self.allow_flipping and random.random() > 0.5:\n            task_info[\"mirrored\"] = True\n        else:\n            task_info[\"mirrored\"] = False\n\n        self.episode_index += 1\n        if self.max_tasks is not None:\n            self.max_tasks -= 1\n        if not self.env.teleport(\n            pose=episode[\"initial_position\"],\n            rotation=episode[\"initial_orientation\"],\n            horizon=episode.get(\"initial_horizon\", 0),\n        ):\n            return self.next_task()\n        self._last_sampled_task = ObjectNavTask(\n            env=self.env,\n            sensors=self.sensors,\n            task_info=task_info,\n            max_steps=self.max_steps,\n            action_space=self._action_space,\n            reward_configs=self.rewards_config,\n        )\n        return self._last_sampled_task\n\n    def reset(self):\n        self.episode_index = 0\n        self.scene_index = 0\n        self.max_tasks = self.reset_tasks\n\n    def set_seed(self, seed: int):\n        self.seed = seed\n        if seed is not None:\n            set_seed(seed)\n\n\nclass PointNavTaskSampler(TaskSampler):\n    def __init__(\n        self,\n        scenes: List[str],\n        # object_types: List[str],\n        # scene_to_episodes: List[Dict[str, Any]],\n        sensors: List[Sensor],\n        max_steps: int,\n        
env_args: Dict[str, Any],\n        action_space: gym.Space,\n        rewards_config: Dict,\n        scene_period: Optional[Union[int, str]] = None,\n        max_tasks: Optional[int] = None,\n        seed: Optional[int] = None,\n        deterministic_cudnn: bool = False,\n        **kwargs,\n    ) -> None:\n        self.rewards_config = rewards_config\n        self.env_args = env_args\n        self.scenes = scenes\n        # self.object_types = object_types\n        # self.scene_to_episodes = scene_to_episodes\n        # self.scene_counters = {scene: -1 for scene in self.scene_to_episodes}\n        # self.scenes = list(self.scene_to_episodes.keys())\n        self.env: Optional[RoboThorEnvironment] = None\n        self.sensors = sensors\n        self.max_steps = max_steps\n        self._action_space = action_space\n\n        self.scene_counter: Optional[int] = None\n        self.scene_order: Optional[List[str]] = None\n        self.scene_id: Optional[int] = None\n        self.scene_period: Optional[Union[str, int]] = (\n            scene_period  # default makes a random choice\n        )\n        self.max_tasks: Optional[int] = None\n        self.reset_tasks = max_tasks\n\n        self._last_sampled_task: Optional[PointNavTask] = None\n\n        self.seed: Optional[int] = None\n        self.set_seed(seed)\n\n        if deterministic_cudnn:\n            set_deterministic_cudnn()\n\n        self.reset()\n\n    def _create_environment(self) -> RoboThorEnvironment:\n        env = RoboThorEnvironment(**self.env_args)\n        return env\n\n    @property\n    def length(self) -> Union[int, float]:\n        \"\"\"Length.\n\n        # Returns\n\n        Number of total tasks remaining that can be sampled.\n        Can be float('inf').\n        \"\"\"\n        return float(\"inf\") if self.max_tasks is None else self.max_tasks\n\n    @property\n    def total_unique(self) -> Optional[Union[int, float]]:\n        # total = 0\n        # for scene in self.scene_to_episodes:\n      
  #     total += len(self.scene_to_episodes[scene])\n        # return total\n        return self.reset_tasks\n\n    @property\n    def last_sampled_task(self) -> Optional[PointNavTask]:\n        return self._last_sampled_task\n\n    def close(self) -> None:\n        if self.env is not None:\n            self.env.stop()\n\n    @property\n    def all_observation_spaces_equal(self) -> bool:\n        \"\"\"Check if observation spaces equal.\n\n        # Returns\n\n        True if all Tasks that can be sampled by this sampler\n        have the     same observation space. Otherwise False.\n        \"\"\"\n        return True\n\n    def sample_scene(self, force_advance_scene: bool):\n        if force_advance_scene:\n            if self.scene_period != \"manual\":\n                get_logger().warning(\n                    \"When sampling scene, have `force_advance_scene == True`\"\n                    \"but `self.scene_period` is not equal to 'manual',\"\n                    \"this may cause unexpected behavior.\"\n                )\n            self.scene_id = (1 + self.scene_id) % len(self.scenes)\n            if self.scene_id == 0:\n                random.shuffle(self.scene_order)\n\n        if self.scene_period is None:\n            # Random scene\n            self.scene_id = random.randint(0, len(self.scenes) - 1)\n        elif self.scene_period == \"manual\":\n            pass\n        elif self.scene_counter >= cast(int, self.scene_period):\n            if self.scene_id == len(self.scene_order) - 1:\n                # Randomize scene order for next iteration\n                random.shuffle(self.scene_order)\n                # Move to next scene\n                self.scene_id = 0\n            else:\n                # Move to next scene\n                self.scene_id += 1\n            # Reset scene counter\n            self.scene_counter = 1\n        elif isinstance(self.scene_period, int):\n            # Stay in current scene\n            self.scene_counter += 1\n   
     else:\n            raise NotImplementedError(\n                \"Invalid scene_period {}\".format(self.scene_period)\n            )\n\n        if self.max_tasks is not None:\n            self.max_tasks -= 1\n\n        return self.scenes[int(self.scene_order[self.scene_id])]\n\n    # def sample_episode(self, scene):\n    #     self.scene_counters[scene] = (self.scene_counters[scene] + 1) % len(self.scene_to_episodes[scene])\n    #     if self.scene_counters[scene] == 0:\n    #         random.shuffle(self.scene_to_episodes[scene])\n    #     return self.scene_to_episodes[scene][self.scene_counters[scene]]\n\n    def next_task(self, force_advance_scene: bool = False) -> Optional[PointNavTask]:\n        if self.max_tasks is not None and self.max_tasks <= 0:\n            return None\n\n        scene = self.sample_scene(force_advance_scene)\n\n        if self.env is not None:\n            if scene.replace(\"_physics\", \"\") != self.env.scene_name.replace(\n                \"_physics\", \"\"\n            ):\n                self.env.reset(scene_name=scene)\n        else:\n            self.env = self._create_environment()\n            self.env.reset(scene_name=scene)\n\n        # task_info = copy.deepcopy(self.sample_episode(scene))\n        # task_info['target'] = task_info['target_position']\n        # task_info['actions'] = []\n\n        locs = self.env.known_good_locations_list()\n        # get_logger().debug(\"locs[0] {} locs[-1] {}\".format(locs[0], locs[-1]))\n\n        ys = [loc[\"y\"] for loc in locs]\n        miny = min(ys)\n        maxy = max(ys)\n        assert maxy - miny < 1e-6, \"miny {} maxy {} for scene {}\".format(\n            miny, maxy, scene\n        )\n\n        too_close_to_target = True\n        target: Optional[Dict[str, float]] = None\n        for _ in range(10):\n            self.env.randomize_agent_location()\n            target = copy.copy(random.choice(locs))\n            too_close_to_target = self.env.distance_to_point(target) <= 0\n   
         if not too_close_to_target:\n                break\n\n        pose = self.env.agent_state()\n\n        task_info = {\n            \"scene\": scene,\n            \"initial_position\": {k: pose[k] for k in [\"x\", \"y\", \"z\"]},\n            \"initial_orientation\": pose[\"rotation\"][\"y\"],\n            \"target\": target,\n            \"actions\": [],\n        }\n\n        if too_close_to_target:\n            get_logger().warning(\"No path for sampled episode {}\".format(task_info))\n        # else:\n        #     get_logger().debug(\"Path found for sampled episode {}\".format(task_info))\n\n        # pose = {**task_info['initial_position'], 'rotation': {'x': 0.0, 'y': task_info['initial_orientation'], 'z': 0.0}, 'horizon': 0.0}\n        # self.env.step({\"action\": \"TeleportFull\", **pose})\n        # assert self.env.last_action_success, \"Failed to initialize agent to {} in {} for epsiode {}\".format(pose, scene, task_info)\n\n        self._last_sampled_task = PointNavTask(\n            env=self.env,\n            sensors=self.sensors,\n            task_info=task_info,\n            max_steps=self.max_steps,\n            action_space=self._action_space,\n            reward_configs=self.rewards_config,\n        )\n        return self._last_sampled_task\n\n    def reset(self):\n        self.scene_counter = 0\n        self.scene_order = list(range(len(self.scenes)))\n        random.shuffle(self.scene_order)\n        self.scene_id = 0\n        self.max_tasks = self.reset_tasks\n\n        # for scene in self.scene_to_episodes:\n        #     random.shuffle(self.scene_to_episodes[scene])\n        # for scene in self.scene_counters:\n        #     self.scene_counters[scene] = -1\n\n    def set_seed(self, seed: int):\n        self.seed = seed\n        if seed is not None:\n            set_seed(seed)\n\n\nclass PointNavDatasetTaskSampler(TaskSampler):\n    def __init__(\n        self,\n        scenes: List[str],\n        scene_directory: str,\n        sensors: 
List[Sensor],\n        max_steps: int,\n        env_args: Dict[str, Any],\n        action_space: gym.Space,\n        rewards_config: Dict,\n        seed: Optional[int] = None,\n        deterministic_cudnn: bool = False,\n        loop_dataset: bool = True,\n        shuffle_dataset: bool = True,\n        allow_flipping=False,\n        env_class=RoboThorEnvironment,\n        **kwargs,\n    ) -> None:\n        self.rewards_config = rewards_config\n        self.env_args = env_args\n        self.scenes = scenes\n        self.shuffle_dataset: bool = shuffle_dataset\n        self.episodes = {\n            scene: ObjectNavDatasetTaskSampler.load_dataset(\n                scene, scene_directory + \"/episodes\"\n            )\n            for scene in scenes\n        }\n        self.env_class = env_class\n        self.env: Optional[RoboThorEnvironment] = None\n        self.sensors = sensors\n        self.max_steps = max_steps\n        self._action_space = action_space\n        self.allow_flipping = allow_flipping\n        self.scene_counter: Optional[int] = None\n        self.scene_order: Optional[List[str]] = None\n        self.scene_id: Optional[int] = None\n        # get the total number of tasks assigned to this process\n        if loop_dataset:\n            self.max_tasks = None\n        else:\n            self.max_tasks = sum(len(self.episodes[scene]) for scene in self.episodes)\n        self.reset_tasks = self.max_tasks\n        self.scene_index = 0\n        self.episode_index = 0\n\n        self._last_sampled_task: Optional[PointNavTask] = None\n\n        self.seed: Optional[int] = None\n        self.set_seed(seed)\n\n        if deterministic_cudnn:\n            set_deterministic_cudnn()\n\n        self.reset()\n\n    def _create_environment(self) -> RoboThorEnvironment:\n        env = self.env_class(**self.env_args)\n        return env\n\n    @property\n    def __len__(self) -> Union[int, float]:\n        \"\"\"Length.\n\n        # Returns\n\n        Number of total 
tasks remaining that can be sampled. Can be float('inf').\n        \"\"\"\n        return float(\"inf\") if self.max_tasks is None else self.max_tasks\n\n    @property\n    def total_unique(self) -> Optional[Union[int, float]]:\n        return self.reset_tasks\n\n    @property\n    def last_sampled_task(self) -> Optional[PointNavTask]:\n        return self._last_sampled_task\n\n    def close(self) -> None:\n        if self.env is not None:\n            self.env.stop()\n\n    @property\n    def all_observation_spaces_equal(self) -> bool:\n        \"\"\"Check if observation spaces equal.\n\n        # Returns\n\n        True if all Tasks that can be sampled by this sampler have the\n            same observation space. Otherwise False.\n        \"\"\"\n        return True\n\n    def next_task(self, force_advance_scene: bool = False) -> Optional[PointNavTask]:\n        if self.max_tasks is not None and self.max_tasks <= 0:\n            return None\n\n        if self.episode_index >= len(self.episodes[self.scenes[self.scene_index]]):\n            self.scene_index = (self.scene_index + 1) % len(self.scenes)\n            # shuffle the new list of episodes to train on\n            if self.shuffle_dataset:\n                random.shuffle(self.episodes[self.scenes[self.scene_index]])\n            self.episode_index = 0\n\n        scene = self.scenes[self.scene_index]\n        episode = self.episodes[scene][self.episode_index]\n        if self.env is not None:\n            if scene.replace(\"_physics\", \"\") != self.env.scene_name.replace(\n                \"_physics\", \"\"\n            ):\n                self.env.reset(scene_name=scene, filtered_objects=[])\n        else:\n            self.env = self._create_environment()\n            self.env.reset(scene_name=scene, filtered_objects=[])\n\n        def to_pos(s):\n            if isinstance(s, (Dict, Tuple)):\n                return s\n            if isinstance(s, float):\n                return {\"x\": 0, \"y\": s, \"z\": 
0}\n            return str_to_pos_for_cache(s)\n\n        for k in [\"initial_position\", \"initial_orientation\", \"target_position\"]:\n            episode[k] = to_pos(episode[k])\n\n        task_info = {\n            \"scene\": scene,\n            \"initial_position\": episode[\"initial_position\"],\n            \"initial_orientation\": episode[\"initial_orientation\"],\n            \"target\": episode[\"target_position\"],\n            \"shortest_path\": episode[\"shortest_path\"],\n            \"distance_to_target\": episode[\"shortest_path_length\"],\n            \"id\": episode[\"id\"],\n        }\n\n        if self.allow_flipping and random.random() > 0.5:\n            task_info[\"mirrored\"] = True\n        else:\n            task_info[\"mirrored\"] = False\n\n        self.episode_index += 1\n        if self.max_tasks is not None:\n            self.max_tasks -= 1\n\n        if not self.env.teleport(\n            pose=episode[\"initial_position\"], rotation=episode[\"initial_orientation\"]\n        ):\n            return self.next_task()\n\n        self._last_sampled_task = PointNavTask(\n            env=self.env,\n            sensors=self.sensors,\n            task_info=task_info,\n            max_steps=self.max_steps,\n            action_space=self._action_space,\n            reward_configs=self.rewards_config,\n        )\n\n        return self._last_sampled_task\n\n    def reset(self):\n        self.episode_index = 0\n        self.scene_index = 0\n        self.max_tasks = self.reset_tasks\n\n    def set_seed(self, seed: int):\n        self.seed = seed\n        if seed is not None:\n            set_seed(seed)\n\n    @property\n    def length(self) -> Union[int, float]:\n        \"\"\"Length.\n\n        # Returns\n\n        Number of total tasks remaining that can be sampled.\n        Can be float('inf').\n        \"\"\"\n        return float(\"inf\") if self.max_tasks is None else self.max_tasks\n\n\nclass NavToPartnerTaskSampler(TaskSampler):\n    def 
__init__(\n        self,\n        scenes: List[str],\n        sensors: List[Sensor],\n        max_steps: int,\n        env_args: Dict[str, Any],\n        action_space: gym.Space,\n        rewards_config: Dict,\n        scene_period: Optional[Union[int, str]] = None,\n        max_tasks: Optional[int] = None,\n        seed: Optional[int] = None,\n        deterministic_cudnn: bool = False,\n        **kwargs,\n    ) -> None:\n        self.rewards_config = rewards_config\n        self.env_args = env_args\n        self.scenes = scenes\n        self.env: Optional[RoboThorEnvironment] = None\n        self.sensors = sensors\n        self.max_steps = max_steps\n        self._action_space = action_space\n\n        self.scene_counter: Optional[int] = None\n        self.scene_order: Optional[List[str]] = None\n        self.scene_id: Optional[int] = None\n        self.scene_period: Optional[Union[str, int]] = (\n            scene_period  # default makes a random choice\n        )\n        self.max_tasks: Optional[int] = None\n        self.reset_tasks = max_tasks\n\n        self._last_sampled_task: Optional[NavToPartnerTask] = None\n\n        self.seed: Optional[int] = None\n        self.set_seed(seed)\n\n        if deterministic_cudnn:\n            set_deterministic_cudnn()\n\n        self.reset()\n\n    def _create_environment(self) -> RoboThorEnvironment:\n        assert (\n            self.env_args[\"agentCount\"] == 2\n        ), \"NavToPartner is only defined for 2 agents!\"\n        env = RoboThorEnvironment(**self.env_args)\n        return env\n\n    @property\n    def length(self) -> Union[int, float]:\n        \"\"\"Length.\n\n        # Returns\n\n        Number of total tasks remaining that can be sampled.\n        Can be float('inf').\n        \"\"\"\n        return float(\"inf\") if self.max_tasks is None else self.max_tasks\n\n    @property\n    def total_unique(self) -> Optional[Union[int, float]]:\n        return self.reset_tasks\n\n    @property\n    def 
last_sampled_task(self) -> Optional[NavToPartnerTask]:\n        return self._last_sampled_task\n\n    def close(self) -> None:\n        if self.env is not None:\n            self.env.stop()\n\n    @property\n    def all_observation_spaces_equal(self) -> bool:\n        \"\"\"Check if observation spaces equal.\n\n        # Returns\n\n        True if all Tasks that can be sampled by this sampler\n        have the     same observation space. Otherwise False.\n        \"\"\"\n        return True\n\n    def sample_scene(self, force_advance_scene: bool):\n        if force_advance_scene:\n            if self.scene_period != \"manual\":\n                get_logger().warning(\n                    \"When sampling scene, have `force_advance_scene == True`\"\n                    \"but `self.scene_period` is not equal to 'manual',\"\n                    \"this may cause unexpected behavior.\"\n                )\n            self.scene_id = (1 + self.scene_id) % len(self.scenes)\n            if self.scene_id == 0:\n                random.shuffle(self.scene_order)\n\n        if self.scene_period is None:\n            # Random scene\n            self.scene_id = random.randint(0, len(self.scenes) - 1)\n        elif self.scene_period == \"manual\":\n            pass\n        elif self.scene_counter >= cast(int, self.scene_period):\n            if self.scene_id == len(self.scene_order) - 1:\n                # Randomize scene order for next iteration\n                random.shuffle(self.scene_order)\n                # Move to next scene\n                self.scene_id = 0\n            else:\n                # Move to next scene\n                self.scene_id += 1\n            # Reset scene counter\n            self.scene_counter = 1\n        elif isinstance(self.scene_period, int):\n            # Stay in current scene\n            self.scene_counter += 1\n        else:\n            raise NotImplementedError(\n                \"Invalid scene_period {}\".format(self.scene_period)\n        
    )\n\n        if self.max_tasks is not None:\n            self.max_tasks -= 1\n\n        return self.scenes[int(self.scene_order[self.scene_id])]\n\n    def next_task(\n        self, force_advance_scene: bool = False\n    ) -> Optional[NavToPartnerTask]:\n        if self.max_tasks is not None and self.max_tasks <= 0:\n            return None\n\n        scene = self.sample_scene(force_advance_scene)\n\n        if self.env is not None:\n            if scene.replace(\"_physics\", \"\") != self.env.scene_name.replace(\n                \"_physics\", \"\"\n            ):\n                self.env.reset(scene_name=scene)\n        else:\n            self.env = self._create_environment()\n            self.env.reset(scene_name=scene)\n\n        too_close_to_target = True\n        for _ in range(10):\n            self.env.randomize_agent_location(agent_id=0)\n            self.env.randomize_agent_location(agent_id=1)\n\n            pose1 = self.env.agent_state(0)\n            pose2 = self.env.agent_state(1)\n            dist = self.env.distance_cache.find_distance(\n                self.env.scene_name,\n                {k: pose1[k] for k in [\"x\", \"y\", \"z\"]},\n                {k: pose2[k] for k in [\"x\", \"y\", \"z\"]},\n                self.env.distance_from_point_to_point,\n            )\n\n            too_close_to_target = (\n                dist <= 1.25 * self.rewards_config[\"max_success_distance\"]\n            )\n            if not too_close_to_target:\n                break\n\n        task_info = {\n            \"scene\": scene,\n            \"initial_position1\": {k: pose1[k] for k in [\"x\", \"y\", \"z\"]},\n            \"initial_position2\": {k: pose2[k] for k in [\"x\", \"y\", \"z\"]},\n            \"initial_orientation1\": pose1[\"rotation\"][\"y\"],\n            \"initial_orientation2\": pose2[\"rotation\"][\"y\"],\n            \"id\": \"_\".join(\n                [scene]\n                # + [\"%4.2f\" % pose1[k] for k in [\"x\", \"y\", \"z\"]]\n        
        # + [\"%4.2f\" % pose1[\"rotation\"][\"y\"]]\n                # + [\"%4.2f\" % pose2[k] for k in [\"x\", \"y\", \"z\"]]\n                # + [\"%4.2f\" % pose2[\"rotation\"][\"y\"]]\n                + [\"%d\" % random.randint(0, 2**63 - 1)]\n            ),\n        }\n\n        if too_close_to_target:\n            get_logger().warning(\"Bad sampled episode {}\".format(task_info))\n\n        self._last_sampled_task = NavToPartnerTask(\n            env=self.env,\n            sensors=self.sensors,\n            task_info=task_info,\n            max_steps=self.max_steps,\n            action_space=self._action_space,\n            reward_configs=self.rewards_config,\n        )\n        return self._last_sampled_task\n\n    def reset(self):\n        self.scene_counter = 0\n        self.scene_order = list(range(len(self.scenes)))\n        random.shuffle(self.scene_order)\n        self.scene_id = 0\n        self.max_tasks = self.reset_tasks\n\n    def set_seed(self, seed: int):\n        self.seed = seed\n        if seed is not None:\n            set_seed(seed)\n"
  },
  {
    "path": "allenact_plugins/robothor_plugin/robothor_tasks.py",
    "content": "import math\nfrom typing import Tuple, List, Dict, Any, Optional, Union, Sequence, cast\n\nimport gym\nimport numpy as np\n\nfrom allenact.base_abstractions.misc import RLStepResult\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.base_abstractions.task import Task\nfrom allenact.utils.system import get_logger\nfrom allenact.utils.tensor_utils import tile_images\nfrom allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment\nfrom allenact_plugins.robothor_plugin.robothor_constants import (\n    MOVE_AHEAD,\n    ROTATE_LEFT,\n    ROTATE_RIGHT,\n    END,\n    LOOK_UP,\n    LOOK_DOWN,\n)\nfrom allenact_plugins.robothor_plugin.robothor_environment import RoboThorEnvironment\n\n\ndef spl_metric(\n    success: bool, optimal_distance: float, travelled_distance: float\n) -> Optional[float]:\n    if not success:\n        return 0.0\n    elif optimal_distance < 0:\n        return None\n    elif optimal_distance == 0:\n        if travelled_distance == 0:\n            return 1.0\n        else:\n            return 0.0\n    else:\n        travelled_distance = max(travelled_distance, optimal_distance)\n        return optimal_distance / travelled_distance\n\n\nclass PointNavTask(Task[RoboThorEnvironment]):\n    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, END)\n\n    def __init__(\n        self,\n        env: RoboThorEnvironment,\n        sensors: List[Sensor],\n        task_info: Dict[str, Any],\n        max_steps: int,\n        reward_configs: Dict[str, Any],\n        **kwargs,\n    ) -> None:\n        super().__init__(\n            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs\n        )\n        self.reward_configs = reward_configs\n        self._took_end_action: bool = False\n        self._success: Optional[bool] = False\n        self.last_geodesic_distance = self.env.distance_to_point(\n            self.task_info[\"target\"]\n        )\n\n        self.optimal_distance = 
self.last_geodesic_distance\n        self._rewards: List[float] = []\n        self._distance_to_goal: List[float] = []\n        self._metrics = None\n        self.path: List[Any] = (\n            []\n        )  # the initial coordinate will be directly taken from the optimal path\n        self.travelled_distance = 0.0\n\n        self.task_info[\"followed_path\"] = [self.env.agent_state()]\n        self.task_info[\"action_names\"] = self.action_names()\n\n    @property\n    def action_space(self):\n        return gym.spaces.Discrete(len(self._actions))\n\n    def reached_terminal_state(self) -> bool:\n        return self._took_end_action\n\n    @classmethod\n    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:\n        return cls._actions\n\n    def close(self) -> None:\n        self.env.stop()\n\n    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:\n        assert isinstance(action, int)\n        action = cast(int, action)\n\n        action_str = self.class_action_names()[action]\n\n        if action_str == END:\n            self._took_end_action = True\n            self._success = self._is_goal_in_range()\n            self.last_action_success = self._success\n        else:\n            self.env.step({\"action\": action_str})\n            self.last_action_success = self.env.last_action_success\n            pose = self.env.agent_state()\n            self.path.append({k: pose[k] for k in [\"x\", \"y\", \"z\"]})\n            self.task_info[\"followed_path\"].append(pose)\n        if len(self.path) > 1:\n            self.travelled_distance += IThorEnvironment.position_dist(\n                p0=self.path[-1], p1=self.path[-2], ignore_y=True\n            )\n        step_result = RLStepResult(\n            observation=self.get_observations(),\n            reward=self.judge(),\n            done=self.is_done(),\n            info={\"last_action_success\": self.last_action_success, \"action\": action},\n        )\n        return step_result\n\n    
def render(self, mode: str = \"rgb\", *args, **kwargs) -> np.ndarray:\n        assert mode in [\"rgb\", \"depth\"], \"only rgb and depth rendering is implemented\"\n        if mode == \"rgb\":\n            return self.env.current_frame\n        elif mode == \"depth\":\n            return self.env.current_depth\n\n    def _is_goal_in_range(self) -> Optional[bool]:\n        tget = self.task_info[\"target\"]\n        dist = self.dist_to_target()\n\n        if -0.5 < dist <= 0.2:\n            return True\n        elif dist > 0.2:\n            return False\n        else:\n            get_logger().debug(\n                \"No path for {} from {} to {}\".format(\n                    self.env.scene_name, self.env.agent_state(), tget\n                )\n            )\n            return None\n\n    def shaping(self) -> float:\n        rew = 0.0\n\n        if self.reward_configs[\"shaping_weight\"] == 0.0:\n            return rew\n\n        geodesic_distance = self.dist_to_target()\n\n        if geodesic_distance == -1.0:\n            geodesic_distance = self.last_geodesic_distance\n        if (\n            self.last_geodesic_distance > -0.5 and geodesic_distance > -0.5\n        ):  # (robothor limits)\n            rew += self.last_geodesic_distance - geodesic_distance\n        self.last_geodesic_distance = geodesic_distance\n\n        return rew * self.reward_configs[\"shaping_weight\"]\n\n    def judge(self) -> float:\n        \"\"\"Judge the last event.\"\"\"\n        reward = self.reward_configs[\"step_penalty\"]\n\n        reward += self.shaping()\n\n        if self._took_end_action:\n            if self._success is not None:\n                reward += (\n                    self.reward_configs[\"goal_success_reward\"]\n                    if self._success\n                    else self.reward_configs[\"failed_stop_reward\"]\n                )\n        elif self.num_steps_taken() + 1 >= self.max_steps:\n            reward += 
self.reward_configs.get(\"reached_max_steps_reward\", 0.0)\n\n        self._rewards.append(float(reward))\n        return float(reward)\n\n    def dist_to_target(self):\n        return self.env.distance_to_point(self.task_info[\"target\"])\n\n    def metrics(self) -> Dict[str, Any]:\n        if not self.is_done():\n            return {}\n\n        total_reward = float(np.sum(self._rewards))\n        self._rewards = []\n\n        if self._success is None:\n            return {}\n\n        dist2tget = self.dist_to_target()\n        spl = spl_metric(\n            success=self._success,\n            optimal_distance=self.optimal_distance,\n            travelled_distance=self.travelled_distance,\n        )\n\n        metrics = {\n            **super(PointNavTask, self).metrics(),\n            \"success\": self._success,  # False also if no path to target\n            \"total_reward\": total_reward,\n            \"dist_to_target\": dist2tget,\n            \"spl\": 0 if spl is None else spl,\n        }\n        return metrics\n\n\nclass ObjectNavTask(Task[RoboThorEnvironment]):\n    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, END, LOOK_UP, LOOK_DOWN)\n\n    def __init__(\n        self,\n        env: RoboThorEnvironment,\n        sensors: List[Sensor],\n        task_info: Dict[str, Any],\n        max_steps: int,\n        reward_configs: Dict[str, Any],\n        **kwargs,\n    ) -> None:\n        super().__init__(\n            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs\n        )\n        self.reward_configs = reward_configs\n        self._took_end_action: bool = False\n        self._success: Optional[bool] = False\n        self.mirror = task_info[\"mirrored\"]\n\n        self._all_metadata_available = env.all_metadata_available\n\n        self._rewards: List[float] = []\n        self._distance_to_goal: List[float] = []\n        self._metrics = None\n        self.path: List = (\n            []\n        )  # the initial coordinate 
will be directly taken from the optimal path\n        self.travelled_distance = 0.0\n\n        self.task_info[\"followed_path\"] = [self.env.agent_state()]\n        self.task_info[\"taken_actions\"] = []\n        self.task_info[\"action_names\"] = self.class_action_names()\n\n        if self._all_metadata_available:\n            self.last_geodesic_distance = self.env.distance_to_object_type(\n                self.task_info[\"object_type\"]\n            )\n            self.optimal_distance = self.last_geodesic_distance\n            self.closest_geo_distance = self.last_geodesic_distance\n\n        self.last_expert_action: Optional[int] = None\n        self.last_action_success = False\n\n    @property\n    def action_space(self):\n        return gym.spaces.Discrete(len(self._actions))\n\n    def reached_terminal_state(self) -> bool:\n        return self._took_end_action\n\n    @classmethod\n    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:\n        return cls._actions\n\n    def close(self) -> None:\n        self.env.stop()\n\n    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:\n        assert isinstance(action, int)\n        action = cast(int, action)\n\n        action_str = self.class_action_names()[action]\n\n        if self.mirror:\n            if action_str == ROTATE_RIGHT:\n                action_str = ROTATE_LEFT\n            elif action_str == ROTATE_LEFT:\n                action_str = ROTATE_RIGHT\n\n        self.task_info[\"taken_actions\"].append(action_str)\n\n        if action_str == END:\n            self._took_end_action = True\n            self._success = self._is_goal_in_range()\n            self.last_action_success = self._success\n        else:\n            self.env.step({\"action\": action_str})\n            self.last_action_success = self.env.last_action_success\n            pose = self.env.agent_state()\n            self.path.append({k: pose[k] for k in [\"x\", \"y\", \"z\"]})\n            
self.task_info[\"followed_path\"].append(pose)\n        if len(self.path) > 1:\n            self.travelled_distance += IThorEnvironment.position_dist(\n                p0=self.path[-1], p1=self.path[-2], ignore_y=True\n            )\n        step_result = RLStepResult(\n            observation=self.get_observations(),\n            reward=self.judge(),\n            done=self.is_done(),\n            info={\"last_action_success\": self.last_action_success, \"action\": action},\n        )\n        return step_result\n\n    def render(self, mode: str = \"rgb\", *args, **kwargs) -> np.ndarray:\n        assert mode in [\"rgb\", \"depth\"], \"only rgb and depth rendering is implemented\"\n        if mode == \"rgb\":\n            frame = self.env.current_frame.copy()\n        elif mode == \"depth\":\n            frame = self.env.current_depth.copy()\n        else:\n            raise NotImplementedError(f\"Mode '{mode}' is not supported.\")\n\n        if self.mirror:\n            frame = frame[:, ::-1, :].copy()  # horizontal flip\n            # print(\"mirrored render\")\n        return frame\n\n    def _is_goal_in_range(self) -> bool:\n        return any(\n            o[\"objectType\"] == self.task_info[\"object_type\"]\n            for o in self.env.visible_objects()\n        )\n\n    def shaping(self) -> float:\n        rew = 0.0\n\n        if self.reward_configs[\"shaping_weight\"] == 0.0:\n            return rew\n\n        geodesic_distance = self.env.distance_to_object_type(\n            self.task_info[\"object_type\"]\n        )\n\n        # Ensuring the reward magnitude is not greater than the total distance moved\n        max_reward_mag = 0.0\n        if len(self.path) >= 2:\n            p0, p1 = self.path[-2:]\n            max_reward_mag = math.sqrt(\n                (p0[\"x\"] - p1[\"x\"]) ** 2 + (p0[\"z\"] - p1[\"z\"]) ** 2\n            )\n\n        if self.reward_configs.get(\"positive_only_reward\", False):\n            if geodesic_distance > 0.5:\n            
    rew = max(self.closest_geo_distance - geodesic_distance, 0)\n        else:\n            if (\n                self.last_geodesic_distance > -0.5 and geodesic_distance > -0.5\n            ):  # (robothor limits)\n                rew += self.last_geodesic_distance - geodesic_distance\n\n        self.last_geodesic_distance = geodesic_distance\n        self.closest_geo_distance = min(self.closest_geo_distance, geodesic_distance)\n\n        return (\n            max(\n                min(rew, max_reward_mag),\n                -max_reward_mag,\n            )\n            * self.reward_configs[\"shaping_weight\"]\n        )\n\n    def judge(self) -> float:\n        \"\"\"Judge the last event.\"\"\"\n        reward = self.reward_configs[\"step_penalty\"]\n\n        reward += self.shaping()\n\n        if self._took_end_action:\n            if self._success:\n                reward += self.reward_configs[\"goal_success_reward\"]\n            else:\n                reward += self.reward_configs[\"failed_stop_reward\"]\n        elif self.num_steps_taken() + 1 >= self.max_steps:\n            reward += self.reward_configs.get(\"reached_max_steps_reward\", 0.0)\n\n        self._rewards.append(float(reward))\n        return float(reward)\n\n    def get_observations(self, **kwargs) -> Any:\n        obs = self.sensor_suite.get_observations(env=self.env, task=self)\n        if self.mirror:\n            for o in obs:\n                if (\"rgb\" in o or \"depth\" in o) and isinstance(obs[o], np.ndarray):\n                    if (\n                        len(obs[o].shape) == 3\n                    ):  # heuristic to determine this is a visual sensor\n                        obs[o] = obs[o][:, ::-1, :].copy()  # horizontal flip\n                    elif len(obs[o].shape) == 2:  # perhaps only two axes for depth?\n                        obs[o] = obs[o][:, ::-1].copy()  # horizontal flip\n        return obs\n\n    def metrics(self) -> Dict[str, Any]:\n        if not 
self.is_done():\n            return {}\n\n        metrics = super(ObjectNavTask, self).metrics()\n        if self._all_metadata_available:\n            dist2tget = self.env.distance_to_object_type(self.task_info[\"object_type\"])\n\n            spl = spl_metric(\n                success=self._success,\n                optimal_distance=self.optimal_distance,\n                travelled_distance=self.travelled_distance,\n            )\n\n            metrics = {\n                **metrics,\n                \"success\": self._success,\n                \"total_reward\": np.sum(self._rewards),\n                \"dist_to_target\": dist2tget,\n                \"spl\": 0 if spl is None else spl,\n            }\n        return metrics\n\n    def query_expert(self, end_action_only: bool = False, **kwargs) -> Tuple[int, bool]:\n        if self._is_goal_in_range():\n            return self.class_action_names().index(END), True\n\n        if end_action_only:\n            return 0, False\n        else:\n            try:\n                self.env.step(\n                    {\n                        \"action\": \"ObjectNavExpertAction\",\n                        \"objectType\": self.task_info[\"object_type\"],\n                    }\n                )\n            except ValueError:\n                raise RuntimeError(\n                    \"Attempting to use the action `ObjectNavExpertAction` which is not supported by your version of\"\n                    \" AI2-THOR. The action `ObjectNavExpertAction` is experimental. In order\"\n                    \" to enable this action, please install the (in development) version of AI2-THOR. 
Through pip\"\n                    \" this can be done with the command\"\n                    \" `pip install -e git+https://github.com/allenai/ai2thor.git@7d914cec13aae62298f5a6a816adb8ac6946c61f#egg=ai2thor`.\"\n                )\n            if self.env.last_action_success:\n                expert_action: Optional[str] = self.env.last_event.metadata[\n                    \"actionReturn\"\n                ]\n                if isinstance(expert_action, str):\n                    if self.mirror:\n                        if expert_action == \"RotateLeft\":\n                            expert_action = \"RotateRight\"\n                        elif expert_action == \"RotateRight\":\n                            expert_action = \"RotateLeft\"\n\n                    return self.class_action_names().index(expert_action), True\n                else:\n                    # This should have been caught by self._is_goal_in_range()...\n                    return 0, False\n            else:\n                return 0, False\n\n\nclass NavToPartnerTask(Task[RoboThorEnvironment]):\n    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT)\n\n    def __init__(\n        self,\n        env: RoboThorEnvironment,\n        sensors: List[Sensor],\n        task_info: Dict[str, Any],\n        max_steps: int,\n        reward_configs: Dict[str, Any],\n        **kwargs,\n    ) -> None:\n        super().__init__(\n            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs\n        )\n        self.reward_configs = reward_configs\n\n        assert self.env.agent_count == 2, \"NavToPartnerTask only defined for 2 agents!\"\n\n        pose1 = self.env.agent_state(0)\n        pose2 = self.env.agent_state(1)\n        self.last_geodesic_distance = self.env.distance_cache.find_distance(\n            self.env.scene_name,\n            {k: pose1[k] for k in [\"x\", \"y\", \"z\"]},\n            {k: pose2[k] for k in [\"x\", \"y\", \"z\"]},\n            
self.env.distance_from_point_to_point,\n        )\n\n        self.task_info[\"followed_path1\"] = [pose1]\n        self.task_info[\"followed_path2\"] = [pose2]\n        self.task_info[\"action_names\"] = self.class_action_names()\n\n    @property\n    def action_space(self):\n        return gym.spaces.Tuple(\n            [\n                gym.spaces.Discrete(len(self._actions)),\n                gym.spaces.Discrete(len(self._actions)),\n            ]\n        )\n\n    def reached_terminal_state(self) -> bool:\n        return (\n            self.last_geodesic_distance <= self.reward_configs[\"max_success_distance\"]\n        )\n\n    @classmethod\n    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:\n        return cls._actions\n\n    def close(self) -> None:\n        self.env.stop()\n\n    def _step(self, action: Tuple[int, int]) -> RLStepResult:\n        assert isinstance(action, tuple)\n        action_str1 = self.class_action_names()[action[0]]\n        action_str2 = self.class_action_names()[action[1]]\n\n        self.env.step({\"action\": action_str1, \"agentId\": 0})\n        self.last_action_success1 = self.env.last_action_success\n        self.env.step({\"action\": action_str2, \"agentId\": 1})\n        self.last_action_success2 = self.env.last_action_success\n\n        pose1 = self.env.agent_state(0)\n        self.task_info[\"followed_path1\"].append(pose1)\n        pose2 = self.env.agent_state(1)\n        self.task_info[\"followed_path2\"].append(pose2)\n\n        self.last_geodesic_distance = self.env.distance_cache.find_distance(\n            self.env.scene_name,\n            {k: pose1[k] for k in [\"x\", \"y\", \"z\"]},\n            {k: pose2[k] for k in [\"x\", \"y\", \"z\"]},\n            self.env.distance_from_point_to_point,\n        )\n\n        step_result = RLStepResult(\n            observation=self.get_observations(),\n            reward=self.judge(),\n            done=self.is_done(),\n            info={\n                
\"last_action_success\": [\n                    self.last_action_success1,\n                    self.last_action_success2,\n                ],\n                \"action\": action,\n            },\n        )\n        return step_result\n\n    def render(self, mode: str = \"rgb\", *args, **kwargs) -> np.ndarray:\n        assert mode in [\"rgb\", \"depth\"], \"only rgb and depth rendering is implemented\"\n        if mode == \"rgb\":\n            return tile_images(self.env.current_frames)\n        elif mode == \"depth\":\n            return tile_images(self.env.current_depths)\n\n    def judge(self) -> float:\n        \"\"\"Judge the last event.\"\"\"\n        reward = self.reward_configs[\"step_penalty\"]\n\n        if self.reached_terminal_state():\n            reward += self.reward_configs[\"success_reward\"]\n\n        return reward  # reward shared by both agents (no shaping)\n\n    def metrics(self) -> Dict[str, Any]:\n        if not self.is_done():\n            return {}\n\n        return {\n            **super().metrics(),\n            \"success\": self.reached_terminal_state(),\n        }\n"
  },
  {
    "path": "allenact_plugins/robothor_plugin/robothor_viz.py",
    "content": "import copy\nimport json\nimport math\nimport os\nfrom typing import Tuple, Sequence, Union, Dict, Optional, Any, cast, Generator, List\n\nimport cv2\nimport numpy as np\nfrom PIL import Image, ImageDraw\nfrom ai2thor.controller import Controller\nfrom matplotlib import pyplot as plt\nfrom matplotlib.figure import Figure\nimport colour as col\n\nfrom allenact.utils.system import get_logger\nfrom allenact.utils.viz_utils import TrajectoryViz\n\nROBOTHOR_VIZ_CACHED_TOPDOWN_VIEWS_DIR = os.path.join(\n    os.path.expanduser(\"~\"), \".allenact\", \"robothor\", \"top_down_viz_cache\"\n)\n\n\nclass ThorPositionTo2DFrameTranslator(object):\n    def __init__(\n        self,\n        frame_shape_rows_cols: Tuple[int, int],\n        cam_position: Sequence[float],\n        orth_size: float,\n    ):\n        self.frame_shape = frame_shape_rows_cols\n        self.lower_left = np.array((cam_position[0], cam_position[2])) - orth_size\n        self.span = 2 * orth_size\n\n    def __call__(self, position: Sequence[float]):\n        if len(position) == 3:\n            x, _, z = position\n        else:\n            x, z = position\n\n        camera_position = (np.array((x, z)) - self.lower_left) / self.span\n        return np.array(\n            (\n                round(self.frame_shape[0] * (1.0 - camera_position[1])),\n                round(self.frame_shape[1] * camera_position[0]),\n            ),\n            dtype=int,\n        )\n\n\nclass ThorViz(TrajectoryViz):\n    def __init__(\n        self,\n        path_to_trajectory: Sequence[str] = (\"task_info\", \"followed_path\"),\n        label: str = \"thor_trajectory\",\n        figsize: Tuple[float, float] = (8, 4),  # width, height\n        fontsize: float = 10,\n        scenes: Union[\n            Tuple[str, int, int, int, int], Sequence[Tuple[str, int, int, int, int]]\n        ] = (\"FloorPlan_Val{}_{}\", 1, 3, 1, 5),\n        viz_rows_cols: Tuple[int, int] = (448, 448),\n        single_color: bool = False,\n  
      view_triangle_only_on_last: bool = True,\n        disable_view_triangle: bool = False,\n        line_opacity: float = 1.0,\n        **kwargs\n    ):\n        super().__init__(\n            path_to_trajectory=path_to_trajectory,\n            label=label,\n            figsize=figsize,\n            fontsize=fontsize,\n            **kwargs\n        )\n\n        if isinstance(scenes[0], str):\n            scenes = [\n                cast(Tuple[str, int, int, int, int], scenes)\n            ]  # make it list of tuples\n        self.scenes = cast(List[Tuple[str, int, int, int, int]], scenes)\n\n        self.room_path = ROBOTHOR_VIZ_CACHED_TOPDOWN_VIEWS_DIR\n        os.makedirs(self.room_path, exist_ok=True)\n\n        self.viz_rows_cols = viz_rows_cols\n        self.single_color = single_color\n        self.view_triangle_only_on_last = view_triangle_only_on_last\n        self.disable_view_triangle = disable_view_triangle\n        self.line_opacity = line_opacity\n\n        # Only needed for rendering\n        self.map_data: Optional[Dict[str, Any]] = None\n        self.thor_top_downs: Optional[Dict[str, np.ndarray]] = None\n\n        self.controller: Optional[Controller] = None\n\n    def init_top_down_render(self):\n        self.map_data = self.get_translator()\n        self.thor_top_downs = self.make_top_down_views()\n\n        # No controller needed after this point\n        if self.controller is not None:\n            self.controller.stop()\n            self.controller = None\n\n    @staticmethod\n    def iterate_scenes(\n        all_scenes: Sequence[Tuple[str, int, int, int, int]]\n    ) -> Generator[str, None, None]:\n        for scenes in all_scenes:\n            for wall in range(scenes[1], scenes[2] + 1):\n                for furniture in range(scenes[3], scenes[4] + 1):\n                    roomname = scenes[0].format(wall, furniture)\n                    yield roomname\n\n    def cached_map_data_path(self, roomname: str) -> str:\n        return 
os.path.join(self.room_path, \"map_data__{}.json\".format(roomname))\n\n    def get_translator(self) -> Dict[str, Any]:\n        roomname = list(ThorViz.iterate_scenes(self.scenes))[0]\n        json_file = self.cached_map_data_path(roomname)\n        if not os.path.exists(json_file):\n            self.make_controller()\n            self.controller.reset(roomname)\n            map_data = self.get_agent_map_data()\n            get_logger().info(\"Dumping {}\".format(json_file))\n            with open(json_file, \"w\") as f:\n                json.dump(map_data, f, indent=4, sort_keys=True)\n        else:\n            with open(json_file, \"r\") as f:\n                map_data = json.load(f)\n\n        pos_translator = ThorPositionTo2DFrameTranslator(\n            self.viz_rows_cols,\n            self.position_to_tuple(map_data[\"cam_position\"]),\n            map_data[\"cam_orth_size\"],\n        )\n        map_data[\"pos_translator\"] = pos_translator\n\n        get_logger().debug(\"Using map_data {}\".format(map_data))\n        return map_data\n\n    def cached_image_path(self, roomname: str) -> str:\n        return os.path.join(\n            self.room_path, \"{}__r{}_c{}.png\".format(roomname, *self.viz_rows_cols)\n        )\n\n    def make_top_down_views(self) -> Dict[str, np.ndarray]:\n        top_downs = {}\n        for roomname in self.iterate_scenes(self.scenes):\n            fname = self.cached_image_path(roomname)\n            if not os.path.exists(fname):\n                self.make_controller()\n                self.dump_top_down_view(roomname, fname)\n            top_downs[roomname] = cv2.imread(fname)\n\n        return top_downs\n\n    def crop_viz_image(self, viz_image: np.ndarray) -> np.ndarray:\n        # Top-down view of room spans vertically near the center of the frame in RoboTHOR:\n        y_min = int(self.viz_rows_cols[0] * 0.3)\n        y_max = int(self.viz_rows_cols[0] * 0.8)\n        # But it covers approximately the entire width:\n        
x_min = 0\n        x_max = self.viz_rows_cols[1]\n        cropped_viz_image = viz_image[y_min:y_max, x_min:x_max, :]\n        return cropped_viz_image\n\n    def make_controller(self):\n        if self.controller is None:\n            self.controller = Controller()\n\n            self.controller.step({\"action\": \"ChangeQuality\", \"quality\": \"Very High\"})\n            self.controller.step(\n                {\n                    \"action\": \"ChangeResolution\",\n                    \"x\": self.viz_rows_cols[1],\n                    \"y\": self.viz_rows_cols[0],\n                }\n            )\n\n    def get_agent_map_data(self):\n        self.controller.step({\"action\": \"ToggleMapView\"})\n        cam_position = self.controller.last_event.metadata[\"cameraPosition\"]\n        cam_orth_size = self.controller.last_event.metadata[\"cameraOrthSize\"]\n        to_return = {\n            \"cam_position\": cam_position,\n            \"cam_orth_size\": cam_orth_size,\n        }\n        self.controller.step({\"action\": \"ToggleMapView\"})\n        return to_return\n\n    @staticmethod\n    def position_to_tuple(position: Dict[str, float]) -> Tuple[float, float, float]:\n        return position[\"x\"], position[\"y\"], position[\"z\"]\n\n    @staticmethod\n    def add_lines_to_map(\n        ps: Sequence[Any],\n        frame: np.ndarray,\n        pos_translator: ThorPositionTo2DFrameTranslator,\n        opacity: float,\n        color: Optional[Tuple[int, ...]] = None,\n    ) -> np.ndarray:\n        if len(ps) <= 1:\n            return frame\n        if color is None:\n            color = (255, 0, 0)\n\n        img1 = Image.fromarray(frame.astype(\"uint8\"), \"RGB\").convert(\"RGBA\")\n        img2 = Image.new(\"RGBA\", frame.shape[:-1])  # Use RGBA\n\n        opacity = int(round(255 * opacity))  # Define transparency for the triangle.\n        draw = ImageDraw.Draw(img2)\n        for i in range(len(ps) - 1):\n            draw.line(\n                
tuple(reversed(pos_translator(ps[i])))\n                + tuple(reversed(pos_translator(ps[i + 1]))),\n                fill=color + (opacity,),\n                width=int(frame.shape[0] / 100),\n            )\n\n        img = Image.alpha_composite(img1, img2)\n        return np.array(img.convert(\"RGB\"))\n\n    @staticmethod\n    def add_line_to_map(\n        p0: Any,\n        p1: Any,\n        frame: np.ndarray,\n        pos_translator: ThorPositionTo2DFrameTranslator,\n        opacity: float,\n        color: Optional[Tuple[int, ...]] = None,\n    ) -> np.ndarray:\n        if p0 == p1:\n            return frame\n        if color is None:\n            color = (255, 0, 0)\n\n        img1 = Image.fromarray(frame.astype(\"uint8\"), \"RGB\").convert(\"RGBA\")\n        img2 = Image.new(\"RGBA\", frame.shape[:-1])  # Use RGBA\n\n        opacity = int(round(255 * opacity))  # Define transparency for the triangle.\n        draw = ImageDraw.Draw(img2)\n        draw.line(\n            tuple(reversed(pos_translator(p0))) + tuple(reversed(pos_translator(p1))),\n            fill=color + (opacity,),\n            width=int(frame.shape[0] / 100),\n        )\n\n        img = Image.alpha_composite(img1, img2)\n        return np.array(img.convert(\"RGB\"))\n\n    @staticmethod\n    def add_agent_view_triangle(\n        position: Any,\n        rotation: Dict[str, float],\n        frame: np.ndarray,\n        pos_translator: ThorPositionTo2DFrameTranslator,\n        scale: float = 1.0,\n        opacity: float = 0.1,\n    ) -> np.ndarray:\n        p0 = np.array((position[0], position[2]))\n        p1 = copy.copy(p0)\n        p2 = copy.copy(p0)\n\n        theta = -2 * math.pi * (rotation[\"y\"] / 360.0)\n        rotation_mat = np.array(\n            [[math.cos(theta), -math.sin(theta)], [math.sin(theta), math.cos(theta)]]\n        )\n        offset1 = scale * np.array([-1 / 2.0, 1])\n        offset2 = scale * np.array([1 / 2.0, 1])\n\n        p1 += np.matmul(rotation_mat, offset1)\n      
  p2 += np.matmul(rotation_mat, offset2)\n\n        img1 = Image.fromarray(frame.astype(\"uint8\"), \"RGB\").convert(\"RGBA\")\n        img2 = Image.new(\"RGBA\", frame.shape[:-1])  # Use RGBA\n\n        opacity = int(round(255 * opacity))  # Define transparency for the triangle.\n        points = [tuple(reversed(pos_translator(p))) for p in [p0, p1, p2]]\n        draw = ImageDraw.Draw(img2)\n        draw.polygon(points, fill=(255, 255, 255, opacity))\n\n        img = Image.alpha_composite(img1, img2)\n        return np.array(img.convert(\"RGB\"))\n\n    @staticmethod\n    def visualize_agent_path(\n        positions: Sequence[Any],\n        frame: np.ndarray,\n        pos_translator: ThorPositionTo2DFrameTranslator,\n        single_color: bool = False,\n        view_triangle_only_on_last: bool = False,\n        disable_view_triangle: bool = False,\n        line_opacity: float = 1.0,\n        trajectory_start_end_color_str: Tuple[str, str] = (\"red\", \"green\"),\n    ) -> np.ndarray:\n        if single_color:\n            frame = ThorViz.add_lines_to_map(\n                list(map(ThorViz.position_to_tuple, positions)),\n                frame,\n                pos_translator,\n                line_opacity,\n                tuple(\n                    map(\n                        lambda x: int(round(255 * x)),\n                        col.Color(trajectory_start_end_color_str[0]).rgb,\n                    )\n                ),\n            )\n        else:\n            if len(positions) > 1:\n                colors = list(\n                    col.Color(trajectory_start_end_color_str[0]).range_to(\n                        col.Color(trajectory_start_end_color_str[1]), len(positions) - 1\n                    )\n                )\n            for i in range(len(positions) - 1):\n                frame = ThorViz.add_line_to_map(\n                    ThorViz.position_to_tuple(positions[i]),\n                    ThorViz.position_to_tuple(positions[i + 1]),\n               
     frame,\n                    pos_translator,\n                    opacity=line_opacity,\n                    color=tuple(map(lambda x: int(round(255 * x)), colors[i].rgb)),\n                )\n\n        if view_triangle_only_on_last:\n            positions = [positions[-1]]\n        if disable_view_triangle:\n            positions = []\n        for position in positions:\n            frame = ThorViz.add_agent_view_triangle(\n                ThorViz.position_to_tuple(position),\n                rotation=position[\"rotation\"],\n                frame=frame,\n                pos_translator=pos_translator,\n                opacity=0.05 + view_triangle_only_on_last * 0.2,\n            )\n        return frame\n\n    def dump_top_down_view(self, room_name: str, image_path: str):\n        get_logger().debug(\"Dumping {}\".format(image_path))\n\n        self.controller.reset(room_name)\n        self.controller.step(\n            {\"action\": \"Initialize\", \"gridSize\": 0.1, \"makeAgentsVisible\": False}\n        )\n        self.controller.step({\"action\": \"ToggleMapView\"})\n        top_down_view = self.controller.last_event.cv2img\n\n        cv2.imwrite(image_path, top_down_view)\n\n    def make_fig(self, episode: Any, episode_id: str) -> Figure:\n        trajectory: Sequence[Dict[str, Any]] = self._access(\n            episode, self.path_to_trajectory\n        )\n\n        if self.thor_top_downs is None:\n            self.init_top_down_render()\n\n        roomname = \"_\".join(episode_id.split(\"_\")[:3])\n\n        im = self.visualize_agent_path(\n            trajectory,\n            self.thor_top_downs[roomname],\n            self.map_data[\"pos_translator\"],\n            single_color=self.single_color,\n            view_triangle_only_on_last=self.view_triangle_only_on_last,\n            disable_view_triangle=self.disable_view_triangle,\n            line_opacity=self.line_opacity,\n        )\n\n        fig, ax = plt.subplots(figsize=self.figsize)\n        
ax.set_title(episode_id, fontsize=self.fontsize)\n        ax.imshow(self.crop_viz_image(im)[:, :, ::-1])\n        ax.axis(\"off\")\n\n        return fig\n\n\nclass ThorMultiViz(ThorViz):\n    def __init__(\n        self,\n        path_to_trajectory_prefix: Sequence[str] = (\"task_info\", \"followed_path\"),\n        agent_suffixes: Sequence[str] = (\"1\", \"2\"),\n        label: str = \"thor_trajectories\",\n        trajectory_start_end_color_strs: Sequence[Tuple[str, str]] = (\n            (\"red\", \"green\"),\n            (\"cyan\", \"purple\"),\n        ),\n        **kwargs\n    ):\n        super().__init__(label=label, **kwargs)\n\n        self.path_to_trajectory_prefix = list(path_to_trajectory_prefix)\n        self.agent_suffixes = list(agent_suffixes)\n        self.trajectory_start_end_color_strs = list(trajectory_start_end_color_strs)\n\n    def make_fig(self, episode: Any, episode_id: str) -> Figure:\n        if self.thor_top_downs is None:\n            self.init_top_down_render()\n\n        roomname = \"_\".join(episode_id.split(\"_\")[:3])\n        im = self.thor_top_downs[roomname]\n\n        for agent, start_end_color in zip(\n            self.agent_suffixes, self.trajectory_start_end_color_strs\n        ):\n            path = self.path_to_trajectory_prefix[:]\n            path[-1] = path[-1] + agent\n            trajectory = self._access(episode, path)\n\n            im = self.visualize_agent_path(\n                trajectory,\n                im,\n                self.map_data[\"pos_translator\"],\n                single_color=self.single_color,\n                view_triangle_only_on_last=self.view_triangle_only_on_last,\n                disable_view_triangle=self.disable_view_triangle,\n                line_opacity=self.line_opacity,\n                trajectory_start_end_color_str=start_end_color,\n            )\n\n        fig, ax = plt.subplots(figsize=self.figsize)\n        ax.set_title(episode_id, fontsize=self.fontsize)\n        
ax.imshow(self.crop_viz_image(im)[:, :, ::-1])\n        ax.axis(\"off\")\n\n        return fig\n"
  },
  {
    "path": "allenact_plugins/robothor_plugin/scripts/__init__.py",
    "content": ""
  },
  {
    "path": "allenact_plugins/robothor_plugin/scripts/make_objectnav_debug_dataset.py",
    "content": "import gzip\nimport json\nimport os\nfrom typing import Sequence, Optional\n\nfrom allenact_plugins.robothor_plugin.robothor_task_samplers import (\n    ObjectNavDatasetTaskSampler,\n)\n\n\ndef create_debug_dataset_from_train_dataset(\n    scene: str,\n    target_object_type: Optional[str],\n    episodes_subset: Sequence[int],\n    train_dataset_path: str,\n    base_debug_output_path: str,\n):\n    downloaded_episodes = os.path.join(\n        train_dataset_path, \"episodes\", scene + \".json.gz\"\n    )\n\n    assert os.path.exists(downloaded_episodes), (\n        \"'{}' doesn't seem to exist or is empty. Make sure you've downloaded to download the appropriate\"\n        \" training dataset with\"\n        \" datasets/download_navigation_datasets.sh\".format(downloaded_episodes)\n    )\n\n    # episodes\n    episodes = ObjectNavDatasetTaskSampler.load_dataset(\n        scene=scene, base_directory=os.path.join(train_dataset_path, \"episodes\")\n    )\n\n    if target_object_type is not None:\n        ids = {\n            \"{}_{}_{}\".format(scene, target_object_type, epit)\n            for epit in episodes_subset\n        }\n    else:\n        ids = {\"{}_{}\".format(scene, epit) for epit in episodes_subset}\n    debug_episodes = [ep for ep in episodes if ep[\"id\"] in ids]\n    assert len(ids) == len(debug_episodes), (\n        f\"Number of input ids ({len(ids)}) does not equal\"\n        f\" number of output debug tasks ({len(debug_episodes)})\"\n    )\n\n    # sort by episode_ids\n    debug_episodes = [\n        idep[1]\n        for idep in sorted(\n            [(int(ep[\"id\"].split(\"_\")[-1]), ep) for ep in debug_episodes],\n            key=lambda x: x[0],\n        )\n    ]\n    assert len(debug_episodes) == len(episodes_subset)\n\n    episodes_dir = os.path.join(base_debug_output_path, \"episodes\")\n    os.makedirs(episodes_dir, exist_ok=True)\n    episodes_file = os.path.join(episodes_dir, scene + \".json.gz\")\n\n    json_str = 
json.dumps(debug_episodes)\n    json_bytes = json_str.encode(\"utf-8\")\n    with gzip.GzipFile(episodes_file, \"w\") as fout:\n        fout.write(json_bytes)\n    assert os.path.exists(episodes_file)\n\n\nif __name__ == \"__main__\":\n    CURRENT_PATH = os.getcwd()\n    SCENE = \"FloorPlan_Train1_1\"\n    TARGET = \"Television\"\n    EPISODES = [0, 7, 11, 12]\n    BASE_OUT = os.path.join(CURRENT_PATH, \"datasets\", \"robothor-objectnav\", \"debug\")\n\n    create_debug_dataset_from_train_dataset(\n        scene=SCENE,\n        target_object_type=TARGET,\n        episodes_subset=EPISODES,\n        train_dataset_path=os.path.join(\n            CURRENT_PATH, \"datasets\", \"robothor-objectnav\", \"train\"\n        ),\n        base_debug_output_path=BASE_OUT,\n    )\n"
  },
  {
    "path": "allenact_plugins/robothor_plugin/scripts/make_pointnav_debug_dataset.py",
    "content": "import os\n\nfrom allenact_plugins.robothor_plugin.scripts.make_objectnav_debug_dataset import (\n    create_debug_dataset_from_train_dataset,\n)\n\nif __name__ == \"__main__\":\n    CURRENT_PATH = os.getcwd()\n    SCENE = \"FloorPlan_Train1_1\"\n    EPISODES = [3, 4, 5, 6]\n    BASE_OUT = os.path.join(CURRENT_PATH, \"datasets\", \"robothor-pointnav\", \"debug\")\n\n    create_debug_dataset_from_train_dataset(\n        scene=SCENE,\n        target_object_type=None,\n        episodes_subset=EPISODES,\n        train_dataset_path=os.path.join(\n            CURRENT_PATH, \"datasets\", \"robothor-pointnav\", \"train\"\n        ),\n        base_debug_output_path=BASE_OUT,\n    )\n"
  },
  {
    "path": "allenact_plugins/setup.py",
    "content": "import glob\nimport os\nfrom pathlib import Path\n\nfrom setuptools import find_packages, setup\n\n\ndef parse_req_file(fname, initial=None):\n    \"\"\"Reads requires.txt file generated by setuptools and outputs a\n    new/updated dict of extras as keys and corresponding lists of dependencies\n    as values.\n\n    The input file's contents are similar to a `ConfigParser` file, e.g.\n    pkg_1\n    pkg_2\n    pkg_3\n\n    [extras1]\n    pkg_4\n    pkg_5\n\n    [extras2]\n    pkg_6\n    pkg_7\n    \"\"\"\n    reqs = {} if initial is None else initial\n    cline = None\n    with open(fname, \"r\") as f:\n        for line in f.readlines():\n            line = line[:-1].strip()\n            if len(line) == 0:\n                continue\n            if line[0] == \"[\":\n                # Add new key for current extras (if missing in dict)\n                cline = line[1:-1].strip()\n                if cline not in reqs:\n                    reqs[cline] = []\n            else:\n                # Only keep dependencies from extras\n                if cline is not None:\n                    reqs[cline].append(line)\n    return reqs\n\n\ndef get_version(fname):\n    \"\"\"Reads PKG-INFO file generated by setuptools and extracts the Version\n    number.\"\"\"\n    res = \"UNK\"\n    with open(fname, \"r\") as f:\n        for line in f.readlines():\n            line = line[:-1]\n            if line.startswith(\"Version:\"):\n                res = line.replace(\"Version:\", \"\").strip()\n                break\n    if res in [\"UNK\", \"\"]:\n        raise ValueError(f\"Missing Version number in {fname}\")\n    return res\n\n\ndef run_setup():\n    base_dir = os.path.abspath(os.path.dirname(Path(__file__)))\n\n    if not os.path.exists(\n        os.path.join(base_dir, \"allenact_plugins.egg-info/dependency_links.txt\")\n    ):\n        # Build mode for sdist\n\n        # Extra dependencies required for various plugins\n        extras = {}\n        for 
plugin_path in glob.glob(os.path.join(base_dir, \"*_plugin\")):\n            plugin_name = os.path.basename(plugin_path).replace(\"_plugin\", \"\")\n            extra_reqs_path = os.path.join(plugin_path, \"extra_requirements.txt\")\n            if os.path.exists(extra_reqs_path):\n                with open(extra_reqs_path, \"r\") as f:\n                    # Filter out non-PyPI dependencies\n                    extras[plugin_name] = [\n                        clean_dep\n                        for clean_dep in (dep.strip() for dep in f.readlines())\n                        if clean_dep != \"\"\n                        and not clean_dep.startswith(\"#\")\n                        and \"@ git+https://github.com/\" not in clean_dep\n                    ]\n        extras[\"all\"] = sum(extras.values(), [])\n\n        os.chdir(os.path.join(base_dir, \"..\"))\n\n        with open(\".VERSION\", \"r\") as f:\n            __version__ = f.readline().strip()\n    else:\n        # Install mode from sdist\n        __version__ = get_version(\n            os.path.join(base_dir, \"allenact_plugins.egg-info/PKG-INFO\")\n        )\n        extras = parse_req_file(\n            os.path.join(base_dir, \"allenact_plugins.egg-info/requires.txt\")\n        )\n\n    setup(\n        name=\"allenact_plugins\",\n        version=__version__,\n        description=\"Plugins for the AllenAct framework\",\n        long_description=(\n            \"A collection of plugins/extensions for use within the AllenAct framework.\"\n        ),\n        classifiers=[\n            \"Intended Audience :: Science/Research\",\n            \"Development Status :: 3 - Alpha\",\n            \"License :: OSI Approved :: MIT License\",\n            \"Topic :: Scientific/Engineering :: Artificial Intelligence\",\n            \"Programming Language :: Python\",\n            \"Programming Language :: Python :: 3.6\",\n            \"Programming Language :: Python :: 3.7\",\n            \"Programming Language :: Python 
:: 3.8\",\n            \"Programming Language :: Python :: 3.9\",\n            \"Programming Language :: Python :: 3.10\",\n        ],\n        keywords=[\"reinforcement learning\", \"embodied-AI\", \"AI\", \"RL\", \"SLAM\"],\n        url=\"https://github.com/allenai/allenact\",\n        author=\"Allen Institute for Artificial Intelligence\",\n        author_email=\"lucaw@allenai.org\",\n        license=\"MIT\",\n        packages=find_packages(include=[\"allenact_plugins\", \"allenact_plugins.*\"]),\n        install_requires=[f\"allenact=={__version__}\"],\n        setup_requires=[\"pytest-runner\"],\n        tests_require=[\"pytest\", \"pytest-cov\"],\n        extras_require=extras,\n    )\n\n\nif __name__ == \"__main__\":\n    run_setup()\n"
  },
  {
    "path": "conda/environment-10.1.yml",
    "content": "channels:\n  - defaults\n  - pytorch\ndependencies:\n  - cudatoolkit=10.1\n  - pytorch>=1.6.0,!=1.8.0\n  - torchvision>=0.7.0,<0.10.0\n"
  },
  {
    "path": "conda/environment-10.2.yml",
    "content": "channels:\n  - defaults\n  - pytorch\ndependencies:\n  - cudatoolkit=10.2\n  - pytorch>=1.6.0,!=1.8.0\n  - torchvision>=0.7.0,<0.10.0\n"
  },
  {
    "path": "conda/environment-11.1.yml",
    "content": "channels:\n  - defaults\n  - pytorch\n  - nvidia\ndependencies:\n  - cudatoolkit=11.1\n  - pytorch>=1.6.0,!=1.8.0\n  - torchvision>=0.7.0\n"
  },
  {
    "path": "conda/environment-9.2.yml",
    "content": "channels:\n  - defaults\n  - pytorch\ndependencies:\n  - cudatoolkit=9.2\n  - pytorch>=1.6.0,!=1.8.0\n  - torchvision>=0.7.0,<0.10.0\n"
  },
  {
    "path": "conda/environment-base.yml",
    "content": "channels:\n  - defaults\n  - pytorch\n  - conda-forge\ndependencies:\n  - python=3.8\n  - certifi\n  - chardet=4.0.0\n  - cloudpickle=1.6.0\n  - cycler=0.10.0\n  - decorator=4.4.2\n  - filelock=3.0.12\n  - future=0.18.2\n  - gym>=0.17.0,<0.20.0\n  - idna>=2.10\n  - imageio>=2.9.0\n  - imageio-ffmpeg>=0.4.3\n  - kiwisolver=1.3.1\n  - matplotlib>=3.3.1\n  - networkx\n  - numpy>=1.19.1\n  - opencv\n  - conda-forge::pillow>=8.2.0,<9.0.0\n  - pip\n  - proglog>=0.1.9\n  - protobuf>=3.14.0\n  - pyglet>=1.5.0\n  - pyparsing>=2.4.7\n  - python-dateutil>=2.8.1\n  - pytorch::pytorch>=1.6.0,!=1.8.0\n  - pytorch::torchvision>=0.7.0\n  - requests>=2.25.1\n  - setproctitle\n  - six>=1.15.0\n  - tensorboardx>=2.1\n  - tqdm\n  - urllib3>=1.26.2\n  - attrs\n  - pip:\n    - moviepy>=1.0.3\n    - scipy>=1.5.4\n    - compress-pickle>=1.2.0\n"
  },
  {
    "path": "conda/environment-cpu.yml",
    "content": "channels:\n  - defaults\n  - pytorch\ndependencies:\n  - cpuonly\n  - pytorch>=1.6.0,!=1.8.0\n  - torchvision>=0.7.0,<0.10.0\n"
  },
  {
    "path": "conda/environment-dev.yml",
    "content": "channels:\n  - defaults\n  - conda-forge\ndependencies:\n  - black>=24.2.0\n  - docformatter>=1.3.1\n  - gitpython\n  - markdown>=3.3\n  - mkdocs>=1.1.2\n  - mkdocs-material>=5.5.3\n  - mkdocs-material-extensions>=1.0\n  - mypy\n  - pre-commit\n  - pytest>=6.1.1\n  - ruamel.yaml\n  - pip:\n    - pydoc-markdown>=3.4.0\n"
  },
  {
    "path": "constants.py",
    "content": "import os\nfrom pathlib import Path\n\nABS_PATH_OF_TOP_LEVEL_DIR = os.path.abspath(os.path.dirname(Path(__file__)))\nABS_PATH_OF_DOCS_DIR = os.path.join(ABS_PATH_OF_TOP_LEVEL_DIR, \"docs\")\n"
  },
  {
    "path": "datasets/.gitignore",
    "content": "*\n!.gitignore\n!*.sh\n!.habitat_datasets_download_info.json\n!.habitat_downloader_helper.py\n!habitat/configs/debug_habitat_pointnav.yaml"
  },
  {
    "path": "datasets/.habitat_datasets_download_info.json",
    "content": "{\n    \"pointnav-gibson-v1\": {\n        \"link\": \"https://dl.fbaipublicfiles.com/habitat/data/datasets/pointnav/gibson/v1/pointnav_gibson_v1.zip\",\n        \"rel_path\": \"data/datasets/pointnav/gibson/v1/\",\n        \"config_url\": \"configs/datasets/imagenav/gibson.yaml\"\n    },\n    \"pointnav-gibson-v2\": {\n        \"link\": \"https://dl.fbaipublicfiles.com/habitat/data/datasets/pointnav/gibson/v2/pointnav_gibson_v2.zip\",\n        \"rel_path\": \"data/datasets/pointnav/gibson/v2/\",\n        \"config_url\": \"configs/datasets/pointnav/gibson.yaml\"\n    },\n    \"pointnav-mp3d-v1\": {\n        \"link\": \"https://dl.fbaipublicfiles.com/habitat/data/datasets/pointnav/mp3d/v1/pointnav_mp3d_v1.zip\",\n        \"rel_path\": \"data/datasets/pointnav/mp3d/v1/\",\n        \"config_url\": \"configs/datasets/imagenav/mp3d.yaml\"\n    },\n    \"objectnav-mp3d-v1\": {\n        \"link\": \"https://dl.fbaipublicfiles.com/habitat/data/datasets/objectnav/m3d/v1/objectnav_mp3d_v1.zip\",\n        \"rel_path\": \"data/datasets/objectnav/mp3d/v1/\",\n        \"config_url\": \"configs/datasets/objectnav/mp3d.yaml\"\n    },\n    \"eqa-mp3d-v1\": {\n        \"link\": \"https://dl.fbaipublicfiles.com/habitat/data/datasets/eqa/mp3d/v1/eqa_mp3d_v1.zip\",\n        \"rel_path\": \"data/datasets/eqa/mp3d/v1/\",\n        \"config_url\": \"configs/datasets/eqa/mp3d.yaml\"\n    },\n    \"vln-r2r-mp3d-v1\": {\n        \"link\": \"https://dl.fbaipublicfiles.com/habitat/data/datasets/vln/mp3d/r2r/v1/vln_r2r_mp3d_v1.zip\",\n        \"rel_path\": \"data/datasets/vln/mp3d/r2r/v1\",\n        \"config_url\": \"configs/datasets/vln/mp3d_r2r.yaml\"\n    }\n}\n"
  },
  {
    "path": "datasets/.habitat_downloader_helper.py",
    "content": "import json\nimport os\nimport re\nimport shutil\nimport sys\nfrom pathlib import Path\nfrom urllib.request import urlopen\n\nfrom allenact.utils.misc_utils import all_equal\n\nDATASET_DIR = os.path.abspath(os.path.dirname(Path(__file__)))\n\n\ndef get_habitat_download_info(allow_create: bool = False):\n    \"\"\"Get a dictionary giving a specification of where habitat data lives\n    online.\n\n    # Parameters\n\n    allow_create: Whether or not we should try to regenerate the json file that represents\n        the above dictionary. This is potentially unsafe so please only set this to `True`\n        if you're sure it will download what you want.\n    \"\"\"\n    json_save_path = os.path.join(DATASET_DIR, \".habitat_datasets_download_info.json\")\n    if allow_create and not os.path.exists(json_save_path):\n        url = \"https://raw.githubusercontent.com/facebookresearch/habitat-lab/master/README.md\"\n        output = urlopen(url).read().decode(\"utf-8\")\n\n        lines = [l.strip() for l in output.split(\"\\n\")]\n\n        task_table_started = False\n        table_lines = []\n        for l in lines:\n            if l.count(\"|\") > 3 and l[0] == l[-1] == \"|\":\n                if task_table_started:\n                    table_lines.append(l)\n                elif \"Task\" in l and \"Link\" in l:\n                    task_table_started = True\n                    table_lines.append(l)\n            elif task_table_started:\n                break\n\n        url_pat = re.compile(\"\\[.*\\]\\((.*)\\)\")\n\n        def get_url(in_str: str):\n            match = re.match(pattern=url_pat, string=in_str)\n            if match:\n                return match.group(1)\n            else:\n                return in_str\n\n        header = None\n        rows = []\n        for i, l in enumerate(table_lines):\n            l = l.strip(\"|\")\n            entries = [get_url(e.strip().replace(\"`\", \"\")) for e in l.split(\"|\")]\n\n            if i == 0:\n 
               header = [e.lower().replace(\" \", \"_\") for e in entries]\n            elif not all_equal(entries):\n                rows.append(entries)\n\n        link_ind = header.index(\"link\")\n        extract_ind = header.index(\"extract_path\")\n        config_ind = header.index(\"config_to_use\")\n        assert link_ind >= 0\n\n        data_info = {}\n        for row in rows:\n            id = row[link_ind].split(\"/\")[-1].replace(\".zip\", \"\").replace(\"_\", \"-\")\n            data_info[id] = {\n                \"link\": row[link_ind],\n                \"rel_path\": row[extract_ind],\n                \"config_url\": row[config_ind],\n            }\n\n        with open(json_save_path, \"w\") as f:\n            json.dump(data_info, f)\n\n    with open(json_save_path, \"r\") as f:\n        return json.load(f)\n\n\nif __name__ == \"__main__\":\n    habitat_dir = os.path.join(DATASET_DIR, \"habitat\")\n    os.makedirs(habitat_dir, exist_ok=True)\n    os.chdir(habitat_dir)\n\n    download_info = get_habitat_download_info(allow_create=False)\n\n    if len(sys.argv) != 2 or sys.argv[1] not in download_info:\n        print(\n            \"Incorrect input, expects a single input where this input is one of \"\n            f\" {['test-scenes', *sorted(download_info.keys())]}.\"\n        )\n        quit(1)\n\n    task_key = sys.argv[1]\n    task_dl_info = download_info[task_key]\n\n    output_archive_name = \"__TO_OVERWRITE__.zip\"\n    deletable_dir_name = \"__TO_DELETE__\"\n\n    cmd = f\"wget {task_dl_info['link']} -O {output_archive_name}\"\n    if os.system(cmd):\n        print(f\"ERROR: `{cmd}` failed.\")\n        quit(1)\n\n    cmd = f\"unzip {output_archive_name} -d {deletable_dir_name}\"\n    if os.system(cmd):\n        print(f\"ERROR: `{cmd}` failed.\")\n        quit(1)\n\n    download_to_path = task_dl_info[\"rel_path\"].replace(\"data/\", \"\")\n    if download_to_path[-1] == \"/\":\n        download_to_path = download_to_path[:-1]\n\n    
os.makedirs(download_to_path, exist_ok=True)\n\n    cmd = f\"rsync -avz {deletable_dir_name}/ {download_to_path}/\"\n    if os.system(cmd):\n        print(f\"ERROR: `{cmd}` failed.\")\n        quit(1)\n\n    os.remove(output_archive_name)\n    shutil.rmtree(deletable_dir_name)\n"
  },
  {
    "path": "datasets/download_habitat_datasets.sh",
    "content": "#!/bin/bash\n\n# Move to the directory containing this file\ncd \"$( cd \"$( dirname \"${BASH_SOURCE[0]}\" )\" >/dev/null 2>&1 && pwd )\" || exit\n\nmkdir -p habitat\nmkdir -p habitat/scene_datasets\nmkdir -p habitat/datasets\nmkdir -p habitat/configs\n\ncd habitat || exit\n\noutput_archive_name=__TO_OVERWRITE__.zip\ndeletable_dir_name=__TO_DELETE__\n\ninstall_test_scenes_and_data() {\n    if ! wget http://dl.fbaipublicfiles.com/habitat/habitat-test-scenes.zip -O $output_archive_name; then\n      echo \"Could not unzip download test scenes from http://dl.fbaipublicfiles.com/habitat/habitat-test-scenes.zip\"\n      exit 1\n    fi\n    if ! unzip $output_archive_name -d $deletable_dir_name; then\n      echo \"Could not unzip $output_archive_name to $deletable_dir_name\"\n      exit 1\n    fi\n    rsync -avz $deletable_dir_name/data/datasets . && \\\n    rsync -avz $deletable_dir_name/data/scene_datasets . && \\\n    rm $output_archive_name && \\\n    rm -r $deletable_dir_name\n}\n\ninstall_scene_data() {\n  python3 ../.habitat_downloader_helper.py \"$1\"\n}\n\nif [ \"$1\" = \"test-scenes\" ]\nthen\n  install_test_scenes_and_data\n\nelse\n  install_scene_data $1\nfi\n\n"
  },
  {
    "path": "datasets/download_navigation_datasets.sh",
    "content": "#!/bin/bash\n\n# Move to the directory containing this file\ncd \"$( cd \"$( dirname \"${BASH_SOURCE[0]}\" )\" >/dev/null 2>&1 && pwd )\" || exit\n\ninstall_dataset() {\n    dataset_name=\"$1\"\n    download_suffix=\"$2\"\n    if ! mkdir \"$dataset_name\" ; then\n      echo \"Could not create directory \" $(pwd)/$dataset_name \"Does it already exist? If so, delete it.\"\n      exit 1\n    fi\n    url_archive_name=$dataset_name$download_suffix.tar.gz\n    output_archive_name=__TO_OVERWRITE__.tar.gz\n    wget https://prior-datasets.s3.us-east-2.amazonaws.com/embodied-ai/navigation/$url_archive_name -O $output_archive_name\n    tar -xf \"$output_archive_name\" -C \"$dataset_name\" --strip-components=1 && rm $output_archive_name\n    echo \"saved folder: \"$dataset_name\"\"\n}\n\n\n# Download, Unzip, and Remove zip\nif [ \"$1\" = \"robothor-pointnav\" ]\nthen\n    echo \"Downloading RoboTHOR PointNav Dataset ...\"\n    install_dataset \"$1\" \"-v0\"\n    cd ..\n    echo \"Generating RoboTHOR PointNav Debug Dataset ...\"\n    PYTHONPATH=. python ./allenact_plugins/robothor_plugin/scripts/make_pointnav_debug_dataset.py\n\nelif [ \"$1\" = \"robothor-objectnav\" ]\nthen\n    echo \"Downloading RoboTHOR ObjectNav Dataset ...\"\n    install_dataset \"$1\" \"-challenge-2021\"\n    cd ..\n    echo \"Generating RoboTHOR ObjectNav Debug Dataset ...\"\n    PYTHONPATH=. python ./allenact_plugins/robothor_plugin/scripts/make_objectnav_debug_dataset.py\n\nelif [ \"$1\" = \"ithor-pointnav\" ]\nthen\n    echo \"Downloading iTHOR PointNav Dataset ...\"\n    install_dataset \"$1\" \"-v0\"\n    cd ..\n    echo \"Generating iTHOR PointNav Debug Dataset ...\"\n    PYTHONPATH=. python ./allenact_plugins/ithor_plugin/scripts/make_pointnav_debug_dataset.py\n\nelif [ \"$1\" = \"ithor-objectnav\" ]\nthen\n    echo \"Downloading iTHOR ObjectNav Dataset ...\"\n    install_dataset \"$1\" \"-v0\"\n    cd ..\n    echo \"Generating iTHOR ObjectNav Debug Dataset ...\"\n    PYTHONPATH=. 
python ./allenact_plugins/ithor_plugin/scripts/make_objectnav_debug_dataset.py\n\nelif [ \"$1\" = \"all-thor\" ]\nthen\n    bash download_navigation_datasets.sh \"robothor-pointnav\"\n    bash download_navigation_datasets.sh \"robothor-objectnav\"\n    bash download_navigation_datasets.sh \"ithor-pointnav\"\n    bash download_navigation_datasets.sh \"ithor-objectnav\"\n\nelse\n    echo \"\\nFailed: Usage download_navigation_datasets.sh robothor-pointnav | robothor-objectnav | ithor-pointnav | ithor-objectnav | all-thor\"\n    exit 1\nfi\n"
  },
  {
    "path": "dev_requirements.txt",
    "content": "black==24.2.0\nruamel.yaml\ngitpython\nmarkdown==3.3\nmypy\npytest\ndocformatter==1.3.1\ndocstr-coverage==1.2.0\nmkdocs==1.1.2\nmkdocs-material==5.5.3\npre-commit\npydoc-markdown==3.4.0\ncompress-pickle==1.2.0\n"
  },
  {
    "path": "docs/.gitignore",
    "content": "api/\n"
  },
  {
    "path": "docs/CNAME",
    "content": "allenact.org"
  },
  {
    "path": "docs/FAQ.md",
    "content": "# FAQ\n\n## How do I file a bug regarding the code or documentation?\n\nPlease file bugs by submitting an [issue](https://github.com/allenai/allenact/issues). We also welcome contributions from the community, including new features and bugfixes on existing functionality. Please refer to our [contribution guidelines](CONTRIBUTING.md).\n\n## How do I generate documentation?\n\nDocumentation is generated using [mkdocs](https://www.mkdocs.org/) and\n[pydoc-markdown](https://pypi.org/project/pydoc-markdown/). \n\n### Building documentation locally\n\nThe `mkdocs` command used to build our documentation relies on all documentation existing\nas subdirectories of the `docs` folder. To ensure that all relevant markdown files are placed into\nthis directory, you should always run\n\n```bash\nbash scripts/build_docs.sh\n```\n\nfrom the top-level project directory before running any of the `mkdocs` commands below. \n\nIf you have made no changes to the documentation and only wish to build documentation on \nyour local machine, run the following from within the `allenact` root directory. Note: This will generate HTML documentation within the `site` folder\n\n```bash\nmkdocs build\n```\n\n### Serving documentation locally\n\nIf you have made no changes to the documentation and only wish to serve documentation on your local\n machine (with live reloading of modified documentation), run the following from within the `allenact` root directory.\n \n```bash\nmkdocs serve\n```\n\nThen navigate to [http://127.0.0.1:8000/](http://127.0.0.1:8000/)\n\n### Modifying and serving documentation locally\n\nIf you have made changes to the documentation, you will need to run a documentation builder script \nbefore you serve it on your local machine.\n\n```bash\nbash scripts/build_docs.sh\nmkdocs serve\n```\n\nThen navigate to [http://127.0.0.1:8000/](http://127.0.0.1:8000/)\n\nAlternatively, the `site` directory (once built) can be served as a static webpage on your local machine \nwithout installing any dependencies by running `python -m http.server 8000` from within the `site` directory.\n\n"
  },
  {
    "path": "docs/css/extra.css",
    "content": "/* Allow word-breaks in headers */\nh1 {\n  word-wrap: break-word;\n}\n\n/* Don't have the edit button as it's broken for us */\n.md-content__button {\n    display: none;\n}"
  },
  {
    "path": "docs/getting_started/abstractions.md",
    "content": "# Primary abstractions\n\nOur package relies on a collection of fundamental abstractions to define how, and in what task, an agent should be\ntrained and evaluated. A subset of these abstractions are described in plain language below. Each of the below sections\nends with a link to the (formal) documentation of the abstraction as well as a link to an example implementation of the\nabstraction (if relevant). The following provides a high-level illustration of how these abstractions interact.\n\n\n![abstractions-overview](../img/abstractions.png)\n\n## Experiment configuration\n\nIn `allenact`, experiments are defined by implementing the abstract `ExperimentConfig` class. The methods\nof this implementation are then called during training/inference to properly set up the desired experiment. For example,\nthe `ExperimentConfig.create_model` method will be called at the beginning of training to create the model\nto be trained.\nSee either the [\"designing your first minigrid experiment\"](/tutorials/minigrid-tutorial) or the\n[\"designing an experiment for point navigation\"](/tutorials/training-a-pointnav-model)\n tutorials to get an in-depth description of how these experiment configurations are defined in practice.\n\nSee also the [abstract `ExperimentConfig` class](/api/allenact/base_abstractions/experiment_config#experimentconfig) \nand an [example implementation](/api/allenact_plugins/ithor_plugin/ithor_environment/#ithorenvironment).\n\n## Task sampler\n\nA task sampler is responsible for generating a sequence of tasks for agents to solve. The sequence of tasks can be \nrandomly generated (e.g. in training) or extracted from an ordered pool (e.g. in validation or testing).\n\nSee the [abstract `TaskSampler` class](/api/allenact/base_abstractions/task/#tasksampler) \nand an [example implementation](/api/allenact_plugins/ithor_plugin/ithor_task_samplers/#objectnavtasksampler).\n\n## Task\n\nTasks define the scope of the interaction between agents and an environment (including the action types agents are \nallowed to execute), as well as metrics to evaluate the agents' performance. For example, we might define a task \n`ObjectNaviThorGridTask` in which agents receive observations obtained from the environment (e.g. RGB images) or directly from \nthe task (e.g. a target object class) and are allowed to execute actions such as `MoveAhead`, `RotateRight`, \n`RotateLeft`, and `End` whenever agents determine they have reached their target. The metrics might include a\nsuccess indicator or some quantitative metric on the optimality of the followed path.  \n\nSee the [abstract `Task` class](/api/allenact/base_abstractions/task/#task) \nand an [example implementation](/api/allenact_plugins/robothor_plugin/robothor_tasks/#objectnavtask).\n\n## Sensor\n\nSensors provide observations extracted from an environment (e.g. RGB or depth images) or directly from a task (e.g. the \nend point in point navigation or target object class in semantic navigation) that can be directly consumed by \nagents.\n\nSee the [abstract `Sensor` class](/api/allenact/base_abstractions/sensor/#sensor) \nand an [example implementation](/api/allenact_plugins/ithor_plugin/ithor_sensors/#rgbsensorthor).\n\n## Actor critic model\n\nThe actor-critic agent is responsible for computing batched action probabilities and state values given the \nobservations provided by sensors, internal state representations, previous actions, and potentially \nother inputs.\n\nSee the [abstract `ActorCriticModel` class](/api/allenact/algorithms/onpolicy_sync/policy/#ActorCriticModel) \nand an\n[example implementation](/api/projects/objectnav_baselines/models/object_nav_models#ObjectNavBaselineActorCritic).\n\n## Training pipeline\n\nThe training pipeline, defined in the\n[`ExperimentConfig`'s `training_pipeline` method](/api/allenact/base_abstractions/experiment_config/#training_pipeline),\ncontains one or more training stages where different\n[losses can be combined or sequentially applied](/howtos/defining-a-new-training-pipeline).\n \n## Losses\n\nActor-critic losses compute a combination of action loss and value loss out of collected experience that can be used to \ntrain actor-critic models with back-propagation, e.g. PPO or A2C.\n\nSee the\n[`AbstractActorCriticLoss` class](/api/allenact/algorithms/onpolicy_sync/losses/abstract_loss#abstractactorcriticloss) \nand an [example implementation](/api/allenact/algorithms/onpolicy_sync/losses/ppo/#ppo).\n\nOff-policy losses implement generic training iterations in which a batch of data is run through a model (that can be a\nsubgraph of an [`ActorCriticModel`](#actor-critic-model)) and a loss is\ncomputed on the model's output.\n\nSee the\n[`AbstractOffPolicyLoss` class](/api/allenact/algorithms/offpolicy_sync/losses/abstract_offpolicy_loss#abstractoffpolicyloss) \nand an [example implementation](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy/#MiniGridOffPolicyExpertCELoss).\n"
  },
  {
    "path": "docs/getting_started/running-your-first-experiment.md",
    "content": "# Running your first experiment\n\nAssuming you have [installed the full library](../installation/installation-allenact.md#full-library), you can run your\nfirst experiment by calling\n\n```bash\nPYTHONPATH=. python allenact/main.py minigrid_tutorial -b projects/tutorials -m 8 -o experiment_output/minigrid -s 12345\n```\n\nfrom the `allenact` root directory.\n\n* With `-b projects/tutorials` we tell `allenact` that `minigrid_tutorial` experiment config file \nwill be found in the `projects/tutorials` directory.\n* With `-m 8` we limit the number of subprocesses to 8 (each subprocess will run 16 of the 128 training task samplers).\n* With `-o experiment_output/minigrid` we set the output folder into which results and logs will be saved.\n* With `-s 12345` we set the random seed.\n\nIf everything was installed correctly, a simple model will be trained (and validated) in the MiniGrid environment and\na new folder `experiment_output/minigrid` will be created containing:\n\n* a `checkpoints/MiniGridTutorial/LOCAL_TIME_STR/` subfolder with model weight checkpoints,\n* a `used_configs/MiniGridTutorial/LOCAL_TIME_STR/` subfolder with all used configuration files,\n* and a tensorboard log file under `tb/MiniGridTutorial/LOCAL_TIME_STR/`.\n\nHere `LOCAL_TIME_STR` is a string that records the time when the experiment was started (e.g. the string \n`\"2020-08-21_18-19-47\"` corresponds to an experiment started on August 21st 2020, 47 seconds past 6:19pm. 
\n\nIf we have Tensorboard installed, we can track training progress with\n```bash\ntensorboard --logdir experiment_output/minigrid/tb\n```\nwhich will default to the URL [http://localhost:6006/](http://localhost:6006/).\n\nAfter 150,000 steps, the script will terminate and several checkpoints will be saved in the output folder.\nThe training curves should look similar to:\n\n![training curves](../img/minigrid_train.png)\n\nIf everything went well, the `valid` success rate should converge to 1 and the mean episode length to a value below 4.\n(For perfectly uniform sampling and complete observation, the expectation for the optimal policy is 3.75 steps.) In the\nnot-so-unlikely event of the run failing to converge to a near-optimal policy, we can just try to re-run (for example\nwith a different random seed). The validation curves should look similar to:\n\n![validation curves](../img/minigrid_valid.png)\n \nA detailed tutorial describing how the `minigrid_tutorial` experiment configuration was created can be found \n[here](../tutorials/minigrid-tutorial.md). \n \nTo run your own custom experiment simply define a new experiment configuration in a file \n`projects/YOUR_PROJECT_NAME/experiments/my_custom_experiment.py` after which you may run it with\n`PYTHONPATH=. python allenact/main.py my_custom_experiment -b projects/YOUR_PROJECT_NAME/experiments`.\n\n<!-- ## Experiment configuration\n\nThe main entry point for users is a configuration file that defines the experiment we\nwant to run. More concretely, it includes a single class defining:\n\n* A `tag` to identify the experiment.\n* A method to instantiate [actor-critic models](/getting_started/abstractions#actor-critic-model).\n* A multi-staged training pipeline with different types of [losses](/getting_started/abstractions#actor-critic-loss), an \noptimizer, and other parameters like learning rates, batch sizes, etc. \n* Machine configuration parameters that will be used e.g. 
for training or validation.\n* A method to instantiate [task samplers](/getting_started/abstractions#task-sampler).\n* Methods describing initialization parameters for task samplers used in training, validation, and testing; including\n the assignment of workers to devices for running environments.\n\nA detailed view to an example experiment config file can be found [here](/overview/experiment).\n -->"
  },
  {
    "path": "docs/getting_started/structure.md",
    "content": "# Structure of the codebase\n\nThe codebase consists of the following directories: `allenact`, `datasets`, `docs`, `overrides`, `allenact_plugins`,\n`pretrained_model_ckpts`, `projects`, `scripts`, and `tests`. Below, we explain the overall structure and how\ndifferent components of the codebase are organized. \n\n## [`allenact` directory](https://github.com/allenai/allenact/tree/master/allenact)\n\nContains runtime algorithms for on-policy and off-policy training and inference, base abstractions used throughout\nthe code base and basic models to be used as building blocks in future models.\n\n* `allenact.algorithms` includes on-policy and off-policy training nd inference algorithms and abstractions for losses,\npolicies, rollout storage, etc.\n\n* `allenact.base_abstractions` includes the base `ExperimentConfig`, distributions, base `Sensor`, `TaskSampler`, `Task`,\netc.\n\n* `allenact.embodiedai` includes basic CNN, and RNN state encoders, besides basic `ActorCriticModel` implementations\nfor embodied AI tasks.\n\n## [`datasets` directory](https://github.com/allenai/allenact/tree/master/datasets)\n\nA directory made to store task-specific datasets. For example, the script `datasets/download_navigation_datasets.sh` can\nbe used to automatically download task dataset files for Point Navigation within the RoboTHOR environment\nand it will place these files into a new `datasets/robothor-pointnav` directory. 
\n\n## [`docs` directory](https://github.com/allenai/allenact/tree/master/docs)\n\nContains documentation for the framework, including guides for installation and first experiments, how-to's for\nthe definition and usage of different abstractions, tutorials and per-project documentation.\n\n## [`overrides` directory](https://github.com/allenai/allenact/tree/master/overrides)\n\nFiles within this directory are used to customize the look and structure of the documentation generated when running `mkdocs`.\nSee our [FAQ](../FAQ.md) for information on how to generate this documentation for yourself. \n\n## [`allenact_plugins` directory](https://github.com/allenai/allenact/tree/master/allenact_plugins)\n\nContains implementations of `ActorCriticModel`s and `Task`s in different environments. Each plugin folder is \nnamed as `{environment}_plugin` and contains three subfolders:\n\n1. `configs` to host useful configuration for the environment or tasks.\n1. `data` to store data to be consumed by the environment or tasks.\n1. `scripts` to setup the plugin or gather and process data.\n\n## [`pretrained_model_ckpts` directory](https://github.com/allenai/allenact/tree/master/pretrained_model_ckpts)\n\nDirectory into which pretrained model checkpoints will be saved. See also the \n`pretrained_model_ckpts/download_navigation_model_ckpts.sh` which can be used to download such checkpoints.\n\n## [`projects` directory](https://github.com/allenai/allenact/tree/master/projects)\n\nContains project-specific code like experiment configurations and scripts to process results, generate visualizations\nor prepare data.\n\n## [`scripts` directory](https://github.com/allenai/allenact/tree/master/scripts)\n\nIncludes framework-wide scripts to build the documentation, format code, run_tests and start an xserver. 
The latter can\nbe used for OpenGL-based environments having super-user privileges in Linux, assuming NVIDIA drivers and `xserver-xorg`\nare installed.\n\n## [`tests` directory](https://github.com/allenai/allenact/tree/master/tests)\n\nIncludes unit tests for `allenact`.\n\n## [`allenact.utils` directory](https://github.com/allenai/allenact/tree/master/allenact/utils)\n\nIt includes different types of utilities, mainly divided into:\n\n* `allenact.utils.experiment_utils`, including the `TrainingPipeline`, `PipelineStage` and other utilities to configure an\nexperiment.\n* `allenact.utils.model_utils`, including generic CNN creation, forward-pass helpers and other utilities.\n* `allenact.utils.tensor_utils`, including functions to batch observations, convert tensors into video, scale image tensors, etc.\n* `allenact.utils.viz_utils`, including a `VizSuite` class that can be instantiated with different visualization plugins during\ninference.\n* `allenact.utils.system`, including logging and networking helpers.\n\nOther utils files, including `allenact.utils.misc_utils`, contain a number of helper functions for different purposes.\n"
  },
  {
    "path": "docs/howtos/changing-rewards-and-losses.md",
    "content": "# Changing rewards and losses\n\nIn order to train actor-critic agents, we need to specify\n\n* `rewards` at the task level, and\n* `losses` at the training pipeline level. \n\n## Rewards\n\nWe will use the [object navigation task in `iTHOR`](/api/allenact_plugins/ithor_plugin/ithor_tasks/#objectnavtask) as a \nrunning example. We can see how the `ObjectNaviThorGridTask._step(self, action: int) -> RLStepResult` method computes\nthe reward for the latest action by invoking a function like:\n\n```python\ndef judge(self) -> float:\n    reward = -0.01\n\n    if not self.last_action_success:\n        reward += -0.03\n\n    if self._took_end_action:\n        reward += 1.0 if self._success else -1.0\n\n    return float(reward)\n```\n\nAny reward shaping can be easily added by e.g. modifying the definition of an existing class:\n\n```python\nclass NavigationWithShaping(allenact_plugins.ithor_plugin.ithor_tasks.ObjectNaviThorGridTask):\n    def judge(self) -> float:\n        reward = super().judge()\n        \n        if self.previous_state is not None:\n            reward += float(my_reward_shaping_function(\n                self.previous_state,\n                self.current_state,\n            ))\n        \n        self.previous_state = self.current_state\n        \n        return reward\n\n``` \n\n## Losses\n\nWe support [A2C](/api/allenact/algorithms/onpolicy_sync/losses/a2cacktr#a2c),\n[PPO](/api/allenact/algorithms/onpolicy_sync/losses/ppo#ppo), and\n[imitation](/api/allenact/algorithms/onpolicy_sync/losses/imitation#imitation) losses amongst others. 
We can easily\ninclude [DAgger](https://www.cs.cmu.edu/~sross1/publications/Ross-AIStats11-NoRegret.pdf) or variations thereof by\nassuming the availability of an expert providing optimal actions to agents and combining imitation and PPO losses in\ndifferent ways through multiple stages:\n\n```python\nclass MyExperimentConfig(allenact.base_abstractions.experiment_config.ExperimentConfig):\n    ...\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        dagger_steps = int(3e4)\n        ppo_steps = int(3e4)\n        ppo_steps2 = int(1e6)\n        ...\n        return allenact.utils.experiment_utils.TrainingPipeline(\n            named_losses={\n                \"imitation_loss\": allenact.algorithms.onpolicy_sync.losses.imitation.Imitation(),\n                \"ppo_loss\": allenact.algorithms.onpolicy_sync.losses.ppo.PPO(\n                    **allenact.algorithms.onpolicy_sync.losses.ppo.PPOConfig,\n                ),\n            },\n            ...\n            pipeline_stages=[\n                allenact.utils.experiment_utils.PipelineStage(\n                    loss_names=[\"imitation_loss\", \"ppo_loss\"],\n                    teacher_forcing=allenact.utils.experiment_utils.LinearDecay(\n                        startp=1.0, endp=0.0, steps=dagger_steps,\n                    ),\n                    max_stage_steps=dagger_steps,\n                ),\n                allenact.utils.experiment_utils.PipelineStage(\n                    loss_names=[\"ppo_loss\", \"imitation_loss\"],\n                    max_stage_steps=ppo_steps\n                ),\n                allenact.utils.experiment_utils.PipelineStage(\n                    loss_names=[\"ppo_loss\"],\n                    max_stage_steps=ppo_steps2,\n                ),\n            ],\n        )\n```\n"
  },
  {
    "path": "docs/howtos/defining-a-new-model.md",
    "content": "# Defining a new model\n\nAll actor-critic models must implement the interface described by the\n[ActorCriticModel class](/api/allenact/algorithms/onpolicy_sync/policy/#actorcriticmodel). This interface includes two methods that need to be \nimplemented:\n\n* `recurrent_memory_specification`, returning a description of the model's recurrent memory; and \n* `forward`, returning an [ActorCriticOutput](/api/allenact/base_abstractions/misc/#actorcriticoutput) given the current observation,\nhidden state and previous actions.\n\nFor convenience, we provide a [recurrent network module](/api/allenact/embodiedai/models/basic_models/#rnnstateencoder) and\n[a simple CNN module](/api/allenact/embodiedai/models/basic_models/#simplecnn) from the Habitat baseline navigation\nmodels, that will be used in this example.\n\n### Actor-critic model interface\n\nAs an example, let's build an object navigation agent.\n\n```python\nclass ObjectNavBaselineActorCritic(ActorCriticModel[CategoricalDistr]):\n    \"\"\"Baseline recurrent actor critic model for object-navigation.\n\n    # Attributes\n    action_space : The space of actions available to the agent. Currently only discrete\n        actions are allowed (so this space will always be of type `gym.spaces.Discrete`).\n    observation_space : The observation space expected by the agent. This observation space\n        should include (optionally) 'rgb' images and 'depth' images and is required to\n        have a component corresponding to the goal `goal_sensor_uuid`.\n    goal_sensor_uuid : The uuid of the sensor of the goal object. 
See `GoalObjectTypeThorSensor`\n        as an example of such a sensor.\n    hidden_size : The hidden size of the GRU RNN.\n    object_type_embedding_dim: The dimensionality of the embedding corresponding to the goal\n        object type.\n    \"\"\"\n\n    def __init__(\n        self,\n        action_space: gym.spaces.Discrete,\n        observation_space: SpaceDict,\n        goal_sensor_uuid: str,\n        rgb_uuid: Optional[str],\n        depth_uuid: Optional[str],\n        hidden_size=512,\n        object_type_embedding_dim=8,\n        trainable_masked_hidden_state: bool = False,\n        num_rnn_layers=1,\n        rnn_type=\"GRU\",\n    ):\n        \"\"\"Initializer.\n\n        See class documentation for parameter definitions.\n        \"\"\"\n        super().__init__(action_space=action_space, observation_space=observation_space)\n\n        self.goal_sensor_uuid = goal_sensor_uuid\n        self._n_object_types = self.observation_space.spaces[self.goal_sensor_uuid].n\n        self._hidden_size = hidden_size\n        self.object_type_embedding_size = object_type_embedding_dim\n\n        self.visual_encoder = SimpleCNN(\n            observation_space=self.observation_space,\n            output_size=self._hidden_size,\n            rgb_uuid=rgb_uuid,\n            depth_uuid=depth_uuid,\n        )\n\n        self.state_encoder = RNNStateEncoder(\n            (0 if self.is_blind else self._hidden_size) + object_type_embedding_dim,\n            self._hidden_size,\n            trainable_masked_hidden_state=trainable_masked_hidden_state,\n            num_layers=num_rnn_layers,\n            rnn_type=rnn_type,\n        )\n\n        self.actor = LinearActorHead(self._hidden_size, action_space.n)\n        self.critic = LinearCriticHead(self._hidden_size)\n\n        self.object_type_embedding = nn.Embedding(\n            num_embeddings=self._n_object_types,\n            embedding_dim=object_type_embedding_dim,\n        )\n\n        self.train()\n\n    @property\n    def 
recurrent_hidden_state_size(self) -> int:\n        \"\"\"The recurrent hidden state size of the model.\"\"\"\n        return self._hidden_size\n\n    @property\n    def is_blind(self) -> bool:\n        \"\"\"True if the model is blind (e.g. neither 'depth' or 'rgb' is an\n        input observation type).\"\"\"\n        return self.visual_encoder.is_blind\n\n    @property\n    def num_recurrent_layers(self) -> int:\n        \"\"\"Number of recurrent hidden layers.\"\"\"\n        return self.state_encoder.num_recurrent_layers\n\n    def _recurrent_memory_specification(self):\n        return dict(\n            rnn=(\n                (\n                    (\"layer\", self.num_recurrent_layers),\n                    (\"sampler\", None),\n                    (\"hidden\", self.recurrent_hidden_state_size),\n                ),\n                torch.float32,\n            )\n        )\n\n    def get_object_type_encoding(\n        self, observations: Dict[str, torch.FloatTensor]\n    ) -> torch.FloatTensor:\n        \"\"\"Get the object type encoding from input batched observations.\"\"\"\n        # noinspection PyTypeChecker\n        return self.object_type_embedding(  # type:ignore\n            observations[self.goal_sensor_uuid].to(torch.int64)\n        )\n\n    def forward(  # type:ignore\n        self,\n        observations: ObservationType,\n        memory: Memory,\n        prev_actions: torch.Tensor,\n        masks: torch.FloatTensor,\n    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:\n        \"\"\"Processes input batched observations to produce new actor and critic\n        values. 
Processes input batched observations (along with prior hidden\n        states, previous actions, and masks denoting which recurrent hidden\n        states should be masked) and returns an `ActorCriticOutput` object\n        containing the model's policy (distribution over actions) and\n        evaluation of the current state (value).\n\n        # Parameters\n        observations : Batched input observations.\n        memory : `Memory` containing the hidden states from initial timepoints.\n        prev_actions : Tensor of previous actions taken.\n        masks : Masks applied to hidden states. See `RNNStateEncoder`.\n\n        # Returns\n        Tuple of the `ActorCriticOutput` and recurrent hidden state.\n        \"\"\"\n        target_encoding = self.get_object_type_encoding(\n            cast(Dict[str, torch.FloatTensor], observations)\n        )\n        x = [target_encoding]\n\n        if not self.is_blind:\n            perception_embed = self.visual_encoder(observations)\n            x = [perception_embed] + x\n\n        x_cat = torch.cat(x, dim=-1)  # type: ignore\n        x_out, rnn_hidden_states = self.state_encoder(\n            x_cat, memory.tensor(\"rnn\"), masks\n        )\n\n        return (\n            ActorCriticOutput(\n                distributions=self.actor(x_out), values=self.critic(x_out), extras={}\n            ),\n            memory.set_tensor(\"rnn\", rnn_hidden_states),\n        )\n```\n"
  },
  {
    "path": "docs/howtos/defining-a-new-task.md",
    "content": "# Defining a new task\n\nIn order to use new tasks in our experiments, we need to define two classes:\n\n* A [Task](/api/allenact/base_abstractions/task#task), including, among others, a `step` implementation providing a\n[RLStepResult](/api/allenact/base_abstractions/misc#rlstepresult), a `metrics` method providing quantitative performance measurements \nfor agents and, optionally, a `query_expert` method that can be used e.g. with an\n[imitation loss](/api/allenact/algorithms/onpolicy_sync/losses/imitation#imitation) during training.\n* A [TaskSampler](/api/allenact/base_abstractions/task#tasksampler), that allows instantiating new Tasks for the agents to solve during\ntraining, validation and testing.\n\n## Task\n\nLet's define a semantic navigation task, where agents have to navigate from a starting point in an environment to an\nobject of a specific class using a minimal amount of steps and deciding when the goal has been reached.\n\nWe need to define the methods `action_space`, `render`, `_step`, `reached_terminal_state`, `class_action_names`, `close`,\n`metrics`, and `query_expert` from the base `Task` definition.\n\n\n### Initialization, action space and termination\nLet's start with the definition of the action space and task initialization:\n\n```python\n...\nfrom allenact_plugins.ithor_plugin.ithor_constants import (\n    MOVE_AHEAD,\n    ROTATE_LEFT,\n    ROTATE_RIGHT,\n    LOOK_DOWN,\n    LOOK_UP,\n    END,\n)\n\n...\n\n\nclass ObjectNaviThorGridTask(Task[IThorEnvironment]):\n    _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, LOOK_DOWN, LOOK_UP, END)\n\n    def __init__(\n            self,\n            env: IThorEnvironment,\n            sensors: List[Sensor],\n            task_info: Dict[str, Any],\n            max_steps: int,\n            **kwargs\n    ) -> None:\n        super().__init__(\n            env=env,\n            sensors=sensors,\n            task_info=task_info,\n            max_steps=max_steps, **kwargs\n        )\n   
     self._took_end_action: bool = False\n        self._success: Optional[bool] = False\n\n    @property\n    def action_space(self):\n        return gym.spaces.Discrete(len(self._actions))\n\n    @classmethod\n    def class_action_names(cls) -> Tuple[str, ...]:\n        return cls._actions\n\n    def reached_terminal_state(self) -> bool:\n        return self._took_end_action\n\n    def close(self) -> None:\n        self.env.stop()\n\n    ...\n```\n\n### Step method\nNext, we define the main method `_step` that will be called every time the agent produces a new action: \n```python\nclass ObjectNaviThorGridTask(Task[IThorEnvironment]):\n    ...\n    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:\n        assert isinstance(action, int)\n        action = cast(int, action)\n\n        action_str = self.class_action_names()[action]\n\n        if action_str == END:\n            self._took_end_action = True\n            self._success = self.is_goal_object_visible()\n            self.last_action_success = self._success\n        else:\n            self.env.step({\"action\": action_str})\n            self.last_action_success = self.env.last_action_success\n\n        step_result = RLStepResult(\n            observation=self.get_observations(),\n            reward=self.judge(),\n            done=self.is_done(),\n            info={\"last_action_success\": self.last_action_success},\n        )\n        return step_result\n    \n    ...\n\n    def is_goal_object_visible(self) -> bool:\n        return any(\n            o[\"objectType\"] == self.task_info[\"object_type\"]\n            for o in self.env.visible_objects()\n        )\n\n    def judge(self) -> float:\n        reward = -0.01\n\n        if not self.last_action_success:\n            reward += -0.03\n\n        if self._took_end_action:\n            reward += 1.0 if self._success else -1.0\n\n        return float(reward)\n```\n\n###  Metrics, rendering and expert actions\n\nFinally, we define methods to 
render and evaluate the current task, and optionally generate expert actions to be used\ne.g. for DAgger training.\n```python\n    def render(self, mode: str = \"rgb\", *args, **kwargs) -> numpy.ndarray:\n        assert mode == \"rgb\", \"only rgb rendering is implemented\"\n        return self.env.current_frame\n\n\n    def metrics(self) -> Dict[str, Any]:\n        if not self.is_done():\n            return {}\n        else:\n            return {\"success\": self._success, \"ep_length\": self.num_steps_taken()}\n\n    def query_expert(self, **kwargs) -> Tuple[int, bool]:\n        return my_objnav_expert_implementation(self)\n```\n\n## TaskSampler\n\nWe also need to define the corresponding TaskSampler, which must contain implementations for methods `__len__`,\n`total_unique`, `last_sampled_task`, `next_task`, `close`, `reset`, and `set_seed`. Currently,\nan additional method `all_observation_spaces_equal` is used to ensure compatibility with the current\n[RolloutBlockStorage](/api/allenact/algorithms/onpolicy_sync/storage#rolloutblockstorage).\n\nLet's define a task sampler able to provide an infinite number of object navigation tasks for AI2-THOR.\n\n### Initialization and termination \n\n```python\nclass ObjectNavTaskSampler(TaskSampler):\n    def __init__(\n        self,\n        scenes: List[str],\n        object_types: str,\n        sensors: List[Sensor],\n        max_steps: int,\n        env_args: Dict[str, Any],\n        action_space: gym.Space,\n        seed: Optional[int] = None,\n        deterministic_cudnn: bool = False,\n        *args,\n        **kwargs\n    ) -> None:\n        self.env_args = env_args\n        self.scenes = scenes\n        self.object_types = object_types\n        self.grid_size = 0.25\n        self.env: Optional[IThorEnvironment] = None\n        self.sensors = sensors\n        self.max_steps = max_steps\n        self._action_sapce = action_space\n\n        self.scene_id: Optional[int] = None\n\n        self._last_sampled_task: 
Optional[ObjectNaviThorGridTask] = None\n\n        set_seed(seed)\n\n        self.reset()\n\n    def close(self) -> None:\n        if self.env is not None:\n            self.env.stop()\n\n    def reset(self):\n        self.scene_id = 0\n    \n    def _create_environment(self) -> IThorEnvironment:\n        env = IThorEnvironment(\n            make_agents_visible=False,\n            object_open_speed=0.05,\n            restrict_to_initially_reachable_points=True,\n            **self.env_args,\n        )\n        return env\n```\n\n### Task sampling\n\nFinally, we need to define methods to determine the number of available tasks (possibly infinite) and sample tasks:\n```python\n\n    @property\n    def length(self) -> Union[int, float]:\n        return float(\"inf\")\n\n    @property\n    def total_unique(self) -> Optional[Union[int, float]]:\n        return None\n\n    @property\n    def last_sampled_task(self) -> Optional[ObjectNaviThorGridTask]:\n        return self._last_sampled_task\n\n    @property\n    def all_observation_spaces_equal(self) -> bool:\n        return True\n\n    def next_task(self) -> Optional[ObjectNaviThorGridTask]:\n        self.scene_id = random.randint(0, len(self.scenes) - 1)\n        self.scene = self.scenes[self.scene_id]\n\n        if self.env is not None:\n            if scene != self.env.scene_name:\n                self.env.reset(scene)\n        else:\n            self.env = self._create_environment()\n            self.env.reset(scene_name=scene)\n\n        self.env.randomize_agent_location()\n\n        task_info = {\"object_type\": random.sample(self.object_types, 1)}\n\n        self._last_sampled_task = ObjectNaviThorGridTask(\n            env=self.env,\n            sensors=self.sensors,\n            task_info=task_info,\n            max_steps=self.max_steps,\n            action_space=self._action_sapce,\n        )\n        return self._last_sampled_task\n```"
  },
  {
    "path": "docs/howtos/defining-a-new-training-pipeline.md",
    "content": "# Defining a new training pipeline\n\nDefining a new training pipeline, or even new learning algorithms, is straightforward with the modular design in\n`AllenAct`.\n\nA convenience [Builder](/api/allenact/utils/experiment_utils#builder) object allows us to defer the instantiation\nof objects of the class passed as their first argument while allowing passing additional keyword arguments to their\ninitializers.\n\n## On-policy\n\nWe can implement a training pipeline which trains with a single stage using PPO:\n```python\nclass ObjectNavThorPPOExperimentConfig(ExperimentConfig):\n    ...\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        ppo_steps = int(1e6)\n        lr = 2.5e-4\n        num_mini_batch = 2 if not torch.cuda.is_available() else 6\n        update_repeats = 4\n        num_steps = 128\n        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max length tasks\n        save_interval = 10000\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 1.0\n        max_grad_norm = 0.5\n\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=metric_accumulate_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses={\n                \"ppo_loss\": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),\n            },\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,\n            pipeline_stages=[\n                PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps,),\n            ],\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}\n            
),\n        )\n    ...\n```\n\nAlternatively, we could use a more complex pipeline that includes dataset aggregation\n([DAgger](https://www.cs.cmu.edu/~sross1/publications/Ross-AIStats11-NoRegret.pdf)). This requires the existence of an\nexpert (implemented in the task definition) that provides optimal actions to agents. We have implemented such a \npipeline by extending the above configuration as follows:\n```python\nclass ObjectNavThorDaggerThenPPOExperimentConfig(ExperimentConfig):\n    ...\n    SENSORS = [\n        ...\n        ExpertActionSensor(nactions=6), # Notice that we have added\n                                        # an expert action sensor.\n    ]\n    ...\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        dagger_steps = int(1e4) # Much smaller number of steps as we're using imitation learning\n        ppo_steps = int(1e6)\n        lr = 2.5e-4\n        num_mini_batch = 1 if not torch.cuda.is_available() else 6\n        update_repeats = 4\n        num_steps = 128\n        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max length tasks\n        save_interval = 10000\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 1.0\n        max_grad_norm = 0.5\n\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=metric_accumulate_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses={\n                \"ppo_loss\": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),\n                \"imitation_loss\": Imitation(), # We add an imitation loss.\n            },\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,\n            
pipeline_stages=[ # The pipeline now has two stages, in the first\n                              # we use DAgger (imitation loss + teacher forcing).\n                              # In the second stage we no longer use teacher\n                              # forcing and add in the ppo loss.\n                PipelineStage(\n                    loss_names=[\"imitation_loss\"],\n                    teacher_forcing=LinearDecay(\n                        startp=1.0, endp=0.0, steps=dagger_steps,\n                    ),\n                    max_stage_steps=dagger_steps,\n                ),\n                PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps,),\n            ],\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}\n            ),\n        )\n``` \n\n## Off-policy\n\nWe can also define off-policy stages where an external dataset is used, in this case, for Behavior Cloning: \n\n```python\nclass BCOffPolicyBabyAIGoToLocalExperimentConfig(ExperimentConfig):\n    ...\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        total_train_steps = int(1e7)\n        num_steps=128\n        return TrainingPipeline(\n            save_interval=10000,  # Save every 10000 steps (approximately)\n            metric_accumulate_interval=1,\n            optimizer_builder=Builder(optim.Adam, dict(lr=2.5e-4)),\n            num_mini_batch=0,  # no on-policy training\n            update_repeats=0,  # no on-policy training\n            num_steps=num_steps // 4,  # rollouts from environment tasks\n            named_losses={\n                \"offpolicy_expert_ce_loss\": MiniGridOffPolicyExpertCELoss(\n                    total_episodes_in_epoch=int(1e6)  # dataset contains 1M episodes\n                ),\n            },\n            gamma=0.99,\n            use_gae=True,\n            gae_lambda=1.0,\n            max_grad_norm=0.5,\n            advance_scene_rollout_period=None,\n            
pipeline_stages=[\n                PipelineStage(\n                    loss_names=[],  # no on-policy losses\n                    max_stage_steps=total_train_steps,\n                    # We only train from off-policy data:\n                    offpolicy_component=OffPolicyPipelineComponent(\n                        data_iterator_builder=lambda **kwargs: create_minigrid_offpolicy_data_iterator(\n                            path=DATASET_PATH,  # external dataset\n                            nrollouts=128,  # per trainer batch size\n                            rollout_len=num_steps,  # For truncated-BPTT\n                            instr_len=5,\n                            **kwargs,\n                        ),\n                        loss_names=[\"offpolicy_expert_ce_loss\"],  # off-policy losses\n                        updates=16,  # 16 batches per rollout\n                    ),\n                ),\n            ],\n        )\n```\n\nNote that, in this example, `128 / 4 = 32` steps will be sampled from tasks in a MiniGrid environment (which can be\nuseful to track the agent's performance), while a subgraph of the model (in this case the entire Actor) is\ntrained from batches of 128-step truncated episodes sampled from an offline dataset stored under `DATASET_PATH`.\n"
  },
  {
    "path": "docs/howtos/defining-an-experiment.md",
    "content": "# Defining an  experiment\n\nLet's look at an example experiment configuration for an object navigation example with an actor-critic agent observing\nRGB images from the environment and target object classes from the task. This is a simplified example where the \nagent is confined to a single `iTHOR` scene (`FloorPlan1`) and needs to find a single object (a tomato). To see how one\nmight running a \"full\"/\"hard\" version of navigation within AI2-THOR, see our tutorials\n [PointNav in RoboTHOR](../tutorials/training-a-pointnav-model.md) and \n [Swapping in a new environment](../tutorials/transfering-to-a-different-environment-framework.md).\n\nThe interface to be implemented by the experiment specification is defined in\n[allenact.base_abstractions.experiment_config](/api/allenact/base_abstractions/experiment_config#experimentconfig). If you'd\nlike to skip ahead and see the finished configuration, [see here](https://github.com/allenai/allenact/blob/master/projects/tutorials/object_nav_ithor_ppo_one_object.py).\nWe begin by making the following imports:\n\n```python\nfrom math import ceil\nfrom typing import Dict, Any, List, Optional\n\nimport gym\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses import PPO\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor, GoalObjectTypeThorSensor\nfrom allenact_plugins.ithor_plugin.ithor_task_samplers import ObjectNavTaskSampler\nfrom allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask\nfrom projects.objectnav_baselines.models.object_nav_models import (\n 
ObjectNavBaselineActorCritic,\n)\nfrom allenact.utils.experiment_utils import Builder, PipelineStage, TrainingPipeline, LinearDecay\n```\n\nThe first method to implement is `tag`, which provides a string identifying the experiment:\n\n```python\nclass ObjectNavThorPPOExperimentConfig(ExperimentConfig):\n    ...\n    @classmethod\n    def tag(cls):\n        return \"ObjectNavThorPPO\"\n    ...\n```\n\n## Model creation\n\nNext, `create_model` will be used to instantiate a\n[baseline object navigation actor-critic model](/api/projects/objectnav_baselines/models/object_nav_models#ObjectNavBaselineActorCritic):\n\n```python\nclass ObjectNavThorPPOExperimentConfig(ExperimentConfig):\n    ...\n\n    # A simple setting, train/valid/test are all the same single scene\n    # and we're looking for a single object\n    OBJECT_TYPES = [\"Tomato\"]\n    TRAIN_SCENES = [\"FloorPlan1_physics\"]\n    VALID_SCENES = [\"FloorPlan1_physics\"]\n    TEST_SCENES = [\"FloorPlan1_physics\"]\n\n    # Setting up sensors and basic environment details\n    SCREEN_SIZE = 224\n    SENSORS = [\n        RGBSensorThor(\n            height=SCREEN_SIZE, width=SCREEN_SIZE, use_resnet_normalization=True,\n        ),\n        GoalObjectTypeThorSensor(object_types=OBJECT_TYPES),\n    ]\n    \n    ...\n    \n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        return ObjectNavBaselineActorCritic(\n            action_space=gym.spaces.Discrete(len(ObjectNaviThorGridTask.class_action_names())),\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            rgb_uuid=cls.SENSORS[0].uuid,\n            depth_uuid=None,\n            goal_sensor_uuid=\"goal_object_type_ind\",\n            hidden_size=512,\n            object_type_embedding_dim=8,\n        )\n    ...\n```\n\n## Training pipeline\n\nWe now implement a training pipeline which trains with a single stage using PPO.\n\nIn the below we use [Builder](/api/allenact/utils/experiment_utils#builder) objects, 
which allow us to defer the instantiation\nof objects of the class passed as their first argument while allowing passing additional keyword arguments to their\ninitializers. This is necessary when instantiating things like PyTorch optimizers who take as input the list of\nparameters associated with our agent's model (something we can't know until the `create_model` function has been called).\n \n```python\nclass ObjectNavThorPPOExperimentConfig(ExperimentConfig):\n    ...\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        ppo_steps = int(1e6)\n        lr = 2.5e-4\n        num_mini_batch = 2 if not torch.cuda.is_available() else 6\n        update_repeats = 4\n        num_steps = 128\n        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max length tasks\n        save_interval = 10000\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 1.0\n        max_grad_norm = 0.5\n\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=metric_accumulate_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses={\n                \"ppo_loss\": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),\n            },\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,\n            pipeline_stages=[\n                PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps,),\n            ],\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}\n            ),\n        )\n    ...\n```\n\nAlternatively, we could use a more sophisticated pipeline that begins training with dataset 
aggregation\n([DAgger](https://www.cs.cmu.edu/~sross1/publications/Ross-AIStats11-NoRegret.pdf)) before moving to training\nwith PPO. This requires the existence of an\nexpert (implemented in the task definition) that provides optimal actions to agents. We have implemented such a \npipeline by extending the above configuration as follows\n\n```python\nclass ObjectNavThorDaggerThenPPOExperimentConfig(ObjectNavThorPPOExperimentConfig):\n    ...\n    SENSORS = [\n        RGBSensorThor(\n            height=SCREEN_SIZE, width=SCREEN_SIZE, use_resnet_normalization=True,\n        ),\n        GoalObjectTypeThorSensor(object_types=OBJECT_TYPES),\n        ExpertActionSensor(nactions=6), # Notice that we have added an expert action sensor.\n    ]\n    ...\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        dagger_steps = int(1e4) # Much smaller number of steps as we're using imitation learning\n        ppo_steps = int(1e6)\n        lr = 2.5e-4\n        num_mini_batch = 1 if not torch.cuda.is_available() else 6\n        update_repeats = 4\n        num_steps = 128\n        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max length tasks\n        save_interval = 10000\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 1.0\n        max_grad_norm = 0.5\n\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=metric_accumulate_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses={\n                \"ppo_loss\": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),\n                \"imitation_loss\": Imitation(), # We add an imitation loss.\n            },\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            
advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,\n            pipeline_stages=[ # The pipeline now has two stages, in the first\n                              # we use DAgger (imitation loss + teacher forcing).\n                              # In the second stage we no longer use teacher\n                              # forcing and add in the ppo loss.\n                PipelineStage(\n                    loss_names=[\"imitation_loss\"],\n                    teacher_forcing=LinearDecay(\n                        startp=1.0, endp=0.0, steps=dagger_steps,\n                    ),\n                    max_stage_steps=dagger_steps,\n                ),\n                PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps,),\n            ],\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}\n            ),\n        )\n``` \n\nA version of our experiment config file for which we have implemented this two-stage training\ncan be found [here](https://github.com/allenai/allenact/blob/master/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object.py).\nThis two-stage configuration `ObjectNavThorDaggerThenPPOExperimentConfig` is actually implemented _as a subclass of `ObjectNavThorPPOExperimentConfig`_.\nThis is a common pattern used in AllenAct and lets one skip a great deal of boilerplate when defining a new\nexperiment as a slight modification of an old one. Of course one must then be careful: changes to the superclass\nconfiguration will propagate to all subclassed configurations. 
\n\n## Machine configuration\n\nIn `machine_params` we define machine configuration parameters that will be used for training, validation and test:\n```python\nclass ObjectNavThorPPOExperimentConfig(allenact.base_abstractions.experiment_config.ExperimentConfig):\n    ...\n    @classmethod\n    def machine_params(cls, mode=\"train\", **kwargs):\n        num_gpus = torch.cuda.device_count()\n        has_gpu = num_gpus != 0 \n\n        if mode == \"train\":\n            nprocesses = 20 if has_gpu else 4\n            gpu_ids = [0] if has_gpu else []\n        elif mode == \"valid\":\n            nprocesses = 1\n            gpu_ids = [1 % num_gpus] if has_gpu else []\n        elif mode == \"test\":\n            nprocesses = 1\n            gpu_ids = [0] if has_gpu else []\n        else:\n            raise NotImplementedError(\"mode must be 'train', 'valid', or 'test'.\")\n\n        return {\"nprocesses\": nprocesses, \"gpu_ids\": gpu_ids}\n    ...\n```\nIn the above we use the availability of cuda (`torch.cuda.device_count() !=  0`) to determine whether\nwe should use parameters appropriate for local machines or for a server. We might optionally add a list of\n`sampler_devices` to assign devices (likely those not used for running our agent) to task sampling workers.\n\n## Task sampling\n\nThe above has defined the model we'd like to use, the types of losses we wish to use during training,\nand the machine specific parameters that should be used during training. Critically we have not yet\ndefined which task we wish to train our agent to complete. 
This is done by implementing the \n`ExperimentConfig.make_sampler_fn` function\n```python\nclass ObjectNavThorPPOExperimentConfig(ExperimentConfig):\n    ...\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return ObjectNavTaskSampler(**kwargs)\n    ...\n```\nNow, before training starts, our trainer will know to generate a collection of task\nsamplers using `make_sampler_fn` for training (and possibly validation or testing).\nThe `kwargs` parameters used in the above function call can be different for each\ntraining process, we implement such differences using the\n`ExperimentConfig.train_task_sampler_args` function\n```python\nclass ObjectNavThorPPOExperimentConfig(ExperimentConfig):\n    ...\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            self.TRAIN_SCENES,\n            process_ind,\n            total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"scene_period\"] = \"manual\"\n        res[\"env_args\"] = {}\n        res[\"env_args\"].update(self.ENV_ARGS)\n        res[\"env_args\"][\"x_display\"] = (\n            (\"0.%d\" % devices[process_ind % len(devices)])\n            if devices is not None and len(devices) > 0\n            else None\n        )\n        return res\n    ...\n```\nNow training process `i` out of `n` total processes will be instantiated with the parameters\n`ObjectNavThorPPOExperimentConfig.train_task_sampler_args(i, n, ...)`. Similar functions\n (`valid_task_sampler_args` and `test_task_sampler_args`) exist for generating validation\n and test parameters. Note also that with this function we can assign devices to run\n our environment for each worker. 
See the documentation of `ExperimentConfig` for more information.\n \n\n## Running the experiment\n\nWe are now in a position to run the experiment (with seed 12345) using the command:\n```bash\npython main.py object_nav_ithor_ppo_one_object -b projects/tutorials -s 12345\n```\n"
  },
  {
    "path": "docs/howtos/running-a-multi-agent-experiment.md",
    "content": "# To-do\n\n\n"
  },
  {
    "path": "docs/howtos/visualizing-results.md",
    "content": "# To-do\n\n\n"
  },
  {
    "path": "docs/installation/download-datasets.md",
    "content": "# Downloading datasets \n\n**Note:** These instructions assume you have\n[installed the full library](../installation/installation-allenact.md#full-library) and, generally, [installed\nspecific plugin requirements](../installation/installation-allenact.md#plugins-extra-requirements).\n\nThe below provides instructions on how to download datasets necessary for defining the train, validation, and\ntest sets used within the ObjectNav/PointNav tasks in the `iTHOR` and `RoboTHOR` environments.\n\n<!--\nNote that these datasets **do not include** scene assets for the below datasets. For `iTHOR` and `RoboTHOR`\nthese scene assets will be downloaded automatically, for `habitat` please following the instructions\nin [this tutorial](installation-framework.md).\n-->\n\n## Point Navigation (PointNav)\n\n### RoboTHOR\nTo get the PointNav dataset for `RoboTHOR` run the following command:\n```bash\nbash datasets/download_navigation_datasets.sh robothor-pointnav\n```\nThis will download the dataset into `datasets/robothor-pointnav`.\n\n### iTHOR\nTo get the PointNav dataset for `iTHOR` run the following command:\n```bash\nbash datasets/download_navigation_datasets.sh ithor-pointnav\n```\nThis will download the dataset into `datasets/ithor-pointnav`.\n\n## Object Navigation (ObjectNav)\n\n### RoboTHOR\nTo get the ObjectNav dataset for `RoboTHOR` run the following command:\n\n```bash\nbash datasets/download_navigation_datasets.sh robothor-objectnav\n```\nThis will download the dataset into `datasets/robothor-objectnav`.\n\n### iTHOR\nTo get the ObjectNav dataset for `iTHOR` run the following command:\n```bash\nbash datasets/download_navigation_datasets.sh ithor-objectnav\n```\nThis will download the dataset into `datasets/ithor-objectnav`.\n"
  },
  {
    "path": "docs/installation/installation-allenact.md",
    "content": "# Installation of AllenAct\n\n**Note 1:** This library has been tested *only in python 3.6.*/3.7.*. The following assumes you have a working\nversion of *python 3.6/3.7* installed locally. \n\n**Note 2:** If you are installing `allenact` intending to use a GPU for training/inference and your\ncurrent machine uses an older version of CUDA you may need to manually install the version of \nPyTorch that supports your CUDA version. In such a case, after installing the below requirements, you\nshould follow the directions for installing PyTorch with older\nversions of CUDA available on the [PyTorch homepage](https://pytorch.org/).\n\nIn order to install `allenact` and/or its requirements we recommend creating a new\n[python virtual environment](https://docs.python.org/3/tutorial/venv.html) and installing all\nof the below requirements into this virtual environment.\n\nAlternatively, we also document how to [install a conda environment](#installing-a-conda-environment)\nwith all the requirements, which is especially useful if you plan to train models in [Habitat](https://aihabitat.org/).\n\n## Different ways to use `allenact`\n\nThere are three main installation paths depending on how you wish to use `allenact`.\n\n1. You want to use the `allenact` abstractions and training engine for your own task/environment and don't really \ncare about using any of our plugins that offer additional support (in the form of models, sensors, task samplers, etc.)\nfor select tasks/environments like AI2-THOR, Habitat, and MiniGrid.\n    - If this sounds like you, install the [standalone framework](#standalone-framework).\n1. You want to use `allenact` as above but would also like to use some of our additional plugins.\n    - If this sounds like you, install the [framework and plugins](#framework-and-plugins).\n1. 
You want full access to everything in `allenact` (including all plugins and all of our projects and baselines)\n   and want to have the option to edit the internal implementation of `allenact` to suit your desire. \n    - If this sounds like you, install the [full library](#full-library).   \n\n\n## Standalone framework\n\nYou can install `allenact` easily using pip:\n\n```bash\npip install allenact\n```\n\nIf you'd like to install the latest development version of `allenact` (possibly unstable) directly from GitHub see the\nnext section.\n\n### Bleeding edge pip install\n\nTo install the latest `allenact` framework, you can use\n\n```bash\npip install -e \"git+https://github.com/allenai/allenact.git@main#egg=allenact&subdirectory=allenact\"\n```\n\nand, similarly, you can also use\n\n```bash\npip install -e \"git+https://github.com/allenai/allenact.git@main#egg=allenact_plugins[all]&subdirectory=allenact_plugins\"\n```\n\nto install all plugins.\n\nDepending on your machine configuration, you may need to use `pip3` instead of `pip` in the commands\nabove.\n\n## Framework and plugins\n\nTo install `allenact` and all available plugins, run\n\n```bash\npip install allenact allenact_plugins[all]\n```\n\nwhich will install `allenact` and `allenact_plugins` packages along with the requirements for _all_\nof the plugins (when possible). If you only want to install the requirements for some subset of plugins, you can\nspecify these plugins with the `allenact_plugins[plugin1,plugin2]` notation. 
For instance, to install requirements\nfor the `ithor_plugin` and the `minigrid_plugin`, simply run:\n\n```bash\npip install allenact allenact_plugins[ithor,minigrid]\n```\n\nA list of all available plugins can be found [here](https://github.com/allenai/allenact/tree/master/allenact_plugins).\n\n## Full library\n\nClone the `allenact` repository to your local machine and move into the top-level directory\n\n```bash\ngit clone git@github.com:allenai/allenact.git\ncd allenact\n```\n\nBelow we describe two alternative ways to install all dependencies via `pip` or `conda`.\n\n### Installing requirements with `pip`\n\nAll requirements for `allenact` (not including plugin requirements) may be installed by running the following command:\n\n```bash\npip install -r requirements.txt; pip install -r dev_requirements.txt\n```\n\nTo install plugin requirements, see below.\n\n#### Plugins extra requirements\n\nTo install the specific requirements of each plugin, we need to additionally call\n\n```bash\npip install -r allenact_plugins/<PLUGIN_NAME>_plugin/extra_requirements.txt\n```\n\nfrom the top-level directory.\n\n### Installing a `conda` environment\n\n_If you are unfamiliar with Conda, please familiarize yourself with their [introductory documentation](https://docs.conda.io/projects/conda/en/latest/).\nIf you have not already, you will need to first [install Conda (i.e. Anaconda or Miniconda)](https://docs.conda.io/projects/conda/en/latest/user-guide/install/)\non your machine. We suggest installing [Miniconda](https://docs.conda.io/projects/conda/en/latest/glossary.html#miniconda-glossary)\nas it's relatively lightweight._\n\nThe `conda` folder contains YAML files specifying [Conda environments](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-from-an-environment-yml-file)\ncompatible with AllenAct. 
These environment files include: \n\n* `environment-base.yml` - A base environment file to be used on all machines (it includes\n[PyTorch](https://pytorch.org/) with the latest `cudatoolkit`).\n* `environment-dev.yml` - Additional dev dependencies.\n* `environment-<CUDA_VERSION>.yml` - Additional dependencies, where `<CUDA_VERSION>` is the CUDA version used on your\nmachine (if you are using linux, you might find this version by running `/usr/local/cuda/bin/nvcc --version`).\n* `environment-cpu.yml` - Additional dependencies to be used on machines where GPU support is not needed (everything\n will be run on the CPU).\n \n\nFor the moment let's assume you're using `environment-base.yml` above. To install a conda environment with name `allenact`\n using this file you can simply run the following (*this will take a few minutes*):\n\n```bash\nconda env create --file ./conda/environment-base.yml --name allenact\n``` \nThe above is very simple but has the side effect of creating a new `src` directory where it will\nplace some of AllenAct's dependencies. 
To get around this, instead of running the above you can instead\nrun the commands:\n\n```bash\nexport MY_ENV_NAME=allenact\nexport CONDA_BASE=\"$(dirname $(dirname \"${CONDA_EXE}\"))\"\nexport PIP_SRC=\"${CONDA_BASE}/envs/${MY_ENV_NAME}/pipsrc\"\nconda env create --file ./conda/environment-base.yml --name $MY_ENV_NAME\n``` \n\nThese additional commands tell conda to place these dependencies under the `${CONDA_BASE}/envs/${MY_ENV_NAME}/pipsrc` directory rather\nthan under `src`, this is more in line with where we'd expect dependencies to be placed when running `pip install ...`.\n\nIf needed, you can use one of the `environment-<CUDA_VERSION>.yml` environment files to install the proper version of\nthe `cudatoolkit` by running:\n\n```bash\nconda env update --file ./conda/environment-<CUDA_VERSION>.yml --name allenact\n```\nor the CPU-only version:\n```bash\nconda env update --file ./conda/environment-cpu.yml --name allenact\n```\n\n#### Using the `conda` environment\n\nNow that you've installed the conda environment as above, you can activate it by running:\n\n```bash\nconda activate allenact\n```\n\nafter which you can run everything as you would normally.\n\n\n#### Installing supported environments with `conda`\n\nEach supported plugin contains a YAML environment file that can be applied upon the existing `allenact` environment. To\ninstall the specific requirements of each plugin, we need to additionally call\n\n```bash\nconda env update --file allenact_plugins/<PLUGIN_NAME>_plugin/extra_environment.yml --name $MY_ENV_NAME\n```\n\nfrom the top-level directory.\n\n**Habitat:** Note that, for habitat, we provide two environment types, regarding whether our machine is connected to a\ndisplay. More details can be found [here](../installation/installation-framework.md#installation-of-habitat). \n"
  },
  {
    "path": "docs/installation/installation-framework.md",
    "content": "# Installation of supported environments\n\nIn general, each supported environment can be installed by just following the instructions to\n[install the full library and specific requirements of every plugin](../installation/installation-allenact.md#full-library)\neither [via pip](../installation/installation-allenact.md#installing-requirements-with-pip) or\n[via Conda](../installation/installation-allenact.md#installing-a-conda-environment).\n\nBelow we provide additional installation instructions for a number of environments that we support and\nprovide some guidance for problems commonly experienced when using these environments.\n\n## Installation of iTHOR (`ithor` plugin)\n\nThe first time you will run an experiment with `iTHOR` (or any script that uses `ai2thor`)\nthe library will download all of the assets it requires to render the scenes automatically.\nHowever, the datasets must be manually downloaded as described [here](../installation/download-datasets.md).\n\n**Trying to use `iTHOR` on a machine without an attached display?** \n\n**Note:** These instructions assume you have\n[installed the full library](../installation/installation-allenact.md#full-library).\n\nIf you wish to run `iTHOR` on a machine without an attached display (for instance, a remote server such as an AWS\n machine) you will also need to run a script that launches `xserver` processes on your GPUs. This can be done\n with the following command:\n\n```bash\nsudo python scripts/startx.py &\n```\n\nNotice that you need to run the command with `sudo` (i.e. administrator privileges). If you do not have `sudo` \naccess (for example if you are running this on a shared university machine) you\ncan ask your administrator to run it for you. You only need to run it once (as\nlong as you do not turn off your machine).\n\n## Installation of RoboTHOR (`robothor` plugin)\n\n`RoboTHOR` is installed in the same way as `iTHOR`. 
For more information see the above section on installing `iTHOR`. \n\n## Installation of Habitat\n\nInstalling habitat requires \n\n1. Installing the `habitat-lab` and `habitat-sim` packages.\n   - This may be done by either following the [directions provided by Habitat themselves](https://github.com/facebookresearch/habitat-lab#installation)\nor by using our `conda` installation instructions below. \n1. Downloading the scene assets (i.e. the Gibson or Matterport scene files) relevant to whichever task you're interested in.\n   - Unfortunately we cannot legally distribute these files to you directly. Instead you will need to download these\n     yourself. See [here](https://github.com/facebookresearch/habitat-lab#Gibson) for how you can download \n     the Gibson files and [here](https://github.com/facebookresearch/habitat-lab#matterport3d) for directions on\n     how to download the Matterport files.\n1. Downloading the dataset files for the task you're interested in (e.g. PointNav, ObjectNav, etc).\n   - See [here](https://github.com/facebookresearch/habitat-lab#task-datasets) for links to these dataset files.\n \n<!--\n### Using Docker\n\nTo run experiments using Habitat please use our docker image using the following command:\n\n```bash\ndocker pull allenact/allenact:latest\n```\n\nThis container includes the 0.1.0 release of `allenact`, the 0.1.5 release of `habitat` as well\nas the `Gibson` point navigation dataset. 
This dataset consists of a set of start and goal positions provided by habitat.\nYou then need to launch the container and attach into it:\n\n```bash\ndocker run --runtime=nvidia -it allenact/allenact\n```\nIf you are running the container on a machine without an Nvidia GPU, omit the `--runtime=nvidia` flag.\n\nOnce inside the container simply `cd` into the `allenact` directory where all the allenact and habitat code should be stored:\n \nUnfortunately we cannot legally redistribute the Gibson scenes by including them in the above container.\nInstead you will need to download these yourself by filling out \n[this form](https://docs.google.com/forms/d/e/1FAIpQLScWlx5Z1DM1M-wTSXaa6zV8lTFkPmTHW1LqMsoCBDWsTDjBkQ/viewform)\nand downloading the `gibson_habitat_trainval` data. Extract the scene assets (`.glb` files) into `habitat-lab/data/scene_datasets/` \nwithin the above container. You can then proceed to run your experiments using `allenact` as you normally would.\n-->\n\n### Using `conda`\n\nHabitat has recently released the option to install their simulator using `conda` which avoids having\nto manually build dependencies or use Docker. This does not guarantee that the installation process\nis completely painless (it is difficult to avoid all possible build issues) but we've found it\nto be a nice alternative to using Docker. 
To use this installation option please first\ninstall an AllenAct `conda` environment using the instructions available [here](../installation/installation-allenact.md#installing-a-conda-environment).\nAfter installing this environment, you can then install `habitat-sim` and `habitat-lab` by running:\n\nIf you are on a machine with an attached display:\n```bash\nexport MY_ENV_NAME=allenact\nexport CONDA_BASE=\"$(dirname $(dirname \"${CONDA_EXE}\"))\"\nexport PIP_SRC=\"${CONDA_BASE}/envs/${MY_ENV_NAME}/pipsrc\"\nconda env update --file allenact_plugins/habitat_plugin/extra_environment.yml --name $MY_ENV_NAME\n```\n\nIf you are on a machine without an attached display (e.g. a server), replace the last command by:\n```bash\nconda env update --file allenact_plugins/habitat_plugin/extra_environment_headless.yml --name $MY_ENV_NAME\n```\n\nAfter these steps, feel free to proceed to download the required scene assets and task-specific dataset files as\ndescribed above.\n\n<!--\n#### Installing a Conda environment\n\n_If you are unfamiliar with Conda, please familiarize yourself with their [introductory documentation](https://docs.conda.io/projects/conda/en/latest/).\nIf you have not already, you will need to first [install Conda (i.e. Anaconda or Miniconda)](https://docs.conda.io/projects/conda/en/latest/user-guide/install/)\non your machine. We suggest installing [Miniconda](https://docs.conda.io/projects/conda/en/latest/glossary.html#miniconda-glossary)\nas it's relatively lightweight._\n\nClone the `allenact` repository to your local machine and move into the top-level directory\n\n```bash\ngit clone git@github.com:allenai/allenact.git\ncd allenact\n```\n\nThe `conda` folder contains YAML files specifying [Conda environments](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-from-an-environment-yml-file)\ncompatible with AllenAct. 
These environment files include: \n\n* `environment-base.yml` - A base environment file to be used on machines where the version of CUDA on your machine\nmatches the one of the latest `cudatoolkit` in conda.\n* `environment-dev.yml` - Additional dev dependencies.\n* `environment-<CUDA_VERSION>.yml` - Additional dependencies, where `<CUDA_VERSION>` is the CUDA version used on your\nmachine (if you are using linux, you might find this version by running `/usr/local/cuda/bin/nvcc --version`).\n* `environment-cpu.yml` - Additional dependencies to be used on machines where GPU support is not needed (everything\n will be run on the CPU).\n \n\nFor the moment let's assume you're using `environment-base.yml` above. To install a conda environment with name `allenact`\n using this file you can simply run the following (*this will take a few minutes*):\n\n```bash\nconda env create --file ./conda/environment-base.yml --name allenact\n``` \nThe above is very simple but has the side effect of creating a new `src` directory where it will\nplace some of AllenAct's dependencies. 
To get around this, instead of running the above you can instead\nrun the commands:\n\n```bash\nexport MY_ENV_NAME=allenact\nexport CONDA_BASE=\"$(dirname $(dirname \"${CONDA_EXE}\"))\"\nexport PIP_SRC=\"${CONDA_BASE}/envs/${MY_ENV_NAME}/pipsrc\"\nconda env create --file ./conda/environment-base.yml --name $MY_ENV_NAME\n``` \n\nThese additional commands tell conda to place these dependencies under the `${CONDA_BASE}/envs/${MY_ENV_NAME}/pipsrc` directory rather\nthan under `src`, this is more in line with where we'd expect dependencies to be placed when running `pip install ...`.\n\nIf needed, you can use one of the `environment-<CUDA_VERSION>.yml` environment files to install the proper version of\nthe `cudatoolkit` by running:\n\n```bash\nconda env update --file ./conda/environment-<CUDA_VERSION>.yml --name allenact\n```\nor the CPU-only version:\n```bash\nconda env update --file ./conda/environment-cpu.yml --name allenact\n```\n\n##### Using the Conda environment\n\nNow that you've installed the conda environment as above, you can activate it by running:\n\n```bash\nconda activate allenact\n```\n\nafter which you can run everything as you would normally.\n-->"
  },
  {
    "path": "docs/javascripts/extra.js",
    "content": "// The below can be used to open all nav links in the documentation, code found at\n// from https://github.com/squidfunk/mkdocs-material/issues/767#issuecomment-384558269\n// from the user Akkadius.\n/*\ndocument.addEventListener(\"DOMContentLoaded\", function() {\n    load_navpane();\n});\n\nfunction load_navpane() {\n    var width = window.innerWidth;\n    if (width <= 1200) {\n        return;\n    }\n\n    var nav = document.getElementsByClassName(\"md-nav\");\n    for (var i = 0; i < nav.length; i++) {\n        if (typeof nav.item(i).style === \"undefined\") {\n            continue;\n        }\n\n        if (nav.item(i).getAttribute(\"data-md-level\") && nav.item(i).getAttribute(\"data-md-component\")) {\n            nav.item(i).style.display = 'block';\n            nav.item(i).style.overflow = 'visible';\n        }\n    }\n\n    var nav = document.getElementsByClassName(\"md-nav__toggle\");\n    for(var i = 0; i < nav.length; i++) {\n       nav.item(i).checked = true;\n    }\n}\n*/"
  },
  {
    "path": "docs/notebooks/firstbook.md",
    "content": "# To-do"
  },
  {
    "path": "docs/projects/advisor_2020/README.md",
    "content": "# Experiments for Advisor\n\n## TODO: \n\n1. Add details taken from https://unnat.github.io/advisor/. \n2. Cite the arxiv paper.\n3. Give a list of things you can run with bash commands.\n4. Ideally be able to recreate a large set of experiments."
  },
  {
    "path": "docs/projects/babyai_baselines/README.md",
    "content": "# Baseline experiments for the BabyAI environment\n\nWe perform a collection of baseline experiments within the BabyAI environment\n on the GoToLocal task, see the `projects/babyai_baselines/experiments/go_to_local` directory.\n For instance, to train a model using PPO, run\n \n```bash\npython main.py go_to_local.ppo --experiment_base projects/babyai_baselines/experiments\n```\n\nNote that these experiments will be quite slow when not using a GPU as the BabyAI model architecture is surprisingly \nlarge. Specifying a GPU (if available) can be done from the command line using hooks we created using \n[gin-config](https://github.com/google/gin-config). E.g. to train using the 0th GPU device, add\n\n```bash\n--gp \"machine_params.gpu_id = 0\"\n```  \n\nto the above command."
  },
  {
    "path": "docs/projects/gym_baselines/README.md",
    "content": "# Baseline models Gym (for MuJoCo environments)\n\nThis project contains the code for training baseline models for the tasks under the [MuJoCo](https://gym.openai.com/envs/#mujoco) group of Gym environments, including [\"Ant-v2\"](https://gym.openai.com/envs/Ant-v2/), [\"HalfCheetah-v2\"](https://gym.openai.com/envs/HalfCheetah-v2/), [\"Hopper-v2\"](https://gym.openai.com/envs/Hopper-v2/), [\"Humanoid-v2\"](https://gym.openai.com/envs/Humanoid-v2/), [\"InvertedDoublePendulum-v2\"](https://gym.openai.com/envs/InvertedDoublePendulum-v2/), [\"InvertedPendulum-v2\"](https://gym.openai.com/envs/InvertedPendulum-v2/), [\"Reacher-v2\"](https://gym.openai.com/envs/Reacher-v2/), [\"Swimmer-v2\"](https://gym.openai.com/envs/Swimmer-v2/), and [\"Walker2d-v2\"](https://gym.openai.com/envs/Walker2d-v2/).\n\nProvided are experiment configs for training a lightweight implementation with separate MLPs for actors and critic, [MemorylessActorCritic](https://allenact.org/api/allenact_plugins/gym_plugin/gym_models/#memorylessactorcritic), with a [Gaussian distribution](https://allenact.org/api/allenact_plugins/gym_plugin/gym_distributions/#gaussiandistr) to sample actions for all continuous-control environments under the `MuJoCo` group of `Gym` environments. \n\nThe experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)\nReinforcement Learning Algorithm. \n\nTo train an experiment run the following command from the `allenact` root directory:\n\n```bash\npython main.py <PATH_TO_EXPERIMENT_CONFIG> -o <PATH_TO_OUTPUT>\n```\n\nWhere `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights\nand logs to be stored and `<PATH_TO_EXPERIMENT_CONFIG>` is the path to the python file containing\nthe experiment configuration. 
An example usage of this command would be:\n\n```bash\npython main.py projects/gym_baselines/experiments/mujoco/gym_mujoco_ant_ddppo.py -o /YOUR/DESIRED/MUJOCO/OUTPUT/SAVE/PATH/gym_mujoco_ant_ddppo\n```\n\nThis trains a lightweight implementation with separate MLPs for actors and critic with a Gaussian distribution to sample actions in the \"Ant-v2\" environment, and stores the model weights and logs\nto `/YOUR/DESIRED/MUJOCO/OUTPUT/SAVE/PATH/gym_mujoco_ant_ddppo`.\n\n## Results\n\nIn our experiments, the rewards for MuJoCo environments we obtained after training using PPO are similar to those reported by OpenAI Gym Baselines (1M steps). The Humanoid environment is instead compared with the original PPO paper, which trained for 50M steps using PPO. Due to time constraints, we have only tested our baselines across two seeds so far. \n\n\n| Environment           | Gym Baseline Reward | Ours Reward |\n| -----------           | ------------------- | ----------- |\n|[Ant-v2](https://gym.openai.com/envs/Ant-v2/)| 1083.2 |1098.6(reached 4719 in 25M steps)  | \n| [HalfCheetah-v2](https://gym.openai.com/envs/HalfCheetah-v2/) | 1795.43             |  1741(reached 4019 in 18M steps)           |\n|[Hopper-v2](https://gym.openai.com/envs/Hopper-v2/)|2316.16|2266|\n|[Humanoid-v2](https://gym.openai.com/envs/Humanoid-v2/)|4000+|4500+(reached 6500 in 70M steps)|\n| [InvertedPendulum-v2](https://gym.openai.com/envs/InvertedPendulum-v2/) | 809.43              |  1000       |\n|[Reacher-v2](https://gym.openai.com/envs/Reacher-v2/)|-6.71|-7.045|\n|[Swimmer-v2](https://gym.openai.com/envs/Swimmer-v2/)|111.19|124.7|\n|[Walker2d](https://gym.openai.com/envs/Walker2d-v2/)|3424.95|2723 in 10M steps|\n"
  },
  {
    "path": "docs/projects/objectnav_baselines/README.md",
    "content": "# Baseline models ObjectNav (for RoboTHOR/iTHOR)\n\nThis project contains the code for training baseline models for the ObjectNav task. In ObjectNav, the agent\nspawns at a location in an environment and is tasked to explore the environment until it finds an object of a\ncertain type (such as TV or Basketball). Once the agent is confident that it has the object within sight\nit executes the `END` action which terminates the episode. If the agent is within a set\ndistance to the target (in our case 1.0 meters) and the target is visible within its observation frame\nthe agent succeeded, otherwise it failed.\n\nProvided are experiment configs for training a simple convolutional model with\na GRU using `RGB`, `Depth` or `RGB-D` (i.e. `RGB+Depth`) as inputs in\n[RoboTHOR](https://ai2thor.allenai.org/robothor/) and [iTHOR](https://ai2thor.allenai.org/ithor/).\n\nThe experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)\nReinforcement Learning Algorithm. For the RoboTHOR environment we also have an experiment\n(`objectnav_robothor_rgb_resnetgru_dagger.py`) showing how a model can be trained using DAgger,\na form of imitation learning.\n\nTo train an experiment run the following command from the `allenact` root directory:\n\n```bash\npython main.py <PATH_TO_EXPERIMENT_CONFIG> -o <PATH_TO_OUTPUT> -c\n```\n\nWhere `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights\nand logs to be stored and `<PATH_TO_EXPERIMENT_CONFIG>` is the path to the python file containing\nthe experiment configuration. 
An example usage of this command would be:\n\n```bash\npython main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet_ddppo.py -o storage/objectnav-robothor-rgb\n```\n\nThis trains a simple convolutional neural network with a GRU using RGB input \npassed through a pretrained ResNet-18 visual encoder on the\nObjectNav task in the RoboTHOR environment and stores the model weights and logs\nto `storage/objectnav-robothor-rgb`.\n\n## RoboTHOR ObjectNav 2021 Challenge\n\nThe experiment configs found under the `projects/objectnav_baselines/experiments/robothor` directory are designed\nto conform to the requirements of the [RoboTHOR ObjectNav 2021 Challenge](https://ai2thor.allenai.org/robothor/cvpr-2021-challenge).\n\n### Training a baseline\nTo train a baseline ResNet->GRU model taking RGB-D inputs, run the following command\n```bash\npython main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnet_ddppo.py -o storage/objectnav-robothor-rgbd\n```\nBy default, when using a machine with a GPU, the above experiment will attempt to train using 60 parallel processes\nacross all available GPUs. See the `TRAIN_GPU_IDS` constant in `experiments/objectnav_thor_base.py` and\nthe `NUM_PROCESSES` constant in `experiments/robothor/objectnav_robothor_base.py` if you'd like to change which\nGPUs are used or how many processes are run respectively.\n\n### Downloading our pretrained model checkpoint\nWe provide a pretrained model obtained by allowing the above command to run for all 300M training steps and then selecting\nthe model checkpoint with best validation-set performance (for us occurring at ~170M training steps). You can download \nthis model checkpoint by running\n```bash\nbash pretrained_model_ckpts/download_navigation_model_ckpts.sh robothor-objectnav-challenge-2021\n```\nfrom the top-level directory. 
This will download the pretrained model weights and save them at the path\n```bash\npretrained_model_ckpts/robothor-objectnav-challenge-2021/Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO/2021-02-09_22-35-15/exp_Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO_0.2.0a_300M__stage_00__steps_000170207237.pt\n```\n\n### Running inference on the pretrained model\n\nYou can run inference on the above pretrained model (on the test dataset) by running\n```bash\nexport SAVED_MODEL_PATH=pretrained_model_ckpts/robothor-objectnav-challenge-2021/Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO/2021-02-09_22-35-15/exp_Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO_0.2.0a_300M__stage_00__steps_000170207237.pt\npython main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnetgru_ddppo.py -c $SAVED_MODEL_PATH --eval\n```\nTo discourage \"cheating\", the test dataset has been scrubbed of the information needed to actually compute the success rate / SPL\nof your model and so running the above will only save the trajectories your models take. To evaluate these\ntrajectories you will have to submit them to our leaderboard, see [here for more details](https://github.com/allenai/robothor-challenge/).\nIf you'd like to get a sense of if your model is doing well before submitting to the leaderboard, you can obtain the \nsuccess rate / SPL of it on our validation dataset. To do this, you can simply comment-out the line\n```python\n    TEST_DATASET_DIR = os.path.join(os.getcwd(), \"datasets/robothor-objectnav/test\")\n```\nwithin the `projects/objectnav_baselines/experiments/robothor/objectnav_robothor_base.py` file and rerun the above\n`python main.py ...` command (when the test dataset is not given, the code defaults to using the validation set)."
  },
  {
    "path": "docs/projects/pointnav_baselines/README.md",
    "content": "# Baseline models for the Point Navigation task in the Habitat, RoboTHOR and iTHOR environments\n\nThis project contains the code for training baseline models on the PointNav task. In this setting the agent\nspawns at a location in an environment and is tasked to move to another location. The agent is given a \"compass\"\nthat tells it the distance and bearing to the target position at every frame. Once the agent is confident that\nit has reached the end it executes the `END` action which terminates the episode. If the agent is within a set\ndistance to the target (in our case 0.2 meters) the agent succeeded, else it failed.\n\nProvided are experiment configs for training a simple convolutional model with\na GRU using `RGB`, `Depth` or `RGBD` as inputs in [Habitat](https://github.com/facebookresearch/habitat-sim), \n[RoboTHOR](https://ai2thor.allenai.org/robothor/) and [iTHOR](https://ai2thor.allenai.org/ithor/).\n\nThe experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)\nReinforcement Learning Algorithm.\n\nTo train an experiment run the following command from the `allenact` root directory:\n\n```bash\npython main.py -o <PATH_TO_OUTPUT> -c -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> <EXPERIMENT_NAME>\n```\n\nWhere `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights\nand logs to be stored, `<BASE_DIRECTORY_OF_YOUR_EXPERIMENT>` is the directory where our\nexperiment file is located and `<EXPERIMENT_NAME>` is the name of the python module containing\nthe experiment. An example usage of this command would be:\n\n```bash\npython main.py -o storage/pointnav-robothor-depth -b projects/pointnav_baselines/experiments/robothor/ pointnav_robothor_depth_simpleconvgru_ddppo\n```\n\nThis trains a simple convolutional neural network with a GRU using Depth input on the\nPointNav task in the RoboTHOR environment and stores the model weights and logs\nto `storage/pointnav-robothor-depth`.\n"
  },
  {
    "path": "docs/projects/two_body_problem_2019/README.md",
    "content": "# Experiments for the Two Body Problem paper\n\n## TODO: \n\n1. Add details taken from https://prior.allenai.org/projects/two-body-problem \n2. Cite the CVPR paper.\n3. Give a list of things you can run with bash commands.\n4. At least a subset of the experiments."
  },
  {
    "path": "docs/tutorials/distributed-objectnav-tutorial.md",
    "content": "<!-- DO NOT EDIT THIS FILE. --> \n<!-- THIS FILE WAS AUTOGENERATED FROM 'ALLENACT_BASE_DIR/projects/tutorials/distributed_objectnav_tutorial.py', EDIT IT INSTEAD. -->\n\n# Tutorial: Distributed training across multiple nodes.\n**Note** The provided commands to execute in this tutorial include a configuration script to\n[clone the full library](../installation/installation-allenact.md#full-library). Setting up headless THOR might\nrequire superuser privileges. We also assume [NCCL](https://developer.nvidia.com/nccl) is available for communication\nacross computation nodes and all nodes have a running `ssh` server.\n\nThe below introduced experimental tools and commands for distributed training assume a Linux OS (tested on Ubuntu\n18.04).\n\nIn this tutorial, we:\n\n1. Introduce the available API for training across multiple nodes, as well as experimental scripts for distributed\nconfiguration, training start and termination, and remote command execution.\n1. Introduce the headless mode for [AI2-THOR](https://ai2thor.allenai.org/) in `AllenAct`. Note that, in contrast with\nprevious tutorials using AI2-THOR, this time we don't require an xserver (in Linux) to be active.\n1. 
Show a training example for RoboTHOR ObjectNav on a cluster, with each node having sufficient GPUs and GPU memory to\nhost 60 experience samplers collecting rollout data.\n\nThanks to the massive parallelization of experience collection and model training enabled by\n[DD-PPO](https://arxiv.org/abs/1911.00357), we can greatly speed up training by scaling across multiple nodes:\n\n![training speedup](../img/multinode_training.jpg)\n\n## The task: ObjectNav\n\nIn ObjectNav, the goal for the agent is to navigate to an object (possibly unseen during training) of a known given\nclass and signal task completion when it determines it has reached the goal.\n\n\n## Implementation\n\nFor this tutorial, we'll use the readily available `objectnav_baselines` project, which includes configurations for\na wide variety of object navigation experiments for both iTHOR and RoboTHOR. Since those configuration files are\ndefined for a single-node setup, we will mainly focus on the changes required in the `machine_params` and\n`training_pipeline` methods.\n\nNote that, in order to use the headless version of AI2-THOR, we currently need to install a specific THOR commit,\ndifferent from the default one in `robothor_plugin`. 
Note that this command is included in the configuration script\nbelow, so **we don't need to run this**:\n\n```bash\npip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+91139c909576f3bf95a187c5b02c6fd455d06b48\n```\n\nThe experiment config starts as follows:\n\n```python\nimport math\nfrom typing import Optional, Sequence\n\nimport torch\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses import PPO\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig\nfrom allenact.utils.experiment_utils import (\n    Builder,\n    LinearDecay,\n    MultiLinearDecay,\n    TrainingPipeline,\n    PipelineStage,\n)\nfrom projects.objectnav_baselines.experiments.robothor.objectnav_robothor_rgb_resnet18gru_ddppo import (\n    ObjectNavRoboThorRGBPPOExperimentConfig as BaseConfig,\n)\n\n\nclass DistributedObjectNavRoboThorRGBPPOExperimentConfig(BaseConfig):\n    def tag(self) -> str:\n        return \"DistributedObjectNavRoboThorRGBPPO\"\n```\nWe override ObjectNavRoboThorBaseConfig's THOR_COMMIT_ID to match the installed headless one:\n```python\n    THOR_COMMIT_ID = \"91139c909576f3bf95a187c5b02c6fd455d06b48\"\n```\nAlso indicate that we're using headless THOR (for `task_sampler_args` methods):\n```python\n    THOR_IS_HEADLESS = True\n```\n**Temporary hack** Disable the `commit_id` argument passed to the THOR `Controller`'s `init` method:\n```python\n    def env_args(self):\n        res = super().env_args()\n        res.pop(\"commit_id\", None)\n        return res\n```\nAnd, of course, define the number of nodes. 
This will be used by `machine_params` and `training_pipeline` below.\nWe override the existing `ExperimentConfig`'s `init` method to include control on the number of nodes:\n\n```python\n    def __init__(\n        self,\n        distributed_nodes: int = 1,\n        num_train_processes: Optional[int] = None,\n        train_gpu_ids: Optional[Sequence[int]] = None,\n        val_gpu_ids: Optional[Sequence[int]] = None,\n        test_gpu_ids: Optional[Sequence[int]] = None,\n    ):\n        super().__init__(\n            num_train_processes=num_train_processes,\n            train_gpu_ids=train_gpu_ids,\n            val_gpu_ids=val_gpu_ids,\n            test_gpu_ids=test_gpu_ids,\n        )\n        self.distributed_nodes = distributed_nodes\n```\n### Machine parameters\n\n**Note:** We assume that all nodes are identical (same number and model of GPUs and drivers).\n\nThe `machine_params` method will be invoked by `runner.py` with different arguments, e.g. to determine the\nconfiguration for validation or training.\n\nWhen working in distributed settings, `AllenAct` needs to know the total number of trainers across all nodes as well\nas the local number of trainers. 
This is accomplished through the introduction of a `machine_id` keyword argument,\nwhich will be used to define the training parameters as follows:\n\n```python\n    def machine_params(self, mode=\"train\", **kwargs):\n        params = super().machine_params(mode, **kwargs)\n\n        if mode == \"train\":\n            params.devices = params.devices * self.distributed_nodes\n            params.nprocesses = params.nprocesses * self.distributed_nodes\n            params.sampler_devices = params.sampler_devices * self.distributed_nodes\n\n            if \"machine_id\" in kwargs:\n                machine_id = kwargs[\"machine_id\"]\n                assert (\n                    0 <= machine_id < self.distributed_nodes\n                ), f\"machine_id {machine_id} out of range [0, {self.distributed_nodes - 1}]\"\n\n                local_worker_ids = list(\n                    range(\n                        len(self.train_gpu_ids) * machine_id,\n                        len(self.train_gpu_ids) * (machine_id + 1),\n                    )\n                )\n\n                params.set_local_worker_ids(local_worker_ids)\n\n            # Confirm we're setting up train params nicely:\n            print(\n                f\"devices {params.devices}\"\n                f\"\\nnprocesses {params.nprocesses}\"\n                f\"\\nsampler_devices {params.sampler_devices}\"\n                f\"\\nlocal_worker_ids {params.local_worker_ids}\"\n            )\n        elif mode == \"valid\":\n            # Use all GPUs at their maximum capacity for training\n            # (you may run validation in a separate machine)\n            params.nprocesses = (0,)\n\n        return params\n```\nIn summary, we need to specify which indices in `devices`, `nprocesses` and `sampler_devices` correspond to the\nlocal `machine_id` node (whenever a `machine_id` is given as a keyword argument), otherwise we specify the global\nconfiguration.\n\n### Training pipeline\n\nIn preliminary ObjectNav 
experiments, we observe that small batches are useful during the initial training steps in\nterms of sample efficiency, whereas large batches are preferred during the rest of training.\n\nIn order to scale to the larger amount of collected data in multi-node settings, we will proceed with a two-stage\npipeline:\n\n1. In the first stage, we'll enforce a number of updates per amount of collected data similar to the\nconfiguration with a single node by enforcing more batches per rollout (for about 30 million steps).\n1. In the second stage we'll switch to a configuration with larger learning rate and batch size to be\nused up to the grand total of 300 million experience steps.\n\nWe first define a helper method to generate a learning rate curve with decay for each stage:\n\n```python\n    @staticmethod\n    def lr_scheduler(small_batch_steps, transition_steps, ppo_steps, lr_scaling):\n        safe_small_batch_steps = int(small_batch_steps * 1.02)\n        large_batch_and_lr_steps = ppo_steps - safe_small_batch_steps - transition_steps\n\n        # Learning rate after small batch steps (assuming decay to 0)\n        break1 = 1.0 - safe_small_batch_steps / ppo_steps\n\n        # Initial learning rate for large batch (after transition from initial to large learning rate)\n        break2 = lr_scaling * (\n            1.0 - (safe_small_batch_steps + transition_steps) / ppo_steps\n        )\n        return MultiLinearDecay(\n            [\n                # Base learning rate phase for small batch (with linear decay towards 0)\n                LinearDecay(steps=safe_small_batch_steps, startp=1.0, endp=break1,),\n                # Allow the optimizer to adapt its statistics to the changes with a larger learning rate\n                LinearDecay(steps=transition_steps, startp=break1, endp=break2,),\n                # Scaled learning rate phase for large batch (with linear decay towards 0)\n                LinearDecay(steps=large_batch_and_lr_steps, startp=break2, endp=0,),\n  
          ]\n        )\n```\nThe training pipeline looks like:\n\n```python\n    def training_pipeline(self, **kwargs):\n        # These params are identical to the baseline configuration for 60 samplers (1 machine)\n        ppo_steps = int(300e6)\n        lr = 3e-4\n        num_mini_batch = 1\n        update_repeats = 4\n        num_steps = 128\n        save_interval = 5000000\n        log_interval = 10000 if torch.cuda.is_available() else 1\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 0.95\n        max_grad_norm = 0.5\n\n        # We add 30 million steps for small batch learning\n        small_batch_steps = int(30e6)\n        # And a short transition phase towards large learning rate\n        # (see comment in the `lr_scheduler` helper method\n        transition_steps = int(2 / 3 * self.distributed_nodes * 1e6)\n\n        # Find exact number of samplers per GPU\n        assert (\n            self.num_train_processes % len(self.train_gpu_ids) == 0\n        ), \"Expected uniform number of samplers per GPU\"\n        samplers_per_gpu = self.num_train_processes // len(self.train_gpu_ids)\n\n        # Multiply num_mini_batch by the largest divisor of\n        # samplers_per_gpu to keep all batches of same size:\n        num_mini_batch_multiplier = [\n            i\n            for i in reversed(\n                range(1, min(samplers_per_gpu // 2, self.distributed_nodes) + 1)\n            )\n            if samplers_per_gpu % i == 0\n        ][0]\n\n        # Multiply update_repeats so that the product of this factor and\n        # num_mini_batch_multiplier is >= self.distributed_nodes:\n        update_repeats_multiplier = int(\n            math.ceil(self.distributed_nodes / num_mini_batch_multiplier)\n        )\n\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=log_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            
num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses={\"ppo_loss\": PPO(**PPOConfig, show_ratios=False)},\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n            pipeline_stages=[\n                # We increase the number of batches for the first stage to reach an\n                # equivalent number of updates per collected rollout data as in the\n                # 1 node/60 samplers setting\n                PipelineStage(\n                    loss_names=[\"ppo_loss\"],\n                    max_stage_steps=small_batch_steps,\n                    num_mini_batch=num_mini_batch * num_mini_batch_multiplier,\n                    update_repeats=update_repeats * update_repeats_multiplier,\n                ),\n                # Then we proceed with the base configuration (leading to larger\n                # batches due to the increased number of samplers)\n                PipelineStage(\n                    loss_names=[\"ppo_loss\"],\n                    max_stage_steps=ppo_steps - small_batch_steps,\n                ),\n            ],\n            # We use the MultiLinearDecay curve defined by the helper function,\n            # setting the learning rate scaling as the square root of the number\n            # of nodes. 
Linear scaling might also work, but we leave that\n            # check to the reader.\n            lr_scheduler_builder=Builder(\n                LambdaLR,\n                {\n                    \"lr_lambda\": self.lr_scheduler(\n                        small_batch_steps=small_batch_steps,\n                        transition_steps=transition_steps,\n                        ppo_steps=ppo_steps,\n                        lr_scaling=math.sqrt(self.distributed_nodes),\n                    )\n                },\n            ),\n        )\n```\n## Multi-node configuration\n\n**Note:** In the following, we'll assume you don't have an available setup for distributed execution, such as\n[slurm](https://slurm.schedmd.com/documentation.html). If you do have access to a better alternative to setup and run\ndistributed processes, we encourage you to use that. The experimental distributed tools included here are intended for\na rather basic usage pattern that might not suit your needs.\n\nIf we haven't set up AllenAct with the headless version of AI2-THOR in our nodes, we can define a configuration script\nsimilar to:\n\n```bash\n#!/bin/bash\n\n# Prepare a virtualenv for allenact\nsudo apt-get install -y python3-venv\npython3 -mvenv ~/allenact_venv\nsource ~/allenact_venv/bin/activate\npip install -U pip wheel\n\n# Install AllenAct\ncd ~\ngit clone https://github.com/allenai/allenact.git\ncd allenact\n\n# Install AllenAct + RoboTHOR plugin dependencies\npip install -r requirements.txt\npip install -r allenact_plugins/robothor_plugin/extra_requirements.txt\n\n# Download + setup datasets\nbash datasets/download_navigation_datasets.sh robothor-objectnav\n\n# Install headless AI2-THOR and required libvulkan1\nsudo apt-get install -y libvulkan1\npip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+91139c909576f3bf95a187c5b02c6fd455d06b48\n\n# Download AI2-THOR binaries\npython -c \"from ai2thor.controller import Controller; c=Controller(); c.stop()\"\n\necho 
DONE\n```\n\nand save it as `headless_robothor_config.sh`. Note that some of the configuration steps in the script assume you have\nsuperuser privileges.\n\nThen, we can just copy this file to the first node in our cluster and run it with:\n\n```bash\nsource <PATH/TO/headless_robothor_config.sh>\n```\n\nIf everything went well, we should be able to\n\n```bash\ncd ~/allenact && source ~/allenact_venv/bin/activate\n```\n\nNote that we might need to install `libvulkan1` in each node (even if the AllenAct setup is shared across nodes) if it\nis not already available.\n\n### Local filesystems\n\nIf our cluster does not use a shared filesystem, we'll need to propagate the setup to the rest of nodes. Assuming\nwe can just `ssh` with the current user to all nodes, we can propagate our config with\n\n```bash\nscripts/dconfig.py --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \\\n--config_script <PATH/TO/headless_robothor_config.sh>\n```\n\nand we can check the state of the installation with the `scripts/dcommand.py` tool:\n\n```bash\nscripts/dcommand.py --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \\\n--command 'tail -n 5 ~/log_allenact_distributed_config'\n```\n\nIf everything went fine, all requirements are ready to start running our experiment.\n\n## Run your experiment\n\n**Note:** In this section, we again assume you don't have an available setup for distributed execution, such as\n[slurm](https://slurm.schedmd.com/documentation.html). If you do have access to a better alternative to setup/run\ndistributed processes, we encourage you to use that. The experimental distributed tools included here are intended for\na rather basic usage pattern that might not suit your needs.\n\nOur experimental extension to AllenAct's `main.py` script allows using practically identical commands to the ones\nused in a single-node setup to start our experiments. 
From the root `allenact` directory, we can simply invoke\n\n```bash\nscripts/dmain.py projects/tutorials/distributed_objectnav_tutorial.py \\\n--config_kwargs '{\"distributed_nodes\":3}' \\\n--runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \\\n--env_activate_path ~/allenact_venv/bin/activate \\\n--allenact_path ~/allenact \\\n--distributed_ip_and_port <FIRST_IP_ADDRESS_IN_RUNS_ON_LIST>:<FREE_PORT_NUMBER_FOR_THIS_IP_ADDRESS>\n```\n\nThis script will do several things for you, including synchronization of the changes in the `allenact` directory\nto all machines, enabling virtual environments in each node, sharing the same random seed for all `main.py` instances,\nassigning `--machine_id` parameters required for multi-node training, and redirecting the process output to a log file\nunder the output results folder.\n\nNote that by changing the value associated with the `distributed_nodes` key in the `config_kwargs` map and the `runs_on`\nlist of IPs, we can easily scale our training to e.g. 1, 3, or 8 nodes as shown in the chart above. Note that for this\ncall to work unmodified, you should have sufficient GPUs/GPU memory to host 60 samplers per node.\n\n## Track and stop your experiment\n\nYou might have noticed that, when your experiment started with the above command, a file was created under\n`~/.allenact`. This file includes IP addresses and screen session IDs for all nodes. It can be used\nby the already introduced `scripts/dcommand.py` script, if we omit the `--runs_on` argument, to call a command on each\nnode via ssh; but most importantly it is used by the `scripts/dkill.py` script to terminate all screen sessions hosting\nour training processes.\n\n### Experiment tracking\n\nA simple way to check all machines are training, assuming you have `nvidia-smi` installed in all nodes, is to just call\n\n```bash\nscripts/dcommand.py\n```\n\nfrom the root `allenact` directory. 
If everything is working well, the GPU usage stats from `nvidia-smi` should reflect\nongoing activity. You can also add different commands to be executed by each node. It is of course also possible to run\ntensorboard on any of the nodes, if that's your preference.\n\n### Experiment termination\n\nJust call\n\n```bash\nscripts/dkill.py\n```\n\nAfter killing all involved screen sessions, you will be asked about whether you also want to delete the \"killfile\"\nstored under the `~/.allenact` directory (which might be your preferred option once all processes are terminated).\n\nWe hope this tutorial will help you start quickly testing new ideas! Even if we've only explored moderate settings of\nup to 480 experience samplers, you might want to consider some additional changes (like the\n[choice for the optimizer](https://arxiv.org/abs/2103.07013)) if you plan to run at larger scale.\n\n"
  },
  {
    "path": "docs/tutorials/gym-mujoco-tutorial.md",
    "content": "<!-- DO NOT EDIT THIS FILE. --> \n<!-- THIS FILE WAS AUTOGENERATED FROM 'ALLENACT_BASE_DIR/projects/tutorials/gym_mujoco_tutorial.py', EDIT IT INSTEAD. -->\n\n# Tutorial: OpenAI gym MuJoCo environment.\n**Note** The provided commands to execute in this tutorial assume you have\n[installed the full library](../installation/installation-allenact.md#full-library) and the requirements for the\n`gym_plugin`. The latter can be installed by\n\n```bash\npip install -r allenact_plugins/gym_plugin/extra_requirements.txt\n```\n\nThe environments for this tutorial use the [MuJoCo](http://www.mujoco.org/) (**Mu**lti-**Jo**int dynamics in **Co**ntact)\nphysics simulator, which must also be installed by following the instructions\n[here](https://github.com/openai/mujoco-py).\n\n## The task\n\nFor this tutorial, we'll focus on one of the continuous-control environments under the `mujoco` group of `gym`\nenvironments: [Ant-v2](https://gym.openai.com/envs/Ant-v2/). In this task, the goal\nis to make a four-legged creature, \"ant\", walk forward as fast as possible. A random agent of \"Ant-v2\" is shown below.\n\n![The Ant-v2 task](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_random.gif).\n\nTo achieve the goal, we need to provide continuous control for the agent moving forward with four legs with the\n`x` velocity as high as possible for at most 1000 episode steps. The agent has failed, and the episode is done, if the `z` position\nis out of the range [0.2, 1.0]. 
The dimension of the action space is 8 and 111 for the dimension of the observation\nspace that maps to different body parts, including 3D position `(x,y,z)`, orientation(quaternion `x`,`y`,`z`,`w`)\nof the torso, and the joint angles, 3D velocity `(x,y,z)`, 3D angular velocity `(x,y,z)`, and joint velocities.\nThe rewards for the agent \"ant\" are composed of the forward rewards, healthy rewards, control cost, and contact cost.\n\n## Implementation\n\nFor this tutorial, we'll use the readily available `gym_plugin`, which includes a\n[wrapper for `gym` environments](../api/allenact_plugins/gym_plugin/gym_environment.md#gymenvironment), a\n[task sampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler) and\n[task definition](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymcontinuousbox2dtask), a\n[sensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to wrap the observations provided by the `gym`\nenvironment, and a simple [model](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic).\nThe experiment config, similar to the one used for the\n[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md), is defined as follows:\n\n```python\nfrom typing import Dict, Optional, List, Any, cast\n\nimport gym\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPO\n\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic\nfrom allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor\n\nfrom allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler\nfrom allenact.utils.experiment_utils import (\n    TrainingPipeline,\n    Builder,\n    PipelineStage,\n    LinearDecay,\n)\nfrom allenact.utils.viz_utils import VizSuite, 
AgentViewViz\n\n\nclass HandManipulateTutorialExperimentConfig(ExperimentConfig):\n    @classmethod\n    def tag(cls) -> str:\n        return \"GymMuJoCoTutorial\"\n```\n### Sensors and Model\n\nAs mentioned above, we'll use a [GymMuJoCoSensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymmujocosensor) to provide\nfull observations from the state of the `gym` environment to our model.\n\n```python\n    SENSORS = [\n        GymMuJoCoSensor(\"Ant-v2\", uuid=\"gym_mujoco_data\"),\n    ]\n```\nWe define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic,\n[MemorylessActorCritic](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). Since\nthis is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]`\ninstead of `ActorCriticModel[CategoricalDistr]`, since we'll use a\n[Gaussian distribution](../api/allenact_plugins/gym_plugin/gym_distributions.md#gaussiandistr) to sample actions.\n\n```python\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        \"\"\"We define our `ActorCriticModel` agent using a lightweight\n        implementation with separate MLPs for actors and critic,\n        MemorylessActorCritic.\n\n        Since this is a model for continuous control, note that the\n        superclass of our model is `ActorCriticModel[GaussianDistr]`\n        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use\n        a Gaussian distribution to sample actions.\n        \"\"\"\n        return MemorylessActorCritic(\n            input_uuid=\"gym_mujoco_data\",\n            action_space=gym.spaces.Box(\n                -3.0, 3.0, (8,), \"float32\"\n            ),  # 8 actors, each in the range [-3.0, 3.0]\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            action_std=0.5,\n        )\n```\n### Task samplers\nWe use an available `TaskSampler` implementation for `gym` 
environments that allows to sample\n[GymTasks](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtask):\n[GymTaskSampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler). Even though it is possible to let the task\nsampler instantiate the proper sensor for the chosen task name (by passing `None`), we use the sensors we created\nabove, which contain a custom identifier for the actual observation space (`gym_mujoco_data`) also used by the model.\n\n```python\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return GymTaskSampler(gym_env_type=\"Ant-v2\", **kwargs)\n```\nFor convenience, we will use a `_get_sampler_args` method to generate the task sampler arguments for all three\nmodes, `train, valid, test`:\n\n```python\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(\n            process_ind=process_ind, mode=\"train\", seeds=seeds\n        )\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(\n            process_ind=process_ind, mode=\"valid\", seeds=seeds\n        )\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(process_ind=process_ind, mode=\"test\", seeds=seeds)\n```\nSimilarly to what we do in the Minigrid navigation tutorial, the task sampler samples 
random tasks for ever, while,\nduring testing (or validation), we sample a fixed number of tasks.\n\n```python\n    def _get_sampler_args(\n        self, process_ind: int, mode: str, seeds: List[int]\n    ) -> Dict[str, Any]:\n        \"\"\"Generate initialization arguments for train, valid, and test\n        TaskSamplers.\n\n        # Parameters\n        process_ind : index of the current task sampler\n        mode:  one of `train`, `valid`, or `test`\n        \"\"\"\n        if mode == \"train\":\n            max_tasks = None  # infinite training tasks\n            task_seeds_list = None  # no predefined random seeds for training\n            deterministic_sampling = False  # randomly sample tasks in training\n        else:\n            max_tasks = 4\n\n            # one seed for each task to sample:\n            # - ensures different seeds for each sampler, and\n            # - ensures a deterministic set of sampled tasks.\n            task_seeds_list = list(\n                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)\n            )\n\n            deterministic_sampling = (\n                True  # deterministically sample task in validation/testing\n            )\n\n        return dict(\n            gym_env_types=[\"Ant-v2\"],\n            sensors=self.SENSORS,  # sensors used to return observations to the agent\n            max_tasks=max_tasks,  # see above\n            task_seeds_list=task_seeds_list,  # see above\n            deterministic_sampling=deterministic_sampling,  # see above\n            seed=seeds[process_ind],\n        )\n```\nNote that we just sample 4 tasks for validation and testing in this case, which suffice to illustrate the model's\nsuccess.\n\n### Machine parameters\n\nIn this tutorial, we just train the model on the CPU. We allocate a larger number of samplers for training (8) than\nfor validation or testing (just 1), and we default to CPU usage by returning an empty list of `devices`. 
We also\ninclude a video visualizer (`AgentViewViz`) in test mode.\n\n```python\n    @classmethod\n    def machine_params(cls, mode=\"train\", **kwargs) -> Dict[str, Any]:\n        visualizer = None\n        if mode == \"test\":\n            visualizer = VizSuite(\n                mode=mode,\n                video_viz=AgentViewViz(\n                    label=\"episode_vid\",\n                    max_clip_length=400,\n                    vector_task_source=(\"render\", {\"mode\": \"rgb_array\"}),\n                    fps=30,\n                ),\n            )\n        return {\n            \"nprocesses\": 8 if mode == \"train\" else 1,  # rollout\n            \"devices\": [],\n            \"visualizer\": visualizer,\n        }\n```\n### Training pipeline\n\nThe last definition is the training pipeline. In this case, we use a PPO stage with linearly decaying learning rate\nand 10 single-batch update repeats per rollout. The reward should exceed 4,000\nin 20M steps in the test. In order to make the \"ant\" run with an obvious fast speed, we train the agents using PPO\nwith 3e7 steps.\n\n```python\n    @classmethod\n    def training_pipeline(cls, **kwargs) -> TrainingPipeline:\n        lr = 3e-4\n        ppo_steps = int(3e7)\n        clip_param = 0.2\n        value_loss_coef = 0.5\n        entropy_coef = 0.0\n        num_mini_batch = 4  # optimal 64\n        update_repeats = 10\n        max_grad_norm = 0.5\n        num_steps = 2048\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 0.95\n        advance_scene_rollout_period = None\n        save_interval = 200000\n        metric_accumulate_interval = 50000\n        return TrainingPipeline(\n            named_losses=dict(\n                ppo_loss=PPO(\n                    clip_param=clip_param,\n                    value_loss_coef=value_loss_coef,\n                    entropy_coef=entropy_coef,\n                ),\n            ),  # type:ignore\n            pipeline_stages=[\n                
PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps),\n            ],\n            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=advance_scene_rollout_period,\n            save_interval=save_interval,\n            metric_accumulate_interval=metric_accumulate_interval,\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps, startp=1, endp=0)},\n            ),\n        )\n```\n## Training and validation\n\nWe have a complete implementation of this experiment's configuration class in `projects/tutorials/gym_mujoco_tutorial.py`.\nTo start training from scratch, we just need to invoke\n\n```bash\nPYTHONPATH=. python allenact/main.py gym_mujoco_tutorial -b projects/tutorials -m 8 -o /PATH/TO/gym_mujoco_output -s 0 -e\n```\n\nfrom the `allenact` root directory. Note that we include `-e` to enforce deterministic evaluation. Please refer to the\n[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md) if in doubt of the meaning of the rest of parameters.\n\nIf we have Tensorboard installed, we can track progress with\n```bash\ntensorboard --logdir /PATH/TO/gym_mujoco_output\n```\nwhich will default to the URL [http://localhost:6006/](http://localhost:6006/).\n\nAfter 30,000,000 steps, the script will terminate. 
If everything went well, the `valid` success rate should be 1\nand the mean reward should rise above 4,000 within 20,000,000 steps, while the average episode length should stay at or a\nlittle below 1,000.\n\n## Testing\n\nThe training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the\nsubfolders in the path to the checkpoints, saved under the output folder.\nIn order to evaluate (i.e. test) a collection of checkpoints, we need to pass the `--eval` flag and specify the\ndirectory containing the checkpoints with the `--checkpoint CHECKPOINT_DIR` option:\n```bash\nPYTHONPATH=. python allenact/main.py gym_mujoco_tutorial \\\n-b projects/tutorials \\\n-m 1 \\\n-o /PATH/TO/gym_mujoco_output \\\n-s 0 \\\n-e \\\n--eval \\\n--checkpoint /PATH/TO/gym_mujoco_output/checkpoints/GymMuJoCoTutorial/YOUR_START_DATE\n```\n\nIf everything went well, the `test` success rate should converge to 1\nand the mean reward should rise above 4,000 within 20,000,000 steps, while the average episode length should stay at or a\nlittle below 1,000. The `gif` results can be seen in the image tab of Tensorboard while testing.\nThe output should be something like this:\n\n![results](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_test.png).\n\nAnd the `gif` results can be seen in the image tab of Tensorboard while testing.\n\n![mp4 demo](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_test.gif)\n\nIf the test command fails with `pyglet.canvas.xlib.NoSuchDisplayException: Cannot connect to \"None\"`, e.g. when running\nremotely, try prepending `DISPLAY=:0.0` to the command above, assuming you have an xserver running with such display\navailable:\n\n```bash\nDISPLAY=:0.0 PYTHONPATH=. 
python allenact/main.py gym_mujoco_tutorial \\\n-b projects/tutorials \\\n-m 1 \\\n-o /PATH/TO/gym_mujoco_output \\\n-s 0 \\\n-e \\\n--eval \\\n--checkpoint /PATH/TO/gym_mujoco_output/checkpoints/GymMuJoCoTutorial/YOUR_START_DATE\n```\n\n"
  },
  {
    "path": "docs/tutorials/gym-tutorial.md",
    "content": "<!-- DO NOT EDIT THIS FILE. --> \n<!-- THIS FILE WAS AUTOGENERATED FROM 'ALLENACT_BASE_DIR/projects/tutorials/gym_tutorial.py', EDIT IT INSTEAD. -->\n\n# Tutorial: OpenAI gym for continuous control.\n**Note** The provided commands to execute in this tutorial assume you have\n[installed the full library](../installation/installation-allenact.md#full-library) and the requirements for the\n`gym_plugin`. The latter can be installed by\n\n```bash\npip install -r allenact_plugins/gym_plugin/extra_requirements.txt\n```\n\nIn this tutorial, we:\n\n1. Introduce the `gym_plugin`, which enables some of the tasks in [OpenAI's gym](https://gym.openai.com/) for training\nand inference within AllenAct.\n1. Show an example of continuous control with an arbitrary action space covering 2 policies for one of the `gym` tasks.\n\n\n## The task\n\nFor this tutorial, we'll focus on one of the continuous-control environments under the `Box2D` group of `gym`\nenvironments: [LunarLanderContinuous-v2](https://gym.openai.com/envs/LunarLanderContinuous-v2/). In this task, the goal\nis to smoothly land a lunar module in a landing pad, as shown below.\n\n![The LunarLanderContinuous-v2 task](../img/lunar_lander_continuous_demo.png).\n\nTo achieve this goal, we need to provide continuous control for a main engine and directional one (2 real values). In\norder to solve the task, the expected reward is of at least 200 points. The controls for main and directional engines\nare both in the range [-1.0, 1.0] and the observation space is composed of 8 scalars indicating `x` and `y` positions,\n`x` and `y` velocities, lander angle and angular velocity, and left and right ground contact. 
Note that these 8 scalars\nprovide a full observation of the state.\n\n\n## Implementation\n\nFor this tutorial, we'll use the readily available `gym_plugin`, which includes a\n[wrapper for `gym` environments](../api/allenact_plugins/gym_plugin/gym_environment.md#gymenvironment), a\n[task sampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler) and\n[task definition](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymcontinuousbox2dtask), a\n[sensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to wrap the observations provided by the `gym`\nenvironment, and a simple [model](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic).\n\nThe experiment config, similar to the one used for the\n[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md), is defined as follows:\n\n```python\nfrom typing import Dict, Optional, List, Any, cast\n\nimport gym\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPO\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic\nfrom allenact_plugins.gym_plugin.gym_sensors import GymBox2DSensor\nfrom allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler\nfrom allenact.utils.experiment_utils import (\n    TrainingPipeline,\n    Builder,\n    PipelineStage,\n    LinearDecay,\n)\nfrom allenact.utils.viz_utils import VizSuite, AgentViewViz\n\n\nclass GymTutorialExperimentConfig(ExperimentConfig):\n    @classmethod\n    def tag(cls) -> str:\n        return \"GymTutorial\"\n```\n### Sensors and Model\n\nAs mentioned above, we'll use a [GymBox2DSensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to provide\nfull observations from the state of the `gym` environment to our 
model.\n\n```python\n    SENSORS = [\n        GymBox2DSensor(\"LunarLanderContinuous-v2\", uuid=\"gym_box_data\"),\n    ]\n```\nWe define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic,\n[MemorylessActorCritic](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). Since\nthis is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]`\ninstead of `ActorCriticModel[CategoricalDistr]`, since we'll use a\n[Gaussian distribution](../api/allenact_plugins/gym_plugin/gym_distributions.md#gaussiandistr) to sample actions.\n\n```python\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        return MemorylessActorCritic(\n            input_uuid=\"gym_box_data\",\n            action_space=gym.spaces.Box(\n                -1.0, 1.0, (2,)\n            ),  # 2 actors, each in the range [-1.0, 1.0]\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            action_std=0.5,\n        )\n```\n### Task samplers\nWe use an available `TaskSampler` implementation for `gym` environments that allows to sample\n[GymTasks](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtask):\n[GymTaskSampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler). 
Even though it is possible to let the task\nsampler instantiate the proper sensor for the chosen task name (by passing `None`), we use the sensors we created\nabove, which contain a custom identifier for the actual observation space (`gym_box_data`) also used by the model.\n\n```python\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return GymTaskSampler(**kwargs)\n```\nFor convenience, we will use a `_get_sampler_args` method to generate the task sampler arguments for all three\nmodes, `train, valid, test`:\n\n```python\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(\n            process_ind=process_ind, mode=\"train\", seeds=seeds\n        )\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(\n            process_ind=process_ind, mode=\"valid\", seeds=seeds\n        )\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(process_ind=process_ind, mode=\"test\", seeds=seeds)\n```\nSimilarly to what we do in the Minigrid navigation tutorial, the task sampler samples random tasks for ever, while,\nduring testing (or validation), we sample a fixed number of tasks.\n\n```python\n    def _get_sampler_args(\n        self, process_ind: int, mode: str, seeds: List[int]\n    ) -> 
Dict[str, Any]:\n        \"\"\"Generate initialization arguments for train, valid, and test\n        TaskSamplers.\n\n        # Parameters\n        process_ind : index of the current task sampler\n        mode:  one of `train`, `valid`, or `test`\n        \"\"\"\n        if mode == \"train\":\n            max_tasks = None  # infinite training tasks\n            task_seeds_list = None  # no predefined random seeds for training\n            deterministic_sampling = False  # randomly sample tasks in training\n        else:\n            max_tasks = 3\n\n            # one seed for each task to sample:\n            # - ensures different seeds for each sampler, and\n            # - ensures a deterministic set of sampled tasks.\n            task_seeds_list = list(\n                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)\n            )\n\n            deterministic_sampling = (\n                True  # deterministically sample task in validation/testing\n            )\n\n        return dict(\n            gym_env_types=[\"LunarLanderContinuous-v2\"],\n            sensors=self.SENSORS,  # sensors used to return observations to the agent\n            max_tasks=max_tasks,  # see above\n            task_seeds_list=task_seeds_list,  # see above\n            deterministic_sampling=deterministic_sampling,  # see above\n            seed=seeds[process_ind],\n        )\n```\nNote that we just sample 3 tasks for validation and testing in this case, which suffice to illustrate the model's\nsuccess.\n\n### Machine parameters\n\nGiven the simplicity of the task and model, we can just train the model on the CPU. During training, success should\nreach 100% in less than 10 minutes, whereas solving the task (evaluation reward > 200) might take about 20 minutes\n(on a laptop CPU).\n\nWe allocate a larger number of samplers for training (8) than for validation or testing (just 1), and we default to\nCPU usage by returning an empty list of `devices`. 
We also include a video visualizer (`AgentViewViz`) in test mode.\n\n```python\n    @classmethod\n    def machine_params(cls, mode=\"train\", **kwargs) -> Dict[str, Any]:\n        visualizer = None\n        if mode == \"test\":\n            visualizer = VizSuite(\n                mode=mode,\n                video_viz=AgentViewViz(\n                    label=\"episode_vid\",\n                    max_clip_length=400,\n                    vector_task_source=(\"render\", {\"mode\": \"rgb_array\"}),\n                    fps=30,\n                ),\n            )\n        return {\n            \"nprocesses\": 8 if mode == \"train\" else 1,\n            \"devices\": [],\n            \"visualizer\": visualizer,\n        }\n```\n### Training pipeline\n\nThe last definition is the training pipeline. In this case, we use a PPO stage with linearly decaying learning rate\nand 80 single-batch update repeats per rollout:\n\n```python\n    @classmethod\n    def training_pipeline(cls, **kwargs) -> TrainingPipeline:\n        ppo_steps = int(1.2e6)\n        return TrainingPipeline(\n            named_losses=dict(\n                ppo_loss=PPO(clip_param=0.2, value_loss_coef=0.5, entropy_coef=0.0,),\n            ),  # type:ignore\n            pipeline_stages=[\n                PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps),\n            ],\n            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-3)),\n            num_mini_batch=1,\n            update_repeats=80,\n            max_grad_norm=100,\n            num_steps=2000,\n            gamma=0.99,\n            use_gae=False,\n            gae_lambda=0.95,\n            advance_scene_rollout_period=None,\n            save_interval=200000,\n            metric_accumulate_interval=50000,\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)},  # type:ignore\n            ),\n        )\n```\n## Training and validation\n\nWe have a 
complete implementation of this experiment's configuration class in `projects/tutorials/gym_tutorial.py`.\nTo start training from scratch, we just need to invoke\n\n```bash\nPYTHONPATH=. python allenact/main.py gym_tutorial -b projects/tutorials -m 8 -o /PATH/TO/gym_output -s 54321 -e\n```\n\nfrom the `allenact` root directory. Note that we include `-e` to enforce deterministic evaluation. Please refer to the\n[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md) if in doubt of the meaning of the rest of parameters.\n\nIf we have Tensorboard installed, we can track progress with\n```bash\ntensorboard --logdir /PATH/TO/gym_output\n```\nwhich will default to the URL [http://localhost:6006/](http://localhost:6006/).\n\nAfter 1,200,000 steps, the script will terminate. If everything went well, the `valid` success rate should quickly\nconverge to 1 and the mean reward to above 250, while the average episode length should stay below or near 300.\n\n## Testing\n\nThe training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the\nsubfolders in the path to the checkpoints, saved under the output folder.\nIn order to evaluate (i.e. test) a collection of checkpoints, we need to pass the `--eval` flag and specify the\ndirectory containing the checkpoints with the `--checkpoint CHECKPOINT_DIR` option:\n```bash\nPYTHONPATH=. python allenact/main.py gym_tutorial \\\n-b projects/tutorials \\\n-m 1 \\\n-o /PATH/TO/gym_output \\\n-s 54321 \\\n-e \\\n--eval \\\n--checkpoint /PATH/TO/gym_output/checkpoints/GymTutorial/YOUR_START_DATE \\\n--approx_ckpt_step_interval 800000 # Skip some checkpoints\n```\n\nThe option `--approx_ckpt_step_interval 800000` tells AllenAct that we only want to evaluate checkpoints\nwhich were saved every ~800000 steps, this lets us avoid evaluating every saved checkpoint. 
If everything went well,\nthe `test` success rate should converge to 1, the episode length below or near 300 steps, and the mean reward to above\n250. The images tab in tensorboard will contain videos for the sampled test episodes.\n\n![video_results](../img/lunar_lander_continuous_test.png).\n\nIf the test command fails with `pyglet.canvas.xlib.NoSuchDisplayException: Cannot connect to \"None\"`, e.g. when running\nremotely, try prepending `DISPLAY=:0.0` to the command above, assuming you have an xserver running with such display\navailable:\n\n```bash\nDISPLAY=:0.0 PYTHONPATH=. python allenact/main.py gym_tutorial \\\n-b projects/tutorials \\\n-m 1 \\\n-o /PATH/TO/gym_output \\\n-s 54321 \\\n-e \\\n--eval \\\n--checkpoint /PATH/TO/gym_output/checkpoints/GymTutorial/YOUR_START_DATE \\\n--approx_ckpt_step_interval 800000\n```\n\n"
  },
  {
    "path": "docs/tutorials/index.md",
    "content": "# AllenAct Tutorials\n\n**Note** The provided commands to execute these tutorials assume you have\n[installed the full library](../installation/installation-allenact.md#full-library)\nand the specific requirements for each used plugin.\n\nWe provide several tutorials to help ramp up researchers to the field of Embodied-AI as well as to the AllenAct framework.\n\n## [Navigation in MiniGrid](../tutorials/minigrid-tutorial.md)\n\n![MiniGridEmptyRandom5x5 task example](../img/minigrid_environment.png)\n\nWe train an agent to complete the `MiniGrid-Empty-Random-5x5-v0` task within the [MiniGrid](https://github.com/maximecb/gym-minigrid) environment. \n\nThis tutorial presents:\n\n* Writing an experiment configuration file with a simple training pipeline from scratch.\n* Using one of the supported environments with minimal user effort.\n* Training, validating and testing your experiment from the command line.\n\n[Follow the tutorial here.](../tutorials/minigrid-tutorial.md)\n\n\n## [PointNav in RoboTHOR](../tutorials/training-a-pointnav-model.md)\n\n![RoboTHOR Robot](../img/RoboTHOR_robot.jpg)\n\nWe train an agent on the Point Navigation task within the RoboTHOR Embodied-AI environment.\n\nThis tutorial presents:\n\n* The basics of the Point Navigation task, a common task in Embodied AI\n* Using an external dataset\n* Writing an experiment configuration file with a simple training pipeline from scratch.\n* Using one of the supported environments with minimal user effort.\n* Training, validating and testing your experiment from the command line.\n* Testing a pre-trained model\n\n[Follow the tutorial here.](../tutorials/training-a-pointnav-model.md)\n\n\n## [Swapping in a new environment](../tutorials/transfering-to-a-different-environment-framework.md)\n\n![Environment Transfer](../img/env_transfer.jpg)\n\nThis tutorial demonstrates how easy it is to modify the experiment config created in the RoboTHOR PointNav tutorial to work with the iTHOR and Habitat environments. 
\n\n[Follow the tutorial here.](../tutorials/transfering-to-a-different-environment-framework.md)\n\n\n## [Using a pretrained model](../tutorials/running-inference-on-a-pretrained-model.md)\n\n![Pretrained inference](../img/viz_pretrained_2videos.jpg)\n\nThis tutorial shows how to run inference on one or more checkpoints of a pretrained model and generate\nvisualizations of different types.\n\n[Follow the tutorial here.](../tutorials/running-inference-on-a-pretrained-model.md)\n\n\n## [Off-policy training](../tutorials/offpolicy-tutorial.md)\n\nThis tutorial shows how to train an Actor using an off-policy dataset with expert actions.\n\n[Follow the tutorial here.](../tutorials/offpolicy-tutorial.md)\n\n\n## [OpenAI gym for continuous control](../tutorials/gym-tutorial.md)\n\n![gym task example](../img/lunar_lander_continuous_demo.png)\n\nWe train an agent to complete the `LunarLanderContinuous-v2` task from\n[OpenAI gym](https://gym.openai.com/envs/LunarLanderContinuous-v2). \n\nThis tutorial presents:\n\n* A `gym` plugin for `AllenAct`. \n* A continuous control example with multiple actors using PPO.\n\n[Follow the tutorial here.](../tutorials/gym-tutorial.md)\n\n\n<!---\n## [OpenAI gym for MuJoCo](../tutorials/gym-mujoco-tutorial.md)\n\nIn this tutorial, we train a four-legged creature,\n\"ant\", to walk forward as fast as possible in [MuJoCo](http://www.mujoco.org/).\n\n[Follow the tutorial here.](../tutorials/gym-mujoco-tutorial.md)\n--->\n\n\n## [Multi-node training for RoboTHOR ObjectNav](../tutorials/distributed-objectnav-tutorial.md)\n\n![training speedup](../img/multinode_training.jpg)\n\nWe train an agent to navigate to an object in a fraction of the time\nrequired for training in one node by distributing training across multiple\nnodes. \n\nThis tutorial presents:\n\n1. The AllenAct API for training across multiple nodes, as well as\nexperimental scripts for distributed configuration, training start\nand termination, and remote command execution.\n2. 
The introduction of the headless mode for [AI2-THOR](https://ai2thor.allenai.org/) in `AllenAct`.\n\n[Follow the tutorial here.](../tutorials/distributed-objectnav-tutorial.md)\n"
  },
  {
    "path": "docs/tutorials/minigrid-tutorial.md",
    "content": "<!-- DO NOT EDIT THIS FILE. --> \n<!-- THIS FILE WAS AUTOGENERATED FROM 'ALLENACT_BASE_DIR/projects/tutorials/minigrid_tutorial.py', EDIT IT INSTEAD. -->\n\n# Tutorial: Navigation in MiniGrid.\nIn this tutorial, we will train an agent to complete the `MiniGrid-Empty-Random-5x5-v0` task within the\n[MiniGrid](https://github.com/maximecb/gym-minigrid) environment. We will demonstrate how to:\n\n* Write an experiment configuration file with a simple training pipeline from scratch.\n* Use one of the supported environments with minimal user effort.\n* Train, validate and test your experiment from the command line.\n\nThis tutorial assumes the [installation instructions](../installation/installation-allenact.md) have already been\nfollowed and that, to some extent, this framework's [abstractions](../getting_started/abstractions.md) are known.\nThe `extra_requirements` for `minigrid_plugin` and `babyai_plugin` can be installed with.\n\n```bash\npip install -r allenact_plugins/minigrid_plugin/extra_requirements.txt; pip install -r allenact_plugins/babyai_plugin/extra_requirements.txt\n```\n\n## The task\nA `MiniGrid-Empty-Random-5x5-v0` task consists of a grid of dimensions 5x5 where an agent spawned at a random\nlocation and orientation has to navigate to the visitable bottom right corner cell of the grid by sequences of three\npossible actions (rotate left/right and move forward). A visualization of the environment with expert steps in a random\n`MiniGrid-Empty-Random-5x5-v0` task looks like\n\n![MiniGridEmptyRandom5x5 task example](../img/minigrid_environment.png)\n\nThe observation for the agent is a subset of the entire grid, simulating a simplified limited field of view, as\ndepicted by the highlighted rectangle (observed subset of the grid) around the agent (red arrow). 
Gray cells correspond\nto walls.\n\n## Experiment configuration file\n\nOur complete experiment consists of:\n\n* Training a basic actor-critic agent with memory to solve randomly sampled navigation tasks.\n* Validation on a fixed set of tasks (running in parallel with training).\n* A second stage where we test saved checkpoints with a larger fixed set of tasks.\n\nThe entire configuration for the experiment, including training, validation, and testing, is encapsulated in a single\nclass implementing the `ExperimentConfig` abstraction. For this tutorial, we will follow the config under\n`projects/tutorials/minigrid_tutorial.py`.\n\nThe `ExperimentConfig` abstraction is used by the\n[OnPolicyTrainer](../api/allenact/algorithms/onpolicy_sync/engine.md#onpolicytrainer) class (for training) and the\n[OnPolicyInference](../api/allenact/algorithms/onpolicy_sync/engine.md#onpolicyinference) class (for validation and testing)\ninvoked through the entry script `main.py` that calls an orchestrating\n[OnPolicyRunner](../api/allenact/algorithms/onpolicy_sync/runner.md#onpolicyrunner) class. 
It includes:\n\n* A `tag` method to identify the experiment.\n* A `create_model` method to instantiate actor-critic models.\n* A `make_sampler_fn` method to instantiate task samplers.\n* Three `{train,valid,test}_task_sampler_args` methods describing initialization parameters for task samplers used in\ntraining, validation, and testing; including assignment of workers to devices for simulation.\n* A `machine_params` method with configuration parameters that will be used for training, validation, and testing.\n* A `training_pipeline` method describing a possibly multi-staged training pipeline with different types of losses,\nan optimizer, and other parameters like learning rates, batch sizes, etc.\n\n### Preliminaries\n\nWe first import everything we'll need to define our experiment.\n\n```python\nfrom typing import Dict, Optional, List, Any, cast\n\nimport gym\nfrom gym_minigrid.envs import EmptyRandomEnv5x5\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact.utils.experiment_utils import (\n    TrainingPipeline,\n    Builder,\n    PipelineStage,\n    LinearDecay,\n)\nfrom allenact_plugins.minigrid_plugin.minigrid_models import MiniGridSimpleConvRNN\nfrom allenact_plugins.minigrid_plugin.minigrid_sensors import EgocentricMiniGridSensor\nfrom allenact_plugins.minigrid_plugin.minigrid_tasks import (\n    MiniGridTaskSampler,\n    MiniGridTask,\n)\n```\nWe now create the `MiniGridTutorialExperimentConfig` class which we will use to define our experiment.\nFor pedagogical reasons, we will add methods to this class one at a time below with a description of what\nthese classes do.\n\n```python\nclass MiniGridTutorialExperimentConfig(ExperimentConfig):\n```\nAn experiment is identified by a 
`tag`.\n```python\n    @classmethod\n    def tag(cls) -> str:\n        return \"MiniGridTutorial\"\n```\n### Sensors and Model\n\nA readily available Sensor type for MiniGrid,\n[EgocentricMiniGridSensor](../api/allenact_plugins/minigrid_plugin/minigrid_sensors.md#egocentricminigridsensor),\nallows us to extract observations in a format consumable by an `ActorCriticModel` agent:\n\n```python\n    SENSORS = [\n        EgocentricMiniGridSensor(agent_view_size=5, view_channels=3),\n    ]\n```\nThe three `view_channels` include objects, colors and states corresponding to a partial observation of the environment\nas an image tensor, equivalent to that from `ImgObsWrapper` in\n[MiniGrid](https://github.com/maximecb/gym-minigrid#wrappers). The\nrelatively large `agent_view_size` means the view will only be clipped by the environment walls in the forward and\nlateral directions with respect to the agent's orientation.\n\nWe define our `ActorCriticModel` agent using a lightweight implementation with recurrent memory for MiniGrid\nenvironments, [MiniGridSimpleConvRNN](../api/allenact_plugins/minigrid_plugin/minigrid_models.md#minigridsimpleconvrnn):\n\n```python\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        return MiniGridSimpleConvRNN(\n            action_space=gym.spaces.Discrete(len(MiniGridTask.class_action_names())),\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            num_objects=cls.SENSORS[0].num_objects,\n            num_colors=cls.SENSORS[0].num_colors,\n            num_states=cls.SENSORS[0].num_states,\n        )\n```\n### Task samplers\n\nWe use an available TaskSampler implementation for MiniGrid environments that allows to sample both random and\ndeterministic `MiniGridTasks`,\n[MiniGridTaskSampler](../api/allenact_plugins/minigrid_plugin/minigrid_tasks.md#minigridtasksampler):\n\n```python\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return 
MiniGridTaskSampler(**kwargs)\n```\nThis task sampler will during training (or validation/testing), randomly initialize new tasks for the agent to complete.\nWhile it is not quite as important for this task type (as we test our agent in the same setting it is trained on) there\nare a lot of good reasons we would like to sample tasks differently during training than during validation or testing.\nOne good reason, that is applicable in this tutorial, is that, during training, we would like to be able to sample tasks\nforever while, during testing, we would like to sample a fixed number of tasks (as otherwise we would never finish\ntesting!). In `allenact` this is made possible by defining different arguments for the task sampler:\n\n```python\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(process_ind=process_ind, mode=\"train\")\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(process_ind=process_ind, mode=\"valid\")\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(process_ind=process_ind, mode=\"test\")\n```\nwhere, for convenience, we have defined a `_get_sampler_args` method:\n\n```python\n    def _get_sampler_args(self, process_ind: int, mode: str) -> Dict[str, Any]:\n        \"\"\"Generate 
initialization arguments for train, valid, and test\n        TaskSamplers.\n\n        # Parameters\n        process_ind : index of the current task sampler\n        mode:  one of `train`, `valid`, or `test`\n        \"\"\"\n        if mode == \"train\":\n            max_tasks = None  # infinite training tasks\n            task_seeds_list = None  # no predefined random seeds for training\n            deterministic_sampling = False  # randomly sample tasks in training\n        else:\n            max_tasks = 20 + 20 * (mode == \"test\")  # 20 tasks for valid, 40 for test\n\n            # one seed for each task to sample:\n            # - ensures different seeds for each sampler, and\n            # - ensures a deterministic set of sampled tasks.\n            task_seeds_list = list(\n                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)\n            )\n\n            deterministic_sampling = (\n                True  # deterministically sample task in validation/testing\n            )\n\n        return dict(\n            max_tasks=max_tasks,  # see above\n            env_class=self.make_env,  # builder for third-party environment (defined below)\n            sensors=self.SENSORS,  # sensors used to return observations to the agent\n            env_info=dict(),  # parameters for environment builder (none for now)\n            task_seeds_list=task_seeds_list,  # see above\n            deterministic_sampling=deterministic_sampling,  # see above\n        )\n\n    @staticmethod\n    def make_env(*args, **kwargs):\n        return EmptyRandomEnv5x5()\n```\nNote that the `env_class` argument to the Task Sampler is the one determining which task type we are going to train the\nmodel for (in this case, `MiniGrid-Empty-Random-5x5-v0` from\n[gym-minigrid](https://github.com/maximecb/gym-minigrid#empty-environment))\n. 
The sparse reward is\n[given by the environment](https://github.com/maximecb/gym-minigrid/blob/6e22a44dc67414b647063692258a4f95ce789161/gym_minigrid/minigrid.py#L819)\n, and the maximum task length is 100. For training, we opt for a default random sampling, whereas for validation and\ntest we define fixed sets of randomly sampled tasks without needing to explicitly define a dataset.\n\nIn this toy example, the maximum number of different tasks is 32. For validation we sample 320 tasks using 16 samplers,\nor 640 for testing, so we can be fairly sure that all possible tasks are visited at least once during evaluation.\n\n### Machine parameters\n\nGiven the simplicity of the task and model, we can quickly train the model on the CPU:\n\n```python\n    @classmethod\n    def machine_params(cls, mode=\"train\", **kwargs) -> Dict[str, Any]:\n        return {\n            \"nprocesses\": 128 if mode == \"train\" else 16,\n            \"devices\": [],\n        }\n```\nWe allocate a larger number of samplers for training (128) than for validation or testing (16), and we default to CPU\nusage by returning an empty list of `devices`.\n\n### Training pipeline\n\nThe last definition required before starting to train is a training pipeline. 
In this case, we just use a single PPO\nstage with linearly decaying learning rate:\n\n```python\n    @classmethod\n    def training_pipeline(cls, **kwargs) -> TrainingPipeline:\n        ppo_steps = int(150000)\n        return TrainingPipeline(\n            named_losses=dict(ppo_loss=PPO(**PPOConfig)),  # type:ignore\n            pipeline_stages=[\n                PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps)\n            ],\n            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4)),\n            num_mini_batch=4,\n            update_repeats=3,\n            max_grad_norm=0.5,\n            num_steps=16,\n            gamma=0.99,\n            use_gae=True,\n            gae_lambda=0.95,\n            advance_scene_rollout_period=None,\n            save_interval=10000,\n            metric_accumulate_interval=1,\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}  # type:ignore\n            ),\n        )\n```\nYou can see that we use a `Builder` class to postpone the construction of some of the elements, like the optimizer,\nfor which the model weights need to be known.\n\n## Training and validation\n\nWe have a complete implementation of this experiment's configuration class in `projects/tutorials/minigrid_tutorial.py`.\nTo start training from scratch, we just need to invoke\n\n```bash\nPYTHONPATH=. 
python allenact/main.py minigrid_tutorial -b projects/tutorials -m 8 -o /PATH/TO/minigrid_output -s 12345\n```\n\nfrom the `allenact` root directory.\n\n* With `-b projects/tutorials` we tell `allenact` that `minigrid_tutorial` experiment config file\nwill be found in the `projects/tutorials` directory.\n* With `-m 8` we limit the number of subprocesses to 8 (each subprocess will run 16 of the 128 training task samplers).\n* With `-o minigrid_output` we set the output folder into which results and logs will be saved.\n* With `-s 12345` we set the random seed.\n\nIf we have Tensorboard installed, we can track progress with\n```bash\ntensorboard --logdir /PATH/TO/minigrid_output\n```\nwhich will default to the URL [http://localhost:6006/](http://localhost:6006/).\n\nAfter 150,000 steps, the script will terminate and several checkpoints will be saved in the output folder.\nThe training curves should look similar to:\n\n![training curves](../img/minigrid_train.png)\n\nIf everything went well, the `valid` success rate should converge to 1 and the mean episode length to a value below 4.\n(For perfectly uniform sampling and complete observation, the expectation for the optimal policy is 3.75 steps.) In the\nnot-so-unlikely event of the run failing to converge to a near-optimal policy, we can just try to re-run (for example\nwith a different random seed). The validation curves should look similar to:\n\n![validation curves](../img/minigrid_valid.png)\n\n## Testing\n\nThe training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the\nsubfolders in the path to the checkpoints, saved under the output folder.\nIn order to evaluate (i.e. test) a particular checkpoint, we need to pass the `--eval` flag and specify the checkpoint with the\n`--checkpoint CHECKPOINT_PATH` option:\n```bash\nPYTHONPATH=. 
python allenact/main.py minigrid_tutorial \\\n-b projects/tutorials \\\n-m 1 \\\n-o /PATH/TO/minigrid_output \\\n-s 12345 \\\n--eval \\\n--checkpoint /PATH/TO/minigrid_output/checkpoints/MiniGridTutorial/YOUR_START_DATE/exp_MiniGridTutorial__stage_00__steps_000000151552.pt\n```\n\nAgain, if everything went well, the `test` success rate should converge to 1 and the mean episode length to a value\nbelow 4. Detailed results are saved under a `metrics` subfolder in the output folder.\nThe test curves should look similar to:\n\n![test curves](../img/minigrid_test.png)\n\n"
  },
  {
    "path": "docs/tutorials/offpolicy-tutorial.md",
    "content": "<!-- DO NOT EDIT THIS FILE. --> \n<!-- THIS FILE WAS AUTOGENERATED FROM 'ALLENACT_BASE_DIR/projects/tutorials/minigrid_offpolicy_tutorial.py', EDIT IT INSTEAD. -->\n\n# Tutorial: Off-policy training.\n**Note** The provided commands to execute in this tutorial assume you have\n[installed the full library](../installation/installation-allenact.md#full-library) and the `extra_requirements`\nfor the `babyai_plugin` and `minigrid_plugin`. The latter can be installed with:\n\n```bash\npip install -r allenact_plugins/babyai_plugin/extra_requirements.txt; pip install -r allenact_plugins/minigrid_plugin/extra_requirements.txt\n```\n\nIn this tutorial we'll learn how to train an agent from an external dataset by imitating expert actions via\nBehavior Cloning. We'll use a [BabyAI agent](/api/allenact_plugins/babyai_plugin/babyai_models#BabyAIRecurrentACModel) to solve\n`GoToLocal` tasks on [MiniGrid](https://github.com/maximecb/gym-minigrid); see the\n`projects/babyai_baselines/experiments/go_to_local` directory for more details.\n\nThis tutorial assumes `AllenAct`'s [abstractions](../getting_started/abstractions.md) are known.\n\n## The task\n\nIn a `GoToLocal` task, the agent immersed in a grid world has to navigate to a specific object in the presence of\nmultiple distractors, requiring the agent to understand `go to` instructions like \"go to the red ball\". For further\ndetails, please consult the [original paper](https://arxiv.org/abs/1810.08272).\n\n## Getting the dataset\n\nWe will use a large dataset (**more than 4 GB**) including expert demonstrations for `GoToLocal` tasks. To download\nthe data we'll run\n\n```bash\nPYTHONPATH=. 
python allenact_plugins/babyai_plugin/scripts/download_babyai_expert_demos.py GoToLocal\n```\n\nfrom the project's root directory, which will download `BabyAI-GoToLocal-v0.pkl` and `BabyAI-GoToLocal-v0_valid.pkl` to\nthe `allenact_plugins/babyai_plugin/data/demos` directory.\n\nWe will also generate small versions of the datasets, which will be useful if running on CPU, by calling\n\n```bash\nPYTHONPATH=. python allenact_plugins/babyai_plugin/scripts/truncate_expert_demos.py\n```\nfrom the project's root directory, which will generate `BabyAI-GoToLocal-v0-small.pkl` under the same\n`allenact_plugins/babyai_plugin/data/demos` directory.\n\n## Data storage\n\nIn order to train with an off-policy dataset, we need to define an `ExperienceStorage`. In AllenAct, an\n`ExperienceStorage` object has two primary functions:\n1. It stores/manages relevant data (e.g. similarly to the `Dataset` class in PyTorch).\n2. It loads stored data into batches that will be used for loss computation (e.g. similarly to the `DataLoader`\nclass in PyTorch).\nUnlike a PyTorch `Dataset` however, an `ExperienceStorage` object can build its dataset **at runtime** by processing\nrollouts from the agent. This flexibility allows us to, for example, implement the experience replay data structure\nused in deep Q-learning. 
For this tutorial we won't need this additional functionality as our off-policy dataset\nis a fixed collection of expert trajectories.\n\nAn example of a `ExperienceStorage` for BabyAI expert demos might look as follows:\n\n```python\nclass MiniGridExpertTrajectoryStorage(ExperienceStorage, StreamingStorageMixin):\n    def __init__(\n        self,\n        data_path: str,\n        num_samplers: int,\n        rollout_len: int,\n        instr_len: Optional[int],\n        restrict_max_steps_in_dataset: Optional[int] = None,\n        device: torch.device = torch.device(\"cpu\"),\n    ):\n        ...\n\n    def data(self) -> List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]:\n        ...\n\n    def set_partition(self, index: int, num_parts: int):\n        ...\n\n    def initialize(self, *, observations: ObservationType, **kwargs):\n        ...\n\n    def add(\n        self,\n        observations: ObservationType,\n        memory: Optional[Memory],\n        actions: torch.Tensor,\n        action_log_probs: torch.Tensor,\n        value_preds: torch.Tensor,\n        rewards: torch.Tensor,\n        masks: torch.Tensor,\n    ):\n        ...\n\n    def to(self, device: torch.device):\n        ...\n\n    def total_experiences(self) -> int:\n        ...\n\n    def reset_stream(self):\n        ...\n\n    def empty(self) -> bool:\n        ...\n\n    def _get_next_ind(self):\n        ...\n\n    def _fill_rollout_queue(self, q: queue.Queue, sampler: int):\n        ...\n\n    def get_data_for_rollout_ind(self, sampler_ind: int) -> Dict[str, np.ndarray]:\n        ...\n\n    def next_batch(self) -> Dict[str, torch.Tensor]:\n        ...\n```\nA complete example can be found in\n[MiniGridExpertTrajectoryStorage](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy#MiniGridExpertTrajectoryStorage).\n\n## Loss function\n\nOff-policy losses must implement the\n[`GenericAbstractLoss`](/api/allenact/base_abstractions/misc/#genericabstractloss)\ninterface. 
In this case, we minimize the cross-entropy between the actor's policy and the expert action:\n\n```python\nclass MiniGridOffPolicyExpertCELoss(GenericAbstractLoss):\n    def __init__(self, total_episodes_in_epoch: Optional[int] = None):\n        super().__init__()\n        self.total_episodes_in_epoch = total_episodes_in_epoch\n\n    def loss(  # type: ignore\n        self,\n        *,  # No positional arguments\n        model: ModelType,\n        batch: ObservationType,\n        batch_memory: Memory,\n        stream_memory: Memory,\n    ) -> LossOutput:\n        rollout_len, nrollouts = cast(torch.Tensor, batch[\"minigrid_ego_image\"]).shape[\n            :2\n        ]\n\n        # Initialize Memory if empty\n        if len(stream_memory) == 0:\n            spec = model.recurrent_memory_specification\n            for key in spec:\n                dims_template, dtype = spec[key]\n                # get sampler_dim and all_dims from dims_template (and nrollouts)\n\n                dim_names = [d[0] for d in dims_template]\n                sampler_dim = dim_names.index(\"sampler\")\n\n                all_dims = [d[1] for d in dims_template]\n                all_dims[sampler_dim] = nrollouts\n\n                stream_memory.check_append(\n                    key=key,\n                    tensor=torch.zeros(\n                        *all_dims,\n                        dtype=dtype,\n                        device=cast(torch.Tensor, batch[\"minigrid_ego_image\"]).device,\n                    ),\n                    sampler_dim=sampler_dim,\n                )\n\n        # Forward data (through the actor and critic)\n        ac_out, stream_memory = model.forward(\n            observations=batch,\n            memory=stream_memory,\n            prev_actions=None,  # type:ignore\n            masks=cast(torch.FloatTensor, batch[\"masks\"]),\n        )\n\n        # Compute the loss from the actor's output and expert action\n        expert_ce_loss = 
-ac_out.distributions.log_prob(batch[\"expert_action\"]).mean()\n\n        info = {\"expert_ce\": expert_ce_loss.item()}\n\n        return LossOutput(\n            value=expert_ce_loss,\n            info=info,\n            per_epoch_info={},\n            batch_memory=batch_memory,\n            stream_memory=stream_memory,\n            bsize=rollout_len * nrollouts,\n        )\n\n```\nA complete example can be found in\n[MiniGridOffPolicyExpertCELoss](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy#MiniGridOffPolicyExpertCELoss).\nNote that in this case we train the entire actor, but it would also be possible to forward data through a different\nsubgraph of the ActorCriticModel.\n\n## Experiment configuration\n\nFor the experiment configuration, we'll build on top of an existing\n[base BabyAI GoToLocal Experiment Config](/api/projects/babyai_baselines/experiments/go_to_local/base/#basebabyaigotolocalexperimentconfig).\nThe complete `ExperimentConfig` file for off-policy training is\n[here](/api/projects/tutorials/minigrid_offpolicy_tutorial/#bcoffpolicybabyaigotolocalexperimentconfig), but let's\nfocus on the most relevant aspect to enable this type of training:\nproviding an [OffPolicyPipelineComponent](/api/allenact/utils/experiment_utils/#offpolicypipelinecomponent) object as input to a\n`PipelineStage` when instantiating the `TrainingPipeline` in the `training_pipeline` method.\n\n```python\nclass BCOffPolicyBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):\n    \"\"\"BC Off-policy imitation.\"\"\"\n\n    DATASET: Optional[List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]] = None\n\n    GPU_ID = 0 if torch.cuda.is_available() else None\n\n    @classmethod\n    def tag(cls):\n        return \"BabyAIGoToLocalBCOffPolicy\"\n\n    @classmethod\n    def METRIC_ACCUMULATE_INTERVAL(cls):\n        # See BaseBabyAIGoToLocalExperimentConfig for how this is used.\n        return 1\n\n    @classmethod\n    def training_pipeline(cls, 
**kwargs):\n        total_train_steps = cls.TOTAL_IL_TRAIN_STEPS\n        ppo_info = cls.rl_loss_default(\"ppo\", steps=-1)\n\n        num_mini_batch = ppo_info[\"num_mini_batch\"]\n        update_repeats = ppo_info[\"update_repeats\"]\n\n        # fmt: off\n        return cls._training_pipeline(\n            named_losses={\n                \"offpolicy_expert_ce_loss\": MiniGridOffPolicyExpertCELoss(\n                    total_episodes_in_epoch=int(1e6)\n                ),\n            },\n            named_storages={\n                \"onpolicy\": RolloutBlockStorage(),\n                \"minigrid_offpolicy_expert\": MiniGridExpertTrajectoryStorage(\n                    data_path=os.path.join(\n                                BABYAI_EXPERT_TRAJECTORIES_DIR,\n                                \"BabyAI-GoToLocal-v0{}.pkl\".format(\n                                    \"\" if torch.cuda.is_available() else \"-small\"\n                                ),\n                            ),\n                    num_samplers=cls.NUM_TRAIN_SAMPLERS,\n                    rollout_len=cls.ROLLOUT_STEPS,\n                    instr_len=cls.INSTR_LEN,\n                ),\n            },\n            pipeline_stages=[\n                # Single stage, only with off-policy training\n                PipelineStage(\n                    loss_names=[\"offpolicy_expert_ce_loss\"],                                              # no on-policy losses\n                    max_stage_steps=total_train_steps,                          # keep sampling episodes in the stage\n                    stage_components=[\n                        StageComponent(\n                            uuid=\"offpolicy\",\n                            storage_uuid=\"minigrid_offpolicy_expert\",\n                            loss_names=[\"offpolicy_expert_ce_loss\"],\n                            training_settings=TrainingSettings(\n                                update_repeats=num_mini_batch * update_repeats,\n               
                 num_mini_batch=1,\n                            )\n                        )\n                    ],\n                ),\n            ],\n            # As we don't have any on-policy losses, we set the next\n            # two values to zero to ensure we don't attempt to\n            # compute gradients for on-policy rollouts:\n            num_mini_batch=0,\n            update_repeats=0,\n            total_train_steps=total_train_steps,\n        )\n        # fmt: on\n```\nYou'll have noted that it is possible to combine on-policy and off-policy training in the same stage, even though here\nwe apply pure off-policy training.\n\n## Training\n\nWe recommend using a machine with a CUDA-capable GPU for this experiment. In order to start training, we just need to\ninvoke\n\n```bash\nPYTHONPATH=. python allenact/main.py -b projects/tutorials minigrid_offpolicy_tutorial -m 8 -o <OUTPUT_PATH>\n```\n\nNote that with the `-m 8` option we limit to 8 the number of on-policy task sampling processes used between off-policy\nupdates.\n\nIf everything goes well, the training success should quickly reach values around 0.7-0.8 on GPU and converge to values\nclose to 1 if given sufficient time to train.\n\nIf running tensorboard, you'll notice a separate group of scalars named `train-offpolicy-losses` and\n`train-offpolicy-misc` with losses, approximate \"experiences per second\" (i.e. the number of off-policy experiences/steps\nbeing used to update the model per second), and other tracked values in addition to the standard `train-onpolicy-*`\nused for on-policy training. In the `train-metrics` and `train-misc` sections you'll find the metrics\nquantifying the performance of the agent throughout training and some other plots showing training details.\n*Note that the x-axis for these plots is different than for the `train-offpolicy-*` sections*. This\nis because these plots use the number of rollout steps as the x-axis (i.e. 
steps that the trained agent\ntakes interactively) while the `train-offpolicy-*` plots use the number of off-policy \"experiences\" that have\nbeen shown to the agent.\n\n\nA view of the training progress about 5 hours after starting on a CUDA-capable GPU should look similar to the below\n(note that training reached >99% success after about 50 minutes).\n\n![off-policy progress](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/minigrid-offpolicy/minigrid-offpolicy-tutorial-tb.png)\n\n"
  },
  {
    "path": "docs/tutorials/running-inference-on-a-pretrained-model.md",
    "content": "<!-- DO NOT EDIT THIS FILE. --> \n<!-- THIS FILE WAS AUTOGENERATED FROM 'ALLENACT_BASE_DIR/projects/tutorials/running_inference_tutorial.py', EDIT IT INSTEAD. -->\n\n# Tutorial: Inference with a pre-trained model.\nIn this tutorial we will run inference on a pre-trained model for the PointNav task\nin the RoboTHOR environment. In this task the agent is tasked with going to a specific location\nwithin a realistic 3D environment.\n\nFor information on how to train a PointNav Model see [this tutorial](training-a-pointnav-model.md)\n\nWe will need to [install the full AllenAct library](../installation/installation-allenact.md#full-library),\nthe `robothor_plugin` requirements via\n\n```bash\npip install -r allenact_plugins/robothor_plugin/extra_requirements.txt\n```\n\nand [download the\nRoboTHOR Pointnav dataset](../installation/download-datasets.md) before we get started.\n\nFor this tutorial we will download the weights of a model trained on the debug dataset.\nThis can be done with a handy script in the `pretrained_model_ckpts` directory:\n```bash\nbash pretrained_model_ckpts/download_navigation_model_ckpts.sh robothor-pointnav-rgb-resnet\n```\nThis will download the weights for an RGB model that has been\ntrained on the PointNav task in RoboTHOR to `pretrained_model_ckpts/robothor-pointnav-rgb-resnet`\n\n\nNext we need to run the inference, using the PointNav experiment config from the\n[tutorial on making a PointNav experiment](training-a-pointnav-model.md).\nWe can do this with the following command:\n\n```bash\nPYTHONPATH=. 
python allenact/main.py -o <PATH_TO_OUTPUT> -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> -c <PATH_TO_CHECKPOINT> --eval\n```\n\nWhere `<PATH_TO_OUTPUT>` is the location where the results of the test will be dumped, `<PATH_TO_CHECKPOINT>` is the\nlocation of the downloaded model weights, and `<BASE_DIRECTORY_OF_YOUR_EXPERIMENT>` is a path to the directory where\nour experiment definition is stored.\n\nFor our current setup the following command would work:\n\n```bash\nPYTHONPATH=. python allenact/main.py \\\ntraining_a_pointnav_model \\\n-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \\\n-b projects/tutorials \\\n-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30/exp_PointNavRobothorRGBPPO__stage_00__steps_000039031200.pt \\\n--eval\n```\n\nFor testing on all saved checkpoints we pass a directory to `--checkpoint` rather than just a single file:\n\n```bash\nPYTHONPATH=. python allenact/main.py \\\ntraining_a_pointnav_model \\\n-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \\\n-b projects/tutorials  \\\n-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30 \\\n--eval\n```\n## Visualization\n\nWe also show examples of visualizations that can be extracted from the `\"valid\"` and `\"test\"` modes. Currently,\nvisualization is still undergoing design changes and does not support multi-agent tasks, but the available functionality\nis sufficient for pointnav in RoboThor.\n\nFollowing up on the example above, we can make a specialized pointnav `ExperimentConfig` where we instantiate\nthe base visualization class, `VizSuite`, defined in\n[`allenact.utils.viz_utils`](https://github.com/allenai/allenact/tree/master/allenact/utils/viz_utils.py), when in `test` mode.\n\nEach visualization type can be thought of as a plugin to the base `VizSuite`. 
For example, all `episode_ids` passed to\n`VizSuite` will be processed with each of the instantiated visualization types (possibly with the exception of the\n`AgentViewViz`). In the example below we show how to instantiate different visualization types from 4 different data\nsources.\n\nThe data sources available to `VizSuite` are:\n\n* Task output (e.g. 2D trajectories)\n* Vector task (e.g. egocentric views)\n* Rollout storage (e.g. recurrent memory, taken action logprobs...)\n* `ActorCriticOutput` (e.g. action probabilities)\n\nThe visualization types included below are:\n\n* `TrajectoryViz`: Generic 2D trajectory view.\n* `AgentViewViz`: RGB egocentric view.\n* `ActorViz`: Action probabilities from `ActorCriticOutput[CategoricalDistr]`.\n* `TensorViz1D`: Evolution of a point from RolloutStorage over time.\n* `TensorViz2D`: Evolution of a vector from RolloutStorage over time.\n* `ThorViz`: Specialized 2D trajectory view\n[for RoboThor](https://github.com/allenai/allenact/tree/master/allenact_plugins/robothor_plugin/robothor_viz.py).\n\nNote that we need to explicitly set the `episode_ids` that we wish to visualize. 
For `AgentViewViz` we have the option\nof using a different (typically shorter) list of episodes or enforce the ones used for the rest of visualizations.\n\n```python\nclass PointNavRoboThorRGBPPOVizExperimentConfig(PointNavRoboThorRGBPPOExperimentConfig):\n    \"\"\"ExperimentConfig used to demonstrate how to set up visualization code.\n\n    # Attributes\n\n    viz_ep_ids : Scene names that will be visualized.\n    viz_video_ids : Scene names that will have videos visualizations associated with them.\n    \"\"\"\n\n    viz_ep_ids = [\n        \"FloorPlan_Train1_1_3\",\n        \"FloorPlan_Train1_1_4\",\n        \"FloorPlan_Train1_1_5\",\n        \"FloorPlan_Train1_1_6\",\n    ]\n    viz_video_ids = [[\"FloorPlan_Train1_1_3\"], [\"FloorPlan_Train1_1_4\"]]\n\n    viz: Optional[VizSuite] = None\n\n    def get_viz(self, mode):\n        if self.viz is not None:\n            return self.viz\n\n        self.viz = VizSuite(\n            episode_ids=self.viz_ep_ids,\n            mode=mode,\n            # Basic 2D trajectory visualizer (task output source):\n            base_trajectory=TrajectoryViz(\n                path_to_target_location=(\"task_info\", \"target\",),\n            ),\n            # Egocentric view visualizer (vector task source):\n            egeocentric=AgentViewViz(\n                max_video_length=100, episode_ids=self.viz_video_ids\n            ),\n            # Default action probability visualizer (actor critic output source):\n            action_probs=ActorViz(figsize=(3.25, 10), fontsize=18),\n            # Default taken action logprob visualizer (rollout storage source):\n            taken_action_logprobs=TensorViz1D(),\n            # Same episode mask visualizer (rollout storage source):\n            episode_mask=TensorViz1D(rollout_source=(\"masks\",)),\n            # Default recurrent memory visualizer (rollout storage source):\n            rnn_memory=TensorViz2D(rollout_source=(\"memory\", \"single_belief\")),\n            # Specialized 2D 
trajectory visualizer (task output source):\n            thor_trajectory=ThorViz(\n                figsize=(16, 8),\n                viz_rows_cols=(448, 448),\n                scenes=(\"FloorPlan_Train{}_{}\", 1, 1, 1, 1),\n            ),\n        )\n\n        return self.viz\n\n    def machine_params(self, mode=\"train\", **kwargs):\n        res = super().machine_params(mode, **kwargs)\n        if mode == \"test\":\n            res.set_visualizer(self.get_viz(mode))\n\n        return res\n```\nRunning test on the same downloaded models, but using the visualization-enabled `ExperimentConfig` with\n\n```bash\nPYTHONPATH=. python allenact/main.py \\\nrunning_inference_tutorial \\\n-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \\\n-b projects/tutorials \\\n-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30/exp_PointNavRobothorRGBPPO__stage_00__steps_000039031200.pt \\\n--eval\n```\n\ngenerates different types of visualization and logs them in tensorboard. If everything is properly set up and\ntensorboard includes the `robothor-pointnav-rgb-resnet` folder, under the `IMAGES` tab, we should see something similar\nto\n\n![Visualization example](../img/viz_pretrained_2videos.jpg)\n\n"
  },
  {
    "path": "docs/tutorials/training-a-pointnav-model.md",
    "content": "<!-- DO NOT EDIT THIS FILE. --> \n<!-- THIS FILE WAS AUTOGENERATED FROM 'ALLENACT_BASE_DIR/projects/tutorials/training_a_pointnav_model.py', EDIT IT INSTEAD. -->\n\n# Tutorial: PointNav in RoboTHOR.\n![RoboTHOR Robot](../img/RoboTHOR_robot.jpg)\n## Introduction\nOne of the most obvious tasks that an embodied agent should master is navigating the world it inhabits.\nBefore we can teach a robot to cook or clean it first needs to be able to move around. The simplest\nway to formulate \"moving around\" into a task is by making your agent find a beacon somewhere in the environment.\nThis beacon transmits its location, such that at any time, the agent can get the direction and euclidian distance\nto the beacon. This particular task is often called Point Navigation, or **PointNav** for short.\n\n#### PointNav\nAt first glance, this task seems trivial. If the agent is given the direction and distance of the target at\nall times, can it not simply follow this signal directly? The answer is no, because agents are often trained\non this task in environments that emulate real-world buildings which are not wide-open spaces, but rather\ncontain many smaller rooms. Because of this, the agent has to learn to navigate human spaces and use doors\nand hallways to efficiently navigate from one side of the building to the other. This task becomes particularly\ndifficult when the agent is tested in an environment that it is not trained in. If the agent does not know\nhow the floor plan of an environment looks, it has to learn to predict the design of man-made structures,\nto efficiently navigate across them, much like how people instinctively know how to move around a building\nthey have never seen before based on their experience navigating similar buildings.\n\n#### What is an environment anyways?\nEnvironments are worlds in which embodied agents exist. 
If our embodied agent is simply a neural network that is being\ntrained in a simulator, then that simulator is its environment. Similarly, if our agent is a\nphysical robot then its environment is the real world. The agent interacts with the environment by taking one\nof several available actions (such as \"move forward\", or \"turn left\"). After each action, the environment\nproduces a new frame that the agent can analyze to determine its next step. For many tasks, including PointNav\nthe agent also has a special \"stop\" action which indicates that the agent thinks it has reached the target.\nAfter this action is called the agent will be reset to a new location, regardless if it reached the\ntarget. The hope is that after enough training the agent will learn to correctly assess that it has successfully\nnavigated to the target.\n\n![RoboTHOR Sim vs. Real](../img/RoboTHOR_sim_real.jpg)\n\nThere are many simulators designed for the training\nof embodied agents. In this tutorial, we will be using a simulator called [RoboTHOR](https://ai2thor.allenai.org/robothor/),\nwhich is designed specifically to train models that can easily be transferred to a real robot, by providing a\nphoto-realistic virtual environment and a real-world replica of the environment that researchers can have access to.\nRoboTHOR contains 60 different virtual scenes with different floor plans and furniture and 15 validation scenes.\n\nIt is also important to mention that **AllenAct**\nhas a class abstraction called Environment. This is not the actual simulator game engine or robotics controller,\nbut rather a shallow wrapper that provides a uniform interface to the actual environment.\n\n#### Learning algorithm\nFinally, let us briefly touch on the algorithm that we will use to train our embodied agent to navigate. While\n*AllenAct* offers us great flexibility to train models using complex pipelines, we will be using a simple\npure reinforcement learning approach for this tutorial. 
More specifically, we will be using DD-PPO,\na decentralized and distributed variant of the ubiquitous PPO algorithm. For those unfamiliar with Reinforcement\nLearning we highly recommend [this tutorial](http://karpathy.github.io/2016/05/31/rl/) by Andrej Karpathy, and [this\nbook](http://www.incompleteideas.net/book/the-book-2nd.html) by Sutton and Barto. Essentially what we are doing\nis letting our agent explore the environment on its own, rewarding it for taking actions that bring it closer\nto its goal and penalizing it for actions that take it away from its goal. We then optimize the agent's model\nto maximize this reward.\n\n## Requirements\nTo train the model on the PointNav task, we need to [install the RoboTHOR environment](../installation/installation-framework.md)\nand [download the RoboTHOR PointNav dataset](../installation/download-datasets.md)\n\nThe dataset contains a list of episodes with thousands of randomly generated starting positions and target locations for each of the scenes\nas well as a precomputed cache of distances, containing the shortest path from each point in a scene, to every other point in that scene.\nThis is used to reward the agent for moving closer to the target in terms of geodesic distance - the actual path distance (as opposed to a\nstraight line distance).\n\n## Config File Setup\nNow comes the most important part of the tutorial, we are going to write an experiment config file.\nIf this is your first experience with experiment config files in AllenAct, we suggest that you\nfirst see our how-to on [defining an experiment](../howtos/defining-an-experiment.md) which will\nwalk you through creating a simplified experiment config file.\n\nUnlike a library that can be imported into python, **AllenAct** is structured as a framework with a runner script called\n`main.py` which will run the experiment specified in a config file. 
This design forces us to keep meticulous records of\nexactly which settings were used to produce a particular result,\nwhich can be very useful given how expensive RL models are to train.\n\nThe `projects/` directory is home to different projects using `AllenAct`. Currently it is populated with baselines\nof popular tasks and tutorials.\n\nWe already have all the code for this tutorial stored in `projects/tutorials/training_a_pointnav_model.py`. We will\nbe using this file to run our experiments, but you can create a new directory in `projects/` and start writing your\nexperiment there.\n\nWe start off by importing everything we will need:\n\n```python\nimport glob\nimport os\nfrom math import ceil\nfrom typing import Dict, Any, List, Optional, Sequence\n\nimport gym\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\nfrom torchvision import models\n\nfrom allenact.algorithms.onpolicy_sync.losses import PPO\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams\nfrom allenact.base_abstractions.preprocessor import SensorPreprocessorGraph\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor\nfrom allenact.utils.experiment_utils import (\n    Builder,\n    PipelineStage,\n    TrainingPipeline,\n    LinearDecay,\n    evenly_distribute_count_into_bins,\n)\nfrom allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor\nfrom allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor\nfrom allenact_plugins.robothor_plugin.robothor_task_samplers import (\n    PointNavDatasetTaskSampler,\n)\nfrom allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask\nfrom projects.pointnav_baselines.models.point_nav_models import 
(\n    ResnetTensorPointNavActorCritic,\n)\n```\nNext we define a new experiment config class:\n```python\nclass PointNavRoboThorRGBPPOExperimentConfig(ExperimentConfig):\n    \"\"\"A Point Navigation experiment configuration in RoboThor.\"\"\"\n```\nWe then define the task parameters. For PointNav, these include the maximum number of steps our agent\ncan take before being reset (this prevents the agent from wandering on forever), and a configuration\nfor the reward function that we will be using.\n\n```python\n    # Task Parameters\n    MAX_STEPS = 500\n    REWARD_CONFIG = {\n        \"step_penalty\": -0.01,\n        \"goal_success_reward\": 10.0,\n        \"failed_stop_reward\": 0.0,\n        \"shaping_weight\": 1.0,\n    }\n```\nIn this case, we set the maximum number of steps to 500.\nWe give the agent a reward of -0.01 for each action that it takes (this is to encourage it to reach the goal\nin as few actions as possible), and a reward of 10.0 if the agent manages to successfully reach its destination.\nIf the agent selects the `stop` action without reaching the target we do not punish it (although this is\nsometimes useful for preventing the agent from stopping prematurely). Finally, our agent gets rewarded if it moves\ncloser to the target and gets punished if it moves further away. `shaping_weight` controls how strong this signal should\nbe and is here set to 1.0. These parameters work well for training an agent on PointNav, but feel free to play around\nwith them.\n\nNext, we set the parameters of the simulator itself. Here we select a resolution at which the engine will render\nevery frame (640 by 480) and a resolution at which the image will be fed into the neural network (here it is set\nto a 224 by 224 box).\n\n```python\n    # Simulator Parameters\n    CAMERA_WIDTH = 640\n    CAMERA_HEIGHT = 480\n    SCREEN_SIZE = 224\n```\nNext, we set the hardware parameters for the training engine. 
`NUM_PROCESSES` sets the total number of parallel\nprocesses that will be used to train the model. In general, more processes result in faster training,\nbut since each process is a unique instance of the environment in which we are training they can take up a\nlot of memory. Depending on the size of the model, the environment, and the hardware we are using, we may\nneed to adjust this number, but for a setup with 8 GTX Titans, 60 processes work fine. 60 also happens to\nbe the number of training scenes in RoboTHOR, which allows each process to load only a single scene into\nmemory, saving time and space.\n\n`TRAINING_GPUS` takes the ids of the GPUS on which\nthe model should be trained. Similarly `VALIDATION_GPUS` and `TESTING_GPUS` hold the ids of the GPUS on which\nthe validation and testing will occur. During training, a validation process is constantly running and evaluating\nthe current model, to show the progress on the validation set, so reserving a GPU for validation can be a good idea.\nIf our hardware setup does not include a GPU, these fields can be set to empty lists, as the codebase will default\nto running everything on the CPU with only 1 process.\n\n```python\n    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None\n    NUM_PROCESSES = 20\n    TRAINING_GPUS: Sequence[int] = [0]\n    VALIDATION_GPUS: Sequence[int] = [0]\n    TESTING_GPUS: Sequence[int] = [0]\n```\nSince we are using a dataset to train our model we need to define the path to where we have stored it. If we\ndownload the dataset instructed above we can define the path as follows\n\n```python\n    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), \"datasets/robothor-pointnav/debug\")\n    VAL_DATASET_DIR = os.path.join(os.getcwd(), \"datasets/robothor-pointnav/debug\")\n```\nNext, we define the sensors. `RGBSensorThor` is the environment's implementation of an RGB sensor. 
It takes the\nraw image outputted by the simulator and resizes it, to the input dimensions for our neural network that we\nspecified above. It also performs normalization if we want. `GPSCompassSensorRoboThor` is a sensor that tracks\nthe point our agent needs to move to. It tells us the direction and distance to our goal at every time step.\n\n```python\n    SENSORS = [\n        RGBSensorThor(\n            height=SCREEN_SIZE,\n            width=SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        GPSCompassSensorRoboThor(),\n    ]\n```\nFor the sake of this example, we are also going to be using a preprocessor with our model. In *AllenAct*\nthe preprocessor abstraction is designed with large models with frozen weights in mind. These models often\nhail from the ResNet family and transform the raw pixels that our agent observes in the environment, into a\ncomplex embedding, which then gets stored and used as input to our trainable model instead of the original image.\nMost other preprocessing work is done in the sensor classes (as we just saw with the RGB\nsensor scaling and normalizing our input), but for the sake of efficiency, all neural network preprocessing should\nuse this abstraction.\n\n```python\n    PREPROCESSORS = [\n        Builder(\n            ResNetPreprocessor,\n            {\n                \"input_height\": SCREEN_SIZE,\n                \"input_width\": SCREEN_SIZE,\n                \"output_width\": 7,\n                \"output_height\": 7,\n                \"output_dims\": 512,\n                \"pool\": False,\n                \"torchvision_resnet_model\": models.resnet18,\n                \"input_uuids\": [\"rgb_lowres\"],\n                \"output_uuid\": \"rgb_resnet\",\n            },\n        ),\n    ]\n```\nNext, we must define all of the observation inputs that our model will use. 
These are just\nthe hardcoded ids of the sensors we are using in the experiment.\n\n```python\n    OBSERVATIONS = [\n        \"rgb_resnet\",\n        \"target_coordinates_ind\",\n    ]\n```\nFinally, we must define the settings of our simulator. We set the camera dimensions to the values\nwe defined earlier. We set rotateStepDegrees to 30 degrees, which means that every time the agent takes a\nturn action, they will rotate by 30 degrees. We set grid size to 0.25 which means that every time the\nagent moves forward, it will do so by 0.25 meters.\n\n```python\n    ENV_ARGS = dict(\n        width=CAMERA_WIDTH,\n        height=CAMERA_HEIGHT,\n        rotateStepDegrees=30.0,\n        visibilityDistance=1.0,\n        gridSize=0.25,\n    )\n```\nNow we move on to the methods that we must define to finish implementing an experiment config. Firstly we\nhave a simple method that just returns the name of the experiment.\n\n```python\n    @classmethod\n    def tag(cls):\n        return \"PointNavRobothorRGBPPO\"\n```\nNext, we define the training pipeline. In this function, we specify exactly which algorithm or algorithms\nwe will use to train our model. In this simple example, we are using the PPO loss with a learning rate of 3e-4.\nWe specify 250 million steps of training and a rollout length of 30 with the `ppo_steps` and `num_steps` parameters\nrespectively. All the other standard PPO parameters are also present in this function. 
`metric_accumulate_interval`\nsets the frequency at which data is accumulated from all the processes and logged while `save_interval` sets how\noften we save the model weights and run validation on them.\n\n```python\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        ppo_steps = int(250000000)\n        lr = 3e-4\n        num_mini_batch = 1\n        update_repeats = 3\n        num_steps = 30\n        save_interval = 5000000\n        log_interval = 1000\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 0.95\n        max_grad_norm = 0.5\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=log_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses={\"ppo_loss\": PPO(**PPOConfig)},\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,\n            pipeline_stages=[\n                PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps)\n            ],\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}\n            ),\n        )\n```\nThe `machine_params` method returns the hardware parameters of each\nprocess, based on the list of devices we defined above.\n\n```python\n    def machine_params(self, mode=\"train\", **kwargs):\n        sampler_devices: List[int] = []\n        if mode == \"train\":\n            workers_per_device = 1\n            gpu_ids = (\n                []\n                if not torch.cuda.is_available()\n                else list(self.TRAINING_GPUS) * workers_per_device\n            )\n            nprocesses = (\n                8\n                if not 
torch.cuda.is_available()\n                else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))\n            )\n            sampler_devices = list(self.TRAINING_GPUS)\n        elif mode == \"valid\":\n            nprocesses = 1\n            gpu_ids = [] if not torch.cuda.is_available() else self.VALIDATION_GPUS\n        elif mode == \"test\":\n            nprocesses = 1\n            gpu_ids = [] if not torch.cuda.is_available() else self.TESTING_GPUS\n        else:\n            raise NotImplementedError(\"mode must be 'train', 'valid', or 'test'.\")\n\n        sensor_preprocessor_graph = (\n            SensorPreprocessorGraph(\n                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,\n                preprocessors=self.PREPROCESSORS,\n            )\n            if mode == \"train\"\n            or (\n                (isinstance(nprocesses, int) and nprocesses > 0)\n                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)\n            )\n            else None\n        )\n\n        return MachineParams(\n            nprocesses=nprocesses,\n            devices=gpu_ids,\n            sampler_devices=sampler_devices\n            if mode == \"train\"\n            else gpu_ids,  # ignored with > 1 gpu_ids\n            sensor_preprocessor_graph=sensor_preprocessor_graph,\n        )\n```\nNow we define the actual model that we will be using. **AllenAct** offers first-class support for PyTorch,\nso any PyTorch model that implements the provided `ActorCriticModel` class will work here. Here we borrow a modelfrom the `pointnav_baselines` project (which\nunsurprisingly contains several PointNav baselines). It is a small convolutional network that expects the output of a ResNet as its rgb input followed by a single-layered GRU. The model accepts as input the number of different\nactions our agent can perform in the environment through the `action_space` parameter, which we get from the task definition. 
We also define the shape of the inputs we are going to be passing to the model with `observation_space`.\nWe specify the names of our sensors with `goal_sensor_uuid` and `rgb_resnet_preprocessor_uuid`. Finally, we define\nthe size of our RNN with `hidden_size` and the size of the embedding of our goal sensor data (the direction and\ndistance to the target) with `goal_dims`.\n\n```python\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        return ResnetTensorPointNavActorCritic(\n            action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),\n            observation_space=kwargs[\"sensor_preprocessor_graph\"].observation_spaces,\n            goal_sensor_uuid=\"target_coordinates_ind\",\n            rgb_resnet_preprocessor_uuid=\"rgb_resnet\",\n            hidden_size=512,\n            goal_dims=32,\n        )\n```\nWe also need to define the task sampler that we will be using. This is a piece of code that generates instances\nof tasks for our agent to perform (essentially starting locations and targets for PointNav). Since we are getting\nour tasks from a dataset, the task sampler is a very simple piece of code that just reads the specified file and sets\nthe agent to the next starting locations whenever the agent exceeds the maximum number of steps or selects the\n`stop` action.\n\n```python\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return PointNavDatasetTaskSampler(**kwargs)\n```\nYou might notice that we did not specify the task sampler's arguments, but are rather passing them in. The\nreason for this is that each process will have its own task sampler, and we need to specify exactly which scenes\neach process should work with. 
If we have several GPUS and many scenes this process of distributing the work can be rather complicated so we define a few helper functions to do just this.\n\n```python\n    @staticmethod\n    def _partition_inds(n: int, num_parts: int):\n        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(\n            np.int32\n        )\n\n    def _get_sampler_args_for_scene_split(\n        self,\n        scenes_dir: str,\n        process_ind: int,\n        total_processes: int,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        path = os.path.join(scenes_dir, \"*.json.gz\")\n        scenes = [scene.split(\"/\")[-1].split(\".\")[0] for scene in glob.glob(path)]\n        if len(scenes) == 0:\n            raise RuntimeError(\n                (\n                    \"Could find no scene dataset information in directory {}.\"\n                    \" Are you sure you've downloaded them? \"\n                    \" If not, see https://allenact.org/installation/download-datasets/ information\"\n                    \" on how this can be done.\"\n                ).format(scenes_dir)\n            )\n        if total_processes > len(scenes):  # oversample some scenes -> bias\n            if total_processes % len(scenes) != 0:\n                print(\n                    \"Warning: oversampling some of the scenes to feed all processes.\"\n                    \" You can avoid this by setting a number of workers divisible by the number of scenes\"\n                )\n            scenes = scenes * int(ceil(total_processes / len(scenes)))\n            scenes = scenes[: total_processes * (len(scenes) // total_processes)]\n        else:\n            if len(scenes) % total_processes != 0:\n                print(\n                    \"Warning: oversampling some of the scenes to feed all processes.\"\n                    \" You can avoid this by setting a number of workers divisor of the number of 
scenes\"\n                )\n        inds = self._partition_inds(len(scenes), total_processes)\n\n        return {\n            \"scenes\": scenes[inds[process_ind] : inds[process_ind + 1]],\n            \"max_steps\": self.MAX_STEPS,\n            \"sensors\": self.SENSORS,\n            \"action_space\": gym.spaces.Discrete(len(PointNavTask.class_action_names())),\n            \"seed\": seeds[process_ind] if seeds is not None else None,\n            \"deterministic_cudnn\": deterministic_cudnn,\n            \"rewards_config\": self.REWARD_CONFIG,\n        }\n```\nThe very last things we need to define are the sampler arguments themselves. We define them separately for a train,\nvalidation, and test sampler, but in this case, they are almost the same. The arguments need to include the location\nof the dataset and distance cache as well as the environment arguments for our simulator, both of which we defined above\nand are just referencing here. The only consequential differences between these task samplers are the path to the dataset\nwe are using (train or validation) and whether we want to loop over the dataset or not (we want this for training since\nwe want to train for several epochs, but we do not need this for validation and testing). 
Since the test scenes of\nRoboTHOR are private we are also testing on our validation set.\n\n```python\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            os.path.join(self.TRAIN_DATASET_DIR, \"episodes\"),\n            process_ind,\n            total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"scene_directory\"] = self.TRAIN_DATASET_DIR\n        res[\"loop_dataset\"] = True\n        res[\"env_args\"] = {}\n        res[\"env_args\"].update(self.ENV_ARGS)\n        res[\"env_args\"][\"x_display\"] = (\n            (\"0.%d\" % devices[process_ind % len(devices)])\n            if devices is not None and len(devices) > 0\n            else None\n        )\n        res[\"allow_flipping\"] = True\n        return res\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            os.path.join(self.VAL_DATASET_DIR, \"episodes\"),\n            process_ind,\n            total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"scene_directory\"] = self.VAL_DATASET_DIR\n        res[\"loop_dataset\"] = False\n        res[\"env_args\"] = {}\n        res[\"env_args\"].update(self.ENV_ARGS)\n        res[\"env_args\"][\"x_display\"] = (\n            (\"0.%d\" % devices[process_ind % len(devices)])\n            if devices is not None and len(devices) > 0\n            else None\n        )\n        return 
res\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            os.path.join(self.VAL_DATASET_DIR, \"episodes\"),\n            process_ind,\n            total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"scene_directory\"] = self.VAL_DATASET_DIR\n        res[\"loop_dataset\"] = False\n        res[\"env_args\"] = {}\n        res[\"env_args\"].update(self.ENV_ARGS)\n        return res\n```\nThis is it! If we copy all of the code into a file we should be able to run our experiment!\n\n## Training Model On Debug Dataset\nWe can test if our installation worked properly by training our model on a small dataset of 4 episodes. This\nshould take about 20 minutes on a computer with an NVIDIA GPU.\n\nWe can now train a model by running:\n```bash\nPYTHONPATH=. python allenact/main.py -o <PATH_TO_OUTPUT> -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> <EXPERIMENT_NAME>\n```\nIf using the same configuration as we have set up, the following command should work:\n```bash\nPYTHONPATH=. 
python allenact/main.py training_a_pointnav_model -o storage/robothor-pointnav-rgb-resnet-resnet -b projects/tutorials\n```\nIf we start up a tensorboard server during training and specify that `output_dir=storage`, the output should look\nsomething like this:\n![tensorboard output](../img/point-nav-baseline-tb.png)\n\n## Training Model On Full Dataset\nWe can also train the model on the full dataset by changing back our dataset path and running the same command as above.\nBut be aware, training this takes nearly 2 days on a machine with 8 GPUs.\n\n## Testing Model\nTo test the performance of a model please refer to [this tutorial](running-inference-on-a-pretrained-model.md).\n\n## Conclusion\nIn this tutorial, we learned how to create a new PointNav experiment using **AllenAct**. There are many simple\nand obvious ways to modify the experiment from here - changing the model, the learning algorithm and the environment\neach requires very few lines of code changed in the above file, allowing us to explore our embodied AI research ideas\nacross different frameworks with ease.\n\n"
  },
  {
    "path": "docs/tutorials/training-pipelines.md",
    "content": "# Tutorial: IL to RL with a training pipeline\n"
  },
  {
    "path": "docs/tutorials/transfering-to-a-different-environment-framework.md",
    "content": "# Tutorial: Swapping in a new environment\n\n**Note** The provided paths in this tutorial assume you have\n[installed the full library](../installation/installation-allenact.md#full-library).\n\n## Introduction\nThis tutorial was designed as a continuation of the `Robothor PointNav Tutorial` and explains\nhow to modify the experiment config created in that tutorial to work with the iTHOR and\nHabitat environments.\n\nCross-platform support is one of the key design goals of `allenact`. This is achieved through\na total decoupling of the environment code from the engine, model and algorithm code, so that\nswapping in a new environment is as plug and play as possible. Crucially we will be able to \nrun a model on different environments without touching the model code at all, which will allow\nus to train neural networks in one environment and test them in another.\n\n## RoboTHOR to iTHOR\n![iTHOR Framework](../img/iTHOR_framework.jpg)\nSince both the `RoboTHOR` and the `iTHOR` environment stem from the same family and are developed\nby the same organization, switching between the two is incredibly easy. 
We only have to change\nthe path parameter to point to an iTHOR dataset rather than the RoboTHOR one.\n\n```python\n    # Dataset Parameters\n    TRAIN_DATASET_DIR = \"datasets/ithor-pointnav/train\"\n    VAL_DATASET_DIR = \"datasets/ithor-pointnav/val\"\n```\n\nWe also have to download the `iTHOR-PointNav` dataset, following [these instructions](../installation/download-datasets.md).\n\nWe might also want to modify the `tag` method to accurately reflect our config but this will not change\nthe behavior at all and is merely a bookkeeping convenience.\n```python\n    @classmethod\n    def tag(cls):\n        return \"PointNavRobothorRGBPPO\"\n```\n\n## RoboTHOR to Habitat\n![Habitat Framework](../img/habitat_framework.jpg)\n\nTo train experiments using the Habitat framework we need to install it following [these instructions](../installation/installation-framework.md).\n\nSince the roboTHOR and Habitat simulators are sufficiently different and have different parameters to configure\nthis transformation takes a bit more effort, but we only need to modify the environment config and TaskSampler (we\nhave to change the former because the habitat simulator accepts a different format of configuration and the latter\nbecause the habitat dataset is formatted differently and thus needs to be parsed differently.)\n\nAs part of our environment modification, we need to switch from using RoboTHOR sensors to using Habitat sensors.\nThe implementation of sensors we provide offer an uniform interface across all the environments so we simply have\nto swap out our sensor classes:\n```python\n    SENSORS = [\n        DepthSensorHabitat(\n            height=SCREEN_SIZE,\n            width=SCREEN_SIZE,\n            use_normalization=True,\n        ),\n        TargetCoordinatesSensorHabitat(coordinate_dims=2),\n    ]\n```\n\nNext we need to define the simulator config:\n\n```python\n    CONFIG = get_habitat_config(\"configs/gibson.yaml\")\n    CONFIG.defrost()\n    CONFIG.NUM_PROCESSES = 
NUM_PROCESSES\n    CONFIG.SIMULATOR_GPU_IDS = TRAIN_GPUS\n    CONFIG.DATASET.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR\n    CONFIG.DATASET.POINTNAVV1.CONTENT_SCENES = [\"*\"]\n    CONFIG.DATASET.DATA_PATH = TRAIN_SCENES\n    CONFIG.SIMULATOR.AGENT_0.SENSORS = [\"RGB_SENSOR\"]\n    CONFIG.SIMULATOR.RGB_SENSOR.WIDTH = CAMERA_WIDTH\n    CONFIG.SIMULATOR.RGB_SENSOR.HEIGHT = CAMERA_HEIGHT\n    CONFIG.SIMULATOR.TURN_ANGLE = 30\n    CONFIG.SIMULATOR.FORWARD_STEP_SIZE = 0.25\n    CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = MAX_STEPS\n\n    CONFIG.TASK.TYPE = \"Nav-v0\"\n    CONFIG.TASK.SUCCESS_DISTANCE = 0.2\n    CONFIG.TASK.SENSORS = [\"POINTGOAL_WITH_GPS_COMPASS_SENSOR\"]\n    CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.GOAL_FORMAT = \"POLAR\"\n    CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.DIMENSIONALITY = 2\n    CONFIG.TASK.GOAL_SENSOR_UUID = \"pointgoal_with_gps_compass\"\n    CONFIG.TASK.MEASUREMENTS = [\"DISTANCE_TO_GOAL\", \"SUCCESS\", \"SPL\"]\n    CONFIG.TASK.SPL.TYPE = \"SPL\"\n    CONFIG.TASK.SPL.SUCCESS_DISTANCE = 0.2\n    CONFIG.TASK.SUCCESS.SUCCESS_DISTANCE = 0.2\n\n    CONFIG.MODE = \"train\"\n```\nThis `CONFIG` object holds very similar values to the ones `ENV_ARGS` held in the RoboTHOR example. 
We\ndecided to leave this way of passing in configurations exposed to the user to offer maximum customization\nof the underlying environment.\n\nFinally we need to replace the task sampler and its argument generating functions:\n\n```python\n    # Define Task Sampler\nfrom allenact_plugins.habitat_plugin.habitat_task_samplers import PointNavTaskSampler\n\n\n@classmethod\ndef make_sampler_fn(cls, **kwargs) -> TaskSampler:\n    return PointNavTaskSampler(**kwargs)\n\n\ndef train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n) -> Dict[str, Any]:\n    config = self.TRAIN_CONFIGS_PER_PROCESS[process_ind]\n    return {\n        \"env_config\": config,\n        \"max_steps\": self.MAX_STEPS,\n        \"sensors\": self.SENSORS,\n        \"action_space\": gym.spaces.Discrete(len(PointNavTask.action_names())),\n        \"distance_to_goal\": self.DISTANCE_TO_GOAL,\n    }\n\n\ndef valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n) -> Dict[str, Any]:\n    config = self.CONFIG.clone()\n    config.defrost()\n    config.DATASET.DATA_PATH = self.VALID_SCENES_PATH\n    config.MODE = \"validate\"\n    config.freeze()\n    return {\n        \"env_config\": config,\n        \"max_steps\": self.MAX_STEPS,\n        \"sensors\": self.SENSORS,\n        \"action_space\": gym.spaces.Discrete(len(PointNavTask.action_names())),\n        \"distance_to_goal\": self.DISTANCE_TO_GOAL,\n    }\n\n\ndef test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n) -> 
Dict[str, Any]:\n    config = self.TEST_CONFIGS[process_ind]\n    return {\n        \"env_config\": config,\n        \"max_steps\": self.MAX_STEPS,\n        \"sensors\": self.SENSORS,\n        \"action_space\": gym.spaces.Discrete(len(PointNavTask.action_names())),\n        \"distance_to_goal\": self.DISTANCE_TO_GOAL,\n    }\n```\n\nAs we can see, this code looks very similar as well; we simply need to pass slightly different parameters.\n\n## Conclusion\nIn this tutorial, we learned how to modify our experiment configurations to work with different environments. By\nproviding a high level of modularity and out-of-the-box support for both `Habitat` and `THOR`, two of the most popular embodied frameworks out there, **AllenAct** hopes to give researchers the ability to validate their results across many platforms and help guide them towards genuine progress. The source code for this tutorial can be found in `/projects/framework_transfer_tutorial`.\n"
  },
  {
    "path": "main.py",
    "content": "#!/usr/bin/env python3\n\"\"\"Entry point to training/validating/testing for a user given experiment\nname.\"\"\"\nimport allenact.main\n\nif __name__ == \"__main__\":\n    allenact.main.main()\n"
  },
  {
    "path": "mkdocs.yml",
    "content": "site_name: AllenAct\nsite_description: An open source framework for research in Embodied-AI from AI2\nsite_url: https://allenact.org\n\ntheme:\n  name: material\n  custom_dir: overrides\n  palette:\n    primary: blue\n    accent: grey\n  logo: img/AI2_Avatar_White.png\n  favicon: img/AllenAct_A.svg\n  highlightjs: true\n  hljs_languages:\n  - python\n  - typescript\n  - json\n\nextra_css:\n- css/extra.css\n\ngoogle_analytics: [UA-120916510-8, allenact.org]\n\nrepo_name: allenai/allenact\nrepo_url: https://github.com/allenai/allenact\ndocs_dir: docs\n\nnav:\n- Overview: index.md\n- Installation:\n  - Install AllenAct: installation/installation-allenact.md\n  - Install environments: installation/installation-framework.md\n  - Download datasets: installation/download-datasets.md\n- Getting started:\n  - Run your first experiment: getting_started/running-your-first-experiment.md\n  - Primary abstractions: getting_started/abstractions.md\n  - Structure of the codebase: getting_started/structure.md\n- Tutorials:\n  - AllenAct Tutorials: tutorials/index.md\n  - Navigation in Minigrid: tutorials/minigrid-tutorial.md\n  - PointNav in RoboTHOR: tutorials/training-a-pointnav-model.md\n  - Swapping environments: tutorials/transfering-to-a-different-environment-framework.md\n  - Using a pre-trained model: tutorials/running-inference-on-a-pretrained-model.md\n  - Off-policy training: tutorials/offpolicy-tutorial.md\n  - OpenAI gym for continuous control: tutorials/gym-tutorial.md\n  - Multi-node ObjectNav training: tutorials/distributed-objectnav-tutorial.md\n  - OpenAI gym for MuJoCo tasks: tutorials/gym-mujoco-tutorial.md\n#  - IL to RL with pipelines: tutorials/training-pipelines.md\n- HowTos:\n  - Define an experiment: howtos/defining-an-experiment.md\n  - Change rewards and losses: howtos/changing-rewards-and-losses.md\n  - Define a new model: howtos/defining-a-new-model.md\n  - Define a new task: howtos/defining-a-new-task.md\n  - Define a new training 
pipeline: howtos/defining-a-new-training-pipeline.md\n  # - Visualize results: howtos/visualizing-results.md\n  # - Run a multi-agent experiment: howtos/running-a-multi-agent-experiment.md\n- Projects:\n  - BabyAI baselines: projects/babyai_baselines/README.md\n  - PointNav baselines: projects/pointnav_baselines/README.md\n  - ObjectNav baselines: projects/objectnav_baselines/README.md\n  # - Advisor code: projects/advisor_2020/README.md\n  # - Two Body Problem code: projects/two_body_problem_2019/README.md\n- FAQ: FAQ.md\n- Contributing: CONTRIBUTING.md\n- Licence: LICENSE.md\n- API:\n  - allenact:\n    - _constants: api/allenact/_constants.md\n    - embodiedai:\n      - mapping:\n        - mapping_utils:\n          - map_builders: api/allenact/embodiedai/mapping/mapping_utils/map_builders.md\n          - point_cloud_utils: api/allenact/embodiedai/mapping/mapping_utils/point_cloud_utils.md\n        - mapping_losses: api/allenact/embodiedai/mapping/mapping_losses.md\n        - mapping_models:\n          - active_neural_slam: api/allenact/embodiedai/mapping/mapping_models/active_neural_slam.md\n      - preprocessors:\n        - resnet: api/allenact/embodiedai/preprocessors/resnet.md\n      - sensors:\n        - vision_sensors: api/allenact/embodiedai/sensors/vision_sensors.md\n      - models:\n        - aux_models: api/allenact/embodiedai/models/aux_models.md\n        - basic_models: api/allenact/embodiedai/models/basic_models.md\n        - resnet: api/allenact/embodiedai/models/resnet.md\n        - fusion_models: api/allenact/embodiedai/models/fusion_models.md\n        - visual_nav_models: api/allenact/embodiedai/models/visual_nav_models.md\n      - storage:\n        - vdr_storage: api/allenact/embodiedai/storage/vdr_storage.md\n      - aux_losses:\n        - losses: api/allenact/embodiedai/aux_losses/losses.md\n    - base_abstractions:\n      - experiment_config: api/allenact/base_abstractions/experiment_config.md\n      - misc: 
api/allenact/base_abstractions/misc.md\n      - task: api/allenact/base_abstractions/task.md\n      - sensor: api/allenact/base_abstractions/sensor.md\n      - preprocessor: api/allenact/base_abstractions/preprocessor.md\n      - distributions: api/allenact/base_abstractions/distributions.md\n    - algorithms:\n      - onpolicy_sync:\n        - losses:\n          - grouped_action_imitation: api/allenact/algorithms/onpolicy_sync/losses/grouped_action_imitation.md\n          - imitation: api/allenact/algorithms/onpolicy_sync/losses/imitation.md\n          - abstract_loss: api/allenact/algorithms/onpolicy_sync/losses/abstract_loss.md\n          - ppo: api/allenact/algorithms/onpolicy_sync/losses/ppo.md\n          - a2cacktr: api/allenact/algorithms/onpolicy_sync/losses/a2cacktr.md\n        - misc: api/allenact/algorithms/onpolicy_sync/misc.md\n        - runner: api/allenact/algorithms/onpolicy_sync/runner.md\n        - policy: api/allenact/algorithms/onpolicy_sync/policy.md\n        - engine: api/allenact/algorithms/onpolicy_sync/engine.md\n        - vector_sampled_tasks: api/allenact/algorithms/onpolicy_sync/vector_sampled_tasks.md\n        - storage: api/allenact/algorithms/onpolicy_sync/storage.md\n      - offpolicy_sync:\n        - losses:\n          - abstract_offpolicy_loss: api/allenact/algorithms/offpolicy_sync/losses/abstract_offpolicy_loss.md\n    - utils:\n      - model_utils: api/allenact/utils/model_utils.md\n      - experiment_utils: api/allenact/utils/experiment_utils.md\n      - spaces_utils: api/allenact/utils/spaces_utils.md\n      - system: api/allenact/utils/system.md\n      - cacheless_frcnn: api/allenact/utils/cacheless_frcnn.md\n      - misc_utils: api/allenact/utils/misc_utils.md\n      - multi_agent_viz_utils: api/allenact/utils/multi_agent_viz_utils.md\n      - viz_utils: api/allenact/utils/viz_utils.md\n      - tensor_utils: api/allenact/utils/tensor_utils.md\n      - cache_utils: api/allenact/utils/cache_utils.md\n  - allenact_plugins:\n    
- habitat_plugin:\n      - habitat_constants: api/allenact_plugins/habitat_plugin/habitat_constants.md\n      - habitat_tasks: api/allenact_plugins/habitat_plugin/habitat_tasks.md\n      - habitat_sensors: api/allenact_plugins/habitat_plugin/habitat_sensors.md\n      - habitat_environment: api/allenact_plugins/habitat_plugin/habitat_environment.md\n      - habitat_preprocessors: api/allenact_plugins/habitat_plugin/habitat_preprocessors.md\n      - habitat_task_samplers: api/allenact_plugins/habitat_plugin/habitat_task_samplers.md\n      - scripts:\n        - agent_demo: api/allenact_plugins/habitat_plugin/scripts/agent_demo.md\n        - make_map: api/allenact_plugins/habitat_plugin/scripts/make_map.md\n      - habitat_utils: api/allenact_plugins/habitat_plugin/habitat_utils.md\n    - lighthouse_plugin:\n      - lighthouse_models: api/allenact_plugins/lighthouse_plugin/lighthouse_models.md\n      - lighthouse_environment: api/allenact_plugins/lighthouse_plugin/lighthouse_environment.md\n      - lighthouse_tasks: api/allenact_plugins/lighthouse_plugin/lighthouse_tasks.md\n      - lighthouse_sensors: api/allenact_plugins/lighthouse_plugin/lighthouse_sensors.md\n      - lighthouse_util: api/allenact_plugins/lighthouse_plugin/lighthouse_util.md\n    - babyai_plugin:\n      - babyai_constants: api/allenact_plugins/babyai_plugin/babyai_constants.md\n      - babyai_models: api/allenact_plugins/babyai_plugin/babyai_models.md\n      - scripts:\n        - truncate_expert_demos: api/allenact_plugins/babyai_plugin/scripts/truncate_expert_demos.md\n        - get_instr_length_percentiles: api/allenact_plugins/babyai_plugin/scripts/get_instr_length_percentiles.md\n        - download_babyai_expert_demos: api/allenact_plugins/babyai_plugin/scripts/download_babyai_expert_demos.md\n      - babyai_tasks: api/allenact_plugins/babyai_plugin/babyai_tasks.md\n    - ithor_plugin:\n      - ithor_tasks: api/allenact_plugins/ithor_plugin/ithor_tasks.md\n      - ithor_environment: 
api/allenact_plugins/ithor_plugin/ithor_environment.md\n      - ithor_constants: api/allenact_plugins/ithor_plugin/ithor_constants.md\n      - ithor_util: api/allenact_plugins/ithor_plugin/ithor_util.md\n      - ithor_sensors: api/allenact_plugins/ithor_plugin/ithor_sensors.md\n      - scripts:\n        - make_objectnav_debug_dataset: api/allenact_plugins/ithor_plugin/scripts/make_objectnav_debug_dataset.md\n        - make_pointnav_debug_dataset: api/allenact_plugins/ithor_plugin/scripts/make_pointnav_debug_dataset.md\n      - ithor_viz: api/allenact_plugins/ithor_plugin/ithor_viz.md\n      - ithor_task_samplers: api/allenact_plugins/ithor_plugin/ithor_task_samplers.md\n    - robothor_plugin:\n      - robothor_preprocessors: api/allenact_plugins/robothor_plugin/robothor_preprocessors.md\n      - robothor_task_samplers: api/allenact_plugins/robothor_plugin/robothor_task_samplers.md\n      - robothor_environment: api/allenact_plugins/robothor_plugin/robothor_environment.md\n      - robothor_constants: api/allenact_plugins/robothor_plugin/robothor_constants.md\n      - robothor_distributions: api/allenact_plugins/robothor_plugin/robothor_distributions.md\n      - robothor_models: api/allenact_plugins/robothor_plugin/robothor_models.md\n      - robothor_tasks: api/allenact_plugins/robothor_plugin/robothor_tasks.md\n      - scripts:\n        - make_objectnav_debug_dataset: api/allenact_plugins/robothor_plugin/scripts/make_objectnav_debug_dataset.md\n        - make_pointnav_debug_dataset: api/allenact_plugins/robothor_plugin/scripts/make_pointnav_debug_dataset.md\n      - robothor_sensors: api/allenact_plugins/robothor_plugin/robothor_sensors.md\n      - robothor_viz: api/allenact_plugins/robothor_plugin/robothor_viz.md\n    - minigrid_plugin:\n      - minigrid_tasks: api/allenact_plugins/minigrid_plugin/minigrid_tasks.md\n      - minigrid_environments: api/allenact_plugins/minigrid_plugin/minigrid_environments.md\n      - minigrid_offpolicy: 
api/allenact_plugins/minigrid_plugin/minigrid_offpolicy.md\n      - minigrid_sensors: api/allenact_plugins/minigrid_plugin/minigrid_sensors.md\n      - configs:\n        - minigrid_nomemory: api/allenact_plugins/minigrid_plugin/configs/minigrid_nomemory.md\n      - minigrid_models: api/allenact_plugins/minigrid_plugin/minigrid_models.md\n    - manipulathor_plugin:\n      - manipulathor_viz: api/allenact_plugins/manipulathor_plugin/manipulathor_viz.md\n      - manipulathor_tasks: api/allenact_plugins/manipulathor_plugin/manipulathor_tasks.md\n      - manipulathor_task_samplers: api/allenact_plugins/manipulathor_plugin/manipulathor_task_samplers.md\n      - manipulathor_constants: api/allenact_plugins/manipulathor_plugin/manipulathor_constants.md\n      - armpointnav_constants: api/allenact_plugins/manipulathor_plugin/armpointnav_constants.md\n      - manipulathor_sensors: api/allenact_plugins/manipulathor_plugin/manipulathor_sensors.md\n      - arm_calculation_utils: api/allenact_plugins/manipulathor_plugin/arm_calculation_utils.md\n      - manipulathor_utils: api/allenact_plugins/manipulathor_plugin/manipulathor_utils.md\n      - manipulathor_environment: api/allenact_plugins/manipulathor_plugin/manipulathor_environment.md\n    - gym_plugin:\n      - gym_environment: api/allenact_plugins/gym_plugin/gym_environment.md\n      - gym_sensors: api/allenact_plugins/gym_plugin/gym_sensors.md\n      - gym_distributions: api/allenact_plugins/gym_plugin/gym_distributions.md\n      - gym_models: api/allenact_plugins/gym_plugin/gym_models.md\n      - gym_tasks: api/allenact_plugins/gym_plugin/gym_tasks.md\n  - constants: api/constants.md\n  - projects:\n    - gym_baselines:\n      - experiments:\n        - gym_base: api/projects/gym_baselines/experiments/gym_base.md\n        - gym_humanoid_base: api/projects/gym_baselines/experiments/gym_humanoid_base.md\n        - gym_mujoco_base: api/projects/gym_baselines/experiments/gym_mujoco_base.md\n        - gym_humanoid_ddppo: 
api/projects/gym_baselines/experiments/gym_humanoid_ddppo.md\n        - mujoco:\n          - gym_mujoco_swimmer_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_swimmer_ddppo.md\n          - gym_mujoco_reacher_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_reacher_ddppo.md\n          - gym_mujoco_walker2d_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_walker2d_ddppo.md\n          - gym_mujoco_halfcheetah_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_halfcheetah_ddppo.md\n          - gym_mujoco_humanoid_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_humanoid_ddppo.md\n          - gym_mujoco_inverteddoublependulum_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_inverteddoublependulum_ddppo.md\n          - gym_mujoco_ant_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_ant_ddppo.md\n          - gym_mujoco_hopper_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_hopper_ddppo.md\n          - gym_mujoco_invertedpendulum_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_invertedpendulum_ddppo.md\n        - gym_mujoco_ddppo: api/projects/gym_baselines/experiments/gym_mujoco_ddppo.md\n      - models:\n        - gym_models: api/projects/gym_baselines/models/gym_models.md\n    - objectnav_baselines:\n      - experiments:\n        - robothor:\n          - objectnav_robothor_base: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_base.md\n          - objectnav_robothor_rgb_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnetgru_ddppo.md\n          - objectnav_robothor_rgb_resnetgru_dagger: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnetgru_dagger.md\n          - objectnav_robothor_rgbd_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnetgru_ddppo.md\n          - 
objectnav_robothor_rgb_resnetgru_ddppo_and_gbc: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnetgru_ddppo_and_gbc.md\n          - objectnav_robothor_rgb_unfrozenresnet_gru_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_unfrozenresnet_gru_ddppo.md\n          - objectnav_robothor_rgb_unfrozenresnet_gru_vdr_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_unfrozenresnet_gru_vdr_ddppo.md\n          - objectnav_robothor_depth_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_depth_resnetgru_ddppo.md\n        - objectnav_mixin_unfrozenresnet_gru: api/projects/objectnav_baselines/experiments/objectnav_mixin_unfrozenresnet_gru.md\n        - ithor:\n          - objectnav_ithor_rgbd_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgbd_resnetgru_ddppo.md\n          - objectnav_ithor_depth_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_depth_resnetgru_ddppo.md\n          - objectnav_ithor_rgb_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgb_resnetgru_ddppo.md\n          - objectnav_ithor_base: api/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_base.md\n        - objectnav_thor_mixin_ddppo_and_gbc: api/projects/objectnav_baselines/experiments/objectnav_thor_mixin_ddppo_and_gbc.md\n        - objectnav_thor_base: api/projects/objectnav_baselines/experiments/objectnav_thor_base.md\n        - objectnav_mixin_resnetgru: api/projects/objectnav_baselines/experiments/objectnav_mixin_resnetgru.md\n        - objectnav_base: api/projects/objectnav_baselines/experiments/objectnav_base.md\n        - objectnav_mixin_ddppo: api/projects/objectnav_baselines/experiments/objectnav_mixin_ddppo.md\n        - objectnav_mixin_dagger: api/projects/objectnav_baselines/experiments/objectnav_mixin_dagger.md\n      - models:\n        
- object_nav_models: api/projects/objectnav_baselines/models/object_nav_models.md\n    - babyai_baselines:\n      - experiments:\n        - go_to_local:\n          - bc: api/projects/babyai_baselines/experiments/go_to_local/bc.md\n          - distributed_bc_offpolicy: api/projects/babyai_baselines/experiments/go_to_local/distributed_bc_offpolicy.md\n          - dagger: api/projects/babyai_baselines/experiments/go_to_local/dagger.md\n          - bc_teacher_forcing: api/projects/babyai_baselines/experiments/go_to_local/bc_teacher_forcing.md\n          - distributed_bc_teacher_forcing: api/projects/babyai_baselines/experiments/go_to_local/distributed_bc_teacher_forcing.md\n          - ppo: api/projects/babyai_baselines/experiments/go_to_local/ppo.md\n          - a2c: api/projects/babyai_baselines/experiments/go_to_local/a2c.md\n          - base: api/projects/babyai_baselines/experiments/go_to_local/base.md\n        - go_to_obj:\n          - bc: api/projects/babyai_baselines/experiments/go_to_obj/bc.md\n          - dagger: api/projects/babyai_baselines/experiments/go_to_obj/dagger.md\n          - bc_teacher_forcing: api/projects/babyai_baselines/experiments/go_to_obj/bc_teacher_forcing.md\n          - ppo: api/projects/babyai_baselines/experiments/go_to_obj/ppo.md\n          - a2c: api/projects/babyai_baselines/experiments/go_to_obj/a2c.md\n          - base: api/projects/babyai_baselines/experiments/go_to_obj/base.md\n        - base: api/projects/babyai_baselines/experiments/base.md\n    - pointnav_baselines:\n      - experiments:\n        - robothor:\n          - pointnav_robothor_rgbd_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgbd_simpleconvgru_ddppo.md\n          - pointnav_robothor_depth_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_depth_simpleconvgru_ddppo.md\n          - pointnav_robothor_rgb_simpleconvgru_ddppo: 
api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo.md\n          - pointnav_robothor_rgb_simpleconvgru_ddppo_and_gbc: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo_and_gbc.md\n          - pointnav_robothor_base: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_base.md\n        - habitat:\n          - pointnav_habitat_rgb_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgb_simpleconvgru_ddppo.md\n          - pointnav_habitat_rgbd_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgbd_simpleconvgru_ddppo.md\n          - pointnav_habitat_base: api/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_base.md\n          - debug_pointnav_habitat_rgbd_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/debug_pointnav_habitat_rgbd_simpleconvgru_ddppo.md\n          - debug_pointnav_habitat_rgb_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/debug_pointnav_habitat_rgb_simpleconvgru_ddppo.md\n          - debug_pointnav_habitat_rgb_simpleconvgru_bc: api/projects/pointnav_baselines/experiments/habitat/debug_pointnav_habitat_rgb_simpleconvgru_bc.md\n          - pointnav_habitat_depth_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_depth_simpleconvgru_ddppo.md\n          - debug_pointnav_habitat_base: api/projects/pointnav_baselines/experiments/habitat/debug_pointnav_habitat_base.md\n        - pointnav_base: api/projects/pointnav_baselines/experiments/pointnav_base.md\n        - pointnav_habitat_mixin_ddppo: api/projects/pointnav_baselines/experiments/pointnav_habitat_mixin_ddppo.md\n        - ithor:\n          - pointnav_ithor_rgbd_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgbd_simpleconvgru_ddppo.md\n          - 
pointnav_ithor_depth_simpleconvgru_ddppo_and_gbc: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_depth_simpleconvgru_ddppo_and_gbc.md\n          - pointnav_ithor_rgb_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgb_simpleconvgru_ddppo.md\n          - pointnav_ithor_base: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_base.md\n          - pointnav_ithor_depth_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_depth_simpleconvgru_ddppo.md\n        - pointnav_mixin_simpleconvgru: api/projects/pointnav_baselines/experiments/pointnav_mixin_simpleconvgru.md\n        - pointnav_thor_mixin_ddppo: api/projects/pointnav_baselines/experiments/pointnav_thor_mixin_ddppo.md\n        - pointnav_thor_mixin_ddppo_and_gbc: api/projects/pointnav_baselines/experiments/pointnav_thor_mixin_ddppo_and_gbc.md\n        - pointnav_thor_base: api/projects/pointnav_baselines/experiments/pointnav_thor_base.md\n      - models:\n        - point_nav_models: api/projects/pointnav_baselines/models/point_nav_models.md\n    - tutorials:\n      - pointnav_habitat_rgb_ddppo: api/projects/tutorials/pointnav_habitat_rgb_ddppo.md\n      - object_nav_ithor_dagger_then_ppo_one_object: api/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object.md\n      - minigrid_offpolicy_tutorial: api/projects/tutorials/minigrid_offpolicy_tutorial.md\n      - navtopartner_robothor_rgb_ppo: api/projects/tutorials/navtopartner_robothor_rgb_ppo.md\n      - pointnav_ithor_rgb_ddppo: api/projects/tutorials/pointnav_ithor_rgb_ddppo.md\n      - object_nav_ithor_dagger_then_ppo_one_object_viz: api/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object_viz.md\n      - object_nav_ithor_ppo_one_object: api/projects/tutorials/object_nav_ithor_ppo_one_object.md\n      - minigrid_tutorial_conds: api/projects/tutorials/minigrid_tutorial_conds.md\n    - manipulathor_baselines:\n      - armpointnav_baselines:\n    
    - experiments:\n          - armpointnav_thor_base: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_thor_base.md\n          - armpointnav_base: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_base.md\n          - armpointnav_mixin_ddppo: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_mixin_ddppo.md\n          - ithor:\n            - armpointnav_no_vision: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_no_vision.md\n            - armpointnav_ithor_base: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_ithor_base.md\n            - armpointnav_depth: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_depth.md\n            - armpointnav_rgb: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_rgb.md\n            - armpointnav_rgbdepth: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_rgbdepth.md\n            - armpointnav_disjoint_depth: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_disjoint_depth.md\n          - armpointnav_mixin_simplegru: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_mixin_simplegru.md\n        - models:\n          - arm_pointnav_models: api/projects/manipulathor_baselines/armpointnav_baselines/models/arm_pointnav_models.md\n          - base_models: api/projects/manipulathor_baselines/armpointnav_baselines/models/base_models.md\n          - disjoint_arm_pointnav_models: api/projects/manipulathor_baselines/armpointnav_baselines/models/disjoint_arm_pointnav_models.md\n          - manipulathor_net_utils: api/projects/manipulathor_baselines/armpointnav_baselines/models/manipulathor_net_utils.md\n  - tests:\n    - mapping:\n      - test_ai2thor_mapping: 
api/tests/mapping/test_ai2thor_mapping.md\n    - multiprocessing:\n      - test_frozen_attribs: api/tests/multiprocessing/test_frozen_attribs.md\n    - utils:\n      - test_spaces: api/tests/utils/test_spaces.md\n    - vision:\n      - test_pillow_rescaling: api/tests/vision/test_pillow_rescaling.md\n    - sync_algs_cpu:\n      - test_to_to_obj_trains: api/tests/sync_algs_cpu/test_to_to_obj_trains.md\n    - manipulathor_plugin:\n      - test_utils: api/tests/manipulathor_plugin/test_utils.md\n    - hierarchical_policies:\n      - test_minigrid_conditional: api/tests/hierarchical_policies/test_minigrid_conditional.md\nmarkdown_extensions:\n- toc:\n    permalink: '#'\n- markdown.extensions.codehilite:\n    guess_lang: true\n- meta\n- admonition\n- codehilite\n\n# extra_javascript:\n#  - javascripts/extra.js\n#plugins:\n#  - search\n#  - mkpdfs\n"
  },
  {
    "path": "mypy.ini",
    "content": "[mypy]\npython_version = 3.7\nfollow_imports = skip\nignore_missing_imports = True\nstrict_optional = False\n\n[mypy-demo.*]\nignore_errors = True\n\n"
  },
  {
    "path": "overrides/main.html",
    "content": "{% extends \"base.html\" %}\n\n{% block extrahead %}\n  {% set title = config.site_name %}\n  {% if page and page.meta and page.meta.title %}\n    {% set title = title ~ \" - \" ~ page.meta.title %}\n  {% elif page and page.title and not page.is_homepage %}\n    {% set title = title ~ \" - \" ~ page.title | striptags %}\n  {% endif %}\n  <meta name=\"twitter:card\" content=\"summary_large_image\" />\n  <meta name=\"twitter:title\" content=\"{{ title }}\" />\n  <meta name=\"twitter:description\" content=\"{{ config.site_description }}\" />\n  <meta name=\"twitter:image\" content=\"https://www.allenact.org/img/social-card-AllenAct.png\" />\n{% endblock %}\n"
  },
  {
    "path": "pretrained_model_ckpts/.gitignore",
    "content": "*\n!.gitignore\n!*.sh\n"
  },
  {
    "path": "pretrained_model_ckpts/download_navigation_model_ckpts.sh",
    "content": "#!/bin/bash\n\n# Move to the directory containing this file\ncd \"$( cd \"$( dirname \"${BASH_SOURCE[0]}\" )\" >/dev/null 2>&1 && pwd )\" || exit\n\n# Download, Unzip, and Remove zip\nif [ \"$1\" = \"robothor-pointnav-rgb-resnet\" ]\nthen\n    echo \"Downloading pretrained RoboTHOR PointNav model...\"\n    wget https://prior-model-weights.s3.us-east-2.amazonaws.com/embodied-ai/navigation/robothor-pointnav-rgb-resnet.tar.gz\n    tar -xf robothor-pointnav-rgb-resnet.tar.gz && rm robothor-pointnav-rgb-resnet.tar.gz\n    echo \"saved folder: robothor-pointnav-rgb-resnet\"\nelif [ \"$1\" = \"robothor-objectnav-challenge-2021\" ]\nthen\n    echo \"Downloading pretrained RoboTHOR ObjectNav model...\"\n    wget https://prior-model-weights.s3.us-east-2.amazonaws.com/embodied-ai/navigation/robothor-objectnav-challenge-2021.tar.gz\n    tar -xf robothor-objectnav-challenge-2021.tar.gz && rm robothor-objectnav-challenge-2021.tar.gz\n    echo \"saved folder: robothor-objectnav-challenge-2021\"\nelse\n    echo \"Failed: Usage download_navigation_model_ckpts.sh robothor-objectnav-challenge-2021\"\n    exit 1\nfi\n"
  },
  {
    "path": "projects/__init__.py",
    "content": ""
  },
  {
    "path": "projects/babyai_baselines/README.md",
    "content": "# Baseline experiments for the BabyAI environment\n\nWe perform a collection of baseline experiments within the BabyAI environment\n on the GoToLocal task, see the `projects/babyai_baselines/experiments/go_to_local` directory.\n For instance, to train a model using PPO, run\n \n```bash\npython main.py go_to_local.ppo --experiment_base projects/babyai_baselines/experiments\n```\n\nNote that these experiments will be quite slow when not using a GPU as the BabyAI model architecture is surprisingly \nlarge. Specifying a GPU (if available) can be done from the command line using hooks we created using \n[gin-config](https://github.com/google/gin-config). E.g. to train using the 0th GPU device, add\n\n```bash\n--gp \"machine_params.gpu_id = 0\"\n```  \n\nto the above command."
  },
  {
    "path": "projects/babyai_baselines/__init__.py",
    "content": ""
  },
  {
    "path": "projects/babyai_baselines/experiments/__init__.py",
    "content": ""
  },
  {
    "path": "projects/babyai_baselines/experiments/base.py",
    "content": "from abc import ABC\nfrom typing import Dict, Any, List, Optional, Union, Sequence, cast\n\nimport gym\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses import PPO, A2C\nfrom allenact.algorithms.onpolicy_sync.losses.a2cacktr import A2CConfig\nfrom allenact.algorithms.onpolicy_sync.losses.imitation import Imitation\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams\nfrom allenact.base_abstractions.misc import Loss\nfrom allenact.base_abstractions.sensor import SensorSuite, Sensor, ExpertActionSensor\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.utils.experiment_utils import (\n    Builder,\n    LinearDecay,\n    PipelineStage,\n    TrainingPipeline,\n)\nfrom allenact_plugins.babyai_plugin.babyai_models import BabyAIRecurrentACModel\nfrom allenact_plugins.babyai_plugin.babyai_tasks import BabyAITask, BabyAITaskSampler\nfrom allenact_plugins.minigrid_plugin.minigrid_sensors import (\n    EgocentricMiniGridSensor,\n    MiniGridMissionSensor,\n)\n\n\nclass BaseBabyAIExperimentConfig(ExperimentConfig, ABC):\n    \"\"\"Base experimental config.\"\"\"\n\n    LEVEL: Optional[str] = None\n    TOTAL_RL_TRAIN_STEPS: Optional[int] = None\n    AGENT_VIEW_SIZE: int = 7\n    ROLLOUT_STEPS: Optional[int] = None\n    NUM_TRAIN_SAMPLERS: Optional[int] = None\n    NUM_TEST_TASKS: Optional[int] = None\n    INSTR_LEN: Optional[int] = None\n    USE_INSTR: Optional[bool] = None\n    GPU_ID: Optional[int] = None\n    USE_EXPERT = False\n    SHOULD_LOG = True\n    PPO_NUM_MINI_BATCH = 2\n    ARCH: Optional[str] = None\n    NUM_CKPTS_TO_SAVE = 50\n\n    TEST_SEED_OFFSET = 0\n\n    DEFAULT_LR = 1e-3\n\n    @classmethod\n    def METRIC_ACCUMULATE_INTERVAL(cls):\n        return cls.NUM_TRAIN_SAMPLERS * 1000\n\n    @classmethod\n    def 
get_sensors(cls) -> Sequence[Sensor]:\n        assert cls.USE_INSTR is not None\n\n        return (\n            [\n                EgocentricMiniGridSensor(\n                    agent_view_size=cls.AGENT_VIEW_SIZE, view_channels=3\n                ),\n            ]\n            + (\n                [MiniGridMissionSensor(instr_len=cls.INSTR_LEN)]  # type:ignore\n                if cls.USE_INSTR\n                else []\n            )\n            + (\n                [\n                    ExpertActionSensor(  # type: ignore\n                        nactions=len(BabyAITask.class_action_names())\n                    )\n                ]\n                if cls.USE_EXPERT\n                else []\n            )\n        )\n\n    @classmethod\n    def rl_loss_default(cls, alg: str, steps: Optional[int] = None):\n        if alg == \"ppo\":\n            assert steps is not None\n            return {\n                \"loss\": Builder(\n                    PPO,\n                    kwargs={\"clip_decay\": LinearDecay(steps)},\n                    default=PPOConfig,\n                ),\n                \"num_mini_batch\": cls.PPO_NUM_MINI_BATCH,\n                \"update_repeats\": 4,\n            }\n        elif alg == \"a2c\":\n            return {\n                \"loss\": A2C(**A2CConfig),\n                \"num_mini_batch\": 1,\n                \"update_repeats\": 1,\n            }\n        elif alg == \"imitation\":\n            return {\n                \"loss\": Imitation(),\n                \"num_mini_batch\": cls.PPO_NUM_MINI_BATCH,\n                \"update_repeats\": 4,\n            }\n        else:\n            raise NotImplementedError\n\n    @classmethod\n    def _training_pipeline(\n        cls,\n        named_losses: Dict[str, Union[Loss, Builder]],\n        pipeline_stages: List[PipelineStage],\n        num_mini_batch: int,\n        update_repeats: int,\n        total_train_steps: int,\n        lr: Optional[float] = None,\n    ):\n        lr = 
cls.DEFAULT_LR if lr is None else lr\n\n        num_steps = cls.ROLLOUT_STEPS\n        metric_accumulate_interval = (\n            cls.METRIC_ACCUMULATE_INTERVAL()\n        )  # Log every 10 max length tasks\n        save_interval = int(cls.TOTAL_RL_TRAIN_STEPS / cls.NUM_CKPTS_TO_SAVE)\n        gamma = 0.99\n\n        use_gae = \"reinforce_loss\" not in named_losses\n        gae_lambda = 0.99\n        max_grad_norm = 0.5\n\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=metric_accumulate_interval,\n            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses=named_losses,\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=None,\n            should_log=cls.SHOULD_LOG,\n            pipeline_stages=pipeline_stages,\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=cls.TOTAL_RL_TRAIN_STEPS)}  # type: ignore\n            ),\n        )\n\n    @classmethod\n    def machine_params(\n        cls, mode=\"train\", gpu_id=\"default\", n_train_processes=\"default\", **kwargs\n    ):\n        if mode == \"train\":\n            if n_train_processes == \"default\":\n                nprocesses = cls.NUM_TRAIN_SAMPLERS\n            else:\n                nprocesses = n_train_processes\n        elif mode == \"valid\":\n            nprocesses = 0\n        elif mode == \"test\":\n            nprocesses = min(\n                100 if torch.cuda.is_available() else 8, cls.NUM_TEST_TASKS\n            )\n        else:\n            raise NotImplementedError(\"mode must be 'train', 'valid', or 'test'.\")\n\n        if gpu_id == \"default\":\n            devices = [] if 
cls.GPU_ID is None else [cls.GPU_ID]\n        else:\n            devices = [gpu_id]\n\n        return MachineParams(nprocesses=nprocesses, devices=devices)\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        sensors = cls.get_sensors()\n        return BabyAIRecurrentACModel(\n            action_space=gym.spaces.Discrete(len(BabyAITask.class_action_names())),\n            observation_space=SensorSuite(sensors).observation_spaces,\n            use_instr=cls.USE_INSTR,\n            use_memory=True,\n            arch=cls.ARCH,\n        )\n\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return BabyAITaskSampler(**kwargs)\n\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return {\n            \"env_builder\": self.LEVEL,\n            \"sensors\": self.get_sensors(),\n            \"seed\": seeds[process_ind] if seeds is not None else None,\n        }\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        raise RuntimeError\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        max_tasks = self.NUM_TEST_TASKS // total_processes + (\n            process_ind < (self.NUM_TEST_TASKS % total_processes)\n        )\n        task_seeds_list = [\n            2**31 - 1 + self.TEST_SEED_OFFSET + process_ind + total_processes * i\n            for i in 
range(max_tasks)\n        ]\n        # print(max_tasks, process_ind, total_processes, task_seeds_list)\n\n        assert len(task_seeds_list) == 0 or (\n            min(task_seeds_list) >= 0 and max(task_seeds_list) <= 2**32 - 1\n        )\n\n        train_sampler_args = self.train_task_sampler_args(\n            process_ind=process_ind,\n            total_processes=total_processes,\n            devices=devices,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        return {\n            **train_sampler_args,\n            \"task_seeds_list\": task_seeds_list,\n            \"max_tasks\": max_tasks,\n            \"deterministic_sampling\": True,\n            \"sensors\": [\n                s for s in train_sampler_args[\"sensors\"] if \"Expert\" not in str(type(s))\n            ],\n        }\n"
  },
  {
    "path": "projects/babyai_baselines/experiments/go_to_local/__init__.py",
    "content": ""
  },
  {
    "path": "projects/babyai_baselines/experiments/go_to_local/a2c.py",
    "content": "import torch\n\nfrom allenact.utils.experiment_utils import PipelineStage\nfrom projects.babyai_baselines.experiments.go_to_local.base import (\n    BaseBabyAIGoToLocalExperimentConfig,\n)\n\n\nclass A2CBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):\n    \"\"\"A2C only.\"\"\"\n\n    NUM_TRAIN_SAMPLERS: int = (\n        128 * 6\n        if torch.cuda.is_available()\n        else BaseBabyAIGoToLocalExperimentConfig.NUM_TRAIN_SAMPLERS\n    )\n    ROLLOUT_STEPS: int = 16\n    USE_LR_DECAY = False\n    DEFAULT_LR = 1e-4\n\n    @classmethod\n    def tag(cls):\n        return \"BabyAIGoToLocalA2C\"\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        total_training_steps = cls.TOTAL_RL_TRAIN_STEPS\n        a2c_info = cls.rl_loss_default(\"a2c\", steps=total_training_steps)\n\n        return cls._training_pipeline(\n            named_losses={\n                \"a2c_loss\": a2c_info[\"loss\"],\n            },\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=[\"a2c_loss\"],\n                    max_stage_steps=total_training_steps,\n                ),\n            ],\n            num_mini_batch=a2c_info[\"num_mini_batch\"],\n            update_repeats=a2c_info[\"update_repeats\"],\n            total_train_steps=total_training_steps,\n        )\n"
  },
  {
    "path": "projects/babyai_baselines/experiments/go_to_local/base.py",
    "content": "from abc import ABC\nfrom typing import Dict, List, Optional, Union, Any, cast\n\nimport gym\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.storage import ExperienceStorage\nfrom allenact.base_abstractions.misc import Loss\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact.utils.experiment_utils import (\n    Builder,\n    LinearDecay,\n    PipelineStage,\n    TrainingPipeline,\n)\nfrom allenact_plugins.babyai_plugin.babyai_models import BabyAIRecurrentACModel\nfrom allenact_plugins.babyai_plugin.babyai_tasks import BabyAITask\nfrom projects.babyai_baselines.experiments.base import BaseBabyAIExperimentConfig\n\n\nclass BaseBabyAIGoToLocalExperimentConfig(BaseBabyAIExperimentConfig, ABC):\n    \"\"\"Base experimental config.\"\"\"\n\n    LEVEL: Optional[str] = \"BabyAI-GoToLocal-v0\"\n    TOTAL_RL_TRAIN_STEPS = int(15e6)\n    TOTAL_IL_TRAIN_STEPS = int(7.5e6)\n    ROLLOUT_STEPS: int = 128\n    NUM_TRAIN_SAMPLERS: int = 128 if torch.cuda.is_available() else 4\n    PPO_NUM_MINI_BATCH = 4\n    NUM_CKPTS_TO_SAVE = 20\n    NUM_TEST_TASKS: int = 1000\n    USE_LR_DECAY: bool = True\n\n    # ARCH = \"cnn1\"\n    # ARCH = \"cnn2\"\n    ARCH = \"expert_filmcnn\"\n\n    USE_INSTR = True\n    INSTR_LEN: int = 5\n\n    INCLUDE_AUXILIARY_HEAD = False\n\n    @classmethod\n    def METRIC_ACCUMULATE_INTERVAL(cls):\n        return cls.NUM_TRAIN_SAMPLERS * 64\n\n    @classmethod\n    def _training_pipeline(  # type:ignore\n        cls,\n        named_losses: Dict[str, Union[Loss, Builder]],\n        pipeline_stages: List[PipelineStage],\n        num_mini_batch: int,\n        update_repeats: int,\n        total_train_steps: int,\n        lr: Optional[float] = None,\n        named_storages: Optional[Dict[str, Union[ExperienceStorage, Builder]]] = None,\n    ):\n        lr = cls.DEFAULT_LR\n\n        num_steps = cls.ROLLOUT_STEPS\n        
metric_accumulate_interval = (\n            cls.METRIC_ACCUMULATE_INTERVAL()\n        )  # Log every 10 max length tasks\n        save_interval = int(total_train_steps / cls.NUM_CKPTS_TO_SAVE)\n        gamma = 0.99\n\n        use_gae = \"reinforce_loss\" not in named_losses\n        gae_lambda = 0.99\n        max_grad_norm = 0.5\n\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=metric_accumulate_interval,\n            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses=named_losses,\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=None,\n            should_log=cls.SHOULD_LOG,\n            pipeline_stages=pipeline_stages,\n            named_storages=named_storages,\n            lr_scheduler_builder=(\n                Builder(\n                    LambdaLR, {\"lr_lambda\": LinearDecay(steps=total_train_steps)}  # type: ignore\n                )\n                if cls.USE_LR_DECAY\n                else None\n            ),\n        )\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        sensors = cls.get_sensors()\n        return BabyAIRecurrentACModel(\n            action_space=gym.spaces.Discrete(len(BabyAITask.class_action_names())),\n            observation_space=SensorSuite(sensors).observation_spaces,\n            use_instr=cls.USE_INSTR,\n            use_memory=True,\n            arch=cls.ARCH,\n            instr_dim=256,\n            lang_model=\"attgru\",\n            memory_dim=2048,\n            include_auxiliary_head=cls.INCLUDE_AUXILIARY_HEAD,\n        )\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        
devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        raise RuntimeError(\"No validation processes for these tasks\")\n"
  },
  {
    "path": "projects/babyai_baselines/experiments/go_to_local/bc.py",
    "content": "from allenact.utils.experiment_utils import PipelineStage\nfrom projects.babyai_baselines.experiments.go_to_local.base import (\n    BaseBabyAIGoToLocalExperimentConfig,\n)\n\n\nclass PPOBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):\n    \"\"\"Behavior clone then PPO.\"\"\"\n\n    USE_EXPERT = True\n\n    @classmethod\n    def tag(cls):\n        return \"BabyAIGoToLocalBC\"\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        total_train_steps = cls.TOTAL_IL_TRAIN_STEPS\n\n        ppo_info = cls.rl_loss_default(\"ppo\", steps=-1)\n        imitation_info = cls.rl_loss_default(\"imitation\")\n\n        return cls._training_pipeline(\n            named_losses={\n                \"imitation_loss\": imitation_info[\"loss\"],\n            },\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=[\"imitation_loss\"],\n                    max_stage_steps=total_train_steps,\n                ),\n            ],\n            num_mini_batch=min(\n                info[\"num_mini_batch\"] for info in [ppo_info, imitation_info]\n            ),\n            update_repeats=min(\n                info[\"update_repeats\"] for info in [ppo_info, imitation_info]\n            ),\n            total_train_steps=total_train_steps,\n        )\n"
  },
  {
    "path": "projects/babyai_baselines/experiments/go_to_local/bc_teacher_forcing.py",
    "content": "import torch\n\nfrom allenact.utils.experiment_utils import PipelineStage, LinearDecay\nfrom projects.babyai_baselines.experiments.go_to_local.base import (\n    BaseBabyAIGoToLocalExperimentConfig,\n)\n\n\nclass BCTeacherForcingBabyAIGoToLocalExperimentConfig(\n    BaseBabyAIGoToLocalExperimentConfig\n):\n    \"\"\"Behavior clone with teacher forcing.\"\"\"\n\n    USE_EXPERT = True\n\n    GPU_ID = 0 if torch.cuda.is_available() else None\n\n    @classmethod\n    def METRIC_ACCUMULATE_INTERVAL(cls):\n        return 1\n\n    @classmethod\n    def tag(cls):\n        return \"BabyAIGoToLocalBCTeacherForcing\"\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        total_train_steps = cls.TOTAL_IL_TRAIN_STEPS\n\n        ppo_info = cls.rl_loss_default(\"ppo\", steps=-1)\n        imitation_info = cls.rl_loss_default(\"imitation\")\n\n        return cls._training_pipeline(\n            named_losses={\n                \"imitation_loss\": imitation_info[\"loss\"],\n            },\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=[\"imitation_loss\"],\n                    teacher_forcing=LinearDecay(\n                        startp=1.0,\n                        endp=1.0,\n                        steps=total_train_steps,\n                    ),\n                    max_stage_steps=total_train_steps,\n                ),\n            ],\n            num_mini_batch=min(\n                info[\"num_mini_batch\"] for info in [ppo_info, imitation_info]\n            ),\n            update_repeats=min(\n                info[\"update_repeats\"] for info in [ppo_info, imitation_info]\n            ),\n            total_train_steps=total_train_steps,\n        )\n"
  },
  {
    "path": "projects/babyai_baselines/experiments/go_to_local/dagger.py",
    "content": "from allenact.utils.experiment_utils import PipelineStage, LinearDecay\nfrom projects.babyai_baselines.experiments.go_to_local.base import (\n    BaseBabyAIGoToLocalExperimentConfig,\n)\n\n\nclass DaggerBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):\n    \"\"\"Find goal in lighthouse env using imitation learning.\n\n    Training with Dagger.\n    \"\"\"\n\n    USE_EXPERT = True\n\n    @classmethod\n    def tag(cls):\n        return \"BabyAIGoToLocalDagger\"\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        total_train_steps = cls.TOTAL_IL_TRAIN_STEPS\n        loss_info = cls.rl_loss_default(\"imitation\")\n        return cls._training_pipeline(\n            named_losses={\"imitation_loss\": loss_info[\"loss\"]},\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=[\"imitation_loss\"],\n                    teacher_forcing=LinearDecay(\n                        startp=1.0,\n                        endp=0.0,\n                        steps=total_train_steps // 2,\n                    ),\n                    max_stage_steps=total_train_steps,\n                )\n            ],\n            num_mini_batch=loss_info[\"num_mini_batch\"],\n            update_repeats=loss_info[\"update_repeats\"],\n            total_train_steps=total_train_steps,\n        )\n"
  },
  {
    "path": "projects/babyai_baselines/experiments/go_to_local/distributed_bc_offpolicy.py",
    "content": "import os\nfrom typing import Optional\nfrom typing import Sequence\n\nimport torch\n\nfrom allenact.algorithms.onpolicy_sync.storage import RolloutBlockStorage\nfrom allenact.utils.experiment_utils import (\n    PipelineStage,\n    StageComponent,\n    TrainingSettings,\n)\nfrom allenact_plugins.babyai_plugin.babyai_constants import (\n    BABYAI_EXPERT_TRAJECTORIES_DIR,\n)\nfrom allenact_plugins.minigrid_plugin.minigrid_offpolicy import (\n    MiniGridOffPolicyExpertCELoss,\n    MiniGridExpertTrajectoryStorage,\n)\nfrom projects.tutorials.minigrid_offpolicy_tutorial import (\n    BCOffPolicyBabyAIGoToLocalExperimentConfig,\n)\n\n\nclass DistributedBCOffPolicyBabyAIGoToLocalExperimentConfig(\n    BCOffPolicyBabyAIGoToLocalExperimentConfig\n):\n    \"\"\"Distributed Off policy imitation.\"\"\"\n\n    @classmethod\n    def tag(cls):\n        return \"DistributedBabyAIGoToLocalBCOffPolicy\"\n\n    @classmethod\n    def machine_params(\n        cls, mode=\"train\", gpu_id=\"default\", n_train_processes=\"default\", **kwargs\n    ):\n        res = super().machine_params(mode, gpu_id, n_train_processes, **kwargs)\n\n        if res[\"nprocesses\"] > 0 and torch.cuda.is_available():\n            ngpu_to_use = min(torch.cuda.device_count(), 2)\n            res[\"nprocesses\"] = [res[\"nprocesses\"] // ngpu_to_use] * ngpu_to_use\n            res[\"gpu_ids\"] = list(range(ngpu_to_use))\n\n        return res\n\n    @classmethod\n    def expert_ce_loss_kwargs_generator(\n        cls, worker_id: int, rollouts_per_worker: Sequence[int], seed: Optional[int]\n    ):\n        return dict(num_workers=len(rollouts_per_worker), current_worker=worker_id)\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        total_train_steps = cls.TOTAL_IL_TRAIN_STEPS\n        ppo_info = cls.rl_loss_default(\"ppo\", steps=-1)\n\n        num_mini_batch = ppo_info[\"num_mini_batch\"]\n        update_repeats = ppo_info[\"update_repeats\"]\n\n        return 
cls._training_pipeline(\n            named_losses={\n                \"offpolicy_expert_ce_loss\": MiniGridOffPolicyExpertCELoss(\n                    total_episodes_in_epoch=int(1e6)\n                ),\n            },\n            named_storages={\n                \"onpolicy\": RolloutBlockStorage(),\n                \"minigrid_offpolicy_expert\": MiniGridExpertTrajectoryStorage(\n                    data_path=os.path.join(\n                        BABYAI_EXPERT_TRAJECTORIES_DIR,\n                        \"BabyAI-GoToLocal-v0{}.pkl\".format(\n                            \"\" if torch.cuda.is_available() else \"-small\"\n                        ),\n                    ),\n                    num_samplers=cls.NUM_TRAIN_SAMPLERS,\n                    rollout_len=cls.ROLLOUT_STEPS,\n                    instr_len=cls.INSTR_LEN,\n                ),\n            },\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=[\"offpolicy_expert_ce_loss\"],\n                    max_stage_steps=total_train_steps,\n                    stage_components=[\n                        StageComponent(\n                            uuid=\"offpolicy\",\n                            storage_uuid=\"minigrid_offpolicy_expert\",\n                            loss_names=[\"offpolicy_expert_ce_loss\"],\n                            training_settings=TrainingSettings(\n                                update_repeats=num_mini_batch * update_repeats,\n                                num_mini_batch=1,\n                            ),\n                        )\n                    ],\n                ),\n            ],\n            num_mini_batch=0,\n            update_repeats=0,\n            total_train_steps=total_train_steps,\n        )\n"
  },
  {
    "path": "projects/babyai_baselines/experiments/go_to_local/distributed_bc_teacher_forcing.py",
    "content": "import torch\n\nfrom .bc_teacher_forcing import BCTeacherForcingBabyAIGoToLocalExperimentConfig\n\n\nclass DistributedBCTeacherForcingBabyAIGoToLocalExperimentConfig(\n    BCTeacherForcingBabyAIGoToLocalExperimentConfig\n):\n    \"\"\"Distributed behavior clone with teacher forcing.\"\"\"\n\n    USE_EXPERT = True\n\n    GPU_ID = 0 if torch.cuda.is_available() else None\n\n    @classmethod\n    def METRIC_ACCUMULATE_INTERVAL(cls):\n        return 1\n\n    @classmethod\n    def tag(cls):\n        return \"BabyAIGoToLocalBCTeacherForcingDistributed\"\n\n    @classmethod\n    def machine_params(\n        cls, mode=\"train\", gpu_id=\"default\", n_train_processes=\"default\", **kwargs\n    ):\n        res = super().machine_params(mode, gpu_id, n_train_processes, **kwargs)\n\n        if res[\"nprocesses\"] > 0 and torch.cuda.is_available():\n            ngpu_to_use = min(torch.cuda.device_count(), 2)\n            res[\"nprocesses\"] = [res[\"nprocesses\"] // ngpu_to_use] * ngpu_to_use\n            res[\"gpu_ids\"] = list(range(ngpu_to_use))\n\n        return res\n"
  },
  {
    "path": "projects/babyai_baselines/experiments/go_to_local/ppo.py",
    "content": "import torch\n\nfrom allenact.utils.experiment_utils import PipelineStage\nfrom projects.babyai_baselines.experiments.go_to_local.base import (\n    BaseBabyAIGoToLocalExperimentConfig,\n)\n\n\nclass PPOBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):\n    \"\"\"PPO only.\"\"\"\n\n    NUM_TRAIN_SAMPLERS: int = (\n        128 * 12\n        if torch.cuda.is_available()\n        else BaseBabyAIGoToLocalExperimentConfig.NUM_TRAIN_SAMPLERS\n    )\n    ROLLOUT_STEPS: int = 32\n    USE_LR_DECAY = False\n    DEFAULT_LR = 1e-4\n\n    @classmethod\n    def tag(cls):\n        return \"BabyAIGoToLocalPPO\"\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        total_train_steps = cls.TOTAL_RL_TRAIN_STEPS\n        ppo_info = cls.rl_loss_default(\"ppo\", steps=total_train_steps)\n\n        return cls._training_pipeline(\n            named_losses={\n                \"ppo_loss\": ppo_info[\"loss\"],\n            },\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=[\"ppo_loss\"],\n                    max_stage_steps=total_train_steps,\n                ),\n            ],\n            num_mini_batch=ppo_info[\"num_mini_batch\"],\n            update_repeats=ppo_info[\"update_repeats\"],\n            total_train_steps=total_train_steps,\n        )\n"
  },
  {
    "path": "projects/babyai_baselines/experiments/go_to_obj/__init__.py",
    "content": ""
  },
  {
    "path": "projects/babyai_baselines/experiments/go_to_obj/a2c.py",
    "content": "from allenact.utils.experiment_utils import PipelineStage\nfrom projects.babyai_baselines.experiments.go_to_obj.base import (\n    BaseBabyAIGoToObjExperimentConfig,\n)\n\n\nclass A2CBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):\n    \"\"\"A2C only.\"\"\"\n\n    TOTAL_RL_TRAIN_STEPS = int(1e5)\n\n    @classmethod\n    def tag(cls):\n        return \"BabyAIGoToObjA2C\"\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        total_training_steps = cls.TOTAL_RL_TRAIN_STEPS\n        a2c_info = cls.rl_loss_default(\"a2c\", steps=total_training_steps)\n\n        return cls._training_pipeline(\n            named_losses={\n                \"a2c_loss\": a2c_info[\"loss\"],\n            },\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=[\"a2c_loss\"],\n                    max_stage_steps=total_training_steps,\n                ),\n            ],\n            num_mini_batch=a2c_info[\"num_mini_batch\"],\n            update_repeats=a2c_info[\"update_repeats\"],\n            total_train_steps=total_training_steps,\n        )\n"
  },
  {
    "path": "projects/babyai_baselines/experiments/go_to_obj/base.py",
    "content": "from abc import ABC\nfrom typing import Dict, List, Optional, Union, cast\n\nimport gym\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.base_abstractions.misc import Loss\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact.utils.experiment_utils import (\n    Builder,\n    LinearDecay,\n    PipelineStage,\n    TrainingPipeline,\n)\nfrom allenact_plugins.babyai_plugin.babyai_models import BabyAIRecurrentACModel\nfrom allenact_plugins.babyai_plugin.babyai_tasks import BabyAITask\nfrom projects.babyai_baselines.experiments.base import BaseBabyAIExperimentConfig\n\n\nclass BaseBabyAIGoToObjExperimentConfig(BaseBabyAIExperimentConfig, ABC):\n    \"\"\"Base experimental config.\"\"\"\n\n    LEVEL: Optional[str] = \"BabyAI-GoToObj-v0\"\n    TOTAL_RL_TRAIN_STEPS = int(5e4)\n    TOTAL_IL_TRAIN_STEPS = int(2e4)\n    ROLLOUT_STEPS: int = 32\n    NUM_TRAIN_SAMPLERS: int = 16\n    PPO_NUM_MINI_BATCH = 2\n    NUM_TEST_TASKS: int = 50\n    USE_LR_DECAY: bool = False\n\n    DEFAULT_LR = 1e-3\n\n    ARCH = \"cnn1\"\n    # ARCH = \"cnn2\"\n    # ARCH = \"expert_filmcnn\"\n\n    USE_INSTR = False\n    INSTR_LEN: int = -1\n\n    @classmethod\n    def METRIC_ACCUMULATE_INTERVAL(cls):\n        return cls.NUM_TRAIN_SAMPLERS * 128\n\n    @classmethod\n    def _training_pipeline(  # type:ignore\n        cls,\n        named_losses: Dict[str, Union[Loss, Builder]],\n        pipeline_stages: List[PipelineStage],\n        num_mini_batch: int,\n        update_repeats: int,\n        total_train_steps: int,\n        lr: Optional[float] = None,\n        **kwargs,\n    ):\n        lr = cls.DEFAULT_LR\n\n        num_steps = cls.ROLLOUT_STEPS\n        metric_accumulate_interval = (\n            cls.METRIC_ACCUMULATE_INTERVAL()\n        )  # Log every 10 max length tasks\n        save_interval = 2**31\n        gamma = 0.99\n\n        use_gae = \"reinforce_loss\" not in named_losses\n        
gae_lambda = 0.99\n        max_grad_norm = 0.5\n\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=metric_accumulate_interval,\n            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses=named_losses,\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=None,\n            should_log=cls.SHOULD_LOG,\n            pipeline_stages=pipeline_stages,\n            lr_scheduler_builder=(\n                Builder(\n                    LambdaLR, {\"lr_lambda\": LinearDecay(steps=total_train_steps)}  # type: ignore\n                )\n                if cls.USE_LR_DECAY\n                else None\n            ),\n            **kwargs,\n        )\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        sensors = cls.get_sensors()\n        return BabyAIRecurrentACModel(\n            action_space=gym.spaces.Discrete(len(BabyAITask.class_action_names())),\n            observation_space=SensorSuite(sensors).observation_spaces,\n            use_instr=cls.USE_INSTR,\n            use_memory=True,\n            arch=cls.ARCH,\n            instr_dim=8,\n            lang_model=\"gru\",\n            memory_dim=128,\n        )\n"
  },
  {
    "path": "projects/babyai_baselines/experiments/go_to_obj/bc.py",
    "content": "from allenact.utils.experiment_utils import PipelineStage\nfrom projects.babyai_baselines.experiments.go_to_obj.base import (\n    BaseBabyAIGoToObjExperimentConfig,\n)\n\n\nclass PPOBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):\n    \"\"\"Behavior cloning only (no PPO stage).\"\"\"\n\n    USE_EXPERT = True\n\n    @classmethod\n    def tag(cls):\n        return \"BabyAIGoToObjBC\"\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        total_train_steps = cls.TOTAL_IL_TRAIN_STEPS\n\n        ppo_info = cls.rl_loss_default(\"ppo\", steps=-1)\n        imitation_info = cls.rl_loss_default(\"imitation\")\n\n        return cls._training_pipeline(\n            named_losses={\n                \"imitation_loss\": imitation_info[\"loss\"],\n            },\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=[\"imitation_loss\"],\n                    max_stage_steps=total_train_steps,\n                ),\n            ],\n            num_mini_batch=min(\n                info[\"num_mini_batch\"] for info in [ppo_info, imitation_info]\n            ),\n            update_repeats=min(\n                info[\"update_repeats\"] for info in [ppo_info, imitation_info]\n            ),\n            total_train_steps=total_train_steps,\n        )\n"
  },
  {
    "path": "projects/babyai_baselines/experiments/go_to_obj/bc_teacher_forcing.py",
    "content": "from allenact.utils.experiment_utils import PipelineStage, LinearDecay\nfrom projects.babyai_baselines.experiments.go_to_obj.base import (\n    BaseBabyAIGoToObjExperimentConfig,\n)\n\n\nclass PPOBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):\n    \"\"\"Behavior cloning with teacher forcing only (no PPO stage).\"\"\"\n\n    USE_EXPERT = True\n\n    @classmethod\n    def tag(cls):\n        return \"BabyAIGoToObjBCTeacherForcing\"\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        total_train_steps = cls.TOTAL_IL_TRAIN_STEPS\n\n        ppo_info = cls.rl_loss_default(\"ppo\", steps=-1)\n        imitation_info = cls.rl_loss_default(\"imitation\")\n\n        return cls._training_pipeline(\n            named_losses={\n                \"imitation_loss\": imitation_info[\"loss\"],\n            },\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=[\"imitation_loss\"],\n                    teacher_forcing=LinearDecay(\n                        startp=1.0,\n                        endp=1.0,\n                        steps=total_train_steps,\n                    ),\n                    max_stage_steps=total_train_steps,\n                ),\n            ],\n            num_mini_batch=min(\n                info[\"num_mini_batch\"] for info in [ppo_info, imitation_info]\n            ),\n            update_repeats=min(\n                info[\"update_repeats\"] for info in [ppo_info, imitation_info]\n            ),\n            total_train_steps=total_train_steps,\n        )\n"
  },
  {
    "path": "projects/babyai_baselines/experiments/go_to_obj/dagger.py",
    "content": "from allenact.utils.experiment_utils import PipelineStage, LinearDecay\nfrom projects.babyai_baselines.experiments.go_to_obj.base import (\n    BaseBabyAIGoToObjExperimentConfig,\n)\n\n\nclass DaggerBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):\n    \"\"\"Go to the goal object in the BabyAI GoToObj level using imitation\n    learning.\n\n    Training with DAgger.\n    \"\"\"\n\n    USE_EXPERT = True\n\n    @classmethod\n    def tag(cls):\n        return \"BabyAIGoToObjDagger\"\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        total_train_steps = cls.TOTAL_IL_TRAIN_STEPS\n        loss_info = cls.rl_loss_default(\"imitation\")\n        return cls._training_pipeline(\n            named_losses={\"imitation_loss\": loss_info[\"loss\"]},\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=[\"imitation_loss\"],\n                    teacher_forcing=LinearDecay(\n                        startp=1.0,\n                        endp=0.0,\n                        steps=total_train_steps // 2,\n                    ),\n                    max_stage_steps=total_train_steps,\n                )\n            ],\n            num_mini_batch=loss_info[\"num_mini_batch\"],\n            update_repeats=loss_info[\"update_repeats\"],\n            total_train_steps=total_train_steps,\n        )\n"
  },
  {
    "path": "projects/babyai_baselines/experiments/go_to_obj/ppo.py",
    "content": "from allenact.utils.experiment_utils import PipelineStage\nfrom projects.babyai_baselines.experiments.go_to_obj.base import (\n    BaseBabyAIGoToObjExperimentConfig,\n)\n\n\nclass PPOBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):\n    \"\"\"PPO only.\"\"\"\n\n    @classmethod\n    def tag(cls):\n        return \"BabyAIGoToObjPPO\"\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        total_train_steps = cls.TOTAL_RL_TRAIN_STEPS\n        ppo_info = cls.rl_loss_default(\"ppo\", steps=total_train_steps)\n\n        return cls._training_pipeline(\n            named_losses={\n                \"ppo_loss\": ppo_info[\"loss\"],\n            },\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=[\"ppo_loss\"],\n                    max_stage_steps=total_train_steps,\n                ),\n            ],\n            num_mini_batch=ppo_info[\"num_mini_batch\"],\n            update_repeats=ppo_info[\"update_repeats\"],\n            total_train_steps=total_train_steps,\n        )\n"
  },
  {
    "path": "projects/gym_baselines/README.md",
    "content": "# Baseline models Gym (for MuJoCo environments)\n\nThis project contains the code for training baseline models for the tasks under the [MuJoCo](https://gym.openai.com/envs/#mujoco) group of Gym environments, including [\"Ant-v2\"](https://gym.openai.com/envs/Ant-v2/), [\"HalfCheetah-v2\"](https://gym.openai.com/envs/HalfCheetah-v2/), [\"Hopper-v2\"](https://gym.openai.com/envs/Hopper-v2/), [\"Humanoid-v2\"](https://gym.openai.com/envs/Humanoid-v2/), [\"InvertedDoublePendulum-v2\"](https://gym.openai.com/envs/InvertedDoublePendulum-v2/), [\"InvertedPendulum-v2\"](https://gym.openai.com/envs/InvertedPendulum-v2/), [\"Reacher-v2\"](https://gym.openai.com/envs/Reacher-v2/), [\"Swimmer-v2\"](https://gym.openai.com/envs/Swimmer-v2/), and [\"Walker2d-v2\"](https://gym.openai.com/envs/Walker2d-v2/).\n\nProvided are experiment configs for training a lightweight implementation with separate MLPs for actors and critic, [MemorylessActorCritic](https://allenact.org/api/allenact_plugins/gym_plugin/gym_models/#memorylessactorcritic), with a [Gaussian distribution](https://allenact.org/api/allenact_plugins/gym_plugin/gym_distributions/#gaussiandistr) to sample actions for all continuous-control environments under the `MuJoCo` group of `Gym` environments. \n\nThe experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)\nReinforcement Learning Algorithm. \n\nTo train an experiment, run the following command from the `allenact` root directory:\n\n```bash\npython main.py <PATH_TO_EXPERIMENT_CONFIG> -o <PATH_TO_OUTPUT>\n```\n\nWhere `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights\nand logs to be stored and `<PATH_TO_EXPERIMENT_CONFIG>` is the path to the Python file containing\nthe experiment configuration. 
An example usage of this command would be:\n\n```bash\npython main.py projects/gym_baselines/experiments/mujoco/gym_mujoco_ant_ddppo.py -o /YOUR/DESIRED/MUJOCO/OUTPUT/SAVE/PATH/gym_mujoco_ant_ddppo\n```\n\nThis trains a lightweight implementation with separate MLPs for actors and critic with a Gaussian distribution to sample actions in the \"Ant-v2\" environment, and stores the model weights and logs\nto `/YOUR/DESIRED/MUJOCO/OUTPUT/SAVE/PATH/gym_mujoco_ant_ddppo`.\n\n## Results\n\nIn our experiments, the rewards for MuJoCo environments we obtained after training using PPO are similar to those reported by OpenAI Gym Baselines (1M steps). The Humanoid environment is compared with the original PPO paper, which trained for 50M steps using PPO. Due to time constraints, we have only tested our baselines across two seeds so far. \n\n\n| Environment           | Gym Baseline Reward | Our Reward  |\n| -----------           | ------------------- | ----------- |\n|[Ant-v2](https://gym.openai.com/envs/Ant-v2/)| 1083.2 |1098.6 (reached 4719 in 25M steps)  | \n| [HalfCheetah-v2](https://gym.openai.com/envs/HalfCheetah-v2/) | 1795.43             |  1741 (reached 4019 in 18M steps)           |\n|[Hopper-v2](https://gym.openai.com/envs/Hopper-v2/)|2316.16|2266|\n|[Humanoid-v2](https://gym.openai.com/envs/Humanoid-v2/)|4000+|4500+ (reached 6500 in 70M steps)|\n| [InvertedPendulum-v2](https://gym.openai.com/envs/InvertedPendulum-v2/) | 809.43              |  1000       |\n|[Reacher-v2](https://gym.openai.com/envs/Reacher-v2/)|-6.71|-7.045|\n|[Swimmer-v2](https://gym.openai.com/envs/Swimmer-v2/)|111.19|124.7|\n|[Walker2d-v2](https://gym.openai.com/envs/Walker2d-v2/)|3424.95|2723 in 10M steps|\n"
  },
  {
    "path": "projects/gym_baselines/__init__.py",
    "content": ""
  },
  {
    "path": "projects/gym_baselines/experiments/__init__.py",
    "content": ""
  },
  {
    "path": "projects/gym_baselines/experiments/gym_base.py",
    "content": "from abc import ABC\nfrom typing import Dict, Sequence, Optional, List, Any\n\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig\nfrom allenact.base_abstractions.sensor import Sensor\n\n\nclass GymBaseConfig(ExperimentConfig, ABC):\n\n    SENSORS: Optional[Sequence[Sensor]] = None\n\n    def _get_sampler_args(\n        self, process_ind: int, mode: str, seeds: List[int]\n    ) -> Dict[str, Any]:\n        raise NotImplementedError\n\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(\n            process_ind=process_ind, mode=\"train\", seeds=seeds\n        )\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(\n            process_ind=process_ind, mode=\"valid\", seeds=seeds\n        )\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(process_ind=process_ind, mode=\"test\", seeds=seeds)\n"
  },
  {
    "path": "projects/gym_baselines/experiments/gym_humanoid_base.py",
    "content": "from abc import ABC\nfrom typing import Dict, Any\n\nfrom allenact.utils.viz_utils import VizSuite, AgentViewViz\n\nfrom projects.gym_baselines.experiments.gym_base import GymBaseConfig\n\n\nclass GymHumanoidBaseConfig(GymBaseConfig, ABC):\n    @classmethod\n    def machine_params(cls, mode=\"train\", **kwargs) -> Dict[str, Any]:\n        visualizer = None\n        if mode == \"test\":\n            visualizer = VizSuite(\n                mode=mode,\n                video_viz=AgentViewViz(\n                    label=\"episode_vid\",\n                    max_clip_length=400,\n                    vector_task_source=(\"render\", {\"mode\": \"rgb_array\"}),\n                    fps=30,\n                ),\n            )\n        return {\n            \"nprocesses\": 8 if mode == \"train\" else 1,  # rollout\n            \"devices\": [],\n            \"visualizer\": visualizer,\n        }\n"
  },
  {
    "path": "projects/gym_baselines/experiments/gym_humanoid_ddppo.py",
    "content": "from abc import ABC\nfrom typing import cast\n\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPO\n\nfrom allenact.utils.experiment_utils import (\n    TrainingPipeline,\n    Builder,\n    PipelineStage,\n    LinearDecay,\n)\n\nfrom projects.gym_baselines.experiments.gym_humanoid_base import GymHumanoidBaseConfig\n\n\nclass GymHumanoidPPOConfig(GymHumanoidBaseConfig, ABC):\n    @classmethod\n    def training_pipeline(cls, **kwargs) -> TrainingPipeline:\n        lr = 1e-4\n        ppo_steps = int(8e7)  # convergence may be after 1e8\n        clip_param = 0.1\n        value_loss_coef = 0.5\n        entropy_coef = 0.0\n        num_mini_batch = 4  # optimal 64\n        update_repeats = 10\n        max_grad_norm = 0.5\n        num_steps = 2048\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 0.95\n        advance_scene_rollout_period = None\n        save_interval = 200000\n        metric_accumulate_interval = 50000\n        return TrainingPipeline(\n            named_losses=dict(\n                ppo_loss=PPO(\n                    clip_param=clip_param,\n                    value_loss_coef=value_loss_coef,\n                    entropy_coef=entropy_coef,\n                ),\n            ),  # type:ignore\n            pipeline_stages=[\n                PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps),\n            ],\n            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=advance_scene_rollout_period,\n            save_interval=save_interval,\n            
metric_accumulate_interval=metric_accumulate_interval,\n            lr_scheduler_builder=Builder(\n                LambdaLR,\n                {\n                    \"lr_lambda\": LinearDecay(steps=ppo_steps, startp=1, endp=1)\n                },  # constant learning rate\n            ),\n        )\n"
  },
  {
    "path": "projects/gym_baselines/experiments/gym_mujoco_base.py",
    "content": "from abc import ABC\nfrom typing import Dict, Any\n\nfrom allenact.utils.viz_utils import VizSuite, AgentViewViz\n\nfrom projects.gym_baselines.experiments.gym_base import GymBaseConfig\n\n\nclass GymMoJoCoBaseConfig(GymBaseConfig, ABC):\n    @classmethod\n    def machine_params(cls, mode=\"train\", **kwargs) -> Dict[str, Any]:\n        visualizer = None\n        if mode == \"test\":\n            visualizer = VizSuite(\n                mode=mode,\n                video_viz=AgentViewViz(\n                    label=\"episode_vid\",\n                    max_clip_length=400,\n                    vector_task_source=(\"render\", {\"mode\": \"rgb_array\"}),\n                    fps=30,\n                ),\n            )\n        return {\n            \"nprocesses\": 8 if mode == \"train\" else 1,  # rollout\n            \"devices\": [],\n            \"visualizer\": visualizer,\n        }\n"
  },
  {
    "path": "projects/gym_baselines/experiments/gym_mujoco_ddppo.py",
    "content": "from abc import ABC\nfrom typing import cast\n\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPO\n\nfrom allenact.utils.experiment_utils import (\n    TrainingPipeline,\n    Builder,\n    PipelineStage,\n    LinearDecay,\n)\n\nfrom projects.gym_baselines.experiments.gym_mujoco_base import GymMoJoCoBaseConfig\n\n\nclass GymMuJoCoPPOConfig(GymMoJoCoBaseConfig, ABC):\n    @classmethod\n    def training_pipeline(cls, **kwargs) -> TrainingPipeline:\n        lr = 3e-4\n        ppo_steps = int(3e7)\n        clip_param = 0.2\n        value_loss_coef = 0.5\n        entropy_coef = 0.0\n        num_mini_batch = 4  # optimal 64\n        update_repeats = 10\n        max_grad_norm = 0.5\n        num_steps = 2048\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 0.95\n        advance_scene_rollout_period = None\n        save_interval = 200000\n        metric_accumulate_interval = 50000\n        return TrainingPipeline(\n            named_losses=dict(\n                ppo_loss=PPO(\n                    clip_param=clip_param,\n                    value_loss_coef=value_loss_coef,\n                    entropy_coef=entropy_coef,\n                ),\n            ),  # type:ignore\n            pipeline_stages=[\n                PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps),\n            ],\n            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=advance_scene_rollout_period,\n            save_interval=save_interval,\n            metric_accumulate_interval=metric_accumulate_interval,\n            
lr_scheduler_builder=Builder(\n                LambdaLR,\n                {\"lr_lambda\": LinearDecay(steps=ppo_steps, startp=1, endp=0)},\n            ),\n        )\n"
  },
  {
    "path": "projects/gym_baselines/experiments/mujoco/__init__.py",
    "content": ""
  },
  {
    "path": "projects/gym_baselines/experiments/mujoco/gym_mujoco_ant_ddppo.py",
    "content": "from typing import Dict, List, Any\n\nimport gym\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.experiment_config import TaskSampler\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic\nfrom allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor\n\nfrom allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler\n\nfrom projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig\n\n\nclass GymMuJoCoAntConfig(GymMuJoCoPPOConfig):\n\n    SENSORS = [\n        GymMuJoCoSensor(gym_env_name=\"Ant-v2\", uuid=\"gym_mujoco_data\"),\n    ]\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        \"\"\"We define our `ActorCriticModel` agent using a lightweight\n        implementation with separate MLPs for actors and critic,\n        MemorylessActorCritic.\n\n        Since this is a model for continuous control, note that the\n        superclass of our model is `ActorCriticModel[GaussianDistr]`\n        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use\n        a Gaussian distribution to sample actions.\n        \"\"\"\n        action_space = gym.spaces.Box(-3.0, 3.0, (8,), \"float32\")\n        return MemorylessActorCritic(\n            input_uuid=\"gym_mujoco_data\",\n            action_space=action_space,  # specific action_space\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            action_std=0.5,\n        )\n\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return GymTaskSampler(gym_env_type=\"Ant-v2\", **kwargs)\n\n    def _get_sampler_args(\n        self, process_ind: int, mode: str, seeds: List[int]\n    ) -> Dict[str, Any]:\n        \"\"\"Generate initialization arguments for train, valid, and test\n        TaskSamplers.\n\n        # Parameters\n        process_ind : index of the current task sampler\n        mode:  one of 
`train`, `valid`, or `test`\n        \"\"\"\n        if mode == \"train\":\n            max_tasks = None  # infinite training tasks\n            task_seeds_list = None  # no predefined random seeds for training\n            deterministic_sampling = False  # randomly sample tasks in training\n        else:\n            max_tasks = 4\n\n            # one seed for each task to sample:\n            # - ensures different seeds for each sampler, and\n            # - ensures a deterministic set of sampled tasks.\n            task_seeds_list = list(\n                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)\n            )\n\n            deterministic_sampling = (\n                True  # deterministically sample task in validation/testing\n            )\n\n        return dict(\n            gym_env_types=[\"Ant-v2\"],\n            sensors=self.SENSORS,  # sensors used to return observations to the agent\n            max_tasks=max_tasks,  # see above\n            task_seeds_list=task_seeds_list,  # see above\n            deterministic_sampling=deterministic_sampling,  # see above\n            seed=seeds[process_ind],\n        )\n\n    @classmethod\n    def tag(cls) -> str:\n        return \"Gym-MuJoCo-Ant-v2-PPO\"\n"
  },
  {
    "path": "projects/gym_baselines/experiments/mujoco/gym_mujoco_halfcheetah_ddppo.py",
    "content": "from typing import Dict, List, Any\n\nimport gym\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.experiment_config import TaskSampler\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic\nfrom allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor\n\nfrom allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler\n\nfrom projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig\n\n\nclass GymMuJoCoHalfCheetahConfig(GymMuJoCoPPOConfig):\n\n    SENSORS = [\n        GymMuJoCoSensor(gym_env_name=\"HalfCheetah-v2\", uuid=\"gym_mujoco_data\"),\n    ]\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        \"\"\"We define our `ActorCriticModel` agent using a lightweight\n        implementation with separate MLPs for actors and critic,\n        MemorylessActorCritic.\n\n        Since this is a model for continuous control, note that the\n        superclass of our model is `ActorCriticModel[GaussianDistr]`\n        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use\n        a Gaussian distribution to sample actions.\n        \"\"\"\n        action_space = gym.spaces.Box(-1.0, 1.0, (6,), \"float32\")\n        return MemorylessActorCritic(\n            input_uuid=\"gym_mujoco_data\",\n            action_space=action_space,  # specific action_space\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            action_std=0.5,\n        )\n\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return GymTaskSampler(gym_env_type=\"HalfCheetah-v2\", **kwargs)\n\n    def _get_sampler_args(\n        self, process_ind: int, mode: str, seeds: List[int]\n    ) -> Dict[str, Any]:\n        \"\"\"Generate initialization arguments for train, valid, and test\n        TaskSamplers.\n\n        # Parameters\n        process_ind : index of the current task sampler\n      
  mode:  one of `train`, `valid`, or `test`\n        \"\"\"\n        if mode == \"train\":\n            max_tasks = None  # infinite training tasks\n            task_seeds_list = None  # no predefined random seeds for training\n            deterministic_sampling = False  # randomly sample tasks in training\n        else:\n            max_tasks = 4\n\n            # one seed for each task to sample:\n            # - ensures different seeds for each sampler, and\n            # - ensures a deterministic set of sampled tasks.\n            task_seeds_list = list(\n                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)\n            )\n\n            deterministic_sampling = (\n                True  # deterministically sample task in validation/testing\n            )\n\n        return dict(\n            gym_env_types=[\"HalfCheetah-v2\"],\n            sensors=self.SENSORS,  # sensors used to return observations to the agent\n            max_tasks=max_tasks,  # see above\n            task_seeds_list=task_seeds_list,  # see above\n            deterministic_sampling=deterministic_sampling,  # see above\n            seed=seeds[process_ind],\n        )\n\n    @classmethod\n    def tag(cls) -> str:\n        return \"Gym-MuJoCo-HalfCheetah-v2-PPO\"\n"
  },
  {
    "path": "projects/gym_baselines/experiments/mujoco/gym_mujoco_hopper_ddppo.py",
    "content": "from typing import Dict, List, Any\n\nimport gym\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.experiment_config import TaskSampler\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic\nfrom allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor\n\nfrom allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler\n\nfrom projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig\n\n\nclass GymMuJoCoHopperConfig(GymMuJoCoPPOConfig):\n\n    SENSORS = [\n        GymMuJoCoSensor(gym_env_name=\"Hopper-v2\", uuid=\"gym_mujoco_data\"),\n    ]\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        \"\"\"We define our `ActorCriticModel` agent using a lightweight\n        implementation with separate MLPs for actors and critic,\n        MemorylessActorCritic.\n\n        Since this is a model for continuous control, note that the\n        superclass of our model is `ActorCriticModel[GaussianDistr]`\n        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use\n        a Gaussian distribution to sample actions.\n        \"\"\"\n        action_space = gym.spaces.Box(-1.0, 1.0, (3,), \"float32\")\n        return MemorylessActorCritic(\n            input_uuid=\"gym_mujoco_data\",\n            action_space=action_space,  # specific action_space\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            action_std=0.5,\n        )\n\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return GymTaskSampler(gym_env_type=\"Hopper-v2\", **kwargs)\n\n    def _get_sampler_args(\n        self, process_ind: int, mode: str, seeds: List[int]\n    ) -> Dict[str, Any]:\n        \"\"\"Generate initialization arguments for train, valid, and test\n        TaskSamplers.\n\n        # Parameters\n        process_ind : index of the current task sampler\n        mode:  one 
of `train`, `valid`, or `test`\n        \"\"\"\n        if mode == \"train\":\n            max_tasks = None  # infinite training tasks\n            task_seeds_list = None  # no predefined random seeds for training\n            deterministic_sampling = False  # randomly sample tasks in training\n        else:\n            max_tasks = 4\n\n            # one seed for each task to sample:\n            # - ensures different seeds for each sampler, and\n            # - ensures a deterministic set of sampled tasks.\n            task_seeds_list = list(\n                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)\n            )\n\n            deterministic_sampling = (\n                True  # deterministically sample task in validation/testing\n            )\n\n        return dict(\n            gym_env_types=[\"Hopper-v2\"],\n            sensors=self.SENSORS,  # sensors used to return observations to the agent\n            max_tasks=max_tasks,  # see above\n            task_seeds_list=task_seeds_list,  # see above\n            deterministic_sampling=deterministic_sampling,  # see above\n            seed=seeds[process_ind],\n        )\n\n    @classmethod\n    def tag(cls) -> str:\n        return \"Gym-MuJoCo-Hopper-v2-PPO\"\n"
  },
  {
    "path": "projects/gym_baselines/experiments/mujoco/gym_mujoco_humanoid_ddppo.py",
    "content": "from typing import Dict, List, Any\n\nimport gym\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.experiment_config import TaskSampler\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic\nfrom allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor\n\nfrom allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler\n\nfrom projects.gym_baselines.experiments.gym_humanoid_ddppo import GymHumanoidPPOConfig\n\n\nclass GymMuJoCoHumanoidConfig(GymHumanoidPPOConfig):\n\n    SENSORS = [\n        GymMuJoCoSensor(gym_env_name=\"Humanoid-v2\", uuid=\"gym_mujoco_data\"),\n    ]\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        \"\"\"We define our `ActorCriticModel` agent using a lightweight\n        implementation with separate MLPs for actors and critic,\n        MemorylessActorCritic.\n\n        Since this is a model for continuous control, note that the\n        superclass of our model is `ActorCriticModel[GaussianDistr]`\n        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use\n        a Gaussian distribution to sample actions.\n        \"\"\"\n        action_space = gym.spaces.Box(\n            -0.4000000059604645, 0.4000000059604645, (17,), \"float32\"\n        )\n        return MemorylessActorCritic(\n            input_uuid=\"gym_mujoco_data\",\n            action_space=action_space,  # specific action_space\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            action_std=0.5,\n        )\n\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return GymTaskSampler(gym_env_type=\"Humanoid-v2\", **kwargs)\n\n    def _get_sampler_args(\n        self, process_ind: int, mode: str, seeds: List[int]\n    ) -> Dict[str, Any]:\n        \"\"\"Generate initialization arguments for train, valid, and test\n        TaskSamplers.\n\n        # Parameters\n        
process_ind : index of the current task sampler\n        mode:  one of `train`, `valid`, or `test`\n        \"\"\"\n        if mode == \"train\":\n            max_tasks = None  # infinite training tasks\n            task_seeds_list = None  # no predefined random seeds for training\n            deterministic_sampling = False  # randomly sample tasks in training\n        else:\n            max_tasks = 4\n\n            # one seed for each task to sample:\n            # - ensures different seeds for each sampler, and\n            # - ensures a deterministic set of sampled tasks.\n            task_seeds_list = list(\n                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)\n            )\n\n            deterministic_sampling = (\n                True  # deterministically sample task in validation/testing\n            )\n\n        return dict(\n            gym_env_types=[\"Humanoid-v2\"],\n            sensors=self.SENSORS,  # sensors used to return observations to the agent\n            max_tasks=max_tasks,  # see above\n            task_seeds_list=task_seeds_list,  # see above\n            deterministic_sampling=deterministic_sampling,  # see above\n            seed=seeds[process_ind],\n        )\n\n    @classmethod\n    def tag(cls) -> str:\n        return \"Gym-MuJoCo-Humanoid-v2-PPO\"\n"
  },
  {
    "path": "projects/gym_baselines/experiments/mujoco/gym_mujoco_inverteddoublependulum_ddppo.py",
    "content": "from typing import Dict, List, Any\n\nimport gym\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.experiment_config import TaskSampler\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic\nfrom allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor\n\nfrom allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler\n\nfrom projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig\n\n\nclass GymMuJoInvertedDoublePendulumConfig(GymMuJoCoPPOConfig):\n\n    SENSORS = [\n        GymMuJoCoSensor(\n            gym_env_name=\"InvertedDoublePendulum-v2\", uuid=\"gym_mujoco_data\"\n        ),\n    ]\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        \"\"\"We define our `ActorCriticModel` agent using a lightweight\n        implementation with separate MLPs for actors and critic,\n        MemorylessActorCritic.\n\n        Since this is a model for continuous control, note that the\n        superclass of our model is `ActorCriticModel[GaussianDistr]`\n        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use\n        a Gaussian distribution to sample actions.\n        \"\"\"\n        action_space = gym.spaces.Box(-1.0, 1.0, (1,), \"float32\")\n        return MemorylessActorCritic(\n            input_uuid=\"gym_mujoco_data\",\n            action_space=action_space,  # specific action_space\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            action_std=0.5,\n        )\n\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return GymTaskSampler(gym_env_type=\"InvertedDoublePendulum-v2\", **kwargs)\n\n    def _get_sampler_args(\n        self, process_ind: int, mode: str, seeds: List[int]\n    ) -> Dict[str, Any]:\n        \"\"\"Generate initialization arguments for train, valid, and test\n        TaskSamplers.\n\n        # Parameters\n        
process_ind : index of the current task sampler\n        mode:  one of `train`, `valid`, or `test`\n        \"\"\"\n        if mode == \"train\":\n            max_tasks = None  # infinite training tasks\n            task_seeds_list = None  # no predefined random seeds for training\n            deterministic_sampling = False  # randomly sample tasks in training\n        else:\n            max_tasks = 4\n\n            # one seed for each task to sample:\n            # - ensures different seeds for each sampler, and\n            # - ensures a deterministic set of sampled tasks.\n            task_seeds_list = list(\n                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)\n            )\n\n            deterministic_sampling = (\n                True  # deterministically sample task in validation/testing\n            )\n\n        return dict(\n            gym_env_types=[\"InvertedDoublePendulum-v2\"],\n            sensors=self.SENSORS,  # sensors used to return observations to the agent\n            max_tasks=max_tasks,  # see above\n            task_seeds_list=task_seeds_list,  # see above\n            deterministic_sampling=deterministic_sampling,  # see above\n            seed=seeds[process_ind],\n        )\n\n    @classmethod\n    def tag(cls) -> str:\n        return \"Gym-MuJoCo-InvertedDoublePendulum-v2-PPO\"\n"
  },
  {
    "path": "projects/gym_baselines/experiments/mujoco/gym_mujoco_invertedpendulum_ddppo.py",
    "content": "from typing import Dict, List, Any\n\nimport gym\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.experiment_config import TaskSampler\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic\nfrom allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor\n\nfrom allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler\n\nfrom projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig\n\n\nclass GymMuJoCoInvertedPendulumConfig(GymMuJoCoPPOConfig):\n\n    SENSORS = [\n        GymMuJoCoSensor(gym_env_name=\"InvertedPendulum-v2\", uuid=\"gym_mujoco_data\"),\n    ]\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        \"\"\"We define our `ActorCriticModel` agent using a lightweight\n        implementation with separate MLPs for actors and critic,\n        MemorylessActorCritic.\n\n        Since this is a model for continuous control, note that the\n        superclass of our model is `ActorCriticModel[GaussianDistr]`\n        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use\n        a Gaussian distribution to sample actions.\n        \"\"\"\n        action_space = gym.spaces.Box(-3.0, 3.0, (1,), \"float32\")\n        return MemorylessActorCritic(\n            input_uuid=\"gym_mujoco_data\",\n            action_space=action_space,  # specific action_space\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            action_std=0.5,\n        )\n\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return GymTaskSampler(gym_env_type=\"InvertedPendulum-v2\", **kwargs)\n\n    def _get_sampler_args(\n        self, process_ind: int, mode: str, seeds: List[int]\n    ) -> Dict[str, Any]:\n        \"\"\"Generate initialization arguments for train, valid, and test\n        TaskSamplers.\n\n        # Parameters\n        process_ind : index of the current task 
sampler\n        mode:  one of `train`, `valid`, or `test`\n        \"\"\"\n        if mode == \"train\":\n            max_tasks = None  # infinite training tasks\n            task_seeds_list = None  # no predefined random seeds for training\n            deterministic_sampling = False  # randomly sample tasks in training\n        else:\n            max_tasks = 4\n\n            # one seed for each task to sample:\n            # - ensures different seeds for each sampler, and\n            # - ensures a deterministic set of sampled tasks.\n            task_seeds_list = list(\n                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)\n            )\n\n            deterministic_sampling = (\n                True  # deterministically sample task in validation/testing\n            )\n\n        return dict(\n            gym_env_types=[\"InvertedPendulum-v2\"],\n            sensors=self.SENSORS,  # sensors used to return observations to the agent\n            max_tasks=max_tasks,  # see above\n            task_seeds_list=task_seeds_list,  # see above\n            deterministic_sampling=deterministic_sampling,  # see above\n            seed=seeds[process_ind],\n        )\n\n    @classmethod\n    def tag(cls) -> str:\n        return \"Gym-MuJoCo-InvertedPendulum-v2-PPO\"\n"
  },
  {
    "path": "projects/gym_baselines/experiments/mujoco/gym_mujoco_reacher_ddppo.py",
    "content": "from typing import Dict, List, Any\n\nimport gym\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.experiment_config import TaskSampler\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic\nfrom allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor\n\nfrom allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler\n\nfrom projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig\n\n\nclass GymMuJoCoReacherConfig(GymMuJoCoPPOConfig):\n\n    SENSORS = [\n        GymMuJoCoSensor(gym_env_name=\"Reacher-v2\", uuid=\"gym_mujoco_data\"),\n    ]\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        \"\"\"We define our `ActorCriticModel` agent using a lightweight\n        implementation with separate MLPs for actors and critic,\n        MemorylessActorCritic.\n\n        Since this is a model for continuous control, note that the\n        superclass of our model is `ActorCriticModel[GaussianDistr]`\n        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use\n        a Gaussian distribution to sample actions.\n        \"\"\"\n        action_space = gym.spaces.Box(-1.0, 1.0, (2,), \"float32\")\n        return MemorylessActorCritic(\n            input_uuid=\"gym_mujoco_data\",\n            action_space=action_space,  # specific action_space\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            action_std=0.5,\n        )\n\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return GymTaskSampler(gym_env_type=\"Reacher-v2\", **kwargs)\n\n    def _get_sampler_args(\n        self, process_ind: int, mode: str, seeds: List[int]\n    ) -> Dict[str, Any]:\n        \"\"\"Generate initialization arguments for train, valid, and test\n        TaskSamplers.\n\n        # Parameters\n        process_ind : index of the current task sampler\n        mode:  
one of `train`, `valid`, or `test`\n        \"\"\"\n        if mode == \"train\":\n            max_tasks = None  # infinite training tasks\n            task_seeds_list = None  # no predefined random seeds for training\n            deterministic_sampling = False  # randomly sample tasks in training\n        else:\n            max_tasks = 4\n\n            # one seed for each task to sample:\n            # - ensures different seeds for each sampler, and\n            # - ensures a deterministic set of sampled tasks.\n            task_seeds_list = list(\n                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)\n            )\n\n            deterministic_sampling = (\n                True  # deterministically sample task in validation/testing\n            )\n\n        return dict(\n            gym_env_types=[\"Reacher-v2\"],\n            sensors=self.SENSORS,  # sensors used to return observations to the agent\n            max_tasks=max_tasks,  # see above\n            task_seeds_list=task_seeds_list,  # see above\n            deterministic_sampling=deterministic_sampling,  # see above\n            seed=seeds[process_ind],\n        )\n\n    @classmethod\n    def tag(cls) -> str:\n        return \"Gym-MuJoCo-Reacher-v2-PPO\"\n"
  },
  {
    "path": "projects/gym_baselines/experiments/mujoco/gym_mujoco_swimmer_ddppo.py",
    "content": "from typing import Dict, List, Any\n\nimport gym\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.experiment_config import TaskSampler\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic\nfrom allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor\n\nfrom allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler\n\nfrom projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig\n\n\nclass GymMuJoCoSwimmerConfig(GymMuJoCoPPOConfig):\n\n    SENSORS = [\n        GymMuJoCoSensor(gym_env_name=\"Swimmer-v2\", uuid=\"gym_mujoco_data\"),\n    ]\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        \"\"\"We define our `ActorCriticModel` agent using a lightweight\n        implementation with separate MLPs for actors and critic,\n        MemorylessActorCritic.\n\n        Since this is a model for continuous control, note that the\n        superclass of our model is `ActorCriticModel[GaussianDistr]`\n        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use\n        a Gaussian distribution to sample actions.\n        \"\"\"\n        action_space = gym.spaces.Box(-1.0, 1.0, (2,), \"float32\")\n        return MemorylessActorCritic(\n            input_uuid=\"gym_mujoco_data\",\n            action_space=action_space,  # specific action_space\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            action_std=0.5,\n        )\n\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return GymTaskSampler(gym_env_type=\"Swimmer-v2\", **kwargs)\n\n    def _get_sampler_args(\n        self, process_ind: int, mode: str, seeds: List[int]\n    ) -> Dict[str, Any]:\n        \"\"\"Generate initialization arguments for train, valid, and test\n        TaskSamplers.\n\n        # Parameters\n        process_ind : index of the current task sampler\n        mode:  
one of `train`, `valid`, or `test`\n        \"\"\"\n        if mode == \"train\":\n            max_tasks = None  # infinite training tasks\n            task_seeds_list = None  # no predefined random seeds for training\n            deterministic_sampling = False  # randomly sample tasks in training\n        else:\n            max_tasks = 4\n\n            # one seed for each task to sample:\n            # - ensures different seeds for each sampler, and\n            # - ensures a deterministic set of sampled tasks.\n            task_seeds_list = list(\n                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)\n            )\n\n            deterministic_sampling = (\n                True  # deterministically sample task in validation/testing\n            )\n\n        return dict(\n            gym_env_types=[\"Swimmer-v2\"],\n            sensors=self.SENSORS,  # sensors used to return observations to the agent\n            max_tasks=max_tasks,  # see above\n            task_seeds_list=task_seeds_list,  # see above\n            deterministic_sampling=deterministic_sampling,  # see above\n            seed=seeds[process_ind],\n        )\n\n    @classmethod\n    def tag(cls) -> str:\n        return \"Gym-MuJoCo-Swimmer-v2-PPO\"\n"
  },
  {
    "path": "projects/gym_baselines/experiments/mujoco/gym_mujoco_walker2d_ddppo.py",
    "content": "from typing import Dict, List, Any\n\nimport gym\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.experiment_config import TaskSampler\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic\nfrom allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor\n\nfrom allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler\n\nfrom projects.gym_baselines.experiments.gym_mujoco_ddppo import GymMuJoCoPPOConfig\n\n\nclass GymMuJoCoWalkerConfig(GymMuJoCoPPOConfig):\n\n    SENSORS = [\n        GymMuJoCoSensor(gym_env_name=\"Walker2d-v2\", uuid=\"gym_mujoco_data\"),\n    ]\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        \"\"\"We define our `ActorCriticModel` agent using a lightweight\n        implementation with separate MLPs for actors and critic,\n        MemorylessActorCritic.\n\n        Since this is a model for continuous control, note that the\n        superclass of our model is `ActorCriticModel[GaussianDistr]`\n        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use\n        a Gaussian distribution to sample actions.\n        \"\"\"\n        action_space = gym.spaces.Box(-1.0, 1.0, (6,), \"float32\")\n        return MemorylessActorCritic(\n            input_uuid=\"gym_mujoco_data\",\n            action_space=action_space,  # specific action_space\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            action_std=0.5,\n        )\n\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return GymTaskSampler(gym_env_type=\"Walker2d-v2\", **kwargs)\n\n    def _get_sampler_args(\n        self, process_ind: int, mode: str, seeds: List[int]\n    ) -> Dict[str, Any]:\n        \"\"\"Generate initialization arguments for train, valid, and test\n        TaskSamplers.\n\n        # Parameters\n        process_ind : index of the current task sampler\n        mode:  
one of `train`, `valid`, or `test`\n        \"\"\"\n        if mode == \"train\":\n            max_tasks = None  # infinite training tasks\n            task_seeds_list = None  # no predefined random seeds for training\n            deterministic_sampling = False  # randomly sample tasks in training\n        else:\n            max_tasks = 4\n\n            # one seed for each task to sample:\n            # - ensures different seeds for each sampler, and\n            # - ensures a deterministic set of sampled tasks.\n            task_seeds_list = list(\n                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)\n            )\n\n            deterministic_sampling = (\n                True  # deterministically sample task in validation/testing\n            )\n\n        return dict(\n            gym_env_types=[\"Walker2d-v2\"],\n            sensors=self.SENSORS,  # sensors used to return observations to the agent\n            max_tasks=max_tasks,  # see above\n            task_seeds_list=task_seeds_list,  # see above\n            deterministic_sampling=deterministic_sampling,  # see above\n            seed=seeds[process_ind],\n        )\n\n    @classmethod\n    def tag(cls) -> str:\n        return \"Gym-MuJoCo-Walker2d-v2-PPO\"\n"
  },
  {
    "path": "projects/gym_baselines/models/__init__.py",
    "content": ""
  },
  {
    "path": "projects/gym_baselines/models/gym_models.py",
    "content": "\"\"\"\nNote: This file was added only for format consistency with the other baselines in the project, so for now it is\nthe same as `allenact_plugins/gym_plugin/gym_models.py`. However, for Gym Robotics environments some modifications\nare needed. For example, for `state_dim`:\n        if input_uuid == 'gym_robotics_data':\n            # consider that the observation space is Dict for robotics env\n            state_dim = observation_space[self.input_uuid]['observation'].shape[0]\n        else:\n            assert len(observation_space[self.input_uuid].shape) == 1\n            state_dim = observation_space[self.input_uuid].shape[0]\n\"\"\"\n"
  },
  {
    "path": "projects/manipulathor_baselines/__init__.py",
    "content": ""
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/__init__.py",
    "content": ""
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/experiments/__init__.py",
    "content": ""
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_base.py",
    "content": "from abc import ABC\nfrom typing import Optional, Sequence, Union\n\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.utils.experiment_utils import Builder\n\n\nclass ArmPointNavBaseConfig(ExperimentConfig, ABC):\n    \"\"\"The base ArmPointNav experiment configuration.\"\"\"\n\n    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None\n    SENSORS: Optional[Sequence[Sensor]] = None\n\n    STEP_SIZE = 0.25\n    ROTATION_DEGREES = 45.0\n    VISIBILITY_DISTANCE = 1.0\n    STOCHASTIC = False\n\n    CAMERA_WIDTH = 224\n    CAMERA_HEIGHT = 224\n    SCREEN_SIZE = 224\n    MAX_STEPS = 200\n\n    def __init__(self):\n        self.REWARD_CONFIG = {\n            \"step_penalty\": -0.01,\n            \"goal_success_reward\": 10.0,\n            \"pickup_success_reward\": 5.0,\n            \"failed_stop_reward\": 0.0,\n            \"shaping_weight\": 1.0,  # we are not using this\n            \"failed_action_penalty\": -0.03,\n        }\n\n    @classmethod\n    def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        return tuple()\n"
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_mixin_ddppo.py",
    "content": "import torch.optim as optim\nfrom allenact.algorithms.onpolicy_sync.losses import PPO\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig\nfrom allenact.utils.experiment_utils import (\n    Builder,\n    PipelineStage,\n    TrainingPipeline,\n    LinearDecay,\n)\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_base import (\n    ArmPointNavBaseConfig,\n)\n\n\nclass ArmPointNavMixInPPOConfig(ArmPointNavBaseConfig):\n    def training_pipeline(self, **kwargs):\n        ppo_steps = int(300000000)\n        lr = 3e-4\n        num_mini_batch = 1\n        update_repeats = 4\n        num_steps = self.MAX_STEPS\n        save_interval = 500000  # from 50k\n        log_interval = 1000\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 0.95\n        max_grad_norm = 0.5\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=log_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses={\"ppo_loss\": PPO(**PPOConfig)},\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n            pipeline_stages=[\n                PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps)\n            ],\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}\n            ),\n        )\n"
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_mixin_simplegru.py",
    "content": "from typing import Sequence, Union\n\nimport gym\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.utils.experiment_utils import Builder\nfrom projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_base import (\n    ArmPointNavBaseConfig,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.models.arm_pointnav_models import (\n    ArmPointNavBaselineActorCritic,\n)\n\n\nclass ArmPointNavMixInSimpleGRUConfig(ArmPointNavBaseConfig):\n    TASK_SAMPLER: TaskSampler\n\n    @classmethod\n    def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        preprocessors = []\n        return preprocessors\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n\n        return ArmPointNavBaselineActorCritic(\n            action_space=gym.spaces.Discrete(\n                len(cls.TASK_SAMPLER._TASK_TYPE.class_action_names())\n            ),\n            observation_space=kwargs[\"sensor_preprocessor_graph\"].observation_spaces,\n            hidden_size=512,\n        )\n"
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_thor_base.py",
    "content": "import platform\nfrom abc import ABC\nfrom math import ceil\nfrom typing import Dict, Any, List, Optional, Sequence\n\nimport gym\nimport numpy as np\nimport torch\n\nfrom allenact.base_abstractions.experiment_config import MachineParams\nfrom allenact.base_abstractions.preprocessor import SensorPreprocessorGraph\nfrom allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.utils.experiment_utils import evenly_distribute_count_into_bins\nfrom allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS\nfrom allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (\n    SimpleArmPointNavGeneralSampler,\n)\nfrom allenact_plugins.manipulathor_plugin.manipulathor_viz import (\n    ImageVisualizer,\n    TestMetricLogger,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_base import (\n    ArmPointNavBaseConfig,\n)\n\n\nclass ArmPointNavThorBaseConfig(ArmPointNavBaseConfig, ABC):\n    \"\"\"The base config for all iTHOR PointNav experiments.\"\"\"\n\n    TASK_SAMPLER = SimpleArmPointNavGeneralSampler\n    VISUALIZE = False\n    if platform.system() == \"Darwin\":\n        VISUALIZE = True\n\n    NUM_PROCESSES: Optional[int] = None\n    TRAIN_GPU_IDS = list(range(torch.cuda.device_count()))\n    SAMPLER_GPU_IDS = TRAIN_GPU_IDS\n    VALID_GPU_IDS = [torch.cuda.device_count() - 1]\n    TEST_GPU_IDS = [torch.cuda.device_count() - 1]\n\n    TRAIN_DATASET_DIR: Optional[str] = None\n    VAL_DATASET_DIR: Optional[str] = None\n\n    CAP_TRAINING = None\n\n    TRAIN_SCENES: Optional[List[str]] = None\n    VAL_SCENES: Optional[List[str]] = None\n    TEST_SCENES: Optional[List[str]] = None\n\n    OBJECT_TYPES: Optional[Sequence[str]] = None\n    VALID_SAMPLES_IN_SCENE = 1\n    TEST_SAMPLES_IN_SCENE = 1\n\n    NUMBER_OF_TEST_PROCESS = 10\n\n    def __init__(self):\n        super().__init__()\n\n        assert (\n  
          self.CAMERA_WIDTH == 224\n            and self.CAMERA_HEIGHT == 224\n            and self.VISIBILITY_DISTANCE == 1\n            and self.STEP_SIZE == 0.25\n        )\n        self.ENV_ARGS = ENV_ARGS\n\n    def machine_params(self, mode=\"train\", **kwargs):\n        sampler_devices: Sequence[int] = []\n        if mode == \"train\":\n            workers_per_device = 1\n            gpu_ids = (\n                []\n                if not torch.cuda.is_available()\n                else self.TRAIN_GPU_IDS * workers_per_device\n            )\n            nprocesses = (\n                1\n                if not torch.cuda.is_available()\n                else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))\n            )\n            sampler_devices = self.SAMPLER_GPU_IDS\n        elif mode == \"valid\":\n            nprocesses = 1\n            gpu_ids = [] if not torch.cuda.is_available() else self.VALID_GPU_IDS\n        elif mode == \"test\":\n            nprocesses = self.NUMBER_OF_TEST_PROCESS if torch.cuda.is_available() else 1\n            gpu_ids = [] if not torch.cuda.is_available() else self.TEST_GPU_IDS\n        else:\n            raise NotImplementedError(\"mode must be 'train', 'valid', or 'test'.\")\n\n        sensors = [*self.SENSORS]\n        if mode != \"train\":\n            sensors = [s for s in sensors if not isinstance(s, ExpertActionSensor)]\n\n        sensor_preprocessor_graph = (\n            SensorPreprocessorGraph(\n                source_observation_spaces=SensorSuite(sensors).observation_spaces,\n                preprocessors=self.preprocessors(),\n            )\n            if mode == \"train\"\n            or (\n                (isinstance(nprocesses, int) and nprocesses > 0)\n                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)\n            )\n            else None\n        )\n\n        return MachineParams(\n            nprocesses=nprocesses,\n            devices=gpu_ids,\n            
sampler_devices=(\n                sampler_devices if mode == \"train\" else gpu_ids\n            ),  # ignored with > 1 gpu_ids\n            sensor_preprocessor_graph=sensor_preprocessor_graph,\n        )\n\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        from datetime import datetime\n\n        now = datetime.now()\n        exp_name_w_time = cls.__name__ + \"_\" + now.strftime(\"%m_%d_%Y_%H_%M_%S_%f\")\n        if cls.VISUALIZE:\n            visualizers = [\n                ImageVisualizer(exp_name=exp_name_w_time),\n                TestMetricLogger(exp_name=exp_name_w_time),\n            ]\n\n            kwargs[\"visualizers\"] = visualizers\n        kwargs[\"objects\"] = cls.OBJECT_TYPES\n        kwargs[\"exp_name\"] = exp_name_w_time\n        return cls.TASK_SAMPLER(**kwargs)\n\n    @staticmethod\n    def _partition_inds(n: int, num_parts: int):\n        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(\n            np.int32\n        )\n\n    def _get_sampler_args_for_scene_split(\n        self,\n        scenes: List[str],\n        process_ind: int,\n        total_processes: int,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        if total_processes > len(scenes):  # oversample some scenes -> bias\n            if total_processes % len(scenes) != 0:\n                print(\n                    \"Warning: oversampling some of the scenes to feed all processes.\"\n                    \" You can avoid this by setting a number of workers divisible by the number of scenes\"\n                )\n            scenes = scenes * int(ceil(total_processes / len(scenes)))\n            scenes = scenes[: total_processes * (len(scenes) // total_processes)]\n        else:\n            if len(scenes) % total_processes != 0:\n                print(\n                    \"Warning: oversampling some of the scenes to feed all processes.\"\n                    
\" You can avoid this by setting a number of workers divisor of the number of scenes\"\n                )\n        inds = self._partition_inds(len(scenes), total_processes)\n\n        return {\n            \"scenes\": scenes[inds[process_ind] : inds[process_ind + 1]],\n            \"env_args\": self.ENV_ARGS,\n            \"max_steps\": self.MAX_STEPS,\n            \"sensors\": self.SENSORS,\n            \"action_space\": gym.spaces.Discrete(\n                len(self.TASK_SAMPLER._TASK_TYPE.class_action_names())\n            ),\n            \"seed\": seeds[process_ind] if seeds is not None else None,\n            \"deterministic_cudnn\": deterministic_cudnn,\n            \"rewards_config\": self.REWARD_CONFIG,\n        }\n\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            self.TRAIN_SCENES,\n            process_ind,\n            total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"scene_period\"] = \"manual\"\n        res[\"sampler_mode\"] = \"train\"\n        res[\"cap_training\"] = self.CAP_TRAINING\n        res[\"env_args\"] = {}\n        res[\"env_args\"].update(self.ENV_ARGS)\n        res[\"env_args\"][\"x_display\"] = (\n            (\"0.%d\" % devices[process_ind % len(devices)]) if len(devices) > 0 else None\n        )\n        return res\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]],\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            self.VALID_SCENES,\n            process_ind,\n 
           total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"scene_period\"] = self.VALID_SAMPLES_IN_SCENE\n        res[\"sampler_mode\"] = \"val\"\n        res[\"cap_training\"] = self.CAP_TRAINING\n        res[\"max_tasks\"] = self.VALID_SAMPLES_IN_SCENE * len(res[\"scenes\"])\n        res[\"env_args\"] = {}\n        res[\"env_args\"].update(self.ENV_ARGS)\n        res[\"env_args\"][\"x_display\"] = (\n            (\"0.%d\" % devices[process_ind % len(devices)]) if len(devices) > 0 else None\n        )\n        return res\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]],\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            self.TEST_SCENES,\n            process_ind,\n            total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"scene_period\"] = self.TEST_SAMPLES_IN_SCENE\n        res[\"sampler_mode\"] = \"test\"\n        res[\"env_args\"] = {}\n        res[\"cap_training\"] = self.CAP_TRAINING\n        res[\"env_args\"].update(self.ENV_ARGS)\n        res[\"env_args\"][\"x_display\"] = (\n            (\"0.%d\" % devices[process_ind % len(devices)]) if len(devices) > 0 else None\n        )\n        return res\n"
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/__init__.py",
    "content": ""
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_depth.py",
    "content": "from allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS\nfrom allenact_plugins.manipulathor_plugin.manipulathor_sensors import (\n    DepthSensorThor,\n    RelativeAgentArmToObjectSensor,\n    RelativeObjectToGoalSensor,\n    PickedUpObjSensor,\n)\nfrom allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (\n    ArmPointNavTaskSampler,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_ddppo import (\n    ArmPointNavMixInPPOConfig,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_simplegru import (\n    ArmPointNavMixInSimpleGRUConfig,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.experiments.ithor.armpointnav_ithor_base import (\n    ArmPointNaviThorBaseConfig,\n)\n\n\nclass ArmPointNavDepth(\n    ArmPointNaviThorBaseConfig,\n    ArmPointNavMixInPPOConfig,\n    ArmPointNavMixInSimpleGRUConfig,\n):\n    \"\"\"An Arm Point Navigation experiment configuration in iThor with Depth\n    input.\"\"\"\n\n    SENSORS = [\n        DepthSensorThor(\n            height=ArmPointNaviThorBaseConfig.SCREEN_SIZE,\n            width=ArmPointNaviThorBaseConfig.SCREEN_SIZE,\n            use_normalization=True,\n            uuid=\"depth_lowres\",\n        ),\n        RelativeAgentArmToObjectSensor(),\n        RelativeObjectToGoalSensor(),\n        PickedUpObjSensor(),\n    ]\n\n    MAX_STEPS = 200\n    TASK_SAMPLER = ArmPointNavTaskSampler\n\n    def __init__(self):\n        super().__init__()\n\n        assert (\n            self.CAMERA_WIDTH == 224\n            and self.CAMERA_HEIGHT == 224\n            and self.VISIBILITY_DISTANCE == 1\n            and self.STEP_SIZE == 0.25\n        )\n        self.ENV_ARGS = {**ENV_ARGS, \"renderDepthImage\": True}\n\n    @classmethod\n    def tag(cls):\n        return cls.__name__\n"
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_disjoint_depth.py",
    "content": "import gym\nimport torch.nn as nn\n\nfrom allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS\nfrom allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (\n    ArmPointNavTaskSampler,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.experiments.ithor.armpointnav_depth import (\n    ArmPointNavDepth,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.models.disjoint_arm_pointnav_models import (\n    DisjointArmPointNavBaselineActorCritic,\n)\n\n\nclass ArmPointNavDisjointDepth(ArmPointNavDepth):\n    \"\"\"An Arm Point Navigation experiment configuration in iThor with Depth\n    input, using the disjoint actor-critic model.\"\"\"\n\n    TASK_SAMPLER = ArmPointNavTaskSampler\n\n    def __init__(self):\n        super().__init__()\n\n        assert (\n            self.CAMERA_WIDTH == 224\n            and self.CAMERA_HEIGHT == 224\n            and self.VISIBILITY_DISTANCE == 1\n            and self.STEP_SIZE == 0.25\n        )\n        self.ENV_ARGS = {**ENV_ARGS, \"renderDepthImage\": True}\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        return DisjointArmPointNavBaselineActorCritic(\n            action_space=gym.spaces.Discrete(\n                len(cls.TASK_SAMPLER._TASK_TYPE.class_action_names())\n            ),\n            observation_space=kwargs[\"sensor_preprocessor_graph\"].observation_spaces,\n            hidden_size=512,\n        )\n\n    @classmethod\n    def tag(cls):\n        return cls.__name__\n"
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_ithor_base.py",
    "content": "from abc import ABC\n\nfrom allenact_plugins.manipulathor_plugin.armpointnav_constants import (\n    TRAIN_OBJECTS,\n    TEST_OBJECTS,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_thor_base import (\n    ArmPointNavThorBaseConfig,\n)\n\n\nclass ArmPointNaviThorBaseConfig(ArmPointNavThorBaseConfig, ABC):\n    \"\"\"The base config for all iTHOR ArmPointNav experiments.\"\"\"\n\n    NUM_PROCESSES = 40\n    # add all the arguments here\n    TOTAL_NUMBER_SCENES = 30\n\n    TRAIN_SCENES = [\n        \"FloorPlan{}_physics\".format(str(i))\n        for i in range(1, TOTAL_NUMBER_SCENES + 1)\n        if (i % 3 == 1 or i % 3 == 0) and i != 28\n    ]  # last scenes are really bad\n    TEST_SCENES = [\n        \"FloorPlan{}_physics\".format(str(i))\n        for i in range(1, TOTAL_NUMBER_SCENES + 1)\n        if i % 3 == 2 and i % 6 == 2\n    ]\n    VALID_SCENES = [\n        \"FloorPlan{}_physics\".format(str(i))\n        for i in range(1, TOTAL_NUMBER_SCENES + 1)\n        if i % 3 == 2 and i % 6 == 5\n    ]\n\n    ALL_SCENES = TRAIN_SCENES + TEST_SCENES + VALID_SCENES\n\n    assert (\n        len(ALL_SCENES) == TOTAL_NUMBER_SCENES - 1\n        and len(set(ALL_SCENES)) == TOTAL_NUMBER_SCENES - 1\n    )\n\n    OBJECT_TYPES = tuple(sorted(TRAIN_OBJECTS))\n\n    UNSEEN_OBJECT_TYPES = tuple(sorted(TEST_OBJECTS))\n"
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_no_vision.py",
    "content": "from allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS\nfrom allenact_plugins.manipulathor_plugin.manipulathor_sensors import (\n    NoVisionSensorThor,\n    RelativeAgentArmToObjectSensor,\n    RelativeObjectToGoalSensor,\n    PickedUpObjSensor,\n)\nfrom allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (\n    ArmPointNavTaskSampler,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_ddppo import (\n    ArmPointNavMixInPPOConfig,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_simplegru import (\n    ArmPointNavMixInSimpleGRUConfig,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.experiments.ithor.armpointnav_ithor_base import (\n    ArmPointNaviThorBaseConfig,\n)\n\n\nclass ArmPointNavNoVision(\n    ArmPointNaviThorBaseConfig,\n    ArmPointNavMixInPPOConfig,\n    ArmPointNavMixInSimpleGRUConfig,\n):\n    \"\"\"An Arm Point Navigation experiment configuration in iThor using\n    `NoVisionSensorThor` in place of RGB input.\"\"\"\n\n    SENSORS = [\n        NoVisionSensorThor(\n            height=ArmPointNaviThorBaseConfig.SCREEN_SIZE,\n            width=ArmPointNaviThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=False,\n            uuid=\"rgb_lowres\",\n        ),\n        RelativeAgentArmToObjectSensor(),\n        RelativeObjectToGoalSensor(),\n        PickedUpObjSensor(),\n    ]\n\n    MAX_STEPS = 200\n    TASK_SAMPLER = ArmPointNavTaskSampler  #\n\n    def __init__(self):\n        super().__init__()\n\n        assert (\n            self.CAMERA_WIDTH == 224\n            and self.CAMERA_HEIGHT == 224\n            and self.VISIBILITY_DISTANCE == 1\n            and self.STEP_SIZE == 0.25\n        )\n        self.ENV_ARGS = {**ENV_ARGS, \"renderDepthImage\": False}\n\n    @classmethod\n    def tag(cls):\n        return cls.__name__\n"
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_rgb.py",
    "content": "from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor\nfrom allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS\nfrom allenact_plugins.manipulathor_plugin.manipulathor_sensors import (\n    RelativeAgentArmToObjectSensor,\n    RelativeObjectToGoalSensor,\n    PickedUpObjSensor,\n)\nfrom allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (\n    ArmPointNavTaskSampler,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_ddppo import (\n    ArmPointNavMixInPPOConfig,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_simplegru import (\n    ArmPointNavMixInSimpleGRUConfig,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.experiments.ithor.armpointnav_ithor_base import (\n    ArmPointNaviThorBaseConfig,\n)\n\n\nclass ArmPointNavRGB(\n    ArmPointNaviThorBaseConfig,\n    ArmPointNavMixInPPOConfig,\n    ArmPointNavMixInSimpleGRUConfig,\n):\n    \"\"\"An Arm Point Navigation experiment configuration in iThor with RGB\n    input.\"\"\"\n\n    SENSORS = [\n        RGBSensorThor(\n            height=ArmPointNaviThorBaseConfig.SCREEN_SIZE,\n            width=ArmPointNaviThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        RelativeAgentArmToObjectSensor(),\n        RelativeObjectToGoalSensor(),\n        PickedUpObjSensor(),\n    ]\n\n    MAX_STEPS = 200\n    TASK_SAMPLER = ArmPointNavTaskSampler  #\n\n    def __init__(self):\n        super().__init__()\n\n        assert (\n            self.CAMERA_WIDTH == 224\n            and self.CAMERA_HEIGHT == 224\n            and self.VISIBILITY_DISTANCE == 1\n            and self.STEP_SIZE == 0.25\n        )\n        self.ENV_ARGS = {**ENV_ARGS}\n\n    @classmethod\n    def tag(cls):\n        return cls.__name__\n"
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_rgbdepth.py",
    "content": "from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor\nfrom allenact_plugins.manipulathor_plugin.manipulathor_constants import ENV_ARGS\nfrom allenact_plugins.manipulathor_plugin.manipulathor_sensors import (\n    DepthSensorThor,\n    RelativeAgentArmToObjectSensor,\n    RelativeObjectToGoalSensor,\n    PickedUpObjSensor,\n)\nfrom allenact_plugins.manipulathor_plugin.manipulathor_task_samplers import (\n    ArmPointNavTaskSampler,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_ddppo import (\n    ArmPointNavMixInPPOConfig,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.experiments.armpointnav_mixin_simplegru import (\n    ArmPointNavMixInSimpleGRUConfig,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.experiments.ithor.armpointnav_ithor_base import (\n    ArmPointNaviThorBaseConfig,\n)\n\n\nclass ArmPointNavRGBDepth(\n    ArmPointNaviThorBaseConfig,\n    ArmPointNavMixInPPOConfig,\n    ArmPointNavMixInSimpleGRUConfig,\n):\n    \"\"\"An Arm Point Navigation experiment configuration in iThor with RGB-D\n    input.\"\"\"\n\n    SENSORS = [\n        DepthSensorThor(\n            height=ArmPointNaviThorBaseConfig.SCREEN_SIZE,\n            width=ArmPointNaviThorBaseConfig.SCREEN_SIZE,\n            use_normalization=True,\n            uuid=\"depth_lowres\",\n        ),\n        RGBSensorThor(\n            height=ArmPointNaviThorBaseConfig.SCREEN_SIZE,\n            width=ArmPointNaviThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        RelativeAgentArmToObjectSensor(),\n        RelativeObjectToGoalSensor(),\n        PickedUpObjSensor(),\n    ]\n\n    MAX_STEPS = 200\n    TASK_SAMPLER = ArmPointNavTaskSampler  #\n\n    def __init__(self):\n        super().__init__()\n\n        assert (\n            self.CAMERA_WIDTH == 224\n            and self.CAMERA_HEIGHT == 224\n            and self.VISIBILITY_DISTANCE == 1\n            and self.STEP_SIZE == 0.25\n        )\n        self.ENV_ARGS = {**ENV_ARGS, \"renderDepthImage\": True}\n\n    @classmethod\n    def tag(cls):\n        return cls.__name__\n"
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/models/__init__.py",
    "content": ""
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/models/arm_pointnav_models.py",
    "content": "\"\"\"Baseline models for use in the Arm Point Navigation task.\n\nArm Point Navigation is currently available as a Task in ManipulaTHOR.\n\"\"\"\n\nfrom typing import Tuple, Optional\n\nimport gym\nimport torch\nfrom gym.spaces.dict import Dict as SpaceDict\n\nfrom allenact.algorithms.onpolicy_sync.policy import (\n    ActorCriticModel,\n    LinearCriticHead,\n    LinearActorHead,\n    DistributionType,\n    Memory,\n    ObservationType,\n)\nfrom allenact.base_abstractions.distributions import CategoricalDistr\nfrom allenact.base_abstractions.misc import ActorCriticOutput\nfrom allenact.embodiedai.models.basic_models import SimpleCNN, RNNStateEncoder\nfrom projects.manipulathor_baselines.armpointnav_baselines.models.manipulathor_net_utils import (\n    input_embedding_net,\n)\n\n\nclass ArmPointNavBaselineActorCritic(ActorCriticModel[CategoricalDistr]):\n    \"\"\"Baseline recurrent actor critic model for armpointnav task.\n\n    # Attributes\n    action_space : The space of actions available to the agent. Currently only discrete\n        actions are allowed (so this space will always be of type `gym.spaces.Discrete`).\n    observation_space : The observation space expected by the agent. 
This observation space\n        should include (optionally) 'rgb' images and 'depth' images.\n    hidden_size : The hidden size of the GRU RNN.\n    object_type_embedding_dim: The dimensionality of the embedding corresponding to the goal\n        object type.\n    \"\"\"\n\n    def __init__(\n        self,\n        action_space: gym.spaces.Discrete,\n        observation_space: SpaceDict,\n        hidden_size=512,\n        obj_state_embedding_size=512,\n        trainable_masked_hidden_state: bool = False,\n        num_rnn_layers=1,\n        rnn_type=\"GRU\",\n    ):\n        \"\"\"Initializer.\n\n        See class documentation for parameter definitions.\n        \"\"\"\n        super().__init__(action_space=action_space, observation_space=observation_space)\n\n        self._hidden_size = hidden_size\n        self.object_type_embedding_size = obj_state_embedding_size\n\n        sensor_names = self.observation_space.spaces.keys()\n        self.visual_encoder = SimpleCNN(\n            self.observation_space,\n            self._hidden_size,\n            rgb_uuid=\"rgb_lowres\" if \"rgb_lowres\" in sensor_names else None,\n            depth_uuid=\"depth_lowres\" if \"depth_lowres\" in sensor_names else None,\n        )\n\n        if \"rgb_lowres\" in sensor_names and \"depth_lowres\" in sensor_names:\n            input_visual_feature_num = 2\n        elif \"rgb_lowres\" in sensor_names:\n            input_visual_feature_num = 1\n        elif \"depth_lowres\" in sensor_names:\n            input_visual_feature_num = 1\n        else:\n            raise NotImplementedError\n\n        self.state_encoder = RNNStateEncoder(\n            self._hidden_size * input_visual_feature_num + obj_state_embedding_size,\n            self._hidden_size,\n            trainable_masked_hidden_state=trainable_masked_hidden_state,\n            num_layers=num_rnn_layers,\n            rnn_type=rnn_type,\n        )\n\n        self.actor = LinearActorHead(self._hidden_size, action_space.n)\n        
self.critic = LinearCriticHead(self._hidden_size)\n        relative_dist_embedding_size = torch.Tensor([3, 100, obj_state_embedding_size])\n        self.relative_dist_embedding = input_embedding_net(\n            relative_dist_embedding_size.long().tolist(), dropout=0\n        )\n\n        self.train()\n\n    @property\n    def recurrent_hidden_state_size(self) -> int:\n        \"\"\"The recurrent hidden state size of the model.\"\"\"\n        return self._hidden_size\n\n    @property\n    def num_recurrent_layers(self) -> int:\n        \"\"\"Number of recurrent hidden layers.\"\"\"\n        return self.state_encoder.num_recurrent_layers\n\n    def _recurrent_memory_specification(self):\n        return dict(\n            rnn=(\n                (\n                    (\"layer\", self.num_recurrent_layers),\n                    (\"sampler\", None),\n                    (\"hidden\", self.recurrent_hidden_state_size),\n                ),\n                torch.float32,\n            )\n        )\n\n    def get_relative_distance_embedding(\n        self, state_tensor: torch.Tensor\n    ) -> torch.FloatTensor:\n\n        return self.relative_dist_embedding(state_tensor)\n\n    def forward(  # type:ignore\n        self,\n        observations: ObservationType,\n        memory: Memory,\n        prev_actions: torch.Tensor,\n        masks: torch.FloatTensor,\n    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:\n        \"\"\"Processes input batched observations to produce new actor and critic\n        values. 
Processes input batched observations (along with prior hidden\n        states, previous actions, and masks denoting which recurrent hidden\n        states should be masked) and returns an `ActorCriticOutput` object\n        containing the model's policy (distribution over actions) and\n        evaluation of the current state (value).\n\n        # Parameters\n        observations : Batched input observations.\n        memory : `Memory` containing the hidden states from initial timepoints.\n        prev_actions : Tensor of previous actions taken.\n        masks : Masks applied to hidden states. See `RNNStateEncoder`.\n        # Returns\n        Tuple of the `ActorCriticOutput` and recurrent hidden state.\n        \"\"\"\n\n        arm2obj_dist = self.get_relative_distance_embedding(\n            observations[\"relative_agent_arm_to_obj\"]\n        )\n        obj2goal_dist = self.get_relative_distance_embedding(\n            observations[\"relative_obj_to_goal\"]\n        )\n\n        perception_embed = self.visual_encoder(observations)\n\n        pickup_bool = observations[\"pickedup_object\"]\n        after_pickup = pickup_bool == 1\n        distances = arm2obj_dist\n        distances[after_pickup] = obj2goal_dist[after_pickup]\n\n        x = [distances, perception_embed]\n\n        x_cat = torch.cat(x, dim=-1)\n        x_out, rnn_hidden_states = self.state_encoder(\n            x_cat, memory.tensor(\"rnn\"), masks\n        )\n\n        actor_out = self.actor(x_out)\n        critic_out = self.critic(x_out)\n        actor_critic_output = ActorCriticOutput(\n            distributions=actor_out, values=critic_out, extras={}\n        )\n\n        updated_memory = memory.set_tensor(\"rnn\", rnn_hidden_states)\n\n        return (\n            actor_critic_output,\n            updated_memory,\n        )\n"
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/models/base_models.py",
    "content": "import torch\nimport torch.nn as nn\n\n\nclass LinearActorHeadNoCategory(nn.Module):\n    def __init__(self, num_inputs: int, num_outputs: int):\n        super().__init__()\n\n        self.linear = nn.Linear(num_inputs, num_outputs)\n        nn.init.orthogonal_(self.linear.weight, gain=0.01)\n        nn.init.constant_(self.linear.bias, 0)\n\n    def forward(self, x: torch.FloatTensor):  # type: ignore\n        x = self.linear(x)  # type:ignore\n        assert len(x.shape) == 3\n        return x\n"
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/models/disjoint_arm_pointnav_models.py",
    "content": "\"\"\"Baseline models for use in the Arm Point Navigation task.\n\nArm Point Navigation is currently available as a Task in ManipulaTHOR.\n\"\"\"\n\nfrom typing import Tuple, Optional\n\nimport gym\nimport torch\nfrom gym.spaces.dict import Dict as SpaceDict\n\nfrom allenact.algorithms.onpolicy_sync.policy import (\n    ActorCriticModel,\n    LinearCriticHead,\n    DistributionType,\n    Memory,\n    ObservationType,\n)\nfrom allenact.base_abstractions.distributions import CategoricalDistr\nfrom allenact.base_abstractions.misc import ActorCriticOutput\nfrom allenact.embodiedai.models.basic_models import SimpleCNN, RNNStateEncoder\nfrom projects.manipulathor_baselines.armpointnav_baselines.models.base_models import (\n    LinearActorHeadNoCategory,\n)\nfrom projects.manipulathor_baselines.armpointnav_baselines.models.manipulathor_net_utils import (\n    input_embedding_net,\n)\n\n\nclass DisjointArmPointNavBaselineActorCritic(ActorCriticModel[CategoricalDistr]):\n    \"\"\"Disjoint Baseline recurrent actor critic model for armpointnav.\n\n    # Attributes\n    action_space : The space of actions available to the agent. Currently only discrete\n        actions are allowed (so this space will always be of type `gym.spaces.Discrete`).\n    observation_space : The observation space expected by the agent. This observation space\n        should include (optionally) 'rgb' images and 'depth' images and is required to\n        have a component corresponding to the goal `goal_sensor_uuid`.\n    goal_sensor_uuid : The uuid of the sensor of the goal object. 
See `GoalObjectTypeThorSensor`\n        as an example of such a sensor.\n    hidden_size : The hidden size of the GRU RNN.\n    object_type_embedding_dim: The dimensionality of the embedding corresponding to the goal\n        object type.\n    \"\"\"\n\n    def __init__(\n        self,\n        action_space: gym.spaces.Discrete,\n        observation_space: SpaceDict,\n        hidden_size=512,\n        obj_state_embedding_size=512,\n        trainable_masked_hidden_state: bool = False,\n        num_rnn_layers=1,\n        rnn_type=\"GRU\",\n    ):\n        \"\"\"Initializer.\n\n        See class documentation for parameter definitions.\n        \"\"\"\n        super().__init__(action_space=action_space, observation_space=observation_space)\n\n        self._hidden_size = hidden_size\n        self.object_type_embedding_size = obj_state_embedding_size\n\n        self.visual_encoder_pick = SimpleCNN(\n            self.observation_space,\n            self._hidden_size,\n            rgb_uuid=None,\n            depth_uuid=\"depth_lowres\",\n        )\n        self.visual_encoder_drop = SimpleCNN(\n            self.observation_space,\n            self._hidden_size,\n            rgb_uuid=None,\n            depth_uuid=\"depth_lowres\",\n        )\n\n        self.state_encoder = RNNStateEncoder(\n            self._hidden_size + obj_state_embedding_size,\n            self._hidden_size,\n            trainable_masked_hidden_state=trainable_masked_hidden_state,\n            num_layers=num_rnn_layers,\n            rnn_type=rnn_type,\n        )\n\n        self.actor_pick = LinearActorHeadNoCategory(self._hidden_size, action_space.n)\n        self.critic_pick = LinearCriticHead(self._hidden_size)\n        self.actor_drop = LinearActorHeadNoCategory(self._hidden_size, action_space.n)\n        self.critic_drop = LinearCriticHead(self._hidden_size)\n\n        # self.object_state_embedding = nn.Embedding(num_embeddings=6, embedding_dim=obj_state_embedding_size)\n\n        
relative_dist_embedding_size = torch.Tensor([3, 100, obj_state_embedding_size])\n        self.relative_dist_embedding_pick = input_embedding_net(\n            relative_dist_embedding_size.long().tolist(), dropout=0\n        )\n        self.relative_dist_embedding_drop = input_embedding_net(\n            relative_dist_embedding_size.long().tolist(), dropout=0\n        )\n\n        self.train()\n\n    @property\n    def recurrent_hidden_state_size(self) -> int:\n        \"\"\"The recurrent hidden state size of the model.\"\"\"\n        return self._hidden_size\n\n    @property\n    def num_recurrent_layers(self) -> int:\n        \"\"\"Number of recurrent hidden layers.\"\"\"\n        return self.state_encoder.num_recurrent_layers\n\n    def _recurrent_memory_specification(self):\n        return dict(\n            rnn=(\n                (\n                    (\"layer\", self.num_recurrent_layers),\n                    (\"sampler\", None),\n                    (\"hidden\", self.recurrent_hidden_state_size),\n                ),\n                torch.float32,\n            )\n        )\n\n    def forward(  # type:ignore\n        self,\n        observations: ObservationType,\n        memory: Memory,\n        prev_actions: torch.Tensor,\n        masks: torch.FloatTensor,\n    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:\n        \"\"\"Processes input batched observations to produce new actor and critic\n        values. 
Processes input batched observations (along with prior hidden\n        states, previous actions, and masks denoting which recurrent hidden\n        states should be masked) and returns an `ActorCriticOutput` object\n        containing the model's policy (distribution over actions) and\n        evaluation of the current state (value).\n\n        # Parameters\n        observations : Batched input observations.\n        memory : `Memory` containing the hidden states from initial timepoints.\n        prev_actions : Tensor of previous actions taken.\n        masks : Masks applied to hidden states. See `RNNStateEncoder`.\n        # Returns\n        Tuple of the `ActorCriticOutput` and recurrent hidden state.\n        \"\"\"\n\n        arm2obj_dist = self.relative_dist_embedding_pick(\n            observations[\"relative_agent_arm_to_obj\"]\n        )\n        obj2goal_dist = self.relative_dist_embedding_drop(\n            observations[\"relative_obj_to_goal\"]\n        )\n\n        perception_embed_pick = self.visual_encoder_pick(observations)\n        perception_embed_drop = self.visual_encoder_drop(observations)\n\n        pickup_bool = observations[\"pickedup_object\"]\n        after_pickup = pickup_bool == 1\n        distances = arm2obj_dist\n        distances[after_pickup] = obj2goal_dist[after_pickup]\n\n        perception_embed = perception_embed_pick\n        perception_embed[after_pickup] = perception_embed_drop[after_pickup]\n\n        x = [distances, perception_embed]\n\n        x_cat = torch.cat(x, dim=-1)  # type: ignore\n        x_out, rnn_hidden_states = self.state_encoder(\n            x_cat, memory.tensor(\"rnn\"), masks\n        )\n        actor_out_pick = self.actor_pick(x_out)\n        critic_out_pick = self.critic_pick(x_out)\n\n        actor_out_drop = self.actor_drop(x_out)\n        critic_out_drop = self.critic_drop(x_out)\n\n        actor_out = actor_out_pick\n        actor_out[after_pickup] = actor_out_drop[after_pickup]\n        critic_out = 
critic_out_pick\n        critic_out[after_pickup] = critic_out_drop[after_pickup]\n\n        actor_out = CategoricalDistr(logits=actor_out)\n        actor_critic_output = ActorCriticOutput(\n            distributions=actor_out, values=critic_out, extras={}\n        )\n        updated_memory = memory.set_tensor(\"rnn\", rnn_hidden_states)\n\n        return (\n            actor_critic_output,\n            updated_memory,\n        )\n"
  },
  {
    "path": "projects/manipulathor_baselines/armpointnav_baselines/models/manipulathor_net_utils.py",
    "content": "import pdb\n\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n\ndef upshuffle(\n    in_planes, out_planes, upscale_factor, kernel_size=3, stride=1, padding=1\n):\n    return nn.Sequential(\n        nn.Conv2d(\n            in_planes,\n            out_planes * upscale_factor**2,\n            kernel_size=kernel_size,\n            stride=stride,\n            padding=padding,\n        ),\n        nn.PixelShuffle(upscale_factor),\n        nn.LeakyReLU(),\n    )\n\n\ndef upshufflenorelu(\n    in_planes, out_planes, upscale_factor, kernel_size=3, stride=1, padding=1\n):\n    return nn.Sequential(\n        nn.Conv2d(\n            in_planes,\n            out_planes * upscale_factor**2,\n            kernel_size=kernel_size,\n            stride=stride,\n            padding=padding,\n        ),\n        nn.PixelShuffle(upscale_factor),\n    )\n\n\ndef combine_block_w_bn(in_planes, out_planes):\n    return nn.Sequential(\n        nn.Conv2d(in_planes, out_planes, 1, 1),\n        nn.BatchNorm2d(out_planes),\n        nn.LeakyReLU(),\n    )\n\n\ndef conv2d_block(in_planes, out_planes, kernel_size, stride=1, padding=1):\n    return nn.Sequential(\n        nn.Conv2d(in_planes, out_planes, kernel_size, stride=stride, padding=padding),\n        nn.BatchNorm2d(out_planes),\n        nn.LeakyReLU(),\n        nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, padding=1),\n        nn.BatchNorm2d(out_planes),\n    )\n\n\ndef combine_block_w_do(in_planes, out_planes, dropout=0.0):\n    return nn.Sequential(\n        nn.Conv2d(in_planes, out_planes, 1, 1),\n        nn.LeakyReLU(),\n        nn.Dropout(dropout),\n    )\n\n\ndef combine_block_no_do(in_planes, out_planes):\n    return nn.Sequential(\n        nn.Conv2d(in_planes, out_planes, 1, 1),\n        nn.LeakyReLU(),\n    )\n\n\ndef linear_block(in_features, out_features, dropout=0.0):\n    return nn.Sequential(\n        nn.Linear(in_features, out_features),\n        nn.LeakyReLU(),\n        
nn.Dropout(dropout),\n    )\n\n\ndef linear_block_norelu(in_features, out_features):\n    return nn.Sequential(\n        nn.Linear(in_features, out_features),\n    )\n\n\ndef input_embedding_net(list_of_feature_sizes, dropout=0.0):\n    modules = []\n    for i in range(len(list_of_feature_sizes) - 1):\n        input_size, output_size = list_of_feature_sizes[i : i + 2]\n        if i + 2 == len(list_of_feature_sizes):\n            modules.append(linear_block_norelu(input_size, output_size))\n        else:\n            modules.append(linear_block(input_size, output_size, dropout=dropout))\n    return nn.Sequential(*modules)\n\n\ndef _upsample_add(x, y):\n    _, _, H, W = y.size()\n    return F.upsample(x, size=(H, W), mode=\"bilinear\") + y\n\n\ndef replace_all_relu_w_leakyrelu(model):\n    pdb.set_trace()\n    print(\"Not sure if using this is a good idea\")\n    modules = model._modules\n    for m in modules.keys():\n        module = modules[m]\n        if isinstance(module, nn.ReLU):\n            model._modules[m] = nn.LeakyReLU()\n        elif isinstance(module, nn.Module):\n            model._modules[m] = replace_all_relu_w_leakyrelu(module)\n    return model\n\n\ndef replace_all_leakyrelu_w_relu(model):\n    modules = model._modules\n    for m in modules.keys():\n        module = modules[m]\n        if isinstance(module, nn.LeakyReLU):\n            model._modules[m] = nn.ReLU()\n        elif isinstance(module, nn.Module):\n            model._modules[m] = replace_all_leakyrelu_w_relu(module)\n    return model\n\n\ndef replace_all_bn_w_groupnorm(model):\n    pdb.set_trace()\n    print(\"Not sure if using this is a good idea\")\n    modules = model._modules\n    for m in modules.keys():\n        module = modules[m]\n        if isinstance(module, nn.BatchNorm2d) or isinstance(module, nn.BatchNorm1d):\n            feature_number = module.num_features\n            model._modules[m] = nn.GroupNorm(32, feature_number)\n        elif isinstance(module, nn.BatchNorm3d):\n  
          raise Exception(\"Not implemented\")\n        elif isinstance(module, nn.Module):\n            model._modules[m] = replace_all_bn_w_groupnorm(module)\n    return model\n\n\ndef flat_temporal(tensor, batch_size, sequence_length):\n    tensor_shape = [s for s in tensor.shape]\n    assert tensor_shape[0] == batch_size and tensor_shape[1] == sequence_length\n    result_shape = [batch_size * sequence_length] + tensor_shape[2:]\n    return tensor.contiguous().view(result_shape)\n\n\ndef unflat_temporal(tensor, batch_size, sequence_length):\n    tensor_shape = [s for s in tensor.shape]\n    assert tensor_shape[0] == batch_size * sequence_length\n    result_shape = [batch_size, sequence_length] + tensor_shape[1:]\n    return tensor.contiguous().view(result_shape)\n"
  },
  {
    "path": "projects/objectnav_baselines/README.md",
    "content": "# Baseline models ObjectNav (for RoboTHOR/iTHOR)\n\nThis project contains the code for training baseline models for the ObjectNav task. In ObjectNav, the agent\nspawns at a location in an environment and is tasked to explore the environment until it finds an object of a\ncertain type (such as TV or Basketball). Once the agent is confident that it has the object within sight\nit executes the `END` action which terminates the episode. If the agent is within a set\ndistance to the target (in our case 1.0 meters) and the target is visible within its observation frame\nthe agent succeeded, otherwise it failed.\n\nProvided are experiment configs for training a simple convolutional model with\nan GRU using `RGB`, `Depth` or `RGB-D` (i.e. `RGB+Depth`) as inputs in\n[RoboTHOR](https://ai2thor.allenai.org/robothor/) and [iTHOR](https://ai2thor.allenai.org/ithor/).\n\nThe experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)\nReinforcement Learning Algorithm. For the RoboTHOR environment we also have and experiment\n(`objectnav_robothor_rgb_resnetgru_dagger.py`) showing how a model can be trained using DAgger,\na form of imitation learning.\n\nTo train an experiment run the following command from the `allenact` root directory:\n\n```bash\npython main.py <PATH_TO_EXPERIMENT_CONFIG> -o <PATH_TO_OUTPUT> -c\n```\n\nWhere `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights\nand logs to be stored and `<PATH_TO_EXPERIMENT_CONFIG>` is the path to the python file containing\nthe experiment configuration. 
An example usage of this command would be:\n\n```bash\npython main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet_ddppo.py -o storage/objectnav-robothor-rgb\n```\n\nThis trains a simple convolutional neural network with a GRU using RGB input \npassed through a pretrained ResNet-18 visual encoder on the\nObjectNav task in the RoboTHOR environment and stores the model weights and logs\nto `storage/objectnav-robothor-rgb`.\n\n## RoboTHOR ObjectNav 2021 Challenge\n\nThe experiment configs found under the `projects/objectnav_baselines/experiments/robothor` directory are designed\nto conform to the requirements of the [RoboTHOR ObjectNav 2021 Challenge](https://ai2thor.allenai.org/robothor/cvpr-2021-challenge).\n\n### Training a baseline\nTo train a baseline ResNet->GRU model taking RGB-D inputs, run the following command\n```bash\npython main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnet_ddppo.py -o storage/objectnav-robothor-rgbd\n```\nBy default, when using a machine with a GPU, the above experiment will attempt to train using 60 parallel processes\nacross all available GPUs. See the `TRAIN_GPU_IDS` constant in `experiments/objectnav_thor_base.py` and\nthe `NUM_PROCESSES` constant in `experiments/robothor/objectnav_robothor_base.py` if you'd like to change which\nGPUs are used or how many processes are run respectively.\n\n### Downloading our pretrained model checkpoint\nWe provide a pretrained model obtained by allowing the above command to run for all 300M training steps and then selecting\nthe model checkpoint with best validation-set performance (for us occurring at ~170M training steps). You can download \nthis model checkpoint by running\n```bash\nbash pretrained_model_ckpts/download_navigation_model_ckpts.sh robothor-objectnav-challenge-2021\n```\nfrom the top-level directory. 
This will download the pretrained model weights and save them at the path\n```bash\npretrained_model_ckpts/robothor-objectnav-challenge-2021/Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO/2021-02-09_22-35-15/exp_Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO_0.2.0a_300M__stage_00__steps_000170207237.pt\n```\n\n### Running inference on the pretrained model\n\nYou can run inference on the above pretrained model (on the test dataset) by running\n```bash\nexport SAVED_MODEL_PATH=pretrained_model_ckpts/robothor-objectnav-challenge-2021/Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO/2021-02-09_22-35-15/exp_Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO_0.2.0a_300M__stage_00__steps_000170207237.pt\npython main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnetgru_ddppo.py -c $SAVED_MODEL_PATH --eval\n```\nTo discourage \"cheating\", the test dataset has been scrubbed of the information needed to actually compute the success rate / SPL\nof your model and so running the above will only save the trajectories your models take. To evaluate these\ntrajectories you will have to submit them to our leaderboard, see [here for more details](https://github.com/allenai/robothor-challenge/).\nIf you'd like to get a sense of if your model is doing well before submitting to the leaderboard, you can obtain the \nsuccess rate / SPL of it on our validation dataset. To do this, you can simply comment-out the line\n```python\n    TEST_DATASET_DIR = os.path.join(os.getcwd(), \"datasets/robothor-objectnav/test\")\n```\nwithin the `projects/objectnav_baselines/experiments/robothor/objectnav_robothor_base.py` file and rerun the above\n`python main.py ...` command (when the test dataset is not given, the code defaults to using the validation set)."
  },
  {
    "path": "projects/objectnav_baselines/__init__.py",
    "content": ""
  },
  {
    "path": "projects/objectnav_baselines/experiments/__init__.py",
    "content": ""
  },
  {
    "path": "projects/objectnav_baselines/experiments/clip/__init__.py",
    "content": ""
  },
  {
    "path": "projects/objectnav_baselines/experiments/clip/mixins.py",
    "content": "from typing import Sequence, Union, Type, Tuple, Optional, Dict, Any\n\nimport attr\nimport gym\nimport numpy as np\nimport torch\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.distributions import CategoricalDistr\nfrom allenact.base_abstractions.misc import (\n    ObservationType,\n    Memory,\n    ActorCriticOutput,\n    DistributionType,\n)\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor\nfrom allenact.utils.experiment_utils import Builder\nfrom allenact.utils.misc_utils import prepare_locals_for_super\nfrom allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor\nfrom allenact_plugins.navigation_plugin.objectnav.models import (\n    ResnetTensorNavActorCritic,\n)\n\n\nclass LookDownFirstResnetTensorNavActorCritic(ResnetTensorNavActorCritic):\n    def __init__(self, look_down_action_index: int, **kwargs):\n        super().__init__(**kwargs)\n\n        self.look_down_action_index = look_down_action_index\n        self.register_buffer(\n            \"look_down_delta\", torch.zeros(1, 1, self.action_space.n), persistent=False\n        )\n        self.look_down_delta[0, 0, self.look_down_action_index] = 99999\n\n    def forward(  # type:ignore\n        self,\n        observations: ObservationType,\n        memory: Memory,\n        prev_actions: torch.Tensor,\n        masks: torch.FloatTensor,\n    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:\n        ac_out, memory = super(LookDownFirstResnetTensorNavActorCritic, self).forward(\n            **prepare_locals_for_super(locals())\n        )\n\n        logits = ac_out.distributions.logits * masks + self.look_down_delta * (\n            1 - masks\n        )\n        ac_out = ActorCriticOutput(\n            distributions=CategoricalDistr(logits=logits),\n            values=ac_out.values,\n            
extras=ac_out.extras,\n        )\n\n        return ac_out, memory\n\n\n@attr.s(kw_only=True)\nclass ClipResNetPreprocessGRUActorCriticMixin:\n    sensors: Sequence[Sensor] = attr.ib()\n    clip_model_type: str = attr.ib()\n    screen_size: int = attr.ib()\n    goal_sensor_type: Type[Optional[Sensor]] = attr.ib()\n    pool: bool = attr.ib(default=False)\n\n    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        preprocessors = []\n\n        rgb_sensor = next((s for s in self.sensors if isinstance(s, RGBSensor)), None)\n        assert (\n            np.linalg.norm(\n                np.array(rgb_sensor._norm_means)\n                - np.array(ClipResNetPreprocessor.CLIP_RGB_MEANS)\n            )\n            < 1e-5\n        )\n        assert (\n            np.linalg.norm(\n                np.array(rgb_sensor._norm_sds)\n                - np.array(ClipResNetPreprocessor.CLIP_RGB_STDS)\n            )\n            < 1e-5\n        )\n\n        if rgb_sensor is not None:\n            preprocessors.append(\n                ClipResNetPreprocessor(\n                    rgb_input_uuid=rgb_sensor.uuid,\n                    clip_model_type=self.clip_model_type,\n                    pool=self.pool,\n                    output_uuid=\"rgb_clip_resnet\",\n                    input_img_height_width=(rgb_sensor.height, rgb_sensor.width),\n                )\n            )\n\n        depth_sensor = next(\n            (s for s in self.sensors if isinstance(s, DepthSensor)), None\n        )\n        if depth_sensor is not None:\n            preprocessors.append(\n                ClipResNetPreprocessor(\n                    rgb_input_uuid=depth_sensor.uuid,\n                    clip_model_type=self.clip_model_type,\n                    pool=self.pool,\n                    output_uuid=\"depth_clip_resnet\",\n                    input_img_height_width=(depth_sensor.height, depth_sensor.width),\n                )\n            )\n\n        return 
preprocessors\n\n    def create_model(\n        self,\n        num_actions: int,\n        add_prev_actions: bool,\n        look_down_first: bool = False,\n        look_down_action_index: Optional[int] = None,\n        hidden_size: int = 512,\n        rnn_type=\"GRU\",\n        model_kwargs: Optional[Dict[str, Any]] = None,\n        **kwargs\n    ) -> nn.Module:\n        has_rgb = any(isinstance(s, RGBSensor) for s in self.sensors)\n        has_depth = any(isinstance(s, DepthSensor) for s in self.sensors)\n\n        goal_sensor_uuid = next(\n            (s.uuid for s in self.sensors if isinstance(s, self.goal_sensor_type)),\n            None,\n        )\n\n        if model_kwargs is None:\n            model_kwargs = {}\n\n        model_kwargs = dict(\n            action_space=gym.spaces.Discrete(num_actions),\n            observation_space=kwargs[\"sensor_preprocessor_graph\"].observation_spaces,\n            goal_sensor_uuid=goal_sensor_uuid,\n            rgb_resnet_preprocessor_uuid=\"rgb_clip_resnet\" if has_rgb else None,\n            depth_resnet_preprocessor_uuid=\"depth_clip_resnet\" if has_depth else None,\n            hidden_size=hidden_size,\n            goal_dims=32,\n            add_prev_actions=add_prev_actions,\n            rnn_type=rnn_type,\n            **model_kwargs\n        )\n\n        if not look_down_first:\n            return ResnetTensorNavActorCritic(**model_kwargs)\n        else:\n            return LookDownFirstResnetTensorNavActorCritic(\n                look_down_action_index=look_down_action_index, **model_kwargs\n            )\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/habitat/__init__.py",
    "content": ""
  },
  {
    "path": "projects/objectnav_baselines/experiments/habitat/clip/__init__.py",
    "content": ""
  },
  {
    "path": "projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo.py",
    "content": "from typing import Sequence, Union\n\nimport torch.nn as nn\nfrom torch.distributions.utils import lazy_property\n\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.utils.experiment_utils import Builder, TrainingPipeline\nfrom allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor\nfrom allenact_plugins.habitat_plugin.habitat_sensors import (\n    RGBSensorHabitat,\n    TargetObjectSensorHabitat,\n)\nfrom projects.objectnav_baselines.experiments.clip.mixins import (\n    ClipResNetPreprocessGRUActorCriticMixin,\n)\nfrom projects.objectnav_baselines.experiments.habitat.objectnav_habitat_base import (\n    ObjectNavHabitatBaseConfig,\n)\nfrom projects.objectnav_baselines.mixins import ObjectNavPPOMixin\n\n\nclass ObjectNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig(\n    ObjectNavHabitatBaseConfig\n):\n    \"\"\"An Object Navigation experiment configuration in Habitat.\"\"\"\n\n    CLIP_MODEL_TYPE = \"RN50\"\n\n    def __init__(self, lr: float, **kwargs):\n        super().__init__(**kwargs)\n\n        self.lr = lr\n\n        self.preprocessing_and_model = ClipResNetPreprocessGRUActorCriticMixin(\n            sensors=self.SENSORS,\n            clip_model_type=self.CLIP_MODEL_TYPE,\n            screen_size=self.SCREEN_SIZE,\n            goal_sensor_type=TargetObjectSensorHabitat,\n        )\n\n    @lazy_property\n    def SENSORS(self):\n        return [\n            RGBSensorHabitat(\n                height=ObjectNavHabitatBaseConfig.SCREEN_SIZE,\n                width=ObjectNavHabitatBaseConfig.SCREEN_SIZE,\n                use_resnet_normalization=True,\n                mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,\n                stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,\n            ),\n            TargetObjectSensorHabitat(len(self.DEFAULT_OBJECT_CATEGORIES_TO_IND)),\n        ]\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return 
ObjectNavPPOMixin.training_pipeline(\n            lr=self.lr,\n            auxiliary_uuids=self.auxiliary_uuids,\n            multiple_beliefs=False,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        return self.preprocessing_and_model.preprocessors()\n\n    def create_model(self, **kwargs) -> nn.Module:\n        return self.preprocessing_and_model.create_model(\n            num_actions=self.ACTION_SPACE.n,\n            add_prev_actions=self.add_prev_actions,\n            auxiliary_uuids=self.auxiliary_uuids,\n            **kwargs,\n        )\n\n    def tag(self):\n        return (\n            f\"{super(ObjectNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig, self).tag()}\"\n            f\"-RGB-ClipResNet50GRU-DDPPO-lr{self.lr}\"\n        )\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo_increasingrollouts.py",
    "content": "import torch\nimport torch.optim as optim\n\nfrom allenact.algorithms.onpolicy_sync.losses import PPO\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig\nfrom allenact.utils.experiment_utils import (\n    Builder,\n    TrainingPipeline,\n    PipelineStage,\n    TrainingSettings,\n)\nfrom projects.objectnav_baselines.experiments.habitat.clip.objectnav_habitat_rgb_clipresnet50gru_ddppo import (\n    ObjectNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig,\n)\nfrom projects.objectnav_baselines.mixins import update_with_auxiliary_losses\n\n\nclass ObjectNavHabitatRGBClipResNet50GRUDDPPOIncreasingLengthExpConfig(\n    ObjectNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig\n):\n    def __init__(self, lr=1e-4, **kwargs):\n        super().__init__(lr, **kwargs)\n        self.lr = lr\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        auxiliary_uuids = self.auxiliary_uuids\n        multiple_beliefs = False\n        normalize_advantage = False\n        advance_scene_rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD\n        log_interval_small = (\n            self.num_train_processes * 32 * 10 if torch.cuda.is_available() else 1\n        )\n        log_interval_med = (\n            self.num_train_processes * 64 * 5 if torch.cuda.is_available() else 1\n        )\n        log_interval_large = (\n            self.num_train_processes * 128 * 5 if torch.cuda.is_available() else 1\n        )\n\n        batch_steps_0 = int(10e6)\n        batch_steps_1 = int(10e6)\n        batch_steps_2 = int(1e9) - batch_steps_0 - batch_steps_1\n\n        lr = self.lr\n        num_mini_batch = 1\n        update_repeats = 4\n        save_interval = 5000000\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 0.95\n        max_grad_norm = 0.5\n\n        named_losses = {\n            \"ppo_loss\": (PPO(**PPOConfig, normalize_advantage=normalize_advantage), 1.0)\n        }\n        named_losses = update_with_auxiliary_losses(\n    
        named_losses=named_losses,\n            auxiliary_uuids=auxiliary_uuids,\n            multiple_beliefs=multiple_beliefs,\n        )\n\n        return TrainingPipeline(\n            save_interval=save_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            named_losses={key: val[0] for key, val in named_losses.items()},\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=advance_scene_rollout_period,\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=list(named_losses.keys()),\n                    max_stage_steps=batch_steps_0,\n                    training_settings=TrainingSettings(\n                        num_steps=32, metric_accumulate_interval=log_interval_small\n                    ),\n                ),\n                PipelineStage(\n                    loss_names=list(named_losses.keys()),\n                    max_stage_steps=batch_steps_1,\n                    training_settings=TrainingSettings(\n                        num_steps=64,\n                        metric_accumulate_interval=log_interval_med,\n                    ),\n                ),\n                PipelineStage(\n                    loss_names=list(named_losses.keys()),\n                    max_stage_steps=batch_steps_2,\n                    training_settings=TrainingSettings(\n                        num_steps=128,\n                        metric_accumulate_interval=log_interval_large,\n                    ),\n                ),\n            ],\n            lr_scheduler_builder=None,\n        )\n\n    def tag(self):\n        return (\n            super(\n                ObjectNavHabitatRGBClipResNet50GRUDDPPOIncreasingLengthExpConfig, self\n            )\n            .tag()\n            
.replace(\"-DDPPO-lr\", \"-DDPPO-IncRollouts-lr\")\n        )\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py",
    "content": "import glob\nimport math\nimport os\nimport warnings\nfrom abc import ABC\nfrom typing import Dict, Any, List, Optional, Sequence, Union, Tuple\n\nimport gym\nimport numpy as np\nimport torch\nfrom torch.distributions.utils import lazy_property\n\n# noinspection PyUnresolvedReferences\nimport habitat\nfrom allenact.base_abstractions.experiment_config import MachineParams\nfrom allenact.base_abstractions.preprocessor import (\n    SensorPreprocessorGraph,\n    Preprocessor,\n)\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor\nfrom allenact.utils.experiment_utils import evenly_distribute_count_into_bins, Builder\nfrom allenact.utils.system import get_logger\nfrom allenact_plugins.habitat_plugin.habitat_constants import (\n    HABITAT_DATASETS_DIR,\n    HABITAT_CONFIGS_DIR,\n    HABITAT_SCENE_DATASETS_DIR,\n)\nfrom allenact_plugins.habitat_plugin.habitat_task_samplers import ObjectNavTaskSampler\nfrom allenact_plugins.habitat_plugin.habitat_tasks import ObjectNavTask\nfrom allenact_plugins.habitat_plugin.habitat_utils import (\n    get_habitat_config,\n    construct_env_configs,\n)\nfrom projects.objectnav_baselines.experiments.objectnav_base import ObjectNavBaseConfig\n\n\ndef create_objectnav_config(\n    config_yaml_path: str,\n    mode: str,\n    scenes_path: str,\n    simulator_gpu_ids: Sequence[int],\n    rotation_degrees: float,\n    step_size: float,\n    max_steps: int,\n    num_processes: int,\n    camera_width: int,\n    camera_height: int,\n    using_rgb: bool,\n    using_depth: bool,\n    training: bool,\n    num_episode_sample: int,\n    horizontal_fov: Optional[int] = None,\n) -> habitat.Config:\n    config = get_habitat_config(config_yaml_path)\n\n    config.defrost()\n    config.NUM_PROCESSES = num_processes\n    config.SIMULATOR_GPU_IDS = simulator_gpu_ids\n    config.DATASET.SCENES_DIR = 
HABITAT_SCENE_DATASETS_DIR\n\n    config.DATASET.DATA_PATH = scenes_path\n\n    config.SIMULATOR.AGENT_0.SENSORS = []\n    if using_rgb:\n        config.SIMULATOR.AGENT_0.SENSORS.append(\"RGB_SENSOR\")\n    if using_depth:\n        config.SIMULATOR.AGENT_0.SENSORS.append(\"DEPTH_SENSOR\")\n\n    config.SIMULATOR.RGB_SENSOR.WIDTH = camera_width\n    config.SIMULATOR.RGB_SENSOR.HEIGHT = camera_height\n    config.SIMULATOR.DEPTH_SENSOR.WIDTH = camera_width\n    config.SIMULATOR.DEPTH_SENSOR.HEIGHT = camera_height\n    config.SIMULATOR.SEMANTIC_SENSOR.WIDTH = camera_width\n    config.SIMULATOR.SEMANTIC_SENSOR.HEIGHT = camera_height\n\n    if horizontal_fov is not None:\n        config.SIMULATOR.RGB_SENSOR.HFOV = horizontal_fov\n        config.SIMULATOR.DEPTH_SENSOR.HFOV = horizontal_fov\n        config.SIMULATOR.SEMANTIC_SENSOR.HFOV = horizontal_fov\n\n    assert rotation_degrees == config.SIMULATOR.TURN_ANGLE\n    assert step_size == config.SIMULATOR.FORWARD_STEP_SIZE\n    assert max_steps == config.ENVIRONMENT.MAX_EPISODE_STEPS\n    config.SIMULATOR.MAX_EPISODE_STEPS = max_steps\n\n    assert config.TASK.TYPE == \"ObjectNav-v1\"\n\n    assert config.TASK.SUCCESS.SUCCESS_DISTANCE == 0.1\n    assert config.TASK.DISTANCE_TO_GOAL.DISTANCE_TO == \"VIEW_POINTS\"\n\n    config.TASK.SENSORS = [\"OBJECTGOAL_SENSOR\", \"COMPASS_SENSOR\", \"GPS_SENSOR\"]\n    config.TASK.GOAL_SENSOR_UUID = \"objectgoal\"\n    config.TASK.MEASUREMENTS = [\"DISTANCE_TO_GOAL\", \"SUCCESS\", \"SPL\", \"SOFT_SPL\"]\n\n    if not training:\n        config.SEED = 0\n        config.ENVIRONMENT.ITERATOR_OPTIONS.SHUFFLE = False\n\n    if num_episode_sample > 0:\n        config.ENVIRONMENT.ITERATOR_OPTIONS.NUM_EPISODE_SAMPLE = num_episode_sample\n\n    config.MODE = mode\n\n    config.freeze()\n\n    return config\n\n\nclass ObjectNavHabitatBaseConfig(ObjectNavBaseConfig, ABC):\n    \"\"\"The base config for all Habitat ObjectNav experiments.\"\"\"\n\n    # selected auxiliary uuids\n    ## if comment all 
the keys, then it's vanilla DD-PPO\n    _AUXILIARY_UUIDS = [\n        # InverseDynamicsLoss.UUID,\n        # TemporalDistanceLoss.UUID,\n        # CPCA1Loss.UUID,\n        # CPCA4Loss.UUID,\n        # CPCA8Loss.UUID,\n        # CPCA16Loss.UUID,\n    ]\n    MULTIPLE_BELIEFS = False\n    BELIEF_FUSION = (  # choose one\n        None\n        # AttentiveFusion\n        # AverageFusion\n        # SoftmaxFusion\n    )\n\n    FAILED_END_REWARD = -1.0\n\n    ACTION_SPACE = gym.spaces.Discrete(len(ObjectNavTask.class_action_names()))\n\n    DEFAULT_NUM_TRAIN_PROCESSES = (\n        5 * torch.cuda.device_count() if torch.cuda.is_available() else 1\n    )\n    DEFAULT_NUM_TEST_PROCESSES = 11\n\n    DEFAULT_TRAIN_GPU_IDS = tuple(range(torch.cuda.device_count()))\n    DEFAULT_VALID_GPU_IDS = [torch.cuda.device_count() - 1]\n    DEFAULT_TEST_GPU_IDS = tuple(range(torch.cuda.device_count()))\n\n    def __init__(\n        self,\n        scene_dataset: str,  # Should be \"mp3d\" or \"hm3d\"\n        debug: bool = False,\n        num_train_processes: Optional[int] = None,\n        num_test_processes: Optional[int] = None,\n        test_on_validation: bool = False,\n        run_valid: bool = True,\n        train_gpu_ids: Optional[Sequence[int]] = None,\n        val_gpu_ids: Optional[Sequence[int]] = None,\n        test_gpu_ids: Optional[Sequence[int]] = None,\n        add_prev_actions: bool = False,\n        look_constraints: Optional[Tuple[int, int]] = None,\n        **kwargs,\n    ):\n        super().__init__(**kwargs)\n\n        self.scene_dataset = scene_dataset\n        self.debug = debug\n\n        assert look_constraints is None or all(\n            lc in [0, 1, 2, 3] for lc in look_constraints\n        ), \"Look constraints limit the number of times agents can look up/down when starting from the horizon line.\"\n        assert (\n            look_constraints is None or look_constraints[1] > 0\n        ), \"The agent must be allowed to look down from the horizon at least 
once.\"\n        self.look_constraints = look_constraints\n\n        def v_or_default(v, default):\n            return v if v is not None else default\n\n        self.num_train_processes = v_or_default(\n            num_train_processes, self.DEFAULT_NUM_TRAIN_PROCESSES\n        )\n        self.num_test_processes = v_or_default(\n            num_test_processes, (10 if torch.cuda.is_available() else 1)\n        )\n        self.test_on_validation = test_on_validation\n        self.run_valid = run_valid\n        self.train_gpu_ids = v_or_default(train_gpu_ids, self.DEFAULT_TRAIN_GPU_IDS)\n        self.val_gpu_ids = v_or_default(\n            val_gpu_ids, self.DEFAULT_VALID_GPU_IDS if run_valid else []\n        )\n        self.test_gpu_ids = v_or_default(test_gpu_ids, self.DEFAULT_TEST_GPU_IDS)\n        self.add_prev_actions = add_prev_actions\n\n        self.auxiliary_uuids = self._AUXILIARY_UUIDS\n\n    def _create_config(\n        self,\n        mode: str,\n        scenes_path: str,\n        num_processes: int,\n        simulator_gpu_ids: Sequence[int],\n        training: bool = True,\n        num_episode_sample: int = -1,\n    ):\n        return create_objectnav_config(\n            config_yaml_path=self.BASE_CONFIG_YAML_PATH,\n            mode=mode,\n            scenes_path=scenes_path,\n            simulator_gpu_ids=simulator_gpu_ids,\n            rotation_degrees=self.ROTATION_DEGREES,\n            step_size=self.STEP_SIZE,\n            max_steps=self.MAX_STEPS,\n            num_processes=num_processes,\n            camera_width=self.CAMERA_WIDTH,\n            camera_height=self.CAMERA_HEIGHT,\n            horizontal_fov=self.HORIZONTAL_FIELD_OF_VIEW,\n            using_rgb=any(isinstance(s, RGBSensor) for s in self.SENSORS),\n            using_depth=any(isinstance(s, DepthSensor) for s in self.SENSORS),\n            training=training,\n            num_episode_sample=num_episode_sample,\n        )\n\n    @lazy_property\n    def 
DEFAULT_OBJECT_CATEGORIES_TO_IND(self):\n        if self.scene_dataset == \"mp3d\":\n            return {\n                \"chair\": 0,\n                \"table\": 1,\n                \"picture\": 2,\n                \"cabinet\": 3,\n                \"cushion\": 4,\n                \"sofa\": 5,\n                \"bed\": 6,\n                \"chest_of_drawers\": 7,\n                \"plant\": 8,\n                \"sink\": 9,\n                \"toilet\": 10,\n                \"stool\": 11,\n                \"towel\": 12,\n                \"tv_monitor\": 13,\n                \"shower\": 14,\n                \"bathtub\": 15,\n                \"counter\": 16,\n                \"fireplace\": 17,\n                \"gym_equipment\": 18,\n                \"seating\": 19,\n                \"clothes\": 20,\n            }\n        elif self.scene_dataset == \"hm3d\":\n            return {\n                \"chair\": 0,\n                \"bed\": 1,\n                \"plant\": 2,\n                \"toilet\": 3,\n                \"tv_monitor\": 4,\n                \"sofa\": 5,\n            }\n        else:\n            raise NotImplementedError\n\n    @lazy_property\n    def TASK_DATA_DIR_TEMPLATE(self):\n        return os.path.join(\n            HABITAT_DATASETS_DIR, f\"objectnav/{self.scene_dataset}/v1/{{}}/{{}}.json.gz\"\n        )\n\n    @lazy_property\n    def BASE_CONFIG_YAML_PATH(self):\n        return os.path.join(\n            HABITAT_CONFIGS_DIR, f\"tasks/objectnav_{self.scene_dataset}.yaml\"\n        )\n\n    @lazy_property\n    def TRAIN_CONFIG(self):\n        return self._create_config(\n            mode=\"train\",\n            scenes_path=self.train_scenes_path(),\n            num_processes=self.num_train_processes,\n            simulator_gpu_ids=self.train_gpu_ids,\n            training=True,\n        )\n\n    @lazy_property\n    def VALID_CONFIG(self):\n        return self._create_config(\n            mode=\"validate\",\n            
scenes_path=self.valid_scenes_path(),\n            num_processes=1,\n            simulator_gpu_ids=self.val_gpu_ids,\n            training=False,\n            num_episode_sample=200,\n        )\n\n    @lazy_property\n    def TEST_CONFIG(self):\n        return self._create_config(\n            mode=\"validate\",\n            scenes_path=self.test_scenes_path(),\n            num_processes=self.num_test_processes,\n            simulator_gpu_ids=self.test_gpu_ids,\n            training=False,\n        )\n\n    @lazy_property\n    def TRAIN_CONFIGS_PER_PROCESS(self):\n        configs = construct_env_configs(self.TRAIN_CONFIG, allow_scene_repeat=True)\n\n        if len(self.train_gpu_ids) >= 2:\n            scenes_dir = configs[0].DATASET.SCENES_DIR\n            memory_use_per_config = []\n            for config in configs:\n                assert (\n                    len(config.DATASET.CONTENT_SCENES) == 1\n                ), config.DATASET.CONTENT_SCENES\n                scene_name = config.DATASET.CONTENT_SCENES[0]\n\n                paths = glob.glob(\n                    os.path.join(\n                        scenes_dir, self.scene_dataset, \"**\", f\"{scene_name}.*\"\n                    ),\n                    recursive=True,\n                )\n\n                if self.scene_dataset == \"mp3d\":\n                    assert len(paths) == 4\n                else:\n                    assert len(paths) == 2\n\n                memory_use_per_config.append(sum(os.path.getsize(p) for p in paths))\n\n            max_configs_per_device = math.ceil(len(configs) / len(self.train_gpu_ids))\n            mem_per_device = np.array([0.0 for _ in range(len(self.train_gpu_ids))])\n            configs_per_device = [[] for _ in range(len(mem_per_device))]\n            for mem, config in sorted(\n                list(zip(memory_use_per_config, configs)), key=lambda x: x[0]\n            ):\n                ind = int(np.argmin(mem_per_device))\n                config.defrost()\n    
            config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = self.train_gpu_ids[ind]\n                config.freeze()\n                configs_per_device[ind].append(config)\n\n                mem_per_device[ind] += mem\n                if len(configs_per_device[ind]) >= max_configs_per_device:\n                    mem_per_device[ind] = float(\"inf\")\n\n            configs_per_device.sort(key=lambda x: len(x))\n            configs = sum(configs_per_device, [])\n\n        if self.debug:\n            warnings.warn(\n                \"IN DEBUG MODE, WILL ONLY USE `1LXtFkjw3qL` SCENE IN MP3D OR `1S7LAXRdDqK` scene in HM3D!!!\"\n            )\n            for config in configs:\n                config.defrost()\n                if self.scene_dataset == \"mp3d\":\n                    config.DATASET.CONTENT_SCENES = [\"1LXtFkjw3qL\"]\n                elif self.scene_dataset == \"hm3d\":\n                    config.DATASET.CONTENT_SCENES = [\"1S7LAXRdDqK\"]\n                else:\n                    raise NotImplementedError\n                config.freeze()\n        return configs\n\n    @lazy_property\n    def TEST_CONFIG_PER_PROCESS(self):\n        return construct_env_configs(self.TEST_CONFIG, allow_scene_repeat=False)\n\n    def train_scenes_path(self):\n        return self.TASK_DATA_DIR_TEMPLATE.format(*([\"train\"] * 2))\n\n    def valid_scenes_path(self):\n        return self.TASK_DATA_DIR_TEMPLATE.format(*([\"val\"] * 2))\n\n    def test_scenes_path(self):\n        get_logger().warning(\"Running tests on the validation set!\")\n        return self.TASK_DATA_DIR_TEMPLATE.format(*([\"val\"] * 2))\n        # return self.TASK_DATA_DIR_TEMPLATE.format(*([\"test\"] * 2))\n\n    def tag(self):\n        t = f\"ObjectNav-Habitat-{self.scene_dataset.upper()}\"\n        if self.add_prev_actions:\n            t = f\"{t}-PrevActions\"\n\n        if self.look_constraints is not None:\n            t = f\"{t}-Look{','.join(map(str, self.look_constraints))}\"\n\n        return 
t\n\n    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        return tuple()\n\n    def machine_params(self, mode=\"train\", **kwargs):\n        has_gpus = torch.cuda.is_available()\n        if not has_gpus:\n            gpu_ids = []\n            nprocesses = 1\n        elif mode == \"train\":\n            gpu_ids = self.train_gpu_ids\n            nprocesses = self.num_train_processes\n        elif mode == \"valid\":\n            gpu_ids = self.val_gpu_ids\n            nprocesses = 1 if self.run_valid else 0\n        elif mode == \"test\":\n            gpu_ids = self.test_gpu_ids\n            nprocesses = self.num_test_processes\n        else:\n            raise NotImplementedError(\"mode must be 'train', 'valid', or 'test'.\")\n\n        if has_gpus:\n            nprocesses = evenly_distribute_count_into_bins(nprocesses, len(gpu_ids))\n\n        sensor_preprocessor_graph = (\n            SensorPreprocessorGraph(\n                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,\n                preprocessors=self.preprocessors(),\n            )\n            if mode == \"train\"\n            or (\n                (isinstance(nprocesses, int) and nprocesses > 0)\n                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)\n            )\n            else None\n        )\n\n        return MachineParams(\n            nprocesses=nprocesses,\n            devices=gpu_ids,\n            sensor_preprocessor_graph=sensor_preprocessor_graph,\n        )\n\n    def make_sampler_fn(self, **kwargs) -> TaskSampler:\n        return ObjectNavTaskSampler(\n            task_kwargs={\n                \"look_constraints\": self.look_constraints,\n            },\n            **{\"failed_end_reward\": self.FAILED_END_REWARD, **kwargs},  # type: ignore\n        )\n\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = 
None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        config = self.TRAIN_CONFIGS_PER_PROCESS[process_ind]\n        return {\n            \"env_config\": config,\n            \"max_steps\": self.MAX_STEPS,\n            \"sensors\": self.SENSORS,\n            \"action_space\": self.ACTION_SPACE,\n        }\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        if total_processes != 1:\n            raise NotImplementedError(\n                \"In validation, `total_processes` must equal 1 for habitat tasks\"\n            )\n        return {\n            \"env_config\": self.VALID_CONFIG,\n            \"max_steps\": self.MAX_STEPS,\n            \"sensors\": self.SENSORS,\n            \"action_space\": gym.spaces.Discrete(\n                len(ObjectNavTask.class_action_names())\n            ),\n        }\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        config = self.TEST_CONFIG_PER_PROCESS[process_ind]\n        return {\n            \"env_config\": config,\n            \"max_steps\": self.MAX_STEPS,\n            \"sensors\": self.SENSORS,\n            \"action_space\": gym.spaces.Discrete(\n                len(ObjectNavTask.class_action_names())\n            ),\n        }\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/ithor/__init__.py",
    "content": ""
  },
  {
    "path": "projects/objectnav_baselines/experiments/ithor/objectnav_ithor_base.py",
    "content": "import os\nfrom abc import ABC\n\nimport torch\n\nfrom projects.objectnav_baselines.experiments.objectnav_thor_base import (\n    ObjectNavThorBaseConfig,\n)\n\n\nclass ObjectNaviThorBaseConfig(ObjectNavThorBaseConfig, ABC):\n    \"\"\"The base config for all iTHOR ObjectNav experiments.\"\"\"\n\n    THOR_COMMIT_ID = \"9549791ce2e7f472063a10abb1fb7664159fec23\"\n    AGENT_MODE = \"default\"\n\n    DEFAULT_NUM_TRAIN_PROCESSES = 40 if torch.cuda.is_available() else 1\n\n    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), \"datasets/ithor-objectnav/train\")\n    VAL_DATASET_DIR = os.path.join(os.getcwd(), \"datasets/ithor-objectnav/val\")\n\n    TARGET_TYPES = tuple(\n        sorted(\n            [\n                \"AlarmClock\",\n                \"Apple\",\n                \"Book\",\n                \"Bowl\",\n                \"Box\",\n                \"Candle\",\n                \"GarbageCan\",\n                \"HousePlant\",\n                \"Laptop\",\n                \"SoapBottle\",\n                \"Television\",\n                \"Toaster\",\n            ],\n        )\n    )\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/ithor/objectnav_ithor_depth_resnet18gru_ddppo.py",
    "content": "from typing import Sequence, Union\n\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.utils.experiment_utils import Builder, TrainingPipeline\nfrom allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor\nfrom allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor\nfrom projects.objectnav_baselines.experiments.ithor.objectnav_ithor_base import (\n    ObjectNaviThorBaseConfig,\n)\nfrom projects.objectnav_baselines.mixins import (\n    ResNetPreprocessGRUActorCriticMixin,\n    ObjectNavPPOMixin,\n)\n\n\nclass ObjectNaviThorDepthPPOExperimentConfig(ObjectNaviThorBaseConfig):\n    \"\"\"An Object Navigation experiment configuration in iThor with Depth\n    input.\"\"\"\n\n    SENSORS = (\n        DepthSensorThor(\n            height=ObjectNaviThorBaseConfig.SCREEN_SIZE,\n            width=ObjectNaviThorBaseConfig.SCREEN_SIZE,\n            use_normalization=True,\n            uuid=\"depth_lowres\",\n        ),\n        GoalObjectTypeThorSensor(\n            object_types=ObjectNaviThorBaseConfig.TARGET_TYPES,\n        ),\n    )\n\n    def __init__(self, **kwargs):\n        super().__init__(**kwargs)\n\n        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(\n            sensors=self.SENSORS,\n            resnet_type=\"RN18\",\n            screen_size=self.SCREEN_SIZE,\n            goal_sensor_type=GoalObjectTypeThorSensor,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return ObjectNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        return self.preprocessing_and_model.preprocessors()\n\n    def create_model(self, **kwargs) -> nn.Module:\n        return 
self.preprocessing_and_model.create_model(\n            num_actions=self.ACTION_SPACE.n, **kwargs\n        )\n\n    def tag(self):\n        return \"ObjectNav-iTHOR-Depth-ResNet18GRU-DDPPO\"\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgb_resnet18gru_ddppo.py",
    "content": "from typing import Sequence, Union\n\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.utils.experiment_utils import Builder, TrainingPipeline\nfrom allenact_plugins.ithor_plugin.ithor_sensors import (\n    GoalObjectTypeThorSensor,\n    RGBSensorThor,\n)\nfrom projects.objectnav_baselines.experiments.ithor.objectnav_ithor_base import (\n    ObjectNaviThorBaseConfig,\n)\nfrom projects.objectnav_baselines.mixins import (\n    ResNetPreprocessGRUActorCriticMixin,\n    ObjectNavPPOMixin,\n)\n\n\nclass ObjectNaviThorRGBPPOExperimentConfig(ObjectNaviThorBaseConfig):\n    \"\"\"An Object Navigation experiment configuration in iThor with RGB\n    input.\"\"\"\n\n    SENSORS = [\n        RGBSensorThor(\n            height=ObjectNaviThorBaseConfig.SCREEN_SIZE,\n            width=ObjectNaviThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        GoalObjectTypeThorSensor(\n            object_types=ObjectNaviThorBaseConfig.TARGET_TYPES,\n        ),\n    ]\n\n    def __init__(self, **kwargs):\n        super().__init__(**kwargs)\n\n        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(\n            sensors=self.SENSORS,\n            resnet_type=\"RN18\",\n            screen_size=self.SCREEN_SIZE,\n            goal_sensor_type=GoalObjectTypeThorSensor,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return ObjectNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        return self.preprocessing_and_model.preprocessors()\n\n    def create_model(self, **kwargs) -> nn.Module:\n        return self.preprocessing_and_model.create_model(\n            
num_actions=self.ACTION_SPACE.n, **kwargs\n        )\n\n    @classmethod\n    def tag(cls):\n        return \"ObjectNav-iTHOR-RGB-ResNet18GRU-DDPPO\"\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgbd_resnet18gru_ddppo.py",
    "content": "from typing import Sequence, Union\n\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.utils.experiment_utils import Builder, TrainingPipeline\nfrom allenact_plugins.ithor_plugin.ithor_sensors import (\n    RGBSensorThor,\n    GoalObjectTypeThorSensor,\n)\nfrom allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor\nfrom projects.objectnav_baselines.experiments.ithor.objectnav_ithor_base import (\n    ObjectNaviThorBaseConfig,\n)\nfrom projects.objectnav_baselines.mixins import (\n    ResNetPreprocessGRUActorCriticMixin,\n    ObjectNavPPOMixin,\n)\n\n\nclass ObjectNaviThorRGBDPPOExperimentConfig(ObjectNaviThorBaseConfig):\n    \"\"\"An Object Navigation experiment configuration in iTHOR with RGBD\n    input.\"\"\"\n\n    SENSORS = [\n        RGBSensorThor(\n            height=ObjectNaviThorBaseConfig.SCREEN_SIZE,\n            width=ObjectNaviThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        DepthSensorThor(\n            height=ObjectNaviThorBaseConfig.SCREEN_SIZE,\n            width=ObjectNaviThorBaseConfig.SCREEN_SIZE,\n            use_normalization=True,\n            uuid=\"depth_lowres\",\n        ),\n        GoalObjectTypeThorSensor(\n            object_types=ObjectNaviThorBaseConfig.TARGET_TYPES,\n        ),\n    ]\n\n    def __init__(self, **kwargs):\n        super().__init__(**kwargs)\n\n        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(\n            sensors=self.SENSORS,\n            resnet_type=\"RN18\",\n            screen_size=self.SCREEN_SIZE,\n            goal_sensor_type=GoalObjectTypeThorSensor,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return ObjectNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            
advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        return self.preprocessing_and_model.preprocessors()\n\n    def create_model(self, **kwargs) -> nn.Module:\n        return self.preprocessing_and_model.create_model(\n            num_actions=self.ACTION_SPACE.n, **kwargs\n        )\n\n    def tag(self):\n        return \"ObjectNav-iTHOR-RGBD-ResNet18GRU-DDPPO\"\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/objectnav_base.py",
    "content": "from abc import ABC\nfrom typing import Optional, Sequence, Union\n\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.utils.experiment_utils import Builder\n\n\nclass ObjectNavBaseConfig(ExperimentConfig, ABC):\n    \"\"\"The base object navigation configuration file.\"\"\"\n\n    STEP_SIZE = 0.25\n    ROTATION_DEGREES = 30.0\n    VISIBILITY_DISTANCE = 1.0\n    STOCHASTIC = True\n    HORIZONTAL_FIELD_OF_VIEW = 79\n\n    CAMERA_WIDTH = 400\n    CAMERA_HEIGHT = 300\n    SCREEN_SIZE = 224\n    MAX_STEPS = 500\n\n    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None\n    SENSORS: Sequence[Sensor] = []\n\n    def __init__(self):\n        self.REWARD_CONFIG = {\n            \"step_penalty\": -0.01,\n            \"goal_success_reward\": 10.0,\n            \"failed_stop_reward\": 0.0,\n            \"shaping_weight\": 1.0,\n        }\n\n    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        return tuple()\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/objectnav_thor_base.py",
    "content": "import glob\nimport os\nimport platform\nfrom abc import ABC\nfrom math import ceil\nfrom typing import Dict, Any, List, Optional, Sequence, Tuple, cast\n\nimport ai2thor\nimport ai2thor.build\nimport gym\nimport numpy as np\nimport torch\nfrom packaging import version\n\nfrom allenact.base_abstractions.experiment_config import MachineParams\nfrom allenact.base_abstractions.preprocessor import SensorPreprocessorGraph\nfrom allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.utils.experiment_utils import evenly_distribute_count_into_bins\nfrom allenact.utils.system import get_logger\nfrom allenact_plugins.ithor_plugin.ithor_util import (\n    horizontal_to_vertical_fov,\n    get_open_x_displays,\n)\nfrom allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor\nfrom allenact_plugins.robothor_plugin.robothor_task_samplers import (\n    ObjectNavDatasetTaskSampler,\n)\nfrom allenact_plugins.robothor_plugin.robothor_tasks import ObjectNavTask\nfrom projects.objectnav_baselines.experiments.objectnav_base import ObjectNavBaseConfig\n\nif (\n    ai2thor.__version__ not in [\"0.0.1\", None]\n    and not ai2thor.__version__.startswith(\"0+\")\n    and version.parse(ai2thor.__version__) < version.parse(\"3.2.0\")\n):\n    raise ImportError(\n        \"To run the AI2-THOR ObjectNav baseline experiments you must use\"\n        \" ai2thor version 3.2.0 or higher.\"\n    )\n\nimport ai2thor.platform\n\n\nclass ObjectNavThorBaseConfig(ObjectNavBaseConfig, ABC):\n    \"\"\"The base config for all AI2-THOR ObjectNav experiments.\"\"\"\n\n    DEFAULT_NUM_TRAIN_PROCESSES: Optional[int] = None\n    DEFAULT_TRAIN_GPU_IDS = tuple(range(torch.cuda.device_count()))\n    DEFAULT_VALID_GPU_IDS = (torch.cuda.device_count() - 1,)\n    DEFAULT_TEST_GPU_IDS = (torch.cuda.device_count() - 1,)\n\n    TRAIN_DATASET_DIR: Optional[str] = None\n    VAL_DATASET_DIR: 
Optional[str] = None\n    TEST_DATASET_DIR: Optional[str] = None\n\n    AGENT_MODE = \"default\"\n\n    TARGET_TYPES: Optional[Sequence[str]] = None\n\n    THOR_COMMIT_ID: Optional[str] = None\n    DEFAULT_THOR_IS_HEADLESS: bool = False\n\n    ACTION_SPACE = gym.spaces.Discrete(len(ObjectNavTask.class_action_names()))\n\n    def __init__(\n        self,\n        num_train_processes: Optional[int] = None,\n        num_test_processes: Optional[int] = None,\n        test_on_validation: bool = False,\n        train_gpu_ids: Optional[Sequence[int]] = None,\n        val_gpu_ids: Optional[Sequence[int]] = None,\n        test_gpu_ids: Optional[Sequence[int]] = None,\n        randomize_train_materials: bool = False,\n        headless: bool = False,\n    ):\n        super().__init__()\n\n        def v_or_default(v, default):\n            return v if v is not None else default\n\n        self.num_train_processes = v_or_default(\n            num_train_processes, self.DEFAULT_NUM_TRAIN_PROCESSES\n        )\n        self.num_test_processes = v_or_default(\n            num_test_processes, (10 if torch.cuda.is_available() else 1)\n        )\n        self.test_on_validation = test_on_validation\n        self.train_gpu_ids = v_or_default(train_gpu_ids, self.DEFAULT_TRAIN_GPU_IDS)\n        self.val_gpu_ids = v_or_default(val_gpu_ids, self.DEFAULT_VALID_GPU_IDS)\n        self.test_gpu_ids = v_or_default(test_gpu_ids, self.DEFAULT_TEST_GPU_IDS)\n\n        self.headless = v_or_default(headless, self.DEFAULT_THOR_IS_HEADLESS)\n\n        self.sampler_devices = self.train_gpu_ids\n        self.randomize_train_materials = randomize_train_materials\n\n    def env_args(self):\n        assert self.THOR_COMMIT_ID is not None\n\n        return dict(\n            width=self.CAMERA_WIDTH,\n            height=self.CAMERA_HEIGHT,\n            commit_id=(\n                self.THOR_COMMIT_ID if not self.headless else ai2thor.build.COMMIT_ID\n            ),\n            stochastic=True,\n            
continuousMode=True,\n            applyActionNoise=self.STOCHASTIC,\n            rotateStepDegrees=self.ROTATION_DEGREES,\n            visibilityDistance=self.VISIBILITY_DISTANCE,\n            gridSize=self.STEP_SIZE,\n            snapToGrid=False,\n            agentMode=self.AGENT_MODE,\n            fieldOfView=horizontal_to_vertical_fov(\n                horizontal_fov_in_degrees=self.HORIZONTAL_FIELD_OF_VIEW,\n                width=self.CAMERA_WIDTH,\n                height=self.CAMERA_HEIGHT,\n            ),\n            include_private_scenes=False,\n            renderDepthImage=any(isinstance(s, DepthSensorThor) for s in self.SENSORS),\n        )\n\n    def machine_params(self, mode=\"train\", **kwargs):\n        sampler_devices: Sequence[torch.device] = []\n        devices: Sequence[torch.device]\n        if mode == \"train\":\n            workers_per_device = 1\n            devices = (\n                [torch.device(\"cpu\")]\n                if not torch.cuda.is_available()\n                else cast(Tuple, self.train_gpu_ids) * workers_per_device\n            )\n            nprocesses = evenly_distribute_count_into_bins(\n                self.num_train_processes, max(len(devices), 1)\n            )\n            sampler_devices = self.sampler_devices\n        elif mode == \"valid\":\n            nprocesses = 1\n            devices = (\n                [torch.device(\"cpu\")]\n                if not torch.cuda.is_available()\n                else self.val_gpu_ids\n            )\n        elif mode == \"test\":\n            devices = (\n                [torch.device(\"cpu\")]\n                if not torch.cuda.is_available()\n                else self.test_gpu_ids\n            )\n            nprocesses = evenly_distribute_count_into_bins(\n                self.num_test_processes, max(len(devices), 1)\n            )\n        else:\n            raise NotImplementedError(\"mode must be 'train', 'valid', or 'test'.\")\n\n        sensors = [*self.SENSORS]\n        
if mode != \"train\":\n            sensors = [s for s in sensors if not isinstance(s, ExpertActionSensor)]\n\n        sensor_preprocessor_graph = (\n            SensorPreprocessorGraph(\n                source_observation_spaces=SensorSuite(sensors).observation_spaces,\n                preprocessors=self.preprocessors(),\n            )\n            if mode == \"train\"\n            or (\n                (isinstance(nprocesses, int) and nprocesses > 0)\n                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)\n            )\n            else None\n        )\n\n        return MachineParams(\n            nprocesses=nprocesses,\n            devices=devices,\n            sampler_devices=(\n                sampler_devices if mode == \"train\" else devices\n            ),  # ignored with > 1 gpu_ids\n            sensor_preprocessor_graph=sensor_preprocessor_graph,\n        )\n\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return ObjectNavDatasetTaskSampler(**kwargs)\n\n    @staticmethod\n    def _partition_inds(n: int, num_parts: int):\n        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(\n            np.int32\n        )\n\n    def _get_sampler_args_for_scene_split(\n        self,\n        scenes_dir: str,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]],\n        seeds: Optional[List[int]],\n        deterministic_cudnn: bool,\n        include_expert_sensor: bool = True,\n        allow_oversample: bool = False,\n    ) -> Dict[str, Any]:\n        path = os.path.join(scenes_dir, \"*.json.gz\")\n        scenes = [scene.split(\"/\")[-1].split(\".\")[0] for scene in glob.glob(path)]\n        if len(scenes) == 0:\n            raise RuntimeError(\n                (\n                    \"Could find no scene dataset information in directory {}.\"\n                    \" Are you sure you've downloaded them? 
\"\n                    \" If not, see https://allenact.org/installation/download-datasets/ for information\"\n                    \" on how this can be done.\"\n                ).format(scenes_dir)\n            )\n\n        oversample_warning = (\n            f\"Warning: oversampling some of the scenes ({scenes}) to feed all processes ({total_processes}).\"\n            \" You can avoid this by setting a number of workers divisible by the number of scenes\"\n        )\n        if total_processes > len(scenes):  # oversample some scenes -> bias\n            if not allow_oversample:\n                raise RuntimeError(\n                    f\"Cannot have `total_processes > len(scenes)`\"\n                    f\" ({total_processes} > {len(scenes)}) when `allow_oversample` is `False`.\"\n                )\n\n            if total_processes % len(scenes) != 0:\n                get_logger().warning(oversample_warning)\n            scenes = scenes * int(ceil(total_processes / len(scenes)))\n            scenes = scenes[: total_processes * (len(scenes) // total_processes)]\n        elif len(scenes) % total_processes != 0:\n            get_logger().warning(oversample_warning)\n\n        inds = self._partition_inds(len(scenes), total_processes)\n\n        if not self.headless:\n            x_display: Optional[str] = None\n            if platform.system() == \"Linux\":\n                x_displays = get_open_x_displays(throw_error_if_empty=True)\n\n                if len([d for d in devices if d != torch.device(\"cpu\")]) > len(\n                    x_displays\n                ):\n                    get_logger().warning(\n                        f\"More GPU devices found than X-displays (devices: `{devices}`, x_displays: `{x_displays}`).\"\n                        f\" This is not necessarily a bad thing but may mean that you're not using GPU memory as\"\n                        f\" efficiently as possible. 
Consider following the instructions here:\"\n                        f\" https://allenact.org/installation/installation-framework/#installation-of-ithor-ithor-plugin\"\n                        f\" describing how to start an X-display on every GPU.\"\n                    )\n                x_display = x_displays[process_ind % len(x_displays)]\n\n            device_dict = dict(x_display=x_display)\n        else:\n            device_dict = dict(\n                gpu_device=devices[process_ind % len(devices)],\n                platform=ai2thor.platform.CloudRendering,\n            )\n\n        return {\n            \"scenes\": scenes[inds[process_ind] : inds[process_ind + 1]],\n            \"object_types\": self.TARGET_TYPES,\n            \"max_steps\": self.MAX_STEPS,\n            \"sensors\": [\n                s\n                for s in self.SENSORS\n                if (include_expert_sensor or not isinstance(s, ExpertActionSensor))\n            ],\n            \"action_space\": self.ACTION_SPACE,\n            \"seed\": seeds[process_ind] if seeds is not None else None,\n            \"deterministic_cudnn\": deterministic_cudnn,\n            \"rewards_config\": self.REWARD_CONFIG,\n            \"env_args\": {**self.env_args(), **device_dict},\n        }\n\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            scenes_dir=os.path.join(self.TRAIN_DATASET_DIR, \"episodes\"),\n            process_ind=process_ind,\n            total_processes=total_processes,\n            devices=devices,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n            allow_oversample=True,\n        )\n        res[\"scene_directory\"] = self.TRAIN_DATASET_DIR\n        
res[\"loop_dataset\"] = True\n        res[\"allow_flipping\"] = True\n        res[\"randomize_materials_in_training\"] = self.randomize_train_materials\n        return res\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            scenes_dir=os.path.join(self.VAL_DATASET_DIR, \"episodes\"),\n            process_ind=process_ind,\n            total_processes=total_processes,\n            devices=devices,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n            include_expert_sensor=False,\n            allow_oversample=False,\n        )\n        res[\"scene_directory\"] = self.VAL_DATASET_DIR\n        res[\"loop_dataset\"] = False\n        return res\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n\n        if self.test_on_validation or self.TEST_DATASET_DIR is None:\n            if self.test_on_validation:\n                get_logger().warning(\n                    \"`test_on_validation` is set to `True` and thus we will run evaluation on the validation set instead.\"\n                    \" Be careful as the saved metrics json and tensorboard files **will still be labeled as\"\n                    \" 'test' rather than 'valid'**.\"\n                )\n            else:\n                get_logger().warning(\n                    \"No test dataset dir detected, running test on validation set instead.\"\n                    \" Be careful as the saved metrics json and tensorboard files **will still be labeled as\"\n                    \" 
'test' rather than 'valid'**.\"\n                )\n\n            return self.valid_task_sampler_args(\n                process_ind=process_ind,\n                total_processes=total_processes,\n                devices=devices,\n                seeds=seeds,\n                deterministic_cudnn=deterministic_cudnn,\n            )\n\n        else:\n            res = self._get_sampler_args_for_scene_split(\n                scenes_dir=os.path.join(self.TEST_DATASET_DIR, \"episodes\"),\n                process_ind=process_ind,\n                total_processes=total_processes,\n                devices=devices,\n                seeds=seeds,\n                deterministic_cudnn=deterministic_cudnn,\n                include_expert_sensor=False,\n                allow_oversample=False,\n            )\n            res[\"env_args\"][\"all_metadata_available\"] = False\n            res[\"rewards_config\"] = {**res[\"rewards_config\"], \"shaping_weight\": 0}\n            res[\"scene_directory\"] = self.TEST_DATASET_DIR\n            res[\"loop_dataset\"] = False\n            return res\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/robothor/__init__.py",
    "content": ""
  },
  {
    "path": "projects/objectnav_baselines/experiments/robothor/beta/README.md",
    "content": "# Beta experiments\n\nThis folder contains \"beta\" experiments, e.g. training experiments meant to be used\nto test new features. These experiments may have bugs or not train well."
  },
  {
    "path": "projects/objectnav_baselines/experiments/robothor/beta/__init__.py",
    "content": ""
  },
  {
    "path": "projects/objectnav_baselines/experiments/robothor/beta/objectnav_robothor_rgb_resnetgru_ddppo_and_gbc.py",
    "content": "import torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses import PPO\nfrom allenact.algorithms.onpolicy_sync.losses.grouped_action_imitation import (\n    GroupedActionImitation,\n)\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig\nfrom allenact.utils.experiment_utils import (\n    Builder,\n    PipelineStage,\n    TrainingPipeline,\n    LinearDecay,\n)\nfrom allenact_plugins.ithor_plugin.ithor_sensors import (\n    RGBSensorThor,\n    GoalObjectTypeThorSensor,\n)\nfrom allenact_plugins.ithor_plugin.ithor_sensors import TakeEndActionThorNavSensor\nfrom allenact_plugins.robothor_plugin import robothor_constants\nfrom allenact_plugins.robothor_plugin.robothor_tasks import ObjectNavTask\nfrom projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (\n    ObjectNavRoboThorBaseConfig,\n)\nfrom projects.objectnav_baselines.mixins import ResNetPreprocessGRUActorCriticMixin\n\n\nclass ObjectNavRoboThorResNet18GRURGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):\n    \"\"\"An Object Navigation experiment configuration in RoboThor with RGB\n    input.\"\"\"\n\n    SENSORS = (  # type:ignore\n        RGBSensorThor(\n            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        GoalObjectTypeThorSensor(\n            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,\n        ),\n        TakeEndActionThorNavSensor(\n            nactions=len(ObjectNavTask.class_action_names()), uuid=\"expert_group_action\"\n        ),\n    )\n\n    def __init__(self, **kwargs):\n        super().__init__(**kwargs)\n\n        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(\n            sensors=self.SENSORS,\n            resnet_type=\"RN18\",\n            screen_size=self.SCREEN_SIZE,\n         
   goal_sensor_type=GoalObjectTypeThorSensor,\n        )\n\n    def preprocessors(self):\n        return self.preprocessing_and_model.preprocessors()\n\n    def create_model(self, **kwargs):\n        return self.preprocessing_and_model.create_model(\n            num_actions=self.ACTION_SPACE.n, **kwargs\n        )\n\n    def training_pipeline(self, **kwargs):\n        ppo_steps = int(300000000)\n        lr = 3e-4\n        num_mini_batch = 1\n        update_repeats = 4\n        num_steps = 128\n        save_interval = 5000000\n        log_interval = 10000\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 0.95\n        max_grad_norm = 0.5\n\n        action_strs = ObjectNavTask.class_action_names()\n        non_end_action_inds_set = {\n            i for i, a in enumerate(action_strs) if a != robothor_constants.END\n        }\n        end_action_ind_set = {action_strs.index(robothor_constants.END)}\n\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=log_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses={\n                \"ppo_loss\": PPO(**PPOConfig),\n                \"grouped_action_imitation\": GroupedActionImitation(\n                    nactions=len(ObjectNavTask.class_action_names()),\n                    action_groups=[non_end_action_inds_set, end_action_ind_set],\n                ),\n            },\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=[\"ppo_loss\", \"grouped_action_imitation\"],\n                    max_stage_steps=ppo_steps,\n                
)\n            ],\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}\n            ),\n        )\n\n    def tag(self):\n        return \"ObjectNav-RoboTHOR-RGB-ResNet18GRU-DDPPOAndGBC\"\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/robothor/beta/objectnav_robothor_rgb_unfrozenresnet18gru_vdr_ddppo.py",
    "content": "from typing import Union, Optional, Any\n\nimport gym\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses import PPO\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig\nfrom allenact.algorithms.onpolicy_sync.storage import RolloutBlockStorage\n\n# noinspection PyUnresolvedReferences\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.base_abstractions.task import Task\nfrom allenact.embodiedai.storage.vdr_storage import (\n    DiscreteVisualDynamicsReplayStorage,\n    InverseDynamicsVDRLoss,\n)\nfrom allenact.utils.experiment_utils import Builder, TrainingSettings\nfrom allenact.utils.experiment_utils import (\n    PipelineStage,\n    LinearDecay,\n    StageComponent,\n)\nfrom allenact.utils.experiment_utils import TrainingPipeline\nfrom allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment\nfrom allenact_plugins.ithor_plugin.ithor_sensors import (\n    RGBSensorThor,\n    GoalObjectTypeThorSensor,\n)\nfrom allenact_plugins.robothor_plugin.robothor_environment import RoboThorEnvironment\nfrom projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (\n    ObjectNavRoboThorBaseConfig,\n)\nfrom projects.objectnav_baselines.mixins import (\n    ObjectNavUnfrozenResNetWithGRUActorCriticMixin,\n    update_with_auxiliary_losses,\n)\n\n\ndef compute_inv_dyn_action_logits(\n    model,\n    img0,\n    img1,\n):\n    rgb_uuid = model.visual_encoder.rgb_uuid\n    img0_enc = model.visual_encoder({rgb_uuid: img0.unsqueeze(0)}).squeeze(0)\n    img1_enc = model.visual_encoder({rgb_uuid: img1.unsqueeze(0)}).squeeze(0)\n    return model.inv_dyn_mlp(torch.cat((img0_enc, img1_enc), dim=1))\n\n\nclass LastActionSuccessSensor(\n    Sensor[\n        Union[IThorEnvironment, RoboThorEnvironment],\n        Union[Task[IThorEnvironment], Task[RoboThorEnvironment]],\n    
]\n):\n    def __init__(self, uuid: str = \"last_action_success\", **kwargs: Any):\n        super().__init__(\n            uuid=uuid, observation_space=gym.spaces.MultiBinary(1), **kwargs\n        )\n\n    def get_observation(\n        self,\n        env: Union[IThorEnvironment, RoboThorEnvironment],\n        task: Optional[Task],\n        *args: Any,\n        **kwargs: Any\n    ) -> Any:\n        return 1 * task.last_action_success\n\n\nclass VisibleObjectTypesSensor(\n    Sensor[\n        Union[IThorEnvironment, RoboThorEnvironment],\n        Union[Task[IThorEnvironment], Task[RoboThorEnvironment]],\n    ]\n):\n    def __init__(self, uuid: str = \"visible_objects\", **kwargs: Any):\n        super().__init__(\n            uuid=uuid,\n            observation_space=gym.spaces.Box(\n                low=0, high=1, shape=(len(ObjectNavRoboThorBaseConfig.TARGET_TYPES),)\n            ),\n            **kwargs\n        )\n        self.type_to_index = {\n            tt: i for i, tt in enumerate(ObjectNavRoboThorBaseConfig.TARGET_TYPES)\n        }\n\n    def get_observation(\n        self,\n        env: Union[IThorEnvironment, RoboThorEnvironment],\n        task: Optional[Task],\n        *args: Any,\n        **kwargs: Any\n    ) -> Any:\n        out = np.zeros((len(self.type_to_index),))\n        for o in env.controller.last_event.metadata[\"objects\"]:\n            if o[\"visible\"] and o[\"objectType\"] in self.type_to_index:\n                out[self.type_to_index[o[\"objectType\"]]] = 1.0\n        return out\n\n\nclass ObjectNavRoboThorVdrTmpRGBExperimentConfig(ObjectNavRoboThorBaseConfig):\n    SENSORS = [\n        RGBSensorThor(\n            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        GoalObjectTypeThorSensor(\n            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,\n        ),\n        
LastActionSuccessSensor(),\n        VisibleObjectTypesSensor(),\n    ]\n\n    def __init__(self, **kwargs):\n        super().__init__(**kwargs)\n\n        self.model_creation_handler = ObjectNavUnfrozenResNetWithGRUActorCriticMixin(\n            backbone=\"gnresnet18\",\n            sensors=self.SENSORS,\n            auxiliary_uuids=[],\n            add_prev_actions=True,\n            multiple_beliefs=False,\n            belief_fusion=None,\n        )\n\n    def training_pipeline(self, **kwargs):\n        # PPO\n        ppo_steps = int(300000000)\n        lr = 3e-4\n        num_mini_batch = 1\n        update_repeats = 4\n        num_steps = 128\n        save_interval = 5000000\n        log_interval = 10000 if torch.cuda.is_available() else 1\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 0.95\n        max_grad_norm = 0.5\n\n        auxiliary_uuids = tuple()\n        multiple_beliefs = False\n\n        named_losses = {\"ppo_loss\": (PPO(**PPOConfig), 1.0)}\n        named_losses = update_with_auxiliary_losses(\n            named_losses=named_losses,\n            auxiliary_uuids=auxiliary_uuids,\n            multiple_beliefs=multiple_beliefs,\n        )\n\n        default_ts = TrainingSettings(\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n            save_interval=save_interval,\n            metric_accumulate_interval=log_interval,\n        )\n\n        named_losses = {\n            **named_losses,\n            \"inv_dyn_vdr\": (\n                InverseDynamicsVDRLoss(\n                    compute_action_logits_fn=compute_inv_dyn_action_logits,\n                    img0_key=\"img0\",\n                    img1_key=\"img1\",\n                    
action_key=\"action\",\n                ),\n                1.0,\n            ),\n        }\n\n        sorted_loss_names = list(sorted(named_losses.keys()))\n        return TrainingPipeline(\n            training_settings=default_ts,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            named_losses={k: v[0] for k, v in named_losses.items()},\n            named_storages={\n                \"onpolicy\": RolloutBlockStorage(init_size=num_steps),\n                \"discrete_vdr\": DiscreteVisualDynamicsReplayStorage(\n                    image_uuid=\"rgb_lowres\",\n                    action_success_uuid=\"last_action_success\",\n                    extra_targets=[\"visible_objects\"],\n                    nactions=6,\n                    num_to_store_per_action=200 if torch.cuda.is_available() else 10,\n                    max_to_save_per_episode=6,\n                    target_batch_size=256 if torch.cuda.is_available() else 128,\n                ),\n            },\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=sorted_loss_names,\n                    max_stage_steps=ppo_steps,\n                    loss_weights=[\n                        named_losses[loss_name][1] for loss_name in sorted_loss_names\n                    ],\n                    stage_components=[\n                        StageComponent(\n                            uuid=\"onpolicy\",\n                            storage_uuid=\"onpolicy\",\n                            loss_names=[\n                                ln for ln in sorted_loss_names if ln != \"inv_dyn_vdr\"\n                            ],\n                        ),\n                        StageComponent(\n                            uuid=\"vdr\",\n                            storage_uuid=\"discrete_vdr\",\n                            loss_names=[\"inv_dyn_vdr\"],\n                            training_settings=TrainingSettings(\n                                
num_mini_batch=1,\n                                update_repeats=1,\n                            ),\n                        ),\n                    ],\n                )\n            ],\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}\n            ),\n        )\n\n    def create_model(self, **kwargs) -> nn.Module:\n        model = self.model_creation_handler.create_model(**kwargs)\n        model.inv_dyn_mlp = nn.Sequential(\n            nn.Linear(1024, 256),\n            nn.ReLU(inplace=True),\n            nn.Linear(256, 6),\n        )\n        return model\n\n    def tag(self):\n        return \"Objectnav-RoboTHOR-RGB-UnfrozenResNet18GRU-VDR\"\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/robothor/clip/__init__.py",
    "content": ""
  },
  {
    "path": "projects/objectnav_baselines/experiments/robothor/clip/objectnav_robothor_rgb_clipresnet50gru_ddppo.py",
    "content": "from typing import Sequence, Union\n\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.utils.experiment_utils import Builder, TrainingPipeline\nfrom allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor\nfrom allenact_plugins.ithor_plugin.ithor_sensors import (\n    GoalObjectTypeThorSensor,\n    RGBSensorThor,\n)\nfrom projects.objectnav_baselines.experiments.clip.mixins import (\n    ClipResNetPreprocessGRUActorCriticMixin,\n)\nfrom projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (\n    ObjectNavRoboThorBaseConfig,\n)\nfrom projects.objectnav_baselines.mixins import ObjectNavPPOMixin\n\n\nclass ObjectNavRoboThorClipRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):\n    \"\"\"An Object Navigation experiment configuration in RoboThor with RGB\n    input.\"\"\"\n\n    CLIP_MODEL_TYPE = \"RN50\"\n\n    SENSORS = [\n        RGBSensorThor(\n            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,\n            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,\n        ),\n        GoalObjectTypeThorSensor(\n            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,\n        ),\n    ]\n\n    def __init__(self, add_prev_actions: bool = False, **kwargs):\n        super().__init__(**kwargs)\n\n        self.preprocessing_and_model = ClipResNetPreprocessGRUActorCriticMixin(\n            sensors=self.SENSORS,\n            clip_model_type=self.CLIP_MODEL_TYPE,\n            screen_size=self.SCREEN_SIZE,\n            goal_sensor_type=GoalObjectTypeThorSensor,\n        )\n        self.add_prev_actions = add_prev_actions\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return ObjectNavPPOMixin.training_pipeline(\n            
auxiliary_uuids=[],\n            multiple_beliefs=False,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        return self.preprocessing_and_model.preprocessors()\n\n    def create_model(self, **kwargs) -> nn.Module:\n        return self.preprocessing_and_model.create_model(\n            num_actions=self.ACTION_SPACE.n,\n            add_prev_actions=self.add_prev_actions,\n            **kwargs\n        )\n\n    @classmethod\n    def tag(cls):\n        return \"ObjectNav-RoboTHOR-RGB-ClipResNet50GRU-DDPPO\"\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/robothor/clip/objectnav_robothor_rgb_clipresnet50x16gru_ddppo.py",
    "content": "from typing import Sequence, Union\n\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.utils.experiment_utils import Builder, TrainingPipeline\nfrom allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor\nfrom allenact_plugins.ithor_plugin.ithor_sensors import (\n    GoalObjectTypeThorSensor,\n    RGBSensorThor,\n)\nfrom projects.objectnav_baselines.experiments.clip.mixins import (\n    ClipResNetPreprocessGRUActorCriticMixin,\n)\nfrom projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (\n    ObjectNavRoboThorBaseConfig,\n)\nfrom projects.objectnav_baselines.mixins import ObjectNavPPOMixin\n\n\nclass ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):\n    \"\"\"An Object Navigation experiment configuration in RoboThor with RGB\n    input.\"\"\"\n\n    CLIP_MODEL_TYPE = \"RN50x16\"\n\n    SENSORS = [\n        RGBSensorThor(\n            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,\n            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,\n        ),\n        GoalObjectTypeThorSensor(\n            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,\n        ),\n    ]\n\n    def __init__(self, add_prev_actions: bool = False, **kwargs):\n        super().__init__(**kwargs)\n\n        self.preprocessing_and_model = ClipResNetPreprocessGRUActorCriticMixin(\n            sensors=self.SENSORS,\n            clip_model_type=self.CLIP_MODEL_TYPE,\n            screen_size=self.SCREEN_SIZE,\n            goal_sensor_type=GoalObjectTypeThorSensor,\n        )\n        self.add_prev_actions = add_prev_actions\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return ObjectNavPPOMixin.training_pipeline(\n            
auxiliary_uuids=[],\n            multiple_beliefs=False,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        return self.preprocessing_and_model.preprocessors()\n\n    def create_model(self, **kwargs) -> nn.Module:\n        return self.preprocessing_and_model.create_model(\n            num_actions=self.ACTION_SPACE.n,\n            add_prev_actions=self.add_prev_actions,\n            **kwargs\n        )\n\n    @classmethod\n    def tag(cls):\n        return \"ObjectNav-RoboTHOR-RGB-ClipResNet50x16GRU-DDPPO\"\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/robothor/objectnav_robothor_base.py",
    "content": "import os\nfrom abc import ABC\nfrom typing import Optional, List, Any, Dict\n\nimport torch\n\nfrom allenact.utils.misc_utils import prepare_locals_for_super\nfrom projects.objectnav_baselines.experiments.objectnav_thor_base import (\n    ObjectNavThorBaseConfig,\n)\n\n\nclass ObjectNavRoboThorBaseConfig(ObjectNavThorBaseConfig, ABC):\n    \"\"\"The base config for all RoboTHOR ObjectNav experiments.\"\"\"\n\n    THOR_COMMIT_ID = \"bad5bc2b250615cb766ffb45d455c211329af17e\"\n    THOR_COMMIT_ID_FOR_RAND_MATERIALS = \"9549791ce2e7f472063a10abb1fb7664159fec23\"\n\n    AGENT_MODE = \"locobot\"\n\n    DEFAULT_NUM_TRAIN_PROCESSES = 60 if torch.cuda.is_available() else 1\n\n    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), \"datasets/robothor-objectnav/train\")\n    VAL_DATASET_DIR = os.path.join(os.getcwd(), \"datasets/robothor-objectnav/val\")\n    TEST_DATASET_DIR = os.path.join(os.getcwd(), \"datasets/robothor-objectnav/test\")\n\n    TARGET_TYPES = tuple(\n        sorted(\n            [\n                \"AlarmClock\",\n                \"Apple\",\n                \"BaseballBat\",\n                \"BasketBall\",\n                \"Bowl\",\n                \"GarbageCan\",\n                \"HousePlant\",\n                \"Laptop\",\n                \"Mug\",\n                \"SprayBottle\",\n                \"Television\",\n                \"Vase\",\n            ]\n        )\n    )\n\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        kwargs = super(ObjectNavRoboThorBaseConfig, self).train_task_sampler_args(\n            **prepare_locals_for_super(locals())\n        )\n        if self.randomize_train_materials:\n            kwargs[\"env_args\"][\"commit_id\"] = self.THOR_COMMIT_ID_FOR_RAND_MATERIALS\n        return kwargs\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/robothor/objectnav_robothor_depth_resnet18gru_ddppo.py",
    "content": "from typing import Sequence, Union\n\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.utils.experiment_utils import Builder, TrainingPipeline\nfrom allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor\nfrom allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor\nfrom projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (\n    ObjectNavRoboThorBaseConfig,\n)\nfrom projects.objectnav_baselines.mixins import (\n    ResNetPreprocessGRUActorCriticMixin,\n    ObjectNavPPOMixin,\n)\n\n\nclass ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):\n    \"\"\"An Object Navigation experiment configuration in RoboThor with Depth\n    input.\"\"\"\n\n    SENSORS = (\n        DepthSensorThor(\n            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            use_normalization=True,\n            uuid=\"depth_lowres\",\n        ),\n        GoalObjectTypeThorSensor(\n            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,\n        ),\n    )\n\n    def __init__(self, **kwargs):\n        super().__init__(**kwargs)\n\n        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(\n            sensors=self.SENSORS,\n            resnet_type=\"RN18\",\n            screen_size=self.SCREEN_SIZE,\n            goal_sensor_type=GoalObjectTypeThorSensor,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return ObjectNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        return self.preprocessing_and_model.preprocessors()\n\n    def create_model(self, **kwargs) -> nn.Module:\n        
return self.preprocessing_and_model.create_model(\n            num_actions=self.ACTION_SPACE.n, **kwargs\n        )\n\n    def tag(self):\n        return \"ObjectNav-RoboTHOR-Depth-ResNet18GRU-DDPPO\"\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet18gru_dagger.py",
    "content": "from typing import Sequence, Union\n\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.base_abstractions.sensor import ExpertActionSensor\nfrom allenact.utils.experiment_utils import Builder, TrainingPipeline\nfrom allenact_plugins.ithor_plugin.ithor_sensors import (\n    GoalObjectTypeThorSensor,\n    RGBSensorThor,\n)\nfrom allenact_plugins.robothor_plugin.robothor_tasks import ObjectNavTask\nfrom projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (\n    ObjectNavRoboThorBaseConfig,\n)\nfrom projects.objectnav_baselines.mixins import (\n    ResNetPreprocessGRUActorCriticMixin,\n    ObjectNavDAggerMixin,\n)\n\n\nclass ObjectNavRoboThorRGBDAggerExperimentConfig(ObjectNavRoboThorBaseConfig):\n    \"\"\"An Object Navigation experiment configuration in RoboThor with RGB\n    input.\"\"\"\n\n    SENSORS = [\n        RGBSensorThor(\n            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        GoalObjectTypeThorSensor(\n            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,\n        ),\n        ExpertActionSensor(\n            nactions=len(ObjectNavTask.class_action_names()),\n        ),\n    ]\n\n    def __init__(self, **kwargs):\n        super().__init__(**kwargs)\n\n        self.REWARD_CONFIG[\"shaping\"] = 0\n\n        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(\n            sensors=self.SENSORS,\n            resnet_type=\"RN18\",\n            screen_size=self.SCREEN_SIZE,\n            goal_sensor_type=GoalObjectTypeThorSensor,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return ObjectNavDAggerMixin.training_pipeline(\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def preprocessors(self) 
-> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        return self.preprocessing_and_model.preprocessors()\n\n    def create_model(self, **kwargs) -> nn.Module:\n        return self.preprocessing_and_model.create_model(\n            num_actions=self.ACTION_SPACE.n, **kwargs\n        )\n\n    @classmethod\n    def tag(cls):\n        return \"ObjectNav-RoboTHOR-RGB-ResNet18GRU-DAgger\"\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet18gru_ddppo.py",
    "content": "from typing import Sequence, Union\n\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.utils.experiment_utils import Builder, TrainingPipeline\nfrom allenact_plugins.ithor_plugin.ithor_sensors import (\n    GoalObjectTypeThorSensor,\n    RGBSensorThor,\n)\nfrom projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (\n    ObjectNavRoboThorBaseConfig,\n)\nfrom projects.objectnav_baselines.mixins import (\n    ResNetPreprocessGRUActorCriticMixin,\n    ObjectNavPPOMixin,\n)\n\n\nclass ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):\n    \"\"\"An Object Navigation experiment configuration in RoboThor with RGB\n    input.\"\"\"\n\n    SENSORS = [\n        RGBSensorThor(\n            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        GoalObjectTypeThorSensor(\n            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,\n        ),\n    ]\n\n    def __init__(self, **kwargs):\n        super().__init__(**kwargs)\n\n        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(\n            sensors=self.SENSORS,\n            resnet_type=\"RN18\",\n            screen_size=self.SCREEN_SIZE,\n            goal_sensor_type=GoalObjectTypeThorSensor,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return ObjectNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        return self.preprocessing_and_model.preprocessors()\n\n    def create_model(self, **kwargs) -> nn.Module:\n        return self.preprocessing_and_model.create_model(\n 
           num_actions=self.ACTION_SPACE.n, **kwargs\n        )\n\n    @classmethod\n    def tag(cls):\n        return \"ObjectNav-RoboTHOR-RGB-ResNet18GRU-DDPPO\"\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet50gru_ddppo.py",
    "content": "from typing import Sequence, Union\n\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.utils.experiment_utils import Builder, TrainingPipeline\nfrom allenact_plugins.ithor_plugin.ithor_sensors import (\n    GoalObjectTypeThorSensor,\n    RGBSensorThor,\n)\nfrom projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (\n    ObjectNavRoboThorBaseConfig,\n)\nfrom projects.objectnav_baselines.mixins import (\n    ResNetPreprocessGRUActorCriticMixin,\n    ObjectNavPPOMixin,\n)\n\n\nclass ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):\n    \"\"\"An Object Navigation experiment configuration in RoboThor with RGB\n    input.\"\"\"\n\n    SENSORS = [\n        RGBSensorThor(\n            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        GoalObjectTypeThorSensor(\n            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,\n        ),\n    ]\n\n    def __init__(self, **kwargs):\n        super().__init__(**kwargs)\n\n        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(\n            sensors=self.SENSORS,\n            resnet_type=\"RN50\",\n            screen_size=self.SCREEN_SIZE,\n            goal_sensor_type=GoalObjectTypeThorSensor,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return ObjectNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        return self.preprocessing_and_model.preprocessors()\n\n    def create_model(self, **kwargs) -> nn.Module:\n        return self.preprocessing_and_model.create_model(\n 
           num_actions=self.ACTION_SPACE.n, **kwargs\n        )\n\n    def tag(self):\n        return \"ObjectNav-RoboTHOR-RGB-ResNet50GRU-DDPPO\"\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_unfrozenresnet18gru_ddppo.py",
    "content": "import torch.nn as nn\n\nfrom allenact.utils.experiment_utils import TrainingPipeline\nfrom allenact_plugins.ithor_plugin.ithor_sensors import (\n    RGBSensorThor,\n    GoalObjectTypeThorSensor,\n)\nfrom projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (\n    ObjectNavRoboThorBaseConfig,\n)\nfrom projects.objectnav_baselines.mixins import (\n    ObjectNavUnfrozenResNetWithGRUActorCriticMixin,\n    ObjectNavPPOMixin,\n)\n\n\nclass ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):\n    \"\"\"An Object Navigation experiment configuration in RoboThor with RGB input\n    without preprocessing by frozen ResNet (instead, a trainable ResNet).\"\"\"\n\n    SENSORS = [\n        RGBSensorThor(\n            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        GoalObjectTypeThorSensor(\n            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,\n        ),\n    ]\n\n    def __init__(self, **kwargs):\n        super().__init__(**kwargs)\n\n        self.model_creation_handler = ObjectNavUnfrozenResNetWithGRUActorCriticMixin(\n            backbone=\"gnresnet18\",\n            sensors=self.SENSORS,\n            auxiliary_uuids=[],\n            add_prev_actions=True,\n            multiple_beliefs=False,\n            belief_fusion=None,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return ObjectNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def create_model(self, **kwargs) -> nn.Module:\n        return self.model_creation_handler.create_model(**kwargs)\n\n    def tag(self):\n        return \"ObjectNav-RoboTHOR-RGB-UnfrozenResNet18GRU-DDPPO\"\n"
  },
  {
    "path": "projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnet18gru_ddppo.py",
    "content": "from typing import Sequence, Union\n\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.utils.experiment_utils import Builder, TrainingPipeline\nfrom allenact_plugins.ithor_plugin.ithor_sensors import (\n    RGBSensorThor,\n    GoalObjectTypeThorSensor,\n)\nfrom allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor\nfrom projects.objectnav_baselines.experiments.robothor.objectnav_robothor_base import (\n    ObjectNavRoboThorBaseConfig,\n)\nfrom projects.objectnav_baselines.mixins import (\n    ResNetPreprocessGRUActorCriticMixin,\n    ObjectNavPPOMixin,\n)\n\n\nclass ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):\n    \"\"\"An Object Navigation experiment configuration in RoboThor with RGBD\n    input.\"\"\"\n\n    SENSORS = [\n        RGBSensorThor(\n            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        DepthSensorThor(\n            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,\n            use_normalization=True,\n            uuid=\"depth_lowres\",\n        ),\n        GoalObjectTypeThorSensor(\n            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,\n        ),\n    ]\n\n    def __init__(self, **kwargs):\n        super().__init__(**kwargs)\n\n        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(\n            sensors=self.SENSORS,\n            resnet_type=\"RN18\",\n            screen_size=self.SCREEN_SIZE,\n            goal_sensor_type=GoalObjectTypeThorSensor,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return ObjectNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            
advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        return self.preprocessing_and_model.preprocessors()\n\n    def create_model(self, **kwargs) -> nn.Module:\n        return self.preprocessing_and_model.create_model(\n            num_actions=self.ACTION_SPACE.n, **kwargs\n        )\n\n    def tag(self):\n        return \"ObjectNav-RoboTHOR-RGBD-ResNet18GRU-DDPPO\"\n"
  },
  {
    "path": "projects/objectnav_baselines/mixins.py",
    "content": "from typing import Sequence, Union, Optional, Dict, Tuple, Type\n\nimport attr\nimport gym\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\nfrom torchvision import models\n\nfrom allenact.algorithms.onpolicy_sync.losses import PPO\nfrom allenact.algorithms.onpolicy_sync.losses.abstract_loss import (\n    AbstractActorCriticLoss,\n)\nfrom allenact.algorithms.onpolicy_sync.losses.imitation import Imitation\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.embodiedai.aux_losses.losses import (\n    InverseDynamicsLoss,\n    TemporalDistanceLoss,\n    CPCA1Loss,\n    CPCA2Loss,\n    CPCA4Loss,\n    CPCA8Loss,\n    CPCA16Loss,\n    MultiAuxTaskNegEntropyLoss,\n    CPCA1SoftMaxLoss,\n    CPCA2SoftMaxLoss,\n    CPCA4SoftMaxLoss,\n    CPCA8SoftMaxLoss,\n    CPCA16SoftMaxLoss,\n)\nfrom allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor\nfrom allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor\nfrom allenact.utils.experiment_utils import (\n    Builder,\n    TrainingPipeline,\n    PipelineStage,\n    LinearDecay,\n)\nfrom allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor\nfrom allenact_plugins.navigation_plugin.objectnav.models import (\n    ResnetTensorNavActorCritic,\n    ObjectNavActorCritic,\n)\nfrom allenact_plugins.robothor_plugin.robothor_tasks import ObjectNavTask\n\n\n@attr.s(kw_only=True)\nclass ResNetPreprocessGRUActorCriticMixin:\n    sensors: Sequence[Sensor] = attr.ib()\n    resnet_type: str = attr.ib()\n    screen_size: int = attr.ib()\n    goal_sensor_type: Type[Sensor] = attr.ib()\n\n    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        preprocessors = []\n\n        if self.resnet_type in [\"RN18\", \"RN34\"]:\n            
output_shape = (512, 7, 7)\n        elif self.resnet_type in [\"RN50\", \"RN101\", \"RN152\"]:\n            output_shape = (2048, 7, 7)\n        else:\n            raise NotImplementedError(\n                f\"`RESNET_TYPE` must be one 'RNx' with x equaling one of\"\n                f\" 18, 34, 50, 101, or 152.\"\n            )\n\n        rgb_sensor = next((s for s in self.sensors if isinstance(s, RGBSensor)), None)\n        if rgb_sensor is not None:\n            preprocessors.append(\n                ResNetPreprocessor(\n                    input_height=self.screen_size,\n                    input_width=self.screen_size,\n                    output_width=output_shape[2],\n                    output_height=output_shape[1],\n                    output_dims=output_shape[0],\n                    pool=False,\n                    torchvision_resnet_model=getattr(\n                        models, f\"resnet{self.resnet_type.replace('RN', '')}\"\n                    ),\n                    input_uuids=[rgb_sensor.uuid],\n                    output_uuid=\"rgb_resnet_imagenet\",\n                )\n            )\n\n        depth_sensor = next(\n            (s for s in self.sensors if isinstance(s, DepthSensor)), None\n        )\n        if depth_sensor is not None:\n            preprocessors.append(\n                ResNetPreprocessor(\n                    input_height=self.screen_size,\n                    input_width=self.screen_size,\n                    output_width=output_shape[2],\n                    output_height=output_shape[1],\n                    output_dims=output_shape[0],\n                    pool=False,\n                    torchvision_resnet_model=getattr(\n                        models, f\"resnet{self.resnet_type.replace('RN', '')}\"\n                    ),\n                    input_uuids=[depth_sensor.uuid],\n                    output_uuid=\"depth_resnet_imagenet\",\n                )\n            )\n\n        return preprocessors\n\n    def 
create_model(self, **kwargs) -> nn.Module:\n        has_rgb = any(isinstance(s, RGBSensor) for s in self.sensors)\n        has_depth = any(isinstance(s, DepthSensor) for s in self.sensors)\n        goal_sensor_uuid = next(\n            (s.uuid for s in self.sensors if isinstance(s, self.goal_sensor_type)),\n            None,\n        )\n\n        return ResnetTensorNavActorCritic(\n            action_space=gym.spaces.Discrete(len(ObjectNavTask.class_action_names())),\n            observation_space=kwargs[\"sensor_preprocessor_graph\"].observation_spaces,\n            goal_sensor_uuid=goal_sensor_uuid,\n            rgb_resnet_preprocessor_uuid=\"rgb_resnet_imagenet\" if has_rgb else None,\n            depth_resnet_preprocessor_uuid=(\n                \"depth_resnet_imagenet\" if has_depth else None\n            ),\n            hidden_size=512,\n            goal_dims=32,\n        )\n\n\n@attr.s(kw_only=True)\nclass ObjectNavUnfrozenResNetWithGRUActorCriticMixin:\n    backbone: str = attr.ib()\n    sensors: Sequence[Sensor] = attr.ib()\n    auxiliary_uuids: Sequence[str] = attr.ib()\n    add_prev_actions: bool = attr.ib()\n    multiple_beliefs: bool = attr.ib()\n    belief_fusion: Optional[str] = attr.ib()\n\n    def create_model(self, **kwargs) -> nn.Module:\n        rgb_uuid = next(\n            (s.uuid for s in self.sensors if isinstance(s, RGBSensor)), None\n        )\n        depth_uuid = next(\n            (s.uuid for s in self.sensors if isinstance(s, DepthSensor)), None\n        )\n        goal_sensor_uuid = next(\n            (s.uuid for s in self.sensors if isinstance(s, GoalObjectTypeThorSensor))\n        )\n\n        return ObjectNavActorCritic(\n            action_space=gym.spaces.Discrete(len(ObjectNavTask.class_action_names())),\n            observation_space=kwargs[\"sensor_preprocessor_graph\"].observation_spaces,\n            rgb_uuid=rgb_uuid,\n            depth_uuid=depth_uuid,\n            goal_sensor_uuid=goal_sensor_uuid,\n            
hidden_size=(\n                192 if self.multiple_beliefs and len(self.auxiliary_uuids) > 1 else 512\n            ),\n            backbone=self.backbone,\n            resnet_baseplanes=32,\n            object_type_embedding_dim=32,\n            num_rnn_layers=1,\n            rnn_type=\"GRU\",\n            add_prev_actions=self.add_prev_actions,\n            action_embed_size=6,\n            auxiliary_uuids=self.auxiliary_uuids,\n            multiple_beliefs=self.multiple_beliefs,\n            beliefs_fusion=self.belief_fusion,\n        )\n\n\nclass ObjectNavDAggerMixin:\n    @staticmethod\n    def training_pipeline(\n        advance_scene_rollout_period: Optional[int] = None,\n    ) -> TrainingPipeline:\n        training_steps = int(300000000)\n        tf_steps = int(5e6)\n        anneal_steps = int(5e6)\n        il_no_tf_steps = training_steps - tf_steps - anneal_steps\n        assert il_no_tf_steps > 0\n\n        lr = 3e-4\n        num_mini_batch = 1\n        update_repeats = 4\n        num_steps = 128\n        save_interval = 5000000\n        log_interval = 10000 if torch.cuda.is_available() else 1\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 0.95\n        max_grad_norm = 0.5\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=log_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses={\n                \"imitation_loss\": Imitation(),\n            },\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=advance_scene_rollout_period,\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=[\"imitation_loss\"],\n                    
max_stage_steps=tf_steps,\n                    teacher_forcing=LinearDecay(\n                        startp=1.0,\n                        endp=1.0,\n                        steps=tf_steps,\n                    ),\n                ),\n                PipelineStage(\n                    loss_names=[\"imitation_loss\"],\n                    max_stage_steps=anneal_steps + il_no_tf_steps,\n                    teacher_forcing=LinearDecay(\n                        startp=1.0,\n                        endp=0.0,\n                        steps=anneal_steps,\n                    ),\n                ),\n            ],\n            lr_scheduler_builder=Builder(\n                LambdaLR,\n                {\"lr_lambda\": LinearDecay(steps=training_steps)},\n            ),\n        )\n\n\ndef update_with_auxiliary_losses(\n    named_losses: Dict[str, Tuple[AbstractActorCriticLoss, float]],\n    auxiliary_uuids: Sequence[str],\n    multiple_beliefs: bool,\n) -> Dict[str, Tuple[AbstractActorCriticLoss, float]]:\n    # auxliary losses\n    aux_loss_total_weight = 2.0\n\n    # Total losses\n    total_aux_losses: Dict[str, Tuple[AbstractActorCriticLoss, float]] = {\n        InverseDynamicsLoss.UUID: (\n            InverseDynamicsLoss(\n                subsample_rate=0.2,\n                subsample_min_num=10,  # TODO: test its effects\n            ),\n            0.05 * aux_loss_total_weight,  # should times 2\n        ),\n        TemporalDistanceLoss.UUID: (\n            TemporalDistanceLoss(\n                num_pairs=8,\n                epsiode_len_min=5,  # TODO: test its effects\n            ),\n            0.2 * aux_loss_total_weight,  # should times 2\n        ),\n        CPCA1Loss.UUID: (\n            CPCA1Loss(\n                subsample_rate=0.2,\n            ),  # TODO: test its effects\n            0.05 * aux_loss_total_weight,  # should times 2\n        ),\n        CPCA2Loss.UUID: (\n            CPCA2Loss(\n                subsample_rate=0.2,\n            ),  # TODO: test 
its effects\n            0.05 * aux_loss_total_weight,  # should times 2\n        ),\n        CPCA4Loss.UUID: (\n            CPCA4Loss(\n                subsample_rate=0.2,\n            ),  # TODO: test its effects\n            0.05 * aux_loss_total_weight,  # should times 2\n        ),\n        CPCA8Loss.UUID: (\n            CPCA8Loss(\n                subsample_rate=0.2,\n            ),  # TODO: test its effects\n            0.05 * aux_loss_total_weight,  # should times 2\n        ),\n        CPCA16Loss.UUID: (\n            CPCA16Loss(\n                subsample_rate=0.2,\n            ),  # TODO: test its effects\n            0.05 * aux_loss_total_weight,  # should times 2\n        ),\n        CPCA1SoftMaxLoss.UUID: (\n            CPCA1SoftMaxLoss(\n                subsample_rate=1.0,\n            ),\n            0.05 * aux_loss_total_weight,  # should times 2\n        ),\n        CPCA2SoftMaxLoss.UUID: (\n            CPCA2SoftMaxLoss(\n                subsample_rate=1.0,\n            ),\n            0.05 * aux_loss_total_weight,  # should times 2\n        ),\n        CPCA4SoftMaxLoss.UUID: (\n            CPCA4SoftMaxLoss(\n                subsample_rate=1.0,\n            ),\n            0.05 * aux_loss_total_weight,  # should times 2\n        ),\n        CPCA8SoftMaxLoss.UUID: (\n            CPCA8SoftMaxLoss(\n                subsample_rate=1.0,\n            ),\n            0.05 * aux_loss_total_weight,  # should times 2\n        ),\n        CPCA16SoftMaxLoss.UUID: (\n            CPCA16SoftMaxLoss(\n                subsample_rate=1.0,\n            ),\n            0.05 * aux_loss_total_weight,  # should times 2\n        ),\n    }\n    named_losses.update({uuid: total_aux_losses[uuid] for uuid in auxiliary_uuids})\n\n    if multiple_beliefs:  # add weight entropy loss automatically\n        named_losses[MultiAuxTaskNegEntropyLoss.UUID] = (\n            MultiAuxTaskNegEntropyLoss(auxiliary_uuids),\n            0.01,\n        )\n\n    return named_losses\n\n\nclass 
ObjectNavPPOMixin:\n    @staticmethod\n    def training_pipeline(\n        auxiliary_uuids: Sequence[str],\n        multiple_beliefs: bool,\n        normalize_advantage: bool = True,\n        advance_scene_rollout_period: Optional[int] = None,\n        lr=3e-4,\n        num_mini_batch=1,\n        update_repeats=4,\n        num_steps=128,\n        save_interval=5000000,\n        log_interval=10000 if torch.cuda.is_available() else 1,\n        gamma=0.99,\n        use_gae=True,\n        gae_lambda=0.95,\n        max_grad_norm=0.5,\n        anneal_lr: bool = True,\n        extra_losses: Optional[Dict[str, Tuple[AbstractActorCriticLoss, float]]] = None,\n    ) -> TrainingPipeline:\n        ppo_steps = int(300000000)\n\n        named_losses = {\n            \"ppo_loss\": (\n                PPO(**PPOConfig, normalize_advantage=normalize_advantage),\n                1.0,\n            ),\n            **({} if extra_losses is None else extra_losses),\n        }\n        named_losses = update_with_auxiliary_losses(\n            named_losses=named_losses,\n            auxiliary_uuids=auxiliary_uuids,\n            multiple_beliefs=multiple_beliefs,\n        )\n\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=log_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses={key: val[0] for key, val in named_losses.items()},\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=advance_scene_rollout_period,\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=list(named_losses.keys()),\n                    max_stage_steps=ppo_steps,\n                    loss_weights=[val[1] for val in 
named_losses.values()],\n                )\n            ],\n            lr_scheduler_builder=(\n                Builder(LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)})\n                if anneal_lr\n                else None\n            ),\n        )\n"
  },
  {
    "path": "projects/pointnav_baselines/README.md",
    "content": "# Baseline models for the Point Navigation task in the Habitat, RoboTHOR and iTHOR environments\n\nThis project contains the code for training baseline models on the PointNav task. In this setting the agent\nspawns at a location in an environment and is tasked to move to another location. The agent is given a \"compass\"\nthat tells it the distance and bearing to the target position at every frame. Once the agent is confident that\nit has reached the end it executes the `END` action which terminates the episode. If the agent is within a set\ndistance to the target (in our case 0.2 meters) the agent succeeded, else it failed.\n\nProvided are experiment configs for training a simple convolutional model with\nan GRU using `RGB`, `Depth` or `RGBD` as inputs in [Habitat](https://github.com/facebookresearch/habitat-sim), \n[RoboTHOR](https://ai2thor.allenai.org/robothor/) and [iTHOR](https://ai2thor.allenai.org/ithor/).\n\nThe experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf)\nReinforcement Learning Algorithm.\n\nTo train an experiment run the following command from the `allenact` root directory:\n\n```bash\npython main.py -o <PATH_TO_OUTPUT> -c -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> <EXPERIMENT_NAME>\n```\n\nWhere `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights\nand logs to be stored, `<BASE_DIRECTORY_OF_YOUR_EXPERIMENT>` is the directory where our\nexperiment file is located and `<EXPERIMENT_NAME>` is the name of the python module containing\nthe experiment. An example usage of this command would be:\n\n```bash\npython main.py -o storage/pointnav-robothor-depth -b projects/pointnav_baselines/experiments/robothor/ pointnav_robothor_depth_simpleconvgru_ddppo\n```\n\nThis trains a simple convolutional neural network with a GRU using Depth input on the\nPointNav task in the RoboTHOR environment and stores the model weights and logs\nto `storage/pointnav-robothor-rgb`.\n"
  },
  {
    "path": "projects/pointnav_baselines/__init__.py",
    "content": ""
  },
  {
    "path": "projects/pointnav_baselines/experiments/__init__.py",
    "content": ""
  },
  {
    "path": "projects/pointnav_baselines/experiments/habitat/__init__.py",
    "content": ""
  },
  {
    "path": "projects/pointnav_baselines/experiments/habitat/clip/__init__.py",
    "content": ""
  },
  {
    "path": "projects/pointnav_baselines/experiments/habitat/clip/pointnav_habitat_rgb_clipresnet50gru_ddppo.py",
    "content": "from typing import Sequence, Union\n\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.preprocessor import Preprocessor\nfrom allenact.utils.experiment_utils import Builder, TrainingPipeline\nfrom allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor\nfrom allenact_plugins.habitat_plugin.habitat_sensors import (\n    RGBSensorHabitat,\n    TargetCoordinatesSensorHabitat,\n)\nfrom projects.objectnav_baselines.experiments.clip.mixins import (\n    ClipResNetPreprocessGRUActorCriticMixin,\n)\nfrom projects.pointnav_baselines.experiments.habitat.pointnav_habitat_base import (\n    PointNavHabitatBaseConfig,\n)\nfrom projects.pointnav_baselines.mixins import PointNavPPOMixin\n\n\nclass PointNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig(PointNavHabitatBaseConfig):\n    \"\"\"An Point Navigation experiment configuration in Habitat with Depth\n    input.\"\"\"\n\n    CLIP_MODEL_TYPE = \"RN50\"\n\n    SENSORS = [\n        RGBSensorHabitat(\n            height=PointNavHabitatBaseConfig.SCREEN_SIZE,\n            width=PointNavHabitatBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,\n            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,\n        ),\n        TargetCoordinatesSensorHabitat(coordinate_dims=2),\n    ]\n\n    def __init__(self, add_prev_actions: bool = False, **kwargs):\n        super().__init__(**kwargs)\n\n        self.preprocessing_and_model = ClipResNetPreprocessGRUActorCriticMixin(\n            sensors=self.SENSORS,\n            clip_model_type=self.CLIP_MODEL_TYPE,\n            screen_size=self.SCREEN_SIZE,\n            goal_sensor_type=TargetCoordinatesSensorHabitat,\n        )\n        self.add_prev_actions = add_prev_actions\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return PointNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            
normalize_advantage=False,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        return self.preprocessing_and_model.preprocessors()\n\n    def create_model(self, **kwargs) -> nn.Module:\n        return self.preprocessing_and_model.create_model(\n            num_actions=self.ACTION_SPACE.n,\n            add_prev_actions=self.add_prev_actions,\n            **kwargs,\n        )\n\n    @classmethod\n    def tag(cls):\n        return \"PointNav-Habitat-RGB-ClipResNet50GRU-DDPPO\"\n"
  },
  {
    "path": "projects/pointnav_baselines/experiments/habitat/pointnav_habitat_base.py",
    "content": "import os\nfrom abc import ABC\nfrom typing import Dict, Any, List, Optional, Sequence, Union\n\nimport gym\nimport torch\n\n# noinspection PyUnresolvedReferences\nimport habitat\nfrom allenact.base_abstractions.experiment_config import MachineParams\nfrom allenact.base_abstractions.preprocessor import (\n    SensorPreprocessorGraph,\n    Preprocessor,\n)\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor\nfrom allenact.utils.experiment_utils import evenly_distribute_count_into_bins, Builder\nfrom allenact.utils.system import get_logger\nfrom allenact_plugins.habitat_plugin.habitat_constants import (\n    HABITAT_DATASETS_DIR,\n    HABITAT_CONFIGS_DIR,\n    HABITAT_SCENE_DATASETS_DIR,\n)\nfrom allenact_plugins.habitat_plugin.habitat_task_samplers import PointNavTaskSampler\nfrom allenact_plugins.habitat_plugin.habitat_tasks import PointNavTask\nfrom allenact_plugins.habitat_plugin.habitat_utils import (\n    get_habitat_config,\n    construct_env_configs,\n)\nfrom projects.pointnav_baselines.experiments.pointnav_base import PointNavBaseConfig\n\n\ndef create_pointnav_config(\n    config_yaml_path: str,\n    mode: str,\n    scenes_path: str,\n    simulator_gpu_ids: Sequence[int],\n    distance_to_goal: float,\n    rotation_degrees: float,\n    step_size: float,\n    max_steps: int,\n    num_processes: int,\n    camera_width: int,\n    camera_height: int,\n    using_rgb: bool,\n    using_depth: bool,\n    training: bool,\n    num_episode_sample: int,\n) -> habitat.Config:\n    config = get_habitat_config(config_yaml_path)\n\n    config.defrost()\n    config.NUM_PROCESSES = num_processes\n    config.SIMULATOR_GPU_IDS = simulator_gpu_ids\n    config.DATASET.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR\n\n    config.DATASET.DATA_PATH = scenes_path\n\n    config.SIMULATOR.AGENT_0.SENSORS = []\n    if using_rgb:\n       
 config.SIMULATOR.AGENT_0.SENSORS.append(\"RGB_SENSOR\")\n    if using_depth:\n        config.SIMULATOR.AGENT_0.SENSORS.append(\"DEPTH_SENSOR\")\n\n    config.SIMULATOR.RGB_SENSOR.WIDTH = camera_width\n    config.SIMULATOR.RGB_SENSOR.HEIGHT = camera_height\n    config.SIMULATOR.DEPTH_SENSOR.WIDTH = camera_width\n    config.SIMULATOR.DEPTH_SENSOR.HEIGHT = camera_height\n    config.SIMULATOR.TURN_ANGLE = rotation_degrees\n    config.SIMULATOR.FORWARD_STEP_SIZE = step_size\n    config.ENVIRONMENT.MAX_EPISODE_STEPS = max_steps\n\n    config.TASK.TYPE = \"Nav-v0\"\n    config.TASK.SUCCESS_DISTANCE = distance_to_goal\n    config.TASK.SENSORS = [\"POINTGOAL_WITH_GPS_COMPASS_SENSOR\"]\n    config.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.GOAL_FORMAT = \"POLAR\"\n    config.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.DIMENSIONALITY = 2\n    config.TASK.GOAL_SENSOR_UUID = \"pointgoal_with_gps_compass\"\n    config.TASK.MEASUREMENTS = [\"DISTANCE_TO_GOAL\", \"SUCCESS\", \"SPL\"]\n    config.TASK.SPL.TYPE = \"SPL\"\n    config.TASK.SPL.SUCCESS_DISTANCE = distance_to_goal\n    config.TASK.SUCCESS.SUCCESS_DISTANCE = distance_to_goal\n\n    if not training:\n        config.SEED = 0\n        config.ENVIRONMENT.ITERATOR_OPTIONS.SHUFFLE = False\n\n    if num_episode_sample > 0:\n        config.ENVIRONMENT.ITERATOR_OPTIONS.NUM_EPISODE_SAMPLE = num_episode_sample\n\n    config.MODE = mode\n\n    config.freeze()\n\n    return config\n\n\nclass PointNavHabitatBaseConfig(PointNavBaseConfig, ABC):\n    \"\"\"The base config for all Habitat PointNav experiments.\"\"\"\n\n    # selected auxiliary uuids\n    ## if comment all the keys, then it's vanilla DD-PPO\n    AUXILIARY_UUIDS = [\n        # InverseDynamicsLoss.UUID,\n        # TemporalDistanceLoss.UUID,\n        # CPCA1Loss.UUID,\n        # CPCA4Loss.UUID,\n        # CPCA8Loss.UUID,\n        # CPCA16Loss.UUID,\n    ]\n    ADD_PREV_ACTIONS = False\n    MULTIPLE_BELIEFS = False\n    BELIEF_FUSION = (  # choose one\n        None\n        # 
AttentiveFusion\n        # AverageFusion\n        # SoftmaxFusion\n    )\n\n    FAILED_END_REWARD = -1.0\n\n    TASK_DATA_DIR_TEMPLATE = os.path.join(\n        HABITAT_DATASETS_DIR, \"pointnav/gibson/v1/{}/{}.json.gz\"\n    )\n    BASE_CONFIG_YAML_PATH = os.path.join(\n        HABITAT_CONFIGS_DIR, \"tasks/pointnav_gibson.yaml\"\n    )\n\n    ACTION_SPACE = gym.spaces.Discrete(len(PointNavTask.class_action_names()))\n\n    DEFAULT_NUM_TRAIN_PROCESSES = (\n        5 * torch.cuda.device_count() if torch.cuda.is_available() else 1\n    )\n    DEFAULT_NUM_TEST_PROCESSES = 10\n\n    DEFAULT_TRAIN_GPU_IDS = tuple(range(torch.cuda.device_count()))\n    DEFAULT_VALID_GPU_IDS = [torch.cuda.device_count() - 1]\n    DEFAULT_TEST_GPU_IDS = [torch.cuda.device_count() - 1]\n\n    def __init__(\n        self,\n        debug: bool = False,\n        num_train_processes: Optional[int] = None,\n        num_test_processes: Optional[int] = None,\n        test_on_validation: bool = False,\n        run_valid: bool = True,\n        train_gpu_ids: Optional[Sequence[int]] = None,\n        val_gpu_ids: Optional[Sequence[int]] = None,\n        test_gpu_ids: Optional[Sequence[int]] = None,\n        **kwargs,\n    ):\n        super().__init__(**kwargs)\n\n        def v_or_default(v, default):\n            return v if v is not None else default\n\n        self.num_train_processes = v_or_default(\n            num_train_processes, self.DEFAULT_NUM_TRAIN_PROCESSES\n        )\n        self.num_test_processes = v_or_default(\n            num_test_processes, (10 if torch.cuda.is_available() else 1)\n        )\n        self.test_on_validation = test_on_validation\n        self.run_valid = run_valid\n        self.train_gpu_ids = v_or_default(train_gpu_ids, self.DEFAULT_TRAIN_GPU_IDS)\n        self.val_gpu_ids = v_or_default(\n            val_gpu_ids, self.DEFAULT_VALID_GPU_IDS if run_valid else []\n        )\n        self.test_gpu_ids = v_or_default(test_gpu_ids, self.DEFAULT_TEST_GPU_IDS)\n\n        def 
create_config(\n            mode: str,\n            scenes_path: str,\n            num_processes: int,\n            simulator_gpu_ids: Sequence[int],\n            training: bool = True,\n            num_episode_sample: int = -1,\n        ):\n            return create_pointnav_config(\n                config_yaml_path=self.BASE_CONFIG_YAML_PATH,\n                mode=mode,\n                scenes_path=scenes_path,\n                simulator_gpu_ids=simulator_gpu_ids,\n                distance_to_goal=self.DISTANCE_TO_GOAL,\n                rotation_degrees=self.ROTATION_DEGREES,\n                step_size=self.STEP_SIZE,\n                max_steps=self.MAX_STEPS,\n                num_processes=num_processes,\n                camera_width=self.CAMERA_WIDTH,\n                camera_height=self.CAMERA_HEIGHT,\n                using_rgb=any(isinstance(s, RGBSensor) for s in self.SENSORS),\n                using_depth=any(isinstance(s, DepthSensor) for s in self.SENSORS),\n                training=training,\n                num_episode_sample=num_episode_sample,\n            )\n\n        self.TRAIN_CONFIG = create_config(\n            mode=\"train\",\n            scenes_path=self.train_scenes_path(),\n            num_processes=self.num_train_processes,\n            simulator_gpu_ids=self.train_gpu_ids,\n            training=True,\n        )\n        self.VALID_CONFIG = create_config(\n            mode=\"validate\",\n            scenes_path=self.valid_scenes_path(),\n            num_processes=1,\n            simulator_gpu_ids=self.val_gpu_ids,\n            training=False,\n            num_episode_sample=200,\n        )\n        self.TEST_CONFIG = create_config(\n            mode=\"validate\",\n            scenes_path=self.test_scenes_path(),\n            num_processes=self.num_test_processes,\n            simulator_gpu_ids=self.test_gpu_ids,\n            training=False,\n        )\n\n        self.TRAIN_CONFIGS_PER_PROCESS = construct_env_configs(\n            
self.TRAIN_CONFIG, allow_scene_repeat=True\n        )\n\n        if debug:\n            get_logger().warning(\"IN DEBUG MODE, WILL ONLY USE `Adrian` SCENE!!!\")\n            for config in self.TRAIN_CONFIGS_PER_PROCESS:\n                config.defrost()\n                config.DATASET.CONTENT_SCENES = [\"Adrian\"]\n                config.freeze()\n\n        self.TEST_CONFIG_PER_PROCESS = construct_env_configs(\n            self.TEST_CONFIG, allow_scene_repeat=False\n        )\n\n    def train_scenes_path(self):\n        return self.TASK_DATA_DIR_TEMPLATE.format(*([\"train\"] * 2))\n\n    def valid_scenes_path(self):\n        return self.TASK_DATA_DIR_TEMPLATE.format(*([\"val\"] * 2))\n\n    def test_scenes_path(self):\n        get_logger().warning(\"Running tests on the validation set!\")\n        return self.TASK_DATA_DIR_TEMPLATE.format(*([\"val\"] * 2))\n        # return self.TASK_DATA_DIR_TEMPLATE.format(*([\"test\"] * 2))\n\n    @classmethod\n    def tag(cls):\n        return \"PointNav\"\n\n    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:\n        return tuple()\n\n    def machine_params(self, mode=\"train\", **kwargs):\n        has_gpus = torch.cuda.is_available()\n        if not has_gpus:\n            gpu_ids = []\n            nprocesses = 1\n        elif mode == \"train\":\n            gpu_ids = self.train_gpu_ids\n            nprocesses = self.num_train_processes\n        elif mode == \"valid\":\n            gpu_ids = self.val_gpu_ids\n            nprocesses = 1 if self.run_valid else 0\n        elif mode == \"test\":\n            gpu_ids = self.test_gpu_ids\n            nprocesses = self.num_test_processes\n        else:\n            raise NotImplementedError(\"mode must be 'train', 'valid', or 'test'.\")\n\n        if has_gpus:\n            nprocesses = evenly_distribute_count_into_bins(nprocesses, len(gpu_ids))\n\n        sensor_preprocessor_graph = (\n            SensorPreprocessorGraph(\n                
source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,\n                preprocessors=self.preprocessors(),\n            )\n            if mode == \"train\"\n            or (\n                (isinstance(nprocesses, int) and nprocesses > 0)\n                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)\n            )\n            else None\n        )\n\n        return MachineParams(\n            nprocesses=nprocesses,\n            devices=gpu_ids,\n            sensor_preprocessor_graph=sensor_preprocessor_graph,\n        )\n\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return PointNavTaskSampler(\n            **{\"failed_end_reward\": cls.FAILED_END_REWARD, **kwargs}  # type: ignore\n        )\n\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        config = self.TRAIN_CONFIGS_PER_PROCESS[process_ind]\n        return {\n            \"env_config\": config,\n            \"max_steps\": self.MAX_STEPS,\n            \"sensors\": self.SENSORS,\n            \"action_space\": self.ACTION_SPACE,\n            \"distance_to_goal\": self.DISTANCE_TO_GOAL,\n        }\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        if total_processes != 1:\n            raise NotImplementedError(\n                \"In validation, `total_processes` must equal 1 for habitat tasks\"\n            )\n        return {\n            \"env_config\": self.VALID_CONFIG,\n            \"max_steps\": self.MAX_STEPS,\n            \"sensors\": self.SENSORS,\n            \"action_space\": 
gym.spaces.Discrete(len(PointNavTask.class_action_names())),\n            \"distance_to_goal\": self.DISTANCE_TO_GOAL,\n        }\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        config = self.TEST_CONFIG_PER_PROCESS[process_ind]\n        return {\n            \"env_config\": config,\n            \"max_steps\": self.MAX_STEPS,\n            \"sensors\": self.SENSORS,\n            \"action_space\": gym.spaces.Discrete(len(PointNavTask.class_action_names())),\n            \"distance_to_goal\": self.DISTANCE_TO_GOAL,\n        }\n"
  },
  {
    "path": "projects/pointnav_baselines/experiments/habitat/pointnav_habitat_depth_simpleconvgru_ddppo.py",
    "content": "from allenact.utils.experiment_utils import TrainingPipeline\nfrom allenact_plugins.habitat_plugin.habitat_sensors import (\n    DepthSensorHabitat,\n    TargetCoordinatesSensorHabitat,\n)\nfrom projects.pointnav_baselines.experiments.habitat.pointnav_habitat_base import (\n    PointNavHabitatBaseConfig,\n)\nfrom projects.pointnav_baselines.mixins import (\n    PointNavPPOMixin,\n    PointNavUnfrozenResNetWithGRUActorCriticMixin,\n)\n\n\nclass PointNavHabitatDepthDeterministiSimpleConvGRUDDPPOExperimentConfig(\n    PointNavHabitatBaseConfig,\n):\n    \"\"\"An Point Navigation experiment configuration in Habitat with Depth\n    input.\"\"\"\n\n    SENSORS = [\n        DepthSensorHabitat(\n            height=PointNavHabitatBaseConfig.SCREEN_SIZE,\n            width=PointNavHabitatBaseConfig.SCREEN_SIZE,\n            use_normalization=True,\n        ),\n        TargetCoordinatesSensorHabitat(coordinate_dims=2),\n    ]\n\n    def __init__(self):\n        super().__init__()\n\n        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(\n            backbone=\"simple_cnn\",\n            sensors=self.SENSORS,\n            auxiliary_uuids=[],\n            add_prev_actions=True,\n            multiple_beliefs=False,\n            belief_fusion=None,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return PointNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            normalize_advantage=True,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def create_model(self, **kwargs):\n        return self.model_creation_handler.create_model(**kwargs)\n\n    def tag(self):\n        return \"PointNav-Habitat-Depth-SimpleConv-DDPPO\"\n"
  },
  {
    "path": "projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgb_simpleconvgru_ddppo.py",
    "content": "from allenact.utils.experiment_utils import TrainingPipeline\nfrom allenact_plugins.habitat_plugin.habitat_sensors import RGBSensorHabitat\nfrom allenact_plugins.habitat_plugin.habitat_sensors import (\n    TargetCoordinatesSensorHabitat,\n)\nfrom projects.pointnav_baselines.experiments.habitat.pointnav_habitat_base import (\n    PointNavHabitatBaseConfig,\n)\nfrom projects.pointnav_baselines.mixins import PointNavPPOMixin\nfrom projects.pointnav_baselines.mixins import (\n    PointNavUnfrozenResNetWithGRUActorCriticMixin,\n)\n\n\nclass PointNavHabitatDepthDeterministiSimpleConvGRUDDPPOExperimentConfig(\n    PointNavHabitatBaseConfig\n):\n    \"\"\"An Point Navigation experiment configuration in Habitat with Depth\n    input.\"\"\"\n\n    SENSORS = [\n        RGBSensorHabitat(\n            height=PointNavHabitatBaseConfig.SCREEN_SIZE,\n            width=PointNavHabitatBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n        ),\n        TargetCoordinatesSensorHabitat(coordinate_dims=2),\n    ]\n\n    def __init__(self):\n        super().__init__()\n\n        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(\n            backbone=\"simple_cnn\",\n            sensors=self.SENSORS,\n            auxiliary_uuids=[],\n            add_prev_actions=True,\n            multiple_beliefs=False,\n            belief_fusion=None,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return PointNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            normalize_advantage=True,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def create_model(self, **kwargs):\n        return self.model_creation_handler.create_model(**kwargs)\n\n    @classmethod\n    def tag(cls):\n        return \"PointNav-Habitat-RGB-SimpleConv-DDPPO\"\n"
  },
  {
    "path": "projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgbd_simpleconvgru_ddppo.py",
    "content": "from allenact.utils.experiment_utils import TrainingPipeline\nfrom allenact_plugins.habitat_plugin.habitat_sensors import DepthSensorHabitat\nfrom allenact_plugins.habitat_plugin.habitat_sensors import RGBSensorHabitat\nfrom allenact_plugins.habitat_plugin.habitat_sensors import (\n    TargetCoordinatesSensorHabitat,\n)\nfrom projects.pointnav_baselines.experiments.habitat.pointnav_habitat_base import (\n    PointNavHabitatBaseConfig,\n)\nfrom projects.pointnav_baselines.mixins import PointNavPPOMixin\nfrom projects.pointnav_baselines.mixins import (\n    PointNavUnfrozenResNetWithGRUActorCriticMixin,\n)\n\n\nclass PointNavHabitatDepthDeterministiSimpleConvGRUDDPPOExperimentConfig(\n    PointNavHabitatBaseConfig\n):\n    \"\"\"An Point Navigation experiment configuration in Habitat with RGBD\n    input.\"\"\"\n\n    SENSORS = [\n        RGBSensorHabitat(\n            height=PointNavHabitatBaseConfig.SCREEN_SIZE,\n            width=PointNavHabitatBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n        ),\n        DepthSensorHabitat(\n            height=PointNavHabitatBaseConfig.SCREEN_SIZE,\n            width=PointNavHabitatBaseConfig.SCREEN_SIZE,\n            use_normalization=True,\n        ),\n        TargetCoordinatesSensorHabitat(coordinate_dims=2),\n    ]\n\n    def __init__(self):\n        super().__init__()\n\n        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(\n            backbone=\"simple_cnn\",\n            sensors=self.SENSORS,\n            auxiliary_uuids=[],\n            add_prev_actions=True,\n            multiple_beliefs=False,\n            belief_fusion=None,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return PointNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            normalize_advantage=True,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        
)\n\n    def create_model(self, **kwargs):\n        return self.model_creation_handler.create_model(**kwargs)\n\n    def tag(self):\n        return \"PointNav-Habitat-RGBD-SimpleConv-DDPPO\"\n"
  },
  {
    "path": "projects/pointnav_baselines/experiments/ithor/__init__.py",
    "content": ""
  },
  {
    "path": "projects/pointnav_baselines/experiments/ithor/pointnav_ithor_base.py",
    "content": "import os\nfrom abc import ABC\n\nfrom projects.pointnav_baselines.experiments.pointnav_thor_base import (\n    PointNavThorBaseConfig,\n)\n\n\nclass PointNaviThorBaseConfig(PointNavThorBaseConfig, ABC):\n    \"\"\"The base config for all iTHOR PointNav experiments.\"\"\"\n\n    NUM_PROCESSES = 40\n\n    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), \"datasets/ithor-pointnav/train\")\n    VAL_DATASET_DIR = os.path.join(os.getcwd(), \"datasets/ithor-pointnav/val\")\n"
  },
  {
    "path": "projects/pointnav_baselines/experiments/ithor/pointnav_ithor_depth_simpleconvgru_ddppo.py",
    "content": "from allenact.utils.experiment_utils import TrainingPipeline\nfrom allenact_plugins.robothor_plugin.robothor_sensors import (\n    DepthSensorThor,\n    GPSCompassSensorRoboThor,\n)\nfrom projects.pointnav_baselines.mixins import (\n    PointNavUnfrozenResNetWithGRUActorCriticMixin,\n)\nfrom projects.pointnav_baselines.experiments.ithor.pointnav_ithor_base import (\n    PointNaviThorBaseConfig,\n)\nfrom projects.pointnav_baselines.mixins import PointNavPPOMixin\n\n\nclass PointNaviThorDepthPPOExperimentConfig(PointNaviThorBaseConfig):\n    \"\"\"A Point Navigation experiment configuration in iThor with Depth\n    input.\"\"\"\n\n    SENSORS = [\n        DepthSensorThor(\n            height=PointNaviThorBaseConfig.SCREEN_SIZE,\n            width=PointNaviThorBaseConfig.SCREEN_SIZE,\n            use_normalization=True,\n            uuid=\"depth_lowres\",\n        ),\n        GPSCompassSensorRoboThor(),\n    ]\n\n    def __init__(self):\n        super().__init__()\n\n        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(\n            backbone=\"simple_cnn\",\n            sensors=self.SENSORS,\n            auxiliary_uuids=[],\n            add_prev_actions=True,\n            multiple_beliefs=False,\n            belief_fusion=None,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return PointNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            normalize_advantage=True,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def create_model(self, **kwargs):\n        return self.model_creation_handler.create_model(**kwargs)\n\n    def tag(self):\n        return \"PointNav-iTHOR-Depth-SimpleConv-DDPPO\"\n"
  },
  {
    "path": "projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgb_simpleconvgru_ddppo.py",
    "content": "from allenact.utils.experiment_utils import TrainingPipeline\nfrom allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor\nfrom allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor\nfrom projects.pointnav_baselines.mixins import (\n    PointNavUnfrozenResNetWithGRUActorCriticMixin,\n)\nfrom projects.pointnav_baselines.experiments.ithor.pointnav_ithor_base import (\n    PointNaviThorBaseConfig,\n)\nfrom projects.pointnav_baselines.mixins import PointNavPPOMixin\n\n\nclass PointNaviThorRGBPPOExperimentConfig(PointNaviThorBaseConfig):\n    \"\"\"A Point Navigation experiment configuration in iThor with RGB input.\"\"\"\n\n    SENSORS = [\n        RGBSensorThor(\n            height=PointNaviThorBaseConfig.SCREEN_SIZE,\n            width=PointNaviThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        GPSCompassSensorRoboThor(),\n    ]\n\n    def __init__(self):\n        super().__init__()\n\n        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(\n            backbone=\"simple_cnn\",\n            sensors=self.SENSORS,\n            auxiliary_uuids=[],\n            add_prev_actions=True,\n            multiple_beliefs=False,\n            belief_fusion=None,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return PointNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            normalize_advantage=True,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def create_model(self, **kwargs):\n        return self.model_creation_handler.create_model(**kwargs)\n\n    def tag(self):\n        return \"PointNav-iTHOR-RGB-SimpleConv-DDPPO\"\n"
  },
  {
    "path": "projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgbd_simpleconvgru_ddppo.py",
    "content": "from allenact.utils.experiment_utils import TrainingPipeline\nfrom allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor\nfrom allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor\nfrom allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor\nfrom projects.pointnav_baselines.mixins import (\n    PointNavUnfrozenResNetWithGRUActorCriticMixin,\n)\nfrom projects.pointnav_baselines.experiments.ithor.pointnav_ithor_base import (\n    PointNaviThorBaseConfig,\n)\nfrom projects.pointnav_baselines.mixins import PointNavPPOMixin\n\n\nclass PointNaviThorRGBDPPOExperimentConfig(PointNaviThorBaseConfig):\n    \"\"\"An Point Navigation experiment configuration in iThor with RGBD\n    input.\"\"\"\n\n    SENSORS = [\n        RGBSensorThor(\n            height=PointNaviThorBaseConfig.SCREEN_SIZE,\n            width=PointNaviThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        DepthSensorThor(\n            height=PointNaviThorBaseConfig.SCREEN_SIZE,\n            width=PointNaviThorBaseConfig.SCREEN_SIZE,\n            use_normalization=True,\n            uuid=\"depth_lowres\",\n        ),\n        GPSCompassSensorRoboThor(),\n    ]\n\n    def __init__(self):\n        super().__init__()\n\n        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(\n            backbone=\"simple_cnn\",\n            sensors=self.SENSORS,\n            auxiliary_uuids=[],\n            add_prev_actions=True,\n            multiple_beliefs=False,\n            belief_fusion=None,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return PointNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            normalize_advantage=True,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def create_model(self, 
**kwargs):\n        return self.model_creation_handler.create_model(**kwargs)\n\n    def tag(self):\n        return \"PointNav-iTHOR-RGBD-SimpleConv-DDPPO\"\n"
  },
  {
    "path": "projects/pointnav_baselines/experiments/pointnav_base.py",
    "content": "from abc import ABC\nfrom typing import Optional, Sequence\n\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig\nfrom allenact.base_abstractions.sensor import Sensor\n\n\nclass PointNavBaseConfig(ExperimentConfig, ABC):\n    \"\"\"The base config for all PointNav experiments.\"\"\"\n\n    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None\n    SENSORS: Optional[Sequence[Sensor]] = None\n\n    STEP_SIZE = 0.25\n    ROTATION_DEGREES = 30.0\n    DISTANCE_TO_GOAL = 0.2\n    STOCHASTIC = True\n\n    CAMERA_WIDTH = 400\n    CAMERA_HEIGHT = 300\n    SCREEN_SIZE = 224\n    MAX_STEPS = 500\n\n    def __init__(self):\n        self.REWARD_CONFIG = {\n            \"step_penalty\": -0.01,\n            \"goal_success_reward\": 10.0,\n            \"failed_stop_reward\": 0.0,\n            \"reached_max_steps_reward\": 0.0,\n            \"shaping_weight\": 1.0,\n        }\n"
  },
  {
    "path": "projects/pointnav_baselines/experiments/pointnav_thor_base.py",
    "content": "import glob\nimport os\nimport platform\nfrom abc import ABC\nfrom math import ceil\nfrom typing import Dict, Any, List, Optional, Sequence\n\nimport ai2thor\nimport gym\nimport numpy as np\nimport torch\nfrom packaging import version\n\nfrom allenact.base_abstractions.experiment_config import MachineParams\nfrom allenact.base_abstractions.preprocessor import SensorPreprocessorGraph\nfrom allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.utils.experiment_utils import evenly_distribute_count_into_bins\nfrom allenact.utils.system import get_logger\nfrom allenact_plugins.ithor_plugin.ithor_util import get_open_x_displays\nfrom allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor\nfrom allenact_plugins.robothor_plugin.robothor_task_samplers import (\n    PointNavDatasetTaskSampler,\n)\nfrom allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask\nfrom projects.pointnav_baselines.experiments.pointnav_base import PointNavBaseConfig\n\nif ai2thor.__version__ not in [\"0.0.1\", None] and version.parse(\n    ai2thor.__version__\n) < version.parse(\"2.7.2\"):\n    raise ImportError(\n        \"To run the PointNav baseline experiments you must use\"\n        \" ai2thor version 2.7.1 or higher.\"\n    )\n\n\nclass PointNavThorBaseConfig(PointNavBaseConfig, ABC):\n    \"\"\"The base config for all iTHOR PointNav experiments.\"\"\"\n\n    NUM_PROCESSES: Optional[int] = None\n    TRAIN_GPU_IDS = list(range(torch.cuda.device_count()))\n    VALID_GPU_IDS = [torch.cuda.device_count() - 1]\n    TEST_GPU_IDS = [torch.cuda.device_count() - 1]\n\n    TRAIN_DATASET_DIR: Optional[str] = None\n    VAL_DATASET_DIR: Optional[str] = None\n\n    TARGET_TYPES: Optional[Sequence[str]] = None\n\n    ACTION_SPACE = gym.spaces.Discrete(len(PointNavTask.class_action_names()))\n\n    def __init__(self):\n        super().__init__()\n        self.ENV_ARGS = 
dict(\n            width=self.CAMERA_WIDTH,\n            height=self.CAMERA_HEIGHT,\n            continuousMode=True,\n            applyActionNoise=self.STOCHASTIC,\n            rotateStepDegrees=self.ROTATION_DEGREES,\n            gridSize=self.STEP_SIZE,\n            snapToGrid=False,\n            agentMode=\"bot\",\n            include_private_scenes=False,\n            renderDepthImage=any(isinstance(s, DepthSensorThor) for s in self.SENSORS),\n        )\n\n    def preprocessors(self):\n        return tuple()\n\n    def machine_params(self, mode=\"train\", **kwargs):\n        sampler_devices: Sequence[int] = []\n        if mode == \"train\":\n            workers_per_device = 1\n            gpu_ids = (\n                []\n                if not torch.cuda.is_available()\n                else self.TRAIN_GPU_IDS * workers_per_device\n            )\n            nprocesses = (\n                1\n                if not torch.cuda.is_available()\n                else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))\n            )\n            sampler_devices = self.TRAIN_GPU_IDS\n        elif mode == \"valid\":\n            nprocesses = 1 if torch.cuda.is_available() else 0\n            gpu_ids = [] if not torch.cuda.is_available() else self.VALID_GPU_IDS\n        elif mode == \"test\":\n            nprocesses = 10\n            gpu_ids = [] if not torch.cuda.is_available() else self.TEST_GPU_IDS\n        else:\n            raise NotImplementedError(\"mode must be 'train', 'valid', or 'test'.\")\n\n        sensor_preprocessor_graph = (\n            SensorPreprocessorGraph(\n                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,\n                preprocessors=self.preprocessors(),\n            )\n            if mode == \"train\"\n            or (\n                (isinstance(nprocesses, int) and nprocesses > 0)\n                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)\n            )\n            
else None\n        )\n\n        return MachineParams(\n            nprocesses=nprocesses,\n            devices=gpu_ids,\n            sampler_devices=(\n                sampler_devices if mode == \"train\" else gpu_ids\n            ),  # ignored with > 1 gpu_ids\n            sensor_preprocessor_graph=sensor_preprocessor_graph,\n        )\n\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return PointNavDatasetTaskSampler(**kwargs)\n\n    @staticmethod\n    def _partition_inds(n: int, num_parts: int):\n        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(\n            np.int32\n        )\n\n    def _get_sampler_args_for_scene_split(\n        self,\n        scenes_dir: str,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]],\n        seeds: Optional[List[int]],\n        deterministic_cudnn: bool,\n        include_expert_sensor: bool = True,\n    ) -> Dict[str, Any]:\n        path = os.path.join(scenes_dir, \"*.json.gz\")\n        scenes = [scene.split(\"/\")[-1].split(\".\")[0] for scene in glob.glob(path)]\n        if len(scenes) == 0:\n            raise RuntimeError(\n                (\n                    \"Could find no scene dataset information in directory {}.\"\n                    \" Are you sure you've downloaded them? 
\"\n                    \" If not, see https://allenact.org/installation/download-datasets/ information\"\n                    \" on how this can be done.\"\n                ).format(scenes_dir)\n            )\n\n        oversample_warning = (\n            f\"Warning: oversampling some of the scenes ({scenes}) to feed all processes ({total_processes}).\"\n            \" You can avoid this by setting a number of workers divisible by the number of scenes\"\n        )\n        if total_processes > len(scenes):  # oversample some scenes -> bias\n            if total_processes % len(scenes) != 0:\n                get_logger().warning(oversample_warning)\n            scenes = scenes * int(ceil(total_processes / len(scenes)))\n            scenes = scenes[: total_processes * (len(scenes) // total_processes)]\n        elif len(scenes) % total_processes != 0:\n            get_logger().warning(oversample_warning)\n\n        inds = self._partition_inds(len(scenes), total_processes)\n\n        x_display: Optional[str] = None\n        if platform.system() == \"Linux\":\n            x_displays = get_open_x_displays(throw_error_if_empty=True)\n\n            if len([d for d in devices if d != torch.device(\"cpu\")]) > len(x_displays):\n                get_logger().warning(\n                    f\"More GPU devices found than X-displays (devices: `{x_displays}`, x_displays: `{x_displays}`).\"\n                    f\" This is not necessarily a bad thing but may mean that you're not using GPU memory as\"\n                    f\" efficiently as possible. 
Consider following the instructions here:\"\n                    f\" https://allenact.org/installation/installation-framework/#installation-of-ithor-ithor-plugin\"\n                    f\" describing how to start an X-display on every GPU.\"\n                )\n            x_display = x_displays[process_ind % len(x_displays)]\n\n        return {\n            \"scenes\": scenes[inds[process_ind] : inds[process_ind + 1]],\n            \"object_types\": self.TARGET_TYPES,\n            \"max_steps\": self.MAX_STEPS,\n            \"sensors\": [\n                s\n                for s in self.SENSORS\n                if (include_expert_sensor or not isinstance(s, ExpertActionSensor))\n            ],\n            \"action_space\": self.ACTION_SPACE,\n            \"seed\": seeds[process_ind] if seeds is not None else None,\n            \"deterministic_cudnn\": deterministic_cudnn,\n            \"rewards_config\": self.REWARD_CONFIG,\n            \"env_args\": {\n                **self.ENV_ARGS,\n                \"x_display\": x_display,\n            },\n        }\n\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            os.path.join(self.TRAIN_DATASET_DIR, \"episodes\"),\n            process_ind,\n            total_processes,\n            devices=devices,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"scene_directory\"] = self.TRAIN_DATASET_DIR\n        res[\"loop_dataset\"] = True\n        res[\"allow_flipping\"] = True\n        return res\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n   
     deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            os.path.join(self.VAL_DATASET_DIR, \"episodes\"),\n            process_ind,\n            total_processes,\n            devices=devices,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n            include_expert_sensor=False,\n        )\n        res[\"scene_directory\"] = self.VAL_DATASET_DIR\n        res[\"loop_dataset\"] = False\n        return res\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self.valid_task_sampler_args(\n            process_ind=process_ind,\n            total_processes=total_processes,\n            devices=devices,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n"
  },
  {
    "path": "projects/pointnav_baselines/experiments/robothor/__init__.py",
    "content": ""
  },
  {
    "path": "projects/pointnav_baselines/experiments/robothor/pointnav_robothor_base.py",
    "content": "import os\nfrom abc import ABC\n\nfrom projects.pointnav_baselines.experiments.pointnav_thor_base import (\n    PointNavThorBaseConfig,\n)\n\n\nclass PointNavRoboThorBaseConfig(PointNavThorBaseConfig, ABC):\n    \"\"\"The base config for all RoboTHOR PointNav experiments.\"\"\"\n\n    NUM_PROCESSES = 60\n\n    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), \"datasets/robothor-pointnav/train\")\n    VAL_DATASET_DIR = os.path.join(os.getcwd(), \"datasets/robothor-pointnav/val\")\n"
  },
  {
    "path": "projects/pointnav_baselines/experiments/robothor/pointnav_robothor_depth_simpleconvgru_ddppo.py",
    "content": "from allenact.utils.experiment_utils import TrainingPipeline\nfrom allenact_plugins.robothor_plugin.robothor_sensors import (\n    DepthSensorThor,\n    GPSCompassSensorRoboThor,\n)\n\n\nfrom projects.pointnav_baselines.mixins import (\n    PointNavUnfrozenResNetWithGRUActorCriticMixin,\n)\nfrom projects.pointnav_baselines.experiments.robothor.pointnav_robothor_base import (\n    PointNavRoboThorBaseConfig,\n)\nfrom projects.pointnav_baselines.mixins import PointNavPPOMixin\n\n\nclass PointNavRoboThorRGBPPOExperimentConfig(\n    PointNavRoboThorBaseConfig,\n):\n    \"\"\"A Point Navigation experiment configuration in RoboTHOR with Depth\n    input.\"\"\"\n\n    SENSORS = [\n        DepthSensorThor(\n            height=PointNavRoboThorBaseConfig.SCREEN_SIZE,\n            width=PointNavRoboThorBaseConfig.SCREEN_SIZE,\n            use_normalization=True,\n            uuid=\"depth_lowres\",\n        ),\n        GPSCompassSensorRoboThor(),\n    ]\n\n    def __init__(self):\n        super().__init__()\n\n        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(\n            backbone=\"simple_cnn\",\n            sensors=self.SENSORS,\n            auxiliary_uuids=[],\n            add_prev_actions=True,\n            multiple_beliefs=False,\n            belief_fusion=None,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return PointNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            normalize_advantage=True,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def create_model(self, **kwargs):\n        return self.model_creation_handler.create_model(**kwargs)\n\n    def tag(self):\n        return \"PointNav-RoboTHOR-Depth-SimpleConv-DDPPO\"\n"
  },
  {
    "path": "projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo.py",
    "content": "from allenact.utils.experiment_utils import TrainingPipeline\nfrom allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor\nfrom allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor\nfrom projects.pointnav_baselines.mixins import (\n    PointNavUnfrozenResNetWithGRUActorCriticMixin,\n)\nfrom projects.pointnav_baselines.experiments.robothor.pointnav_robothor_base import (\n    PointNavRoboThorBaseConfig,\n)\nfrom projects.pointnav_baselines.mixins import PointNavPPOMixin\n\n\nclass PointNavRoboThorRGBPPOExperimentConfig(\n    PointNavRoboThorBaseConfig,\n):\n    \"\"\"A Point Navigation experiment configuration in RoboThor with RGB\n    input.\"\"\"\n\n    SENSORS = [\n        RGBSensorThor(\n            height=PointNavRoboThorBaseConfig.SCREEN_SIZE,\n            width=PointNavRoboThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        GPSCompassSensorRoboThor(),\n    ]\n\n    def __init__(self):\n        super().__init__()\n\n        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(\n            backbone=\"simple_cnn\",\n            sensors=self.SENSORS,\n            auxiliary_uuids=[],\n            add_prev_actions=True,\n            multiple_beliefs=False,\n            belief_fusion=None,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return PointNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            normalize_advantage=True,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        )\n\n    def create_model(self, **kwargs):\n        return self.model_creation_handler.create_model(**kwargs)\n\n    def tag(self):\n        return \"PointNav-RoboTHOR-RGB-SimpleConv-DDPPO\"\n"
  },
  {
    "path": "projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgbd_simpleconvgru_ddppo.py",
    "content": "from allenact.utils.experiment_utils import TrainingPipeline\n\nfrom allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor\nfrom allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor\nfrom allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor\nfrom projects.pointnav_baselines.mixins import (\n    PointNavUnfrozenResNetWithGRUActorCriticMixin,\n)\nfrom projects.pointnav_baselines.experiments.robothor.pointnav_robothor_base import (\n    PointNavRoboThorBaseConfig,\n)\nfrom projects.pointnav_baselines.mixins import PointNavPPOMixin\n\n\nclass PointNavRoboThorRGBPPOExperimentConfig(\n    PointNavRoboThorBaseConfig,\n):\n    \"\"\"An Point Navigation experiment configuration in RoboThor with RGBD\n    input.\"\"\"\n\n    SENSORS = [\n        RGBSensorThor(\n            height=PointNavRoboThorBaseConfig.SCREEN_SIZE,\n            width=PointNavRoboThorBaseConfig.SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        DepthSensorThor(\n            height=PointNavRoboThorBaseConfig.SCREEN_SIZE,\n            width=PointNavRoboThorBaseConfig.SCREEN_SIZE,\n            use_normalization=True,\n            uuid=\"depth_lowres\",\n        ),\n        GPSCompassSensorRoboThor(),\n    ]\n\n    def __init__(self):\n        super().__init__()\n\n        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(\n            backbone=\"simple_cnn\",\n            sensors=self.SENSORS,\n            auxiliary_uuids=[],\n            add_prev_actions=True,\n            multiple_beliefs=False,\n            belief_fusion=None,\n        )\n\n    def training_pipeline(self, **kwargs) -> TrainingPipeline:\n        return PointNavPPOMixin.training_pipeline(\n            auxiliary_uuids=[],\n            multiple_beliefs=False,\n            normalize_advantage=True,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n        
)\n\n    def create_model(self, **kwargs):\n        return self.model_creation_handler.create_model(**kwargs)\n\n    def tag(self):\n        return \"PointNav-RoboTHOR-RGBD-SimpleConv-DDPPO\"\n"
  },
  {
    "path": "projects/pointnav_baselines/mixins.py",
    "content": "from typing import Optional\nfrom typing import Sequence\n\nimport attr\nimport gym\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses import PPO\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig\nfrom allenact.base_abstractions.sensor import Sensor\nfrom allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor\nfrom allenact.utils.experiment_utils import (\n    Builder,\n    TrainingPipeline,\n    PipelineStage,\n    LinearDecay,\n)\nfrom projects.objectnav_baselines.mixins import update_with_auxiliary_losses\n\n# fmt: off\ntry:\n    # Habitat may not be installed, just create a fake class here in that case\n    from allenact_plugins.habitat_plugin.habitat_sensors import TargetCoordinatesSensorHabitat\nexcept ImportError:\n    class TargetCoordinatesSensorHabitat:  #type:ignore\n        pass\n# fmt: on\n\nfrom allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor\nfrom allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask\nfrom allenact_plugins.navigation_plugin.pointnav.models import PointNavActorCritic\n\n\n@attr.s(kw_only=True)\nclass PointNavUnfrozenResNetWithGRUActorCriticMixin:\n    backbone: str = attr.ib()\n    sensors: Sequence[Sensor] = attr.ib()\n    auxiliary_uuids: Sequence[str] = attr.ib()\n    add_prev_actions: bool = attr.ib()\n    multiple_beliefs: bool = attr.ib()\n    belief_fusion: Optional[str] = attr.ib()\n\n    def create_model(self, **kwargs) -> nn.Module:\n        rgb_uuid = next(\n            (s.uuid for s in self.sensors if isinstance(s, RGBSensor)), None\n        )\n        depth_uuid = next(\n            (s.uuid for s in self.sensors if isinstance(s, DepthSensor)), None\n        )\n        goal_sensor_uuid = next(\n            (\n                s.uuid\n                for s in self.sensors\n                if isinstance(\n              
      s, (GPSCompassSensorRoboThor, TargetCoordinatesSensorHabitat)\n                )\n            )\n        )\n\n        return PointNavActorCritic(\n            # Env and Tak\n            action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),\n            observation_space=kwargs[\"sensor_preprocessor_graph\"].observation_spaces,\n            rgb_uuid=rgb_uuid,\n            depth_uuid=depth_uuid,\n            goal_sensor_uuid=goal_sensor_uuid,\n            # RNN\n            hidden_size=(\n                228 if self.multiple_beliefs and len(self.auxiliary_uuids) > 1 else 512\n            ),\n            num_rnn_layers=1,\n            rnn_type=\"GRU\",\n            add_prev_actions=self.add_prev_actions,\n            action_embed_size=4,\n            # CNN\n            backbone=self.backbone,\n            resnet_baseplanes=32,\n            embed_coordinates=False,\n            coordinate_dims=2,\n            # Aux\n            auxiliary_uuids=self.auxiliary_uuids,\n            multiple_beliefs=self.multiple_beliefs,\n            beliefs_fusion=self.belief_fusion,\n        )\n\n\nclass PointNavPPOMixin:\n    @staticmethod\n    def training_pipeline(\n        auxiliary_uuids: Sequence[str],\n        multiple_beliefs: bool,\n        normalize_advantage: bool,\n        advance_scene_rollout_period: Optional[int] = None,\n    ) -> TrainingPipeline:\n        ppo_steps = int(75000000)\n        lr = 3e-4\n        num_mini_batch = 1\n        update_repeats = 4\n        num_steps = 128\n        save_interval = 5000000\n        log_interval = 10000 if torch.cuda.is_available() else 1\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 0.95\n        max_grad_norm = 0.5\n\n        named_losses = {\n            \"ppo_loss\": (PPO(**PPOConfig, normalize_advantage=normalize_advantage), 1.0)\n        }\n        named_losses = update_with_auxiliary_losses(\n            named_losses=named_losses,\n            auxiliary_uuids=auxiliary_uuids,\n   
         multiple_beliefs=multiple_beliefs,\n        )\n\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=log_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses={key: val[0] for key, val in named_losses.items()},\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=advance_scene_rollout_period,\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=list(named_losses.keys()),\n                    max_stage_steps=ppo_steps,\n                    loss_weights=[val[1] for val in named_losses.values()],\n                )\n            ],\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}\n            ),\n        )\n"
  },
  {
    "path": "projects/tutorials/__init__.py",
    "content": ""
  },
  {
    "path": "projects/tutorials/distributed_objectnav_tutorial.py",
    "content": "# literate: tutorials/distributed-objectnav-tutorial.md\n# %%\n\"\"\"# Tutorial: Distributed training across multiple nodes.\"\"\"\n\n# %%\n\"\"\"\n**Note** The provided commands to execute in this tutorial include a configuration script to\n[clone the full library](../installation/installation-allenact.md#full-library). Setting up headless THOR might\nrequire superuser privileges. We also assume [NCCL](https://developer.nvidia.com/nccl) is available for communication\nacross computation nodes and all nodes have a running `ssh` server. \n\nThe experimental tools and commands for distributed training introduced below assume a Linux OS (tested on Ubuntu\n18.04).\n\nIn this tutorial, we:\n\n1. Introduce the available API for training across multiple nodes, as well as experimental scripts for distributed\n configuration, training start and termination, and remote command execution.\n1. Introduce the headless mode for [AI2-THOR](https://ai2thor.allenai.org/) in `AllenAct`. Note that, in contrast with\nprevious tutorials using AI2-THOR, this time we don't require an xserver (in Linux) to be active.\n1. 
Show a training example for RoboTHOR ObjectNav on a cluster, with each node having sufficient GPUs and GPU memory to\nhost 60 experience samplers collecting rollout data.\n\nThanks to the massive parallelization of experience collection and model training enabled by\n[DD-PPO](https://arxiv.org/abs/1911.00357), we can greatly speed up training by scaling across multiple nodes:\n\n![training speedup](../img/multinode_training.jpg)\n\n## The task: ObjectNav\n\nIn ObjectNav, the goal for the agent is to navigate to an object (possibly unseen during training) of a known given\nclass and signal task completion when it determines it has reached the goal.\n\n\n## Implementation\n\nFor this tutorial, we'll use the readily available `objectnav_baselines` project, which includes configurations for\na wide variety of object navigation experiments for both iTHOR and RoboTHOR. Since those configuration files are\ndefined for a single-node setup, we will mainly focus on the changes required in the `machine_params` and\n`training_pipeline` methods.\n\nNote that, in order to use the headless version of AI2-THOR, we currently need to install a specific THOR commit,\ndifferent from the default one in `robothor_plugin`. 
Note that this command is included in the configuration script\nbelow, so **we don't need to run this**:\n\n```bash\npip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+91139c909576f3bf95a187c5b02c6fd455d06b48\n```\n\nThe experiment config starts as follows:\n\"\"\"\n\n# %%\nimport math\nfrom typing import Optional, Sequence\n\nimport torch\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses import PPO\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig\nfrom allenact.utils.experiment_utils import (\n    Builder,\n    LinearDecay,\n    MultiLinearDecay,\n    TrainingPipeline,\n    PipelineStage,\n)\nfrom projects.objectnav_baselines.experiments.robothor.objectnav_robothor_rgb_resnet18gru_ddppo import (\n    ObjectNavRoboThorRGBPPOExperimentConfig as BaseConfig,\n)\n\n\nclass DistributedObjectNavRoboThorRGBPPOExperimentConfig(BaseConfig):\n    def tag(self) -> str:\n        return \"DistributedObjectNavRoboThorRGBPPO\"\n\n    # %%\n    \"\"\"We override ObjectNavRoboThorBaseConfig's THOR_COMMIT_ID to match the installed headless one:\"\"\"\n\n    # %%\n    THOR_COMMIT_ID = \"91139c909576f3bf95a187c5b02c6fd455d06b48\"\n\n    # %%\n    \"\"\"Also indicate that we're using headless THOR (for `task_sampler_args` methods):\"\"\"\n\n    # %%\n    THOR_IS_HEADLESS = True\n\n    # %%\n    \"\"\"**Temporary hack** Disable the `commit_id` argument passed to the THOR `Controller`'s `init` method:\"\"\"\n\n    # %%\n    def env_args(self):\n        res = super().env_args()\n        res.pop(\"commit_id\", None)\n        return res\n\n    # %%\n    \"\"\"\n    And, of course, define the number of nodes. 
This will be used by `machine_params` and `training_pipeline` below.\n    We override the existing `ExperimentConfig`'s `init` method to include control on the number of nodes:\n    \"\"\"\n\n    # %%\n    def __init__(\n        self,\n        distributed_nodes: int = 1,\n        num_train_processes: Optional[int] = None,\n        train_gpu_ids: Optional[Sequence[int]] = None,\n        val_gpu_ids: Optional[Sequence[int]] = None,\n        test_gpu_ids: Optional[Sequence[int]] = None,\n    ):\n        super().__init__(\n            num_train_processes=num_train_processes,\n            train_gpu_ids=train_gpu_ids,\n            val_gpu_ids=val_gpu_ids,\n            test_gpu_ids=test_gpu_ids,\n        )\n        self.distributed_nodes = distributed_nodes\n\n    # %%\n    \"\"\"\n    ### Machine parameters\n\n    **Note:** We assume that all nodes are identical (same number and model of GPUs and drivers).\n\n    The `machine_params` method will be invoked by `runner.py` with different arguments, e.g. to determine the\n    configuration for validation or training.\n\n    When working in distributed settings, `AllenAct` needs to know the total number of trainers across all nodes as well\n    as the local number of trainers. 
This is accomplished through the introduction of a `machine_id` keyword argument,\n    which will be used to define the training parameters as follows:\n    \"\"\"\n\n    # %%\n    def machine_params(self, mode=\"train\", **kwargs):\n        params = super().machine_params(mode, **kwargs)\n\n        if mode == \"train\":\n            params.devices = params.devices * self.distributed_nodes\n            params.nprocesses = params.nprocesses * self.distributed_nodes\n            params.sampler_devices = params.sampler_devices * self.distributed_nodes\n\n            if \"machine_id\" in kwargs:\n                machine_id = kwargs[\"machine_id\"]\n                assert (\n                    0 <= machine_id < self.distributed_nodes\n                ), f\"machine_id {machine_id} out of range [0, {self.distributed_nodes - 1}]\"\n\n                local_worker_ids = list(\n                    range(\n                        len(self.train_gpu_ids) * machine_id,\n                        len(self.train_gpu_ids) * (machine_id + 1),\n                    )\n                )\n\n                params.set_local_worker_ids(local_worker_ids)\n\n            # Confirm we're setting up train params nicely:\n            print(\n                f\"devices {params.devices}\"\n                f\"\\nnprocesses {params.nprocesses}\"\n                f\"\\nsampler_devices {params.sampler_devices}\"\n                f\"\\nlocal_worker_ids {params.local_worker_ids}\"\n            )\n        elif mode == \"valid\":\n            # Use all GPUs at their maximum capacity for training\n            # (you may run validation in a separate machine)\n            params.nprocesses = (0,)\n\n        return params\n\n    # %%\n    \"\"\"\n    In summary, we need to specify which indices in `devices`, `nprocesses` and `sampler_devices` correspond to the\n    local `machine_id` node (whenever a `machine_id` is given as a keyword argument), otherwise we specify the global\n    configuration.\n\n    ### 
Training pipeline\n\n    In preliminary ObjectNav experiments, we observe that small batches are useful during the initial training steps in\n    terms of sample efficiency, whereas large batches are preferred during the rest of training.\n    \n    In order to scale to the larger amount of collected data in multi-node settings, we will proceed with a two-stage\n    pipeline:\n    \n    1. In the first stage, we'll enforce a number of updates per amount of collected data similar to the\n    configuration with a single node by enforcing more batches per rollout (for about 30 million steps).\n    1. In the second stage we'll switch to a configuration with larger learning rate and batch size to be\n    used up to the grand total of 300 million experience steps.\n    \n    We first define a helper method to generate a learning rate curve with decay for each stage:\n    \"\"\"\n\n    # %%\n    @staticmethod\n    def lr_scheduler(small_batch_steps, transition_steps, ppo_steps, lr_scaling):\n        safe_small_batch_steps = int(small_batch_steps * 1.02)\n        large_batch_and_lr_steps = ppo_steps - safe_small_batch_steps - transition_steps\n\n        # Learning rate after small batch steps (assuming decay to 0)\n        break1 = 1.0 - safe_small_batch_steps / ppo_steps\n\n        # Initial learning rate for large batch (after transition from initial to large learning rate)\n        break2 = lr_scaling * (\n            1.0 - (safe_small_batch_steps + transition_steps) / ppo_steps\n        )\n        return MultiLinearDecay(\n            [\n                # Base learning rate phase for small batch (with linear decay towards 0)\n                LinearDecay(\n                    steps=safe_small_batch_steps,\n                    startp=1.0,\n                    endp=break1,\n                ),\n                # Allow the optimizer to adapt its statistics to the changes with a larger learning rate\n                LinearDecay(\n                    steps=transition_steps,\n 
                   startp=break1,\n                    endp=break2,\n                ),\n                # Scaled learning rate phase for large batch (with linear decay towards 0)\n                LinearDecay(\n                    steps=large_batch_and_lr_steps,\n                    startp=break2,\n                    endp=0,\n                ),\n            ]\n        )\n\n    # %%\n    \"\"\"\n    The training pipeline looks like:\n    \"\"\"\n\n    # %%\n    def training_pipeline(self, **kwargs):\n        # These params are identical to the baseline configuration for 60 samplers (1 machine)\n        ppo_steps = int(300e6)\n        lr = 3e-4\n        num_mini_batch = 1\n        update_repeats = 4\n        num_steps = 128\n        save_interval = 5000000\n        log_interval = 10000 if torch.cuda.is_available() else 1\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 0.95\n        max_grad_norm = 0.5\n\n        # We add 30 million steps for small batch learning\n        small_batch_steps = int(30e6)\n        # And a short transition phase towards large learning rate\n        # (see comment in the `lr_scheduler` helper method\n        transition_steps = int(2 / 3 * self.distributed_nodes * 1e6)\n\n        # Find exact number of samplers per GPU\n        assert (\n            self.num_train_processes % len(self.train_gpu_ids) == 0\n        ), \"Expected uniform number of samplers per GPU\"\n        samplers_per_gpu = self.num_train_processes // len(self.train_gpu_ids)\n\n        # Multiply num_mini_batch by the largest divisor of\n        # samplers_per_gpu to keep all batches of same size:\n        num_mini_batch_multiplier = [\n            i\n            for i in reversed(\n                range(1, min(samplers_per_gpu // 2, self.distributed_nodes) + 1)\n            )\n            if samplers_per_gpu % i == 0\n        ][0]\n\n        # Multiply update_repeats so that the product of this factor and\n        # num_mini_batch_multiplier is >= 
self.distributed_nodes:\n        update_repeats_multiplier = int(\n            math.ceil(self.distributed_nodes / num_mini_batch_multiplier)\n        )\n\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=log_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses={\"ppo_loss\": PPO(**PPOConfig, show_ratios=False)},\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,\n            pipeline_stages=[\n                # We increase the number of batches for the first stage to reach an\n                # equivalent number of updates per collected rollout data as in the\n                # 1 node/60 samplers setting\n                PipelineStage(\n                    loss_names=[\"ppo_loss\"],\n                    max_stage_steps=small_batch_steps,\n                    num_mini_batch=num_mini_batch * num_mini_batch_multiplier,\n                    update_repeats=update_repeats * update_repeats_multiplier,\n                ),\n                # The we proceed with the base configuration (leading to larger\n                # batches due to the increased number of samplers)\n                PipelineStage(\n                    loss_names=[\"ppo_loss\"],\n                    max_stage_steps=ppo_steps - small_batch_steps,\n                ),\n            ],\n            # We use the MultiLinearDecay curve defined by the helper function,\n            # setting the learning rate scaling as the square root of the number\n            # of nodes. 
Linear scaling might also work, but we leave that\n            # check to the reader.\n            lr_scheduler_builder=Builder(\n                LambdaLR,\n                {\n                    \"lr_lambda\": self.lr_scheduler(\n                        small_batch_steps=small_batch_steps,\n                        transition_steps=transition_steps,\n                        ppo_steps=ppo_steps,\n                        lr_scaling=math.sqrt(self.distributed_nodes),\n                    )\n                },\n            ),\n        )\n\n\n# %%\n\"\"\"\n## Multi-node configuration\n\n**Note:** In the following, we'll assume you don't have an available setup for distributed execution, such as\n[slurm](https://slurm.schedmd.com/documentation.html). If you do have access to a better alternative to set up and run\ndistributed processes, we encourage you to use that. The experimental distributed tools included here are intended for\na rather basic usage pattern that might not suit your needs.\n\nIf we haven't set up AllenAct with the headless version of AI2-THOR in our nodes, we can define a configuration script\nsimilar to:\n\n```bash\n#!/bin/bash\n\n# Prepare a virtualenv for allenact\nsudo apt-get install -y python3-venv\npython3 -mvenv ~/allenact_venv\nsource ~/allenact_venv/bin/activate\npip install -U pip wheel\n\n# Install AllenAct\ncd ~\ngit clone https://github.com/allenai/allenact.git\ncd allenact\n\n# Install AllenAct + RoboTHOR plugin dependencies\npip install -r requirements.txt\npip install -r allenact_plugins/robothor_plugin/extra_requirements.txt\n\n# Download + setup datasets\nbash datasets/download_navigation_datasets.sh robothor-objectnav\n\n# Install headless AI2-THOR and required libvulkan1\nsudo apt-get install -y libvulkan1\npip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+91139c909576f3bf95a187c5b02c6fd455d06b48\n\n# Download AI2-THOR binaries\npython -c \"from ai2thor.controller import Controller; c=Controller(); 
c.stop()\"\n\necho DONE\n```\n\nand save it as `headless_robothor_config.sh`. Note that some of the configuration steps in the script assume you have\nsuperuser privileges.\n\nThen, we can just copy this file to the first node in our cluster and run it with:\n\n```bash\nsource <PATH/TO/headless_robothor_config.sh>\n```\n\nIf everything went well, we should be able to\n\n```bash\ncd ~/allenact && source ~/allenact_venv/bin/activate\n```\n\nNote that we might need to install `libvulkan1` in each node (even if the AllenAct setup is shared across nodes) if it\nis not already available.\n\n### Local filesystems\n\nIf our cluster does not use a shared filesystem, we'll need to propagate the setup to the rest of nodes. Assuming\nwe can just `ssh` with the current user to all nodes, we can propagate our config with\n\n```bash\nscripts/dconfig.py --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \\\n --config_script <PATH/TO/headless_robothor_config.sh>\n```\n\nand we can check the state of the installation with the `scripts/dcommand.py` tool:\n\n```bash\nscripts/dcommand.py --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \\\n --command 'tail -n 5 ~/log_allenact_distributed_config'\n```\n\nIf everything went fine, all requirements are ready to start running our experiment.\n\n## Run your experiment\n\n**Note:** In this section, we again assume you don't have an available setup for distributed execution, such as\n[slurm](https://slurm.schedmd.com/documentation.html). If you do have access to a better alternative to setup/run\ndistributed processes, we encourage you to use that. The experimental distributed tools included here are intended for\na rather basic usage pattern that might not suit your needs.\n\nOur experimental extension to AllenAct's `main.py` script allows using practically identical commands to the ones\nused in a single-node setup to start our experiments. 
From the root `allenact` directory, we can simply invoke\n\n```bash\nscripts/dmain.py projects/tutorials/distributed_objectnav_tutorial.py \\\n --config_kwargs '{\"distributed_nodes\":3}' \\\n --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \\\n --env_activate_path ~/allenact_venv/bin/activate \\\n --allenact_path ~/allenact \\\n --distributed_ip_and_port <FIRST_IP_ADDRESS_IN_RUNS_ON_LIST>:<FREE_PORT_NUMBER_FOR_THIS_IP_ADDRESS>\n```\n\nThis script will do several things for you, including synchronization of the changes in the `allenact` directory\nto all machines, enabling virtual environments in each node, sharing the same random seed for all `main.py` instances,\nassigning `--machine_id` parameters required for multi-node training, and redirecting the process output to a log file\nunder the output results folder.\n\nNote that by changing the value associated with the `distributed_nodes` key in the `config_kwargs` map and the `runs_on`\nlist of IPs, we can easily scale our training to e.g. 1, 3, or 8 nodes as shown in the chart above. Note that for this\ncall to work unmodified, you should have sufficient GPUs/GPU memory to host 60 samplers per node.\n\n## Track and stop your experiment\n\nYou might have noticed that, when your experiment started with the above command, a file was created under\n`~/.allenact`. This file includes IP addresses and screen session IDs for all nodes. It can be used\nby the already introduced `scripts/dcommand.py` script, if we omit the `--runs_on` argument, to call a command on each\nnode via ssh; but most importantly it is used by the `scripts/dkill.py` script to terminate all screen sessions hosting\nour training processes.\n\n### Experiment tracking\n\nA simple way to check all machines are training, assuming you have `nvidia-smi` installed in all nodes, is to just call\n\n```bash\nscripts/dcommand.py\n```\n\nfrom the root `allenact` directory. 
If everything is working well, the GPU usage stats from `nvidia-smi` should reflect\nongoing activity. You can also add different commands to be executed by each node. It is of course also possible to run\ntensorboard on any of the nodes, if that's your preference.\n\n### Experiment termination\n\nJust call\n\n```bash\nscripts/dkill.py\n```\n\nAfter killing all involved screen sessions, you will be asked about whether you also want to delete the \"killfile\"\nstored under the `~/.allenact` directory (which might be your preferred option once all processes are terminated). \n\nWe hope this tutorial will help you start quickly testing new ideas! Even if we've only explored moderate settings of\nup to 480 experience samplers, you might want to consider some additional changes (like the\n[choice for the optimizer](https://arxiv.org/abs/2103.07013)) if you plan to run at larger scale. \n\"\"\"\n"
  },
  {
    "path": "projects/tutorials/gym_mujoco_tutorial.py",
    "content": "# literate: tutorials/gym-mujoco-tutorial.md\n# %%\n\"\"\"# Tutorial: OpenAI gym MuJoCo environment.\"\"\"\n\n# %%\n\"\"\"\n**Note** The provided commands to execute in this tutorial assume you have\n[installed the full library](../installation/installation-allenact.md#full-library) and the requirements for the\n`gym_plugin`. The latter can be installed by\n\n```bash\npip install -r allenact_plugins/gym_plugin/extra_requirements.txt\n```\n\nThe environments for this tutorial use the [MuJoCo](http://www.mujoco.org/) (**Mu**lti-**Jo**int dynamics in **Co**ntact) \nphysics simulator, which must also be installed properly by following the instructions \n[here](https://github.com/openai/mujoco-py).\n\n## The task\n\nFor this tutorial, we'll focus on one of the continuous-control environments under the `mujoco` group of `gym`\nenvironments: [Ant-v2](https://gym.openai.com/envs/Ant-v2/). In this task, the goal\nis to make a four-legged creature, \"ant\", walk forward as fast as possible. A random agent of \"Ant-v2\" is shown below.\n\n![The Ant-v2 task](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_random.gif).\n\nTo achieve the goal, we need to provide continuous control for the agent moving forward with four legs with the \n`x` velocity as high as possible for at most 1000 episode steps. The episode fails (i.e., is done) if the `z` position \nis out of the range [0.2, 1.0]. The action space has 8 dimensions and the observation space has 111 dimensions, \nwhich map to different body parts, including 3D position `(x,y,z)`, orientation (quaternion `x`,`y`,`z`,`w`) \nof the torso, and the joint angles, 3D velocity `(x,y,z)`, 3D angular velocity `(x,y,z)`, and joint velocities. \nThe rewards for the agent \"ant\" are composed of the forward rewards, healthy rewards, control cost, and contact cost. 
\n\n## Implementation\n\nFor this tutorial, we'll use the readily available `gym_plugin`, which includes a\n[wrapper for `gym` environments](../api/allenact_plugins/gym_plugin/gym_environment.md#gymenvironment), a\n[task sampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler) and\n[task definition](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymcontinuousbox2dtask), a\n[sensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to wrap the observations provided by the `gym`\nenvironment, and a simple [model](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic).\nThe experiment config, similar to the one used for the\n[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md), is defined as follows:\n\"\"\"\n\n# %%\nfrom typing import Dict, Optional, List, Any, cast\n\nimport gym\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPO\n\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic\nfrom allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor\n\nfrom allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler\nfrom allenact.utils.experiment_utils import (\n    TrainingPipeline,\n    Builder,\n    PipelineStage,\n    LinearDecay,\n)\nfrom allenact.utils.viz_utils import VizSuite, AgentViewViz\n\n\nclass HandManipulateTutorialExperimentConfig(ExperimentConfig):\n    @classmethod\n    def tag(cls) -> str:\n        return \"GymMuJoCoTutorial\"\n\n    # %%\n    \"\"\"\n    ### Sensors and Model\n\n    As mentioned above, we'll use a [GymBox2DSensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to provide\n    full observations from the state of the `gym` environment to our model.\n    \"\"\"\n\n    # 
%%\n    SENSORS = [\n        GymMuJoCoSensor(\"Ant-v2\", uuid=\"gym_mujoco_data\"),\n    ]\n\n    # %%\n    \"\"\"\n    We define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic,\n    [MemorylessActorCritic](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). Since\n    this is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]`\n    instead of `ActorCriticModel[CategoricalDistr]`, since we'll use a\n    [Gaussian distribution](../api/allenact_plugins/gym_plugin/gym_distributions.md#gaussiandistr) to sample actions.\n    \"\"\"\n\n    # %%\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        \"\"\"We define our `ActorCriticModel` agent using a lightweight\n        implementation with separate MLPs for actors and critic,\n        MemorylessActorCritic.\n\n        Since this is a model for continuous control, note that the\n        superclass of our model is `ActorCriticModel[GaussianDistr]`\n        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use\n        a Gaussian distribution to sample actions.\n        \"\"\"\n        return MemorylessActorCritic(\n            input_uuid=\"gym_mujoco_data\",\n            action_space=gym.spaces.Box(\n                -3.0, 3.0, (8,), \"float32\"\n            ),  # 8 actors, each in the range [-3.0, 3.0]\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            action_std=0.5,\n        )\n\n    # %%\n    \"\"\"\n    ### Task samplers\n    We use an available `TaskSampler` implementation for `gym` environments that allows to sample\n    [GymTasks](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtask):\n    [GymTaskSampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler). 
Even though it is possible to let the task\n    sampler instantiate the proper sensor for the chosen task name (by passing `None`), we use the sensors we created\n    above, which contain a custom identifier for the actual observation space (`gym_mujoco_data`) also used by the model.\n    \"\"\"\n\n    # %%\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return GymTaskSampler(gym_env_type=\"Ant-v2\", **kwargs)\n\n    # %%\n    \"\"\"\n    For convenience, we will use a `_get_sampler_args` method to generate the task sampler arguments for all three\n    modes, `train, valid, test`:\n    \"\"\"\n\n    # %%\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(\n            process_ind=process_ind, mode=\"train\", seeds=seeds\n        )\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(\n            process_ind=process_ind, mode=\"valid\", seeds=seeds\n        )\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(process_ind=process_ind, mode=\"test\", seeds=seeds)\n\n    # %%\n    \"\"\"\n    Similarly to what we do in the Minigrid navigation tutorial, the task sampler samples random tasks for ever, while,\n    during testing (or validation), we sample a fixed number of tasks.\n    
\"\"\"\n\n    # %%\n    def _get_sampler_args(\n        self, process_ind: int, mode: str, seeds: List[int]\n    ) -> Dict[str, Any]:\n        \"\"\"Generate initialization arguments for train, valid, and test\n        TaskSamplers.\n\n        # Parameters\n        process_ind : index of the current task sampler\n        mode:  one of `train`, `valid`, or `test`\n        \"\"\"\n        if mode == \"train\":\n            max_tasks = None  # infinite training tasks\n            task_seeds_list = None  # no predefined random seeds for training\n            deterministic_sampling = False  # randomly sample tasks in training\n        else:\n            max_tasks = 4\n\n            # one seed for each task to sample:\n            # - ensures different seeds for each sampler, and\n            # - ensures a deterministic set of sampled tasks.\n            task_seeds_list = list(\n                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)\n            )\n\n            deterministic_sampling = (\n                True  # deterministically sample task in validation/testing\n            )\n\n        return dict(\n            gym_env_types=[\"Ant-v2\"],\n            sensors=self.SENSORS,  # sensors used to return observations to the agent\n            max_tasks=max_tasks,  # see above\n            task_seeds_list=task_seeds_list,  # see above\n            deterministic_sampling=deterministic_sampling,  # see above\n            seed=seeds[process_ind],\n        )\n\n    # %%\n    \"\"\"\n    Note that we just sample 4 tasks for validation and testing in this case, which suffice to illustrate the model's\n    success.\n\n    ### Machine parameters\n\n    In this tutorial, we just train the model on the CPU. We allocate a larger number of samplers for training (8) than \n    for validation or testing (just 1), and we default to CPU usage by returning an empty list of `devices`. 
We also \n    include a video visualizer (`AgentViewViz`) in test mode.\n    \"\"\"\n\n    # %%\n    @classmethod\n    def machine_params(cls, mode=\"train\", **kwargs) -> Dict[str, Any]:\n        visualizer = None\n        if mode == \"test\":\n            visualizer = VizSuite(\n                mode=mode,\n                video_viz=AgentViewViz(\n                    label=\"episode_vid\",\n                    max_clip_length=400,\n                    vector_task_source=(\"render\", {\"mode\": \"rgb_array\"}),\n                    fps=30,\n                ),\n            )\n        return {\n            \"nprocesses\": 8 if mode == \"train\" else 1,  # rollout\n            \"devices\": [],\n            \"visualizer\": visualizer,\n        }\n\n    # %%\n    \"\"\"\n    ### Training pipeline\n\n    The last definition is the training pipeline. In this case, we use a PPO stage with linearly decaying learning rate\n    and 10 update repeats (each split into 4 mini-batches) per rollout. The test reward should exceed 4,000\n    within 20M steps. In order to make the \"ant\" run at a visibly high speed, we train the agent using PPO\n    for 3e7 steps. 
\n    \"\"\"\n\n    # %%\n    @classmethod\n    def training_pipeline(cls, **kwargs) -> TrainingPipeline:\n        lr = 3e-4\n        ppo_steps = int(3e7)\n        clip_param = 0.2\n        value_loss_coef = 0.5\n        entropy_coef = 0.0\n        num_mini_batch = 4  # optimal 64\n        update_repeats = 10\n        max_grad_norm = 0.5\n        num_steps = 2048\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 0.95\n        advance_scene_rollout_period = None\n        save_interval = 200000\n        metric_accumulate_interval = 50000\n        return TrainingPipeline(\n            named_losses=dict(\n                ppo_loss=PPO(\n                    clip_param=clip_param,\n                    value_loss_coef=value_loss_coef,\n                    entropy_coef=entropy_coef,\n                ),\n            ),  # type:ignore\n            pipeline_stages=[\n                PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps),\n            ],\n            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=advance_scene_rollout_period,\n            save_interval=save_interval,\n            metric_accumulate_interval=metric_accumulate_interval,\n            lr_scheduler_builder=Builder(\n                LambdaLR,\n                {\"lr_lambda\": LinearDecay(steps=ppo_steps, startp=1, endp=0)},\n            ),\n        )\n\n\n# %%\n\"\"\"\n## Training and validation\n\nWe have a complete implementation of this experiment's configuration class in `projects/tutorials/gym_mujoco_tutorial.py`.\nTo start training from scratch, we just need to invoke\n\n```bash\nPYTHONPATH=. 
python allenact/main.py gym_mujoco_tutorial -b projects/tutorials -m 8 -o /PATH/TO/gym_mujoco_output -s 0 -e\n```\n\nfrom the `allenact` root directory. Note that we include `-e` to enforce deterministic evaluation. Please refer to the\n[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md) if in doubt of the meaning of the rest of parameters.\n\nIf we have Tensorboard installed, we can track progress with\n```bash\ntensorboard --logdir /PATH/TO/gym_mujoco_output\n```\nwhich will default to the URL [http://localhost:6006/](http://localhost:6006/).\n\nAfter 30,000,000 steps, the script will terminate. If everything went well, the `valid` success rate should be 1 \nand the mean reward should be above 4,000 in 20,000,000 steps, while the average episode length should stay at or a \nlittle below 1,000.\n\n## Testing\n\nThe training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the\nsubfolders in the path to the checkpoints, saved under the output folder.\nIn order to evaluate (i.e. test) a collection of checkpoints, we need to pass the `--eval` flag and specify the \ndirectory containing the checkpoints with the `--checkpoint CHECKPOINT_DIR` option:\n```bash\nPYTHONPATH=. python allenact/main.py gym_mujoco_tutorial \\\n    -b projects/tutorials \\\n    -m 1 \\\n    -o /PATH/TO/gym_mujoco_output \\\n    -s 0 \\\n    -e \\\n    --eval \\\n    --checkpoint /PATH/TO/gym_mujoco_output/checkpoints/GymMuJoCoTutorial/YOUR_START_DATE \n```\n\nIf everything went well, the `test` success rate should converge to 1 \nand the mean reward should be above 4,000 in 20,000,000 steps, while the average episode length should stay at or a \nlittle below 1,000. The `gif` results can be seen in the image tab of Tensorboard while testing. 
\nThe output should be something like this:\n\n![results](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_test.png).\n\nAnd the `gif` results can be seen in the image tab of Tensorboard while testing.\n\n![mp4 demo](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_test.gif)\n\nIf the test command fails with `pyglet.canvas.xlib.NoSuchDisplayException: Cannot connect to \"None\"`, e.g. when running\nremotely, try prepending `DISPLAY=:0.0` to the command above, assuming you have an xserver running with such display\navailable:\n\n```bash\nDISPLAY=:0.0 PYTHONPATH=. python allenact/main.py gym_mujoco_tutorial \\\n    -b projects/tutorials \\\n    -m 1 \\\n    -o /PATH/TO/gym_mujoco_output \\\n    -s 0 \\\n    -e \\\n    --eval \\\n    --checkpoint /PATH/TO/gym_mujoco_output/checkpoints/GymMuJoCoTutorial/YOUR_START_DATE \n```\n\"\"\"\n"
  },
  {
    "path": "projects/tutorials/gym_tutorial.py",
    "content": "# literate: tutorials/gym-tutorial.md\n# %%\n\"\"\"# Tutorial: OpenAI gym for continuous control.\"\"\"\n\n# %%\n\"\"\"\n**Note** The provided commands to execute in this tutorial assume you have\n[installed the full library](../installation/installation-allenact.md#full-library) and the requirements for the\n`gym_plugin`. The latter can be installed by\n\n```bash\npip install -r allenact_plugins/gym_plugin/extra_requirements.txt\n```\n\nIn this tutorial, we:\n\n1. Introduce the `gym_plugin`, which enables some of the tasks in [OpenAI's gym](https://gym.openai.com/) for training\nand inference within AllenAct.\n1. Show an example of continuous control with an arbitrary action space covering 2 policies for one of the `gym` tasks.\n\n\n## The task\n\nFor this tutorial, we'll focus on one of the continuous-control environments under the `Box2D` group of `gym`\nenvironments: [LunarLanderContinuous-v2](https://gym.openai.com/envs/LunarLanderContinuous-v2/). In this task, the goal\nis to smoothly land a lunar module in a landing pad, as shown below.\n\n![The LunarLanderContinuous-v2 task](../img/lunar_lander_continuous_demo.png).\n\nTo achieve this goal, we need to provide continuous control for a main engine and directional one (2 real values). In\norder to solve the task, the expected reward is of at least 200 points. The controls for main and directional engines\nare both in the range [-1.0, 1.0] and the observation space is composed of 8 scalars indicating `x` and `y` positions,\n`x` and `y` velocities, lander angle and angular velocity, and left and right ground contact. 
Note that these 8 scalars\nprovide a full observation of the state.\n\n\n## Implementation\n\nFor this tutorial, we'll use the readily available `gym_plugin`, which includes a\n[wrapper for `gym` environments](../api/allenact_plugins/gym_plugin/gym_environment.md#gymenvironment), a\n[task sampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler) and\n[task definition](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymcontinuousbox2dtask), a\n[sensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to wrap the observations provided by the `gym`\nenvironment, and a simple [model](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic).\n\nThe experiment config, similar to the one used for the\n[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md), is defined as follows:\n\"\"\"\n\n# %%\nfrom typing import Dict, Optional, List, Any, cast\n\nimport gym\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPO\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic\nfrom allenact_plugins.gym_plugin.gym_sensors import GymBox2DSensor\nfrom allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler\nfrom allenact.utils.experiment_utils import (\n    TrainingPipeline,\n    Builder,\n    PipelineStage,\n    LinearDecay,\n)\nfrom allenact.utils.viz_utils import VizSuite, AgentViewViz\n\n\nclass GymTutorialExperimentConfig(ExperimentConfig):\n    @classmethod\n    def tag(cls) -> str:\n        return \"GymTutorial\"\n\n    # %%\n    \"\"\"\n    ### Sensors and Model\n    \n    As mentioned above, we'll use a [GymBox2DSensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to provide\n    full observations from the state of the 
`gym` environment to our model.\n    \"\"\"\n\n    # %%\n    SENSORS = [\n        GymBox2DSensor(\"LunarLanderContinuous-v2\", uuid=\"gym_box_data\"),\n    ]\n\n    # %%\n    \"\"\"\n    We define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic,\n    [MemorylessActorCritic](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). Since\n    this is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]`\n    instead of `ActorCriticModel[CategoricalDistr]`, since we'll use a\n    [Gaussian distribution](../api/allenact_plugins/gym_plugin/gym_distributions.md#gaussiandistr) to sample actions.\n    \"\"\"\n\n    # %%\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        return MemorylessActorCritic(\n            input_uuid=\"gym_box_data\",\n            action_space=gym.spaces.Box(\n                -1.0, 1.0, (2,)\n            ),  # 2 actors, each in the range [-1.0, 1.0]\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            action_std=0.5,\n        )\n\n    # %%\n    \"\"\"\n    ### Task samplers\n    We use an available `TaskSampler` implementation for `gym` environments that allows to sample\n    [GymTasks](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtask):\n    [GymTaskSampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler). 
Even though it is possible to let the task\n    sampler instantiate the proper sensor for the chosen task name (by passing `None`), we use the sensors we created\n    above, which contain a custom identifier for the actual observation space (`gym_box_data`) also used by the model.\n    \"\"\"\n\n    # %%\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return GymTaskSampler(**kwargs)\n\n    # %%\n    \"\"\"\n    For convenience, we will use a `_get_sampler_args` method to generate the task sampler arguments for all three\n    modes, `train, valid, test`:\n    \"\"\"\n\n    # %%\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(\n            process_ind=process_ind, mode=\"train\", seeds=seeds\n        )\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(\n            process_ind=process_ind, mode=\"valid\", seeds=seeds\n        )\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(process_ind=process_ind, mode=\"test\", seeds=seeds)\n\n    # %%\n    \"\"\"\n    Similarly to what we do in the Minigrid navigation tutorial, the task sampler samples random tasks for ever, while,\n    during testing (or validation), we sample a fixed number of tasks.\n    \"\"\"\n\n    # %%\n    def 
_get_sampler_args(\n        self, process_ind: int, mode: str, seeds: List[int]\n    ) -> Dict[str, Any]:\n        \"\"\"Generate initialization arguments for train, valid, and test\n        TaskSamplers.\n\n        # Parameters\n        process_ind : index of the current task sampler\n        mode:  one of `train`, `valid`, or `test`\n        \"\"\"\n        if mode == \"train\":\n            max_tasks = None  # infinite training tasks\n            task_seeds_list = None  # no predefined random seeds for training\n            deterministic_sampling = False  # randomly sample tasks in training\n        else:\n            max_tasks = 3\n\n            # one seed for each task to sample:\n            # - ensures different seeds for each sampler, and\n            # - ensures a deterministic set of sampled tasks.\n            task_seeds_list = list(\n                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)\n            )\n\n            deterministic_sampling = (\n                True  # deterministically sample task in validation/testing\n            )\n\n        return dict(\n            gym_env_types=[\"LunarLanderContinuous-v2\"],\n            sensors=self.SENSORS,  # sensors used to return observations to the agent\n            max_tasks=max_tasks,  # see above\n            task_seeds_list=task_seeds_list,  # see above\n            deterministic_sampling=deterministic_sampling,  # see above\n            seed=seeds[process_ind],\n        )\n\n    # %%\n    \"\"\"\n    Note that we just sample 3 tasks for validation and testing in this case, which suffice to illustrate the model's\n    success.\n    \n    ### Machine parameters\n\n    Given the simplicity of the task and model, we can just train the model on the CPU. 
During training, success should\n    reach 100% in less than 10 minutes, whereas solving the task (evaluation reward > 200) might take about 20 minutes\n    (on a laptop CPU).\n    \n    We allocate a larger number of samplers for training (8) than for validation or testing (just 1), and we default to\n    CPU usage by returning an empty list of `devices`. We also include a video visualizer (`AgentViewViz`) in test mode.\n    \"\"\"\n\n    # %%\n    @classmethod\n    def machine_params(cls, mode=\"train\", **kwargs) -> Dict[str, Any]:\n        visualizer = None\n        if mode == \"test\":\n            visualizer = VizSuite(\n                mode=mode,\n                video_viz=AgentViewViz(\n                    label=\"episode_vid\",\n                    max_clip_length=400,\n                    vector_task_source=(\"render\", {\"mode\": \"rgb_array\"}),\n                    fps=30,\n                ),\n            )\n        return {\n            \"nprocesses\": 8 if mode == \"train\" else 1,\n            \"devices\": [],\n            \"visualizer\": visualizer,\n        }\n\n    # %%\n    \"\"\"\n    ### Training pipeline\n    \n    The last definition is the training pipeline. 
In this case, we use a PPO stage with linearly decaying learning rate\n    and 80 single-batch update repeats per rollout:\n    \"\"\"\n\n    # %%\n    @classmethod\n    def training_pipeline(cls, **kwargs) -> TrainingPipeline:\n        ppo_steps = int(1.2e6)\n        return TrainingPipeline(\n            named_losses=dict(\n                ppo_loss=PPO(\n                    clip_param=0.2,\n                    value_loss_coef=0.5,\n                    entropy_coef=0.0,\n                ),\n            ),  # type:ignore\n            pipeline_stages=[\n                PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps),\n            ],\n            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-3)),\n            num_mini_batch=1,\n            update_repeats=80,\n            max_grad_norm=100,\n            num_steps=2000,\n            gamma=0.99,\n            use_gae=False,\n            gae_lambda=0.95,\n            advance_scene_rollout_period=None,\n            save_interval=200000,\n            metric_accumulate_interval=50000,\n            lr_scheduler_builder=Builder(\n                LambdaLR,\n                {\"lr_lambda\": LinearDecay(steps=ppo_steps)},  # type:ignore\n            ),\n        )\n\n\n# %%\n\"\"\"\n## Training and validation\n\nWe have a complete implementation of this experiment's configuration class in `projects/tutorials/gym_tutorial.py`.\nTo start training from scratch, we just need to invoke\n\n```bash\nPYTHONPATH=. python allenact/main.py gym_tutorial -b projects/tutorials -m 8 -o /PATH/TO/gym_output -s 54321 -e\n```\n\nfrom the `allenact` root directory. Note that we include `-e` to enforce deterministic evaluation. 
Please refer to the\n[Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md) if in doubt of the meaning of the rest of parameters.\n\nIf we have Tensorboard installed, we can track progress with\n```bash\ntensorboard --logdir /PATH/TO/gym_output\n```\nwhich will default to the URL [http://localhost:6006/](http://localhost:6006/).\n\nAfter 1,200,000 steps, the script will terminate. If everything went well, the `valid` success rate should quickly\nconverge to 1 and the mean reward to above 250, while the average episode length should stay below or near 300.\n\n## Testing\n\nThe training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the\nsubfolders in the path to the checkpoints, saved under the output folder.\nIn order to evaluate (i.e. test) a collection of checkpoints, we need to pass the `--eval` flag and specify the \ndirectory containing the checkpoints with the `--checkpoint CHECKPOINT_DIR` option:\n```bash\nPYTHONPATH=. python allenact/main.py gym_tutorial \\\n    -b projects/tutorials \\\n    -m 1 \\\n    -o /PATH/TO/gym_output \\\n    -s 54321 \\\n    -e \\\n    --eval \\\n    --checkpoint /PATH/TO/gym_output/checkpoints/GymTutorial/YOUR_START_DATE \\\n    --approx_ckpt_step_interval 800000 # Skip some checkpoints\n```\n\nThe option `--approx_ckpt_step_interval 800000` tells AllenAct that we only want to evaluate checkpoints\nwhich were saved every ~800000 steps, this lets us avoid evaluating every saved checkpoint. If everything went well, \nthe `test` success rate should converge to 1, the episode length below or near 300 steps, and the mean reward to above \n250. The images tab in tensorboard will contain videos for the sampled test episodes.\n\n![video_results](../img/lunar_lander_continuous_test.png).\n\nIf the test command fails with `pyglet.canvas.xlib.NoSuchDisplayException: Cannot connect to \"None\"`, e.g. 
when running\nremotely, try prepending `DISPLAY=:0.0` to the command above, assuming you have an xserver running with such display\navailable:\n\n```bash\nDISPLAY=:0.0 PYTHONPATH=. python allenact/main.py gym_tutorial \\\n    -b projects/tutorials \\\n    -m 1 \\\n    -o /PATH/TO/gym_output \\\n    -s 54321 \\\n    -e \\\n    --eval \\\n    --checkpoint /PATH/TO/gym_output/checkpoints/GymTutorial/YOUR_START_DATE \\\n    --approx_ckpt_step_interval 800000 \n```\n\"\"\"\n"
  },
  {
    "path": "projects/tutorials/minigrid_offpolicy_tutorial.py",
    "content": "# literate: tutorials/offpolicy-tutorial.md\n# %%\n\"\"\"# Tutorial: Off-policy training.\"\"\"\n\n# %%\n\"\"\"\n\n**Note** The provided commands to execute in this tutorial assume you have\n[installed the full library](../installation/installation-allenact.md#full-library) and the `extra_requirements`\nfor the `babyai_plugin` and `minigrid_plugin`. The latter can be installed with:\n\n```bash\npip install -r allenact_plugins/babyai_plugin/extra_requirements.txt; pip install -r allenact_plugins/minigrid_plugin/extra_requirements.txt\n```\n\nIn this tutorial we'll learn how to train an agent from an external dataset by imitating expert actions via\nBehavior Cloning. We'll use a [BabyAI agent](/api/allenact_plugins/babyai_plugin/babyai_models#BabyAIRecurrentACModel) to solve\n`GoToLocal` tasks on [MiniGrid](https://github.com/maximecb/gym-minigrid); see the\n`projects/babyai_baselines/experiments/go_to_local` directory for more details.\n\nThis tutorial assumes `AllenAct`'s [abstractions](../getting_started/abstractions.md) are known.\n\n## The task\n\nIn a `GoToLocal` task, the agent immersed in a grid world has to navigate to a specific object in the presence of\nmultiple distractors, requiring the agent to understand `go to` instructions like \"go to the red ball\". For further\ndetails, please consult the [original paper](https://arxiv.org/abs/1810.08272).\n\n## Getting the dataset\n\nWe will use a large dataset (**more than 4 GB**) including expert demonstrations for `GoToLocal` tasks. To download\nthe data we'll run\n\n```bash\nPYTHONPATH=. python allenact_plugins/babyai_plugin/scripts/download_babyai_expert_demos.py GoToLocal\n```\n\nfrom the project's root directory, which will download `BabyAI-GoToLocal-v0.pkl` and `BabyAI-GoToLocal-v0_valid.pkl` to\nthe `allenact_plugins/babyai_plugin/data/demos` directory.\n\nWe will also generate small versions of the datasets, which will be useful if running on CPU, by calling\n\n```bash\nPYTHONPATH=. 
python allenact_plugins/babyai_plugin/scripts/truncate_expert_demos.py\n```\nfrom the project's root directory, which will generate `BabyAI-GoToLocal-v0-small.pkl` under the same\n`allenact_plugins/babyai_plugin/data/demos` directory.\n\n## Data storage\n\nIn order to train with an off-policy dataset, we need to define an `ExperienceStorage`. In AllenAct, an\n `ExperienceStorage` object has two primary functions:\n1. It stores/manages relevant data (e.g. similarly to the `Dataset` class in PyTorch).\n2. It loads stored data into batches that will be used for loss computation (e.g. similarly to the `DataLoader` \nclass in PyTorch).\nUnlike a PyTorch `Dataset` however, an `ExperienceStorage` object can build its dataset **at runtime** by processing\n rollouts from the agent. This flexibility allows us to, for example, implement the experience replay data structure\n used in deep Q-learning. For this tutorial we won't need this additional functionality as our off-policy dataset\n is a fixed collection of expert trajectories.    \n\nAn example of an `ExperienceStorage` for BabyAI expert demos might look as follows:\n\"\"\"\n\n# %% import_summary allenact_plugins.minigrid_plugin.minigrid_offpolicy.MiniGridExpertTrajectoryStorage\n\n# %%\n\"\"\"\nA complete example can be found in\n[MiniGridExpertTrajectoryStorage](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy#MiniGridExpertTrajectoryStorage).\n\n## Loss function\n\nOff-policy losses must implement the\n[`GenericAbstractLoss`](/api/allenact/base_abstractions/misc/#genericabstractloss)\ninterface. 
In this case, we minimize the cross-entropy between the actor's policy and the expert action:\n\"\"\"\n\n# %% import allenact_plugins.minigrid_plugin.minigrid_offpolicy.MiniGridOffPolicyExpertCELoss\n\n# %%\n\"\"\"\nA complete example can be found in\n[MiniGridOffPolicyExpertCELoss](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy#MiniGridOffPolicyExpertCELoss).\nNote that in this case we train the entire actor, but it would also be possible to forward data through a different\nsubgraph of the ActorCriticModel.\n\n## Experiment configuration\n\nFor the experiment configuration, we'll build on top of an existing\n[base BabyAI GoToLocal Experiment Config](/api/projects/babyai_baselines/experiments/go_to_local/base/#basebabyaigotolocalexperimentconfig).\nThe complete `ExperimentConfig` file for off-policy training is\n[here](/api/projects/tutorials/minigrid_offpolicy_tutorial/#bcoffpolicybabyaigotolocalexperimentconfig), but let's\nfocus on the most relevant aspect to enable this type of training:\nproviding an [OffPolicyPipelineComponent](/api/allenact/utils/experiment_utils/#offpolicypipelinecomponent) object as input to a\n`PipelineStage` when instantiating the `TrainingPipeline` in the `training_pipeline` method.\n\"\"\"\n\n# %% hide\nimport os\nfrom typing import Optional, List, Tuple\n\nimport torch\nfrom gym_minigrid.minigrid import MiniGridEnv\n\nfrom allenact.algorithms.onpolicy_sync.storage import RolloutBlockStorage\nfrom allenact.utils.experiment_utils import (\n    PipelineStage,\n    StageComponent,\n    TrainingSettings,\n)\nfrom allenact_plugins.babyai_plugin.babyai_constants import (\n    BABYAI_EXPERT_TRAJECTORIES_DIR,\n)\nfrom allenact_plugins.minigrid_plugin.minigrid_offpolicy import (\n    MiniGridOffPolicyExpertCELoss,\n    MiniGridExpertTrajectoryStorage,\n)\nfrom projects.babyai_baselines.experiments.go_to_local.base import (\n    BaseBabyAIGoToLocalExperimentConfig,\n)\n\n\n# %%\nclass 
BCOffPolicyBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):\n    \"\"\"BC Off-policy imitation.\"\"\"\n\n    DATASET: Optional[List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]] = None\n\n    GPU_ID = 0 if torch.cuda.is_available() else None\n\n    @classmethod\n    def tag(cls):\n        return \"BabyAIGoToLocalBCOffPolicy\"\n\n    @classmethod\n    def METRIC_ACCUMULATE_INTERVAL(cls):\n        # See BaseBabyAIGoToLocalExperimentConfig for how this is used.\n        return 1\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        total_train_steps = cls.TOTAL_IL_TRAIN_STEPS\n        ppo_info = cls.rl_loss_default(\"ppo\", steps=-1)\n\n        num_mini_batch = ppo_info[\"num_mini_batch\"]\n        update_repeats = ppo_info[\"update_repeats\"]\n\n        # fmt: off\n        return cls._training_pipeline(\n            named_losses={\n                \"offpolicy_expert_ce_loss\": MiniGridOffPolicyExpertCELoss(\n                    total_episodes_in_epoch=int(1e6)\n                ),\n            },\n            named_storages={\n                \"onpolicy\": RolloutBlockStorage(),\n                \"minigrid_offpolicy_expert\": MiniGridExpertTrajectoryStorage(\n                    data_path=os.path.join(\n                                BABYAI_EXPERT_TRAJECTORIES_DIR,\n                                \"BabyAI-GoToLocal-v0{}.pkl\".format(\n                                    \"\" if torch.cuda.is_available() else \"-small\"\n                                ),\n                            ),\n                    num_samplers=cls.NUM_TRAIN_SAMPLERS,\n                    rollout_len=cls.ROLLOUT_STEPS,\n                    instr_len=cls.INSTR_LEN,\n                ),\n            },\n            pipeline_stages=[\n                # Single stage, only with off-policy training\n                PipelineStage(\n                    loss_names=[\"offpolicy_expert_ce_loss\"],                                              # no on-policy 
losses\n                    max_stage_steps=total_train_steps,                          # keep sampling episodes in the stage\n                    stage_components=[\n                        StageComponent(\n                            uuid=\"offpolicy\",\n                            storage_uuid=\"minigrid_offpolicy_expert\",\n                            loss_names=[\"offpolicy_expert_ce_loss\"],\n                            training_settings=TrainingSettings(\n                                update_repeats=num_mini_batch * update_repeats,\n                                num_mini_batch=1,\n                            )\n                        )\n                    ],\n                ),\n            ],\n            # As we don't have any on-policy losses, we set the next\n            # two values to zero to ensure we don't attempt to\n            # compute gradients for on-policy rollouts:\n            num_mini_batch=0,\n            update_repeats=0,\n            total_train_steps=total_train_steps,\n        )\n        # fmt: on\n\n\n# %%\n\"\"\"\nYou'll have noted that it is possible to combine on-policy and off-policy training in the same stage, even though here\nwe apply pure off-policy training.\n\n## Training\n\nWe recommend using a machine with a CUDA-capable GPU for this experiment. In order to start training, we just need to\ninvoke\n\n```bash\nPYTHONPATH=. python allenact/main.py -b projects/tutorials minigrid_offpolicy_tutorial -m 8 -o <OUTPUT_PATH>\n```\n\nNote that with the `-m 8` option we limit to 8 the number of on-policy task sampling processes used between off-policy\nupdates.\n\nIf everything goes well, the training success should quickly reach values around 0.7-0.8 on GPU and converge to values\nclose to 1 if given sufficient time to train.\n\nIf running tensorboard, you'll notice a separate group of scalars named `train-offpolicy-losses` and \n `train-offpolicy-misc` with losses, approximate \"experiences per second\" (i.e. 
the number of off-policy experiences/steps\n being used to update the model per second), and other tracked values in addition to the standard `train-onpolicy-*`\n  groups used for on-policy training. In the `train-metrics` and `train-misc` sections you'll find the metrics \n  quantifying the performance of the agent throughout training and some other plots showing training details.\n  *Note that the x-axis for these plots is different than for the `train-offpolicy-*` sections*. This\n  is because these plots use the number of rollout steps as the x-axis (i.e. steps that the trained agent\n  takes interactively) while the `train-offpolicy-*` plots use the number of off-policy \"experiences\" that have\n  been shown to the agent.\n  \n\nA view of the training progress about 5 hours after starting on a CUDA-capable GPU should look similar to the below\n(note that training reached >99% success after about 50 minutes).\n\n![off-policy progress](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/minigrid-offpolicy/minigrid-offpolicy-tutorial-tb.png)\n\"\"\"\n"
  },
  {
    "path": "projects/tutorials/minigrid_tutorial.py",
    "content": "# literate: tutorials/minigrid-tutorial.md\n# %%\n\"\"\"# Tutorial: Navigation in MiniGrid.\"\"\"\n\n# %%\n\"\"\"\nIn this tutorial, we will train an agent to complete the `MiniGrid-Empty-Random-5x5-v0` task within the\n[MiniGrid](https://github.com/maximecb/gym-minigrid) environment. We will demonstrate how to:\n\n* Write an experiment configuration file with a simple training pipeline from scratch.\n* Use one of the supported environments with minimal user effort.\n* Train, validate and test your experiment from the command line.\n\nThis tutorial assumes the [installation instructions](../installation/installation-allenact.md) have already been\nfollowed and that, to some extent, this framework's [abstractions](../getting_started/abstractions.md) are known.\nThe `extra_requirements` for `minigrid_plugin` and `babyai_plugin` can be installed with.\n\n```bash\npip install -r allenact_plugins/minigrid_plugin/extra_requirements.txt; pip install -r allenact_plugins/babyai_plugin/extra_requirements.txt\n```\n\n## The task\nA `MiniGrid-Empty-Random-5x5-v0` task consists of a grid of dimensions 5x5 where an agent spawned at a random\nlocation and orientation has to navigate to the visitable bottom right corner cell of the grid by sequences of three\npossible actions (rotate left/right and move forward). A visualization of the environment with expert steps in a random\n`MiniGrid-Empty-Random-5x5-v0` task looks like\n\n![MiniGridEmptyRandom5x5 task example](../img/minigrid_environment.png)\n\nThe observation for the agent is a subset of the entire grid, simulating a simplified limited field of view, as\ndepicted by the highlighted rectangle (observed subset of the grid) around the agent (red arrow). 
Gray cells correspond\nto walls.\n\n## Experiment configuration file\n\nOur complete experiment consists of:\n\n* Training a basic actor-critic agent with memory to solve randomly sampled navigation tasks.\n* Validation on a fixed set of tasks (running in parallel with training).\n* A second stage where we test saved checkpoints with a larger fixed set of tasks.\n\nThe entire configuration for the experiment, including training, validation, and testing, is encapsulated in a single \nclass implementing the `ExperimentConfig` abstraction. For this tutorial, we will follow the config under\n`projects/tutorials/minigrid_tutorial.py`. \n\nThe `ExperimentConfig` abstraction is used by the\n[OnPolicyTrainer](../api/allenact/algorithms/onpolicy_sync/engine.md#onpolicytrainer) class (for training) and the\n[OnPolicyInference](../api/allenact/algorithms/onpolicy_sync/engine.md#onpolicyinference) class (for validation and testing)\ninvoked through the entry script `main.py` that calls an orchestrating\n[OnPolicyRunner](../api/allenact/algorithms/onpolicy_sync/runner.md#onpolicyrunner) class. 
It includes:\n\n* A `tag` method to identify the experiment.\n* A `create_model` method to instantiate actor-critic models.\n* A `make_sampler_fn` method to instantiate task samplers.\n* Three `{train,valid,test}_task_sampler_args` methods describing initialization parameters for task samplers used in\ntraining, validation, and testing; including assignment of workers to devices for simulation.\n* A `machine_params` method with configuration parameters that will be used for training, validation, and testing.\n* A `training_pipeline` method describing a possibly multi-staged training pipeline with different types of losses,\nan optimizer, and other parameters like learning rates, batch sizes, etc.\n\n### Preliminaries\n\nWe first import everything we'll need to define our experiment.\n\"\"\"\n\n# %%\nfrom typing import Dict, Optional, List, Any, cast\n\nimport gym\nfrom gym_minigrid.envs import EmptyRandomEnv5x5\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact.utils.experiment_utils import (\n    TrainingPipeline,\n    Builder,\n    PipelineStage,\n    LinearDecay,\n)\nfrom allenact_plugins.minigrid_plugin.minigrid_models import MiniGridSimpleConvRNN\nfrom allenact_plugins.minigrid_plugin.minigrid_sensors import EgocentricMiniGridSensor\nfrom allenact_plugins.minigrid_plugin.minigrid_tasks import (\n    MiniGridTaskSampler,\n    MiniGridTask,\n)\n\n# %%\n\"\"\"\nWe now create the `MiniGridTutorialExperimentConfig` class which we will use to define our experiment. \nFor pedagogical reasons, we will add methods to this class one at a time below with a description of what\nthese methods do.  
\n\"\"\"\n\n\n# %%\nclass MiniGridTutorialExperimentConfig(ExperimentConfig):\n\n    # %%\n    \"\"\"An experiment is identified by a `tag`.\"\"\"\n\n    # %%\n    @classmethod\n    def tag(cls) -> str:\n        return \"MiniGridTutorial\"\n\n    # %%\n    \"\"\"\n    ### Sensors and Model\n    \n    A readily available Sensor type for MiniGrid,\n    [EgocentricMiniGridSensor](../api/allenact_plugins/minigrid_plugin/minigrid_sensors.md#egocentricminigridsensor),\n    allows us to extract observations in a format consumable by an `ActorCriticModel` agent:\n    \"\"\"\n\n    # %%\n    SENSORS = [\n        EgocentricMiniGridSensor(agent_view_size=5, view_channels=3),\n    ]\n\n    # %%\n    \"\"\"\n    The three `view_channels` include objects, colors and states corresponding to a partial observation of the environment\n    as an image tensor, equivalent to that from `ImgObsWrapper` in\n    [MiniGrid](https://github.com/maximecb/gym-minigrid#wrappers). The\n    relatively large `agent_view_size` means the view will only be clipped by the environment walls in the forward and\n    lateral directions with respect to the agent's orientation.\n    \n    We define our `ActorCriticModel` agent using a lightweight implementation with recurrent memory for MiniGrid\n    environments, [MiniGridSimpleConvRNN](../api/allenact_plugins/minigrid_plugin/minigrid_models.md#minigridsimpleconvrnn):\n    \"\"\"\n\n    # %%\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        return MiniGridSimpleConvRNN(\n            action_space=gym.spaces.Discrete(len(MiniGridTask.class_action_names())),\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            num_objects=cls.SENSORS[0].num_objects,\n            num_colors=cls.SENSORS[0].num_colors,\n            num_states=cls.SENSORS[0].num_states,\n        )\n\n    # %%\n    \"\"\"\n    ### Task samplers\n    \n    We use an available TaskSampler implementation for MiniGrid environments that 
allows us to sample both random and\n    deterministic `MiniGridTasks`,\n    [MiniGridTaskSampler](../api/allenact_plugins/minigrid_plugin/minigrid_tasks.md#minigridtasksampler):\n    \"\"\"\n\n    # %%\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return MiniGridTaskSampler(**kwargs)\n\n    # %%\n    \"\"\"\n    During training (or validation/testing), this task sampler will randomly initialize new tasks for the agent to complete.\n    While it is not quite as important for this task type (as we test our agent in the same setting it is trained on) there\n    are a lot of good reasons we would like to sample tasks differently during training than during validation or testing.\n    One good reason, that is applicable in this tutorial, is that, during training, we would like to be able to sample tasks\n    forever while, during testing, we would like to sample a fixed number of tasks (as otherwise we would never finish\n    testing!). In `allenact` this is made possible by defining different arguments for the task sampler:\n    \"\"\"\n\n    # %%\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(process_ind=process_ind, mode=\"train\")\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(process_ind=process_ind, mode=\"valid\")\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        
deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(process_ind=process_ind, mode=\"test\")\n\n    # %%\n    \"\"\"\n    where, for convenience, we have defined a `_get_sampler_args` method:\n    \"\"\"\n\n    # %%\n    def _get_sampler_args(self, process_ind: int, mode: str) -> Dict[str, Any]:\n        \"\"\"Generate initialization arguments for train, valid, and test\n        TaskSamplers.\n\n        # Parameters\n        process_ind : index of the current task sampler\n        mode:  one of `train`, `valid`, or `test`\n        \"\"\"\n        if mode == \"train\":\n            max_tasks = None  # infinite training tasks\n            task_seeds_list = None  # no predefined random seeds for training\n            deterministic_sampling = False  # randomly sample tasks in training\n        else:\n            max_tasks = 20 + 20 * (mode == \"test\")  # 20 tasks for valid, 40 for test\n\n            # one seed for each task to sample:\n            # - ensures different seeds for each sampler, and\n            # - ensures a deterministic set of sampled tasks.\n            task_seeds_list = list(\n                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)\n            )\n\n            deterministic_sampling = (\n                True  # deterministically sample task in validation/testing\n            )\n\n        return dict(\n            max_tasks=max_tasks,  # see above\n            env_class=self.make_env,  # builder for third-party environment (defined below)\n            sensors=self.SENSORS,  # sensors used to return observations to the agent\n            env_info=dict(),  # parameters for environment builder (none for now)\n            task_seeds_list=task_seeds_list,  # see above\n            deterministic_sampling=deterministic_sampling,  # see above\n        )\n\n    @staticmethod\n    def make_env(*args, **kwargs):\n        return EmptyRandomEnv5x5()\n\n    # %%\n    \"\"\"\n    Note that the 
`env_class` argument to the Task Sampler is the one determining which task type we are going to train the\n    model for (in this case, `MiniGrid-Empty-Random-5x5-v0` from\n    [gym-minigrid](https://github.com/maximecb/gym-minigrid#empty-environment))\n    . The sparse reward is\n    [given by the environment](https://github.com/maximecb/gym-minigrid/blob/6e22a44dc67414b647063692258a4f95ce789161/gym_minigrid/minigrid.py#L819)\n    , and the maximum task length is 100. For training, we opt for a default random sampling, whereas for validation and\n    test we define fixed sets of randomly sampled tasks without needing to explicitly define a dataset.\n    \n    In this toy example, the maximum number of different tasks is 32. For validation we sample 320 tasks using 16 samplers,\n    or 640 for testing, so we can be fairly sure that all possible tasks are visited at least once during evaluation.\n    \n    ### Machine parameters\n    \n    Given the simplicity of the task and model, we can quickly train the model on the CPU:\n    \"\"\"\n\n    # %%\n    @classmethod\n    def machine_params(cls, mode=\"train\", **kwargs) -> Dict[str, Any]:\n        return {\n            \"nprocesses\": 128 if mode == \"train\" else 16,\n            \"devices\": [],\n        }\n\n    # %%\n    \"\"\"\n    We allocate a larger number of samplers for training (128) than for validation or testing (16), and we default to CPU\n    usage by returning an empty list of `devices`.\n    \n    ### Training pipeline\n    \n    The last definition required before starting to train is a training pipeline. 
In this case, we just use a single PPO\n    stage with linearly decaying learning rate:\n    \"\"\"\n\n    # %%\n    @classmethod\n    def training_pipeline(cls, **kwargs) -> TrainingPipeline:\n        ppo_steps = int(150000)\n        return TrainingPipeline(\n            named_losses=dict(ppo_loss=PPO(**PPOConfig)),  # type:ignore\n            pipeline_stages=[\n                PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps)\n            ],\n            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4)),\n            num_mini_batch=4,\n            update_repeats=3,\n            max_grad_norm=0.5,\n            num_steps=16,\n            gamma=0.99,\n            use_gae=True,\n            gae_lambda=0.95,\n            advance_scene_rollout_period=None,\n            save_interval=10000,\n            metric_accumulate_interval=1,\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}  # type:ignore\n            ),\n        )\n\n\n# %%\n\"\"\"\nYou can see that we use a `Builder` class to postpone the construction of some of the elements, like the optimizer,\nfor which the model weights need to be known.\n\n## Training and validation\n\nWe have a complete implementation of this experiment's configuration class in `projects/tutorials/minigrid_tutorial.py`.\nTo start training from scratch, we just need to invoke\n\n```bash\nPYTHONPATH=. 
python allenact/main.py minigrid_tutorial -b projects/tutorials -m 8 -o /PATH/TO/minigrid_output -s 12345\n```\n\nfrom the `allenact` root directory.\n\n* With `-b projects/tutorials` we tell `allenact` that the `minigrid_tutorial` experiment config file \nwill be found in the `projects/tutorials` directory.\n* With `-m 8` we limit the number of subprocesses to 8 (each subprocess will run 16 of the 128 training task samplers).\n* With `-o /PATH/TO/minigrid_output` we set the output folder into which results and logs will be saved.\n* With `-s 12345` we set the random seed.\n\nIf we have TensorBoard installed, we can track progress with\n```bash\ntensorboard --logdir /PATH/TO/minigrid_output\n```\nwhich will default to the URL [http://localhost:6006/](http://localhost:6006/).\n\nAfter 150,000 steps, the script will terminate and several checkpoints will be saved in the output folder.\nThe training curves should look similar to:\n\n![training curves](../img/minigrid_train.png)\n\nIf everything went well, the `valid` success rate should converge to 1 and the mean episode length to a value below 4.\n(For perfectly uniform sampling and complete observation, the expectation for the optimal policy is 3.75 steps.) In the\nnot-so-unlikely event of the run failing to converge to a near-optimal policy, we can just try to re-run (for example\nwith a different random seed). The validation curves should look similar to:\n\n![validation curves](../img/minigrid_valid.png)\n\n## Testing\n\nThe training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the\nsubfolders in the path to the checkpoints, saved under the output folder.\nIn order to evaluate (i.e. test) a particular checkpoint, we need to pass the `--eval` flag and specify the checkpoint with the\n`--checkpoint CHECKPOINT_PATH` option:\n```bash\nPYTHONPATH=. 
python allenact/main.py minigrid_tutorial \\\n    -b projects/tutorials \\\n    -m 1 \\\n    -o /PATH/TO/minigrid_output \\\n    -s 12345 \\\n    --eval \\\n    --checkpoint /PATH/TO/minigrid_output/checkpoints/MiniGridTutorial/YOUR_START_DATE/exp_MiniGridTutorial__stage_00__steps_000000151552.pt\n```\n\nAgain, if everything went well, the `test` success rate should converge to 1 and the mean episode length to a value\nbelow 4. Detailed results are saved under a `metrics` subfolder in the output folder.\nThe test curves should look similar to:\n\n![test curves](../img/minigrid_test.png)\n\"\"\"\n"
  },
  {
    "path": "projects/tutorials/minigrid_tutorial_conds.py",
    "content": "from typing import Dict, Optional, List, Any, cast, Callable, Union, Tuple\n\nimport gym\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom gym_minigrid.envs import EmptyRandomEnv5x5\nfrom gym_minigrid.minigrid import MiniGridEnv\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses.imitation import Imitation\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig\nfrom allenact.algorithms.onpolicy_sync.policy import ActorCriticModel, DistributionType\nfrom allenact.base_abstractions.distributions import (\n    CategoricalDistr,\n    ConditionalDistr,\n    SequentialDistr,\n)\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler\nfrom allenact.base_abstractions.misc import ActorCriticOutput, Memory, RLStepResult\nfrom allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor\nfrom allenact.embodiedai.models.basic_models import RNNStateEncoder\nfrom allenact.utils.experiment_utils import (\n    TrainingPipeline,\n    Builder,\n    PipelineStage,\n    LinearDecay,\n)\nfrom allenact.utils.misc_utils import prepare_locals_for_super\nfrom allenact_plugins.minigrid_plugin.minigrid_models import MiniGridSimpleConvBase\nfrom allenact_plugins.minigrid_plugin.minigrid_sensors import EgocentricMiniGridSensor\nfrom allenact_plugins.minigrid_plugin.minigrid_tasks import (\n    MiniGridTaskSampler,\n    MiniGridTask,\n)\n\n\nclass ConditionedLinearActorCriticHead(nn.Module):\n    def __init__(\n        self, input_size: int, master_actions: int = 2, subpolicy_actions: int = 2\n    ):\n        super().__init__()\n        self.input_size = input_size\n        self.master_and_critic = nn.Linear(input_size, master_actions + 1)\n        self.embed_higher = nn.Embedding(num_embeddings=2, embedding_dim=input_size)\n        self.actor = nn.Linear(2 * input_size, subpolicy_actions)\n\n        
nn.init.orthogonal_(self.master_and_critic.weight)\n        nn.init.constant_(self.master_and_critic.bias, 0)\n        nn.init.orthogonal_(self.actor.weight)\n        nn.init.constant_(self.actor.bias, 0)\n\n    def lower_policy(self, *args, **kwargs):\n        assert \"higher\" in kwargs\n        assert \"state_embedding\" in kwargs\n        emb = self.embed_higher(kwargs[\"higher\"])\n        logits = self.actor(torch.cat([emb, kwargs[\"state_embedding\"]], dim=-1))\n        return CategoricalDistr(logits=logits)\n\n    def forward(self, x):\n        out = self.master_and_critic(x)\n\n        master_logits = out[..., :-1]\n        values = out[..., -1:]\n        # noinspection PyArgumentList\n\n        cond1 = ConditionalDistr(\n            distr_conditioned_on_input_fn_or_instance=CategoricalDistr(\n                logits=master_logits\n            ),\n            action_group_name=\"higher\",\n        )\n        cond2 = ConditionalDistr(\n            distr_conditioned_on_input_fn_or_instance=lambda *args, **kwargs: ConditionedLinearActorCriticHead.lower_policy(\n                self, *args, **kwargs\n            ),\n            action_group_name=\"lower\",\n            state_embedding=x,\n        )\n\n        return (\n            SequentialDistr(cond1, cond2),\n            values.view(*values.shape[:2], -1),  # [steps, samplers, flattened]\n        )\n\n\nclass ConditionedLinearActorCritic(ActorCriticModel[SequentialDistr]):\n    def __init__(\n        self,\n        input_uuid: str,\n        action_space: gym.spaces.Dict,\n        observation_space: gym.spaces.Dict,\n    ):\n        super().__init__(action_space=action_space, observation_space=observation_space)\n\n        assert (\n            input_uuid in observation_space.spaces\n        ), \"ConditionedLinearActorCritic expects only a single observational input.\"\n        self.input_uuid = input_uuid\n\n        box_space: gym.spaces.Box = observation_space[self.input_uuid]\n        assert 
isinstance(box_space, gym.spaces.Box), (\n            \"ConditionedLinearActorCritic requires that\"\n            \"observation space corresponding to the input uuid is a Box space.\"\n        )\n        assert len(box_space.shape) == 1\n        self.in_dim = box_space.shape[0]\n        self.head = ConditionedLinearActorCriticHead(\n            input_size=self.in_dim,\n            master_actions=action_space[\"higher\"].n,\n            subpolicy_actions=action_space[\"lower\"].n,\n        )\n\n    # noinspection PyMethodMayBeStatic\n    def _recurrent_memory_specification(self):\n        return None\n\n    def forward(self, observations, memory, prev_actions, masks):\n        dists, values = self.head(observations[self.input_uuid])\n\n        # noinspection PyArgumentList\n        return (\n            ActorCriticOutput(\n                distributions=dists,\n                values=values,\n                extras={},\n            ),\n            None,\n        )\n\n\nclass ConditionedRNNActorCritic(ActorCriticModel[SequentialDistr]):\n    def __init__(\n        self,\n        input_uuid: str,\n        action_space: gym.spaces.Dict,\n        observation_space: gym.spaces.Dict,\n        hidden_size: int = 128,\n        num_layers: int = 1,\n        rnn_type: str = \"GRU\",\n        head_type: Callable[\n            ..., ActorCriticModel[SequentialDistr]\n        ] = ConditionedLinearActorCritic,\n    ):\n        super().__init__(action_space=action_space, observation_space=observation_space)\n        self.hidden_size = hidden_size\n        self.rnn_type = rnn_type\n\n        assert (\n            input_uuid in observation_space.spaces\n        ), \"LinearActorCritic expects only a single observational input.\"\n        self.input_uuid = input_uuid\n\n        box_space: gym.spaces.Box = observation_space[self.input_uuid]\n        assert isinstance(box_space, gym.spaces.Box), (\n            \"RNNActorCritic requires that\"\n            \"observation space corresponding 
to the input uuid is a Box space.\"\n        )\n        assert len(box_space.shape) == 1\n        self.in_dim = box_space.shape[0]\n\n        self.state_encoder = RNNStateEncoder(\n            input_size=self.in_dim,\n            hidden_size=hidden_size,\n            num_layers=num_layers,\n            rnn_type=rnn_type,\n            trainable_masked_hidden_state=True,\n        )\n\n        self.head_uuid = \"{}_{}\".format(\"rnn\", input_uuid)\n\n        self.ac_nonrecurrent_head: ActorCriticModel[SequentialDistr] = head_type(\n            input_uuid=self.head_uuid,\n            action_space=action_space,\n            observation_space=gym.spaces.Dict(\n                {\n                    self.head_uuid: gym.spaces.Box(\n                        low=np.float32(0.0), high=np.float32(1.0), shape=(hidden_size,)\n                    )\n                }\n            ),\n        )\n\n        self.memory_key = \"rnn\"\n\n    @property\n    def recurrent_hidden_state_size(self) -> int:\n        return self.hidden_size\n\n    @property\n    def num_recurrent_layers(self) -> int:\n        return self.state_encoder.num_recurrent_layers\n\n    def _recurrent_memory_specification(self):\n        return {\n            self.memory_key: (\n                (\n                    (\"layer\", self.num_recurrent_layers),\n                    (\"sampler\", None),\n                    (\"hidden\", self.recurrent_hidden_state_size),\n                ),\n                torch.float32,\n            )\n        }\n\n    def forward(  # type:ignore\n        self,\n        observations: Dict[str, Union[torch.FloatTensor, Dict[str, Any]]],\n        memory: Memory,\n        prev_actions: torch.Tensor,\n        masks: torch.FloatTensor,\n    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:\n        rnn_out, mem_return = self.state_encoder(\n            x=observations[self.input_uuid],\n            hidden_states=memory.tensor(self.memory_key),\n            masks=masks,\n      
  )\n\n        # noinspection PyCallingNonCallable\n        out, _ = self.ac_nonrecurrent_head(\n            observations={self.head_uuid: rnn_out},\n            memory=None,\n            prev_actions=prev_actions,\n            masks=masks,\n        )\n\n        # noinspection PyArgumentList\n        return (\n            out,\n            memory.set_tensor(self.memory_key, mem_return),\n        )\n\n\nclass ConditionedMiniGridSimpleConvRNN(MiniGridSimpleConvBase):\n    def __init__(\n        self,\n        action_space: gym.spaces.Dict,\n        observation_space: gym.spaces.Dict,\n        num_objects: int,\n        num_colors: int,\n        num_states: int,\n        object_embedding_dim: int = 8,\n        hidden_size=512,\n        num_layers=1,\n        rnn_type=\"GRU\",\n        head_type: Callable[\n            ..., ActorCriticModel[SequentialDistr]\n        ] = ConditionedLinearActorCritic,\n        **kwargs,\n    ):\n        super().__init__(**prepare_locals_for_super(locals()))\n\n        self._hidden_size = hidden_size\n        agent_view_x, agent_view_y, view_channels = observation_space[\n            \"minigrid_ego_image\"\n        ].shape\n        self.actor_critic = ConditionedRNNActorCritic(\n            input_uuid=self.ac_key,\n            action_space=action_space,\n            observation_space=gym.spaces.Dict(\n                {\n                    self.ac_key: gym.spaces.Box(\n                        low=np.float32(-1.0),\n                        high=np.float32(1.0),\n                        shape=(\n                            self.object_embedding_dim\n                            * agent_view_x\n                            * agent_view_y\n                            * view_channels,\n                        ),\n                    )\n                }\n            ),\n            hidden_size=hidden_size,\n            num_layers=num_layers,\n            rnn_type=rnn_type,\n            head_type=head_type,\n        )\n        self.memory_key = 
\"rnn\"\n\n        self.train()\n\n    @property\n    def num_recurrent_layers(self):\n        return self.actor_critic.num_recurrent_layers\n\n    @property\n    def recurrent_hidden_state_size(self):\n        return self._hidden_size\n\n    def _recurrent_memory_specification(self):\n        return {\n            self.memory_key: (\n                (\n                    (\"layer\", self.num_recurrent_layers),\n                    (\"sampler\", None),\n                    (\"hidden\", self.recurrent_hidden_state_size),\n                ),\n                torch.float32,\n            )\n        }\n\n\nclass ConditionedMiniGridTask(MiniGridTask):\n    _ACTION_NAMES = (\"left\", \"right\", \"forward\", \"pickup\")\n    _ACTION_IND_TO_MINIGRID_IND = tuple(\n        MiniGridEnv.Actions.__members__[name].value for name in _ACTION_NAMES\n    )\n\n    @property\n    def action_space(self) -> gym.spaces.Dict:\n        return gym.spaces.Dict(\n            higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)\n        )\n\n    def _step(self, action: Dict[str, int]) -> RLStepResult:\n        assert len(action) == 2, \"got action={}\".format(action)\n        minigrid_obs, reward, self._minigrid_done, info = self.env.step(\n            action=(\n                self._ACTION_IND_TO_MINIGRID_IND[action[\"lower\"] + 2 * action[\"higher\"]]\n            )\n        )\n\n        # self.env.render()\n\n        return RLStepResult(\n            observation=self.get_observations(minigrid_output_obs=minigrid_obs),\n            reward=reward,\n            done=self.is_done(),\n            info=info,\n        )\n\n    def query_expert(self, **kwargs) -> Tuple[int, bool]:\n        if kwargs[\"expert_sensor_group_name\"] == \"higher\":\n            if self._minigrid_done:\n                raise ValueError(\"Episode is completed, but expert is still queried.\")\n                # return 0, False\n            self.cached_expert = super().query_expert(**kwargs)\n            if 
self.cached_expert[1]:\n                return self.cached_expert[0] // 2, True\n            else:\n                return 0, False\n        else:\n            assert hasattr(self, \"cached_expert\")\n            if self.cached_expert[1]:\n                res = (self.cached_expert[0] % 2, True)\n            else:\n                res = (0, False)\n            del self.cached_expert\n            return res\n\n\nclass MiniGridTutorialExperimentConfig(ExperimentConfig):\n    @classmethod\n    def tag(cls) -> str:\n        return \"MiniGridTutorial\"\n\n    SENSORS = [\n        EgocentricMiniGridSensor(agent_view_size=5, view_channels=3),\n        ExpertActionSensor(\n            action_space=gym.spaces.Dict(\n                higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)\n            )\n        ),\n    ]\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        return ConditionedMiniGridSimpleConvRNN(\n            action_space=gym.spaces.Dict(\n                higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)\n            ),\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            num_objects=cls.SENSORS[0].num_objects,\n            num_colors=cls.SENSORS[0].num_colors,\n            num_states=cls.SENSORS[0].num_states,\n        )\n\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return MiniGridTaskSampler(**kwargs)\n\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(process_ind=process_ind, mode=\"train\")\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n    
    deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(process_ind=process_ind, mode=\"valid\")\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(process_ind=process_ind, mode=\"test\")\n\n    def _get_sampler_args(self, process_ind: int, mode: str) -> Dict[str, Any]:\n        \"\"\"Generate initialization arguments for train, valid, and test\n        TaskSamplers.\n\n        # Parameters\n        process_ind : index of the current task sampler\n        mode:  one of `train`, `valid`, or `test`\n        \"\"\"\n        if mode == \"train\":\n            max_tasks = None  # infinite training tasks\n            task_seeds_list = None  # no predefined random seeds for training\n            deterministic_sampling = False  # randomly sample tasks in training\n        else:\n            max_tasks = 20 + 20 * (\n                mode == \"test\"\n            )  # 20 tasks for valid, 40 for test (per sampler)\n\n            # one seed for each task to sample:\n            # - ensures different seeds for each sampler, and\n            # - ensures a deterministic set of sampled tasks.\n            task_seeds_list = list(\n                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)\n            )\n\n            deterministic_sampling = (\n                True  # deterministically sample task in validation/testing\n            )\n\n        return dict(\n            max_tasks=max_tasks,  # see above\n            env_class=self.make_env,  # builder for third-party environment (defined below)\n            sensors=self.SENSORS,  # sensors used to return observations to the agent\n            env_info=dict(),  # parameters for environment builder (none for now)\n   
         task_seeds_list=task_seeds_list,  # see above\n            deterministic_sampling=deterministic_sampling,  # see above\n            task_class=ConditionedMiniGridTask,\n        )\n\n    @staticmethod\n    def make_env(*args, **kwargs):\n        return EmptyRandomEnv5x5()\n\n    @classmethod\n    def machine_params(cls, mode=\"train\", **kwargs) -> Dict[str, Any]:\n        return {\n            \"nprocesses\": 128 if mode == \"train\" else 16,\n            \"devices\": [],\n        }\n\n    @classmethod\n    def training_pipeline(cls, **kwargs) -> TrainingPipeline:\n        ppo_steps = int(150000)\n        return TrainingPipeline(\n            named_losses=dict(\n                imitation_loss=Imitation(\n                    cls.SENSORS[1]\n                ),  # 0 is Minigrid, 1 is ExpertActionSensor\n                ppo_loss=PPO(**PPOConfig, entropy_method_name=\"conditional_entropy\"),\n            ),  # type:ignore\n            pipeline_stages=[\n                PipelineStage(\n                    teacher_forcing=LinearDecay(\n                        startp=1.0,\n                        endp=0.0,\n                        steps=ppo_steps // 2,\n                    ),\n                    loss_names=[\"imitation_loss\", \"ppo_loss\"],\n                    max_stage_steps=ppo_steps,\n                )\n            ],\n            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4)),\n            num_mini_batch=4,\n            update_repeats=3,\n            max_grad_norm=0.5,\n            num_steps=16,\n            gamma=0.99,\n            use_gae=True,\n            gae_lambda=0.95,\n            advance_scene_rollout_period=None,\n            save_interval=10000,\n            metric_accumulate_interval=1,\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}  # type:ignore\n            ),\n        )\n"
  },
  {
    "path": "projects/tutorials/navtopartner_robothor_rgb_ppo.py",
    "content": "from math import ceil\nfrom typing import Dict, Any, List, Optional\n\nimport gym\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses import PPO\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.utils.experiment_utils import (\n    Builder,\n    PipelineStage,\n    TrainingPipeline,\n    LinearDecay,\n)\nfrom allenact.utils.multi_agent_viz_utils import MultiTrajectoryViz\nfrom allenact.utils.viz_utils import VizSuite, AgentViewViz\nfrom allenact_plugins.robothor_plugin.robothor_models import (\n    NavToPartnerActorCriticSimpleConvRNN,\n)\nfrom allenact_plugins.robothor_plugin.robothor_sensors import RGBSensorMultiRoboThor\nfrom allenact_plugins.robothor_plugin.robothor_task_samplers import (\n    NavToPartnerTaskSampler,\n)\nfrom allenact_plugins.robothor_plugin.robothor_tasks import NavToPartnerTask\nfrom allenact_plugins.robothor_plugin.robothor_viz import ThorMultiViz\n\n\nclass NavToPartnerRoboThorRGBPPOExperimentConfig(ExperimentConfig):\n    \"\"\"A Multi-Agent Navigation experiment configuration in RoboThor.\"\"\"\n\n    # Task Parameters\n    MAX_STEPS = 500\n    REWARD_CONFIG = {\n        \"step_penalty\": -0.01,\n        \"max_success_distance\": 0.75,\n        \"success_reward\": 5.0,\n    }\n\n    # Simulator Parameters\n    CAMERA_WIDTH = 300\n    CAMERA_HEIGHT = 300\n    SCREEN_SIZE = 224\n\n    # Training Engine Parameters\n    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None\n    NUM_PROCESSES = 20\n    TRAINING_GPUS: List[int] = [0]\n    VALIDATION_GPUS: List[int] = [0]\n    TESTING_GPUS: List[int] = [0]\n\n    SENSORS = [\n        RGBSensorMultiRoboThor(\n            
agent_count=2,\n            height=SCREEN_SIZE,\n            width=SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb\",\n        ),\n    ]\n\n    OBSERVATIONS = [\n        \"rgb\",\n    ]\n\n    ENV_ARGS = dict(\n        width=CAMERA_WIDTH,\n        height=CAMERA_HEIGHT,\n        rotateStepDegrees=30.0,\n        visibilityDistance=1.0,\n        gridSize=0.25,\n        agentCount=2,\n    )\n\n    @classmethod\n    def tag(cls):\n        return \"NavToPartnerRobothorRGBPPO\"\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        ppo_steps = int(1000000)\n        lr = 3e-4\n        num_mini_batch = 1\n        update_repeats = 3\n        num_steps = 30\n        save_interval = 200000\n        log_interval = 1\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 0.95\n        max_grad_norm = 0.5\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=log_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses={\"ppo_loss\": PPO(**PPOConfig)},\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,\n            pipeline_stages=[\n                PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps)\n            ],\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}\n            ),\n        )\n\n    def split_num_processes(self, ndevices):\n        assert self.NUM_PROCESSES >= ndevices, \"NUM_PROCESSES {} < ndevices {}\".format(\n            self.NUM_PROCESSES, ndevices\n        )\n        res = [0] * ndevices\n        for it in range(self.NUM_PROCESSES):\n    
        res[it % ndevices] += 1\n        return res\n\n    viz: Optional[VizSuite] = None\n\n    def get_viz(self, mode):\n        if self.viz is not None:\n            return self.viz\n\n        self.viz = VizSuite(\n            mode=mode,\n            # Basic 2D trajectory visualizer (task output source):\n            base_trajectory=MultiTrajectoryViz(),  # plt_colormaps=[\"cool\", \"cool\"]),\n            # Egocentric view visualizer (vector task source):\n            egeocentric=AgentViewViz(max_video_length=100, max_episodes_in_group=1),\n            # Specialized 2D trajectory visualizer (task output source):\n            thor_trajectory=ThorMultiViz(\n                figsize=(16, 8),\n                viz_rows_cols=(448, 448),\n                scenes=(\"FloorPlan_Train{}_{}\", 1, 1, 1, 1),\n            ),\n        )\n\n        return self.viz\n\n    def machine_params(self, mode=\"train\", **kwargs):\n        visualizer = None\n        if mode == \"train\":\n            devices = (\n                [\"cpu\"] if not torch.cuda.is_available() else list(self.TRAINING_GPUS)\n            )\n            nprocesses = (\n                4\n                if not torch.cuda.is_available()\n                else self.split_num_processes(len(devices))\n            )\n        elif mode == \"valid\":\n            nprocesses = 0\n            devices = [\"cpu\"] if not torch.cuda.is_available() else self.VALIDATION_GPUS\n        elif mode == \"test\":\n            nprocesses = 1\n            devices = [\"cpu\"] if not torch.cuda.is_available() else self.TESTING_GPUS\n            visualizer = self.get_viz(mode=mode)\n        else:\n            raise NotImplementedError(\"mode must be 'train', 'valid', or 'test'.\")\n\n        return {\n            \"nprocesses\": nprocesses,\n            \"devices\": devices,\n            \"visualizer\": visualizer,\n        }\n\n    # TODO Define Model\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        return 
NavToPartnerActorCriticSimpleConvRNN(\n            action_space=gym.spaces.Tuple(\n                [\n                    gym.spaces.Discrete(len(NavToPartnerTask.class_action_names())),\n                    gym.spaces.Discrete(len(NavToPartnerTask.class_action_names())),\n                ]\n            ),\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            hidden_size=512,\n        )\n\n    # Define Task Sampler\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return NavToPartnerTaskSampler(**kwargs)\n\n    # Utility Functions for distributing scenes between GPUs\n    @staticmethod\n    def _partition_inds(n: int, num_parts: int):\n        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(\n            np.int32\n        )\n\n    def _get_sampler_args_for_scene_split(\n        self,\n        scenes: List[str],\n        process_ind: int,\n        total_processes: int,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        if total_processes > len(scenes):  # oversample some scenes -> bias\n            if total_processes % len(scenes) != 0:\n                print(\n                    \"Warning: oversampling some of the scenes to feed all processes.\"\n                    \" You can avoid this by setting a number of workers divisible by the number of scenes\"\n                )\n            scenes = scenes * int(ceil(total_processes / len(scenes)))\n            scenes = scenes[: total_processes * (len(scenes) // total_processes)]\n        else:\n            if len(scenes) % total_processes != 0:\n                print(\n                    \"Warning: oversampling some of the scenes to feed all processes.\"\n                    \" You can avoid this by setting a number of workers divisor of the number of scenes\"\n                )\n        inds = self._partition_inds(len(scenes), total_processes)\n\n        
return {\n            \"scenes\": scenes[inds[process_ind] : inds[process_ind + 1]],\n            \"max_steps\": self.MAX_STEPS,\n            \"sensors\": self.SENSORS,\n            \"action_space\": gym.spaces.Tuple(\n                [\n                    gym.spaces.Discrete(len(NavToPartnerTask.class_action_names())),\n                    gym.spaces.Discrete(len(NavToPartnerTask.class_action_names())),\n                ]\n            ),\n            \"seed\": seeds[process_ind] if seeds is not None else None,\n            \"deterministic_cudnn\": deterministic_cudnn,\n            \"rewards_config\": self.REWARD_CONFIG,\n        }\n\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        scenes = [\"FloorPlan_Train1_1\"]\n\n        res = self._get_sampler_args_for_scene_split(\n            scenes,\n            process_ind,\n            total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"env_args\"] = {\n            **self.ENV_ARGS,\n            \"x_display\": (\n                (\"0.%d\" % devices[process_ind % len(devices)])\n                if devices is not None and len(devices) > 0\n                else None\n            ),\n        }\n        return res\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        scenes = [\"FloorPlan_Train1_1\"]\n\n        res = self._get_sampler_args_for_scene_split(\n            scenes,\n            process_ind,\n            total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n   
     )\n        res[\"env_args\"] = {\n            **self.ENV_ARGS,\n            \"x_display\": (\n                (\"0.%d\" % devices[process_ind % len(devices)])\n                if devices is not None and len(devices) > 0\n                else None\n            ),\n        }\n        res[\"max_tasks\"] = 20\n        return res\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        scenes = [\"FloorPlan_Train1_1\"]\n\n        res = self._get_sampler_args_for_scene_split(\n            scenes,\n            process_ind,\n            total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"env_args\"] = {\n            **self.ENV_ARGS,\n            \"x_display\": (\n                (\"0.%d\" % devices[process_ind % len(devices)])\n                if devices is not None and len(devices) > 0\n                else None\n            ),\n        }\n        res[\"max_tasks\"] = 4\n        return res\n"
  },
  {
    "path": "projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object.py",
    "content": "import torch\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses import PPO\nfrom allenact.algorithms.onpolicy_sync.losses.imitation import Imitation\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig\nfrom allenact.utils.experiment_utils import (\n    Builder,\n    PipelineStage,\n    TrainingPipeline,\n    LinearDecay,\n)\nfrom allenact.base_abstractions.sensor import ExpertActionSensor\nfrom projects.tutorials.object_nav_ithor_ppo_one_object import (\n    ObjectNavThorPPOExperimentConfig,\n    ObjectNaviThorGridTask,\n)\n\n\nclass ObjectNavThorDaggerThenPPOExperimentConfig(ObjectNavThorPPOExperimentConfig):\n    \"\"\"A simple object navigation experiment in THOR.\n\n    Training with DAgger and then PPO.\n    \"\"\"\n\n    SENSORS = ObjectNavThorPPOExperimentConfig.SENSORS + [\n        ExpertActionSensor(\n            action_space=len(ObjectNaviThorGridTask.class_action_names()),\n        ),\n    ]\n\n    @classmethod\n    def tag(cls):\n        return \"ObjectNavThorDaggerThenPPO\"\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        dagger_steos = int(1e4)\n        ppo_steps = int(1e6)\n        lr = 2.5e-4\n        num_mini_batch = 2 if not torch.cuda.is_available() else 6\n        update_repeats = 4\n        num_steps = 128\n        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max length tasks\n        save_interval = 10000\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 1.0\n        max_grad_norm = 0.5\n\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=metric_accumulate_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            
named_losses={\n                \"ppo_loss\": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),\n                \"imitation_loss\": Imitation(),  # We add an imitation loss.\n            },\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=[\"imitation_loss\"],\n                    teacher_forcing=LinearDecay(\n                        startp=1.0,\n                        endp=0.0,\n                        steps=dagger_steos,\n                    ),\n                    max_stage_steps=dagger_steos,\n                ),\n                PipelineStage(\n                    loss_names=[\"ppo_loss\"],\n                    max_stage_steps=ppo_steps,\n                ),\n            ],\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}\n            ),\n        )\n"
  },
  {
    "path": "projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object_viz.py",
    "content": "from projects.tutorials.object_nav_ithor_dagger_then_ppo_one_object import (\n    ObjectNavThorDaggerThenPPOExperimentConfig,\n)\nfrom allenact.utils.viz_utils import (\n    VizSuite,\n    TrajectoryViz,\n    AgentViewViz,\n    ActorViz,\n    TensorViz1D,\n)\nfrom allenact_plugins.ithor_plugin.ithor_viz import ThorViz\n\n\nclass ObjectNavThorDaggerThenPPOVizExperimentConfig(\n    ObjectNavThorDaggerThenPPOExperimentConfig\n):\n    \"\"\"A simple object navigation experiment in THOR.\n\n    Training with DAgger and then PPO + using viz for test.\n    \"\"\"\n\n    TEST_SAMPLES_IN_SCENE = 4\n\n    @classmethod\n    def tag(cls):\n        return \"ObjectNavThorDaggerThenPPOViz\"\n\n    viz = None\n\n    def get_viz(self, mode):\n        if self.viz is not None:\n            return self.viz\n\n        self.viz = VizSuite(\n            mode=mode,\n            base_trajectory=TrajectoryViz(\n                path_to_target_location=None,\n                path_to_rot_degrees=(\"rotation\",),\n            ),\n            egeocentric=AgentViewViz(max_video_length=100),\n            action_probs=ActorViz(figsize=(3.25, 10), fontsize=18),\n            taken_action_logprobs=TensorViz1D(),\n            episode_mask=TensorViz1D(rollout_source=(\"masks\",)),\n            thor_trajectory=ThorViz(\n                path_to_target_location=None,\n                figsize=(8, 8),\n                viz_rows_cols=(448, 448),\n            ),\n        )\n\n        return self.viz\n\n    def machine_params(self, mode=\"train\", **kwargs):\n        params = super().machine_params(mode, **kwargs)\n\n        if mode == \"test\":\n            params.set_visualizer(self.get_viz(mode))\n\n        return params\n"
  },
  {
    "path": "projects/tutorials/object_nav_ithor_ppo_one_object.py",
    "content": "from math import ceil\nfrom typing import Dict, Any, List, Optional\n\nimport gym\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses import PPO\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.utils.experiment_utils import (\n    Builder,\n    PipelineStage,\n    TrainingPipeline,\n    LinearDecay,\n)\nfrom allenact_plugins.ithor_plugin.ithor_sensors import (\n    RGBSensorThor,\n    GoalObjectTypeThorSensor,\n)\nfrom allenact_plugins.ithor_plugin.ithor_task_samplers import ObjectNavTaskSampler\nfrom allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask\nfrom allenact_plugins.navigation_plugin.objectnav.models import ObjectNavActorCritic\n\n\nclass ObjectNavThorPPOExperimentConfig(ExperimentConfig):\n    \"\"\"A simple object navigation experiment in THOR.\n\n    Training with PPO.\n    \"\"\"\n\n    # A simple setting, train/valid/test are all the same single scene\n    # and we're looking for a single object\n    OBJECT_TYPES = [\"Tomato\"]\n    TRAIN_SCENES = [\"FloorPlan1_physics\"]\n    VALID_SCENES = [\"FloorPlan1_physics\"]\n    TEST_SCENES = [\"FloorPlan1_physics\"]\n\n    # Setting up sensors and basic environment details\n    SCREEN_SIZE = 224\n    SENSORS = [\n        RGBSensorThor(\n            height=SCREEN_SIZE,\n            width=SCREEN_SIZE,\n            use_resnet_normalization=True,\n        ),\n        GoalObjectTypeThorSensor(object_types=OBJECT_TYPES),\n    ]\n\n    ENV_ARGS = {\n        \"player_screen_height\": SCREEN_SIZE,\n        \"player_screen_width\": SCREEN_SIZE,\n        \"quality\": \"Very Low\",\n    }\n\n    MAX_STEPS = 128\n    
ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None\n    VALID_SAMPLES_IN_SCENE = 10\n    TEST_SAMPLES_IN_SCENE = 100\n\n    @classmethod\n    def tag(cls):\n        return \"ObjectNavThorPPO\"\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        ppo_steps = int(1e6)\n        lr = 2.5e-4\n        num_mini_batch = 2 if not torch.cuda.is_available() else 6\n        update_repeats = 4\n        num_steps = 128\n        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max length tasks\n        save_interval = 10000\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 1.0\n        max_grad_norm = 0.5\n\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=metric_accumulate_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses={\n                \"ppo_loss\": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),\n            },\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=[\"ppo_loss\"],\n                    max_stage_steps=ppo_steps,\n                ),\n            ],\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}\n            ),\n        )\n\n    @classmethod\n    def machine_params(cls, mode=\"train\", **kwargs):\n        num_gpus = torch.cuda.device_count()\n        has_gpu = num_gpus != 0\n\n        if mode == \"train\":\n            nprocesses = 20 if has_gpu else 4\n            gpu_ids = [0] if has_gpu else []\n        elif mode == \"valid\":\n            nprocesses = 
1\n            gpu_ids = [1 % num_gpus] if has_gpu else []\n        elif mode == \"test\":\n            nprocesses = 1\n            gpu_ids = [0] if has_gpu else []\n        else:\n            raise NotImplementedError(\"mode must be 'train', 'valid', or 'test'.\")\n\n        return MachineParams(\n            nprocesses=nprocesses,\n            devices=gpu_ids,\n        )\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        return ObjectNavActorCritic(\n            action_space=gym.spaces.Discrete(\n                len(ObjectNaviThorGridTask.class_action_names())\n            ),\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            rgb_uuid=cls.SENSORS[0].uuid,\n            depth_uuid=None,\n            goal_sensor_uuid=\"goal_object_type_ind\",\n            hidden_size=512,\n            object_type_embedding_dim=8,\n        )\n\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return ObjectNavTaskSampler(**kwargs)\n\n    @staticmethod\n    def _partition_inds(n: int, num_parts: int):\n        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(\n            np.int32\n        )\n\n    def _get_sampler_args_for_scene_split(\n        self,\n        scenes: List[str],\n        process_ind: int,\n        total_processes: int,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        if total_processes > len(scenes):  # oversample some scenes -> bias\n            if total_processes % len(scenes) != 0:\n                print(\n                    \"Warning: oversampling some of the scenes to feed all processes.\"\n                    \" You can avoid this by setting a number of workers divisible by the number of scenes\"\n                )\n            scenes = scenes * int(ceil(total_processes / len(scenes)))\n            scenes = scenes[: total_processes * (len(scenes) // total_processes)]\n 
       else:\n            if len(scenes) % total_processes != 0:\n                print(\n                    \"Warning: oversampling some of the scenes to feed all processes.\"\n                    \" You can avoid this by setting a number of workers divisor of the number of scenes\"\n                )\n        inds = self._partition_inds(len(scenes), total_processes)\n\n        return {\n            \"scenes\": scenes[inds[process_ind] : inds[process_ind + 1]],\n            \"object_types\": self.OBJECT_TYPES,\n            \"env_args\": self.ENV_ARGS,\n            \"max_steps\": self.MAX_STEPS,\n            \"sensors\": self.SENSORS,\n            \"action_space\": gym.spaces.Discrete(\n                len(ObjectNaviThorGridTask.class_action_names())\n            ),\n            \"seed\": seeds[process_ind] if seeds is not None else None,\n            \"deterministic_cudnn\": deterministic_cudnn,\n        }\n\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            self.TRAIN_SCENES,\n            process_ind,\n            total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"scene_period\"] = \"manual\"\n        res[\"env_args\"] = {}\n        res[\"env_args\"].update(self.ENV_ARGS)\n        res[\"env_args\"][\"x_display\"] = (\n            (\"0.%d\" % devices[process_ind % len(devices)])\n            if devices is not None and len(devices) > 0\n            else None\n        )\n        return res\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: 
bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            self.VALID_SCENES,\n            process_ind,\n            total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"scene_period\"] = self.VALID_SAMPLES_IN_SCENE\n        res[\"max_tasks\"] = self.VALID_SAMPLES_IN_SCENE * len(res[\"scenes\"])\n        res[\"env_args\"] = {}\n        res[\"env_args\"].update(self.ENV_ARGS)\n        res[\"env_args\"][\"x_display\"] = (\n            (\"0.%d\" % devices[process_ind % len(devices)])\n            if devices is not None and len(devices) > 0\n            else None\n        )\n        return res\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            self.TEST_SCENES,\n            process_ind,\n            total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"scene_period\"] = self.TEST_SAMPLES_IN_SCENE\n        res[\"max_tasks\"] = self.TEST_SAMPLES_IN_SCENE * len(res[\"scenes\"])\n        res[\"env_args\"] = {}\n        res[\"env_args\"].update(self.ENV_ARGS)\n        res[\"env_args\"][\"x_display\"] = (\n            (\"0.%d\" % devices[process_ind % len(devices)])\n            if devices is not None and len(devices) > 0\n            else None\n        )\n        return res\n"
  },
  {
    "path": "projects/tutorials/pointnav_habitat_rgb_ddppo.py",
    "content": "import os\nfrom typing import Dict, Any, List, Optional, Sequence\n\nimport gym\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\nfrom torchvision import models\n\nfrom allenact.algorithms.onpolicy_sync.losses import PPO\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams\nfrom allenact.base_abstractions.preprocessor import SensorPreprocessorGraph\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor\nfrom allenact.utils.experiment_utils import (\n    Builder,\n    PipelineStage,\n    TrainingPipeline,\n    LinearDecay,\n    evenly_distribute_count_into_bins,\n)\nfrom allenact_plugins.habitat_plugin.habitat_constants import (\n    HABITAT_DATASETS_DIR,\n    HABITAT_CONFIGS_DIR,\n)\nfrom allenact_plugins.habitat_plugin.habitat_sensors import (\n    RGBSensorHabitat,\n    TargetCoordinatesSensorHabitat,\n)\nfrom allenact_plugins.habitat_plugin.habitat_task_samplers import PointNavTaskSampler\nfrom allenact_plugins.habitat_plugin.habitat_utils import (\n    construct_env_configs,\n    get_habitat_config,\n)\nfrom allenact_plugins.navigation_plugin.objectnav.models import (\n    ResnetTensorNavActorCritic,\n)\nfrom allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask\n\n\nclass PointNavHabitatRGBPPOTutorialExperimentConfig(ExperimentConfig):\n    \"\"\"A Point Navigation experiment configuration in Habitat.\"\"\"\n\n    # Task Parameters\n    MAX_STEPS = 500\n    REWARD_CONFIG = {\n        \"step_penalty\": -0.01,\n        \"goal_success_reward\": 10.0,\n        \"failed_stop_reward\": 0.0,\n        \"shaping_weight\": 1.0,\n    }\n    DISTANCE_TO_GOAL = 0.2\n\n    # Simulator Parameters\n    CAMERA_WIDTH = 640\n    CAMERA_HEIGHT = 480\n   
 SCREEN_SIZE = 224\n\n    # Training Engine Parameters\n    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None\n    NUM_PROCESSES = max(5 * torch.cuda.device_count() - 1, 4)\n    TRAINING_GPUS = list(range(torch.cuda.device_count()))\n    VALIDATION_GPUS = [torch.cuda.device_count() - 1]\n    TESTING_GPUS = [torch.cuda.device_count() - 1]\n\n    task_data_dir_template = os.path.join(\n        HABITAT_DATASETS_DIR, \"pointnav/gibson/v1/{}/{}.json.gz\"\n    )\n    TRAIN_SCENES = task_data_dir_template.format(*([\"train\"] * 2))\n    VALID_SCENES = task_data_dir_template.format(*([\"val\"] * 2))\n    TEST_SCENES = task_data_dir_template.format(*([\"test\"] * 2))\n\n    CONFIG = get_habitat_config(\n        os.path.join(HABITAT_CONFIGS_DIR, \"tasks/pointnav_gibson.yaml\")\n    )\n    CONFIG.defrost()\n    CONFIG.NUM_PROCESSES = NUM_PROCESSES\n    CONFIG.SIMULATOR_GPU_IDS = TRAINING_GPUS\n    CONFIG.DATASET.SCENES_DIR = \"habitat/habitat-api/data/scene_datasets/\"\n    CONFIG.DATASET.POINTNAVV1.CONTENT_SCENES = [\"*\"]\n    CONFIG.DATASET.DATA_PATH = TRAIN_SCENES\n    CONFIG.SIMULATOR.AGENT_0.SENSORS = [\"RGB_SENSOR\"]\n    CONFIG.SIMULATOR.RGB_SENSOR.WIDTH = CAMERA_WIDTH\n    CONFIG.SIMULATOR.RGB_SENSOR.HEIGHT = CAMERA_HEIGHT\n    CONFIG.SIMULATOR.TURN_ANGLE = 30\n    CONFIG.SIMULATOR.FORWARD_STEP_SIZE = 0.25\n    CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = MAX_STEPS\n\n    CONFIG.TASK.TYPE = \"Nav-v0\"\n    CONFIG.TASK.SUCCESS_DISTANCE = DISTANCE_TO_GOAL\n    CONFIG.TASK.SENSORS = [\"POINTGOAL_WITH_GPS_COMPASS_SENSOR\"]\n    CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.GOAL_FORMAT = \"POLAR\"\n    CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.DIMENSIONALITY = 2\n    CONFIG.TASK.GOAL_SENSOR_UUID = \"pointgoal_with_gps_compass\"\n    CONFIG.TASK.MEASUREMENTS = [\"DISTANCE_TO_GOAL\", \"SUCCESS\", \"SPL\"]\n    CONFIG.TASK.SPL.TYPE = \"SPL\"\n    CONFIG.TASK.SPL.SUCCESS_DISTANCE = DISTANCE_TO_GOAL\n    CONFIG.TASK.SUCCESS.SUCCESS_DISTANCE = DISTANCE_TO_GOAL\n\n    CONFIG.MODE 
= \"train\"\n\n    SENSORS = [\n        RGBSensorHabitat(\n            height=SCREEN_SIZE,\n            width=SCREEN_SIZE,\n            use_resnet_normalization=True,\n        ),\n        TargetCoordinatesSensorHabitat(coordinate_dims=2),\n    ]\n\n    PREPROCESSORS = [\n        Builder(\n            ResNetPreprocessor,\n            {\n                \"input_height\": SCREEN_SIZE,\n                \"input_width\": SCREEN_SIZE,\n                \"output_width\": 7,\n                \"output_height\": 7,\n                \"output_dims\": 512,\n                \"pool\": False,\n                \"torchvision_resnet_model\": models.resnet18,\n                \"input_uuids\": [\"rgb_lowres\"],\n                \"output_uuid\": \"rgb_resnet\",\n            },\n        ),\n    ]\n\n    OBSERVATIONS = [\n        \"rgb_resnet\",\n        \"target_coordinates_ind\",\n    ]\n\n    TRAIN_CONFIGS = construct_env_configs(CONFIG)\n\n    @classmethod\n    def tag(cls):\n        return \"PointNavHabitatRGBPPO\"\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        ppo_steps = int(250000000)\n        lr = 3e-4\n        num_mini_batch = 1\n        update_repeats = 3\n        num_steps = 30\n        save_interval = 5000000\n        log_interval = 10000\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 0.95\n        max_grad_norm = 0.5\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=log_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses={\"ppo_loss\": PPO(**PPOConfig)},\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,\n            pipeline_stages=[\n        
        PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps)\n            ],\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}\n            ),\n        )\n\n    def machine_params(self, mode=\"train\", **kwargs):\n        if mode == \"train\":\n            workers_per_device = 1\n            gpu_ids = (\n                []\n                if not torch.cuda.is_available()\n                else self.TRAINING_GPUS * workers_per_device\n            )\n            nprocesses = (\n                1\n                if not torch.cuda.is_available()\n                else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))\n            )\n        elif mode == \"valid\":\n            nprocesses = 1\n            gpu_ids = [] if not torch.cuda.is_available() else self.VALIDATION_GPUS\n        elif mode == \"test\":\n            nprocesses = 1\n            gpu_ids = [] if not torch.cuda.is_available() else self.TESTING_GPUS\n        else:\n            raise NotImplementedError(\"mode must be 'train', 'valid', or 'test'.\")\n\n        sensor_preprocessor_graph = (\n            SensorPreprocessorGraph(\n                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,\n                preprocessors=self.PREPROCESSORS,\n            )\n            if mode == \"train\"\n            or (\n                (isinstance(nprocesses, int) and nprocesses > 0)\n                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)\n            )\n            else None\n        )\n\n        return MachineParams(\n            nprocesses=nprocesses,\n            devices=gpu_ids,\n            sensor_preprocessor_graph=sensor_preprocessor_graph,\n        )\n\n    # Define Model\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        return ResnetTensorNavActorCritic(\n            
action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),\n            observation_space=kwargs[\"sensor_preprocessor_graph\"].observation_spaces,\n            goal_sensor_uuid=\"target_coordinates_ind\",\n            rgb_resnet_preprocessor_uuid=\"rgb_resnet\",\n            hidden_size=512,\n            goal_dims=32,\n        )\n\n    # Define Task Sampler\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return PointNavTaskSampler(**kwargs)\n\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        config = self.TRAIN_CONFIGS[process_ind]\n        return {\n            \"env_config\": config,\n            \"max_steps\": self.MAX_STEPS,\n            \"sensors\": self.SENSORS,\n            \"action_space\": gym.spaces.Discrete(len(PointNavTask.class_action_names())),\n            \"distance_to_goal\": self.DISTANCE_TO_GOAL,  # type:ignore\n        }\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        config = self.CONFIG.clone()\n        config.defrost()\n        config.DATASET.DATA_PATH = self.VALID_SCENES\n        config.MODE = \"validate\"\n        config.freeze()\n        return {\n            \"env_config\": config,\n            \"max_steps\": self.MAX_STEPS,\n            \"sensors\": self.SENSORS,\n            \"action_space\": gym.spaces.Discrete(len(PointNavTask.class_action_names())),\n            \"distance_to_goal\": self.DISTANCE_TO_GOAL,  # type:ignore\n        }\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: 
int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        raise NotImplementedError(\"Testing not implemented for this tutorial.\")\n"
  },
  {
    "path": "projects/tutorials/pointnav_ithor_rgb_ddppo.py",
    "content": "import glob\nimport os\nfrom math import ceil\nfrom typing import Dict, Any, List, Optional, Sequence\n\nimport gym\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\nfrom torchvision import models\n\nfrom allenact.algorithms.onpolicy_sync.losses import PPO\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams\nfrom allenact.base_abstractions.preprocessor import SensorPreprocessorGraph\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor\nfrom allenact.utils.experiment_utils import (\n    Builder,\n    PipelineStage,\n    TrainingPipeline,\n    LinearDecay,\n    evenly_distribute_count_into_bins,\n)\nfrom allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor\nfrom allenact_plugins.navigation_plugin.objectnav.models import (\n    ResnetTensorNavActorCritic,\n)\nfrom allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor\nfrom allenact_plugins.robothor_plugin.robothor_task_samplers import (\n    PointNavDatasetTaskSampler,\n)\nfrom allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask\n\n\nclass PointNaviThorRGBPPOExperimentConfig(ExperimentConfig):\n    \"\"\"A Point Navigation experiment configuration in iTHOR.\"\"\"\n\n    # Task Parameters\n    MAX_STEPS = 500\n    REWARD_CONFIG = {\n        \"step_penalty\": -0.01,\n        \"goal_success_reward\": 10.0,\n        \"failed_stop_reward\": 0.0,\n        \"shaping_weight\": 1.0,\n    }\n\n    # Simulator Parameters\n    CAMERA_WIDTH = 640\n    CAMERA_HEIGHT = 480\n    SCREEN_SIZE = 224\n\n    # Training Engine Parameters\n    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None\n    NUM_PROCESSES = 60\n    TRAINING_GPUS = 
list(range(torch.cuda.device_count()))\n    VALIDATION_GPUS = [torch.cuda.device_count() - 1]\n    TESTING_GPUS = [torch.cuda.device_count() - 1]\n\n    # Dataset Parameters\n    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), \"datasets/ithor-objectnav/train\")\n    VAL_DATASET_DIR = os.path.join(os.getcwd(), \"datasets/ithor-objectnav/val\")\n\n    SENSORS = [\n        RGBSensorThor(\n            height=SCREEN_SIZE,\n            width=SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        GPSCompassSensorRoboThor(),\n    ]\n\n    PREPROCESSORS = [\n        Builder(\n            ResNetPreprocessor,\n            {\n                \"input_height\": SCREEN_SIZE,\n                \"input_width\": SCREEN_SIZE,\n                \"output_width\": 7,\n                \"output_height\": 7,\n                \"output_dims\": 512,\n                \"pool\": False,\n                \"torchvision_resnet_model\": models.resnet18,\n                \"input_uuids\": [\"rgb_lowres\"],\n                \"output_uuid\": \"rgb_resnet\",\n            },\n        ),\n    ]\n\n    OBSERVATIONS = [\n        \"rgb_resnet\",\n        \"target_coordinates_ind\",\n    ]\n\n    ENV_ARGS = dict(\n        width=CAMERA_WIDTH,\n        height=CAMERA_HEIGHT,\n        rotateStepDegrees=30.0,\n        visibilityDistance=1.0,\n        gridSize=0.25,\n    )\n\n    @classmethod\n    def tag(cls):\n        return \"PointNavithorRGBPPO\"\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        ppo_steps = int(250000000)\n        lr = 3e-4\n        num_mini_batch = 1\n        update_repeats = 3\n        num_steps = 30\n        save_interval = 5000000\n        log_interval = 10000\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 0.95\n        max_grad_norm = 0.5\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=log_interval,\n            
optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses={\"ppo_loss\": PPO(**PPOConfig)},\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,\n            pipeline_stages=[\n                PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps)\n            ],\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}\n            ),\n        )\n\n    def machine_params(self, mode=\"train\", **kwargs):\n        sampler_devices: Sequence[int] = []\n        if mode == \"train\":\n            workers_per_device = 1\n            gpu_ids = (\n                []\n                if not torch.cuda.is_available()\n                else self.TRAINING_GPUS * workers_per_device\n            )\n            nprocesses = (\n                1\n                if not torch.cuda.is_available()\n                else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))\n            )\n            sampler_devices = self.TRAINING_GPUS\n        elif mode == \"valid\":\n            nprocesses = 1\n            gpu_ids = [] if not torch.cuda.is_available() else self.VALIDATION_GPUS\n        elif mode == \"test\":\n            nprocesses = 1\n            gpu_ids = [] if not torch.cuda.is_available() else self.TESTING_GPUS\n        else:\n            raise NotImplementedError(\"mode must be 'train', 'valid', or 'test'.\")\n\n        sensor_preprocessor_graph = (\n            SensorPreprocessorGraph(\n                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,\n                preprocessors=self.PREPROCESSORS,\n            )\n            if mode == \"train\"\n            or (\n          
      (isinstance(nprocesses, int) and nprocesses > 0)\n                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)\n            )\n            else None\n        )\n\n        return MachineParams(\n            nprocesses=nprocesses,\n            devices=gpu_ids,\n            sampler_devices=(\n                sampler_devices if mode == \"train\" else gpu_ids\n            ),  # ignored with > 1 gpu_ids\n            sensor_preprocessor_graph=sensor_preprocessor_graph,\n        )\n\n    # Define Model\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        return ResnetTensorNavActorCritic(\n            action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),\n            observation_space=kwargs[\"sensor_preprocessor_graph\"].observation_spaces,\n            goal_sensor_uuid=\"target_coordinates_ind\",\n            rgb_resnet_preprocessor_uuid=\"rgb_resnet\",\n            hidden_size=512,\n            goal_dims=32,\n        )\n\n    # Define Task Sampler\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return PointNavDatasetTaskSampler(**kwargs)\n\n    # Utility Functions for distributing scenes between GPUs\n    @staticmethod\n    def _partition_inds(n: int, num_parts: int):\n        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(\n            np.int32\n        )\n\n    def _get_sampler_args_for_scene_split(\n        self,\n        scenes_dir: str,\n        process_ind: int,\n        total_processes: int,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        path = os.path.join(scenes_dir, \"*.json.gz\")\n        scenes = [scene.split(\"/\")[-1].split(\".\")[0] for scene in glob.glob(path)]\n        if len(scenes) == 0:\n            raise RuntimeError(\n                (\n                    \"Could find no scene dataset information in directory {}.\"\n                    \" Are you 
sure you've downloaded them? \"\n                    \" If not, see https://allenact.org/installation/download-datasets/ information\"\n                    \" on how this can be done.\"\n                ).format(scenes_dir)\n            )\n        if total_processes > len(scenes):  # oversample some scenes -> bias\n            if total_processes % len(scenes) != 0:\n                print(\n                    \"Warning: oversampling some of the scenes to feed all processes.\"\n                    \" You can avoid this by setting a number of workers divisible by the number of scenes\"\n                )\n            scenes = scenes * int(ceil(total_processes / len(scenes)))\n            scenes = scenes[: total_processes * (len(scenes) // total_processes)]\n        else:\n            if len(scenes) % total_processes != 0:\n                print(\n                    \"Warning: oversampling some of the scenes to feed all processes.\"\n                    \" You can avoid this by setting a number of workers divisor of the number of scenes\"\n                )\n        inds = self._partition_inds(len(scenes), total_processes)\n\n        return {\n            \"scenes\": scenes[inds[process_ind] : inds[process_ind + 1]],\n            \"max_steps\": self.MAX_STEPS,\n            \"sensors\": self.SENSORS,\n            \"action_space\": gym.spaces.Discrete(len(PointNavTask.class_action_names())),\n            \"seed\": seeds[process_ind] if seeds is not None else None,\n            \"deterministic_cudnn\": deterministic_cudnn,\n            \"rewards_config\": self.REWARD_CONFIG,\n        }\n\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            os.path.join(self.TRAIN_DATASET_DIR, \"episodes\"),\n  
          process_ind,\n            total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"scene_directory\"] = self.TRAIN_DATASET_DIR\n        res[\"loop_dataset\"] = True\n        res[\"env_args\"] = {}\n        res[\"env_args\"].update(self.ENV_ARGS)\n        res[\"env_args\"][\"x_display\"] = (\n            (\"0.%d\" % devices[process_ind % len(devices)])\n            if devices is not None and len(devices) > 0\n            else None\n        )\n        res[\"allow_flipping\"] = True\n        return res\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            os.path.join(self.VAL_DATASET_DIR, \"episodes\"),\n            process_ind,\n            total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"scene_directory\"] = self.VAL_DATASET_DIR\n        res[\"loop_dataset\"] = False\n        res[\"env_args\"] = {}\n        res[\"env_args\"].update(self.ENV_ARGS)\n        res[\"env_args\"][\"x_display\"] = (\n            (\"0.%d\" % devices[process_ind % len(devices)])\n            if devices is not None and len(devices) > 0\n            else None\n        )\n        return res\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            os.path.join(self.VAL_DATASET_DIR, \"episodes\"),\n            process_ind,\n            total_processes,\n            seeds=seeds,\n            
deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"scene_directory\"] = self.VAL_DATASET_DIR\n        res[\"loop_dataset\"] = False\n        res[\"env_args\"] = {}\n        res[\"env_args\"].update(self.ENV_ARGS)\n        return res\n"
  },
  {
    "path": "projects/tutorials/running_inference_tutorial.py",
    "content": "# literate: tutorials/running-inference-on-a-pretrained-model.md\n# %%\n\"\"\"# Tutorial: Inference with a pre-trained model.\"\"\"\n\n# %%\n\"\"\"\nIn this tutorial we will run inference on a pre-trained model for the PointNav task\nin the RoboTHOR environment. In this task the agent is tasked with going to a specific location\nwithin a realistic 3D environment.\n\nFor information on how to train a PointNav Model see [this tutorial](training-a-pointnav-model.md)\n\nWe will need to [install the full AllenAct library](../installation/installation-allenact.md#full-library),\nthe `robothor_plugin` requirements via\n\n```bash\npip install -r allenact_plugins/robothor_plugin/extra_requirements.txt\n```\n\nand [download the \nRoboTHOR Pointnav dataset](../installation/download-datasets.md) before we get started.\n\nFor this tutorial we will download the weights of a model trained on the debug dataset.\nThis can be done with a handy script in the `pretrained_model_ckpts` directory:\n```bash\nbash pretrained_model_ckpts/download_navigation_model_ckpts.sh robothor-pointnav-rgb-resnet\n```\nThis will download the weights for an RGB model that has been\ntrained on the PointNav task in RoboTHOR to `pretrained_model_ckpts/robothor-pointnav-rgb-resnet`\n\n\nNext we need to run the inference, using the PointNav experiment config from the\n[tutorial on making a PointNav experiment](training-a-pointnav-model.md).\nWe can do this with the following command:\n\n```bash\nPYTHONPATH=. python allenact/main.py -o <PATH_TO_OUTPUT> -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> -c <PATH_TO_CHECKPOINT> --eval\n```\n\nWhere `<PATH_TO_OUTPUT>` is the location where the results of the test will be dumped, `<PATH_TO_CHECKPOINT>` is the\nlocation of the downloaded model weights, and `<BASE_DIRECTORY_OF_YOUR_EXPERIMENT>` is a path to the directory where\nour experiment definition is stored.\n\nFor our current setup the following command would work:\n\n```bash\nPYTHONPATH=. 
python allenact/main.py \\\ntraining_a_pointnav_model \\\n-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \\\n-b projects/tutorials \\\n-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30/exp_PointNavRobothorRGBPPO__stage_00__steps_000039031200.pt \\\n--eval\n```\n\nFor testing on all saved checkpoints we pass a directory to `--checkpoint` rather than just a single file:\n\n```bash\nPYTHONPATH=. python allenact/main.py \\\ntraining_a_pointnav_model \\\n-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \\\n-b projects/tutorials  \\\n-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30 \\\n--eval\n```\n## Visualization\n\nWe also show examples of visualizations that can be extracted from the `\"valid\"` and `\"test\"` modes. Currently,\nvisualization is still undergoing design changes and does not support multi-agent tasks, but the available functionality\nis sufficient for pointnav in RoboThor.\n\nFollowing up on the example above, we can make a specialized pointnav `ExperimentConfig` where we instantiate\nthe base visualization class, `VizSuite`, defined in\n[`allenact.utils.viz_utils`](https://github.com/allenai/allenact/tree/master/allenact/utils/viz_utils.py), when in `test` mode.\n\nEach visualization type can be thought of as a plugin to the base `VizSuite`. For example, all `episode_ids` passed to\n`VizSuite` will be processed with each of the instantiated visualization types (possibly with the exception of the\n`AgentViewViz`). In the example below we show how to instantiate different visualization types from 4 different data\nsources.\n\nThe data sources available to `VizSuite` are:\n\n* Task output (e.g. 2D trajectories)\n* Vector task (e.g. egocentric views)\n* Rollout storage (e.g. recurrent memory, taken action logprobs...)\n* `ActorCriticOutput` (e.g. 
action probabilities)\n\nThe visualization types included below are:\n\n* `TrajectoryViz`: Generic 2D trajectory view.\n* `AgentViewViz`: RGB egocentric view.\n* `ActorViz`: Action probabilities from `ActorCriticOutput[CategoricalDistr]`.\n* `TensorViz1D`: Evolution of a point from RolloutStorage over time.\n* `TensorViz2D`: Evolution of a vector from RolloutStorage over time.\n* `ThorViz`: Specialized 2D trajectory view\n[for RoboThor](https://github.com/allenai/allenact/tree/master/allenact_plugins/robothor_plugin/robothor_viz.py).\n\nNote that we need to explicitly set the `episode_ids` that we wish to visualize. For `AgentViewViz` we have the option\nof using a different (typically shorter) list of episodes or enforce the ones used for the rest of visualizations.\n\"\"\"\n\n# %% hide\nfrom typing import Optional\n\nfrom allenact.utils.viz_utils import (\n    VizSuite,\n    TrajectoryViz,\n    ActorViz,\n    AgentViewViz,\n    TensorViz1D,\n    TensorViz2D,\n)\nfrom allenact_plugins.robothor_plugin.robothor_viz import ThorViz\nfrom projects.tutorials.training_a_pointnav_model import (\n    PointNavRoboThorRGBPPOExperimentConfig,\n)\n\n\n# %%\nclass PointNavRoboThorRGBPPOVizExperimentConfig(PointNavRoboThorRGBPPOExperimentConfig):\n    \"\"\"ExperimentConfig used to demonstrate how to set up visualization code.\n\n    # Attributes\n\n    viz_ep_ids : Scene names that will be visualized.\n    viz_video_ids : Scene names that will have videos visualizations associated with them.\n    \"\"\"\n\n    viz_ep_ids = [\n        \"FloorPlan_Train1_1_3\",\n        \"FloorPlan_Train1_1_4\",\n        \"FloorPlan_Train1_1_5\",\n        \"FloorPlan_Train1_1_6\",\n    ]\n    viz_video_ids = [[\"FloorPlan_Train1_1_3\"], [\"FloorPlan_Train1_1_4\"]]\n\n    viz: Optional[VizSuite] = None\n\n    def get_viz(self, mode):\n        if self.viz is not None:\n            return self.viz\n\n        self.viz = VizSuite(\n            episode_ids=self.viz_ep_ids,\n            mode=mode,\n     
       # Basic 2D trajectory visualizer (task output source):\n            base_trajectory=TrajectoryViz(\n                path_to_target_location=(\n                    \"task_info\",\n                    \"target\",\n                ),\n            ),\n            # Egocentric view visualizer (vector task source):\n            egeocentric=AgentViewViz(\n                max_video_length=100, episode_ids=self.viz_video_ids\n            ),\n            # Default action probability visualizer (actor critic output source):\n            action_probs=ActorViz(figsize=(3.25, 10), fontsize=18),\n            # Default taken action logprob visualizer (rollout storage source):\n            taken_action_logprobs=TensorViz1D(),\n            # Same episode mask visualizer (rollout storage source):\n            episode_mask=TensorViz1D(rollout_source=(\"masks\",)),\n            # Default recurrent memory visualizer (rollout storage source):\n            rnn_memory=TensorViz2D(\n                rollout_source=(\"memory_first_last\", \"single_belief\")\n            ),\n            # Specialized 2D trajectory visualizer (task output source):\n            thor_trajectory=ThorViz(\n                figsize=(16, 8),\n                viz_rows_cols=(448, 448),\n                scenes=(\"FloorPlan_Train{}_{}\", 1, 1, 1, 1),\n            ),\n        )\n\n        return self.viz\n\n    def machine_params(self, mode=\"train\", **kwargs):\n        res = super().machine_params(mode, **kwargs)\n        if mode == \"test\":\n            res.set_visualizer(self.get_viz(mode))\n\n        return res\n\n\n# %%\n\"\"\"\nRunning test on the same downloaded models, but using the visualization-enabled `ExperimentConfig` with\n \n```bash\nPYTHONPATH=. 
python allenact/main.py \\\nrunning_inference_tutorial \\\n-o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \\\n-b projects/tutorials \\\n-c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30/exp_PointNavRobothorRGBPPO__stage_00__steps_000039031200.pt \\\n--eval\n```\n\ngenerates different types of visualization and logs them in tensorboard. If everything is properly set up and\ntensorboard includes the `robothor-pointnav-rgb-resnet` folder, under the `IMAGES` tab, we should see something similar\nto\n\n![Visualization example](../img/viz_pretrained_2videos.jpg)\n\"\"\"\n"
  },
  {
    "path": "projects/tutorials/training_a_pointnav_model.py",
    "content": "# literate: tutorials/training-a-pointnav-model.md\n# %%\n\"\"\"# Tutorial: PointNav in RoboTHOR.\"\"\"\n\n# %%\n\"\"\"\n![RoboTHOR Robot](../img/RoboTHOR_robot.jpg)\n## Introduction\nOne of the most obvious tasks that an embodied agent should master is navigating the world it inhabits.\nBefore we can teach a robot to cook or clean it first needs to be able to move around. The simplest\nway to formulate \"moving around\" into a task is by making your agent find a beacon somewhere in the environment.\nThis beacon transmits its location, such that at any time, the agent can get the direction and euclidian distance\nto the beacon. This particular task is often called Point Navigation, or **PointNav** for short.\n\n#### PointNav\nAt first glance, this task seems trivial. If the agent is given the direction and distance of the target at\nall times, can it not simply follow this signal directly? The answer is no, because agents are often trained\non this task in environments that emulate real-world buildings which are not wide-open spaces, but rather\ncontain many smaller rooms. Because of this, the agent has to learn to navigate human spaces and use doors\nand hallways to efficiently navigate from one side of the building to the other. This task becomes particularly\ndifficult when the agent is tested in an environment that it is not trained in. If the agent does not know\nhow the floor plan of an environment looks, it has to learn to predict the design of man-made structures,\nto efficiently navigate across them, much like how people instinctively know how to move around a building\nthey have never seen before based on their experience navigating similar buildings.\n\n#### What is an environment anyways?\nEnvironments are worlds in which embodied agents exist. If our embodied agent is simply a neural network that is being\ntrained in a simulator, then that simulator is its environment. 
Similarly, if our agent is a\nphysical robot then its environment is the real world. The agent interacts with the environment by taking one\nof several available actions (such as \"move forward\", or \"turn left\"). After each action, the environment\nproduces a new frame that the agent can analyze to determine its next step. For many tasks, including PointNav\nthe agent also has a special \"stop\" action which indicates that the agent thinks it has reached the target.\nAfter this action is called the agent will be reset to a new location, regardless if it reached the\ntarget. The hope is that after enough training the agent will learn to correctly assess that it has successfully\nnavigated to the target.\n\n![RoboTHOR Sim vs. Real](../img/RoboTHOR_sim_real.jpg)\n\nThere are many simulators designed for the training\nof embodied agents. In this tutorial, we will be using a simulator called [RoboTHOR](https://ai2thor.allenai.org/robothor/), \nwhich is designed specifically to train models that can easily be transferred to a real robot, by providing a\nphoto-realistic virtual environment and a real-world replica of the environment that researchers can have access to. \nRoboTHOR contains 60 different virtual scenes with different floor plans and furniture and 15 validation scenes.\n\nIt is also important to mention that **AllenAct**\nhas a class abstraction called Environment. This is not the actual simulator game engine or robotics controller,\nbut rather a shallow wrapper that provides a uniform interface to the actual environment.\n\n#### Learning algorithm\nFinally, let us briefly touch on the algorithm that we will use to train our embodied agent to navigate. While\n*AllenAct* offers us great flexibility to train models using complex pipelines, we will be using a simple\npure reinforcement learning approach for this tutorial. More specifically, we will be using DD-PPO,\na decentralized and distributed variant of the ubiquitous PPO algorithm. 
For those unfamiliar with Reinforcement\nLearning we highly recommend [this tutorial](http://karpathy.github.io/2016/05/31/rl/) by Andrej Karpathy, and [this\nbook](http://www.incompleteideas.net/book/the-book-2nd.html) by Sutton and Barto. Essentially what we are doing\nis letting our agent explore the environment on its own, rewarding it for taking actions that bring it closer\nto its goal and penalizing it for actions that take it away from its goal. We then optimize the agent's model\nto maximize this reward.\n\n## Requirements\nTo train the model on the PointNav task, we need to [install the RoboTHOR environment](../installation/installation-framework.md) \nand [download the RoboTHOR PointNav dataset](../installation/download-datasets.md)\n\nThe dataset contains a list of episodes with thousands of randomly generated starting positions and target locations for each of the scenes\nas well as a precomputed cache of distances, containing the shortest path from each point in a scene, to every other point in that scene. \nThis is used to reward the agent for moving closer to the target in terms of geodesic distance - the actual path distance (as opposed to a \nstraight line distance).\n\n## Config File Setup\nNow comes the most important part of the tutorial, we are going to write an experiment config file. \nIf this is your first experience with experiment config files in AllenAct, we suggest that you\nfirst see our how-to on [defining an experiment](../howtos/defining-an-experiment.md) which will\nwalk you through creating a simplified experiment config file.\n\nUnlike a library that can be imported into python, **AllenAct** is structured as a framework with a runner script called\n`main.py` which will run the experiment specified in a config file. 
This design forces us to keep meticulous records of \nexactly which settings were used to produce a particular result,\nwhich can be very useful given how expensive RL models are to train.\n\nThe `projects/` directory is home to different projects using `AllenAct`. Currently it is populated with baselines\nof popular tasks and tutorials.\n\nWe already have all the code for this tutorial stored in `projects/tutorials/training_a_pointnav_model.py`. We will\nbe using this file to run our experiments, but you can create a new directory in `projects/` and start writing your\nexperiment there.\n\nWe start off by importing everything we will need:\n\"\"\"\n\n# %%\nimport glob\nimport os\nfrom math import ceil\nfrom typing import Dict, Any, List, Optional, Sequence\n\nimport gym\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import LambdaLR\nfrom torchvision import models\n\nfrom allenact.algorithms.onpolicy_sync.losses import PPO\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams\nfrom allenact.base_abstractions.preprocessor import SensorPreprocessorGraph\nfrom allenact.base_abstractions.sensor import SensorSuite\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor\nfrom allenact.utils.experiment_utils import (\n    Builder,\n    PipelineStage,\n    TrainingPipeline,\n    LinearDecay,\n    evenly_distribute_count_into_bins,\n)\nfrom allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor\nfrom allenact_plugins.navigation_plugin.objectnav.models import (\n    ResnetTensorNavActorCritic,\n)\nfrom allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor\nfrom allenact_plugins.robothor_plugin.robothor_task_samplers import (\n    PointNavDatasetTaskSampler,\n)\nfrom 
allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask\n\n# %%\n\"\"\"Next we define a new experiment config class:\"\"\"\n\n\n# %%\nclass PointNavRoboThorRGBPPOExperimentConfig(ExperimentConfig):\n    \"\"\"A Point Navigation experiment configuration in RoboThor.\"\"\"\n\n    # %%\n    \"\"\"\n    We then define the task parameters. For PointNav, these include the maximum number of steps our agent\n    can take before being reset (this prevents the agent from wandering on forever), and a configuration\n    for the reward function that we will be using. \n    \"\"\"\n\n    # %%\n    # Task Parameters\n    MAX_STEPS = 500\n    REWARD_CONFIG = {\n        \"step_penalty\": -0.01,\n        \"goal_success_reward\": 10.0,\n        \"failed_stop_reward\": 0.0,\n        \"shaping_weight\": 1.0,\n    }\n\n    # %%\n    \"\"\"\n    In this case, we set the maximum number of steps to 500.\n    We give the agent a reward of -0.01 for each action that it takes (this is to encourage it to reach the goal\n    in as few actions as possible), and a reward of 10.0 if the agent manages to successfully reach its destination.\n    If the agent selects the `stop` action without reaching the target we do not punish it (although this is\n    sometimes useful for preventing the agent from stopping prematurely). Finally, our agent gets rewarded if it moves\n    closer to the target and gets punished if it moves further away. `shaping_weight` controls how strong this signal should\n    be and is here set to 1.0. These parameters work well for training an agent on PointNav, but feel free to play around\n    with them.\n    \n    Next, we set the parameters of the simulator itself. 
Here we select a resolution at which the engine will render\n    every frame (640 by 480) and a resolution at which the image will be fed into the neural network (here it is set\n    to a 224 by 224 box).\n    \"\"\"\n\n    # %%\n    # Simulator Parameters\n    CAMERA_WIDTH = 640\n    CAMERA_HEIGHT = 480\n    SCREEN_SIZE = 224\n\n    # %%\n    \"\"\"\n    Next, we set the hardware parameters for the training engine. `NUM_PROCESSES` sets the total number of parallel\n    processes that will be used to train the model. In general, more processes result in faster training,\n    but since each process is a unique instance of the environment in which we are training they can take up a\n    lot of memory. Depending on the size of the model, the environment, and the hardware we are using, we may\n    need to adjust this number, but for a setup with 8 GTX Titans, 60 processes work fine. 60 also happens to\n    be the number of training scenes in RoboTHOR, which allows each process to load only a single scene into\n    memory, saving time and space.\n    \n    `TRAINING_GPUS` takes the ids of the GPUS on which\n    the model should be trained. Similarly `VALIDATION_GPUS` and `TESTING_GPUS` hold the ids of the GPUS on which\n    the validation and testing will occur. 
During training, a validation process is constantly running and evaluating\n    the current model, to show the progress on the validation set, so reserving a GPU for validation can be a good idea.\n    If our hardware setup does not include a GPU, these fields can be set to empty lists, as the codebase will default\n    to running everything on the CPU with only 1 process.\n    \"\"\"\n\n    # %%\n    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None\n    NUM_PROCESSES = 20\n    TRAINING_GPUS: Sequence[int] = [0]\n    VALIDATION_GPUS: Sequence[int] = [0]\n    TESTING_GPUS: Sequence[int] = [0]\n\n    # %%\n    \"\"\"\n    Since we are using a dataset to train our model we need to define the path to where we have stored it. If we\n    download the dataset instructed above we can define the path as follows\n    \"\"\"\n\n    # %%\n    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), \"datasets/robothor-pointnav/debug\")\n    VAL_DATASET_DIR = os.path.join(os.getcwd(), \"datasets/robothor-pointnav/debug\")\n\n    # %%\n    \"\"\"\n    Next, we define the sensors. `RGBSensorThor` is the environment's implementation of an RGB sensor. It takes the\n    raw image outputted by the simulator and resizes it, to the input dimensions for our neural network that we\n    specified above. It also performs normalization if we want. `GPSCompassSensorRoboThor` is a sensor that tracks\n    the point our agent needs to move to. It tells us the direction and distance to our goal at every time step.\n    \"\"\"\n\n    # %%\n    SENSORS = [\n        RGBSensorThor(\n            height=SCREEN_SIZE,\n            width=SCREEN_SIZE,\n            use_resnet_normalization=True,\n            uuid=\"rgb_lowres\",\n        ),\n        GPSCompassSensorRoboThor(),\n    ]\n\n    # %%\n    \"\"\"\n    For the sake of this example, we are also going to be using a preprocessor with our model. In *AllenAct*\n    the preprocessor abstraction is designed with large models with frozen weights in mind. 
These models often\n    hail from the ResNet family and transform the raw pixels that our agent observes in the environment, into a\n    complex embedding, which then gets stored and used as input to our trainable model instead of the original image.\n    Most other preprocessing work is done in the sensor classes (as we just saw with the RGB\n    sensor scaling and normalizing our input), but for the sake of efficiency, all neural network preprocessing should\n    use this abstraction.\n    \"\"\"\n\n    # %%\n    PREPROCESSORS = [\n        Builder(\n            ResNetPreprocessor,\n            {\n                \"input_height\": SCREEN_SIZE,\n                \"input_width\": SCREEN_SIZE,\n                \"output_width\": 7,\n                \"output_height\": 7,\n                \"output_dims\": 512,\n                \"pool\": False,\n                \"torchvision_resnet_model\": models.resnet18,\n                \"input_uuids\": [\"rgb_lowres\"],\n                \"output_uuid\": \"rgb_resnet\",\n            },\n        ),\n    ]\n\n    # %%\n    \"\"\"\n    Next, we must define all of the observation inputs that our model will use. These are just\n    the hardcoded ids of the sensors we are using in the experiment.\n    \"\"\"\n\n    # %%\n    OBSERVATIONS = [\n        \"rgb_resnet\",\n        \"target_coordinates_ind\",\n    ]\n\n    # %%\n    \"\"\"\n    Finally, we must define the settings of our simulator. We set the camera dimensions to the values\n    we defined earlier. We set rotateStepDegrees to 30 degrees, which means that every time the agent takes a\n    turn action, they will rotate by 30 degrees. We set grid size to 0.25 which means that every time the\n    agent moves forward, it will do so by 0.25 meters. 
\n    \"\"\"\n\n    # %%\n    ENV_ARGS = dict(\n        width=CAMERA_WIDTH,\n        height=CAMERA_HEIGHT,\n        rotateStepDegrees=30.0,\n        visibilityDistance=1.0,\n        gridSize=0.25,\n        agentMode=\"bot\",\n    )\n\n    # %%\n    \"\"\"\n    Now we move on to the methods that we must define to finish implementing an experiment config. Firstly we\n    have a simple method that just returns the name of the experiment.\n    \"\"\"\n\n    # %%\n    @classmethod\n    def tag(cls):\n        return \"PointNavRobothorRGBPPO\"\n\n    # %%\n    \"\"\"\n    Next, we define the training pipeline. In this function, we specify exactly which algorithm or algorithms\n    we will use to train our model. In this simple example, we are using the PPO loss with a learning rate of 3e-4.\n    We specify 250 million steps of training and a rollout length of 30 with the `ppo_steps` and `num_steps` parameters\n    respectively. All the other standard PPO parameters are also present in this function. 
`metric_accumulate_interval`\n    sets the frequency at which data is accumulated from all the processes and logged while `save_interval` sets how\n    often we save the model weights and run validation on them.\n    \"\"\"\n\n    # %%\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        ppo_steps = int(250000000)\n        lr = 3e-4\n        num_mini_batch = 1\n        update_repeats = 3\n        num_steps = 30\n        save_interval = 5000000\n        log_interval = 1000\n        gamma = 0.99\n        use_gae = True\n        gae_lambda = 0.95\n        max_grad_norm = 0.5\n        return TrainingPipeline(\n            save_interval=save_interval,\n            metric_accumulate_interval=log_interval,\n            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),\n            num_mini_batch=num_mini_batch,\n            update_repeats=update_repeats,\n            max_grad_norm=max_grad_norm,\n            num_steps=num_steps,\n            named_losses={\"ppo_loss\": PPO(**PPOConfig)},\n            gamma=gamma,\n            use_gae=use_gae,\n            gae_lambda=gae_lambda,\n            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,\n            pipeline_stages=[\n                PipelineStage(loss_names=[\"ppo_loss\"], max_stage_steps=ppo_steps)\n            ],\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}\n            ),\n        )\n\n    # %%\n    \"\"\"\n    The `machine_params` method returns the hardware parameters of each\n    process, based on the list of devices we defined above.\n    \"\"\"\n\n    # %%\n    def machine_params(self, mode=\"train\", **kwargs):\n        sampler_devices: List[int] = []\n        if mode == \"train\":\n            workers_per_device = 1\n            gpu_ids = (\n                []\n                if not torch.cuda.is_available()\n                else list(self.TRAINING_GPUS) * workers_per_device\n            )\n            
nprocesses = (\n                8\n                if not torch.cuda.is_available()\n                else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))\n            )\n            sampler_devices = list(self.TRAINING_GPUS)\n        elif mode == \"valid\":\n            nprocesses = 1\n            gpu_ids = [] if not torch.cuda.is_available() else self.VALIDATION_GPUS\n        elif mode == \"test\":\n            nprocesses = 1\n            gpu_ids = [] if not torch.cuda.is_available() else self.TESTING_GPUS\n        else:\n            raise NotImplementedError(\"mode must be 'train', 'valid', or 'test'.\")\n\n        sensor_preprocessor_graph = (\n            SensorPreprocessorGraph(\n                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,\n                preprocessors=self.PREPROCESSORS,\n            )\n            if mode == \"train\"\n            or (\n                (isinstance(nprocesses, int) and nprocesses > 0)\n                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)\n            )\n            else None\n        )\n\n        return MachineParams(\n            nprocesses=nprocesses,\n            devices=gpu_ids,\n            sampler_devices=(\n                sampler_devices if mode == \"train\" else gpu_ids\n            ),  # ignored with > 1 gpu_ids\n            sensor_preprocessor_graph=sensor_preprocessor_graph,\n        )\n\n    # %%\n    \"\"\"\n    Now we define the actual model that we will be using. **AllenAct** offers first-class support for PyTorch,\n    so any PyTorch model that implements the provided `ActorCriticModel` class will work here. Here we borrow a model from the `pointnav_baselines` project (which\n    unsurprisingly contains several PointNav baselines). It is a small convolutional network that expects the output of a ResNet as its rgb input followed by a single-layered GRU. 
The model accepts as input the number of different\n    actions our agent can perform in the environment through the `action_space` parameter, which we get from the task definition. We also define the shape of the inputs we are going to be passing to the model with `observation_space`.\n    We specify the names of our sensors with `goal_sensor_uuid` and `rgb_resnet_preprocessor_uuid`. Finally, we define\n    the size of our RNN with `hidden_size` and the size of the embedding of our goal sensor data (the direction and\n    distance to the target) with `goal_dims`.\n    \"\"\"\n\n    # %%\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        return ResnetTensorNavActorCritic(\n            action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),\n            observation_space=kwargs[\"sensor_preprocessor_graph\"].observation_spaces,\n            goal_sensor_uuid=\"target_coordinates_ind\",\n            rgb_resnet_preprocessor_uuid=\"rgb_resnet\",\n            hidden_size=512,\n            goal_dims=32,\n        )\n\n    # %%\n    \"\"\"\n    We also need to define the task sampler that we will be using. This is a piece of code that generates instances\n    of tasks for our agent to perform (essentially starting locations and targets for PointNav). Since we are getting\n    our tasks from a dataset, the task sampler is a very simple piece of code that just reads the specified file and sets\n    the agent to the next starting location whenever the agent exceeds the maximum number of steps or selects the\n    `stop` action.\n    \"\"\"\n\n    # %%\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return PointNavDatasetTaskSampler(**kwargs)\n\n    # %%\n    \"\"\"\n    You might notice that we did not specify the task sampler's arguments, but are rather passing them in. 
The\n    reason for this is that each process will have its own task sampler, and we need to specify exactly which scenes\n    each process should work with. If we have several GPUS and many scenes this process of distributing the work can be rather complicated so we define a few helper functions to do just this.\n    \"\"\"\n\n    # %%\n    @staticmethod\n    def _partition_inds(n: int, num_parts: int):\n        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(\n            np.int32\n        )\n\n    def _get_sampler_args_for_scene_split(\n        self,\n        scenes_dir: str,\n        process_ind: int,\n        total_processes: int,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        path = os.path.join(scenes_dir, \"*.json.gz\")\n        scenes = [scene.split(\"/\")[-1].split(\".\")[0] for scene in glob.glob(path)]\n        if len(scenes) == 0:\n            raise RuntimeError(\n                (\n                    \"Could find no scene dataset information in directory {}.\"\n                    \" Are you sure you've downloaded them? 
\"\n                    \" If not, see https://allenact.org/installation/download-datasets/ information\"\n                    \" on how this can be done.\"\n                ).format(scenes_dir)\n            )\n        if total_processes > len(scenes):  # oversample some scenes -> bias\n            if total_processes % len(scenes) != 0:\n                print(\n                    \"Warning: oversampling some of the scenes to feed all processes.\"\n                    \" You can avoid this by setting a number of workers divisible by the number of scenes\"\n                )\n            scenes = scenes * int(ceil(total_processes / len(scenes)))\n            scenes = scenes[: total_processes * (len(scenes) // total_processes)]\n        else:\n            if len(scenes) % total_processes != 0:\n                print(\n                    \"Warning: oversampling some of the scenes to feed all processes.\"\n                    \" You can avoid this by setting a number of workers divisor of the number of scenes\"\n                )\n        inds = self._partition_inds(len(scenes), total_processes)\n\n        return {\n            \"scenes\": scenes[inds[process_ind] : inds[process_ind + 1]],\n            \"max_steps\": self.MAX_STEPS,\n            \"sensors\": self.SENSORS,\n            \"action_space\": gym.spaces.Discrete(len(PointNavTask.class_action_names())),\n            \"seed\": seeds[process_ind] if seeds is not None else None,\n            \"deterministic_cudnn\": deterministic_cudnn,\n            \"rewards_config\": self.REWARD_CONFIG,\n        }\n\n    # %%\n    \"\"\"\n    The very last things we need to define are the sampler arguments themselves. We define them separately for a train,\n    validation, and test sampler, but in this case, they are almost the same. 
The arguments need to include the location\n    of the dataset and distance cache as well as the environment arguments for our simulator, both of which we defined above\n    and are just referencing here. The only consequential differences between these task samplers are the path to the dataset\n    we are using (train or validation) and whether we want to loop over the dataset or not (we want this for training since\n    we want to train for several epochs, but we do not need this for validation and testing). Since the test scenes of\n    RoboTHOR are private we are also testing on our validation set.\n    \"\"\"\n\n    # %%\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            os.path.join(self.TRAIN_DATASET_DIR, \"episodes\"),\n            process_ind,\n            total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"scene_directory\"] = self.TRAIN_DATASET_DIR\n        res[\"loop_dataset\"] = True\n        res[\"env_args\"] = {}\n        res[\"env_args\"].update(self.ENV_ARGS)\n        res[\"env_args\"][\"x_display\"] = (\n            (\"0.%d\" % devices[process_ind % len(devices)])\n            if devices is not None and len(devices) > 0\n            else None\n        )\n        res[\"allow_flipping\"] = True\n        return res\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            os.path.join(self.VAL_DATASET_DIR, \"episodes\"),\n       
     process_ind,\n            total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"scene_directory\"] = self.VAL_DATASET_DIR\n        res[\"loop_dataset\"] = False\n        res[\"env_args\"] = {}\n        res[\"env_args\"].update(self.ENV_ARGS)\n        res[\"env_args\"][\"x_display\"] = (\n            (\"0.%d\" % devices[process_ind % len(devices)])\n            if devices is not None and len(devices) > 0\n            else None\n        )\n        return res\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        res = self._get_sampler_args_for_scene_split(\n            os.path.join(self.VAL_DATASET_DIR, \"episodes\"),\n            process_ind,\n            total_processes,\n            seeds=seeds,\n            deterministic_cudnn=deterministic_cudnn,\n        )\n        res[\"scene_directory\"] = self.VAL_DATASET_DIR\n        res[\"loop_dataset\"] = False\n        res[\"env_args\"] = {}\n        res[\"env_args\"].update(self.ENV_ARGS)\n        return res\n\n\n# %%\n\"\"\"\nThis is it! If we copy all of the code into a file we should be able to run our experiment!\n\n## Training Model On Debug Dataset\nWe can test if our installation worked properly by training our model on a small dataset of 4 episodes. This\nshould take about 20 minutes on a computer with a NVIDIA GPU.\n\nWe can now train a model by running:\n```bash\nPYTHONPATH=. python allenact/main.py -o <PATH_TO_OUTPUT> -c -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> <EXPERIMENT_NAME>\n```\nIf using the same configuration as we have set up, the following command should work:\n```bash\nPYTHONPATH=. 
python allenact/main.py training_a_pointnav_model -o storage/robothor-pointnav-rgb-resnet-resnet -b projects/tutorials\n```\nIf we start up a tensorboard server during training and specify that `output_dir=storage` the output should look\nsomething like this:\n![tensorboard output](../img/point-nav-baseline-tb.png)\n\n## Training Model On Full Dataset\nWe can also train the model on the full dataset by changing back our dataset path and running the same command as above.\nBut be aware, training this takes nearly 2 days on a machine with 8 GPUs.\n\n## Testing Model\nTo test the performance of a model please refer to [this tutorial](running-inference-on-a-pretrained-model.md).\n\n## Conclusion\nIn this tutorial, we learned how to create a new PointNav experiment using **AllenAct**. There are many simple\nand obvious ways to modify the experiment from here - changing the model, the learning algorithm and the environment\neach requires very few lines of code changed in the above file, allowing us to explore our embodied AI research ideas\nacross different frameworks with ease.\n\"\"\"\n"
  },
  {
    "path": "requirements.txt",
    "content": "certifi==2020.12.5\nchardet==4.0.0\ncloudpickle==1.6.0\ncycler==0.10.0\ndecorator==4.4.2\nfilelock==3.0.12\nfuture==0.18.2\ngym==0.17.3\nidna==2.10\nimageio==2.9.0\nimageio-ffmpeg==0.4.3\nkiwisolver==1.3.1\nmatplotlib==3.3.3\nmoviepy==1.0.3\nnetworkx==2.5\nnumpy==1.19.5\nopencv-python==4.5.1.48\nPillow>=8.2.0,<9.0.0\nproglog==0.1.9\nprotobuf==3.14.0\npyglet==1.5.0\npyparsing==2.4.7\npython-dateutil>=2.8.1\nrequests==2.25.1\nscipy==1.5.4\nsetproctitle==1.2.1\nsix>=1.15.0\ntensorboardX==2.1\ntorch>=1.6.0,!=1.8.0,<2.0.0\ntorchvision>=0.7.0,<0.10.0\ntqdm==4.56.0\nurllib3==1.26.5\nattr\nattrs\nwandb"
  },
  {
    "path": "scripts/auto_format.sh",
    "content": "#!/bin/bash\n\n# Move to the directory containing the directory that this file is in\ncd \"$( cd \"$( dirname \"${BASH_SOURCE[0]}/..\" )\" >/dev/null 2>&1 && pwd )\" || exit\n\necho RUNNING BLACK\nblack . --exclude src --exclude external_projects\necho BLACK DONE\necho \"\"\n\necho RUNNING DOCFORMATTER\nfind . -name \"*.py\" | grep -v ^./src | grep -v ^./external_projects | grep -v used_configs | xargs docformatter --in-place -r\necho DOCFORMATTER DONE\n\necho ALL DONE"
  },
  {
    "path": "scripts/build_docs.py",
    "content": "import glob\nimport os\nimport shutil\nimport sys\nfrom pathlib import Path\nfrom subprocess import check_output\nfrom threading import Thread\nfrom typing import Dict, Union, Optional, Set, List, Sequence, Mapping\n\nfrom git import Git\nfrom ruamel.yaml import YAML  # type: ignore\n\nfrom constants import ABS_PATH_OF_TOP_LEVEL_DIR\n\n# TODO: the scripts directory shouldn't be a module (as it conflicts with\n#  some local developmment workflows) but we do want to import scripts/literate.py.\n#  Temporary solution is just to modify the sys.path when this script is run.\nsys.path.append(os.path.abspath(os.path.dirname(Path(__file__))))\n\nfrom literate import literate_python_to_markdown\n\n\nclass StringColors:\n    HEADER = \"\\033[95m\"\n    OKBLUE = \"\\033[94m\"\n    OKGREEN = \"\\033[92m\"\n    WARNING = \"\\033[93m\"\n    FAIL = \"\\033[91m\"\n    ENDC = \"\\033[0m\"\n    BOLD = \"\\033[1m\"\n    UNDERLINE = \"\\033[4m\"\n\n\nexclude_files = [\n    \".DS_Store\",\n    \"__init__.py\",\n    \"__init__.pyc\",\n    \"README.md\",\n    \"version.py\",\n    \"run.py\",\n    \"setup.py\",\n    \"main.py\",\n]\n\n\ndef render_file(\n    relative_src_path: str, src_file: str, to_file: str, modifier=\"\"\n) -> None:\n    \"\"\"Shells out to pydocmd, which creates a .md file from the docstrings of\n    python functions and classes in the file we specify.\n\n    The modifer specifies the depth at which to generate docs for\n    classes and functions in the file. 
More information here:\n    https://pypi.org/project/pydoc-markdown/\n    \"\"\"\n    # First try literate\n    was_literate = False\n    try:\n        was_literate = literate_python_to_markdown(\n            path=os.path.join(relative_src_path, src_file)\n        )\n    except Exception as _:\n        pass\n\n    if was_literate:\n        return\n\n    # Now do standard pydocmd\n    relative_src_namespace = relative_src_path.replace(\"/\", \".\")\n    src_base = src_file.replace(\".py\", \"\")\n\n    if relative_src_namespace == \"\":\n        namespace = f\"{src_base}{modifier}\"\n    else:\n        namespace = f\"{relative_src_namespace}.{src_base}{modifier}\"\n\n    pydoc_config = \"\"\"'{\n        renderer: {\n            type: markdown,\n            code_headers: true,\n            descriptive_class_title: false,\n            add_method_class_prefix: true,\n            source_linker: {type: github, repo: allenai/allenact},\n            header_level_by_type: {\n                Module: 1,\n                Class: 2,\n                Method: 3,\n                Function: 3,\n                Data: 3,\n            }\n        }\n    }'\"\"\"\n    pydoc_config = \" \".join(pydoc_config.split())\n    args = [\"pydoc-markdown\", \"-m\", namespace, pydoc_config]\n    try:\n        call_result = check_output([\" \".join(args)], shell=True, env=os.environ).decode(\n            \"utf-8\"\n        )\n\n        # noinspection PyShadowingNames\n        with open(to_file, \"w\") as f:\n            doc_split = call_result.split(\"\\n\")\n            # github_path = \"https://github.com/allenai/allenact/tree/master/\"\n            # path = (\n            #     github_path + namespace.replace(\".\", \"/\") + \".py\"\n            # )\n            # mdlink = \"[[source]]({})\".format(path)\n            mdlink = \"\"  # Removing the above source link for now.\n            call_result = \"\\n\".join([doc_split[0] + \" \" + mdlink] + doc_split[1:])\n            call_result = 
call_result.replace(\"_DOC_COLON_\", \":\")\n            f.write(call_result)\n        print(\n            f\"{StringColors.OKGREEN}[SUCCESS]{StringColors.ENDC} built docs for {src_file} -> {to_file}.\"\n        )\n    except Exception as _:\n        cmd = \" \".join(args)\n        print(\n            f\"{StringColors.WARNING}[SKIPPING]{StringColors.ENDC} could not\"\n            f\" build docs for {src_file} (missing an import?). CMD: '{cmd}'\"\n        )\n\n\n# noinspection PyShadowingNames\ndef build_docs_for_file(\n    relative_path: str, file_name: str, docs_dir: str, threads: List\n) -> Dict[str, str]:\n    \"\"\"Build docs for an individual python file.\"\"\"\n    clean_filename = file_name.replace(\".py\", \"\")\n    markdown_filename = f\"{clean_filename}.md\"\n\n    output_path = os.path.join(docs_dir, relative_path, markdown_filename)\n    nav_path = os.path.join(\"api\", relative_path, markdown_filename)\n\n    thread = Thread(target=render_file, args=(relative_path, file_name, output_path))\n    thread.start()\n    threads.append(thread)\n\n    return {os.path.basename(clean_filename): nav_path}\n\n\n# noinspection PyShadowingNames\ndef build_docs(\n    base_dir: Union[Path, str],\n    root_path: Union[Path, str],\n    docs_dir: Union[Path, str],\n    threads: List,\n    allowed_dirs: Optional[Set[str]] = None,\n):\n    base_dir, root_path, docs_dir = str(base_dir), str(root_path), str(docs_dir)\n\n    nav_root = []\n\n    for child in os.listdir(root_path):\n        relative_path = os.path.join(root_path, child)\n\n        if (\n            (allowed_dirs is not None)\n            and (os.path.isdir(relative_path))\n            and (os.path.abspath(relative_path) not in allowed_dirs)\n            # or \".git\" in relative_path\n            # or \".idea\" in relative_path\n            # or \"__pycache__\" in relative_path\n            # or \"tests\" in relative_path\n            # or \"mypy_cache\" in relative_path\n        ):\n            
print(\"SKIPPING {}\".format(relative_path))\n            continue\n\n        # without_allenact = str(root_path).replace(\"allenact/\", \"\")\n        new_path = os.path.relpath(root_path, base_dir).replace(\".\", \"\")\n        target_dir = os.path.join(docs_dir, new_path)\n        if not os.path.exists(target_dir):\n            os.mkdir(target_dir)\n\n        if os.path.isdir(relative_path):\n            nav_subsection = build_docs(\n                base_dir,\n                relative_path,\n                docs_dir,\n                threads=threads,\n                allowed_dirs=allowed_dirs,\n            )\n            if not nav_subsection:\n                continue\n            nav_root.append({child: nav_subsection})\n\n        else:\n            if child in exclude_files or not child.endswith(\".py\"):\n                continue\n\n            nav = build_docs_for_file(new_path, child, docs_dir, threads=threads)\n            nav_root.append(nav)\n\n    return nav_root\n\n\ndef project_readme_paths_to_nav_structure(project_readmes):\n    nested_dict = {}\n    for fp in project_readmes:\n        has_seen_project_dir = False\n        sub_nested_dict = nested_dict\n\n        split_fp = os.path.dirname(fp).split(\"/\")\n        for i, yar in enumerate(split_fp):\n            has_seen_project_dir = has_seen_project_dir or yar == \"projects\"\n            if not has_seen_project_dir or yar == \"projects\":\n                continue\n\n            if yar not in sub_nested_dict:\n                if i == len(split_fp) - 1:\n                    sub_nested_dict[yar] = fp.replace(\"docs/\", \"\")\n                    break\n                else:\n                    sub_nested_dict[yar] = {}\n\n            sub_nested_dict = sub_nested_dict[yar]\n\n    def recursively_create_nav_structure(nested_dict):\n        if isinstance(nested_dict, str):\n            return nested_dict\n\n        to_return = []\n        for key in nested_dict:\n            to_return.append({key: 
recursively_create_nav_structure(nested_dict[key])})\n        return to_return\n\n    return recursively_create_nav_structure(nested_dict)\n\n\ndef pruned_nav_entries(nav_entries):\n    if isinstance(nav_entries, str):\n        if os.path.exists(os.path.join(\"docs\", nav_entries)):\n            return nav_entries\n        else:\n            return None\n    elif isinstance(nav_entries, Sequence):\n        new_entries = []\n        for entry in nav_entries:\n            entry = pruned_nav_entries(entry)\n            if entry:\n                new_entries.append(entry)\n        return new_entries\n    elif isinstance(nav_entries, Mapping):\n        new_entries = {}\n        for k, entry in nav_entries.items():\n            entry = pruned_nav_entries(entry)\n            if entry:\n                new_entries[k] = entry\n        return new_entries\n    else:\n        raise NotImplementedError()\n\n\ndef main():\n    os.chdir(ABS_PATH_OF_TOP_LEVEL_DIR)\n\n    print(\"Copying all README.md files to docs.\")\n    with open(\"README.md\") as f:\n        readme_content = f.readlines()\n    readme_content = [x.replace(\"docs/\", \"\") for x in readme_content]\n    with open(\"docs/index.md\", \"w\") as f:\n        f.writelines(readme_content)\n\n    project_readmes = []\n    for readme_file_path in glob.glob(\"projects/**/README.md\", recursive=True):\n        if \"docs/\" not in readme_file_path:\n            new_path = os.path.join(\"docs\", readme_file_path)\n            os.makedirs(os.path.dirname(new_path), exist_ok=True)\n            shutil.copy(readme_file_path, new_path)\n            project_readmes.append(new_path)\n\n    print(\"Copying LICENSE file to docs.\")\n    shutil.copy(\"LICENSE\", \"docs/LICENSE.md\")\n\n    print(\"Copying CONTRIBUTING.md file to docs.\")\n    shutil.copy(\"CONTRIBUTING.md\", \"docs/CONTRIBUTING.md\")\n\n    # print(\"Copying CNAME file to docs.\")\n    # shutil.copy(\"CNAME\", \"docs/CNAME\")\n\n    print(\"Building the docs.\")\n    
parent_folder_path = Path(__file__).parent.parent\n    yaml_path = parent_folder_path / \"mkdocs.yml\"\n    source_path = parent_folder_path\n    docs_dir = str(parent_folder_path / \"docs\" / \"api\")\n    if not os.path.exists(docs_dir):\n        os.mkdir(docs_dir)\n\n    # Adding project readmes to the yaml\n    yaml = YAML()\n    mkdocs_yaml = yaml.load(yaml_path)\n    site_nav = mkdocs_yaml[\"nav\"]\n    # TODO Find a way to do the following in a way that results in nice titles.\n    # projects_key = \"Projects using allenact\"\n    # nav_obj = None\n    # for obj in site_nav:\n    #     if projects_key in obj:\n    #         nav_obj = obj\n    #         break\n    # nav_obj[projects_key] = project_readme_paths_to_nav_structure(project_readmes)\n\n    with open(yaml_path, \"w\") as f:\n        yaml.dump(mkdocs_yaml, f)\n\n    # Get directories to ignore\n    git_dirs = set(\n        os.path.abspath(os.path.split(p)[0]) for p in Git(\".\").ls_files().split(\"\\n\")\n    )\n    ignore_rel_dirs = [\n        \"docs\",\n        \"scripts\",\n        \"experiments\",\n        \"src\",\n        \".pip_src\",\n        \"dist\",\n        \"build\",\n    ]\n    ignore_abs_dirs = set(\n        os.path.abspath(os.path.join(str(parent_folder_path), rel_dir))\n        for rel_dir in ignore_rel_dirs\n    )\n    for d in ignore_abs_dirs:\n        if d in git_dirs:\n            git_dirs.remove(d)\n\n    threads: List = []\n    nav_entries = build_docs(\n        parent_folder_path,\n        source_path,\n        docs_dir,\n        threads=threads,\n        allowed_dirs=git_dirs,\n    )\n    nav_entries.sort(key=lambda x: list(x)[0], reverse=False)\n\n    for thread in threads:\n        thread.join()\n\n    nav_entries = pruned_nav_entries(nav_entries)\n\n    docs_key = \"API\"\n\n    # Find the yaml corresponding to the API\n    nav_obj = None\n    for obj in site_nav:\n        if docs_key in obj:\n            nav_obj = obj\n            break\n\n    nav_obj[docs_key] = 
nav_entries\n\n    with open(yaml_path, \"w\") as f:\n        yaml.dump(mkdocs_yaml, f)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "scripts/build_docs.sh",
    "content": "#!/usr/bin/env bash\n\nset -e\n\n# Add allenact to the python path\nexport PYTHONPATH=$PYTHONPATH:$PWD\n\n# Alter the relative path of the README image for the docs.\n#sed -i '1s/docs/./' docs/README.md\npython scripts/build_docs.py\n\n"
  },
  {
    "path": "scripts/dcommand.py",
    "content": "#!/usr/bin/env python3\n\n\"\"\"Tool to run command on multiple nodes through SSH.\"\"\"\n\nimport argparse\nimport glob\nimport os\n\n\ndef get_argument_parser():\n    \"\"\"Creates the argument parser.\"\"\"\n\n    # noinspection PyTypeChecker\n    parser = argparse.ArgumentParser(\n        description=\"dcommand\",\n        formatter_class=argparse.ArgumentDefaultsHelpFormatter,\n    )\n\n    parser.add_argument(\n        \"--runs_on\",\n        required=False,\n        type=str,\n        default=None,\n        help=\"Comma-separated IP addresses of machines. If empty, the tool will scan for lists of IP addresses\"\n        \" in `screen_ids_file`s in the `~/.allenact` directory.\",\n    )\n\n    parser.add_argument(\n        \"--ssh_cmd\",\n        required=False,\n        type=str,\n        default=\"ssh {addr}\",\n        help=\"SSH command. Useful to utilize a pre-shared key with 'ssh -i path/to/mykey.pem ubuntu@{addr}'.\",\n    )\n\n    parser.add_argument(\n        \"--command\",\n        required=False,\n        default=\"nvidia-smi | head -n 35\",\n        type=str,\n        help=\"Command to be run through ssh onto each machine\",\n    )\n\n    return parser\n\n\ndef get_args():\n    \"\"\"Creates the argument parser and parses any input arguments.\"\"\"\n\n    parser = get_argument_parser()\n    args = parser.parse_args()\n\n    return args\n\n\ndef wrap_double(text):\n    return f'\"{text}\"'\n\n\ndef wrap_single(text):\n    return f\"'{text}'\"\n\n\ndef wrap_single_nested(text, quote=r\"'\\''\"):\n    return f\"{quote}{text}{quote}\"\n\n\nif __name__ == \"__main__\":\n    args = get_args()\n\n    all_addresses = []\n    if args.runs_on is not None:\n        all_addresses = args.runs_on.split(\",\")\n    else:\n        all_files = sorted(\n            glob.glob(os.path.join(os.path.expanduser(\"~\"), \".allenact\", \"*.killfile\")),\n            reverse=True,\n        )\n        if len(all_files) == 0:\n            print(\n             
   f\"No screen_ids_file found under {os.path.join(os.path.expanduser('~'), '.allenact')}\"\n            )\n\n        for killfile in all_files:\n            with open(killfile, \"r\") as f:\n                # Each line contains 'IP_address screen_ID'\n                nodes = [tuple(line[:-1].split(\" \")) for line in f.readlines()]\n\n            all_addresses.extend(node[0] for node in nodes)\n\n            use_addresses = \"\"\n            while use_addresses not in [\"y\", \"n\"]:\n                use_addresses = input(\n                    f\"Run on {all_addresses} from {killfile}? [Y/n] \"\n                ).lower()\n                if use_addresses == \"\":\n                    use_addresses = \"y\"\n\n            if use_addresses == \"n\":\n                all_addresses.clear()\n            else:\n                break\n\n    print(f\"Running on IP addresses {all_addresses}\")\n\n    for it, addr in enumerate(all_addresses):\n        ssh_command = f\"{args.ssh_cmd.format(addr=addr)} {wrap_single(args.command)}\"\n\n        print(f\"{it} {addr} SSH command {ssh_command}\")\n        os.system(ssh_command)\n\n    print(\"DONE\")\n"
  },
  {
    "path": "scripts/dconfig.py",
    "content": "#!/usr/bin/env python3\n\nimport os\nimport argparse\n\n\ndef get_argument_parser():\n    \"\"\"Creates the argument parser.\"\"\"\n\n    # noinspection PyTypeChecker\n    parser = argparse.ArgumentParser(\n        description=\"dconfig\",\n        formatter_class=argparse.ArgumentDefaultsHelpFormatter,\n    )\n\n    parser.add_argument(\n        \"--runs_on\",\n        required=True,\n        type=str,\n        help=\"Comma-separated IP addresses of machines\",\n    )\n\n    parser.add_argument(\n        \"--config_script\",\n        required=True,\n        type=str,\n        help=\"Path to bash script with configuration\",\n    )\n\n    parser.add_argument(\n        \"--ssh_cmd\",\n        required=False,\n        type=str,\n        default=\"ssh -f {addr}\",\n        help=\"SSH command. Useful to utilize a pre-shared key with 'ssh -i path/to/mykey.pem -f ubuntu@{addr}'. \"\n        \"The option `-f` should be used, since we want a non-interactive session\",\n    )\n\n    parser.add_argument(\n        \"--distribute_public_rsa_key\",\n        dest=\"distribute_public_rsa_key\",\n        action=\"store_true\",\n        required=False,\n        help=\"if you pass the `--distribute_public_rsa_key` flag, the manager node's public key will be added to the \"\n        \"authorized keys of all workers (this is necessary in default-configured EC2 instances to use \"\n        \"`scripts/dmain.py`)\",\n    )\n    parser.set_defaults(distribute_public_rsa_key=False)\n\n    return parser\n\n\ndef get_args():\n    \"\"\"Creates the argument parser and parses any input arguments.\"\"\"\n\n    parser = get_argument_parser()\n    args = parser.parse_args()\n\n    return args\n\n\ndef wrap_double(text):\n    return f'\"{text}\"'\n\n\ndef wrap_single(text):\n    return f\"'{text}'\"\n\n\ndef wrap_single_nested(text, quote=r\"'\\''\"):\n    return f\"{quote}{text}{quote}\"\n\n\nif __name__ == \"__main__\":\n    args = get_args()\n\n    all_addresses = 
args.runs_on.split(\",\")\n    print(f\"Running on addresses {all_addresses}\")\n\n    remote_config_script = f\"{args.config_script}.distributed\"\n    for it, addr in enumerate(all_addresses):\n        if args.distribute_public_rsa_key:\n            key_command = (\n                f\"{args.ssh_cmd.format(addr=addr)} \"\n                f\"{wrap_double('echo $(cat ~/.ssh/id_rsa.pub) >> ~/.ssh/authorized_keys')}\"\n            )\n            print(f\"Key command {key_command}\")\n            os.system(f\"{key_command}\")\n\n        scp_cmd = (\n            args.ssh_cmd.replace(\"ssh \", \"scp \")\n            .replace(\"-f\", args.config_script)\n            .format(addr=addr)\n        )\n        print(f\"SCP command {scp_cmd}:{remote_config_script}\")\n        os.system(f\"{scp_cmd}:{remote_config_script}\")\n\n        screen_name = f\"allenact_config_machine{it}\"\n        bash_command = wrap_single_nested(\n            f\"source {remote_config_script} &>> log_allenact_distributed_config\"\n        )\n        screen_command = wrap_single(\n            f\"screen -S {screen_name} -dm bash -c {bash_command}\"\n        )\n\n        ssh_command = f\"{args.ssh_cmd.format(addr=addr)} {screen_command}\"\n\n        print(f\"SSH command {ssh_command}\")\n        os.system(ssh_command)\n        print(f\"{addr} {screen_name}\")\n\n    print(\"DONE\")\n"
  },
  {
    "path": "scripts/dkill.py",
    "content": "#!/usr/bin/env python3\n\n\"\"\"Tool to terminate multi-node (distributed) training.\"\"\"\n\nimport os\nimport argparse\nimport glob\n\n\ndef get_argument_parser():\n    \"\"\"Creates the argument parser.\"\"\"\n\n    # noinspection PyTypeChecker\n    parser = argparse.ArgumentParser(\n        description=\"dkill\",\n        formatter_class=argparse.ArgumentDefaultsHelpFormatter,\n    )\n\n    parser.add_argument(\n        \"--screen_ids_file\",\n        required=False,\n        type=str,\n        default=None,\n        help=\"Path to file generated by dmain.py with IPs and screen ids for nodes running process.\"\n        \" If empty, the tool will scan the `~/.allenact` directory for `screen_ids_file`s.\",\n    )\n\n    parser.add_argument(\n        \"--ssh_cmd\",\n        required=False,\n        type=str,\n        default=\"ssh {addr}\",\n        help=\"SSH command. Useful to utilize a pre-shared key with 'ssh -i mykey.pem ubuntu@{addr}'. \",\n    )\n\n    return parser\n\n\ndef get_args():\n    \"\"\"Creates the argument parser and parses any input arguments.\"\"\"\n\n    parser = get_argument_parser()\n    args = parser.parse_args()\n\n    return args\n\n\nif __name__ == \"__main__\":\n    args = get_args()\n\n    all_files = (\n        [args.screen_ids_file]\n        if args.screen_ids_file is not None\n        else sorted(\n            glob.glob(os.path.join(os.path.expanduser(\"~\"), \".allenact\", \"*.killfile\")),\n            reverse=True,\n        )\n    )\n\n    if len(all_files) == 0:\n        print(\n            f\"No screen_ids_file found under {os.path.join(os.path.expanduser('~'), '.allenact')}\"\n        )\n\n    for killfile in all_files:\n        with open(killfile, \"r\") as f:\n            nodes = [tuple(line[:-1].split(\" \")) for line in f.readlines()]\n\n        do_kill = \"\"\n        while do_kill not in [\"y\", \"n\"]:\n            do_kill = input(\n                f\"Stopping processes on {nodes} from {killfile}? 
[y/N] \"\n            ).lower()\n            if do_kill == \"\":\n                do_kill = \"n\"\n\n        if do_kill == \"y\":\n            for it, node in enumerate(nodes):\n                addr, screen_name = node\n\n                print(f\"Killing screen {screen_name} on {addr}\")\n\n                ssh_command = (\n                    f\"{args.ssh_cmd.format(addr=addr)} '\"\n                    f\"screen -S {screen_name} -p 0 -X quit ; \"\n                    f\"sleep 1 ; \"\n                    f\"echo Master processes left running: ; \"\n                    f\"ps aux | grep Master: | grep -v grep ; \"\n                    f\"echo ; \"\n                    f\"'\"\n                )\n\n                # print(f\"SSH command {ssh_command}\")\n                os.system(ssh_command)\n\n            do_delete = \"\"\n            while do_delete not in [\"y\", \"n\"]:\n                do_delete = input(f\"Delete file {killfile}? [y/N] \").lower()\n                if do_delete == \"\":\n                    do_delete = \"n\"\n\n            if do_delete == \"y\":\n                os.system(f\"rm {killfile}\")\n                print(f\"Deleted {killfile}\")\n\n    print(\"DONE\")\n"
  },
  {
    "path": "scripts/dmain.py",
    "content": "#!/usr/bin/env python3\n\n\"\"\"Entry point to multi-node (distributed) training for a user given experiment\nname.\"\"\"\n\nimport os\nimport random\nimport string\nimport subprocess\nimport sys\nimport time\nfrom pathlib import Path\nfrom typing import Optional\n\n# Add to PYTHONPATH the path of the parent directory of the current file's directory\nsys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(Path(__file__)))))\n\nfrom allenact.main import get_argument_parser as get_main_arg_parser\nfrom allenact.utils.system import init_logging, get_logger\nfrom constants import ABS_PATH_OF_TOP_LEVEL_DIR\n\n\ndef get_argument_parser():\n    \"\"\"Creates the argument parser.\"\"\"\n\n    parser = get_main_arg_parser()\n    parser.description = f\"distributed {parser.description}\"\n\n    parser.add_argument(\n        \"--runs_on\",\n        required=True,\n        type=str,\n        help=\"Comma-separated IP addresses of machines\",\n    )\n\n    parser.add_argument(\n        \"--ssh_cmd\",\n        required=False,\n        type=str,\n        default=\"ssh -f {addr}\",\n        help=\"SSH command. Useful to utilize a pre-shared key with 'ssh -i mykey.pem -f ubuntu@{addr}'. \"\n        \"The option `-f` should be used for non-interactive session\",\n    )\n\n    parser.add_argument(\n        \"--env_activate_path\",\n        required=True,\n        type=str,\n        help=\"Path to the virtual environment's `activate` script. It must be the same across all machines\",\n    )\n\n    parser.add_argument(\n        \"--allenact_path\",\n        required=False,\n        type=str,\n        default=\"allenact\",\n        help=\"Path to allenact top directory. 
It must be the same across all machines\",\n    )\n\n    # Required distributed_ip_and_port\n    idx = [a.dest for a in parser._actions].index(\"distributed_ip_and_port\")\n    parser._actions[idx].required = True\n\n    return parser\n\n\ndef get_args():\n    \"\"\"Creates the argument parser and parses any input arguments.\"\"\"\n\n    parser = get_argument_parser()\n    args = parser.parse_args()\n\n    return args\n\n\ndef get_raw_args():\n    raw_args = sys.argv[1:]\n    filtered_args = []\n    remove: Optional[str] = None\n    enclose_in_quotes: Optional[str] = None\n    for arg in raw_args:\n        if remove is not None:\n            remove = None\n        elif enclose_in_quotes is not None:\n            # Within backslash expansion: close former single, open double, create single, close double, reopen single\n            inner_quote = r\"\\'\\\"\\'\\\"\\'\"\n            # Convert double quotes into backslash double for later expansion\n            filtered_args.append(\n                inner_quote + arg.replace('\"', r\"\\\"\").replace(\"'\", r\"\\\"\") + inner_quote\n            )\n            enclose_in_quotes = None\n        elif arg in [\n            \"--runs_on\",\n            \"--ssh_cmd\",\n            \"--env_activate_path\",\n            \"--allenact_path\",\n            \"--extra_tag\",\n            \"--machine_id\",\n        ]:\n            remove = arg\n        elif arg == \"--config_kwargs\":\n            enclose_in_quotes = arg\n            filtered_args.append(arg)\n        else:\n            filtered_args.append(arg)\n    return filtered_args\n\n\ndef wrap_single(text):\n    return f\"'{text}'\"\n\n\ndef wrap_single_nested(text):\n    # Close former single, start backslash expansion (via $), create new single quote for expansion:\n    quote_enter = r\"'$'\\'\"\n    # New closing single quote for expansion, close backslash expansion, reopen former single:\n    quote_leave = r\"\\'''\"\n    return 
f\"{quote_enter}{text}{quote_leave}\"\n\n\ndef wrap_double(text):\n    return f'\"{text}\"'\n\n\ndef id_generator(size=4, chars=string.ascii_uppercase + string.digits):\n    return \"\".join(random.choice(chars) for _ in range(size))\n\n\n# Assume we can ssh into each of the `runs_on` machines through port 22\nif __name__ == \"__main__\":\n    # Tool must be called from AllenAct project's root directory\n    cwd = os.path.abspath(os.getcwd())\n    assert cwd == ABS_PATH_OF_TOP_LEVEL_DIR, (\n        f\"`dmain.py` called from {cwd}.\"\n        f\"\\nIt should be called from AllenAct's top level directory {ABS_PATH_OF_TOP_LEVEL_DIR}.\"\n    )\n\n    args = get_args()\n\n    init_logging(args.log_level)\n\n    raw_args = get_raw_args()\n\n    if args.seed is None:\n        seed = random.randint(0, 2**31 - 1)\n        raw_args.extend([\"-s\", f\"{seed}\"])\n        get_logger().info(f\"Using random seed {seed} in all workers (none was given)\")\n\n    all_addresses = args.runs_on.split(\",\")\n    get_logger().info(f\"Running on IP addresses {all_addresses}\")\n\n    assert args.distributed_ip_and_port.split(\":\")[0] in all_addresses, (\n        f\"Missing listener IP address {args.distributed_ip_and_port.split(':')[0]}\"\n        f\" in list of worker addresses {all_addresses}\"\n    )\n\n    time_str = time.strftime(\"%Y-%m-%d_%H-%M-%S\", time.localtime(time.time()))\n\n    global_job_id = id_generator()\n    killfilename = os.path.join(\n        os.path.expanduser(\"~\"), \".allenact\", f\"{time_str}_{global_job_id}.killfile\"\n    )\n    os.makedirs(os.path.dirname(killfilename), exist_ok=True)\n\n    code_src = \".\"\n\n    with open(killfilename, \"w\") as killfile:\n        for it, addr in enumerate(all_addresses):\n            code_tget = f\"{addr}:{args.allenact_path}/\"\n            get_logger().info(f\"rsync {code_src} to {code_tget}\")\n            os.system(f\"rsync -rz {code_src} {code_tget}\")\n\n            job_id = id_generator()\n\n            command 
= \" \".join(\n                [\"python\", \"main.py\"]\n                + raw_args\n                + [\n                    \"--extra_tag\",\n                    f\"{args.extra_tag}{'__' if len(args.extra_tag) > 0 else ''}machine{it}\",\n                ]\n                + [\"--machine_id\", f\"{it}\"]\n            )\n\n            logfile = (\n                f\"{args.output_dir}/log_{time_str}_{global_job_id}_{job_id}_machine{it}\"\n            )\n\n            env_and_command = wrap_single_nested(\n                f\"for NCCL_SOCKET_IFNAME in $(route | grep default) ; do : ; done && export NCCL_SOCKET_IFNAME\"\n                f\" && cd {args.allenact_path}\"\n                f\" && mkdir -p {args.output_dir}\"\n                f\" && source {args.env_activate_path} &>> {logfile}\"\n                f\" && echo pwd=$(pwd) &>> {logfile}\"\n                f\" && echo output_dir={args.output_dir} &>> {logfile}\"\n                f\" && echo python_version=$(python --version) &>> {logfile}\"\n                f\" && echo python_path=$(which python) &>> {logfile}\"\n                f\" && set | grep NCCL_SOCKET_IFNAME &>> {logfile}\"\n                f\" && echo &>> {logfile}\"\n                f\" && {command} &>> {logfile}\"\n            )\n\n            screen_name = f\"allenact_{time_str}_{global_job_id}_{job_id}_machine{it}\"\n            screen_command = wrap_single(\n                f\"screen -S {screen_name} -dm bash -c {env_and_command}\"\n            )\n\n            ssh_command = f\"{args.ssh_cmd.format(addr=addr)} {screen_command}\"\n\n            get_logger().debug(f\"SSH command {ssh_command}\")\n            subprocess.run(ssh_command, shell=True, executable=\"/bin/bash\")\n            get_logger().info(f\"{addr} {screen_name}\")\n\n            killfile.write(f\"{addr} {screen_name}\\n\")\n\n    get_logger().info(\"\")\n    get_logger().info(f\"Running screen ids saved to {killfilename}\")\n    get_logger().info(\"\")\n\n    
get_logger().info(\"DONE\")\n"
  },
  {
    "path": "scripts/literate.py",
    "content": "\"\"\"Helper functions used to create literate documentation from python files.\"\"\"\n\nimport importlib\nimport inspect\nimport os\nfrom typing import Optional, Sequence, List, cast\n\nfrom typing.io import TextIO\n\nfrom constants import ABS_PATH_OF_DOCS_DIR, ABS_PATH_OF_TOP_LEVEL_DIR\n\n\ndef get_literate_output_path(file: TextIO) -> Optional[str]:\n    for l in file:\n        l = l.strip()\n        if l != \"\":\n            if l.lower().startswith((\"# literate\", \"#literate\")):\n                parts = l.split(\":\")\n                if len(parts) == 1:\n                    assert (\n                        file.name[-3:].lower() == \".py\"\n                    ), \"Can only run literate on python (*.py) files.\"\n                    return file.name[:-3] + \".md\"\n                elif len(parts) == 2:\n                    rel_outpath = parts[1].strip()\n                    outpath = os.path.abspath(\n                        os.path.join(ABS_PATH_OF_DOCS_DIR, rel_outpath)\n                    )\n                    assert outpath.startswith(\n                        ABS_PATH_OF_DOCS_DIR\n                    ), f\"Path {outpath} is not allowed, must be within {ABS_PATH_OF_DOCS_DIR}.\"\n                    return outpath\n                else:\n                    raise NotImplementedError(\n                        f\"Line '{l}' is not of the correct format.\"\n                    )\n            else:\n                return None\n    return None\n\n\ndef source_to_markdown(dot_path: str, summarize: bool = False):\n    importlib.invalidate_caches()\n    module_path, obj_name = \".\".join(dot_path.split(\".\")[:-1]), dot_path.split(\".\")[-1]\n    module = importlib.import_module(module_path)\n    obj = getattr(module, obj_name)\n    source = inspect.getsource(obj)\n\n    if not summarize:\n        return source\n    elif inspect.isclass(obj):\n        lines = source.split(\"\\n\")\n        newlines = [lines[0]]\n        whitespace_len = 
float(\"inf\")\n        k = 1\n        started = False\n        while k < len(lines):\n            l = lines[k]\n            lstripped = l.lstrip()\n            if started:\n                newlines.append(l)\n                started = \"):\" not in l and \"->\" not in l\n                if not started:\n                    newlines.append(l[: cast(int, whitespace_len)] + \"    ...\\n\")\n\n            if (\n                l.lstrip().startswith(\"def \")\n                and len(l) - len(lstripped) <= whitespace_len\n            ):\n                whitespace_len = len(l) - len(lstripped)\n                newlines.append(l)\n                started = \"):\" not in l and \"->\" not in l\n                if not started:\n                    newlines.append(l[:whitespace_len] + \"    ...\\n\")\n            k += 1\n        return \"\\n\".join(newlines).strip()\n    elif inspect.isfunction(obj):\n        return source.split(\"\\n\")[0] + \"\\n    ...\"\n    else:\n        return\n\n\ndef _strip_empty_lines(lines: Sequence[str]) -> List[str]:\n    lines = list(lines)\n    if len(lines) == 0:\n        return lines\n\n    for i in range(len(lines)):\n        if lines[i].strip() != \"\":\n            lines = lines[i:]\n            break\n\n    for i in reversed(list(range(len(lines)))):\n        if lines[i].strip() != \"\":\n            lines = lines[: i + 1]\n            break\n    return lines\n\n\ndef literate_python_to_markdown(path: str) -> bool:\n    assert path[-3:].lower() == \".py\", \"Can only run literate on python (*.py) files.\"\n\n    with open(path, \"r\") as file:\n        output_path = get_literate_output_path(file)\n\n        if output_path is None:\n            return False\n\n        output_lines = [\n            f\"<!-- DO NOT EDIT THIS FILE. --> \",\n            f\"<!-- THIS FILE WAS AUTOGENERATED FROM\"\n            f\" 'ALLENACT_BASE_DIR/{os.path.relpath(path, ABS_PATH_OF_TOP_LEVEL_DIR)}', EDIT IT INSTEAD. 
-->\\n\",\n        ]\n        md_lines: List[str] = []\n        code_lines = md_lines\n\n        lines = file.readlines()\n        mode = None\n\n        for line in lines:\n            line = line.rstrip()\n            stripped_line = line.strip()\n            if (mode is None or mode == \"change\") and line.strip() == \"\":\n                continue\n\n            if mode == \"markdown\":\n                if stripped_line in ['\"\"\"', \"'''\"]:\n                    output_lines.extend(_strip_empty_lines(md_lines) + [\"\"])\n                    md_lines.clear()\n                    mode = None\n                elif stripped_line.endswith(('\"\"\"', \"'''\")):\n                    output_lines.extend(\n                        _strip_empty_lines(md_lines) + [stripped_line[:-3]]\n                    )\n                    md_lines.clear()\n                    mode = None\n                    # TODO: Does not account for the case where a string is ended with a comment.\n                else:\n                    md_lines.append(line.strip())\n            elif stripped_line.startswith((\"# %%\", \"#%%\")):\n                last_mode = mode\n                mode = \"change\"\n                if last_mode == \"code\":\n                    output_lines.extend(\n                        [\"```python\"] + _strip_empty_lines(code_lines) + [\"```\"]\n                    )\n                    code_lines.clear()\n\n                if \" import \" in stripped_line:\n                    path = stripped_line.split(\" import \")[-1].strip()\n                    output_lines.append(\n                        \"```python\\n\" + source_to_markdown(path) + \"\\n```\"\n                    )\n                elif \" import_summary \" in stripped_line:\n                    path = stripped_line.split(\" import_summary \")[-1].strip()\n                    output_lines.append(\n                        \"```python\\n\"\n                        + source_to_markdown(path, summarize=True)\n      
                  + \"\\n```\"\n                    )\n                elif \" hide\" in stripped_line:\n                    mode = \"hide\"\n            elif mode == \"hide\":\n                continue\n            elif mode == \"change\":\n                if stripped_line.startswith(('\"\"\"', \"'''\")):\n                    mode = \"markdown\"\n                    if len(stripped_line) != 3:\n                        if stripped_line.endswith(('\"\"\"', \"'''\")):\n                            output_lines.append(stripped_line[3:-3])\n                            mode = \"change\"\n                        else:\n                            output_lines.append(stripped_line[3:])\n                else:\n                    mode = \"code\"\n                    code_lines.append(line)\n            elif mode == \"code\":\n                code_lines.append(line)\n            else:\n                raise NotImplementedError(\n                    f\"mode {mode} is not implemented. Last 5 lines: \"\n                    + \"\\n\".join(output_lines[-5:])\n                )\n\n        if mode == \"code\" and len(code_lines) != 0:\n            output_lines.extend(\n                [\"```python\"] + _strip_empty_lines(code_lines) + [\"```\"]\n            )\n\n    with open(output_path, \"w\") as f:\n        f.writelines([l + \"\\n\" for l in output_lines])\n\n    return True\n\n\nif __name__ == \"__main__\":\n    # print(\n    #     source_to_markdown(\n    #         \"allenact_plugins.minigrid_plugin.minigrid_offpolicy.ExpertTrajectoryIterator\",\n    #         True\n    #     )\n    # )\n\n    literate_python_to_markdown(\n        os.path.join(\n            ABS_PATH_OF_TOP_LEVEL_DIR,\n            \"projects/tutorials/training_a_pointnav_model.py\",\n        )\n    )\n"
  },
  {
    "path": "scripts/release.py",
    "content": "import os\nimport sys\nfrom pathlib import Path\nfrom subprocess import getoutput\n\n\ndef make_package(name, verbose=False):\n    \"\"\"Prepares sdist for allenact or allenact_plugins.\"\"\"\n\n    orig_dir = os.getcwd()\n    base_dir = os.path.join(os.path.abspath(os.path.dirname(Path(__file__))), \"..\")\n    os.chdir(base_dir)\n\n    with open(\".VERSION\", \"r\") as f:\n        __version__ = f.readline().strip()\n\n    # generate sdist via setuptools\n    output = getoutput(f\"{sys.executable} {name}/setup.py sdist\")\n    if verbose:\n        print(output)\n\n    os.chdir(os.path.join(base_dir, \"dist\"))\n\n    # uncompress the tar.gz sdist\n    output = getoutput(f\"tar zxvf {name}-{__version__}.tar.gz\")\n    if verbose:\n        print(output)\n\n    # copy setup.py to the top level of the package (required by pip install)\n    output = getoutput(\n        f\"cp {name}-{__version__}/{name}/setup.py {name}-{__version__}/setup.py\"\n    )\n    if verbose:\n        print(output)\n\n    # create new source file with version\n    getoutput(\n        f\"printf '__version__ = \\\"{__version__}\\\"\\n' >> {name}-{__version__}/{name}/_version.py\"\n    )\n    # include it in sources\n    getoutput(\n        f'printf \"\\n{name}/_version.py\" >> {name}-{__version__}/{name}.egg-info/SOURCES.txt'\n    )\n\n    # recompress tar.gz\n    output = getoutput(f\"tar zcvf {name}-{__version__}.tar.gz {name}-{__version__}/\")\n    if verbose:\n        print(output)\n\n    # remove temporary directory\n    output = getoutput(f\"rm -r {name}-{__version__}\")\n    if verbose:\n        print(output)\n\n    os.chdir(orig_dir)\n\n\nif __name__ == \"__main__\":\n    verbose = False\n    make_package(\"allenact\", verbose)\n    make_package(\"allenact_plugins\", verbose)\n"
  },
  {
    "path": "scripts/run_tests.sh",
    "content": "#!/usr/bin/env bash\n\necho RUNNING PYTEST WITH COVERAGE\npipenv run coverage run -m --source=. pytest tests/\necho DONE\necho \"\"\n\necho GENERATING COVERAGE HTML\ncoverage html\necho HTML GENERATED\n\nif [ \"$(uname)\" == \"Darwin\" ]; then\n    echo OPENING COVERAGE INFO\n    open htmlcov/index.html\nfi"
  },
  {
    "path": "scripts/startx.py",
    "content": "import atexit\nimport os\nimport platform\nimport re\nimport shlex\nimport subprocess\nimport tempfile\n\n\n# Turning off automatic black formatting for this script as it breaks quotes.\n\n# fmt: off\n\ndef pci_records():\n    records = []\n    command = shlex.split(\"lspci -vmm\")\n    output = subprocess.check_output(command).decode()\n\n    for devices in output.strip().split(\"\\n\\n\"):\n        record = {}\n        records.append(record)\n        for row in devices.split(\"\\n\"):\n            key, value = row.split(\"\\t\")\n            record[key.split(\":\")[0]] = value\n\n    return records\n\ndef generate_xorg_conf(devices):\n    xorg_conf = []\n\n    device_section = \"\"\"\nSection \"Device\"\n    Identifier     \"Device{device_id}\"\n    Driver         \"nvidia\"\n    VendorName     \"NVIDIA Corporation\"\n    BusID          \"{bus_id}\"\nEndSection\n\"\"\"\n    server_layout_section = \"\"\"\nSection \"ServerLayout\"\n    Identifier     \"Layout0\"\n    {screen_records}\nEndSection\n\"\"\"\n    screen_section = \"\"\"\nSection \"Screen\"\n    Identifier     \"Screen{screen_id}\"\n    Device         \"Device{device_id}\"\n    DefaultDepth    24\n    Option         \"AllowEmptyInitialConfiguration\" \"True\"\n    SubSection     \"Display\"\n        Depth       24\n        Virtual 1024 768\n    EndSubSection\nEndSection\n\"\"\"\n    screen_records = []\n    for i, bus_id in enumerate(devices):\n        xorg_conf.append(device_section.format(device_id=i, bus_id=bus_id))\n        xorg_conf.append(screen_section.format(device_id=i, screen_id=i))\n        screen_records.append('Screen {screen_id} \"Screen{screen_id}\" 0 0'.format(screen_id=i))\n    \n    xorg_conf.append(server_layout_section.format(screen_records=\"\\n    \".join(screen_records)))\n\n    output =  \"\\n\".join(xorg_conf)\n    return output\n\ndef startx(display=0):\n    if platform.system() != \"Linux\":\n        raise Exception(\"Can only run startx on linux\")\n\n    
devices = []\n    for r in pci_records():\n        if r.get(\"Vendor\", \"\") == \"NVIDIA Corporation\"\\\n                and r[\"Class\"] in [\"VGA compatible controller\", \"3D controller\"]:\n            bus_id = \"PCI:\" + \":\".join(map(lambda x: str(int(x, 16)), re.split(r\"[:\\.]\", r[\"Slot\"])))\n            devices.append(bus_id)\n\n    if not devices:\n        raise Exception(\"no nvidia cards found\")\n\n    fd = None\n    path = None\n    try:\n        fd, path = tempfile.mkstemp()\n        with open(path, \"w\") as f:\n            f.write(generate_xorg_conf(devices))\n        command = shlex.split(\"Xorg -noreset +extension GLX +extension RANDR +extension RENDER -config %s :%s\" % (path, display))\n        proc = subprocess.Popen(command)\n        atexit.register(lambda: proc.poll() is None and proc.kill())\n        proc.wait()\n    finally:\n        if fd is not None:\n            os.close(fd)\n            os.unlink(path)\n\n# fmt: on\n\n\nif __name__ == \"__main__\":\n    startx()\n"
  },
  {
    "path": "tests/.gitignore",
    "content": "tmp\n.DS_Store\n!.py\n!.gitignore\n"
  },
  {
    "path": "tests/__init__.py",
    "content": ""
  },
  {
    "path": "tests/hierarchical_policies/__init__.py",
    "content": ""
  },
  {
    "path": "tests/hierarchical_policies/test_minigrid_conditional.py",
    "content": "import os\nfrom tempfile import mkdtemp\nfrom typing import Dict, Optional, List, Any, cast\n\nimport gym\nfrom gym_minigrid.envs import EmptyRandomEnv5x5\nfrom torch import nn\nfrom torch import optim\nfrom torch.optim.lr_scheduler import LambdaLR\n\nfrom allenact.algorithms.onpolicy_sync.losses.imitation import Imitation\nfrom allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig\nfrom allenact.algorithms.onpolicy_sync.runner import OnPolicyRunner\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler\nfrom allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor\nfrom allenact.utils.experiment_utils import (\n    TrainingPipeline,\n    Builder,\n    PipelineStage,\n    LinearDecay,\n)\nfrom allenact_plugins.minigrid_plugin.minigrid_sensors import EgocentricMiniGridSensor\nfrom allenact_plugins.minigrid_plugin.minigrid_tasks import MiniGridTaskSampler\nfrom projects.tutorials.minigrid_tutorial_conds import (\n    ConditionedMiniGridSimpleConvRNN,\n    ConditionedMiniGridTask,\n)\n\n\nclass MiniGridCondTestExperimentConfig(ExperimentConfig):\n    @classmethod\n    def tag(cls) -> str:\n        return \"MiniGridCondTest\"\n\n    SENSORS = [\n        EgocentricMiniGridSensor(agent_view_size=5, view_channels=3),\n        ExpertActionSensor(\n            action_space=gym.spaces.Dict(\n                higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)\n            )\n        ),\n    ]\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        return ConditionedMiniGridSimpleConvRNN(\n            action_space=gym.spaces.Dict(\n                higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)\n            ),\n            observation_space=SensorSuite(cls.SENSORS).observation_spaces,\n            num_objects=cls.SENSORS[0].num_objects,\n            num_colors=cls.SENSORS[0].num_colors,\n            num_states=cls.SENSORS[0].num_states,\n        )\n\n    
@classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return MiniGridTaskSampler(**kwargs)\n\n    def train_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(process_ind=process_ind, mode=\"train\")\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(process_ind=process_ind, mode=\"valid\")\n\n    def test_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        return self._get_sampler_args(process_ind=process_ind, mode=\"test\")\n\n    def _get_sampler_args(self, process_ind: int, mode: str) -> Dict[str, Any]:\n        \"\"\"Generate initialization arguments for train, valid, and test\n        TaskSamplers.\n\n        # Parameters\n        process_ind : index of the current task sampler\n        mode:  one of `train`, `valid`, or `test`\n        \"\"\"\n        if mode == \"train\":\n            max_tasks = None  # infinite training tasks\n            task_seeds_list = None  # no predefined random seeds for training\n            deterministic_sampling = False  # randomly sample tasks in training\n        else:\n            max_tasks = 20 + 20 * (\n                mode == \"test\"\n            )  # 20 tasks for valid, 40 for test (per sampler)\n\n            # one seed for each task to sample:\n            # - ensures different seeds for each sampler, 
and\n            # - ensures a deterministic set of sampled tasks.\n            task_seeds_list = list(\n                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)\n            )\n\n            deterministic_sampling = (\n                True  # deterministically sample task in validation/testing\n            )\n\n        return dict(\n            max_tasks=max_tasks,  # see above\n            env_class=self.make_env,  # builder for third-party environment (defined below)\n            sensors=self.SENSORS,  # sensors used to return observations to the agent\n            env_info=dict(),  # parameters for environment builder (none for now)\n            task_seeds_list=task_seeds_list,  # see above\n            deterministic_sampling=deterministic_sampling,  # see above\n            task_class=ConditionedMiniGridTask,\n        )\n\n    @staticmethod\n    def make_env(*args, **kwargs):\n        return EmptyRandomEnv5x5()\n\n    @classmethod\n    def machine_params(cls, mode=\"train\", **kwargs) -> Dict[str, Any]:\n        return {\n            \"nprocesses\": 4 if mode == \"train\" else 1,\n            \"devices\": [],\n        }\n\n    @classmethod\n    def training_pipeline(cls, **kwargs) -> TrainingPipeline:\n        ppo_steps = int(512)\n        return TrainingPipeline(\n            named_losses=dict(\n                imitation_loss=Imitation(\n                    cls.SENSORS[1]\n                ),  # 0 is Minigrid, 1 is ExpertActionSensor\n                ppo_loss=PPO(**PPOConfig, entropy_method_name=\"conditional_entropy\"),\n            ),  # type:ignore\n            pipeline_stages=[\n                PipelineStage(\n                    teacher_forcing=LinearDecay(\n                        startp=1.0,\n                        endp=0.0,\n                        steps=ppo_steps // 2,\n                    ),\n                    loss_names=[\"imitation_loss\", \"ppo_loss\"],\n                    max_stage_steps=ppo_steps,\n                )\n    
        ],\n            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4)),\n            num_mini_batch=4,\n            update_repeats=3,\n            max_grad_norm=0.5,\n            num_steps=16,\n            gamma=0.99,\n            use_gae=True,\n            gae_lambda=0.95,\n            advance_scene_rollout_period=None,\n            save_interval=10000,\n            metric_accumulate_interval=1,\n            lr_scheduler_builder=Builder(\n                LambdaLR, {\"lr_lambda\": LinearDecay(steps=ppo_steps)}  # type:ignore\n            ),\n        )\n\n\nclass TestMiniGridCond:\n    def test_train(self, tmpdir):\n        cfg = MiniGridCondTestExperimentConfig()\n        train_runner = OnPolicyRunner(\n            config=cfg,\n            output_dir=tmpdir,\n            loaded_config_src_files=None,\n            seed=12345,\n            mode=\"train\",\n            deterministic_cudnn=False,\n            deterministic_agents=False,\n            extra_tag=\"\",\n            disable_tensorboard=True,\n            disable_config_saving=True,\n        )\n        start_time_str, valid_results = train_runner.start_train(\n            checkpoint=None,\n            restart_pipeline=False,\n            max_sampler_processes_per_worker=1,\n            collect_valid_results=True,\n        )\n        assert len(valid_results) > 0\n\n        test_runner = OnPolicyRunner(\n            config=cfg,\n            output_dir=tmpdir,\n            loaded_config_src_files=None,\n            seed=12345,\n            mode=\"test\",\n            deterministic_cudnn=False,\n            deterministic_agents=False,\n            extra_tag=\"\",\n            disable_tensorboard=True,\n            disable_config_saving=True,\n        )\n        test_results = test_runner.start_test(\n            checkpoint_path_dir_or_pattern=os.path.join(\n                tmpdir, \"checkpoints\", \"**\", start_time_str, \"*.pt\"\n            ),\n            
max_sampler_processes_per_worker=1,\n            inference_expert=True,\n        )\n        assert test_results[-1][\"test-metrics/ep_length\"] < 4\n\n\nif __name__ == \"__main__\":\n    TestMiniGridCond().test_train(mkdtemp())  # type:ignore\n"
  },
  {
    "path": "tests/manipulathor_plugin/__init__.py",
    "content": ""
  },
  {
    "path": "tests/manipulathor_plugin/test_utils.py",
    "content": "from allenact_plugins.manipulathor_plugin.arm_calculation_utils import (\n    world_coords_to_agent_coords,\n)\n\n\nclass TestArmCalculationUtils(object):\n    def test_translation_functions(self):\n        agent_coordinate = {\n            \"position\": {\"x\": 1, \"y\": 0, \"z\": 2},\n            \"rotation\": {\"x\": 0, \"y\": -45, \"z\": 0},\n        }\n        obj_coordinate = {\n            \"position\": {\"x\": 0, \"y\": 1, \"z\": 0},\n            \"rotation\": {\"x\": 0, \"y\": 0, \"z\": 0},\n        }\n        rotated = world_coords_to_agent_coords(obj_coordinate, agent_coordinate)\n        eps = 0.01\n        assert (\n            abs(rotated[\"position\"][\"x\"] - (-2.12)) < eps\n            and abs(rotated[\"position\"][\"y\"] - (1.0)) < eps\n            and abs(rotated[\"position\"][\"z\"] - (-0.70)) < eps\n        )\n\n\nif __name__ == \"__main__\":\n    TestArmCalculationUtils().test_translation_functions()\n"
  },
  {
    "path": "tests/mapping/__init__.py",
    "content": ""
  },
  {
    "path": "tests/mapping/test_ai2thor_mapping.py",
    "content": "import os\nimport platform\nimport random\nimport sys\nimport urllib\nimport urllib.request\nimport warnings\nfrom collections import defaultdict\n\n# noinspection PyUnresolvedReferences\nfrom tempfile import mkdtemp\nfrom typing import Dict, List, Tuple, cast\n\n# noinspection PyUnresolvedReferences\nimport ai2thor\n\n# noinspection PyUnresolvedReferences\nimport ai2thor.wsgi_server\nimport compress_pickle\nimport numpy as np\nimport torch\n\nfrom allenact.algorithms.onpolicy_sync.storage import RolloutBlockStorage\nfrom allenact.base_abstractions.misc import Memory, ActorCriticOutput\nfrom allenact.embodiedai.mapping.mapping_utils.map_builders import SemanticMapBuilder\nfrom allenact.utils.experiment_utils import set_seed\nfrom allenact.utils.system import get_logger\nfrom allenact.utils.tensor_utils import batch_observations\nfrom allenact_plugins.ithor_plugin.ithor_sensors import (\n    RelativePositionChangeTHORSensor,\n    ReachableBoundsTHORSensor,\n    BinnedPointCloudMapTHORSensor,\n    SemanticMapTHORSensor,\n)\nfrom allenact_plugins.ithor_plugin.ithor_util import get_open_x_displays\nfrom allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor\nfrom constants import ABS_PATH_OF_TOP_LEVEL_DIR\n\n\nclass TestAI2THORMapSensors(object):\n    def setup_path_for_use_with_rearrangement_project(self) -> bool:\n        if platform.system() != \"Darwin\" and len(get_open_x_displays()) == 0:\n            wrn_msg = \"Cannot run tests as there seem to be no open displays!\"\n            warnings.warn(wrn_msg)\n            get_logger().warning(wrn_msg)\n            return False\n\n        os.chdir(ABS_PATH_OF_TOP_LEVEL_DIR)\n        sys.path.append(\n            os.path.join(ABS_PATH_OF_TOP_LEVEL_DIR, \"projects/ithor_rearrangement\")\n        )\n        try:\n            import rearrange\n        except ImportError:\n            wrn_msg = (\n                \"Could not import `rearrange`. 
Is it possible you have\"\n                \" not initialized the submodules (i.e. by running\"\n                \" `git submodule init; git submodule update;`)?\"\n            )\n            warnings.warn(wrn_msg)\n            get_logger().warning(wrn_msg)\n            return False\n\n        return True\n\n    def test_binned_and_semantic_mapping(self, tmpdir):\n        try:\n            if not self.setup_path_for_use_with_rearrangement_project():\n                return\n\n            from baseline_configs.rearrange_base import RearrangeBaseExperimentConfig\n            from baseline_configs.walkthrough.walkthrough_rgb_base import (\n                WalkthroughBaseExperimentConfig,\n            )\n            from rearrange.constants import (\n                FOV,\n                PICKUPABLE_OBJECTS,\n                OPENABLE_OBJECTS,\n            )\n            from datagen.datagen_utils import get_scenes\n\n            ORDERED_OBJECT_TYPES = list(sorted(PICKUPABLE_OBJECTS + OPENABLE_OBJECTS))\n\n            map_range_sensor = ReachableBoundsTHORSensor(margin=1.0)\n            map_info = dict(\n                map_range_sensor=map_range_sensor,\n                vision_range_in_cm=40 * 5,\n                map_size_in_cm=1050,\n                resolution_in_cm=5,\n            )\n            map_sensors = [\n                RelativePositionChangeTHORSensor(),\n                map_range_sensor,\n                DepthSensorThor(\n                    height=224,\n                    width=224,\n                    use_normalization=False,\n                    uuid=\"depth\",\n                ),\n                BinnedPointCloudMapTHORSensor(\n                    fov=FOV,\n                    ego_only=False,\n                    **map_info,\n                ),\n                SemanticMapTHORSensor(\n                    fov=FOV,\n                    ego_only=False,\n                    ordered_object_types=ORDERED_OBJECT_TYPES,\n                    **map_info,\n       
         ),\n            ]\n            all_sensors = [*WalkthroughBaseExperimentConfig.SENSORS, *map_sensors]\n\n            open_x_displays = []\n            try:\n                open_x_displays = get_open_x_displays()\n            except (AssertionError, IOError):\n                pass\n            walkthrough_task_sampler = WalkthroughBaseExperimentConfig.make_sampler_fn(\n                stage=\"train\",\n                sensors=all_sensors,\n                scene_to_allowed_rearrange_inds={s: [0] for s in get_scenes(\"train\")},\n                force_cache_reset=True,\n                allowed_scenes=None,\n                seed=1,\n                x_display=open_x_displays[0] if len(open_x_displays) != 0 else None,\n                thor_controller_kwargs={\n                    **RearrangeBaseExperimentConfig.THOR_CONTROLLER_KWARGS,\n                    # \"server_class\": ai2thor.wsgi_server.WsgiServer,  # Only for debugging\n                },\n            )\n\n            targets_path = os.path.join(tmpdir, \"rearrange_mapping_examples.pkl.gz\")\n            urllib.request.urlretrieve(\n                \"https://ai2-prior-allenact-public-test.s3-us-west-2.amazonaws.com/ai2thor_mapping/rearrange_mapping_examples.pkl.gz\",\n                targets_path,\n            )\n            goal_obs_dict = compress_pickle.load(targets_path)\n\n            def compare_recursive(obs, goal_obs, key_list: List):\n                if isinstance(obs, Dict):\n                    for k in goal_obs:\n                        compare_recursive(\n                            obs=obs[k], goal_obs=goal_obs[k], key_list=key_list + [k]\n                        )\n                elif isinstance(obs, (List, Tuple)):\n                    for i in range(len(goal_obs)):\n                        compare_recursive(\n                            obs=obs[i], goal_obs=goal_obs[i], key_list=key_list + [i]\n                        )\n                else:\n                    # Should be a numpy 
array at this point\n                    assert isinstance(obs, np.ndarray) and isinstance(\n                        goal_obs, np.ndarray\n                    ), f\"After {key_list}, not numpy arrays, obs={obs}, goal_obs={goal_obs}\"\n\n                    obs = 1.0 * obs\n                    goal_obs = 1.0 * goal_obs\n\n                    goal_where_nan = np.isnan(goal_obs)\n                    obs_where_nan = np.isnan(obs)\n\n                    where_nan_not_equal = (goal_where_nan != obs_where_nan).sum()\n                    # assert (\n                    #     where_nan_not_equal.sum() <= 1\n                    #     and where_nan_not_equal.mean() < 1e3\n                    # )\n\n                    where_nan = np.logical_or(goal_where_nan, obs_where_nan)\n                    obs[where_nan] = 0.0\n                    goal_obs[where_nan] = 0.0\n\n                    def special_mean(v):\n                        while len(v.shape) > 2:\n                            v = v.sum(-1)\n                        return v.mean()\n\n                    numer = np.abs(obs - goal_obs)\n                    denom = np.abs(\n                        np.stack((obs, goal_obs, np.ones_like(obs)), axis=0)\n                    ).max(0)\n                    difference = special_mean(numer / denom)\n                    # assert (\n                    #     difference < 1.2e-3\n                    # ), f\"Difference of {np.abs(obs - goal_obs).mean()} at {key_list}.\"\n\n                    if (\n                        len(obs.shape) >= 2\n                        and obs.shape[0] == obs.shape[1]\n                        and obs.shape[0] > 1\n                    ):\n                        # Sanity check that rotating the observations makes them not-equal\n                        rot_obs = np.rot90(obs)\n                        numer = np.abs(rot_obs - goal_obs)\n                        denom = np.abs(\n                            np.stack((rot_obs, goal_obs, np.ones_like(obs)), 
axis=0)\n                        ).max(0)\n                        rot_difference = special_mean(numer / denom)\n                        assert (\n                            difference < rot_difference or (obs == rot_obs).all()\n                        ), f\"Too small a difference ({(numer / denom).mean()}).\"\n\n            observations_dict = defaultdict(lambda: [])\n            for i in range(5):  # Why 5, why not 5?\n                set_seed(i)\n                task = walkthrough_task_sampler.next_task()\n\n                obs_list = observations_dict[i]\n                obs_list.append(task.get_observations())\n                k = 0\n                compare_recursive(\n                    obs=obs_list[0], goal_obs=goal_obs_dict[i][0], key_list=[i, k]\n                )\n                while not task.is_done():\n                    obs = task.step(\n                        action=task.action_names().index(\n                            random.choice(\n                                3\n                                * [\n                                    \"move_ahead\",\n                                    \"rotate_right\",\n                                    \"rotate_left\",\n                                    \"look_up\",\n                                    \"look_down\",\n                                ]\n                                + [\"done\"]\n                            )\n                        )\n                    ).observation\n                    k += 1\n                    obs_list.append(obs)\n                    compare_recursive(\n                        obs=obs,\n                        goal_obs=goal_obs_dict[i][task.num_steps_taken()],\n                        key_list=[i, k],\n                    )\n\n                    # Free space metric map in RGB using pointclouds coming from depth images. 
This\n                    # is built iteratively after every step.\n                    # R - is used to encode points at a height < 0.02m (i.e. the floor)\n                    # G - is used to encode points at a height between 0.02m and 2m, i.e. objects the agent would run into\n                    # B - is used to encode points higher than 2m, i.e. ceiling\n\n                    # Uncomment if you wish to visualize the observations:\n                    import matplotlib.pyplot as plt\n\n                    plt.imshow(\n                        np.flip(255 * (obs[\"binned_pc_map\"][\"map\"] > 0), 0)\n                    )  # np.flip because we expect \"up\" to be -row\n                    plt.title(\"Free space map\")\n                    plt.show()\n                    plt.close()\n\n                    # See also `obs[\"binned_pc_map\"][\"egocentric_update\"]` to see the\n                    # metric map from the point of view of the agent before it is\n                    # rotated into the world-space coordinates and merged with past observations.\n\n                    # Semantic map in RGB which is iteratively revealed using depth maps to figure out what\n                    # parts of the scene the agent has seen so far.\n                    # This map has shape 210x210x72 with the 72 channels corresponding to the 72\n                    # object types in `ORDERED_OBJECT_TYPES`\n                    semantic_map = obs[\"semantic_map\"][\"map\"]\n\n                    # We can't display all 72 channels in an RGB image so instead we randomly assign\n                    # each object a color and then just allow them to overlap each other\n                    colored_semantic_map = (\n                        SemanticMapBuilder.randomly_color_semantic_map(semantic_map)\n                    )\n\n                    # Here's the full semantic map with nothing masked out because the agent\n                    # hasn't seen it yet\n                    
colored_semantic_map_no_fog = (\n                        SemanticMapBuilder.randomly_color_semantic_map(\n                            map_sensors[\n                                -1\n                            ].semantic_map_builder.ground_truth_semantic_map\n                        )\n                    )\n\n                    # Uncomment if you wish to visualize the observations:\n                    # import matplotlib.pyplot as plt\n                    # plt.imshow(\n                    #     np.flip(  # np.flip because we expect \"up\" to be -row\n                    #         np.concatenate(\n                    #             (\n                    #                 colored_semantic_map,\n                    #                 255 + 0 * colored_semantic_map[:, :10, :],\n                    #                 colored_semantic_map_no_fog,\n                    #             ),\n                    #             axis=1,\n                    #         ),\n                    #         0,\n                    #     )\n                    # )\n                    # plt.title(\"Semantic map with and without exploration fog\")\n                    # plt.show()\n                    # plt.close()\n\n                    # See also\n                    # * `obs[\"semantic_map\"][\"egocentric_update\"]`\n                    # * `obs[\"semantic_map\"][\"explored_mask\"]`\n                    # * `obs[\"semantic_map\"][\"egocentric_mask\"]`\n\n            # To save observations for comparison against future runs, uncomment the below.\n            # os.makedirs(\"tmp_out\", exist_ok=True)\n            # compress_pickle.dump(\n            #     {**observations_dict}, \"tmp_out/rearrange_mapping_examples.pkl.gz\"\n            # )\n        finally:\n            try:\n                walkthrough_task_sampler.close()\n            except NameError:\n                pass\n\n    def test_pretrained_rearrange_walkthrough_mapping_agent(self, tmpdir):\n        try:\n            if not 
self.setup_path_for_use_with_rearrangement_project():\n                return\n\n            from baseline_configs.rearrange_base import RearrangeBaseExperimentConfig\n            from baseline_configs.walkthrough.walkthrough_rgb_mapping_ppo import (\n                WalkthroughRGBMappingPPOExperimentConfig,\n            )\n            from rearrange.constants import (\n                FOV,\n                PICKUPABLE_OBJECTS,\n                OPENABLE_OBJECTS,\n            )\n            from datagen.datagen_utils import get_scenes\n\n            open_x_displays = []\n            try:\n                open_x_displays = get_open_x_displays()\n            except (AssertionError, IOError):\n                pass\n            walkthrough_task_sampler = (\n                WalkthroughRGBMappingPPOExperimentConfig.make_sampler_fn(\n                    stage=\"train\",\n                    scene_to_allowed_rearrange_inds={\n                        s: [0] for s in get_scenes(\"train\")\n                    },\n                    force_cache_reset=True,\n                    allowed_scenes=None,\n                    seed=2,\n                    x_display=open_x_displays[0] if len(open_x_displays) != 0 else None,\n                )\n            )\n\n            named_losses = (\n                WalkthroughRGBMappingPPOExperimentConfig.training_pipeline()._named_losses\n            )\n\n            ckpt_path = os.path.join(\n                tmpdir, \"pretrained_walkthrough_mapping_agent_75mil.pt\"\n            )\n            if not os.path.exists(ckpt_path):\n                urllib.request.urlretrieve(\n                    \"https://prior-model-weights.s3.us-east-2.amazonaws.com/embodied-ai/rearrangement/walkthrough/pretrained_walkthrough_mapping_agent_75mil.pt\",\n                    ckpt_path,\n                )\n\n            state_dict = torch.load(\n                ckpt_path,\n                map_location=\"cpu\",\n            )\n\n            walkthrough_model = 
WalkthroughRGBMappingPPOExperimentConfig.create_model()\n            walkthrough_model.load_state_dict(state_dict[\"model_state_dict\"])\n\n            memory = RolloutBlockStorage.create_memory(\n                spec=walkthrough_model.recurrent_memory_specification, num_samplers=1\n            ).step_squeeze(0)\n\n            masks = torch.FloatTensor([0]).view(1, 1, 1)\n\n            binned_map_losses = []\n            semantic_map_losses = []\n            for i in range(5):\n                masks = 0 * masks\n\n                set_seed(i + 1)\n                task = walkthrough_task_sampler.next_task()\n\n                def add_step_dim(input):\n                    if isinstance(input, torch.Tensor):\n                        return input.unsqueeze(0)\n                    elif isinstance(input, Dict):\n                        return {k: add_step_dim(v) for k, v in input.items()}\n                    else:\n                        raise NotImplementedError\n\n                batch = add_step_dim(batch_observations([task.get_observations()]))\n\n                while not task.is_done():\n                    # noinspection PyTypeChecker\n                    ac_out, memory = cast(\n                        Tuple[ActorCriticOutput, Memory],\n                        walkthrough_model.forward(\n                            observations=batch,\n                            memory=memory,\n                            prev_actions=None,\n                            masks=masks,\n                        ),\n                    )\n\n                    binned_map_losses.append(\n                        named_losses[\"binned_map_loss\"]\n                        .loss(\n                            step_count=0,  # Not used in this loss\n                            batch={\"observations\": batch},\n                            actor_critic_output=ac_out,\n                        )[0]\n                        .item()\n                    )\n                    assert (\n            
            binned_map_losses[-1] < 0.16\n                    ), f\"Binned map loss to large at ({i}, {task.num_steps_taken()})\"\n\n                    semantic_map_losses.append(\n                        named_losses[\"semantic_map_loss\"]\n                        .loss(\n                            step_count=0,  # Not used in this loss\n                            batch={\"observations\": batch},\n                            actor_critic_output=ac_out,\n                        )[0]\n                        .item()\n                    )\n                    assert (\n                        semantic_map_losses[-1] < 0.004\n                    ), f\"Semantic map loss to large at ({i}, {task.num_steps_taken()})\"\n\n                    masks = masks.fill_(1.0)\n                    obs = task.step(\n                        action=ac_out.distributions.sample().item()\n                    ).observation\n                    batch = add_step_dim(batch_observations([obs]))\n\n                    if task.num_steps_taken() >= 10:\n                        break\n\n            # To save observations for comparison against future runs, uncomment the below.\n            # os.makedirs(\"tmp_out\", exist_ok=True)\n            # compress_pickle.dump(\n            #     {**observations_dict}, \"tmp_out/rearrange_mapping_examples.pkl.gz\"\n            # )\n        finally:\n            try:\n                walkthrough_task_sampler.close()\n            except NameError:\n                pass\n\n\nif __name__ == \"__main__\":\n    TestAI2THORMapSensors().test_binned_and_semantic_mapping(mkdtemp())  # type:ignore\n    # TestAI2THORMapSensors().test_binned_and_semantic_mapping(\"tmp_out\")  # Used for local debugging\n    # TestAI2THORMapSensors().test_pretrained_rearrange_walkthrough_mapping_agent(\n    #     mkdtemp() # \"tmp_out\"\n    # )  # Used for local debugging\n"
  },
  {
    "path": "tests/multiprocessing/__init__.py",
    "content": ""
  },
  {
    "path": "tests/multiprocessing/test_frozen_attribs.py",
    "content": "from typing import Dict, Any\n\nimport torch.multiprocessing as mp\nimport torch.nn as nn\n\nfrom allenact.base_abstractions.experiment_config import ExperimentConfig\nfrom allenact.base_abstractions.task import TaskSampler\nfrom allenact.utils.experiment_utils import TrainingPipeline\n\n\n# noinspection PyAbstractClass,PyTypeChecker\nclass MyConfig(ExperimentConfig):\n    MY_VAR: int = 3\n\n    @classmethod\n    def tag(cls) -> str:\n        return \"\"\n\n    @classmethod\n    def training_pipeline(cls, **kwargs) -> TrainingPipeline:\n        return None\n\n    @classmethod\n    def create_model(cls, **kwargs) -> nn.Module:\n        return None\n\n    @classmethod\n    def make_sampler_fn(cls, **kwargs) -> TaskSampler:\n        return None\n\n    def my_var_is(self, val):\n        assert self.MY_VAR == val\n\n\n# noinspection PyAbstractClass\nclass MySpecConfig(MyConfig):\n    MY_VAR = 6\n\n    @classmethod\n    def machine_params(cls, mode=\"train\", **kwargs) -> Dict[str, Any]:\n        return {}\n\n    @classmethod\n    def tag(cls) -> str:\n        return \"SpecTag\"\n\n\nscfg = MySpecConfig()\n\n\nclass TestFrozenAttribs(object):\n    def test_frozen_inheritance(self):\n        from abc import abstractmethod\n        from allenact.base_abstractions.experiment_config import FrozenClassVariables\n\n        class SomeBase(metaclass=FrozenClassVariables):\n            yar = 3\n\n            @abstractmethod\n            def use(self):\n                raise NotImplementedError()\n\n        class SomeDerived(SomeBase):\n            yar = 33\n\n            def use(self):\n                return self.yar\n\n        failed = False\n        try:\n            SomeDerived.yar = 6  # Error\n        except Exception as _:\n            failed = True\n        assert failed\n\n        inst = SomeDerived()\n        inst2 = SomeDerived()\n        inst.yar = 12  # No error\n        assert inst.use() == 12\n        assert inst2.use() == 33\n\n    @staticmethod\n  
  def my_func(config, val):\n        config.my_var_is(val)\n\n    def test_frozen_experiment_config(self):\n        val = 5\n\n        failed = False\n        try:\n            MyConfig()\n        except (RuntimeError, TypeError):\n            failed = True\n        assert failed\n\n        scfg.MY_VAR = val\n        scfg.my_var_is(val)\n\n        failed = False\n        try:\n            MyConfig.MY_VAR = val\n        except RuntimeError:\n            failed = True\n        assert failed\n\n        failed = False\n        try:\n            MySpecConfig.MY_VAR = val\n        except RuntimeError:\n            failed = True\n        assert failed\n\n        for fork_method in [\"forkserver\", \"fork\"]:\n            ctxt = mp.get_context(fork_method)\n            p = ctxt.Process(target=self.my_func, kwargs=dict(config=scfg, val=val))\n            p.start()\n            p.join()\n\n\nif __name__ == \"__main__\":\n    TestFrozenAttribs().test_frozen_inheritance()  # type:ignore\n    TestFrozenAttribs().test_frozen_experiment_config()  # type:ignore\n"
  },
  {
    "path": "tests/sync_algs_cpu/__init__.py",
    "content": ""
  },
  {
    "path": "tests/sync_algs_cpu/test_to_to_obj_trains.py",
    "content": "import io\nimport math\nimport os\nimport pathlib\nfrom contextlib import redirect_stdout, redirect_stderr\nfrom typing import Optional, List, Dict, Any\n\nimport torch\n\nfrom allenact.algorithms.onpolicy_sync.losses.abstract_loss import (\n    AbstractActorCriticLoss,\n)\nfrom allenact.algorithms.onpolicy_sync.policy import ObservationType\nfrom allenact.algorithms.onpolicy_sync.runner import OnPolicyRunner\nfrom allenact.algorithms.onpolicy_sync.storage import (\n    StreamingStorageMixin,\n    ExperienceStorage,\n    RolloutBlockStorage,\n)\nfrom allenact.base_abstractions.experiment_config import MachineParams\nfrom allenact.base_abstractions.misc import (\n    Memory,\n    GenericAbstractLoss,\n    ModelType,\n    LossOutput,\n)\nfrom allenact.utils.experiment_utils import PipelineStage, StageComponent\nfrom allenact.utils.misc_utils import prepare_locals_for_super\nfrom projects.babyai_baselines.experiments.go_to_obj.ppo import (\n    PPOBabyAIGoToObjExperimentConfig,\n)\n\nSILLY_STORAGE_VALUES = [1.0, 2.0, 3.0, 4.0]\nSILLY_STORAGE_REPEATS = [1, 2, 3, 4]\n\n\nclass FixedConstantLoss(AbstractActorCriticLoss):\n    def __init__(self, name: str, value: float):\n        super().__init__()\n        self.name = name\n        self.value = value\n\n    def loss(  # type: ignore\n        self,\n        *args,\n        **kwargs,\n    ):\n        return self.value, {self.name: self.value}\n\n\nclass SillyStorage(ExperienceStorage, StreamingStorageMixin):\n    def __init__(self, values_to_return: List[float], repeats: List[int]):\n        self.values_to_return = values_to_return\n        self.repeats = repeats\n        assert len(self.values_to_return) == len(self.repeats)\n        self.index = 0\n\n    def initialize(self, *, observations: ObservationType, **kwargs):\n        pass\n\n    def add(\n        self,\n        observations: ObservationType,\n        memory: Optional[Memory],\n        actions: torch.Tensor,\n        action_log_probs: 
torch.Tensor,\n        value_preds: torch.Tensor,\n        rewards: torch.Tensor,\n        masks: torch.Tensor,\n    ):\n        pass\n\n    def to(self, device: torch.device):\n        pass\n\n    def set_partition(self, index: int, num_parts: int):\n        pass\n\n    @property\n    def total_experiences(self) -> int:\n        return 0\n\n    @total_experiences.setter\n    def total_experiences(self, value: int):\n        pass\n\n    def next_batch(self) -> Dict[str, Any]:\n        if self.index >= len(self.values_to_return):\n            raise EOFError\n\n        to_return = {\n            \"value\": torch.tensor(\n                [self.values_to_return[self.index]] * self.repeats[self.index]\n            ),\n        }\n        self.index += 1\n        return to_return\n\n    def reset_stream(self):\n        self.index = 0\n\n    def empty(self) -> bool:\n        return len(self.values_to_return) == 0\n\n\nclass AverageBatchValueLoss(GenericAbstractLoss):\n    def loss(\n        self,\n        *,\n        model: ModelType,\n        batch: ObservationType,\n        batch_memory: Memory,\n        stream_memory: Memory,\n    ) -> LossOutput:\n        v = batch[\"value\"].mean()\n        return LossOutput(\n            value=v,\n            info={\"avg_batch_val\": v},\n            per_epoch_info={},\n            batch_memory=batch_memory,\n            stream_memory=stream_memory,\n            bsize=batch[\"value\"].shape[0],\n        )\n\n\nclass PPOBabyAIGoToObjTestExperimentConfig(PPOBabyAIGoToObjExperimentConfig):\n    NUM_CKPTS_TO_SAVE = 2\n\n    @classmethod\n    def tag(cls):\n        return \"BabyAIGoToObjPPO-TESTING\"\n\n    @classmethod\n    def machine_params(cls, mode=\"train\", **kwargs):\n        mp = super().machine_params(mode=mode, **kwargs)\n        if mode == \"valid\":\n            mp = MachineParams(\n                nprocesses=1,\n                devices=mp.devices,\n                sensor_preprocessor_graph=mp.sensor_preprocessor_graph,\n     
           sampler_devices=mp.sampler_devices,\n                visualizer=mp.visualizer,\n                local_worker_ids=mp.local_worker_ids,\n            )\n        return mp\n\n    @classmethod\n    def training_pipeline(cls, **kwargs):\n        total_train_steps = cls.TOTAL_RL_TRAIN_STEPS\n        ppo_info = cls.rl_loss_default(\"ppo\", steps=total_train_steps)\n\n        tp = cls._training_pipeline(\n            named_losses={\n                \"ppo_loss\": ppo_info[\"loss\"],\n                \"3_loss\": FixedConstantLoss(\"3_loss\", 3.0),\n                \"avg_value_loss\": AverageBatchValueLoss(),\n            },\n            named_storages={\n                \"onpolicy\": RolloutBlockStorage(),\n                \"silly_storage\": SillyStorage(\n                    values_to_return=SILLY_STORAGE_VALUES, repeats=SILLY_STORAGE_REPEATS\n                ),\n            },\n            pipeline_stages=[\n                PipelineStage(\n                    loss_names=[\"ppo_loss\", \"3_loss\"],\n                    max_stage_steps=total_train_steps,\n                    stage_components=[\n                        StageComponent(\n                            uuid=\"onpolicy\",\n                            storage_uuid=\"onpolicy\",\n                            loss_names=[\"ppo_loss\", \"3_loss\"],\n                        )\n                    ],\n                ),\n            ],\n            num_mini_batch=ppo_info[\"num_mini_batch\"],\n            update_repeats=ppo_info[\"update_repeats\"],\n            total_train_steps=total_train_steps,\n            valid_pipeline_stage=PipelineStage(\n                loss_names=[\"ppo_loss\", \"3_loss\"],\n                max_stage_steps=-1,\n                update_repeats=1,\n                num_mini_batch=1,\n            ),\n            test_pipeline_stage=PipelineStage(\n                loss_names=[\"avg_value_loss\"],\n                stage_components=[\n                    StageComponent(\n                       
 uuid=\"debug\",\n                        storage_uuid=\"silly_storage\",\n                        loss_names=[\"avg_value_loss\"],\n                    ),\n                ],\n                max_stage_steps=-1,\n                update_repeats=1,\n                num_mini_batch=1,\n            ),\n        )\n\n        tp.training_settings.save_interval = int(\n            math.ceil(cls.TOTAL_RL_TRAIN_STEPS / cls.NUM_CKPTS_TO_SAVE)\n        )\n        return tp\n\n    def valid_task_sampler_args(\n        self,\n        process_ind: int,\n        total_processes: int,\n        devices: Optional[List[int]] = None,\n        seeds: Optional[List[int]] = None,\n        deterministic_cudnn: bool = False,\n    ) -> Dict[str, Any]:\n        # Also run validation\n        return self.test_task_sampler_args(**prepare_locals_for_super(locals()))\n\n\n# Wrapper context manager to redirect stdout and stderr to a file when potentially\n# using pytest capsys\nclass RedirectOutput:\n    def __init__(self, capsys: Optional, capfd: Optional):\n        self.capsys = capsys\n        self.capfd = capfd\n\n        self.f = io.StringIO()\n        self.redirect_stdout = redirect_stdout(self.f)\n        self.redirect_stderr = redirect_stderr(self.f)\n        self.capsys_output = \"\"\n        self.capfd_output = \"\"\n        # self.capsys_disabler = None\n\n    def get_output(self):\n        return self.f.getvalue() + self.capsys_output + self.capfd_output\n\n    def __enter__(self):\n        if self.capsys is not None:\n            self.capsys.readouterr()  # Clear out any existing output\n\n        if self.capfd is not None:\n            self.capfd.readouterr()  # Clear out any existing output\n            # self.capsys_disabler = self.capsys.disabled()\n            # self.capsys_disabler.__enter__()\n\n        self.redirect_stdout.__enter__()\n        self.redirect_stderr.__enter__()\n\n    def __exit__(self, *args):\n        if self.capsys is not None:\n            captured = 
self.capsys.readouterr()\n            self.capsys_output = captured.out + captured.err\n            # self.capsys_disabler.__exit__(*args)\n\n        if self.capfd is not None:\n            captured = self.capfd.readouterr()\n            self.capfd_output = captured.out + captured.err\n\n        self.redirect_stdout.__exit__(*args)\n        self.redirect_stderr.__exit__(*args)\n\n\nclass TestGoToObjTrains:\n    def test_ppo_trains(self, capfd, tmpdir):\n        cfg = PPOBabyAIGoToObjTestExperimentConfig()\n\n        d = tmpdir / \"test_ppo_trains\"\n        if isinstance(d, pathlib.Path):\n            d.mkdir(parents=True, exist_ok=True)\n        else:\n            d.mkdir()\n        output_dir = str(d)\n\n        train_runner = OnPolicyRunner(\n            config=cfg,\n            output_dir=output_dir,\n            loaded_config_src_files=None,\n            seed=1,\n            mode=\"train\",\n            deterministic_cudnn=True,\n        )\n\n        output_redirector = RedirectOutput(capsys=None, capfd=capfd)\n        with output_redirector:\n            start_time_str = train_runner.start_train(\n                max_sampler_processes_per_worker=1\n            )\n        s = output_redirector.get_output()\n\n        def extract_final_metrics_from_log(s: str, mode: str):\n            lines = s.splitlines()\n            lines = [l for l in lines if mode.upper() in l]\n            try:\n                metrics_and_losses_list = (\n                    lines[-1].split(\")\")[-1].split(\"[\")[0].strip().split(\" \")\n                )\n            except IndexError:\n                raise RuntimeError(f\"Failed to parse log:\\n{s}\")\n\n            def try_float(f):\n                try:\n                    return float(f)\n                except ValueError:\n                    return f\n\n            metrics_and_losses_dict = {\n                k: try_float(v)\n                for k, v in zip(\n                    metrics_and_losses_list[::2], 
metrics_and_losses_list[1::2]\n                )\n            }\n            return metrics_and_losses_dict\n\n        train_metrics = extract_final_metrics_from_log(s, \"train\")\n        assert train_metrics[\"global_batch_size\"] == 256\n\n        valid_metrics = extract_final_metrics_from_log(s, \"valid\")\n        assert valid_metrics[\"3_loss/3_loss\"] == 3, \"Incorrect validation loss\"\n        assert (\n            valid_metrics[\"new_tasks_completed\"] == cfg.NUM_TEST_TASKS\n        ), \"Incorrect number of tasks evaluated in validation\"\n\n        test_runner = OnPolicyRunner(\n            config=cfg,\n            output_dir=output_dir,\n            loaded_config_src_files=None,\n            seed=1,\n            mode=\"test\",\n            deterministic_cudnn=True,\n        )\n\n        test_results = test_runner.start_test(\n            checkpoint_path_dir_or_pattern=os.path.join(\n                output_dir, \"checkpoints\", \"**\", start_time_str, \"*.pt\"\n            ),\n            max_sampler_processes_per_worker=1,\n        )\n\n        assert (\n            len(test_results) == 2\n        ), f\"Too many or too few test results ({test_results})\"\n\n        tr = test_results[-1]\n        assert (\n            tr[\"training_steps\"]\n            == round(\n                math.ceil(\n                    cfg.TOTAL_RL_TRAIN_STEPS\n                    / (cfg.ROLLOUT_STEPS * cfg.NUM_TRAIN_SAMPLERS)\n                )\n            )\n            * cfg.ROLLOUT_STEPS\n            * cfg.NUM_TRAIN_SAMPLERS\n        ), \"Incorrect number of training steps\"\n        assert len(tr[\"tasks\"]) == cfg.NUM_TEST_TASKS, \"Incorrect number of test tasks\"\n        assert tr[\"test-metrics/success\"] == sum(\n            task[\"success\"] for task in tr[\"tasks\"]\n        ) / len(tr[\"tasks\"]), \"Success counts don't seem to match\"\n        assert (\n            tr[\"test-metrics/success\"] > 0.95\n        ), f\"PPO did not seem to converge for the go_to_obj 
task (success {tr['success']}).\"\n        assert tr[\"test-debug-losses/avg_value_loss/avg_batch_val\"] == sum(\n            ssv * ssr for ssv, ssr in zip(SILLY_STORAGE_VALUES, SILLY_STORAGE_REPEATS)\n        ) / sum(SILLY_STORAGE_REPEATS)\n        assert tr[\"test-debug-losses/avg_value_loss/avg_batch_val\"] == sum(\n            ssv * ssr for ssv, ssr in zip(SILLY_STORAGE_VALUES, SILLY_STORAGE_REPEATS)\n        ) / sum(SILLY_STORAGE_REPEATS)\n        assert tr[\"test-debug-misc/worker_batch_size\"] == sum(\n            SILLY_STORAGE_VALUES\n        ) / len(SILLY_STORAGE_VALUES)\n\n\nif __name__ == \"__main__\":\n    TestGoToObjTrains().test_ppo_trains(\n        pathlib.Path(\"experiment_output/testing\"), capsys=None, capfd=None\n    )  # type:ignore\n"
  },
  {
    "path": "tests/utils/__init__.py",
    "content": ""
  },
  {
    "path": "tests/utils/test_inference_agent.py",
    "content": "from collections import Counter\n\nimport torch\n\nfrom allenact.utils.experiment_utils import set_seed\nfrom allenact.utils.inference import InferenceAgent\nfrom projects.babyai_baselines.experiments.go_to_obj.ppo import (\n    PPOBabyAIGoToObjExperimentConfig,\n)\n\nfrom packaging.version import parse\n\nif parse(torch.__version__) >= parse(\"2.0.0\"):\n    expected_results = [\n        {\n            \"ep_length\": 39,\n            \"reward\": 0.45999999999999996,\n            \"task_info\": {},\n            \"success\": 1.0,\n        },\n        {\"ep_length\": 64, \"reward\": 0.0, \"task_info\": {}, \"success\": 0.0},\n        {\"ep_length\": 64, \"reward\": 0.0, \"task_info\": {}, \"success\": 0.0},\n        {\"ep_length\": 64, \"reward\": 0.0, \"task_info\": {}, \"success\": 0.0},\n        {\"ep_length\": 64, \"reward\": 0.0, \"task_info\": {}, \"success\": 0.0},\n        {\"ep_length\": 64, \"reward\": 0.0, \"task_info\": {}, \"success\": 0.0},\n        {\"ep_length\": 64, \"reward\": 0.0, \"task_info\": {}, \"success\": 0.0},\n        {\"ep_length\": 64, \"reward\": 0.0, \"task_info\": {}, \"success\": 0.0},\n        {\"ep_length\": 64, \"reward\": 0.0, \"task_info\": {}, \"success\": 0.0},\n        {\"ep_length\": 64, \"reward\": 0.0, \"task_info\": {}, \"success\": 0.0},\n    ]\nelse:\n    expected_results = [\n        {\"ep_length\": 64, \"reward\": 0.0, \"success\": 0.0},\n        {\"ep_length\": 64, \"reward\": 0.0, \"success\": 0.0},\n        {\"ep_length\": 64, \"reward\": 0.0, \"success\": 0.0},\n        {\"ep_length\": 17, \"reward\": 0.7646153846153846, \"success\": 1.0},\n        {\"ep_length\": 22, \"reward\": 0.6953846153846154, \"success\": 1.0},\n        {\"ep_length\": 64, \"reward\": 0.0, \"success\": 0.0},\n        {\"ep_length\": 64, \"reward\": 0.0, \"success\": 0.0},\n        {\"ep_length\": 64, \"reward\": 0.0, \"success\": 0.0},\n        {\"ep_length\": 64, \"reward\": 0.0, \"success\": 0.0},\n        {\"ep_length\": 
64, \"reward\": 0.0, \"success\": 0.0},\n    ]\n\n\nclass TestInferenceAgent(object):\n    def test_inference_agent_from_minigrid_config(self):\n        set_seed(1)\n\n        exp_config = PPOBabyAIGoToObjExperimentConfig()\n        agent = InferenceAgent.from_experiment_config(\n            exp_config=exp_config,\n            device=torch.device(\"cpu\"),\n        )\n\n        task_sampler = exp_config.make_sampler_fn(\n            **exp_config.test_task_sampler_args(process_ind=0, total_processes=1)\n        )\n\n        all_actions = []\n        successes = 0\n        for ind, expected_result in zip(range(10), expected_results):\n            agent.reset()\n\n            task = task_sampler.next_task()\n            observations = task.get_observations()\n\n            actions = []\n            while not task.is_done():\n                action = agent.act(observations=observations)\n                actions.append(action)\n                observations = task.step(action).observation\n\n            metrics = task.metrics()\n\n            successes += metrics[\"success\"]\n\n            assert metrics[\"success\"] == 0 or metrics[\"reward\"] > 0\n            assert metrics[\"ep_length\"] <= 64\n\n            # Random seeding seems to not work well when changing linux/mac and torch versions :(\n            # assert all(\n            #     abs(v - expected_result[k]) < 1e-4\n            #     for k, v in task.metrics().items()\n            #     if k != \"task_info\"\n            # ), f\"Failed on task {ind} with actions {actions} and metrics {task.metrics()} (expected={expected_result}).\"\n\n            all_actions.append(actions)\n\n        assert successes > 0, \"At least one task should be successful hopefully...\"\n        assert min(Counter(sum(all_actions, [])).values()) >= len(\n            sum(all_actions, [])\n        ) * 1 / (7 + 3), (\n            \"Statistically, all actions should be taken at around 1/7 * num_actions times. 
We add 3 to\"\n            \" the denominator for unlikely settings.\"\n        )\n\n\nif __name__ == \"__main__\":\n    TestInferenceAgent().test_inference_agent_from_minigrid_config()\n"
  },
  {
    "path": "tests/utils/test_spaces.py",
    "content": "import warnings\nfrom collections import OrderedDict\nfrom typing import Tuple\n\nimport numpy as np\nimport torch\nfrom gym import spaces as gyms\n\nfrom allenact.utils import spaces_utils as su\n\n\nclass TestSpaces(object):\n    space = gyms.Dict(\n        {\n            \"first\": gyms.Tuple(\n                [\n                    gyms.Box(-10, 10, (3, 4)),\n                    gyms.MultiDiscrete([2, 3, 4]),\n                    gyms.Box(-1, 1, ()),\n                ]\n            ),\n            \"second\": gyms.Tuple(\n                [\n                    gyms.Dict({\"third\": gyms.Discrete(11)}),\n                    gyms.MultiBinary(8),\n                ]\n            ),\n        }\n    )\n\n    @staticmethod\n    def same(a, b, bidx=None):\n        if isinstance(a, OrderedDict):\n            for key in a:\n                if not TestSpaces.same(a[key], b[key], bidx):\n                    return False\n            return True\n        elif isinstance(a, Tuple):\n            for it in range(len(a)):\n                if not TestSpaces.same(a[it], b[it], bidx):\n                    return False\n            return True\n        else:\n            # np.array_equal also works for torch tensors and scalars\n            if bidx is None:\n                return np.array_equal(a, b)\n            else:\n                return np.array_equal(a, b[bidx])\n\n    def test_conversion(self):\n        gsample = self.space.sample()\n\n        asample = su.torch_point(self.space, gsample)\n\n        back = su.numpy_point(self.space, asample)\n\n        assert self.same(back, gsample)\n\n    def test_flatten(self):\n        # We flatten Discrete to 1 value\n        assert su.flatdim(self.space) == 25\n        # gym flattens Discrete to one-hot\n        assert gyms.flatdim(self.space) == 35\n\n        asample = su.torch_point(self.space, self.space.sample())\n        flattened = su.flatten(self.space, asample)\n        unflattened = su.unflatten(self.space, 
flattened)\n        assert self.same(asample, unflattened)\n\n        # suppress `UserWarning: WARN: Box bound precision lowered by casting to float32`\n        with warnings.catch_warnings():\n            warnings.simplefilter(\"ignore\")\n\n            flattened_space = su.flatten_space(self.space)\n            assert flattened_space.shape == (25,)\n            # The maximum comes from Discrete(11)\n            assert flattened_space.high.max() == 11.0\n            assert flattened_space.low.min() == -10.0\n\n            gym_flattened_space = gyms.flatten_space(self.space)\n            assert gym_flattened_space.shape == (35,)\n            # The maximum comes from Box(-10, 10, (3, 4))\n            assert gym_flattened_space.high.max() == 10.0\n            assert gym_flattened_space.low.min() == -10.0\n\n    def test_batched(self):\n        samples = [self.space.sample() for _ in range(10)]\n        flattened = [\n            su.flatten(self.space, su.torch_point(self.space, sample))\n            for sample in samples\n        ]\n        stacked = torch.stack(flattened, dim=0)\n        unflattened = su.unflatten(self.space, stacked)\n        for bidx, refsample in enumerate(samples):\n            # Compare each torch-ified sample to the corresponding unflattened from the stack\n            assert self.same(su.torch_point(self.space, refsample), unflattened, bidx)\n\n        assert self.same(su.flatten(self.space, unflattened), stacked)\n\n    def test_tolist(self):\n        space = gyms.MultiDiscrete([3, 3])\n        actions = su.torch_point(space, space.sample())  # single sampler\n        actions = actions.unsqueeze(0).unsqueeze(0)  # add [step, sampler]\n        flat_actions = su.flatten(space, actions)\n        al = su.action_list(space, flat_actions)\n        assert len(al) == 1\n        assert len(al[0]) == 2\n\n        space = gyms.Tuple([gyms.MultiDiscrete([3, 3]), gyms.Discrete(2)])\n        actions = su.torch_point(space, space.sample())  # single 
sampler\n        actions = (\n            actions[0].unsqueeze(0).unsqueeze(0),\n            torch.tensor(actions[1]).unsqueeze(0).unsqueeze(0),\n        )  # add [step, sampler]\n        flat_actions = su.flatten(space, actions)\n        al = su.action_list(space, flat_actions)\n        assert len(al) == 1\n        assert len(al[0][0]) == 2\n        assert isinstance(al[0][1], int)\n\n        space = gyms.Dict(\n            {\"tuple\": gyms.MultiDiscrete([3, 3]), \"scalar\": gyms.Discrete(2)}\n        )\n        actions = su.torch_point(space, space.sample())  # single sampler\n        actions = OrderedDict(\n            [\n                (\"tuple\", actions[\"tuple\"].unsqueeze(0).unsqueeze(0)),\n                (\"scalar\", torch.tensor(actions[\"scalar\"]).unsqueeze(0).unsqueeze(0)),\n            ]\n        )\n        flat_actions = su.flatten(space, actions)\n        al = su.action_list(space, flat_actions)\n        assert len(al) == 1\n        assert len(al[0][\"tuple\"]) == 2\n        assert isinstance(al[0][\"scalar\"], int)\n\n\nif __name__ == \"__main__\":\n    TestSpaces().test_conversion()  # type:ignore\n    TestSpaces().test_flatten()  # type:ignore\n    TestSpaces().test_batched()  # type:ignore\n    TestSpaces().test_tolist()  # type:ignore\n"
  },
  {
    "path": "tests/vision/__init__.py",
    "content": ""
  },
  {
    "path": "tests/vision/test_pillow_rescaling.py",
    "content": "import hashlib\nimport os\n\nimport imageio\nimport numpy as np\nfrom torchvision.transforms import transforms\n\nfrom allenact.utils.tensor_utils import ScaleBothSides\nfrom constants import ABS_PATH_OF_TOP_LEVEL_DIR\n\nto_pil = transforms.ToPILImage()  # Same as used by the vision sensors\n\n\nclass TestPillowRescaling(object):\n    def _load_thor_img(self) -> np.ndarray:\n        img_path = os.path.join(\n            ABS_PATH_OF_TOP_LEVEL_DIR, \"docs/img/iTHOR_framework.jpg\"\n        )\n        img = imageio.v2.imread(img_path)\n        return img\n\n    def _get_img_hash(self, img: np.ndarray) -> str:\n        img_hash = hashlib.sha1(np.ascontiguousarray(img))\n        return img_hash.hexdigest()\n\n    def _random_rgb_image(self, width: int, height: int, seed: int) -> np.ndarray:\n        s = np.random.get_state()\n        np.random.seed(seed)\n        img = np.random.randint(\n            low=0, high=256, size=(width, height, 3), dtype=np.uint8\n        )\n        np.random.set_state(s)\n        return img\n\n    def _random_depthmap(\n        self, width: int, height: int, max_depth: float, seed: int\n    ) -> np.ndarray:\n        s = np.random.get_state()\n        np.random.seed(seed)\n        img = max_depth * np.random.rand(width, height, 1)\n        np.random.set_state(s)\n        return np.float32(img)\n\n    def test_scaler_rgb_thor(self):\n        thor_img_arr = np.uint8(self._load_thor_img())\n\n        assert self._get_img_hash(thor_img_arr) in [\n            \"80ff8a342b4f74966796eee91babde31409d0457\",\n            \"eb808b2218ccc2e56144131f9ef596a5c2ae3e2a\",\n        ]\n\n        img = to_pil(thor_img_arr)\n\n        scaler = ScaleBothSides(width=75, height=75)\n        scaled_img = np.array(scaler(img))\n        assert self._get_img_hash(scaled_img) in [\n            \"2c47057aa188240cb21b2edc39e0f269c1085bac\",\n            \"b5df3cc03f181cb7be07ddd229cac8d1efd5d077\",\n        ]\n\n        scaler = ScaleBothSides(width=500, 
height=600)\n        scaled_img = np.array(scaler(img))\n        assert self._get_img_hash(scaled_img) in [\n            \"faf0be2b9ec9bfd23a1b7b465c86ad961d03c259\",\n            \"cccddd7f17b59434dcdd0006dceeffbe1a969dc8\",\n        ]\n\n    def test_scaler_rgb_random(self):\n        arr = self._random_rgb_image(width=100, height=100, seed=1)\n\n        assert self._get_img_hash(arr) == \"d01bd8ba151ab790fde9a8cc29aa8a3c63147334\"\n\n        img = to_pil(arr)\n\n        scaler = ScaleBothSides(width=60, height=60)\n        scaled_img = np.array(scaler(img))\n        assert (\n            self._get_img_hash(scaled_img) == \"22473537e50d5e39abeeec4f92dbfde51c754010\"\n        )\n\n        scaler = ScaleBothSides(width=1000, height=800)\n        scaled_img = np.array(scaler(img))\n        assert (\n            self._get_img_hash(scaled_img) == \"5e5b955981e4ee3b5e22287536040d001a31fbd3\"\n        )\n\n    def test_scaler_depth_thor(self):\n        thor_depth_arr = 5 * np.float32(self._load_thor_img()).sum(-1)\n        thor_depth_arr /= thor_depth_arr.max()\n\n        assert self._get_img_hash(thor_depth_arr) in [\n            \"d3c1474400ba57ed78f52cf4ba6a4c2a1d90516c\",\n            \"85a18befb2a174403079bf49d149630f829222c2\",\n        ]\n\n        img = to_pil(thor_depth_arr)\n\n        scaler = ScaleBothSides(width=75, height=75)\n        scaled_img = np.array(scaler(img))\n        assert self._get_img_hash(scaled_img) in [\n            \"6a879beb6bed49021e438c1e3af7a62c428a44d8\",\n            \"868f1d2b32167bda524ba502158f1ee81c8a24d2\",\n        ]\n\n        scaler = ScaleBothSides(width=500, height=600)\n        scaled_img = np.array(scaler(img))\n        assert self._get_img_hash(scaled_img) in [\n            \"79f11fb741ae638afca40125e4c501f54b22cc01\",\n            \"2d3012e1cced2942f7368e84bf332241fcf9d7fe\",\n        ]\n\n    def test_scaler_depth_random(self):\n        depth_arr = self._random_depthmap(width=96, height=103, max_depth=5.0, seed=1)\n\n   
     assert (\n            self._get_img_hash(depth_arr) == \"cbd8ca127951ffafb6848536d9d731970a5397e9\"\n        )\n\n        img = to_pil(depth_arr)\n\n        scaler = ScaleBothSides(width=60, height=60)\n        scaled_img = np.array(scaler(img))\n        assert (\n            self._get_img_hash(scaled_img) == \"5bed173f2d783fb2badcde9b43904ef85a1a5820\"\n        )\n\n        scaler = ScaleBothSides(width=1000, height=800)\n        scaled_img = np.array(scaler(img))\n        assert (\n            self._get_img_hash(scaled_img) == \"9dceb7f77d767888f24a84c00913c0cf4ccd9d49\"\n        )\n\n\nif __name__ == \"__main__\":\n    TestPillowRescaling().test_scaler_rgb_thor()\n    TestPillowRescaling().test_scaler_rgb_random()\n    TestPillowRescaling().test_scaler_depth_thor()\n    TestPillowRescaling().test_scaler_depth_random()\n"
  }
]