Repository: allenai/allenact Branch: main Commit: d055fc9d4533 Files: 402 Total size: 2.0 MB Directory structure: gitextract_rp45h8jw/ ├── .VERSION ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ ├── feature_request.md │ │ └── support_request.md │ └── workflows/ │ ├── black.yml │ ├── codeql.yml │ ├── publish.yml │ └── pytest.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CNAME ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── ROADMAP.md ├── allenact/ │ ├── __init__.py │ ├── _constants.py │ ├── algorithms/ │ │ ├── __init__.py │ │ ├── offpolicy_sync/ │ │ │ ├── __init__.py │ │ │ └── losses/ │ │ │ ├── __init__.py │ │ │ └── abstract_offpolicy_loss.py │ │ └── onpolicy_sync/ │ │ ├── __init__.py │ │ ├── engine.py │ │ ├── losses/ │ │ │ ├── __init__.py │ │ │ ├── a2cacktr.py │ │ │ ├── abstract_loss.py │ │ │ ├── grouped_action_imitation.py │ │ │ ├── imitation.py │ │ │ └── ppo.py │ │ ├── misc.py │ │ ├── policy.py │ │ ├── runner.py │ │ ├── storage.py │ │ └── vector_sampled_tasks.py │ ├── base_abstractions/ │ │ ├── __init__.py │ │ ├── callbacks.py │ │ ├── distributions.py │ │ ├── experiment_config.py │ │ ├── misc.py │ │ ├── preprocessor.py │ │ ├── sensor.py │ │ └── task.py │ ├── embodiedai/ │ │ ├── __init__.py │ │ ├── aux_losses/ │ │ │ ├── __init__.py │ │ │ └── losses.py │ │ ├── mapping/ │ │ │ ├── __init__.py │ │ │ ├── mapping_losses.py │ │ │ ├── mapping_models/ │ │ │ │ ├── __init__.py │ │ │ │ └── active_neural_slam.py │ │ │ └── mapping_utils/ │ │ │ ├── __init__.py │ │ │ ├── map_builders.py │ │ │ └── point_cloud_utils.py │ │ ├── models/ │ │ │ ├── __init__.py │ │ │ ├── aux_models.py │ │ │ ├── basic_models.py │ │ │ ├── fusion_models.py │ │ │ ├── resnet.py │ │ │ └── visual_nav_models.py │ │ ├── preprocessors/ │ │ │ ├── __init__.py │ │ │ └── resnet.py │ │ ├── sensors/ │ │ │ ├── __init__.py │ │ │ └── vision_sensors.py │ │ └── storage/ │ │ ├── __init__.py │ │ └── vdr_storage.py │ ├── main.py │ ├── setup.py │ └── utils/ │ ├── __init__.py │ ├── 
cache_utils.py │ ├── cacheless_frcnn.py │ ├── experiment_utils.py │ ├── inference.py │ ├── misc_utils.py │ ├── model_utils.py │ ├── multi_agent_viz_utils.py │ ├── spaces_utils.py │ ├── system.py │ ├── tensor_utils.py │ └── viz_utils.py ├── allenact_plugins/ │ ├── __init__.py │ ├── babyai_plugin/ │ │ ├── __init__.py │ │ ├── babyai_constants.py │ │ ├── babyai_models.py │ │ ├── babyai_tasks.py │ │ ├── configs/ │ │ │ └── __init__.py │ │ ├── data/ │ │ │ └── __init__.py │ │ ├── extra_environment.yml │ │ ├── extra_requirements.txt │ │ └── scripts/ │ │ ├── __init__.py │ │ ├── download_babyai_expert_demos.py │ │ ├── get_instr_length_percentiles.py │ │ └── truncate_expert_demos.py │ ├── clip_plugin/ │ │ ├── __init__.py │ │ ├── clip_preprocessors.py │ │ ├── extra_environment.yml │ │ └── extra_requirements.txt │ ├── gym_plugin/ │ │ ├── __init__.py │ │ ├── extra_environment.yml │ │ ├── extra_requirements.txt │ │ ├── gym_distributions.py │ │ ├── gym_environment.py │ │ ├── gym_models.py │ │ ├── gym_sensors.py │ │ └── gym_tasks.py │ ├── habitat_plugin/ │ │ ├── __init__.py │ │ ├── data/ │ │ │ └── __init__.py │ │ ├── extra_environment.yml │ │ ├── extra_environment_headless.yml │ │ ├── extra_requirements.txt │ │ ├── habitat_constants.py │ │ ├── habitat_environment.py │ │ ├── habitat_preprocessors.py │ │ ├── habitat_sensors.py │ │ ├── habitat_task_samplers.py │ │ ├── habitat_tasks.py │ │ ├── habitat_utils.py │ │ └── scripts/ │ │ ├── __init__.py │ │ ├── agent_demo.py │ │ └── make_map.py │ ├── ithor_plugin/ │ │ ├── __init__.py │ │ ├── extra_environment.yml │ │ ├── extra_requirements.txt │ │ ├── ithor_constants.py │ │ ├── ithor_environment.py │ │ ├── ithor_sensors.py │ │ ├── ithor_task_samplers.py │ │ ├── ithor_tasks.py │ │ ├── ithor_util.py │ │ ├── ithor_viz.py │ │ └── scripts/ │ │ ├── __init__.py │ │ ├── make_objectnav_debug_dataset.py │ │ └── make_pointnav_debug_dataset.py │ ├── lighthouse_plugin/ │ │ ├── __init__.py │ │ ├── configs/ │ │ │ └── __init__.py │ │ ├── data/ │ │ │ └── 
__init__.py │ │ ├── extra_environment.yml │ │ ├── extra_requirements.txt │ │ ├── lighthouse_environment.py │ │ ├── lighthouse_models.py │ │ ├── lighthouse_sensors.py │ │ ├── lighthouse_tasks.py │ │ ├── lighthouse_util.py │ │ └── scripts/ │ │ └── __init__.py │ ├── manipulathor_plugin/ │ │ ├── __init__.py │ │ ├── arm_calculation_utils.py │ │ ├── armpointnav_constants.py │ │ ├── manipulathor_constants.py │ │ ├── manipulathor_environment.py │ │ ├── manipulathor_sensors.py │ │ ├── manipulathor_task_samplers.py │ │ ├── manipulathor_tasks.py │ │ ├── manipulathor_utils.py │ │ └── manipulathor_viz.py │ ├── minigrid_plugin/ │ │ ├── __init__.py │ │ ├── configs/ │ │ │ ├── __init__.py │ │ │ └── minigrid_nomemory.py │ │ ├── data/ │ │ │ └── __init__.py │ │ ├── extra_environment.yml │ │ ├── extra_requirements.txt │ │ ├── minigrid_environments.py │ │ ├── minigrid_models.py │ │ ├── minigrid_offpolicy.py │ │ ├── minigrid_sensors.py │ │ ├── minigrid_tasks.py │ │ └── scripts/ │ │ └── __init__.py │ ├── navigation_plugin/ │ │ ├── __init__.py │ │ ├── objectnav/ │ │ │ ├── __init__.py │ │ │ └── models.py │ │ └── pointnav/ │ │ ├── __init__.py │ │ └── models.py │ ├── robothor_plugin/ │ │ ├── __init__.py │ │ ├── configs/ │ │ │ └── __init__.py │ │ ├── extra_environment.yml │ │ ├── extra_requirements.txt │ │ ├── robothor_constants.py │ │ ├── robothor_distributions.py │ │ ├── robothor_environment.py │ │ ├── robothor_models.py │ │ ├── robothor_preprocessors.py │ │ ├── robothor_sensors.py │ │ ├── robothor_task_samplers.py │ │ ├── robothor_tasks.py │ │ ├── robothor_viz.py │ │ └── scripts/ │ │ ├── __init__.py │ │ ├── make_objectnav_debug_dataset.py │ │ └── make_pointnav_debug_dataset.py │ └── setup.py ├── conda/ │ ├── environment-10.1.yml │ ├── environment-10.2.yml │ ├── environment-11.1.yml │ ├── environment-9.2.yml │ ├── environment-base.yml │ ├── environment-cpu.yml │ └── environment-dev.yml ├── constants.py ├── datasets/ │ ├── .gitignore │ ├── .habitat_datasets_download_info.json │ ├── 
.habitat_downloader_helper.py │ ├── download_habitat_datasets.sh │ └── download_navigation_datasets.sh ├── dev_requirements.txt ├── docs/ │ ├── .gitignore │ ├── CNAME │ ├── FAQ.md │ ├── css/ │ │ └── extra.css │ ├── getting_started/ │ │ ├── abstractions.md │ │ ├── running-your-first-experiment.md │ │ └── structure.md │ ├── howtos/ │ │ ├── changing-rewards-and-losses.md │ │ ├── defining-a-new-model.md │ │ ├── defining-a-new-task.md │ │ ├── defining-a-new-training-pipeline.md │ │ ├── defining-an-experiment.md │ │ ├── running-a-multi-agent-experiment.md │ │ └── visualizing-results.md │ ├── installation/ │ │ ├── download-datasets.md │ │ ├── installation-allenact.md │ │ └── installation-framework.md │ ├── javascripts/ │ │ └── extra.js │ ├── notebooks/ │ │ └── firstbook.md │ ├── projects/ │ │ ├── advisor_2020/ │ │ │ └── README.md │ │ ├── babyai_baselines/ │ │ │ └── README.md │ │ ├── gym_baselines/ │ │ │ └── README.md │ │ ├── objectnav_baselines/ │ │ │ └── README.md │ │ ├── pointnav_baselines/ │ │ │ └── README.md │ │ └── two_body_problem_2019/ │ │ └── README.md │ └── tutorials/ │ ├── distributed-objectnav-tutorial.md │ ├── gym-mujoco-tutorial.md │ ├── gym-tutorial.md │ ├── index.md │ ├── minigrid-tutorial.md │ ├── offpolicy-tutorial.md │ ├── running-inference-on-a-pretrained-model.md │ ├── training-a-pointnav-model.md │ ├── training-pipelines.md │ └── transfering-to-a-different-environment-framework.md ├── main.py ├── mkdocs.yml ├── mypy.ini ├── overrides/ │ └── main.html ├── pretrained_model_ckpts/ │ ├── .gitignore │ └── download_navigation_model_ckpts.sh ├── projects/ │ ├── __init__.py │ ├── babyai_baselines/ │ │ ├── README.md │ │ ├── __init__.py │ │ └── experiments/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── go_to_local/ │ │ │ ├── __init__.py │ │ │ ├── a2c.py │ │ │ ├── base.py │ │ │ ├── bc.py │ │ │ ├── bc_teacher_forcing.py │ │ │ ├── dagger.py │ │ │ ├── distributed_bc_offpolicy.py │ │ │ ├── distributed_bc_teacher_forcing.py │ │ │ └── ppo.py │ │ └── go_to_obj/ │ │ ├── 
__init__.py │ │ ├── a2c.py │ │ ├── base.py │ │ ├── bc.py │ │ ├── bc_teacher_forcing.py │ │ ├── dagger.py │ │ └── ppo.py │ ├── gym_baselines/ │ │ ├── README.md │ │ ├── __init__.py │ │ ├── experiments/ │ │ │ ├── __init__.py │ │ │ ├── gym_base.py │ │ │ ├── gym_humanoid_base.py │ │ │ ├── gym_humanoid_ddppo.py │ │ │ ├── gym_mujoco_base.py │ │ │ ├── gym_mujoco_ddppo.py │ │ │ └── mujoco/ │ │ │ ├── __init__.py │ │ │ ├── gym_mujoco_ant_ddppo.py │ │ │ ├── gym_mujoco_halfcheetah_ddppo.py │ │ │ ├── gym_mujoco_hopper_ddppo.py │ │ │ ├── gym_mujoco_humanoid_ddppo.py │ │ │ ├── gym_mujoco_inverteddoublependulum_ddppo.py │ │ │ ├── gym_mujoco_invertedpendulum_ddppo.py │ │ │ ├── gym_mujoco_reacher_ddppo.py │ │ │ ├── gym_mujoco_swimmer_ddppo.py │ │ │ └── gym_mujoco_walker2d_ddppo.py │ │ └── models/ │ │ ├── __init__.py │ │ └── gym_models.py │ ├── manipulathor_baselines/ │ │ ├── __init__.py │ │ └── armpointnav_baselines/ │ │ ├── __init__.py │ │ ├── experiments/ │ │ │ ├── __init__.py │ │ │ ├── armpointnav_base.py │ │ │ ├── armpointnav_mixin_ddppo.py │ │ │ ├── armpointnav_mixin_simplegru.py │ │ │ ├── armpointnav_thor_base.py │ │ │ └── ithor/ │ │ │ ├── __init__.py │ │ │ ├── armpointnav_depth.py │ │ │ ├── armpointnav_disjoint_depth.py │ │ │ ├── armpointnav_ithor_base.py │ │ │ ├── armpointnav_no_vision.py │ │ │ ├── armpointnav_rgb.py │ │ │ └── armpointnav_rgbdepth.py │ │ └── models/ │ │ ├── __init__.py │ │ ├── arm_pointnav_models.py │ │ ├── base_models.py │ │ ├── disjoint_arm_pointnav_models.py │ │ └── manipulathor_net_utils.py │ ├── objectnav_baselines/ │ │ ├── README.md │ │ ├── __init__.py │ │ ├── experiments/ │ │ │ ├── __init__.py │ │ │ ├── clip/ │ │ │ │ ├── __init__.py │ │ │ │ └── mixins.py │ │ │ ├── habitat/ │ │ │ │ ├── __init__.py │ │ │ │ ├── clip/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── objectnav_habitat_rgb_clipresnet50gru_ddppo.py │ │ │ │ │ └── objectnav_habitat_rgb_clipresnet50gru_ddppo_increasingrollouts.py │ │ │ │ └── objectnav_habitat_base.py │ │ │ ├── ithor/ │ │ │ │ ├── 
__init__.py │ │ │ │ ├── objectnav_ithor_base.py │ │ │ │ ├── objectnav_ithor_depth_resnet18gru_ddppo.py │ │ │ │ ├── objectnav_ithor_rgb_resnet18gru_ddppo.py │ │ │ │ └── objectnav_ithor_rgbd_resnet18gru_ddppo.py │ │ │ ├── objectnav_base.py │ │ │ ├── objectnav_thor_base.py │ │ │ └── robothor/ │ │ │ ├── __init__.py │ │ │ ├── beta/ │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── objectnav_robothor_rgb_resnetgru_ddppo_and_gbc.py │ │ │ │ └── objectnav_robothor_rgb_unfrozenresnet18gru_vdr_ddppo.py │ │ │ ├── clip/ │ │ │ │ ├── __init__.py │ │ │ │ ├── objectnav_robothor_rgb_clipresnet50gru_ddppo.py │ │ │ │ └── objectnav_robothor_rgb_clipresnet50x16gru_ddppo.py │ │ │ ├── objectnav_robothor_base.py │ │ │ ├── objectnav_robothor_depth_resnet18gru_ddppo.py │ │ │ ├── objectnav_robothor_rgb_resnet18gru_dagger.py │ │ │ ├── objectnav_robothor_rgb_resnet18gru_ddppo.py │ │ │ ├── objectnav_robothor_rgb_resnet50gru_ddppo.py │ │ │ ├── objectnav_robothor_rgb_unfrozenresnet18gru_ddppo.py │ │ │ └── objectnav_robothor_rgbd_resnet18gru_ddppo.py │ │ └── mixins.py │ ├── pointnav_baselines/ │ │ ├── README.md │ │ ├── __init__.py │ │ ├── experiments/ │ │ │ ├── __init__.py │ │ │ ├── habitat/ │ │ │ │ ├── __init__.py │ │ │ │ ├── clip/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── pointnav_habitat_rgb_clipresnet50gru_ddppo.py │ │ │ │ ├── pointnav_habitat_base.py │ │ │ │ ├── pointnav_habitat_depth_simpleconvgru_ddppo.py │ │ │ │ ├── pointnav_habitat_rgb_simpleconvgru_ddppo.py │ │ │ │ └── pointnav_habitat_rgbd_simpleconvgru_ddppo.py │ │ │ ├── ithor/ │ │ │ │ ├── __init__.py │ │ │ │ ├── pointnav_ithor_base.py │ │ │ │ ├── pointnav_ithor_depth_simpleconvgru_ddppo.py │ │ │ │ ├── pointnav_ithor_rgb_simpleconvgru_ddppo.py │ │ │ │ └── pointnav_ithor_rgbd_simpleconvgru_ddppo.py │ │ │ ├── pointnav_base.py │ │ │ ├── pointnav_thor_base.py │ │ │ └── robothor/ │ │ │ ├── __init__.py │ │ │ ├── pointnav_robothor_base.py │ │ │ ├── pointnav_robothor_depth_simpleconvgru_ddppo.py │ │ │ ├── 
pointnav_robothor_rgb_simpleconvgru_ddppo.py │ │ │ └── pointnav_robothor_rgbd_simpleconvgru_ddppo.py │ │ └── mixins.py │ └── tutorials/ │ ├── __init__.py │ ├── distributed_objectnav_tutorial.py │ ├── gym_mujoco_tutorial.py │ ├── gym_tutorial.py │ ├── minigrid_offpolicy_tutorial.py │ ├── minigrid_tutorial.py │ ├── minigrid_tutorial_conds.py │ ├── navtopartner_robothor_rgb_ppo.py │ ├── object_nav_ithor_dagger_then_ppo_one_object.py │ ├── object_nav_ithor_dagger_then_ppo_one_object_viz.py │ ├── object_nav_ithor_ppo_one_object.py │ ├── pointnav_habitat_rgb_ddppo.py │ ├── pointnav_ithor_rgb_ddppo.py │ ├── running_inference_tutorial.py │ └── training_a_pointnav_model.py ├── requirements.txt ├── scripts/ │ ├── auto_format.sh │ ├── build_docs.py │ ├── build_docs.sh │ ├── dcommand.py │ ├── dconfig.py │ ├── dkill.py │ ├── dmain.py │ ├── literate.py │ ├── release.py │ ├── run_tests.sh │ └── startx.py └── tests/ ├── .gitignore ├── __init__.py ├── hierarchical_policies/ │ ├── __init__.py │ └── test_minigrid_conditional.py ├── manipulathor_plugin/ │ ├── __init__.py │ └── test_utils.py ├── mapping/ │ ├── __init__.py │ └── test_ai2thor_mapping.py ├── multiprocessing/ │ ├── __init__.py │ └── test_frozen_attribs.py ├── sync_algs_cpu/ │ ├── __init__.py │ └── test_to_to_obj_trains.py ├── utils/ │ ├── __init__.py │ ├── test_inference_agent.py │ └── test_spaces.py └── vision/ ├── __init__.py └── test_pillow_rescaling.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .VERSION ================================================ 0.5.5a ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: '' labels: bug assignees: '' --- ## Problem A clear and concise description of what the bug is. 
## Steps to reproduce Steps to reproduce the behavior: 1. Go to '...' 2. Click on '....' 3. Scroll down to '....' 4. See error ## Expected behavior A clear and concise description of what you expected to happen. ## Screenshots If applicable, add screenshots to help explain your problem. ## Desktop Please add the following information: - OS: [e.g. Ubuntu 16.04.5] - AllenAct Version: [e.g. current HEAD of master or v0.1.0] ## Additional context Add any other context about the problem here. ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an enhancement or a new feature title: '' labels: enhancement assignees: '' --- ## Problem Is your feature request related to a problem? Please provide a clear and concise description of what the problem is: E.g. I would really like to have better support for my favorite environment X. ## Desired solution A clear and concise description of what you want to happen. ## Alternative solutions A description of any alternative solutions or features you've considered. ## Additional context Add any other context or screenshots about the feature request here. ================================================ FILE: .github/ISSUE_TEMPLATE/support_request.md ================================================ --- name: Support request about: Request support regarding AllenAct title: '' labels: '' assignees: '' --- ## Problem / Question What do you need help with? E.g. "I'm having trouble running model X" or "when I run command Y I get error Z." ## Additional context _(Optional)_ - To provide support it's helpful to have as many details as possible, add additional context here. 
================================================ FILE: .github/workflows/black.yml ================================================ name: Lint on: [push, pull_request] jobs: lint: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - uses: psf/black@stable ================================================ FILE: .github/workflows/codeql.yml ================================================ name: "CodeQL" on: push: branches: [ "main" ] pull_request: branches: [ "main" ] schedule: - cron: "13 6 * * 4" jobs: analyze: name: Analyze runs-on: ubuntu-latest permissions: actions: read contents: read security-events: write strategy: fail-fast: false matrix: language: [ python ] steps: - name: Checkout uses: actions/checkout@v3 - name: Initialize CodeQL uses: github/codeql-action/init@v2 with: languages: ${{ matrix.language }} queries: +security-and-quality - name: Autobuild uses: github/codeql-action/autobuild@v2 - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v2 with: category: "/language:${{ matrix.language }}" ================================================ FILE: .github/workflows/publish.yml ================================================ # This workflow will upload the allenact and allenact_plugins packages using Twine (after manually triggering it) # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries name: Publish PYPI Packages on: workflow_dispatch: jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Set up Python uses: actions/setup-python@v2 with: python-version: '3.7' - name: Install dependencies run: | python -m pip install --upgrade pip pip install setuptools twine - name: Build and publish env: TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | python scripts/release.py twine upload -u __token__ dist/* ================================================ FILE: .github/workflows/pytest.yml 
================================================ name: PyTest on: [push] jobs: build: runs-on: ubuntu-latest strategy: matrix: python-version: [3.9] steps: - uses: actions/checkout@v2 - uses: ouzi-dev/commit-status-updater@v1.1.0 # Updates the commit status badge to pending - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install pytest wandb python -m pip install --editable="./allenact" python -m pip install --editable="./allenact_plugins[all]" python -m pip install -e "git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd#egg=babyai" python -m pip install compress_pickle # Needed for some mapping tests pip list - name: Test with pytest run: | pytest --capture=tee-sys tests - if: always() # Updates the commit status badge to the result of running the tests above uses: ouzi-dev/commit-status-updater@v1.1.0 with: status: "${{ job.status }}" ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python docs/build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pycharm .idea/ # pytorch *.pt # Default output dir experiment_output *_out # PDFs *.pdf # PNGs *.png # Tensorboard logs events.out.tfevents.* # TSV files *.tsv # tmp directory tmp/ # Pickle files *.pkl *.pkl.gz # Zip files *.zip # VSCode .vscode/ # MacOS .DS_Store # Docs docs/index.md docs/CONTRIBUTING.md docs/LICENSE.md # Metrics metrics__*.json # Robothor allenact_plugins/robothor_plugin/data/* # ithor allenact_plugins/ithor_plugin/data/* # Habitat external_projects/habitat-lab # Local pip installations src .pip_src # Files created when running training **/used_configs *.patch # Package building *.egg_info *.egg-info # Additional allenact-specific locks and hidden files 
*.allenact_last_start_time_string *.allenact_start_time_string.lock *.lock rsync-* ================================================ FILE: .gitmodules ================================================ [submodule "projects/ithor_rearrangement"] path = projects/ithor_rearrangement url = https://github.com/allenai/ai2thor-rearrangement.git branch = active_neural_slam ================================================ FILE: .pre-commit-config.yaml ================================================ repos: - repo: https://github.com/ambv/black rev: 19.10b0 hooks: - id: black language_version: python3.7 - repo: https://github.com/pre-commit/mirrors-mypy rev: 'v0.761' # Use the sha / tag you want to point at hooks: - id: mypy args: [--follow-imports=skip] ================================================ FILE: CNAME ================================================ www.allenact.org ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing We welcome contributions from the greater community. If you would like to make such a contribution we recommend first submitting an [issue](https://github.com/allenai/allenact/issues) describing your proposed improvement. Doing so can ensure we can validate your suggestions before you spend a great deal of time upon them. Improvements and bug fixes should be made via a pull request from your fork of the repository at [https://github.com/allenai/allenact](https://github.com/allenai/allenact). All code in pull requests should adhere to the following guidelines. ## Found a bug or want to suggest an enhancement? Please submit an [issue](https://github.com/allenai/allenact/issues) in which you note the steps to reproduce the bug or in which you detail the enhancement. ## Making a pull request? When making a pull request we require that any code respects several guidelines detailed below. 
### Auto-formatting All python code in this repository should be formatted using [black](https://black.readthedocs.io/en/stable/). To use `black` auto-formatting across all files, simply run ```bash bash scripts/auto_format.sh ``` which will run `black` auto-formatting as well as [docformatter](https://pypi.org/project/docformatter/) (used to auto-format documentation strings). ### Type-checking Our code makes liberal use of type hints. If you have not had experience with type hinting in python we recommend reading the [documentation](https://docs.python.org/3/library/typing.html) of the `typing` python module or the simplified introduction to type hints found [here](https://www.python.org/dev/peps/pep-0483/). All methods should have typed arguments and output. Furthermore we use [mypy](https://mypy.readthedocs.io/en/stable/) to perform basic static type checking. Before making a pull request, there should be no warnings or errors when running ```bash dmypy run -- --follow-imports=skip . ``` Explicitly ignoring type checking (for instance using `# type: ignore`) should only be done when it would otherwise be an extensive burden. ### Setting up pre-commit hooks (optional) Pre-commit hooks check that, when you attempt to commit changes, your code adheres to a number of formatting and type-checking guidelines. Pull requests containing code not adhering to these guidelines will not be accepted and thus we recommend installing these pre-commit hooks. Assuming you have installed all of the project requirements, you can install our recommended pre-commit hooks by running (from this project's root directory) ```bash pre-commit install ``` After running the above, each time you run `git commit ...` a set of pre-commit checks will be run. ================================================ FILE: LICENSE ================================================ MIT License Original work Copyright (c) 2017 Ilya Kostrikov Original work Copyright (c) Facebook, Inc. and its affiliates. 
Modified work Copyright (c) 2020 Allen Institute for Artificial Intelligence Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================

An open source framework for research in Embodied AI


[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](./LICENSE) [![Documentation Status](https://img.shields.io/badge/docs-up%20to%20date-Green.svg)](https://allenact.org) [![Latest Release](https://img.shields.io/github/v/release/allenai/allenact)](https://github.com/allenai/allenact/releases/latest) [![Python 3.7](https://img.shields.io/badge/python-3.6+-blue.svg)](https://www.python.org/downloads/release/python-360/) [![LGTM Grade: Python](https://img.shields.io/lgtm/grade/python/g/allenai/allenact.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/allenai/allenact/context:python) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) **AllenAct** is a modular and flexible learning framework designed with a focus on the unique requirements of Embodied-AI research. It provides first-class support for a growing collection of embodied environments, tasks and algorithms, provides reproductions of state-of-the-art models and includes extensive documentation, tutorials, start-up code, and pre-trained models. AllenAct is built and backed by the [Allen Institute for AI (AI2)](https://allenai.org/). AI2 is a non-profit institute with the mission to contribute to humanity through high-impact AI research and engineering. ## Quick Links - [Website & Docs](https://www.allenact.org/) - [Github](https://github.com/allenai/allenact) - [Install](https://www.allenact.org/installation/installation-allenact/) - [Tutorials](https://www.allenact.org/tutorials/) - [AllenAct Paper](https://arxiv.org/abs/2008.12760) - [Citation](#citation) ## Features & Highlights * _Support for multiple environments_: Support for the [iTHOR](https://ai2thor.allenai.org/ithor/), [RoboTHOR](https://ai2thor.allenai.org/robothor/) and [Habitat](https://aihabitat.org/) embodied environments as well as for grid-worlds including [MiniGrid](https://github.com/maximecb/gym-minigrid). 
* _Task Abstraction_: Tasks and environments are decoupled in AllenAct, enabling researchers to easily implement a large variety of tasks in the same environment. * _Algorithms_: Support for a variety of on-policy algorithms including [PPO](https://arxiv.org/pdf/1707.06347.pdf), [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf), [A2C](https://arxiv.org/pdf/1611.05763.pdf), Imitation Learning and [DAgger](https://www.ri.cmu.edu/pub_files/2011/4/Ross-AISTATS11-NoRegret.pdf) as well as offline training such as offline IL. * _Sequential Algorithms_: It is trivial to experiment with different sequences of training routines, which are often the key to successful policies. * _Simultaneous Losses_: Easily combine various losses while training models (e.g. use an external self-supervised loss while optimizing a PPO loss). * _Multi-agent support_: Support for multi-agent algorithms and tasks. * _Visualizations_: Out of the box support to easily visualize first and third person views for agents as well as intermediate model tensors, integrated into Tensorboard. * _Pre-trained models_: Code and models for a number of standard Embodied AI tasks. * _Tutorials_: Start-up code and extensive tutorials to help ramp up to Embodied AI. * _First-class PyTorch support_: One of the few RL frameworks to target PyTorch. * _Arbitrary action spaces_: Supporting both discrete and continuous actions. 
|Environments|Tasks|Algorithms| |------------|-----|----------| |[iTHOR](https://ai2thor.allenai.org/ithor/), [RoboTHOR](https://ai2thor.allenai.org/robothor/), [Habitat](https://aihabitat.org/), [MiniGrid](https://github.com/maximecb/gym-minigrid), [OpenAI Gym](https://gym.openai.com/)|[PointNav](https://arxiv.org/pdf/1807.06757.pdf), [ObjectNav](https://arxiv.org/pdf/2006.13171.pdf), [MiniGrid tasks](https://github.com/maximecb/gym-minigrid), [Gym Box2D tasks](https://gym.openai.com/envs/#box2d)|[A2C](https://arxiv.org/pdf/1611.05763.pdf), [PPO](https://arxiv.org/pdf/1707.06347.pdf), [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf), [DAgger](https://www.ri.cmu.edu/pub_files/2011/4/Ross-AISTATS11-NoRegret.pdf), Off-policy Imitation| ## Contributions We welcome contributions from the greater community. If you would like to make such a contribution we recommend first submitting an [issue](https://github.com/allenai/allenact/issues) describing your proposed improvement. Doing so can ensure we can validate your suggestions before you spend a great deal of time upon them. Improvements and bug fixes should be made via a pull request from your fork of the repository at [https://github.com/allenai/allenact](https://github.com/allenai/allenact). All code in this repository is subject to formatting, documentation, and type-annotation guidelines. For more details, please see our [contribution guidelines](CONTRIBUTING.md). ## Acknowledgments This work builds upon the [pytorch-a2c-ppo-acktr](https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail) library of Ilya Kostrikov and uses some data structures from FAIR's [habitat-lab](https://github.com/facebookresearch/habitat-lab). We would like to thank Dustin Schwenk for his help for the public release of the framework. ## License AllenAct is MIT licensed, as found in the [LICENSE](LICENSE) file. 
## Team AllenAct is an open-source project built by members of the PRIOR research group at the Allen Institute for Artificial Intelligence (AI2).

## Citation If you use this work, please cite our [paper](https://arxiv.org/abs/2008.12760): ```bibtex @article{AllenAct, author = {Luca Weihs and Jordi Salvador and Klemen Kotar and Unnat Jain and Kuo-Hao Zeng and Roozbeh Mottaghi and Aniruddha Kembhavi}, title = {AllenAct: A Framework for Embodied AI Research}, year = {2020}, journal = {arXiv preprint arXiv:2008.12760}, } ``` ================================================ FILE: ROADMAP.md ================================================ # Roadmap Here we track new features/support to be added in the short/mid-term. ## New environments * [SAPIEN](https://sapien.ucsd.edu/) * [ThreeDWorld](http://www.threedworld.org/) ## New tasks * [Room-to-room navigation](https://arxiv.org/pdf/1711.07280.pdf) * [Furniture Lifting](https://arxiv.org/abs/1904.05879) and [Furniture Moving](https://arxiv.org/abs/2007.04979) ## New training methods * A3C * Deep Q-Learning ================================================ FILE: allenact/__init__.py ================================================ try: # noinspection PyProtectedMember,PyUnresolvedReferences from allenact._version import __version__ except ModuleNotFoundError: __version__ = None ================================================ FILE: allenact/_constants.py ================================================ import os from pathlib import Path ALLENACT_INSTALL_DIR = os.path.abspath(os.path.dirname(Path(__file__))) ================================================ FILE: allenact/algorithms/__init__.py ================================================ ================================================ FILE: allenact/algorithms/offpolicy_sync/__init__.py ================================================ ================================================ FILE: allenact/algorithms/offpolicy_sync/losses/__init__.py ================================================ ================================================ FILE: allenact/algorithms/offpolicy_sync/losses/abstract_offpolicy_loss.py 
class AbstractOffPolicyLoss(Generic[ModelType], Loss):
    """Base class for off-policy losses used to train a model.

    Concrete subclasses implement `loss`, which processes one batch of
    data with (part of) a model and reports the resulting loss value.
    """

    # noinspection PyMethodOverriding
    @abc.abstractmethod
    def loss(  # type: ignore
        self,
        *,  # keyword-only: every argument must be passed by name
        step_count: int,
        model: ModelType,
        batch: ObservationType,
        memory: Memory,
        **kwargs,
    ) -> Tuple[torch.FloatTensor, Dict[str, float], Memory, int]:
        """Compute the loss for a single batch of data.

        The loss is obtained by processing `batch` with (part of) `model`,
        possibly making use of `memory`.

        # Parameters

        step_count : step counter supplied by the caller (presumably the number
            of training steps taken so far -- confirm against the caller).
        model : model to run on the batch; assumed to live on the same device
            as `batch`.
        batch : data used as input for the model (already on the model's device).
        memory : model memory before processing the current batch.
        kwargs : any additional keyword arguments accepted by a concrete loss.

        # Returns

        A tuple `(current_loss, current_info, memory, bsize)` with:

        current_loss : the total loss.
        current_info : additional information about the current loss.
        memory : model memory after processing the current batch.
        bsize : the batch size.
        """
        raise NotImplementedError
type: ignore import torch.multiprocessing as mp # type: ignore import torch.nn as nn import torch.optim as optim # noinspection PyProtectedMember from torch._C._distributed_c10d import ReduceOp from allenact.algorithms.onpolicy_sync.misc import TrackingInfo, TrackingInfoType from allenact.base_abstractions.sensor import Sensor from allenact.utils.misc_utils import str2bool from allenact.utils.model_utils import md5_hash_of_state_dict try: # noinspection PyProtectedMember,PyUnresolvedReferences from torch.optim.lr_scheduler import _LRScheduler except (ImportError, ModuleNotFoundError): raise ImportError("`_LRScheduler` was not found in `torch.optim.lr_scheduler`") from allenact.algorithms.onpolicy_sync.losses.abstract_loss import ( AbstractActorCriticLoss, ) from allenact.algorithms.onpolicy_sync.policy import ActorCriticModel from allenact.algorithms.onpolicy_sync.storage import ( ExperienceStorage, MiniBatchStorageMixin, RolloutStorage, StreamingStorageMixin, ) from allenact.algorithms.onpolicy_sync.vector_sampled_tasks import ( COMPLETE_TASK_CALLBACK_KEY, COMPLETE_TASK_METRICS_KEY, SingleProcessVectorSampledTasks, VectorSampledTasks, ) from allenact.base_abstractions.distributions import TeacherForcingDistr from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams from allenact.base_abstractions.misc import ( ActorCriticOutput, GenericAbstractLoss, Memory, RLStepResult, ) from allenact.utils import spaces_utils as su from allenact.utils.experiment_utils import ( LoggingPackage, PipelineStage, ScalarMeanTracker, StageComponent, TrainingPipeline, set_deterministic_cudnn, set_seed, ) from allenact.utils.system import get_logger from allenact.utils.tensor_utils import batch_observations, detach_recursively from allenact.utils.viz_utils import VizSuite try: # When debugging we don't want to timeout in the VectorSampledTasks # noinspection PyPackageRequirements import pydevd DEBUGGING = str2bool(os.getenv("ALLENACT_DEBUG", "true")) except 
def __init__(
    self,
    experiment_name: str,
    config: ExperimentConfig,
    results_queue: mp.Queue,  # to output aggregated results
    checkpoints_queue: Optional[
        mp.Queue
    ],  # to write/read (trainer/evaluator) ready checkpoints
    checkpoints_dir: str,
    mode: str = "train",
    callback_sensors: Optional[Sequence[Sensor]] = None,
    seed: Optional[int] = None,
    deterministic_cudnn: bool = False,
    mp_ctx: Optional[BaseContext] = None,
    worker_id: int = 0,
    num_workers: int = 1,
    device: Union[str, torch.device, int] = "cpu",
    distributed_ip: str = "127.0.0.1",
    distributed_port: int = 0,
    deterministic_agents: bool = False,
    max_sampler_processes_per_worker: Optional[int] = None,
    initial_model_state_dict: Optional[Union[Dict[str, Any], int]] = None,
    try_restart_after_task_error: bool = False,
    **kwargs,
):
    """Initializer.

    # Parameters

    experiment_name : Name of the experiment (stored on the engine).
    config : The ExperimentConfig defining the experiment to run. It provides
        the machine parameters for `mode` and creates the model.
    results_queue : Queue used to output aggregated results.
    checkpoints_queue : Queue used to write/read (trainer/evaluator) ready
        checkpoints, may be `None`.
    checkpoints_dir : Stored on the engine (presumably the directory in which
        checkpoints are saved -- confirm against the subclasses).
    mode : "train", "valid", or "test" (case and surrounding whitespace are ignored).
    callback_sensors : Optional sensors handed to the vector tasks on creation.
    seed : Seed used to encourage deterministic behavior (it is difficult to ensure
        completely deterministic behavior due to CUDA issues and nondeterminism
        in environments).
    deterministic_cudnn : Whether to use deterministic cudnn. If `True` this may lower
        training performance; this is necessary (but not sufficient) if you desire
        deterministic behavior.
    mp_ctx : Optional multiprocessing context handed to the vector tasks.
    worker_id : Index of this worker, used as its rank when `num_workers > 1`.
    num_workers : Total number of workers; a distributed process group is
        initialized when this is larger than 1.
    device : Device on which the model is placed. The integer `-1` denotes the CPU.
    distributed_ip : Host name used for the distributed `TCPStore`.
    distributed_port : Port used for the distributed `TCPStore`.
    deterministic_agents : If `True`, actions are the mode of the action distribution
        rather than samples from it.
    max_sampler_processes_per_worker : `None` or a positive integer bounding the number
        of sampler processes of this worker.
    initial_model_state_dict : Either a state dict to load into the freshly created
        model or an integer md5 hash that the freshly created model's state dict must
        reproduce. Must not be `None` when training with more than one worker.
    try_restart_after_task_error : Stored on the engine (presumably controls whether
        task samplers are restarted after an error -- confirm against the users of
        this attribute).
    kwargs : Ignored by this initializer.
    """
    self.config = config
    self.results_queue = results_queue
    self.checkpoints_queue = checkpoints_queue
    self.mp_ctx = mp_ctx
    self.checkpoints_dir = checkpoints_dir
    self.worker_id = worker_id
    self.num_workers = num_workers
    self.device = torch.device("cpu") if device == -1 else torch.device(device)  # type: ignore

    if self.device != torch.device("cpu"):
        torch.cuda.set_device(device)

    self.distributed_ip = distributed_ip
    self.distributed_port = distributed_port
    self.try_restart_after_task_error = try_restart_after_task_error

    self.mode = mode.lower().strip()
    assert self.mode in [
        TRAIN_MODE_STR,
        VALID_MODE_STR,
        TEST_MODE_STR,
    ], f"Only {TRAIN_MODE_STR}, {VALID_MODE_STR}, {TEST_MODE_STR}, modes supported"

    self.callback_sensors = callback_sensors
    self.deterministic_cudnn = deterministic_cudnn
    if self.deterministic_cudnn:
        set_deterministic_cudnn()

    self.seed = seed
    set_seed(self.seed)

    self.experiment_name = experiment_name

    assert (
        max_sampler_processes_per_worker is None
        or max_sampler_processes_per_worker >= 1
    ), "`max_sampler_processes_per_worker` must be either `None` or a positive integer."
    self.max_sampler_processes_per_worker = max_sampler_processes_per_worker

    # `config.machine_params` may return either a `MachineParams` or the
    # keyword arguments needed to build one.
    machine_params = config.machine_params(self.mode)
    self.machine_params: MachineParams
    if isinstance(machine_params, MachineParams):
        self.machine_params = machine_params
    else:
        self.machine_params = MachineParams(**machine_params)

    self.num_samplers_per_worker = self.machine_params.nprocesses
    self.num_samplers = self.num_samplers_per_worker[self.worker_id]

    self._vector_tasks: Optional[
        Union[VectorSampledTasks, SingleProcessVectorSampledTasks]
    ] = None

    self.sensor_preprocessor_graph = None
    self.actor_critic: Optional[ActorCriticModel] = None

    create_model_kwargs = {}
    if self.machine_params.sensor_preprocessor_graph is not None:
        self.sensor_preprocessor_graph = (
            self.machine_params.sensor_preprocessor_graph.to(self.device)
        )
        create_model_kwargs["sensor_preprocessor_graph"] = (
            self.sensor_preprocessor_graph
        )

    # Re-seed right before creating the model so that model initialization
    # is reproducible across workers (see the hash check below).
    set_seed(self.seed)
    self.actor_critic = cast(
        ActorCriticModel,
        self.config.create_model(**create_model_kwargs),
    ).to(self.device)

    if initial_model_state_dict is not None:
        if isinstance(initial_model_state_dict, int):
            # An integer is the md5 hash the freshly created model must reproduce.
            assert (
                md5_hash_of_state_dict(self.actor_critic.state_dict())
                == initial_model_state_dict
            ), (
                f"Could not reproduce the correct model state dict on worker {self.worker_id} despite seeding."
                f" Please ensure that your model's initialization is reproducable when `set_seed(...)`"
                f"] has been called with a fixed seed before initialization."
            )
        else:
            self.actor_critic.load_state_dict(
                state_dict=cast(
                    "OrderedDict[str, Tensor]", initial_model_state_dict
                )
            )
    else:
        # Use the normalized `self.mode` (not the raw `mode` argument) so that
        # e.g. "Train" cannot slip past this check.
        assert self.mode != TRAIN_MODE_STR or self.num_workers == 1, (
            "When training with multiple workers you must pass a,"
            " non-`None` value for the `initial_model_state_dict` argument."
        )

    if get_logger().level == logging.DEBUG:
        model_hash = md5_hash_of_state_dict(self.actor_critic.state_dict())
        get_logger().debug(
            f"[{self.mode} worker {self.worker_id}] model weights hash: {model_hash}"
        )

    self.is_distributed = False
    self.store: Optional[torch.distributed.TCPStore] = None  # type:ignore
    if self.num_workers > 1:
        # `DEBUG_VST_TIMEOUT` is `None` whenever the ALLENACT_DEBUG_VST_TIMEOUT
        # environment variable is unset. Fall back to the regular 60s base in
        # that case so that the arithmetic below cannot fail with `3 * None`
        # when running under a debugger.
        vst_read_timeout = (
            DEBUG_VST_TIMEOUT
            if DEBUGGING and DEBUG_VST_TIMEOUT is not None
            else 1 * 60
        )
        self.store = torch.distributed.TCPStore(  # type:ignore
            host_name=self.distributed_ip,
            port=self.distributed_port,
            world_size=self.num_workers,
            is_master=self.worker_id == 0,
            timeout=datetime.timedelta(seconds=3 * vst_read_timeout + 300),
        )
        cpu_device = self.device == torch.device("cpu")  # type:ignore

        # "gloo" required during testing to ensure that `barrier()` doesn't time out.
        backend = "gloo" if cpu_device or self.mode == TEST_MODE_STR else "nccl"
        get_logger().debug(
            f"Worker {self.worker_id}: initializing distributed {backend} backend with device {self.device}."
        )
        dist.init_process_group(  # type:ignore
            backend=backend,
            store=self.store,
            rank=self.worker_id,
            world_size=self.num_workers,
            # During testing, we sometimes found that default timeout was too short
            # resulting in the run terminating surprisingly, we increase it here.
            timeout=(
                datetime.timedelta(minutes=3000)
                if (self.mode == TEST_MODE_STR or DEBUGGING)
                else dist.default_pg_timeout
            ),
        )
        self.is_distributed = True

    self.deterministic_agents = deterministic_agents

    self._is_closing: bool = (
        False  # Useful for letting the RL runner know if this is closing
    )
    self._is_closed: bool = False

    # Keeping track of metrics and losses during training/inference
    self.single_process_metrics: List = []
    self.single_process_task_callback_data: List = []
    self.tracking_info_list: List[TrackingInfo] = []

    # Variables that will only be instantiated in the trainer
    self.optimizer: Optional[optim.optimizer.Optimizer] = None
    # noinspection PyProtectedMember
    self.lr_scheduler: Optional[_LRScheduler] = None
    self.insufficient_data_for_update: Optional[torch.distributed.PrefixStore] = (
        None
    )

    # Training pipeline will be instantiated during training and inference.
    # During inference however, it will be instantiated anew on each run of `run_eval`
    # and will be set to `None` after the eval run is complete.
    self.training_pipeline: Optional[TrainingPipeline] = None
# self._vector_tasks = SingleProcessVectorSampledTasks( # make_sampler_fn=self.config.make_sampler_fn, # sampler_fn_args_list=self.get_sampler_fn_args(seeds), # ) # else: self._vector_tasks = VectorSampledTasks( make_sampler_fn=self.config.make_sampler_fn, sampler_fn_args=self.get_sampler_fn_args(seeds), callback_sensors=self.callback_sensors, multiprocessing_start_method=( "forkserver" if self.mp_ctx is None else None ), mp_ctx=self.mp_ctx, max_processes=self.max_sampler_processes_per_worker, read_timeout=DEBUG_VST_TIMEOUT if DEBUGGING else 1 * 60, ) return self._vector_tasks @staticmethod def worker_seeds(nprocesses: int, initial_seed: Optional[int]) -> List[int]: """Create a collection of seeds for workers without modifying the RNG state.""" rstate = None # type:ignore if initial_seed is not None: rstate = random.getstate() random.seed(initial_seed) seeds = [random.randint(0, (2**31) - 1) for _ in range(nprocesses)] if initial_seed is not None: random.setstate(rstate) return seeds def get_sampler_fn_args(self, seeds: Optional[List[int]] = None): sampler_devices = self.machine_params.sampler_devices if self.mode == TRAIN_MODE_STR: fn = self.config.train_task_sampler_args elif self.mode == VALID_MODE_STR: fn = self.config.valid_task_sampler_args elif self.mode == TEST_MODE_STR: fn = self.config.test_task_sampler_args else: raise NotImplementedError( f"self.mode must be one of {TRAIN_MODE_STR}, {VALID_MODE_STR}, or {TEST_MODE_STR}." 
) if self.is_distributed: total_processes = sum(self.num_samplers_per_worker) process_offset = sum(self.num_samplers_per_worker[: self.worker_id]) else: total_processes = self.num_samplers process_offset = 0 sampler_devices_as_ints: Optional[List[int]] = None if ( self.is_distributed or self.mode == TEST_MODE_STR ) and self.device.index is not None: sampler_devices_as_ints = [self.device.index] elif sampler_devices is not None: sampler_devices_as_ints = [ -1 if sd.index is None else sd.index for sd in sampler_devices ] return [ fn( process_ind=process_offset + it, total_processes=total_processes, devices=sampler_devices_as_ints, seeds=seeds, ) for it in range(self.num_samplers) ] def checkpoint_load( self, ckpt: Union[str, Dict[str, Any]], restart_pipeline: bool ) -> Dict[str, Union[Dict[str, Any], torch.Tensor, float, int, str, List]]: if isinstance(ckpt, str): get_logger().info( f"[{self.mode} worker {self.worker_id}] Loading checkpoint from {ckpt}" ) # Map location CPU is almost always better than mapping to a CUDA device. ckpt = torch.load(os.path.abspath(ckpt), map_location="cpu") ckpt = cast( Dict[str, Union[Dict[str, Any], torch.Tensor, float, int, str, List]], ckpt, ) self.actor_critic.load_state_dict(ckpt["model_state_dict"]) # type:ignore if "training_pipeline_state_dict" in ckpt and not restart_pipeline: self.training_pipeline.load_state_dict( cast(Dict[str, Any], ckpt["training_pipeline_state_dict"]) ) return ckpt # aggregates task metrics currently in queue def aggregate_task_metrics( self, logging_pkg: LoggingPackage, num_tasks: int = -1, ) -> LoggingPackage: if num_tasks > 0: if len(self.single_process_metrics) != num_tasks: error_msg = ( "shorter" if len(self.single_process_metrics) < num_tasks else "longer" ) get_logger().error( f"Metrics out is {error_msg} than expected number of tasks." " This should only happen if a positive number of `num_tasks` were" " set during testing but the queue did not contain this number of entries." 
" Please file an issue at https://github.com/allenai/allenact/issues." ) num_empty_tasks_dequeued = 0 for metrics_dict in self.single_process_metrics: num_empty_tasks_dequeued += not logging_pkg.add_metrics_dict( single_task_metrics_dict=metrics_dict ) self.single_process_metrics = [] if num_empty_tasks_dequeued != 0: get_logger().warning( f"Discarded {num_empty_tasks_dequeued} empty task metrics" ) return logging_pkg def _preprocess_observations(self, batched_observations): if self.sensor_preprocessor_graph is None: return batched_observations return self.sensor_preprocessor_graph.get_observations(batched_observations) def remove_paused(self, observations): paused, keep, running = [], [], [] for it, obs in enumerate(observations): if obs is None: paused.append(it) else: keep.append(it) running.append(obs) for p in reversed(paused): self.vector_tasks.pause_at(p) # Group samplers along new dim: batch = batch_observations(running, device=self.device) return len(paused), keep, batch def initialize_storage_and_viz( self, storage_to_initialize: Optional[Sequence[ExperienceStorage]], visualizer: Optional[VizSuite] = None, ): keep: Optional[List] = None if visualizer is not None or ( storage_to_initialize is not None and any(isinstance(s, RolloutStorage) for s in storage_to_initialize) ): # No rollout storage, thus we are not observations = self.vector_tasks.get_observations() npaused, keep, batch = self.remove_paused(observations) observations = ( self._preprocess_observations(batch) if len(keep) > 0 else batch ) assert npaused == 0, f"{npaused} samplers are paused during initialization." 
num_samplers = len(keep) else: observations = {} num_samplers = 0 npaused = 0 recurrent_memory_specification = ( self.actor_critic.recurrent_memory_specification ) if storage_to_initialize is not None: for s in storage_to_initialize: s.to(self.device) s.set_partition(index=self.worker_id, num_parts=self.num_workers) s.initialize( observations=observations, num_samplers=num_samplers, recurrent_memory_specification=recurrent_memory_specification, action_space=self.actor_critic.action_space, ) if visualizer is not None and num_samplers > 0: visualizer.collect(vector_task=self.vector_tasks, alive=keep) return npaused @property def num_active_samplers(self): if self.vector_tasks is None: return 0 return self.vector_tasks.num_unpaused_tasks def act( self, rollout_storage: RolloutStorage, dist_wrapper_class: Optional[type] = None, ): with torch.no_grad(): agent_input = rollout_storage.agent_input_for_next_step() actor_critic_output, memory = self.actor_critic(**agent_input) distr = actor_critic_output.distributions if dist_wrapper_class is not None: distr = dist_wrapper_class(distr=distr, obs=agent_input["observations"]) actions = distr.sample() if not self.deterministic_agents else distr.mode() return actions, actor_critic_output, memory, agent_input["observations"] def aggregate_and_send_logging_package( self, tracking_info_list: List[TrackingInfo], logging_pkg: Optional[LoggingPackage] = None, send_logging_package: bool = True, checkpoint_file_name: Optional[str] = None, ): if logging_pkg is None: logging_pkg = LoggingPackage( mode=self.mode, training_steps=self.training_pipeline.total_steps, pipeline_stage=self.training_pipeline.current_stage_index, storage_uuid_to_total_experiences=self.training_pipeline.storage_uuid_to_total_experiences, checkpoint_file_name=checkpoint_file_name, ) self.aggregate_task_metrics(logging_pkg=logging_pkg) for callback_dict in self.single_process_task_callback_data: logging_pkg.task_callback_data.append(callback_dict) 
self.single_process_task_callback_data = [] for tracking_info in tracking_info_list: if tracking_info.n < 0: get_logger().warning( f"Obtained a train_info_dict with {tracking_info.n} elements." f" Full info: ({tracking_info.type}, {tracking_info.info}, {tracking_info.n})." ) else: tracking_info_dict = tracking_info.info if tracking_info.type == TrackingInfoType.LOSS: tracking_info_dict = { f"losses/{k}": v for k, v in tracking_info_dict.items() } logging_pkg.add_info_dict( info_dict=tracking_info_dict, n=tracking_info.n, stage_component_uuid=tracking_info.stage_component_uuid, storage_uuid=tracking_info.storage_uuid, ) if send_logging_package: self.results_queue.put(logging_pkg) return logging_pkg @staticmethod def _active_memory(memory, keep): return memory.sampler_select(keep) if memory is not None else memory def probe(self, dones: List[bool], npaused, period=100000): """Debugging util. When called from self.collect_step_across_all_task_samplers(...), calls render for the 0-th task sampler of the 0-th distributed worker for the first beginning episode spaced at least period steps from the beginning of the previous one. For valid, train, it currently renders all episodes for the 0-th task sampler of the 0-th distributed worker. If this is not wanted, it must be hard-coded for now below. # Parameters dones : dones list from self.collect_step_across_all_task_samplers(...) npaused : number of newly paused tasks returned by self.removed_paused(...) period : minimal spacing in sampled steps between the beginning of episodes to be shown. 
""" sampler_id = 0 done = dones[sampler_id] if self.mode != TRAIN_MODE_STR: setattr( self, "_probe_npaused", getattr(self, "_probe_npaused", 0) + npaused ) if self._probe_npaused == self.num_samplers: # type:ignore del self._probe_npaused # type:ignore return period = 0 if self.worker_id == 0: if done: if period > 0 and ( getattr(self, "_probe_steps", None) is None or ( self._probe_steps < 0 # type:ignore and ( self.training_pipeline.total_steps + self._probe_steps # type:ignore ) >= period ) ): self._probe_steps = self.training_pipeline.total_steps if period == 0 or ( getattr(self, "_probe_steps", None) is not None and self._probe_steps >= 0 and ((self.training_pipeline.total_steps - self._probe_steps) < period) ): if ( period == 0 or not done or self._probe_steps == self.training_pipeline.total_steps ): self.vector_tasks.call_at(sampler_id, "render", ["human"]) else: # noinspection PyAttributeOutsideInit self._probe_steps = -self._probe_steps def collect_step_across_all_task_samplers( self, rollout_storage_uuid: str, uuid_to_storage: Dict[str, ExperienceStorage], visualizer=None, dist_wrapper_class=None, ) -> int: rollout_storage = cast(RolloutStorage, uuid_to_storage[rollout_storage_uuid]) actions, actor_critic_output, memory, _ = self.act( rollout_storage=rollout_storage, dist_wrapper_class=dist_wrapper_class, ) # Flatten actions flat_actions = su.flatten(self.actor_critic.action_space, actions) assert len(flat_actions.shape) == 3, ( "Distribution samples must include step and task sampler dimensions [step, sampler, ...]. The simplest way" "to accomplish this is to pass param tensors (like `logits` in a `CategoricalDistr`) with these dimensions" "to the Distribution." 
) # Convert flattened actions into list of actions and send them outputs: List[RLStepResult] = self.vector_tasks.step( su.action_list(self.actor_critic.action_space, flat_actions) ) # Save after task completion metrics for step_result in outputs: if step_result.info is not None: if COMPLETE_TASK_METRICS_KEY in step_result.info: self.single_process_metrics.append( step_result.info[COMPLETE_TASK_METRICS_KEY] ) del step_result.info[COMPLETE_TASK_METRICS_KEY] if COMPLETE_TASK_CALLBACK_KEY in step_result.info: self.single_process_task_callback_data.append( step_result.info[COMPLETE_TASK_CALLBACK_KEY] ) del step_result.info[COMPLETE_TASK_CALLBACK_KEY] rewards: Union[List, torch.Tensor] observations, rewards, dones, infos = [list(x) for x in zip(*outputs)] rewards = torch.tensor( rewards, dtype=torch.float, device=self.device, # type:ignore ) # We want rewards to have dimensions [sampler, reward] if len(rewards.shape) == 1: # Rewards are of shape [sampler,] rewards = rewards.unsqueeze(-1) elif len(rewards.shape) > 1: raise NotImplementedError() # If done then clean the history of observations. masks = ( 1.0 - torch.tensor( dones, dtype=torch.float32, device=self.device, # type:ignore ) ).view( -1, 1 ) # [sampler, 1] npaused, keep, batch = self.remove_paused(observations) if hasattr(self.actor_critic, "sampler_select"): self.actor_critic.sampler_select(keep) # TODO self.probe(...) can be useful for debugging (we might want to control it from main?) # self.probe(dones, npaused) if npaused > 0: if self.mode == TRAIN_MODE_STR: raise NotImplementedError( "When trying to get a new task from a task sampler (using the `.next_task()` method)" " the task sampler returned `None`. This is not currently supported during training" " (and almost certainly a bug in the implementation of the task sampler or in the " " initialization of the task sampler for training)." 
) for s in uuid_to_storage.values(): if isinstance(s, RolloutStorage): s.sampler_select(keep) to_add_to_storage = dict( observations=( self._preprocess_observations(batch) if len(keep) > 0 else batch ), memory=self._active_memory(memory, keep), actions=flat_actions[0, keep], action_log_probs=actor_critic_output.distributions.log_prob(actions)[ 0, keep ], value_preds=actor_critic_output.values[0, keep], rewards=rewards[keep], masks=masks[keep], ) for storage in uuid_to_storage.values(): storage.add(**to_add_to_storage) # TODO we always miss tensors for the last action in the last episode of each worker if visualizer is not None: if len(keep) > 0: visualizer.collect( rollout=rollout_storage, vector_task=self.vector_tasks, alive=keep, actor_critic=actor_critic_output, ) else: visualizer.collect(actor_critic=actor_critic_output) return npaused def distributed_weighted_sum( self, to_share: Union[torch.Tensor, float, int], weight: Union[torch.Tensor, float, int], ): """Weighted sum of scalar across distributed workers.""" if self.is_distributed: aggregate = torch.tensor(to_share * weight).to(self.device) dist.all_reduce(aggregate) return aggregate.item() else: if abs(1 - weight) > 1e-5: get_logger().warning( f"Scaling non-distributed value with weight {weight}" ) return torch.tensor(to_share * weight).item() def distributed_reduce( self, to_share: Union[torch.Tensor, float, int], op: ReduceOp ): """Weighted sum of scalar across distributed workers.""" if self.is_distributed: aggregate = torch.tensor(to_share).to(self.device) dist.all_reduce(aggregate, op=op) return aggregate.item() else: return torch.tensor(to_share).item() def backprop_step( self, total_loss: torch.Tensor, max_grad_norm: float, local_to_global_batch_size_ratio: float = 1.0, ): raise NotImplementedError def save_error_data(self, batch: Dict[str, Any]): raise NotImplementedError @property def step_count(self) -> int: if ( self.training_pipeline.current_stage is None ): # Might occur during testing when 
all stages are complete return 0 return self.training_pipeline.current_stage.steps_taken_in_stage def compute_losses_track_them_and_backprop( self, stage: PipelineStage, stage_component: StageComponent, storage: ExperienceStorage, skip_backprop: bool = False, ): training = self.mode == TRAIN_MODE_STR assert training or skip_backprop if training and self.is_distributed: self.insufficient_data_for_update.set( "insufficient_data_for_update", str(0) ) dist.barrier( device_ids=( None if self.device == torch.device("cpu") else [self.device.index] ) ) training_settings = stage_component.training_settings loss_names = stage_component.loss_names losses = [self.training_pipeline.get_loss(ln) for ln in loss_names] loss_weights = [stage.uuid_to_loss_weight[ln] for ln in loss_names] loss_update_repeats_list = training_settings.update_repeats if isinstance(loss_update_repeats_list, numbers.Integral): loss_update_repeats_list = [loss_update_repeats_list] * len(loss_names) if skip_backprop and isinstance(storage, MiniBatchStorageMixin): if loss_update_repeats_list != [1] * len(loss_names): loss_update_repeats_list = [1] * len(loss_names) get_logger().warning( "Does not make sense to do multiple updates when" " skip_backprop is `True` and you are using a storage of type" " `MiniBatchStorageMixin`. This is likely a problem caused by" " using a custom valid/test stage component that is inheriting its" " TrainingSettings from the TrainingPipeline's TrainingSettings. We will override" " the requested number of updates repeats (which was" f" {dict(zip(loss_names, loss_update_repeats_list))}) to be 1 for all losses." 
) enough_data_for_update = True for current_update_repeat_index in range( max(loss_update_repeats_list, default=0) ): if isinstance(storage, MiniBatchStorageMixin): batch_iterator = storage.batched_experience_generator( num_mini_batch=training_settings.num_mini_batch ) elif isinstance(storage, StreamingStorageMixin): assert ( training_settings.num_mini_batch is None or training_settings.num_mini_batch == 1 ) def single_batch_generator(streaming_storage: StreamingStorageMixin): try: yield cast( StreamingStorageMixin, streaming_storage ).next_batch() except EOFError: if not training: raise if streaming_storage.empty(): yield None else: cast( StreamingStorageMixin, streaming_storage ).reset_stream() stage.stage_component_uuid_to_stream_memory[ stage_component.uuid ].clear() yield cast( StreamingStorageMixin, streaming_storage ).next_batch() batch_iterator = single_batch_generator(streaming_storage=storage) else: raise NotImplementedError( f"Storage {storage} must be a subclass of `MiniBatchStorageMixin` or `StreamingStorageMixin`." ) for batch in batch_iterator: if batch is None: # This should only happen in a `StreamingStorageMixin` when it cannot # generate an initial batch or when we are in testing/validation and # we've reached the end of the dataset over which to test/validate. if training: assert isinstance(storage, StreamingStorageMixin) get_logger().warning( f"Worker {self.worker_id}: could not run update in {storage}, potentially because" f" not enough data has been accumulated to be able to fill an initial batch." 
) else: pass enough_data_for_update = False if training and self.is_distributed: self.insufficient_data_for_update.add( "insufficient_data_for_update", 1 * (not enough_data_for_update), ) dist.barrier( device_ids=( None if self.device == torch.device("cpu") else [self.device.index] ) ) if ( int( self.insufficient_data_for_update.get( "insufficient_data_for_update" ) ) != 0 ): enough_data_for_update = False break info: Dict[str, float] = {} bsize: Optional[int] = None total_loss: Optional[torch.Tensor] = None actor_critic_output_for_batch: Optional[ActorCriticOutput] = None batch_memory = Memory() for loss, loss_name, loss_weight, max_update_repeats_for_loss in zip( losses, loss_names, loss_weights, loss_update_repeats_list ): if current_update_repeat_index >= max_update_repeats_for_loss: continue if isinstance(loss, AbstractActorCriticLoss): bsize = batch["bsize"] if actor_critic_output_for_batch is None: try: actor_critic_output_for_batch, _ = self.actor_critic( observations=batch["observations"], memory=batch["memory"], prev_actions=batch["prev_actions"], masks=batch["masks"], ) except ValueError: save_path = self.save_error_data(batch=batch) get_logger().error( f"Encountered a value error! Likely because of nans in the output/input." f" Saving all error information to {save_path}." 
) raise loss_return = loss.loss( step_count=self.step_count, batch=batch, actor_critic_output=actor_critic_output_for_batch, ) per_epoch_info = {} if len(loss_return) == 2: current_loss, current_info = loss_return elif len(loss_return) == 3: current_loss, current_info, per_epoch_info = loss_return else: raise NotImplementedError elif isinstance(loss, GenericAbstractLoss): loss_output = loss.loss( model=self.actor_critic, batch=batch, batch_memory=batch_memory, stream_memory=stage.stage_component_uuid_to_stream_memory[ stage_component.uuid ], ) current_loss = loss_output.value current_info = loss_output.info per_epoch_info = loss_output.per_epoch_info batch_memory = loss_output.batch_memory stage.stage_component_uuid_to_stream_memory[ stage_component.uuid ] = loss_output.stream_memory bsize = loss_output.bsize else: raise NotImplementedError( f"Loss of type {type(loss)} is not supported. Losses must be subclasses of" f" `AbstractActorCriticLoss` or `GenericAbstractLoss`." ) if total_loss is None: total_loss = loss_weight * current_loss else: total_loss = total_loss + loss_weight * current_loss for key, value in current_info.items(): info[f"{loss_name}/{key}"] = value if per_epoch_info is not None: for key, value in per_epoch_info.items(): if max(loss_update_repeats_list, default=0) > 1: info[ f"{loss_name}/{key}_epoch{current_update_repeat_index:02d}" ] = value info[f"{loss_name}/{key}_combined"] = value else: info[f"{loss_name}/{key}"] = value assert total_loss is not None, ( f"No {stage_component.uuid} losses specified for training in stage" f" {self.training_pipeline.current_stage_index}" ) total_loss_scalar = total_loss.item() info[f"total_loss"] = total_loss_scalar self.tracking_info_list.append( TrackingInfo( type=TrackingInfoType.LOSS, info=info, n=bsize, storage_uuid=stage_component.storage_uuid, stage_component_uuid=stage_component.uuid, ) ) to_track = { "rollout_epochs": max(loss_update_repeats_list, default=0), "worker_batch_size": bsize, } 
def close(self, verbose=True):
    """Close the engine's vectorized tasks and mark the engine as closed.

    Calling this method more than once is safe: if the engine has already
    been closed the call returns immediately and leaves both the
    `_is_closed` and `_is_closing` flags untouched.

    # Parameters

    verbose : If `True`, progress messages (and the traceback of any
        exception raised while closing the tasks) are sent to the logger.
        If `False`, nothing is logged.
    """
    # Check for a completed close *before* raising the `_is_closing` flag.
    # Raising the flag first and then returning early would leave
    # `is_closing` stuck at `True` on an engine that is already closed.
    if "_is_closed" in self.__dict__ and self._is_closed:
        return

    self._is_closing = True

    def logif(s: Union[str, Exception]):
        # Log only when verbose; exceptions are reported via their traceback,
        # so this must be called from inside the `except` clause.
        if verbose:
            if isinstance(s, str):
                get_logger().info(s)
            elif isinstance(s, Exception):
                get_logger().error(traceback.format_exc())
            else:
                raise NotImplementedError()

    # `__dict__` is inspected directly so that a partially constructed engine
    # (one where `_vector_tasks` was never assigned) can still be closed.
    if "_vector_tasks" in self.__dict__ and self._vector_tasks is not None:
        try:
            logif(
                f"[{self.mode} worker {self.worker_id}] Closing OnPolicyRLEngine.vector_tasks."
            )
            self._vector_tasks.close()
            logif(f"[{self.mode} worker {self.worker_id}] Closed.")
        except Exception as e:
            # Closing is best effort: report the failure but still mark the
            # engine as closed below.
            logif(
                f"[{self.mode} worker {self.worker_id}] Exception raised when closing OnPolicyRLEngine.vector_tasks:"
            )
            logif(e)

    self._is_closed = True
    self._is_closing = False
only using early stopping criterions in the non-distributed setting. if any( stage.early_stopping_criterion is not None for stage in self.training_pipeline.pipeline_stages ): raise NotImplementedError( "Early stopping criterions are currently only allowed when using a single training worker, i.e." " no distributed (multi-GPU) training. If this is a feature you'd like please create an issue" " at https://github.com/allenai/allenact/issues or (even better) create a pull request with this " " feature and we'll be happy to review it." ) self.optimizer: optim.optimizer.Optimizer = ( self.training_pipeline.optimizer_builder( params=[p for p in self.actor_critic.parameters() if p.requires_grad] ) ) # noinspection PyProtectedMember self.lr_scheduler: Optional[_LRScheduler] = None if self.training_pipeline.lr_scheduler_builder is not None: self.lr_scheduler = self.training_pipeline.lr_scheduler_builder( optimizer=self.optimizer ) if self.is_distributed: # Tracks how many workers have finished their rollout self.num_workers_done = torch.distributed.PrefixStore( # type:ignore "num_workers_done", self.store ) # Tracks the number of steps taken by each worker in current rollout self.num_workers_steps = torch.distributed.PrefixStore( # type:ignore "num_workers_steps", self.store ) self.distributed_preemption_threshold = distributed_preemption_threshold # Flag for finished worker in current epoch self.offpolicy_epoch_done = torch.distributed.PrefixStore( # type:ignore "offpolicy_epoch_done", self.store ) # Flag for finished worker in current epoch with custom component self.insufficient_data_for_update = ( torch.distributed.PrefixStore( # type:ignore "insufficient_data_for_update", self.store ) ) else: self.num_workers_done = None self.num_workers_steps = None self.distributed_preemption_threshold = 1.0 self.offpolicy_epoch_done = None # Keeping track of training state self.former_steps: Optional[int] = None self.last_log: Optional[int] = None self.last_save: Optional[int] = None 
def advance_seed(
    self, seed: Optional[int], return_same_seed_per_worker=False
) -> Optional[int]:
    """Derive a new seed from `seed` and the pipeline's total step count.

    # Parameters

    seed : The base seed. If `None`, `None` is returned unchanged.
    return_same_seed_per_worker : If `True`, the returned seed does not
        depend on `self.worker_id`.

    # Returns

    `None` if `seed` is `None`. Otherwise a seed obtained from
    `self.worker_seeds` after mixing `seed` with
    `self.training_pipeline.total_steps + 1`. In train/test mode (and unless
    `return_same_seed_per_worker` is set) the entry for this worker's id is
    returned, otherwise the first entry of a single-worker seed list.
    """
    if seed is None:
        return None

    # The mixed seed is identical across workers as it only depends on the
    # base seed and the total number of steps taken so far.
    mixed_seed = (seed ^ (self.training_pipeline.total_steps + 1)) % (2**31 - 1)

    if return_same_seed_per_worker or self.mode not in (
        TRAIN_MODE_STR,
        TEST_MODE_STR,
    ):
        return self.worker_seeds(1, mixed_seed)[0]

    per_worker_seeds = self.worker_seeds(self.num_workers, mixed_seed)
    return per_worker_seeds[self.worker_id]
def checkpoint_save(self, pipeline_stage_index: Optional[int] = None) -> str:
    """Write a training checkpoint into `self.checkpoints_dir`.

    The checkpoint contains the model, optimizer and training pipeline state
    dicts, the total number of steps, the trainer seed and, when a learning
    rate scheduler is in use, the scheduler state.

    # Parameters

    pipeline_stage_index : Stage index to embed in the checkpoint's file
        name. If `None`, the training pipeline's current stage index is used.

    # Returns

    The path of the saved checkpoint file.
    """
    pipeline = self.training_pipeline

    if pipeline_stage_index is None:
        stage_index = pipeline.current_stage_index
    else:
        stage_index = pipeline_stage_index

    file_name = "exp_{}__stage_{:02d}__steps_{:012d}.pt".format(
        self.experiment_name, stage_index, pipeline.total_steps
    )
    model_path = os.path.join(self.checkpoints_dir, file_name)

    save_dict = {
        "model_state_dict": self.actor_critic.state_dict(),  # type:ignore
        "total_steps": pipeline.total_steps,  # Total steps including current stage
        "optimizer_state_dict": self.optimizer.state_dict(),  # type: ignore
        "training_pipeline_state_dict": pipeline.state_dict(),
        "trainer_seed": self.seed,
    }

    scheduler = self.lr_scheduler
    if scheduler is not None:
        save_dict["scheduler_state"] = cast(_LRScheduler, scheduler).state_dict()

    torch.save(save_dict, model_path)
    return model_path
def advantage_stats(self, advantages: torch.Tensor) -> Dict[str, torch.Tensor]:
    r"""Computes the mean and standard deviation of advantages (possibly
    over multiple workers).

    In distributed training the statistics are obtained by summing the
    per-worker contributions with `dist.all_reduce` and normalizing by the
    number of steps taken (by all workers) in the current rollout.

    # Parameters

    advantages: Tensor to compute statistics over. This is the worker's
        local copy; in distributed training the result is computed from the
        reduced sums over all workers.

    # Returns

    A dictionary with keys `"mean"` and `"std"`.
    """
    # Step count has already been updated with the steps from all workers
    global_rollout_steps = self.step_count - self.former_steps

    if not self.is_distributed:
        # Single worker: the local statistics are the global statistics.
        # noinspection PyArgumentList
        return {"mean": advantages.mean(), "std": advantages.std()}

    # First pass: sum across workers (in place) to get the mean.
    advantage_total = advantages.sum()
    dist.all_reduce(advantage_total)
    mean = advantage_total / global_rollout_steps

    # Second pass: summed squared deviations from that mean.
    squared_deviation_total = (advantages - mean).pow(2).sum()
    dist.all_reduce(squared_deviation_total)
    std = (squared_deviation_total / (global_rollout_steps - 1)).sqrt()

    return {"mean": mean, "std": std}
def _save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter(
    self, pipeline_stage_index: Optional[int] = None
):
    """Save a checkpoint (on the responsible workers), queue it for
    evaluation, and record the step count at which this happened.

    Every worker re-seeds via `self.deterministic_seeds()` and updates
    `self.last_save`; only worker 0 (and, when `self.save_ckpt_at_every_host`
    is set, the worker whose id equals `self.first_local_worker_id`) actually
    writes a checkpoint.

    # Parameters

    pipeline_stage_index : Forwarded to `self.checkpoint_save`.

    # Returns

    The path returned by `self.checkpoint_save`, or `None` if this worker
    did not save a checkpoint.
    """
    self.deterministic_seeds()

    is_designated_host_worker = (
        self.save_ckpt_at_every_host
        and self.worker_id == self.first_local_worker_id
    )
    should_save = is_designated_host_worker or self.worker_id == 0

    saved_path = None
    if should_save:
        saved_path = self.checkpoint_save(pipeline_stage_index=pipeline_stage_index)
        queue = self.checkpoints_queue
        if queue is not None:
            queue.put(("eval", saved_path))

    self.last_save = self.training_pipeline.total_steps
    return saved_path
while True: pipeline_stage_changed = self.training_pipeline.before_rollout( train_metrics=self._last_aggregated_train_task_metrics ) # This is `False` at the very start of training, i.e. pipeline starts with a stage initialized self._last_aggregated_train_task_metrics.reset() training_is_complete = self.training_pipeline.current_stage is None # `training_is_complete` should imply `pipeline_stage_changed` assert pipeline_stage_changed or not training_is_complete # Saving checkpoints and initializing storage when the pipeline stage changes if pipeline_stage_changed: # Here we handle saving a checkpoint after a pipeline stage ends. We # do this: # (1) after every pipeline stage if the `self.save_ckpt_after_every_pipeline_stage` # boolean is True, and # (2) when we have reached the end of ALL training (i.e. all stages are complete). if ( should_save_checkpoints and ( # Might happen if the `save_interval` was hit just previously, see below not already_saved_checkpoint ) and ( self.save_ckpt_after_every_pipeline_stage or training_is_complete ) ): self._save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter( pipeline_stage_index=( self.training_pipeline.current_stage_index - 1 if not training_is_complete else len(self.training_pipeline.pipeline_stages) - 1 ) ) # If training is complete, break out if training_is_complete: break # Here we handle updating our training settings after a pipeline stage ends. # Update the training settings we're using cur_stage_training_settings = ( self.training_pipeline.current_stage.training_settings ) # If the pipeline stage changed we must initialize any new custom storage and # stop updating any custom storage that is no longer in use (this second bit # is done by simply updating `uuid_to_storage` to the new custom storage objects). 
new_uuid_to_storage = self.training_pipeline.current_stage_storage storage_to_initialize = [ s for uuid, s in new_uuid_to_storage.items() if uuid not in uuid_to_storage # Don't initialize storage already in use ] self.initialize_storage_and_viz( storage_to_initialize=storage_to_initialize, ) uuid_to_storage = new_uuid_to_storage # Change engine attributes that depend on the current stage self.training_pipeline.current_stage.change_engine_attributes(self) already_saved_checkpoint = False if self.is_distributed: self.num_workers_done.set("done", str(0)) self.num_workers_steps.set("steps", str(0)) # Ensure all workers are done before incrementing num_workers_{steps, done} dist.barrier( device_ids=( None if self.device == torch.device("cpu") else [self.device.index] ) ) self.former_steps = self.step_count former_storage_experiences = { k: v.total_experiences for k, v in self.training_pipeline.current_stage_storage.items() } if self.training_pipeline.rollout_storage_uuid is None: # In this case we're not expecting to collect storage experiences, i.e. everything # will be off-policy. # self.step_count is normally updated by the `self.collect_step_across_all_task_samplers` # call below, but since we're not collecting onpolicy experiences, we need to update # it here. The step count here is now just effectively a count of the number of times # we've called `compute_losses_track_them_and_backprop` below. 
self.step_count += 1 before_update_info = dict( next_value=None, use_gae=cur_stage_training_settings.use_gae, gamma=cur_stage_training_settings.gamma, tau=cur_stage_training_settings.gae_lambda, adv_stats_callback=self.advantage_stats, ) else: vector_tasks_already_restarted = False step = -1 while step < cur_stage_training_settings.num_steps - 1: step += 1 try: num_paused = self.collect_step_across_all_task_samplers( rollout_storage_uuid=self.training_pipeline.rollout_storage_uuid, uuid_to_storage=uuid_to_storage, ) except (TimeoutError, EOFError) as e: if ( not self.try_restart_after_task_error ) or self.mode != TRAIN_MODE_STR: # Apparently you can just call `raise` here and doing so will just raise the exception as though # it was not caught (so the stacktrace isn't messed up) raise elif vector_tasks_already_restarted: raise RuntimeError( f"[{self.mode} worker {self.worker_id}] `vector_tasks` has timed out twice in the same" f" rollout. This suggests that this error was not recoverable. Timeout exception:\n{traceback.format_exc()}" ) else: get_logger().warning( f"[{self.mode} worker {self.worker_id}] `vector_tasks` appears to have crashed during" f" training due to an {type(e).__name__} error. You have set" f" `try_restart_after_task_error` to `True` so we will attempt to restart these tasks from" f" the beginning. USE THIS FEATURE AT YOUR OWN" f" RISK. Exception:\n{traceback.format_exc()}." ) self.vector_tasks.close() self._vector_tasks = None vector_tasks_already_restarted = True for ( storage ) in self.training_pipeline.current_stage_storage.values(): storage.after_updates() self.initialize_storage_and_viz( storage_to_initialize=cast( List[ExperienceStorage], list(uuid_to_storage.values()), ) ) step = -1 continue # A more informative error message should already have been thrown in be given in # `collect_step_across_all_task_samplers` if `num_paused != 0` here but this serves # as a sanity check. 
assert num_paused == 0 if self.is_distributed: # Preempt stragglers # Each worker will stop collecting steps for the current rollout whenever a # 100 * distributed_preemption_threshold percentage of workers are finished collecting their # rollout steps, and we have collected at least 25% but less than 90% of the steps. num_done = int(self.num_workers_done.get("done")) if ( num_done > self.distributed_preemption_threshold * self.num_workers and 0.25 * cur_stage_training_settings.num_steps <= step < 0.9 * cur_stage_training_settings.num_steps ): get_logger().debug( f"[{self.mode} worker {self.worker_id}] Preempted after {step}" f" steps (out of {cur_stage_training_settings.num_steps})" f" with {num_done} workers done" ) break with torch.no_grad(): actor_critic_output, _ = self.actor_critic( **rollout_storage.agent_input_for_next_step() ) self.training_pipeline.rollout_count += 1 if self.is_distributed: # Mark that a worker is done collecting experience self.num_workers_done.add("done", 1) self.num_workers_steps.add( "steps", self.step_count - self.former_steps ) # Ensure all workers are done before updating step counter dist.barrier( device_ids=( None if self.device == torch.device("cpu") else [self.device.index] ) ) ndone = int(self.num_workers_done.get("done")) assert ( ndone == self.num_workers ), f"# workers done {ndone} != # workers {self.num_workers}" # get the actual step_count self.step_count = ( int(self.num_workers_steps.get("steps")) + self.former_steps ) before_update_info = dict( next_value=actor_critic_output.values.detach(), use_gae=cur_stage_training_settings.use_gae, gamma=cur_stage_training_settings.gamma, tau=cur_stage_training_settings.gae_lambda, adv_stats_callback=self.advantage_stats, ) # Prepare storage for iteration during updates for storage in self.training_pipeline.current_stage_storage.values(): storage.before_updates(**before_update_info) for sc in self.training_pipeline.current_stage.stage_components: component_storage = 
uuid_to_storage[sc.storage_uuid] self.compute_losses_track_them_and_backprop( stage=self.training_pipeline.current_stage, stage_component=sc, storage=component_storage, ) for storage in self.training_pipeline.current_stage_storage.values(): storage.after_updates() # We update the storage step counts saved in # `self.training_pipeline.current_stage.storage_uuid_to_steps_taken_in_stage` here rather than with # `self.steps` above because some storage step counts may only change after the update calls above. # This may seem a bit weird but consider a storage that corresponds to a fixed dataset # used for imitation learning. For such a dataset, the "steps" will only increase as # new batches are sampled during update calls. # Note: We don't need to sort the keys below to ensure that distributed updates happen correctly # as `self.training_pipeline.current_stage_storage` is an ordered `dict`. # First we calculate the change in counts (possibly aggregating across devices) change_in_storage_experiences = {} for k in sorted(self.training_pipeline.current_stage_storage.keys()): delta = ( self.training_pipeline.current_stage_storage[k].total_experiences - former_storage_experiences[k] ) assert delta >= 0 change_in_storage_experiences[k] = self.distributed_weighted_sum( to_share=delta, weight=1 ) # Then we update `self.training_pipeline.current_stage.storage_uuid_to_steps_taken_in_stage` with the above # computed changes. 
def train(
    self,
    checkpoint_file_name: Optional[str] = None,
    restart_pipeline: bool = False,
    valid_on_initial_weights: bool = False,
):
    """Run the training pipeline, report the outcome, and close the engine.

    Exceptions raised during training (including `KeyboardInterrupt`) are
    logged rather than propagated; the outcome is reported through
    `self.results_queue` as a `("train_stopped", code)` tuple, where `code`
    is `0` on success (sent by worker 0 only) and `1 + self.worker_id` on
    failure. The engine is always closed before returning.

    # Parameters

    checkpoint_file_name : If not `None`, this checkpoint is loaded via
        `self.checkpoint_load` before training starts.
    restart_pipeline : Forwarded to `self.checkpoint_load`.
    valid_on_initial_weights : Forwarded to `self.run_pipeline`.
    """
    assert (
        self.mode == TRAIN_MODE_STR
    ), "train only to be called from a train instance"

    def worker_tag() -> str:
        # Built lazily so the prefix is formatted only when a message is logged.
        return f"[{self.mode} worker {self.worker_id}]"

    succeeded = False

    # noinspection PyBroadException
    try:
        if checkpoint_file_name is not None:
            self.checkpoint_load(checkpoint_file_name, restart_pipeline)
        self.run_pipeline(valid_on_initial_weights=valid_on_initial_weights)
    except KeyboardInterrupt:
        get_logger().info(f"{worker_tag()} KeyboardInterrupt, exiting.")
    except Exception as e:
        get_logger().error(
            f"{worker_tag()} Encountered {type(e).__name__}, exiting."
        )
        get_logger().error(traceback.format_exc())
    else:
        succeeded = True
    finally:
        if not succeeded:
            # A non-zero code identifies the worker that stopped abnormally.
            self.results_queue.put(("train_stopped", 1 + self.worker_id))
        else:
            if self.worker_id == 0:
                self.results_queue.put(("train_stopped", 0))
            get_logger().info(f"{worker_tag()} Training finished successfully.")
        self.close()
state. During eval the training_pipeline # should be only set in this function and always unset at the end of it. assert self.training_pipeline is None, ( "`training_pipeline` should be `None` before calling `run_eval`." " This is necessary as we want to initialize new storages." ) self.training_pipeline = self.config.training_pipeline() ckpt = self.checkpoint_load(checkpoint_file_path, restart_pipeline=False) total_steps = cast(int, ckpt["total_steps"]) eval_pipeline_stage = cast( PipelineStage, getattr(self.training_pipeline, f"{self.mode}_pipeline_stage"), ) assert ( len(eval_pipeline_stage.stage_components) <= 1 ), "Only one StageComponent is supported during inference." uuid_to_storage = self.training_pipeline.get_stage_storage(eval_pipeline_stage) assert len(uuid_to_storage) > 0, ( "No storage found for eval pipeline stage, this is a bug in AllenAct," " please submit an issue on GitHub (https://github.com/allenai/allenact/issues)." ) uuid_to_rollout_storage = { uuid: storage for uuid, storage in uuid_to_storage.items() if isinstance(storage, RolloutStorage) } uuid_to_non_rollout_storage = { uuid: storage for uuid, storage in uuid_to_storage.items() if not isinstance(storage, RolloutStorage) } if len(uuid_to_rollout_storage) > 1 or len(uuid_to_non_rollout_storage) > 1: raise NotImplementedError( "Only one RolloutStorage and non-RolloutStorage object is allowed within an evaluation pipeline stage." " If you'd like to evaluate against multiple storages please" " submit an issue on GitHub (https://github.com/allenai/allenact/issues). For the moment you'll need" " to evaluate against these storages separately." 
) rollout_storage = self.training_pipeline.rollout_storage if visualizer is not None: assert visualizer.empty() num_paused = self.initialize_storage_and_viz( storage_to_initialize=cast( List[ExperienceStorage], list(uuid_to_storage.values()) ), visualizer=visualizer, ) assert num_paused == 0, f"{num_paused} tasks paused when initializing eval" if rollout_storage is not None: num_tasks = sum( self.vector_tasks.command( "sampler_attr", ["length"] * self.num_active_samplers ) ) + ( # We need to add this as the first tasks have already been sampled self.num_active_samplers ) else: num_tasks = 0 # get_logger().debug("worker {self.worker_id} number of tasks {num_tasks}") steps = 0 self.actor_critic.eval() last_time: float = time.time() init_time: float = last_time frames: int = 0 if verbose: get_logger().info( f"[{self.mode} worker {self.worker_id}] Running evaluation on {num_tasks} tasks" f" for ckpt {checkpoint_file_path}" ) if self.enforce_expert: dist_wrapper_class = partial( TeacherForcingDistr, action_space=self.actor_critic.action_space, num_active_samplers=None, approx_steps=None, teacher_forcing=None, tracking_callback=None, always_enforce=True, ) else: dist_wrapper_class = None logging_pkg = LoggingPackage( mode=self.mode, training_steps=total_steps, storage_uuid_to_total_experiences=self.training_pipeline.storage_uuid_to_total_experiences, ) should_compute_onpolicy_losses = ( len(eval_pipeline_stage.loss_names) > 0 and eval_pipeline_stage.stage_components[0].storage_uuid == self.training_pipeline.rollout_storage_uuid ) while self.num_active_samplers > 0: frames += self.num_active_samplers num_newly_paused = self.collect_step_across_all_task_samplers( rollout_storage_uuid=self.training_pipeline.rollout_storage_uuid, uuid_to_storage=uuid_to_rollout_storage, visualizer=visualizer, dist_wrapper_class=dist_wrapper_class, ) steps += 1 if should_compute_onpolicy_losses and num_newly_paused > 0: # The `collect_step_across_all_task_samplers` method will automatically 
drop # parts of the rollout storage that correspond to paused tasks (namely by calling" # `rollout_storage.sampler_select(UNPAUSED_TASK_INDS)`). This makes sense when you don't need to # compute losses for tasks but is a bit limiting here as we're throwing away data before # using it to compute losses. As changing this is non-trivial we'll just warn the user # for now. get_logger().warning( f"[{self.mode} worker {self.worker_id}] {num_newly_paused * rollout_storage.step} steps" f" will be dropped when computing losses in evaluation. This is a limitation of the current" f" implementation of rollout collection in AllenAct. If you'd like to see this" f" functionality improved please submit an issue on GitHub" f" (https://github.com/allenai/allenact/issues)." ) if self.num_active_samplers == 0 or steps % rollout_steps == 0: if should_compute_onpolicy_losses and self.num_active_samplers > 0: with torch.no_grad(): actor_critic_output, _ = self.actor_critic( **rollout_storage.agent_input_for_next_step() ) before_update_info = dict( next_value=actor_critic_output.values.detach(), use_gae=eval_pipeline_stage.training_settings.use_gae, gamma=eval_pipeline_stage.training_settings.gamma, tau=eval_pipeline_stage.training_settings.gae_lambda, adv_stats_callback=lambda advantages: { "mean": advantages.mean(), "std": advantages.std(), }, ) # Prepare storage for iteration during loss computation for storage in uuid_to_rollout_storage.values(): storage.before_updates(**before_update_info) # Compute losses with torch.no_grad(): for sc in eval_pipeline_stage.stage_components: self.compute_losses_track_them_and_backprop( stage=eval_pipeline_stage, stage_component=sc, storage=uuid_to_rollout_storage[sc.storage_uuid], skip_backprop=True, ) for storage in uuid_to_rollout_storage.values(): storage.after_updates() cur_time = time.time() if self.num_active_samplers == 0 or cur_time - last_time >= update_secs: logging_pkg = self.aggregate_and_send_logging_package( 
tracking_info_list=self.tracking_info_list, logging_pkg=logging_pkg, send_logging_package=False, ) self.tracking_info_list.clear() if verbose: npending: int lengths: List[int] if self.num_active_samplers > 0: lengths = self.vector_tasks.command( "sampler_attr", ["length"] * self.num_active_samplers, ) npending = sum(lengths) else: lengths = [] npending = 0 est_time_to_complete = ( "{:.2f}".format( ( (cur_time - init_time) * (npending / (num_tasks - npending)) / 60 ) ) if npending != num_tasks else "???" ) get_logger().info( f"[{self.mode} worker {self.worker_id}]" f" For ckpt {checkpoint_file_path}" f" {frames / (cur_time - init_time):.1f} fps," f" {npending}/{num_tasks} tasks pending ({lengths})." f" ~{est_time_to_complete} min. to complete." ) if logging_pkg.num_non_empty_metrics_dicts_added != 0: get_logger().info( ", ".join( [ f"[{self.mode} worker {self.worker_id}]" f" num_{self.mode}_tasks_complete {logging_pkg.num_non_empty_metrics_dicts_added}", *[ f"{k} {v:.3g}" for k, v in logging_pkg.metrics_tracker.means().items() ], *[ f"{k0[1]}/{k1} {v1:.3g}" for k0, v0 in logging_pkg.info_trackers.items() for k1, v1 in v0.means().items() ], ] ) ) last_time = cur_time get_logger().info( f"[{self.mode} worker {self.worker_id}] Task evaluation complete, all task samplers paused." ) if rollout_storage is not None: self.vector_tasks.resume_all() self.vector_tasks.set_seeds(self.worker_seeds(self.num_samplers, self.seed)) self.vector_tasks.reset_all() logging_pkg = self.aggregate_and_send_logging_package( tracking_info_list=self.tracking_info_list, logging_pkg=logging_pkg, send_logging_package=False, ) self.tracking_info_list.clear() logging_pkg.viz_data = ( visualizer.read_and_reset() if visualizer is not None else None ) should_compute_offpolicy_losses = ( len(eval_pipeline_stage.loss_names) > 0 and not should_compute_onpolicy_losses ) if should_compute_offpolicy_losses: # In this case we are evaluating a non-rollout storage, e.g. 
some off-policy data get_logger().info( f"[{self.mode} worker {self.worker_id}] Non-rollout storage detected, will now compute losses" f" using this storage." ) offpolicy_eval_done = False while not offpolicy_eval_done: before_update_info = dict( next_value=None, use_gae=eval_pipeline_stage.training_settings.use_gae, gamma=eval_pipeline_stage.training_settings.gamma, tau=eval_pipeline_stage.training_settings.gae_lambda, adv_stats_callback=lambda advantages: { "mean": advantages.mean(), "std": advantages.std(), }, ) # Prepare storage for iteration during loss computation for storage in uuid_to_non_rollout_storage.values(): storage.before_updates(**before_update_info) # Compute losses assert len(eval_pipeline_stage.stage_components) == 1 try: for sc in eval_pipeline_stage.stage_components: with torch.no_grad(): self.compute_losses_track_them_and_backprop( stage=eval_pipeline_stage, stage_component=sc, storage=uuid_to_non_rollout_storage[sc.storage_uuid], skip_backprop=True, ) except EOFError: offpolicy_eval_done = True for storage in uuid_to_non_rollout_storage.values(): storage.after_updates() total_bsize = sum( tif.info.get("worker_batch_size", 0) for tif in self.tracking_info_list ) logging_pkg = self.aggregate_and_send_logging_package( tracking_info_list=self.tracking_info_list, logging_pkg=logging_pkg, send_logging_package=False, ) self.tracking_info_list.clear() cur_time = time.time() if verbose and (cur_time - last_time >= update_secs): get_logger().info( f"[{self.mode} worker {self.worker_id}]" f" For ckpt {checkpoint_file_path}" f" {total_bsize / (cur_time - init_time):.1f} its/sec." 
@staticmethod
def skip_to_latest(checkpoints_queue: mp.Queue, command: Optional[str], data):
    """Drain the queue and return the data of the newest `command` entry.

    A uniquely time-stamped sentinel is pushed onto the queue and entries
    are then consumed until that same sentinel comes back out. Every entry
    seen on the way whose command equals `command` overwrites `data`, so
    the value that survives is the one that was enqueued last. After a
    one second pause the process repeats for as long as the queue is not
    empty.

    # Parameters

    checkpoints_queue : Queue of `(command, data)` tuples. Must not be `None`.
    command : The command whose payloads should be collapsed to the latest one.
    data : The payload already taken from the queue by the caller; returned
        unchanged if no newer entry with the same command is found.

    # Returns

    The payload of the most recently consumed entry whose command equals
    `command`, or the `data` argument if there was none.

    # Raises

    ValueError : If an entry is found whose command is neither `command`
        nor the sentinel command.
    """
    assert (
        checkpoints_queue is not None
    ), "Attempting to process checkpoints queue but this queue is `None`."
    cond = True
    while cond:
        # The time stamp makes each sentinel distinguishable from the ones
        # pushed in earlier iterations of this loop.
        sentinel = ("skip.AUTO.sentinel", time.time())
        checkpoints_queue.put(
            sentinel
        )  # valid since a single valid process is the only consumer
        forwarded = False
        while not forwarded:
            new_command: Optional[str]
            new_data: Any
            (
                new_command,
                new_data,
            ) = checkpoints_queue.get()  # block until next command arrives
            if new_command == command:
                # A newer entry of the requested kind replaces the older one.
                data = new_data
            elif new_command == sentinel[0]:
                # Reaching our own sentinel means everything that was queued
                # before it has been consumed.
                assert (
                    new_data == sentinel[1]
                ), f"Wrong sentinel found: {new_data} vs {sentinel[1]}"
                forwarded = True
            else:
                raise ValueError(
                    f"Unexpected command {new_command} with data {new_data}"
                )
        # Pause for one second, then check whether anything was enqueued in
        # the meantime; if so, run another drain pass.
        time.sleep(1)
        cond = not checkpoints_queue.empty()
    return data
visualizer: Optional[VizSuite] = None finalized = False # noinspection PyBroadException try: while True: command: Optional[str] ckp_file_path: Any ( command, ckp_file_path, ) = self.checkpoints_queue.get() # block until first command arrives # get_logger().debug( # "{} {} command {} data {}".format( # self.mode, self.worker_id, command, data # ) # ) if command == "eval": if self.mode == VALID_MODE_STR: # skip to latest using # 1. there's only consumer in valid # 2. there's no quit/exit/close message issued by runner nor trainer ckp_file_path = self.skip_to_latest( checkpoints_queue=self.checkpoints_queue, command=command, data=ckp_file_path, ) if ( visualizer is None and self.machine_params.visualizer is not None ): visualizer = self.machine_params.visualizer eval_package = self.run_eval( checkpoint_file_path=ckp_file_path, visualizer=visualizer, verbose=True, update_secs=20 if self.mode == TEST_MODE_STR else 5 * 60, ) self.results_queue.put(eval_package) if self.is_distributed: dist.barrier() elif command in ["quit", "exit", "close"]: finalized = True break else: raise NotImplementedError() except KeyboardInterrupt: get_logger().info( f"[{self.mode} worker {self.worker_id}] KeyboardInterrupt, exiting." ) except Exception as e: get_logger().error( f"[{self.mode} worker {self.worker_id}] Encountered {type(e).__name__}, exiting." ) get_logger().error(traceback.format_exc()) finally: if finalized: if self.mode == TEST_MODE_STR: self.results_queue.put(("test_stopped", 0)) get_logger().info( f"[{self.mode} worker {self.worker_id}] Complete, all checkpoints processed." 
) else: if self.mode == TEST_MODE_STR: self.results_queue.put(("test_stopped", self.worker_id + 1)) self.close(verbose=self.mode == TEST_MODE_STR) ================================================ FILE: allenact/algorithms/onpolicy_sync/losses/__init__.py ================================================ from .a2cacktr import A2C, ACKTR, A2CACKTR from .ppo import PPO ================================================ FILE: allenact/algorithms/onpolicy_sync/losses/a2cacktr.py ================================================ """Implementation of A2C and ACKTR losses.""" from typing import cast, Tuple, Dict, Optional import torch from allenact.algorithms.onpolicy_sync.losses.abstract_loss import ( AbstractActorCriticLoss, ObservationType, ) from allenact.base_abstractions.distributions import CategoricalDistr from allenact.base_abstractions.misc import ActorCriticOutput from allenact.utils.system import get_logger class A2CACKTR(AbstractActorCriticLoss): """Class implementing A2C and ACKTR losses. # Attributes acktr : `True` if should use ACKTR loss (currently not supported), otherwise uses A2C loss. value_loss_coef : Weight of value loss. entropy_coef : Weight of entropy (encouraging) loss. entropy_method_name : Name of Distr's entropy method name. Default is `entropy`, but we might use `conditional_entropy` for `SequentialDistr`. """ def __init__( self, value_loss_coef, entropy_coef, acktr=False, entropy_method_name: str = "entropy", *args, **kwargs, ): """Initializer. See class documentation for parameter definitions. 
""" super().__init__(*args, **kwargs) self.acktr = acktr self.loss_key = "a2c_total" if not acktr else "aktr_total" self.value_loss_coef = value_loss_coef self.entropy_coef = entropy_coef self.entropy_method_name = entropy_method_name def loss_per_step( # type: ignore self, step_count: int, batch: ObservationType, actor_critic_output: ActorCriticOutput[CategoricalDistr], ) -> Dict[str, Tuple[torch.Tensor, Optional[float]]]: actions = cast(torch.LongTensor, batch["actions"]) values = actor_critic_output.values action_log_probs = actor_critic_output.distributions.log_prob(actions) action_log_probs = action_log_probs.view( action_log_probs.shape + (1,) * ( len(cast(torch.Tensor, batch["adv_targ"]).shape) - len(action_log_probs.shape) ) ) dist_entropy: torch.FloatTensor = getattr( actor_critic_output.distributions, self.entropy_method_name )() dist_entropy = dist_entropy.view( dist_entropy.shape + ((1,) * (len(action_log_probs.shape) - len(dist_entropy.shape))) ) value_loss = 0.5 * (cast(torch.FloatTensor, batch["returns"]) - values).pow(2) # TODO: Decided not to use normalized advantages here, # is this correct? (it's how it's done in Kostrikov's) action_loss = -( cast(torch.FloatTensor, batch["adv_targ"]).detach() * action_log_probs ) if self.acktr: # TODO: Currently acktr doesn't really work because of this natural gradient stuff # that we should figure out how to integrate properly. 
def loss(  # type: ignore
    self,
    step_count: int,
    batch: ObservationType,
    actor_critic_output: ActorCriticOutput[CategoricalDistr],
    *args,
    **kwargs,
):
    """Reduce the per-step loss terms to a single weighted scalar.

    # Parameters

    step_count : Forwarded to `loss_per_step`.
    batch : Forwarded to `loss_per_step`.
    actor_critic_output : Forwarded to `loss_per_step`.
    args : Extra args. Ignored.
    kwargs : Extra kwargs. Ignored.

    # Returns

    A tuple `(total_loss, info)`. `total_loss` is the sum over all terms
    returned by `loss_per_step` of the term's mean, multiplied by the
    term's weight when that weight is not `None`. `info` maps
    `self.loss_key` to the total and every term name to its (unweighted)
    mean, all as Python floats.
    """
    per_step_terms = self.loss_per_step(
        step_count=step_count,
        batch=batch,
        actor_critic_output=actor_critic_output,
    )

    # Average every term over all of its elements, keeping its weight.
    averaged = {}
    for name, (per_step_value, coef) in per_step_terms.items():
        averaged[name] = (per_step_value.mean(), coef)

    # Start from the integer 0, exactly as the builtin `sum` does.
    running_total = 0
    for mean_value, coef in averaged.values():
        contribution = mean_value if coef is None else mean_value * coef
        running_total = running_total + contribution
    total_loss = cast(torch.Tensor, running_total)

    info = {self.loss_key: total_loss.item()}
    for name, (mean_value, _) in averaged.items():
        info[name] = mean_value.item()

    return total_loss, info
""" def __init__( self, value_loss_coef, entropy_coef, entropy_method_name: str = "entropy", *args, **kwargs, ): super().__init__( value_loss_coef=value_loss_coef, entropy_coef=entropy_coef, acktr=True, entropy_method_name=entropy_method_name, *args, **kwargs, ) A2CConfig = dict( value_loss_coef=0.5, entropy_coef=0.01, ) ================================================ FILE: allenact/algorithms/onpolicy_sync/losses/abstract_loss.py ================================================ """Defining abstract loss classes for actor critic models.""" import abc from typing import Dict, Tuple, Union import torch from allenact.algorithms.onpolicy_sync.policy import ObservationType from allenact.base_abstractions.distributions import CategoricalDistr from allenact.base_abstractions.misc import Loss, ActorCriticOutput class AbstractActorCriticLoss(Loss): """Abstract class representing a loss function used to train an ActorCriticModel.""" # noinspection PyMethodOverriding @abc.abstractmethod def loss( # type: ignore self, step_count: int, batch: ObservationType, actor_critic_output: ActorCriticOutput[CategoricalDistr], *args, **kwargs, ) -> Union[ Tuple[torch.FloatTensor, Dict[str, float]], Tuple[torch.FloatTensor, Dict[str, float], Dict[str, float]], ]: """Computes the loss. # Parameters batch : A batch of data corresponding to the information collected when rolling out (possibly many) agents over a fixed number of steps. In particular this batch should have the same format as that returned by `RolloutStorage.batched_experience_generator`. actor_critic_output : The output of calling an ActorCriticModel on the observations in `batch`. args : Extra args. kwargs : Extra kwargs. # Returns A (0-dimensional) torch.FloatTensor corresponding to the computed loss. `.backward()` will be called on this tensor in order to compute a gradient update to the ActorCriticModel's parameters. A Dict[str, float] with scalar values corresponding to sub-losses. 
def loss(  # type: ignore
    self,
    step_count: int,
    batch: ObservationType,
    actor_critic_output: ActorCriticOutput[CategoricalDistr],
    *args,
    **kwargs,
):
    """Cross entropy between the policy and the expert's action group.

    For every step the probability mass the policy assigns to the actions
    of the expert's group is summed, and the negative log of that mass is
    averaged over all steps.

    # Parameters

    step_count : Unused.
    batch : Must contain `batch["observations"]["expert_group_action"]`,
        an integer tensor of row indices into `self.action_groups_mask`.
    actor_critic_output : Its `distributions.probs_tensor` holds the
        action probabilities; the selected mask rows are viewed to this
        tensor's shape.
    args : Extra args. Ignored.
    kwargs : Extra kwargs. Ignored.

    # Returns

    A tuple of the scalar loss tensor and a dict with the single key
    `"grouped_action_cross_entropy"` holding the same value as a float.
    """
    observations = cast(Dict[str, torch.Tensor], batch["observations"])

    assert "expert_group_action" in observations

    expert_group_actions = observations["expert_group_action"]

    # Keep the mask on the same device as the expert actions. Comparing and
    # moving by `torch.device` (rather than `.cuda(get_device())`) also works
    # when the target is the CPU, for which `get_device()` returns -1.
    if self.action_groups_mask.device != expert_group_actions.device:
        self.action_groups_mask = cast(
            torch.FloatTensor,
            self.action_groups_mask.to(expert_group_actions.device),
        )

    # One mask row per step, selected by the expert's group index.
    expert_group_actions_reshaped = expert_group_actions.view(-1, 1)
    expert_group_actions_mask = self.action_groups_mask[
        expert_group_actions_reshaped
    ]

    probs_tensor = actor_critic_output.distributions.probs_tensor
    expert_group_actions_mask = expert_group_actions_mask.view(probs_tensor.shape)

    # Probability of choosing *any* action inside the expert's group.
    group_probs = (probs_tensor * expert_group_actions_mask).sum(-1)
    total_loss = -(torch.log(group_probs)).mean()

    return total_loss, {
        "grouped_action_cross_entropy": total_loss.item(),
    }
def loss(  # type: ignore
    self,
    step_count: int,
    batch: ObservationType,
    actor_critic_output: ActorCriticOutput[Distr],
    *args,
    **kwargs,
):
    """Computes the imitation loss.

    Three cases are handled, selected by the keys of
    `batch["observations"]` and by `self.expert_sensor`:

    1. `"expert_action"` without action groups: a single masked cross
       entropy computed by `group_loss`.
    2. `"expert_action"` with action groups: one `group_loss` per
       conditional distribution, each conditioned on the expert actions
       of the groups that precede it.
    3. `"expert_policy"` without action groups: masked cross entropy
       against the full expert policy.

    # Parameters

    step_count : Unused.
    batch : A batch of data corresponding to the information collected when rolling out (possibly many) agents
        over a fixed number of steps. In particular this batch should have the same format as that returned by
        `RolloutStorage.batched_experience_generator`.
        Here `batch["observations"]` must contain `"expert_action"` observations
        or `"expert_policy"` observations. See `ExpertActionSensor` (or `ExpertPolicySensor`) for an example of
        a sensor producing such observations.
    actor_critic_output : The output of calling an ActorCriticModel on the observations in `batch`.
    args : Extra args. Ignored.
    kwargs : Extra kwargs. Ignored.

    # Returns

    A tuple whose first element is a (0-dimensional) torch.FloatTensor corresponding to the computed loss.
    `.backward()` will be called on this tensor in order to compute a gradient update to the
    ActorCriticModel's parameters. The second element is a dict that, when at least one expert action was
    marked as successful, maps `"expert_cross_entropy"` (and, with action groups,
    `"<group name>_cross_entropy"`) to float values; otherwise it is empty.

    # Raises

    NotImplementedError : If neither `"expert_action"` nor `"expert_policy"` is observed, or if
        `"expert_policy"` is observed while the expert sensor uses groups.
    """
    observations = cast(Dict[str, torch.Tensor], batch["observations"])

    # Per-group cross entropies, only filled in the grouped branch below.
    losses = OrderedDict()

    should_report_loss = False

    if "expert_action" in observations:
        if self.expert_sensor is None or not self.expert_sensor.use_groups:
            expert_actions_and_mask = observations["expert_action"]

            # The last dimension packs (action, success mask).
            assert expert_actions_and_mask.shape[-1] == 2
            expert_actions_and_mask_reshaped = expert_actions_and_mask.view(-1, 2)

            expert_actions = expert_actions_and_mask_reshaped[:, 0].view(
                *expert_actions_and_mask.shape[:-1], 1
            )
            expert_actions_masks = (
                expert_actions_and_mask_reshaped[:, 1]
                .float()
                .view(*expert_actions_and_mask.shape[:-1], 1)
            )

            total_loss, expert_successes = self.group_loss(
                cast(CategoricalDistr, actor_critic_output.distributions),
                expert_actions,
                expert_actions_masks,
            )

            should_report_loss = expert_successes.item() != 0
        else:
            expert_actions = su.unflatten(
                self.expert_sensor.observation_space, observations["expert_action"]
            )

            # NOTE(review): if no group reports an expert success, `total_loss`
            # stays the Python int 0 rather than a tensor -- confirm callers
            # handle this.
            total_loss = 0

            # Expert actions of the groups processed so far; used to condition
            # the distributions of later groups.
            ready_actions = OrderedDict()

            for group_name, cd in zip(
                self.expert_sensor.group_spaces,
                cast(
                    SequentialDistr, actor_critic_output.distributions
                ).conditional_distrs,
            ):
                assert group_name == cd.action_group_name

                cd.reset()
                cd.condition_on_input(**ready_actions)

                expert_action = expert_actions[group_name][
                    AbstractExpertSensor.ACTION_POLICY_LABEL
                ]
                expert_action_masks = expert_actions[group_name][
                    AbstractExpertSensor.EXPERT_SUCCESS_LABEL
                ]

                ready_actions[group_name] = expert_action

                current_loss, expert_successes = self.group_loss(
                    cd,
                    expert_action,
                    expert_action_masks,
                )

                should_report_loss = (
                    expert_successes.item() != 0 or should_report_loss
                )

                cd.reset()

                # Groups without any successful expert action contribute
                # neither to the total loss nor to the reported values.
                if expert_successes.item() != 0:
                    losses[group_name + "_cross_entropy"] = current_loss.item()
                    total_loss = total_loss + current_loss
    elif "expert_policy" in observations:
        if self.expert_sensor is None or not self.expert_sensor.use_groups:
            assert isinstance(
                actor_critic_output.distributions, CategoricalDistr
            ), "This implementation currently only supports `CategoricalDistr`"

            # All but the last entry hold the expert policy; the last entry is
            # the success mask.
            expert_policies = cast(Dict[str, torch.Tensor], batch["observations"])[
                "expert_policy"
            ][..., :-1]
            expert_actions_masks = cast(
                Dict[str, torch.Tensor], batch["observations"]
            )["expert_policy"][..., -1:]

            expert_successes = expert_actions_masks.sum()
            if expert_successes.item() > 0:
                should_report_loss = True

            log_probs = cast(
                CategoricalDistr, actor_critic_output.distributions
            ).log_probs_tensor

            # Add dimensions to `expert_actions_masks` on the right to allow for masking
            # if necessary.
            len_diff = len(log_probs.shape) - len(expert_actions_masks.shape)
            assert len_diff >= 0
            expert_actions_masks = expert_actions_masks.view(
                *expert_actions_masks.shape, *((1,) * len_diff)
            )

            # Normalize by the number of successes, clamped to avoid a
            # division by zero when there are none.
            total_loss = (
                -(log_probs * expert_policies) * expert_actions_masks
            ).sum() / torch.clamp(expert_successes, min=1)
        else:
            # NOTE(review): this branch is reached when the expert sensor uses
            # groups; the message below mentions `CategoricalDistr` instead.
            raise NotImplementedError(
                "This implementation currently only supports `CategoricalDistr`"
            )
    else:
        raise NotImplementedError(
            "Imitation loss requires either `expert_action` or `expert_policy`"
            " sensor to be active."
        )
    return (
        total_loss,
        (
            {"expert_cross_entropy": total_loss.item(), **losses}
            if should_report_loss
            else {}
        ),
    )
def loss_per_step(
    self,
    step_count: int,
    batch: ObservationType,
    actor_critic_output: ActorCriticOutput[CategoricalDistr],
) -> Tuple[
    Dict[str, Tuple[torch.Tensor, Optional[float]]], Dict[str, torch.Tensor]
]:  # TODO tuple output
    """Compute the un-reduced PPO loss terms.

    # Parameters

    step_count : Passed to `self.clip_decay` to scale `self.clip_param`.
    batch : Must provide `"actions"`, `"old_action_log_probs"`,
        `"returns"`, the advantages under `self.adv_key` and, when the
        value loss is clipped, `"values"`.
    actor_critic_output : Provides `values` and `distributions`.

    # Returns

    A pair of dicts. The first maps `"value"`, `"action"` and
    `"entropy"` to `(per-step tensor, weight)` tuples, where the weight of
    the action term is `None`. The second holds the `"ratio"`,
    `"ratio_clamped"` and `"ratio_used"` tensors if `self.show_ratios` is
    set and is empty otherwise.
    """
    distr = actor_critic_output.distributions
    taken_actions = cast(torch.LongTensor, batch["actions"])
    predicted_values = actor_critic_output.values

    new_log_probs = distr.log_prob(taken_actions)
    entropy: torch.FloatTensor = getattr(distr, self.entropy_method_name)()

    advantages = batch[self.adv_key]
    target_rank = len(advantages.shape)

    def add_trailing_dims(t: torch.Tensor):
        # Right-pad with singleton dims up to the rank of the advantages.
        missing = target_rank - len(t.shape)
        assert missing >= 0
        return t.view(t.shape + ((1,) * missing))

    entropy = add_trailing_dims(entropy)

    eps = self.clip_param * self.clip_decay(step_count)

    ratio = add_trailing_dims(
        torch.exp(new_log_probs - batch["old_action_log_probs"])
    )
    clamped_ratio = torch.clamp(ratio, 1.0 - eps, 1.0 + eps)

    unclipped_objective = ratio * advantages
    clipped_objective = clamped_ratio * advantages

    # Pessimistic bound: use the clipped objective wherever it is smaller.
    use_clamped = clipped_objective < unclipped_objective
    action_loss = -torch.where(
        cast(torch.Tensor, use_clamped), clipped_objective, unclipped_objective
    )

    if not self.use_clipped_value_loss:
        value_loss = 0.5 * (
            cast(torch.FloatTensor, batch["returns"]) - predicted_values
        ).pow(2)
    else:
        # Limit how far the new value prediction may move from the old one.
        clipped_prediction = batch["values"] + (
            predicted_values - batch["values"]
        ).clamp(-eps, eps)
        plain_error = (predicted_values - batch["returns"]).pow(2)
        clipped_error = (clipped_prediction - batch["returns"]).pow(2)
        value_loss = 0.5 * torch.max(plain_error, clipped_error)

    # noinspection PyUnresolvedReferences
    weighted_terms = {
        "value": (value_loss, self.value_loss_coef),
        "action": (action_loss, None),
        # The entropy is negated in place so that minimizing it raises entropy.
        "entropy": (entropy.mul_(-1.0), self.entropy_coef),  # type: ignore
    }

    if self.show_ratios:
        ratio_info = {
            "ratio": ratio,
            "ratio_clamped": clamped_ratio,
            "ratio_used": torch.where(
                cast(torch.Tensor, use_clamped), clamped_ratio, ratio
            ),
        }
    else:
        ratio_info = {}

    return weighted_terms, ratio_info
def loss(  # type: ignore
    self,
    step_count: int,
    batch: ObservationType,
    actor_critic_output: ActorCriticOutput[CategoricalDistr],
    *args,
    **kwargs
):
    """Compute the (optionally clipped) mean squared value loss.

    # Parameters

    step_count : Passed to `self.clip_decay` to scale `self.clip_param`.
    batch : Must provide `"returns"` and, when the value loss is clipped,
        `"values"`.
    actor_critic_output : Only its `values` attribute is read.
    args : Extra args. Ignored.
    kwargs : Extra kwargs. Ignored.

    # Returns

    A tuple of the scalar loss tensor and a dict mapping `"value"` to the
    same loss as a float.
    """
    predicted = actor_critic_output.values
    # The decay callable is evaluated whether or not clipping is enabled.
    eps = self.clip_param * self.clip_decay(step_count)

    if not self.use_clipped_value_loss:
        squared_error = (cast(torch.FloatTensor, batch["returns"]) - predicted).pow(2)
        value_loss = 0.5 * squared_error.mean()
    else:
        # Limit how far the new prediction may move from the stored one and
        # keep, element-wise, the larger of the two squared errors.
        clipped_prediction = batch["values"] + (predicted - batch["values"]).clamp(
            -eps, eps
        )
        plain_error = (predicted - batch["returns"]).pow(2)
        clipped_error = (clipped_prediction - batch["returns"]).pow(2)
        value_loss = 0.5 * torch.max(plain_error, clipped_error).mean()

    info = {"value": value_loss.item()}
    return value_loss, info
@property
def recurrent_memory_specification(self) -> Optional[FullMemorySpecType]:
    """The validated, cached memory specification of the model.

    The specification is obtained once from
    `_recurrent_memory_specification` and stored in `self.memory_spec`;
    see that method for the expected format. Each access re-checks the
    dimension names of every memory.

    # Returns

    The value returned by `_recurrent_memory_specification`: `None` for a
    memory-less model, otherwise the specification dictionary.
    """
    if self.memory_spec is None:
        # Wrapped in a list so that a cached `None` (memory-less model) can
        # be told apart from "not computed yet".
        self.memory_spec = [self._recurrent_memory_specification()]

    cached = self.memory_spec[0]

    if cached is not None:
        for memory_name in cached:
            shape_spec, _ = cached[memory_name]
            names = [dim[0] for dim in shape_spec]

            assert (
                "step" not in names
            ), "`step` is automatically added and cannot be reused"

            assert "sampler" in names, "`sampler` dim must be defined"

        return self.memory_spec[0]

    return None
# Parameters observations : Multi-level map from key strings to tensors of shape [steps, samplers, (agents,) ...] with the current observations. memory : `Memory` object with recurrent memory. The shape of each tensor is determined by the corresponding entry in `_recurrent_memory_specification`. prev_actions : ActionType with tensors of shape [steps, samplers, ...] with the previous actions. masks : tensor of shape [steps, samplers, agents, 1] with zeros indicating steps where a new episode/task starts. # Returns A tuple whose first element is an object of class ActorCriticOutput which stores the agents' probability distribution over possible actions (shape [steps, samplers, ...]), the agents' value for the state (shape [steps, samplers, ..., 1]), and any extra information needed for loss computations. The second element is an optional `Memory`, which is only used in models with recurrent memory. """ raise NotImplementedError() class LinearActorCriticHead(nn.Module): def __init__(self, input_size: int, num_actions: int): super().__init__() self.input_size = input_size self.num_actions = num_actions self.actor_and_critic = nn.Linear(input_size, 1 + num_actions) nn.init.orthogonal_(self.actor_and_critic.weight) nn.init.constant_(self.actor_and_critic.bias, 0) def forward(self, x) -> Tuple[CategoricalDistr, torch.Tensor]: out = self.actor_and_critic(x) logits = out[..., :-1] values = out[..., -1:] # noinspection PyArgumentList return ( # logits are [step, sampler, ...] 
class LinearCriticHead(nn.Module):
    """Single linear layer producing one value estimate per input feature vector."""

    def __init__(self, input_size: int):
        """Create the value layer with orthogonal weights and a zero bias.

        # Parameters

        input_size : Size of the last dimension of the inputs passed to `forward`.
        """
        super().__init__()
        critic = nn.Linear(input_size, 1)
        nn.init.orthogonal_(critic.weight)
        nn.init.constant_(critic.bias, 0)
        # The attribute name determines the parameter names of the module.
        self.fc = critic

    def forward(self, x):
        """Compute values and flatten everything after the first two dims.

        # Returns

        Tensor whose first two dimensions equal those of `x`
        ([steps, samplers]) followed by a single flattened dimension.
        """
        values = self.fc(x)
        leading_dims = tuple(x.shape[:2])
        return values.view(*leading_dims, -1)  # [steps, samplers, flattened]
def __init__(
    self,
    config: ExperimentConfig,
    output_dir: str,
    loaded_config_src_files: Optional[Dict[str, str]],
    seed: Optional[int] = None,
    mode: str = "train",
    deterministic_cudnn: bool = False,
    deterministic_agents: bool = False,
    mp_ctx: Optional[BaseContext] = None,
    multiprocessing_start_method: str = "default",
    extra_tag: str = "",
    disable_tensorboard: bool = False,
    disable_config_saving: bool = False,
    distributed_ip_and_port: str = "127.0.0.1:0",
    distributed_preemption_threshold: float = 0.7,
    machine_id: int = 0,
    save_dir_fmt: SaveDirFormat = SaveDirFormat.FLAT,
    callbacks_paths: Optional[str] = None,
):
    """Store the run configuration, create the multiprocessing context and
    seed the random number generators.

    No worker processes are started here; that happens in `start_train` /
    `start_test`.

    # Parameters

    config : The experiment configuration driving the run.
    output_dir : Root directory under which checkpoints, metrics, logs and
        used configs are written.
    loaded_config_src_files : Mapping whose keys are the config source files
        copied by `save_project_state` (the special key `CONFIG_KWARGS_STR`
        maps to a JSON string of config kwargs); `None` disables that copy.
    seed : Seed for this run. If `None`, a random integer in
        `[0, 2**31 - 1]` is drawn.
    mode : Either train or test mode (case and surrounding whitespace are
        ignored); anything else fails the assertion below.
    deterministic_cudnn : If `True`, `set_deterministic_cudnn()` is called.
    deterministic_agents : Forwarded to the validation/test workers.
    mp_ctx : Multiprocessing context to use; if `None` one is created from
        `multiprocessing_start_method`.
    multiprocessing_start_method : Start method for a newly created context.
        `"default"` resolves to `"forkserver"` when CUDA is available and to
        `"spawn"` otherwise.
    extra_tag : Optional tag appended to the experiment name and to the
        output paths.
    disable_tensorboard : If `True`, `init_visualizer` leaves
        `self.visualizer` unset.
    disable_config_saving : If `True`, `start_train` does not call
        `save_project_state`.
    distributed_ip_and_port : `"ip:port"` string; a port of `0` makes
        `get_port` search for a free port.
    distributed_preemption_threshold : Forwarded to the training workers.
    machine_id : Index of this machine (testing requires `0`).
    save_dir_fmt : Directory layout used for the outputs.
    callbacks_paths : Comma-separated callback specification, resolved
        lazily by the `callbacks` property via `setup_callback_classes`.
    """
    self.config = config
    self.output_dir = output_dir
    self.loaded_config_src_files = loaded_config_src_files
    # Drawn before `set_seed` below, so an unspecified seed differs per run.
    self.seed = seed if seed is not None else random.randint(0, 2**31 - 1)
    self.deterministic_cudnn = deterministic_cudnn
    self.distributed_preemption_threshold = distributed_preemption_threshold
    if multiprocessing_start_method == "default":
        if torch.cuda.is_available():
            multiprocessing_start_method = "forkserver"
        else:
            # Spawn seems to play nicer with cpus and debugging
            multiprocessing_start_method = "spawn"
    # If `mp_ctx` is given, `init_context` keeps it and only warns when its
    # start method differs from the requested one.
    self.mp_ctx = self.init_context(mp_ctx, multiprocessing_start_method)
    self.extra_tag = extra_tag
    self.mode = mode.lower().strip()
    # Populated by `init_visualizer` unless tensorboard is disabled.
    self.visualizer: Optional[VizSuite] = None
    self.deterministic_agents = deterministic_agents
    self.disable_tensorboard = disable_tensorboard
    self.disable_config_saving = disable_config_saving

    assert self.mode in [
        TRAIN_MODE_STR,
        TEST_MODE_STR,
    ], "Only 'train' and 'test' modes supported in runner"

    if self.deterministic_cudnn:
        set_deterministic_cudnn()

    set_seed(self.seed)

    # Created by `_initialize_start_train_or_start_test`.
    self.queues: Optional[Dict[str, mp.Queue]] = None

    # Worker processes grouped by mode string.
    self.processes: Dict[str, List[Union[BaseProcess, mp.Process]]] = defaultdict(
        list
    )

    self.current_checkpoint = None

    # Set when a train/test run is started; see `local_start_time_str`.
    self._local_start_time_str: Optional[str] = None

    self._is_closed: bool = False

    self._collect_valid_results: bool = False

    self.distributed_ip_and_port = distributed_ip_and_port
    self.machine_id = machine_id

    self.save_dir_fmt = save_dir_fmt

    self.callbacks_paths = callbacks_paths
def setup_callback_classes(self, callbacks: Optional[str]) -> Set[Callback]:
    """Instantiate and set up the callbacks named in a comma-separated string.

    After splitting `callbacks` on commas (surrounding whitespace is
    ignored and empty entries are skipped), each entry must be one of:

    1. The name of a function defined on the experiment config that, when
       called, returns an object of type `Callback`.
    2. A path to a python file containing a single class that inherits
       from `Callback`.
    3. A module path (e.g. `path.to.module`) where the module contains a
       single class that inherits from `Callback`.

    For cases 2 and 3 the `Callback` base class itself, which such modules
    normally import, is not counted.

    # Parameters

    callbacks : The comma-separated specification, or `None`/`""` for no
        callbacks.

    # Returns

    The set of callback objects, each already initialized through its
    `setup(name=..., config=..., mode=...)` method.

    # Raises

    AssertionError : If a file/module entry does not contain exactly one
        `Callback` subclass.
    """
    if callbacks is None or callbacks == "":
        return set()

    setup_dict = dict(
        name=f"{self.experiment_name}/{self.local_start_time_str}",
        config=self.config,
        mode=self.mode,
    )

    callback_objects = set()
    for raw_entry in callbacks.split(","):
        filename = raw_entry.strip()
        if filename == "":
            # Tolerate stray commas such as "a,,b" or a trailing ",".
            continue

        # Check if the `filename` is a function on the config
        if not any(k in filename for k in [".", "/"]):
            callback_func = getattr(self.config, filename, None)
            if callback_func is not None:
                callback = callback_func()
                callback.setup(**setup_dict)
                callback_objects.add(callback)
                continue

        # Otherwise find the Callback class in the file or module
        module_path = filename.replace("/", ".")
        if module_path.endswith(".py"):
            module_path = module_path[:-3]
        module = importlib.import_module(module_path)

        # `getmembers` yields (name, class) pairs. `issubclass(Callback, Callback)`
        # is true, so the imported base class must be excluded explicitly.
        callback_classes = [
            member
            for _, member in inspect.getmembers(module, inspect.isclass)
            if issubclass(member, Callback) and member is not Callback
        ]

        assert len(callback_classes) == 1, (
            f"Expected a single callback class in {filename}, but found {len(callback_classes)}."
            f" These classes were found: {callback_classes}."
        )

        # NOTE: initialize the callback class
        callback = callback_classes[0]()
        callback.setup(**setup_dict)
        callback_objects.add(callback)

    return callback_objects
""" os.makedirs(self.output_dir, exist_ok=True) start_time_string_lock_path = os.path.abspath( os.path.join(self.output_dir, ".allenact_start_time_string.lock") ) try: with filelock.FileLock(start_time_string_lock_path, timeout=60): last_start_time_string_path = os.path.join( self.output_dir, ".allenact_last_start_time_string" ) pathlib.Path(last_start_time_string_path).touch() with open(last_start_time_string_path, "r") as f: last_start_time_string_list = f.readlines() while True: candidate_str = time.strftime( "%Y-%m-%d_%H-%M-%S", time.localtime(time.time()) ) if ( len(last_start_time_string_list) == 0 or last_start_time_string_list[0].strip() != candidate_str ): break time.sleep(0.2) with open(last_start_time_string_path, "w") as f: f.write(candidate_str) except filelock.Timeout as e: get_logger().exception( f"Could not acquire the lock for {start_time_string_lock_path} for 60 seconds," " this suggests an unexpected deadlock. Please close all AllenAct training processes," " delete this lockfile, and try again." 
def init_visualizer(self, mode: str):
    """Set `self.visualizer` from the machine parameters of `mode`.

    Does nothing when tensorboard logging has been disabled.

    # Parameters

    mode : Mode string passed to `config.machine_params`.
    """
    if self.disable_tensorboard:
        return

    # Note: Avoid instantiating anything in machine_params (use Builder if needed)
    params_for_mode = self.config.machine_params(mode)
    self.visualizer = MachineParams.instance_from(params_for_mode).visualizer
) return handler signal.signal(signal.SIGTERM, create_handler("Termination")) signal.signal(signal.SIGINT, create_handler("Interrupt")) @staticmethod def init_worker(engine_class, args, kwargs): mode = kwargs["mode"] id = kwargs["worker_id"] worker = None try: worker = engine_class(*args, **kwargs) except Exception: get_logger().error(f"Encountered Exception. Terminating {mode} worker {id}") get_logger().exception(traceback.format_exc()) kwargs["results_queue"].put((f"{mode}_stopped", 1 + id)) finally: return worker @lazy_property def _get_callback_sensors(self) -> List[Sensor]: callback_sensors: List[Sensor] = [] for c in self.callbacks: sensors = c.callback_sensors() if sensors is not None: callback_sensors.extend(sensors) return callback_sensors @staticmethod def train_loop( id: int = 0, checkpoint: Optional[str] = None, restart_pipeline: bool = False, valid_on_initial_weights: bool = False, *engine_args, **engine_kwargs, ): engine_kwargs["mode"] = TRAIN_MODE_STR engine_kwargs["worker_id"] = id engine_kwargs_for_print = { k: (v if k != "initial_model_state_dict" else "[SUPPRESSED]") for k, v in engine_kwargs.items() } get_logger().info(f"train {id} args {engine_kwargs_for_print}") trainer: OnPolicyTrainer = OnPolicyRunner.init_worker( engine_class=OnPolicyTrainer, args=engine_args, kwargs=engine_kwargs ) if trainer is not None: OnPolicyRunner.init_process("Train", id, to_close_on_termination=trainer) trainer.train( checkpoint_file_name=checkpoint, restart_pipeline=restart_pipeline, valid_on_initial_weights=valid_on_initial_weights, ) @staticmethod def valid_loop(id: int = 0, *engine_args, **engine_kwargs): engine_kwargs["mode"] = VALID_MODE_STR engine_kwargs["worker_id"] = id get_logger().info(f"valid {id} args {engine_kwargs}") valid = OnPolicyRunner.init_worker( engine_class=OnPolicyInference, args=engine_args, kwargs=engine_kwargs ) if valid is not None: OnPolicyRunner.init_process("Valid", id, to_close_on_termination=valid) valid.process_checkpoints() # gets 
def get_port(self):
    """Resolve the port used for distributed communication.

    The port is the part after `:` in `self.distributed_ip_and_port`. A
    value of `0` means "find a free port on the given ip", which only the
    runner with `machine_id == 0` is allowed to do.

    # Returns

    The port number to use.
    """
    ip_and_port = self.distributed_ip_and_port.split(":")
    distributed_port = int(ip_and_port[1])

    if distributed_port == 0:
        assert (
            self.machine_id == 0
        ), "Only runner with `machine_id` == 0 can search for a free port."
        distributed_port = find_free_port(ip_and_port[0])

    get_logger().info(
        f"Engines on machine_id == {self.machine_id} using port {distributed_port} and seed {self.seed}"
    )

    return distributed_port
results_queue=self.queues["results"], checkpoints_queue=( self.queues["checkpoints"] if self.running_validation else None ), checkpoints_dir=self.checkpoint_dir(), seed=self.seed, deterministic_cudnn=self.deterministic_cudnn, mp_ctx=self.mp_ctx, num_workers=num_workers, device=devices[trainer_id], distributed_ip=self.distributed_ip_and_port.split(":")[0], distributed_port=distributed_port, max_sampler_processes_per_worker=max_sampler_processes_per_worker, save_ckpt_after_every_pipeline_stage=save_ckpt_after_every_pipeline_stage, initial_model_state_dict=( initial_model_state_dict if model_hash is None else model_hash ), first_local_worker_id=worker_ids[0], distributed_preemption_threshold=self.distributed_preemption_threshold, valid_on_initial_weights=valid_on_initial_weights, try_restart_after_task_error=try_restart_after_task_error, save_ckpt_at_every_host=save_ckpt_at_every_host, ) train: BaseProcess = self.mp_ctx.Process( target=self.train_loop, kwargs=training_kwargs, ) try: train.start() except (ValueError, OSError, ConnectionRefusedError, EOFError) as e: # If the `initial_model_state_dict` is too large we sometimes # run into errors passing it with multiprocessing. In such cases # we instead hash the state_dict and confirm, in each engine worker, that # this hash equals the model the engine worker instantiates. 
if ( (isinstance(e, ValueError) and e.args[0] == "too many fds") or (isinstance(e, OSError) and e.errno == 22) or (isinstance(e, ConnectionRefusedError) and e.errno == 111) or isinstance(e, EOFError) ): model_hash = md5_hash_of_state_dict(initial_model_state_dict) training_kwargs["initial_model_state_dict"] = model_hash train = self.mp_ctx.Process( target=self.train_loop, kwargs=training_kwargs, ) train.start() else: raise e self.processes[TRAIN_MODE_STR].append(train) get_logger().info( f"Started {len(self.processes[TRAIN_MODE_STR])} train processes" ) # Validation if self.running_validation: device = self.worker_devices(VALID_MODE_STR)[0] self.init_visualizer(VALID_MODE_STR) valid: BaseProcess = self.mp_ctx.Process( target=self.valid_loop, args=(0,), kwargs=dict( config=self.config, callback_sensors=self._get_callback_sensors, results_queue=self.queues["results"], checkpoints_queue=self.queues["checkpoints"], seed=12345, # TODO allow same order for randomly sampled tasks? Is this any useful anyway? deterministic_cudnn=self.deterministic_cudnn, deterministic_agents=self.deterministic_agents, mp_ctx=self.mp_ctx, device=device, max_sampler_processes_per_worker=max_sampler_processes_per_worker, ), ) valid.start() self.processes[VALID_MODE_STR].append(valid) get_logger().info( f"Started {len(self.processes[VALID_MODE_STR])} valid processes" ) else: get_logger().info( "No processes allocated to validation, no validation will be run." 
def start_test(
    self,
    checkpoint_path_dir_or_pattern: str,
    infer_output_dir: bool = False,
    approx_ckpt_step_interval: Optional[Union[float, int]] = None,
    max_sampler_processes_per_worker: Optional[int] = None,
    inference_expert: bool = False,
) -> List[Dict]:
    """Start the test workers, queue the checkpoints to evaluate and
    collect the results.

    One test process is started per device returned by
    `worker_devices(TEST_MODE_STR)`. For every checkpoint found, one
    `("eval", path)` message per tester is put on the checkpoints queue,
    followed by one `("quit", None)` message per tester.

    # Parameters

    checkpoint_path_dir_or_pattern : Passed to `get_checkpoint_files` to
        select the checkpoints to test.
    infer_output_dir : With the `NESTED` directory format, if `True`,
        `self.output_dir` is replaced by the log folder derived from the
        first checkpoint path.
    approx_ckpt_step_interval : Passed to `get_checkpoint_files`.
    max_sampler_processes_per_worker : Forwarded to every test engine.
    inference_expert : Forwarded to every test engine as `enforce_expert`;
        if `True`, `"enforced_test_expert"` is also appended to
        `self.extra_tag`.

    # Returns

    Whatever `log_and_close` returns (annotated here as a list of dicts).
    """
    # Tester always runs on a single machine
    assert (
        self.machine_id == 0
    ), f"Received `machine_id={self.machine_id} for test. Only one machine supported."
    assert isinstance(
        checkpoint_path_dir_or_pattern, str
    ), "Must provide a --checkpoint path or pattern to test on."

    # Multiplying a string by a bool repeats it 0 or 1 times: the tag (with
    # a "__" separator if `extra_tag` is non-empty) is only added when
    # `inference_expert` is True.
    self.extra_tag += (
        "__" * (len(self.extra_tag) > 0) + "enforced_test_expert"
    ) * inference_expert
    self._initialize_start_train_or_start_test()

    devices = self.worker_devices(TEST_MODE_STR)
    self.init_visualizer(TEST_MODE_STR)
    num_testers = len(devices)

    # A port is only looked up when there is more than one tester.
    distributed_port = 0
    if num_testers > 1:
        distributed_port = find_free_port()

    # Tester always runs on a single machine
    for tester_it in range(num_testers):
        test: BaseProcess = self.mp_ctx.Process(
            target=self.test_loop,
            args=(tester_it,),
            kwargs=dict(
                config=self.config,
                callback_sensors=self._get_callback_sensors,
                results_queue=self.queues["results"],
                checkpoints_queue=self.queues["checkpoints"],
                seed=12345,  # TODO allow same order for randomly sampled tasks? Is this any useful anyway?
                deterministic_cudnn=self.deterministic_cudnn,
                deterministic_agents=self.deterministic_agents,
                mp_ctx=self.mp_ctx,
                num_workers=num_testers,
                device=devices[tester_it],
                max_sampler_processes_per_worker=max_sampler_processes_per_worker,
                distributed_port=distributed_port,
                enforce_expert=inference_expert,
            ),
        )

        test.start()
        self.processes[TEST_MODE_STR].append(test)

    get_logger().info(
        f"Started {len(self.processes[TEST_MODE_STR])} test processes"
    )

    checkpoint_paths = self.get_checkpoint_files(
        checkpoint_path_dir_or_pattern=checkpoint_path_dir_or_pattern,
        approx_ckpt_step_interval=approx_ckpt_step_interval,
    )
    steps = [self.step_from_checkpoint(cp) for cp in checkpoint_paths]

    get_logger().info(f"Running test on {len(steps)} steps {steps}")

    for checkpoint_path in checkpoint_paths:
        # Make all testers work on each checkpoint
        for tester_it in range(num_testers):
            self.queues["checkpoints"].put(("eval", checkpoint_path))

    # Signal all testers to terminate cleanly
    for _ in range(num_testers):
        self.queues["checkpoints"].put(("quit", None))

    # NOTE(review): `checkpoint_paths[0]` below assumes at least one
    # checkpoint was found -- presumably guaranteed by
    # `get_checkpoint_files`; confirm.
    if self.save_dir_fmt == SaveDirFormat.NESTED:
        if infer_output_dir:  # NOTE: we change output_dir here
            self.output_dir = self.checkpoint_log_folder_str(checkpoint_paths[0])
        suffix = ""
    elif self.save_dir_fmt == SaveDirFormat.FLAT:
        suffix = f"__test_{self.local_start_time_str}"
    else:
        raise NotImplementedError
    metrics_dir = self.metric_path(self.local_start_time_str)
    os.makedirs(metrics_dir, exist_ok=True)
    metrics_file_path = os.path.join(metrics_dir, "metrics" + suffix + ".json")

    get_logger().info(f"Saving test metrics in {metrics_file_path}")

    # Check output file can be written
    with open(metrics_file_path, "w") as f:
        json.dump([], f, indent=4, sort_keys=True, cls=NumpyJSONEncoder)

    # The start time passed on is the one encoded in the checkpoint's path
    # (its parent directory name), not this runner's own start time.
    return self.log_and_close(
        start_time_str=self.checkpoint_start_time_str(checkpoint_paths[0]),
        nworkers=num_testers,
        test_steps=steps,
        metrics_file=metrics_file_path,
    )
def metric_path(self, start_time_str: str) -> str:
    """Directory in which metrics files for a run are stored.

    # Parameters

    start_time_str : Start time string identifying the run.

    # Returns

    With the `NESTED` format: `<output_dir>/test/<tag>/<start_time_str>`.
    With the `FLAT` format:
    `<output_dir>/metrics/<tag>[/<extra_tag>]/<start_time_str>`.

    # Raises

    NotImplementedError : For any other directory format.
    """
    layout = self.save_dir_fmt

    if layout == SaveDirFormat.NESTED:
        return os.path.join(
            self.output_dir, "test", self.config.tag(), start_time_str
        )

    if layout != SaveDirFormat.FLAT:
        raise NotImplementedError

    # The extra tag, when present, becomes a sub-directory of the config tag.
    tag_dir = self.config.tag()
    if self.extra_tag != "":
        tag_dir = os.path.join(tag_dir, self.extra_tag)

    return os.path.join(self.output_dir, "metrics", tag_dir, start_time_str)
f" Is it possible that {os.getcwd()} is not under version control?" ) # Saving configs if self.loaded_config_src_files is not None: for src_path in self.loaded_config_src_files: if src_path == CONFIG_KWARGS_STR: # We also save key-word arguments passed to the experiment # initializer. save_path = os.path.join(base_dir, "config_kwargs.json") assert not os.path.exists( save_path ), f"{save_path} should not already exist." with open(save_path, "w") as f: json.dump(json.loads(self.loaded_config_src_files[src_path]), f) continue assert os.path.isfile(src_path), f"Config file {src_path} not found" src_path = os.path.abspath(src_path) # To prevent overwriting files with the same name, we loop # here until we find a prefix (if necessary) to prevent # name collisions. k = -1 while True: prefix = "" if k == -1 else f"namecollision{k}__" k += 1 dst_path = os.path.join( base_dir, f"{prefix}{os.path.basename(src_path)}", ) if not os.path.exists(dst_path): os.makedirs(os.path.dirname(dst_path), exist_ok=True) with open(src_path, "r") as f: file_contents = f.read() with open(dst_path, "w") as f: f.write( f"### THIS FILE ORIGINALLY LOCATED AT '{src_path}'\n\n{file_contents}" ) break get_logger().info(f"Config files saved to {base_dir}") for callback in self.callbacks: callback.after_save_project_state(base_dir=base_dir) def _update_keys( self, d: Union[Dict[str, Any], str], tag_if_not_a_loss: str, mode: str, stage_component_uuid: Optional[str] = None, ) -> Union[Dict[str, Any], str]: midfix = "-" if stage_component_uuid is None else f"-{stage_component_uuid}-" def _convert(key: str): if key.startswith("losses/"): return f"{mode}{midfix}{key}" else: return f"{mode}{midfix}{tag_if_not_a_loss}/{key}" if isinstance(d, str): return _convert(d) return {_convert(k): v for k, v in d.items()} def _process_logging_packages( self, log_writer: Optional[SummaryWriter], pkgs: Union[LoggingPackage, List[LoggingPackage]], last_steps: Optional[int], last_storage_uuid_to_total_experiences: 
Optional[Dict[str, int]], last_time: Optional[float], all_results: Optional[List[Any]] = None, ): mode = pkgs[0].mode assert all( pkg.mode == mode for pkg in pkgs ), "All logging packages must be the same mode." assert mode == self.mode or ( mode == VALID_MODE_STR and self.mode == TRAIN_MODE_STR ), ( "Logging package mode must match the logger mode except when training where the logging package may" "be of mode 'valid'." ) training = mode == TRAIN_MODE_STR # Are we logging training packages current_time = time.time() training_steps = pkgs[0].training_steps storage_uuid_to_total_experiences = pkgs[0].storage_uuid_to_total_experiences callback_metric_means = dict() def update_keys_misc( key_or_dict: Union[str, Dict[str, Any]], stage_component_uuid: Optional[str] = None, ): # Important to use mode and not self.mode here return self._update_keys( d=key_or_dict, tag_if_not_a_loss="misc", mode=mode, stage_component_uuid=stage_component_uuid, ) def update_keys_metric( key_or_dict: Union[str, Dict[str, Any]], stage_component_uuid: Optional[str] = None, ): # Important to use mode and not self.mode here return self._update_keys( d=key_or_dict, tag_if_not_a_loss="metrics", mode=mode, stage_component_uuid=stage_component_uuid, ) if training and log_writer is not None: log_writer.add_scalar( tag=update_keys_misc("pipeline_stage"), scalar_value=pkgs[0].pipeline_stage, global_step=training_steps, ) callback_metric_means[update_keys_misc("pipeline_stage")] = pkgs[ 0 ].pipeline_stage storage_uuid_to_total_experiences_key = {} for storage_uuid, val in storage_uuid_to_total_experiences.items(): total_experiences_key = update_keys_misc( f"{storage_uuid}_total_experiences" ) storage_uuid_to_total_experiences_key[storage_uuid] = total_experiences_key if training and log_writer is not None: log_writer.add_scalar( tag=total_experiences_key, scalar_value=val, global_step=training_steps, ) callback_metric_means[total_experiences_key] = val metrics_and_info_tracker = ScalarMeanTracker() 
scalar_name_to_total_storage_experience = {} scalar_name_to_total_experiences_key = {} storage_uuid_to_stage_component_uuids = defaultdict(lambda: set()) metric_dicts_list, render, checkpoint_file_name = [], {}, [] tasks_callback_data = [] for pkg in pkgs: metrics_and_info_tracker.add_scalars( scalars=update_keys_metric(pkg.metrics_tracker.means()), n=update_keys_metric(pkg.metrics_tracker.counts()), ) tasks_callback_data.extend(pkg.task_callback_data) metric_dicts_list.extend(pkg.metric_dicts) if pkg.viz_data is not None: render.update(pkg.viz_data) checkpoint_file_name.append(pkg.checkpoint_file_name) for ( (stage_component_uuid, storage_uuid), info_tracker, ) in pkg.info_trackers.items(): if stage_component_uuid is not None: storage_uuid_to_stage_component_uuids[storage_uuid].add( stage_component_uuid ) info_means = update_keys_misc( info_tracker.means(), stage_component_uuid, ) info_counts = update_keys_misc( info_tracker.counts(), stage_component_uuid, ) metrics_and_info_tracker.add_scalars( scalars=info_means, n=info_counts, ) total_exp_for_storage = pkg.storage_uuid_to_total_experiences[ storage_uuid ] if stage_component_uuid is None: assert total_exp_for_storage == training_steps for scalar_name in info_means: if scalar_name in scalar_name_to_total_storage_experience: assert ( total_exp_for_storage == scalar_name_to_total_storage_experience[scalar_name] ), ( f"For metric {scalar_name}: there is disagreement between the training steps parameter" f" across different workers ({total_exp_for_storage} !=" f" {scalar_name_to_total_storage_experience[scalar_name]}). This suggests an error in " f" AllenAct, please report this issue at https://github.com/allenai/allenact/issues." 
) else: scalar_name_to_total_storage_experience[scalar_name] = ( total_exp_for_storage ) scalar_name_to_total_experiences_key[scalar_name] = ( storage_uuid_to_total_experiences_key[storage_uuid] ) if any(checkpoint_file_name): ckpt_to_store = None for ckpt in checkpoint_file_name: if ckpt is not None: ckpt_to_store = ckpt assert ckpt_to_store is not None checkpoint_file_name = [ckpt_to_store] # assert all_equal( # checkpoint_file_name # ), f"All {mode} logging packages must have the same checkpoint_file_name." message = [ f"{mode.upper()}: {training_steps} rollout steps ({pkgs[0].storage_uuid_to_total_experiences})" ] metrics_and_info_means = metrics_and_info_tracker.means() callback_metric_means.update(metrics_and_info_means) for k in sorted( metrics_and_info_means.keys(), key=lambda mean_key: (mean_key.count("/"), mean_key), ): if log_writer is not None: log_writer.add_scalar( tag=k, scalar_value=metrics_and_info_means[k], global_step=scalar_name_to_total_storage_experience.get( k, training_steps ), ) short_key = ( "/".join(k.split("/")[1:]) if k.startswith(f"{mode}-") and "/" in k else k ) message.append(f"{short_key} {metrics_and_info_means[k]:.3g}") if training: # Log information about FPS and EPS (experiences per second, for non-rollout storage). # Not needed during testing or validation. 
message += [f"elapsed_time {(current_time - last_time):.3g}s"] if last_steps > 0: fps = (training_steps - last_steps) / (current_time - last_time) message += [f"approx_fps {fps:.3g}"] approx_fps_key = update_keys_misc("approx_fps") if log_writer is not None: log_writer.add_scalar(approx_fps_key, fps, training_steps) callback_metric_means[approx_fps_key] = fps for ( storage_uuid, last_total_exp, ) in last_storage_uuid_to_total_experiences.items(): if storage_uuid in storage_uuid_to_total_experiences: cur_total_exp = storage_uuid_to_total_experiences[storage_uuid] eps = (cur_total_exp - last_total_exp) / (current_time - last_time) message += [f"{storage_uuid}/approx_eps {eps:.3g}"] for stage_component_uuid in storage_uuid_to_stage_component_uuids[ storage_uuid ]: approx_eps_key = update_keys_misc( f"approx_eps", stage_component_uuid, ) callback_metric_means[approx_eps_key] = eps scalar_name_to_total_experiences_key[approx_eps_key] = ( storage_uuid_to_total_experiences_key[storage_uuid] ) if log_writer is not None: log_writer.add_scalar( approx_eps_key, eps, cur_total_exp, ) metrics_and_info_means_with_metrics_dicts_list = copy.deepcopy( metrics_and_info_means ) metrics_and_info_means_with_metrics_dicts_list.update( {"training_steps": training_steps, "tasks": metric_dicts_list} ) if all_results is not None: all_results.append(metrics_and_info_means_with_metrics_dicts_list) num_tasks = sum([pkg.num_non_empty_metrics_dicts_added for pkg in pkgs]) num_tasks_completed_key = update_keys_misc("num_tasks_completed_since_last_log") if log_writer is not None: log_writer.add_scalar(num_tasks_completed_key, num_tasks, training_steps) callback_metric_means[num_tasks_completed_key] = num_tasks message.append(f"new_tasks_completed {num_tasks}") if not training: message.append(f"checkpoint {checkpoint_file_name[0]}") get_logger().info(" ".join(message)) for callback in self.callbacks: if mode == TRAIN_MODE_STR: callback.on_train_log( metrics=metric_dicts_list, 
def process_valid_package(
    self,
    log_writer: Optional[SummaryWriter],
    pkg: LoggingPackage,
    all_results: Optional[List[Any]] = None,
):
    """Log a single validation package.

    The package is wrapped in a one-element list and forwarded to
    `_process_logging_packages`. All "last ..." arguments are passed as `None`.

    # Parameters

    log_writer : Tensorboard writer, or `None` to skip tensorboard logging.
    pkg : The validation logging package.
    all_results : Optional list forwarded to `_process_logging_packages`.

    # Returns

    Whatever `_process_logging_packages` returns.
    """
    no_previous_state = {
        "last_steps": None,
        "last_storage_uuid_to_total_experiences": None,
        "last_time": None,
    }
    return self._process_logging_packages(
        log_writer=log_writer,
        pkgs=[pkg],
        all_results=all_results,
        **no_previous_state,
    )
last_steps=None, last_storage_uuid_to_total_experiences=None, last_time=None, all_results=all_results, ) def log_and_close( self, start_time_str: str, nworkers: int, test_steps: Sequence[int] = (), metrics_file: Optional[str] = None, ) -> List[Dict]: ptitle(f"AllenAct-Logging-{self.local_start_time_str}") finalized = False log_writer: Optional[SummaryWriter] = None if not self.disable_tensorboard: log_writer = SummaryWriter( log_dir=self.log_writer_path(start_time_str), filename_suffix=f"__{self.mode}_{self.local_start_time_str}", ) # To aggregate/buffer metrics from trainers/testers collected: List[LoggingPackage] = [] last_train_steps = 0 last_storage_uuid_to_total_experiences = {} last_train_time = time.time() # test_steps = sorted(test_steps, reverse=True) eval_results: List[Dict] = [] unfinished_workers = nworkers try: while True: try: package: Union[ LoggingPackage, Union[Tuple[str, Any], Tuple[str, Any, Any]] ] = self.queues["results"].get(timeout=1) if isinstance(package, LoggingPackage): pkg_mode = package.mode if pkg_mode == TRAIN_MODE_STR: collected.append(package) if len(collected) >= nworkers: collected = sorted( collected, key=lambda pkg: ( pkg.training_steps, *sorted( pkg.storage_uuid_to_total_experiences.items() ), ), ) if ( collected[nworkers - 1].training_steps == collected[0].training_steps and collected[ nworkers - 1 ].storage_uuid_to_total_experiences == collected[0].storage_uuid_to_total_experiences ): # ensure all workers have provided the same training_steps and total_experiences ( last_train_steps, last_storage_uuid_to_total_experiences, last_train_time, ) = self.process_train_packages( log_writer=log_writer, pkgs=collected[:nworkers], last_steps=last_train_steps, last_storage_uuid_to_total_experiences=last_storage_uuid_to_total_experiences, last_time=last_train_time, ) collected = collected[nworkers:] elif len(collected) > 2 * nworkers: get_logger().warning( f"Unable to aggregate train packages from all {nworkers} workers" f"after 
{len(collected)} packages collected" ) elif ( pkg_mode == VALID_MODE_STR ): # they all come from a single worker if ( package.training_steps is not None ): # no validation samplers self.process_valid_package( log_writer=log_writer, pkg=package, all_results=( eval_results if self._collect_valid_results else None ), ) if metrics_file is not None: with open( metrics_file.format(package.training_steps), "w" ) as f: json.dump( eval_results[-1], f, indent=4, sort_keys=True, cls=NumpyJSONEncoder, ) get_logger().info( "Written valid results file {}".format( metrics_file.format( package.training_steps ), ) ) if ( finalized and self.queues["checkpoints"].empty() ): # assume queue is actually empty after trainer finished and no checkpoints in queue break elif pkg_mode == TEST_MODE_STR: collected.append(package) if len(collected) >= nworkers: collected = sorted( collected, key=lambda x: x.training_steps ) # sort by num_steps if ( collected[nworkers - 1].training_steps == collected[0].training_steps ): # ensure nworkers have provided the same num_steps self.process_test_packages( log_writer=log_writer, pkgs=collected[:nworkers], all_results=eval_results, ) collected = collected[nworkers:] with open(metrics_file, "w") as f: json.dump( eval_results, f, indent=4, sort_keys=True, cls=NumpyJSONEncoder, ) get_logger().info( f"Updated {metrics_file} up to checkpoint" f" {test_steps[len(eval_results) - 1]}" ) else: get_logger().error( f"Runner received unknown package of type {pkg_mode}" ) else: pkg_mode = package[0] if pkg_mode == "train_stopped": if package[1] == 0: finalized = True if not self.running_validation: get_logger().info( "Terminating runner after trainer done (no validation)" ) break else: raise Exception( f"Train worker {package[1] - 1} abnormally terminated" ) elif pkg_mode == "valid_stopped": raise Exception( f"Valid worker {package[1] - 1} abnormally terminated" ) elif pkg_mode == "test_stopped": if package[1] == 0: unfinished_workers -= 1 if unfinished_workers == 0: 
def get_checkpoint_files(
    self,
    checkpoint_path_dir_or_pattern: str,
    approx_ckpt_step_interval: Optional[int] = None,
):
    """Resolve a checkpoint location into a list of checkpoint paths sorted by step.

    # Parameters

    checkpoint_path_dir_or_pattern : Either a `wandb://` reference (handed to
        `download_checkpoint_from_wandb`), a directory (all `*.pt` files directly
        inside it are used), or a glob pattern.
    approx_ckpt_step_interval : If given, only the checkpoints whose step counts are
        closest to `0, interval, 2 * interval, ...` are returned.

    # Returns

    Checkpoint paths ordered by `(step count, path)`.

    # Raises

    FileNotFoundError : If the directory / pattern matches no file.
    """
    location = checkpoint_path_dir_or_pattern

    if location.startswith("wandb://"):
        eval_dir = "/tmp/wandb_ckpts_to_eval/{}".format(self.local_start_time_str)
        os.makedirs(eval_dir, exist_ok=True)
        return download_checkpoint_from_wandb(
            location, eval_dir, only_allow_one_ckpt=False
        )

    # A directory is shorthand for "every .pt file in this directory".
    pattern = os.path.join(location, "*.pt") if os.path.isdir(location) else location

    matches = glob.glob(pattern, recursive=True)
    if not matches:
        raise FileNotFoundError(
            f"Could not find any checkpoints at {os.path.abspath(pattern)}, is it possible"
            f" the path has been mispecified?"
        )

    # Sorting tuples orders by step count first and by path on ties.
    ordered = sorted((self.step_from_checkpoint(path), path) for path in matches)
    sorted_paths = [path for _, path in ordered]

    if approx_ckpt_step_interval is None:
        return sorted_paths

    assert approx_ckpt_step_interval > 0, "`approx_ckpt_step_interval` must be >0"

    steps = np.array([num_steps for num_steps, _ in ordered])
    num_targets = math.ceil(ordered[-1][0] / approx_ckpt_step_interval) + 1
    # For every multiple of the interval keep the index of the nearest checkpoint;
    # the set removes duplicates when one checkpoint is nearest to several targets.
    chosen = {
        int(np.argmin(np.abs(steps - k * approx_ckpt_step_interval)))
        for k in range(num_targets)
    }
    return [sorted_paths[ind] for ind in sorted(chosen)]
def close(self, verbose=True):
    """Terminate and join all worker processes; does nothing if already closed.

    Every process that is still alive is first sent a termination signal. All
    processes are then joined with a one second timeout; an exception raised
    while joining is logged (when `verbose`) and does not stop the remaining
    processes from being joined. Finally `self.processes` is cleared and the
    runner is marked as closed.

    # Parameters

    verbose : Whether progress and errors are written to the logger.
    """
    if self._is_closed:
        return

    def logif(s: Union[str, Exception]):
        if not verbose:
            return
        if isinstance(s, str):
            get_logger().info(s)
        elif isinstance(s, Exception):
            get_logger().exception(traceback.format_exc())
        else:
            raise NotImplementedError()

    # Pass 1: signal every live process before waiting on any of them.
    for kind, workers in self.processes.items():
        for idx, worker in enumerate(workers):
            if worker.is_alive():
                logif(f"Terminating {kind} {idx}")
                worker.terminate()

    # Pass 2: wait (briefly) for each process to exit.
    for kind, workers in self.processes.items():
        for idx, worker in enumerate(workers):
            try:
                logif(f"Joining {kind} {idx}")
                worker.join(1)
                logif(f"Closed {kind} {idx}")
            except Exception as e:
                logif(f"Exception raised when closing {kind} {idx}")
                logif(e)

    self.processes.clear()
    self._is_closed = True
class ExperienceStorage(abc.ABC):
    """Abstract interface that every experience storage must implement.

    Subclasses must provide `initialize`, `add`, `to`, `set_partition` and the
    `total_experiences` property. `before_updates` and `after_updates` are
    optional hooks that do nothing unless overridden.
    """

    @abc.abstractmethod
    def initialize(self, *, observations: ObservationType, **kwargs):
        """Prepare the storage given a first set of `observations`.

        All arguments are keyword-only; subclasses may require additional ones.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def add(
        self,
        observations: ObservationType,
        memory: Optional[Memory],
        actions: torch.Tensor,
        action_log_probs: torch.Tensor,
        value_preds: torch.Tensor,
        rewards: torch.Tensor,
        masks: torch.Tensor,
    ):
        """Add one step of experience to the storage.

        # Parameters

        observations : Observations after taking `actions`
        memory: Memory after having observed the last set of observations.
        actions: Actions taken to reach the current state, i.e. taking these actions has led to a new state with
            new `observations`.
        action_log_probs : Log probs of `actions`
        value_preds : Value predictions corresponding to the last observations
            (i.e. the states before taking `actions`).
        rewards : Rewards from taking `actions` in the last set of states.
        masks : Masks corresponding to the current states, having 0 entries where `observations` correspond to
            observations from the beginning of a new episode.
        """
        raise NotImplementedError

    def before_updates(self, **kwargs):
        """Optional hook; the default implementation does nothing."""
        pass

    def after_updates(self, **kwargs) -> int:
        """Optional hook; the default implementation does nothing.

        NOTE(review): annotated as returning `int`, but this default body
        returns `None`.
        """
        pass

    @abc.abstractmethod
    def to(self, device: torch.device):
        """Move the storage to `device`."""
        pass

    @abc.abstractmethod
    def set_partition(self, index: int, num_parts: int):
        """Presumably restricts this storage to part `index` out of `num_parts`
        parts (e.g. one part per worker) -- TODO confirm against callers."""
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def total_experiences(self) -> int:
        """Total number of experiences counted by this storage."""
        raise NotImplementedError
def __init__(self, init_size: int = 50):
    """Create an empty rollout storage.

    No tensors are allocated here: masks and actions are allocated by
    `initialize`, while rewards, value predictions, returns and action log
    probabilities are allocated on the first call to `add`.

    # Parameters

    init_size : Initial number of steps the storage can hold. The capacity is
        doubled by `_double_storage_size` when it is exhausted.
    """
    # --- Bookkeeping -----------------------------------------------------
    self.full_size = init_size
    self.step = 0
    self._total_steps = 0
    self._before_update_called = False
    self.device = torch.device("cpu")
    self.dim_names = ["step", "sampler", None]

    # --- Mappings between nested names and their flattened storage keys ---
    self.flattened_to_unflattened: Dict[str, Dict[str, List[str]]] = {
        group: {} for group in ("memory", "observations")
    }
    self.unflattened_to_flattened: Dict[str, Dict[Tuple[str, ...], str]] = {
        group: {} for group in ("memory", "observations")
    }

    # --- Set by `initialize` ----------------------------------------------
    self.memory_specification: Optional[FullMemorySpecType] = None
    self.action_space: Optional[gym.Space] = None
    self.memory_first_last: Optional[Memory] = None
    self._masks_full: Optional[torch.Tensor] = None
    self._actions_full: Optional[torch.Tensor] = None
    self._prev_actions_full: Optional[torch.Tensor] = None

    # --- Observation storage, filled as observations are inserted ---------
    self._observations_full: Memory = Memory()

    # --- Lazily created by the first call to `add` ------------------------
    self._rewards_full: Optional[torch.Tensor] = None
    self._value_preds_full: Optional[torch.Tensor] = None
    self._returns_full: Optional[torch.Tensor] = None
    self._action_log_probs_full: Optional[torch.Tensor] = None

    # self._advantages and self._normalized_advantages are only computed
    # when `before_updates` is called
    self._advantages: Optional[torch.Tensor] = None
    self._normalized_advantages: Optional[torch.Tensor] = None
@staticmethod
def create_memory(
    spec: Optional[FullMemorySpecType],
    num_samplers: int,
) -> Memory:
    """Create zero-initialised memory tensors following `spec`.

    For every key, `spec[key]` is a `(dims_template, dtype)` pair where
    `dims_template` is a sequence of `(dimension name, size)` pairs, one of
    which must be named `"sampler"`. The created tensor has a leading "step"
    dimension of size 2, followed by the template's dimensions with the
    sampler dimension resized to `num_samplers`.

    # Parameters

    spec : The memory specification, or `None` for no memory.
    num_samplers : Size to use for the sampler dimension.

    # Returns

    A `Memory` with one entry per key of `spec` (empty if `spec` is `None`).
    """
    memory = Memory()
    if spec is None:
        return memory

    for key in spec:
        dims_template, dtype = spec[key]

        # Prepend the step dimension (size 2) to the templated dimensions.
        names = ["step", *(dim[0] for dim in dims_template)]
        sizes = [2, *(dim[1] for dim in dims_template)]

        sampler_axis = names.index("sampler")
        sizes[sampler_axis] = num_samplers

        memory.check_append(
            key=key,
            tensor=torch.zeros(*sizes, dtype=dtype),
            sampler_dim=sampler_axis,
        )

    return memory
def insert_memory(
    self,
    memory: Optional[Memory],
    time_step: int,
):
    """Store `memory` in the first (index 0) or last (index 1) memory slot.

    Only two memories are kept:

    * the first memory, used for loss computation when the agent model has to
      compute all its outputs again given the full batch, and
    * the last memory, used by the agent when collecting rollouts.

    # Parameters

    memory : Memory to store. If `None`, nothing is stored and the storage's
        own memory is asserted to be empty.
    time_step : Step the memory belongs to; any value above 1 is clamped to 1.
    """
    if memory is not None:
        slot = min(time_step, 1)  # every step after the first maps to "last"
        self.insert_tensors(
            storage=self.memory_first_last,
            storage_name="memory",
            unflattened=memory,
            time_step=slot,
        )
        return

    assert len(self.memory_first_last) == 0
def create_tensor_storage(
    self, num_steps: int, template: torch.Tensor
) -> torch.Tensor:
    """Allocate zeroed storage made of `num_steps` copies of a zeroed `template`.

    # Parameters

    num_steps : Number of zeroed copies concatenated along dimension 0.
    template : Tensor whose shape and dtype each copy mirrors. Callers in this
        class pass a template with a leading step dimension of size 1.

    # Returns

    A zero tensor on `self.device` of shape
    `(num_steps * template.shape[0], *template.shape[1:])`.
    """
    zero_block = torch.zeros_like(template).to(self.device)
    return torch.cat([zero_block for _ in range(num_steps)])
value_preds: torch.Tensor, rewards: torch.Tensor, masks: torch.Tensor, ): """See `ExperienceStorage.add` documentation.""" assert ( len(masks.shape) == 2 and masks.shape[1] == 1 ), f"Can only add a single step worth of data at a time (mask shape = {masks.shape})." self.total_experiences += masks.shape[0] if self.step == self.full_size: self._double_storage_size() elif self.step > self.full_size: raise RuntimeError self.insert_observations(observations, time_step=self.step + 1) self.insert_memory(memory, time_step=self.step + 1) assert actions.shape == self._actions_full.shape[1:] self._actions_full[self.step].copy_(actions) # type:ignore self._prev_actions_full[self.step + 1].copy_(actions) # type:ignore self._masks_full[self.step + 1].copy_(masks) # type:ignore if self._rewards_full is None: # We delay the instantiation of storage for `rewards`, `value_preds`, `action_log_probs` and `returns` # as we do not, a priori, know what shape these will be. For instance, if we are in a multi-agent setting # then there may be many rewards (one for each agent). self._rewards_full = self.create_tensor_storage( self.full_size, rewards.unsqueeze(0) ) # add step value_returns_template = value_preds.unsqueeze(0) # add step self._value_preds_full = self.create_tensor_storage( self.full_size + 1, value_returns_template ) self._returns_full = self.create_tensor_storage( self.full_size + 1, value_returns_template ) self._action_log_probs_full = self.create_tensor_storage( self.full_size, action_log_probs.unsqueeze(0) ) self._value_preds_full[self.step].copy_(value_preds) # type:ignore self._rewards_full[self.step].copy_(rewards) # type:ignore self._action_log_probs_full[self.step].copy_( # type:ignore action_log_probs ) self.step += 1 self._before_update_called = False # We set the below to be None just for extra safety. 
def sampler_select(self, keep_list: Sequence[int]):
    """Keep only the samplers whose indices are listed in `keep_list`.

    Observations and memory are reduced through their own `sampler_select`;
    all other tensors are indexed along dimension 1 (the sampler dimension).
    Nothing is copied when `keep_list` has as many entries as there are
    samplers.

    Rewards, value predictions, returns and action log probabilities are
    only allocated by the first call to `add`, so each of them is skipped
    while it is still `None`. This allows `sampler_select` to be called
    before any experience has been added.

    # Parameters

    keep_list : Indices of the samplers to keep.
    """
    keep_list = list(keep_list)
    if self._actions_full.shape[1] == len(keep_list):  # samplers dim
        return  # we are keeping everything, no need to copy

    self._observations_full = self._observations_full.sampler_select(keep_list)
    self.memory_first_last = self.memory_first_last.sampler_select(keep_list)

    # These three are allocated in `initialize` and so always exist here.
    self._actions_full = self._actions_full[:, keep_list]
    self._prev_actions_full = self._prev_actions_full[:, keep_list]
    self._masks_full = self._masks_full[:, keep_list]

    # Lazily allocated tensors: skip those not created yet.
    for attr_name in (
        "_action_log_probs_full",
        "_value_preds_full",
        "_rewards_full",
        "_returns_full",
    ):
        stored = getattr(self, attr_name)
        if stored is not None:
            setattr(self, attr_name, stored[:, keep_list])
def compute_returns(
    self, next_value: torch.Tensor, use_gae: bool, gamma: float, tau: float
):
    """Fill `self.returns` in place by a backward pass over the stored steps.

    Masks and rewards are first given trailing singleton dimensions so that
    they broadcast against the value predictions.

    * With `use_gae`, the last value prediction is overwritten with
      `next_value` and, going backwards,
      `delta_t = r_t + gamma * V_{t+1} * mask_{t+1} - V_t`,
      `gae_t = delta_t + gamma * tau * mask_{t+1} * gae_{t+1}` and
      `returns_t = gae_t + V_t`. The last entry of `self.returns` is left
      untouched in this case.
    * Otherwise the last return is set to `next_value` and
      `returns_t = returns_{t+1} * gamma * mask_{t+1} + r_t`.

    # Parameters

    next_value : Value estimate for the state following the last stored step.
    use_gae : Whether to use generalized advantage estimation.
    gamma : Discount factor.
    tau : GAE mixing coefficient (unused when `use_gae` is false).
    """
    values = self.value_preds
    returns = self.returns
    num_dims = len(values.shape)

    not_done = self._extend_tensor_with_ones(self.masks, desired_num_dims=num_dims)
    rewards = self._extend_tensor_with_ones(self.rewards, desired_num_dims=num_dims)
    last_step = rewards.shape[0] - 1

    if not use_gae:
        # Plain discounted returns, bootstrapped from `next_value`.
        returns[-1] = next_value
        for t in range(last_step, -1, -1):
            returns[t] = returns[t + 1] * gamma * not_done[t + 1] + rewards[t]
        return

    values[-1] = next_value
    gae = 0
    for t in range(last_step, -1, -1):
        td_error = rewards[t] + gamma * values[t + 1] * not_done[t + 1] - values[t]
        # A zero mask at t + 1 stops accumulation across an episode boundary.
        gae = td_error + gamma * tau * not_done[t + 1] * gae  # type:ignore
        returns[t] = gae + values[t]
def unflatten_observations(self, flattened_batch: Memory) -> ObservationType:
    """Rebuild the nested observation dictionary from its flattened form.

    Each flattened name is mapped, through
    `self.flattened_to_unflattened["observations"]`, to the sequence of keys
    leading to its place in the nested result. The stored value is the first
    element of the corresponding entry of `flattened_batch`.

    # Parameters

    flattened_batch : Mapping from flattened names to entries whose element
        `0` is the value to place in the nested dictionary.

    # Returns

    The nested observation dictionary.
    """
    nested: ObservationType = {}
    for flat_name in flattened_batch:
        key_path = self.flattened_to_unflattened["observations"][flat_name]
        node = nested
        # Walk (creating as needed) every intermediate dictionary on the path.
        for key in key_path[:-1]:
            node = cast("ObservationType", node.setdefault(key, {}))
        node[key_path[-1]] = flattened_batch[flat_name][0]
    return nested
class DelaySignalHandling:
    """Context manager that postpones SIGINT/SIGTERM handling until exit.

    While the `with` block runs, SIGINT and SIGTERM are only recorded. On
    exit the previous handlers are re-installed and any recorded signal is
    then delivered to them.

    The manager returns itself from `__enter__`, so
    `with DelaySignalHandling() as delayed:` gives access to
    `delayed.int_signal_received` / `delayed.term_signal_received`.
    """

    # Modified from https://stackoverflow.com/a/21919644
    def __init__(self):
        self.int_signal_received: Optional[Any] = None
        self.term_signal_received: Optional[Any] = None
        self.old_int_handler = None
        self.old_term_handler = None

    def __enter__(self):
        """Install the recording handlers and return this manager."""
        self.int_signal_received = None
        self.term_signal_received = None
        self.old_int_handler = signal.signal(signal.SIGINT, self.int_handler)
        self.old_term_handler = signal.signal(signal.SIGTERM, self.term_handler)
        return self

    def int_handler(self, sig, frame):
        """Record a SIGINT instead of raising `KeyboardInterrupt` now."""
        self.int_signal_received = (sig, frame)
        get_logger().debug("SIGINT received. Delaying KeyboardInterrupt.")

    def term_handler(self, sig, frame):
        """Record a SIGTERM instead of terminating now."""
        self.term_signal_received = (sig, frame)
        get_logger().debug("SIGTERM received. Delaying termination.")

    @staticmethod
    def _restore_handler(signum, handler):
        """Re-install `handler` for `signum`.

        `signal.signal` returns `None` when the previous handler was not
        installed from Python, and it refuses `None` as a handler. In that
        case fall back to the default disposition so that our delaying
        handler never stays installed after the block.
        """
        signal.signal(signum, signal.SIG_DFL if handler is None else handler)

    def __exit__(self, type, value, traceback):
        """Restore the previous handlers and deliver any delayed signal."""
        self._restore_handler(signal.SIGINT, self.old_int_handler)
        self._restore_handler(signal.SIGTERM, self.old_term_handler)

        if self.term_signal_received:
            # For some reason there appear to be cases where the original termination
            # handler is not callable. It is unclear to me exactly why this is the case
            # but here we add a guard to double check that the handler is callable and,
            # if it's not, we re-send the termination signal to the process and let
            # the python internals handle it (note that we've already reset the termination
            # handler to what it was originally above in the signal.signal(...) code).
            if callable(self.old_term_handler):
                self.old_term_handler(*self.term_signal_received)
            else:
                get_logger().debug(
                    "Termination handler could not be called after delaying signal handling."
                    f" Resending the SIGTERM signal. Last (sig, frame) == ({self.term_signal_received})."
                )
                os.kill(os.getpid(), signal.SIGTERM)

        if self.int_signal_received:
            if callable(self.old_int_handler):
                self.old_int_handler(*self.int_signal_received)
            else:
                signal.default_int_handler(*self.int_signal_received)
def __init__(
    self,
    make_sampler_fn: Callable[..., TaskSampler],
    sampler_fn_args: Sequence[Dict[str, Any]] = None,
    callback_sensors: Optional[Sequence[Sensor]] = None,
    auto_resample_when_done: bool = True,
    multiprocessing_start_method: Optional[str] = "forkserver",
    mp_ctx: Optional[BaseContext] = None,
    should_log: bool = True,
    max_processes: Optional[int] = None,
    read_timeout: Optional[
        float
    ] = 60,  # Seconds to wait for a task to return a response before timing out
) -> None:
    """Spawn the worker processes and query their observation/action spaces.

    # Parameters

    make_sampler_fn : Function creating a single `TaskSampler`.
    sampler_fn_args : One keyword-argument dictionary per task sampler. The
        dictionaries are not modified: each worker receives a shallow copy
        extended with an `"mp_ctx"` entry.
    callback_sensors : Optional sensors; a `Sequence` is wrapped in a
        `SensorSuite`, anything else is forwarded unchanged.
    auto_resample_when_done : Forwarded to the workers.
    multiprocessing_start_method : Name of the start method used to build
        the multiprocessing context. Exactly one of this and `mp_ctx` must
        be not `None`.
    mp_ctx : An already constructed multiprocessing context.
    should_log : Whether workers emit informational log messages.
    max_processes : Upper bound on the number of worker processes; task
        samplers are partitioned among the processes.
    read_timeout : Seconds to wait for a worker's response (`None` waits
        indefinitely). The initial observation-space query waits five times
        as long.

    # Raises

    AssertionError : If no sampler arguments are given, or the
        `multiprocessing_start_method` / `mp_ctx` combination is invalid.
    NotImplementedError : If a worker reports a `None` observation space or
        the reported observation spaces differ.
    """
    self._is_waiting = False
    self._is_closed = True
    self.should_log = should_log
    self.max_processes = max_processes
    self.read_timeout = read_timeout

    assert (
        sampler_fn_args is not None and len(sampler_fn_args) > 0
    ), "number of processes to be created should be greater than 0"

    self._num_task_samplers = len(sampler_fn_args)
    self._num_processes = (
        self._num_task_samplers
        if max_processes is None
        else min(max_processes, self._num_task_samplers)
    )

    self._auto_resample_when_done = auto_resample_when_done

    assert (multiprocessing_start_method is None) != (
        mp_ctx is None
    ), "Exactly one of `multiprocessing_start_method`, and `mp_ctx` must be not None."
    if multiprocessing_start_method is not None:
        assert multiprocessing_start_method in self._valid_start_methods, (
            "multiprocessing_start_method must be one of {}. Got '{}'"
        ).format(self._valid_start_methods, multiprocessing_start_method)
        self._mp_ctx = mp.get_context(multiprocessing_start_method)
    else:
        self._mp_ctx = cast(BaseContext, mp_ctx)

    self.npaused_per_process = [0] * self._num_processes
    self.sampler_index_to_process_ind_and_subprocess_ind: Optional[
        List[List[int]]
    ] = None
    self._reset_sampler_index_to_process_ind_and_subprocess_ind()

    self._workers: Optional[List[Union[mp.Process, Thread, BaseProcess]]] = None

    # Hand each sampler the multiprocessing context through a *copy* of its
    # arguments so that the caller's dictionaries are left untouched.
    sampler_fn_args = [
        {**args, "mp_ctx": self._mp_ctx} for args in sampler_fn_args
    ]

    (
        connection_poll_fns,
        connection_read_fns,
        self._connection_write_fns,
    ) = self._spawn_workers(  # noqa
        make_sampler_fn=make_sampler_fn,
        sampler_fn_args_list=list(self._partition_to_processes(sampler_fn_args)),
        callback_sensor_suite=(
            SensorSuite(callback_sensors)
            if isinstance(callback_sensors, Sequence)
            else callback_sensors
        ),
    )
    self._connection_read_fns = [
        self._create_read_function_with_timeout(
            read_fn=read_fn, poll_fn=poll_fn, timeout=self.read_timeout
        )
        for read_fn, poll_fn in zip(connection_read_fns, connection_poll_fns)
    ]

    self._is_closed = False

    for write_fn in self._connection_write_fns:
        write_fn((OBSERVATION_SPACE_COMMAND, None))

    # Note that we increase the read timeout below as initialization can take some time
    init_timeout = 5 * self.read_timeout if self.read_timeout is not None else None
    observation_spaces = [
        space
        for read_fn in self._connection_read_fns
        for space in read_fn(timeout_to_use=init_timeout)  # type: ignore
    ]

    if any(space is None for space in observation_spaces):
        raise NotImplementedError(
            "It appears that the `all_observation_spaces_equal`"
            " is not True for some task sampler created by"
            " VectorSampledTasks. This is not currently supported."
        )

    if any(observation_spaces[0] != space for space in observation_spaces):
        raise NotImplementedError(
            "It appears that the observation spaces of the samplers"
            " created in VectorSampledTasks are not equal."
            " This is not currently supported."
        )

    self.observation_space = observation_spaces[0]
    for write_fn in self._connection_write_fns:
        write_fn((ACTION_SPACE_COMMAND, None))
    self.action_spaces = [
        space for read_fn in self._connection_read_fns for space in read_fn()
    ]
@staticmethod
def _task_sampling_loop_worker(
    worker_id: Union[int, str],
    connection_read_fn: Callable,
    connection_write_fn: Callable,
    make_sampler_fn: Callable[..., TaskSampler],
    sampler_fn_args_list: List[Dict[str, Any]],
    callback_sensor_suite: Optional[SensorSuite],
    auto_resample_when_done: bool,
    should_log: bool,
    child_pipe: Optional[Connection] = None,
    parent_pipe: Optional[Connection] = None,
) -> None:
    """Process worker for creating and interacting with the Tasks/TaskSampler.

    Builds a `SingleProcessVectorSampledTasks` from the given arguments and
    then serves messages read with `connection_read_fn` until a close
    command arrives. Two message shapes are understood:

    * `(sampler_index, command, data)` - run `command` on a single sampler.
      A pause command is answered with `"done"`; close and resume commands
      are rejected by assertion in this form.
    * `(commands, data_list)` - run commands on all unpaused samplers. A
      close command ends the loop (nothing is written back), a resume
      command is answered with `"done"`, and a pause command is rejected by
      assertion in this form.

    Every other command's result is sent back with `connection_write_fn`.

    # Parameters

    worker_id : Identifier used in the process title and log messages.
    connection_read_fn : Blocking callable returning the next message.
    connection_write_fn : Callable used to send a response.
    make_sampler_fn : Forwarded to `SingleProcessVectorSampledTasks`.
    sampler_fn_args_list : Forwarded to `SingleProcessVectorSampledTasks`.
    callback_sensor_suite : Forwarded to `SingleProcessVectorSampledTasks`.
    auto_resample_when_done : Forwarded to `SingleProcessVectorSampledTasks`.
    should_log : Whether to emit informational log messages.
    child_pipe : This worker's end of the pipe; closed on exit if given.
    parent_pipe : The parent's end of the pipe; closed at start if given.

    # Raises

    Any exception (other than `KeyboardInterrupt`) raised while serving is
    logged with its traceback and re-raised.
    """

    ptitle(f"VectorSampledTask: {worker_id}")

    sp_vector_sampled_tasks = SingleProcessVectorSampledTasks(
        make_sampler_fn=make_sampler_fn,
        sampler_fn_args_list=sampler_fn_args_list,
        callback_sensor_suite=callback_sensor_suite,
        auto_resample_when_done=auto_resample_when_done,
        should_log=should_log,
    )

    if parent_pipe is not None:
        parent_pipe.close()  # Means this pipe will close when the calling process closes it
    try:
        while True:
            read_input = connection_read_fn()

            # TODO: Was the below necessary?
            # with DelaySignalHandling():
            #     # Delaying signal handling here is necessary to ensure that we don't
            #     # (when processing a SIGTERM/SIGINT signal) attempt to send data to
            #     # a generator while it is already processing other data.

            # A 3-tuple targets one sampler; anything else is treated as a
            # 2-tuple addressed to all (unpaused) samplers.
            if len(read_input) == 3:
                sampler_index, command, data = read_input

                assert command != CLOSE_COMMAND, "Must close all processes at once."
                assert (
                    command != RESUME_COMMAND
                ), "Must resume all task samplers at once."

                if command == PAUSE_COMMAND:
                    sp_vector_sampled_tasks.pause_at(sampler_index=sampler_index)
                    connection_write_fn("done")
                else:
                    connection_write_fn(
                        sp_vector_sampled_tasks.command_at(
                            sampler_index=sampler_index,
                            command=command,
                            data=data,
                        )
                    )
            else:
                commands, data_list = read_input

                assert (
                    commands != PAUSE_COMMAND
                ), "Cannot pause all task samplers at once."

                if commands == CLOSE_COMMAND:
                    # Will close the `sp_vector_sampled_tasks` in the `finally` clause below
                    break
                elif commands == RESUME_COMMAND:
                    sp_vector_sampled_tasks.resume_all()
                    connection_write_fn("done")
                else:
                    # A single command name is broadcast to every unpaused sampler.
                    if isinstance(commands, str):
                        commands = [
                            commands
                        ] * sp_vector_sampled_tasks.num_unpaused_tasks

                    connection_write_fn(
                        sp_vector_sampled_tasks.command(
                            commands=commands, data_list=data_list
                        )
                    )
    except KeyboardInterrupt:
        # Swallowed on purpose: the worker falls through to the cleanup below.
        if should_log:
            get_logger().info(f"Worker {worker_id} KeyboardInterrupt")
    except Exception as e:
        get_logger().error(
            f"Worker {worker_id} encountered an exception:\n{traceback.format_exc()}"
        )
        raise e
    finally:
        # Best-effort cleanup: a failure while closing the samplers must not
        # prevent the pipe from being closed.
        try:
            sp_vector_sampled_tasks.close()
        except Exception:
            pass

        if child_pipe is not None:
            child_pipe.close()
        if should_log:
            get_logger().info(f"Worker {worker_id} closing.")
def command_at(
    self, sampler_index: int, command: str, data: Optional[Any] = None
) -> Any:
    """Run a command on a single task sampler and return the response.

    # Parameters

    sampler_index : Index (among the unpaused samplers) of the target.
    command : The command to execute.
    data : Optional payload accompanying the command.

    # Returns

    Whatever the worker owning the sampler sends back.
    """
    self._is_waiting = True

    # Translate the global sampler index into (worker, index within worker).
    worker_ind, local_ind = self.sampler_index_to_process_ind_and_subprocess_ind[
        sampler_index
    ]
    message = (local_ind, command, data)
    self._connection_write_fns[worker_ind](message)
    reply = self._connection_read_fns[worker_ind]()

    self._is_waiting = False
    return reply
def close(self) -> None:
    """Shut down all workers (best effort); does nothing if already closed.

    Pending responses are drained without waiting, every worker is sent the
    close command, each worker is given a short time to exit and any worker
    that is still alive afterwards is killed. Failures while draining,
    sending or joining are deliberately ignored.
    """
    if self._is_closed:
        return

    if self._is_waiting:
        for pending_read in self._connection_read_fns:
            try:
                # noinspection PyArgumentList
                pending_read(0)  # Time out immediately
            except Exception:
                pass

    for send in self._connection_write_fns:
        try:
            send((CLOSE_COMMAND, None))
        except Exception:
            pass

    for worker in self._workers:
        try:
            worker.join(timeout=0.1)
        except Exception:
            pass

    # Anything that did not exit within the grace period is killed.
    for straggler in (w for w in self._workers if w.is_alive()):
        straggler.kill()

    self._is_closed = True
def command(
    self, commands: Union[List[str], str], data_list: Optional[List]
) -> List[Any]:
    """Send one command per unpaused task sampler and gather the responses.

    # Parameters

    commands : Either one command per unpaused sampler, or a single command
        name which is then repeated for every unpaused sampler.
    data_list : One payload per unpaused sampler, or `None` to send `None`
        to each of them.

    # Returns

    The responses of all workers, concatenated in worker order.
    """
    self._is_waiting = True

    if isinstance(commands, str):
        commands = [commands] * self.num_unpaused_tasks
    if data_list is None:
        data_list = [None] * self.num_unpaused_tasks

    # Split commands and payloads into one chunk per worker process.
    per_worker_commands = self._partition_to_processes(commands)
    per_worker_data = self._partition_to_processes(data_list)
    for send, worker_commands, worker_data in zip(
        self._connection_write_fns, per_worker_commands, per_worker_data
    ):
        send((worker_commands, worker_data))

    gathered = [
        response for receive in self._connection_read_fns for response in receive()
    ]
    self._is_waiting = False
    return gathered
def attr_at(self, sampler_index: int, attr_name: str) -> Any:
    """Read an attribute (specified by name) from the selected task.

    # Parameters

    sampler_index : Index of the sampler whose task is queried.
    attr_name : Name of the attribute to read.

    # Returns

    The response to the attribute command for that sampler.
    """
    return self.command_at(
        sampler_index=sampler_index, command=ATTR_COMMAND, data=attr_name
    )
def render(
    self, mode: str = "human", *args, **kwargs
) -> Union[np.ndarray, None, List[np.ndarray]]:
    """Render all unpaused Tasks as a tiled image or a list of images.

    # Parameters

    mode : `"raw_rgb_list"` returns the list of per-task images,
        `"rgb_array"` returns the tiled image and `"human"` shows the tiled
        image in an OpenCV window and returns `None`.
    args : Positional arguments forwarded to each task's render call.
    kwargs : Keyword arguments forwarded to each task's render call (each
        task is always asked for `mode="rgb"` unless overridden here).

    # Raises

    NotImplementedError : For any other `mode` (after the images have
        already been requested and tiled).
    """
    render_request = (args, {"mode": "rgb", **kwargs})
    frames = self.command(
        commands=RENDER_COMMAND,
        data_list=[render_request] * self.num_unpaused_tasks,
    )
    if mode == "raw_rgb_list":
        return frames

    tiled = tile_images(frames)
    if mode == "rgb_array":
        return tiled
    if mode != "human":
        raise NotImplementedError

    import cv2

    # Reverse the channel axis before display (frames were requested as "rgb").
    cv2.imshow("vectask", tiled[:, :, ::-1])
    cv2.waitKey(1)
    return None
def __init__(
    self,
    make_sampler_fn: Callable[..., TaskSampler],
    sampler_fn_args_list: Sequence[Dict[str, Any]] = None,
    callback_sensor_suite: Optional[SensorSuite] = None,
    auto_resample_when_done: bool = True,
    should_log: bool = True,
) -> None:
    """Create one task generator per sampler and query their spaces.

    # Parameters

    make_sampler_fn : Function creating a single `TaskSampler`.
    sampler_fn_args_list : One keyword-argument dictionary per sampler. Each
        is copied and given an `"mp_ctx": None` entry unless it already
        provides `"mp_ctx"`.
    callback_sensor_suite : Forwarded to every generator.
    auto_resample_when_done : Forwarded to every generator.
    should_log : Whether informational log messages are emitted.

    # Raises

    AssertionError : If no sampler arguments are given.
    NotImplementedError : If a generator reports a `None` observation space
        or the reported observation spaces differ.
    """
    self._is_closed = True

    assert (
        sampler_fn_args_list is not None and len(sampler_fn_args_list) > 0
    ), "number of processes to be created should be greater than 0"

    self._num_task_samplers = len(sampler_fn_args_list)
    self._auto_resample_when_done = auto_resample_when_done
    self.should_log = should_log

    args_with_ctx = [{"mp_ctx": None, **args} for args in sampler_fn_args_list]
    self._vector_task_generators: List[Generator] = self._create_generators(
        make_sampler_fn=make_sampler_fn,
        sampler_fn_args=args_with_ctx,
        callback_sensor_suite=callback_sensor_suite,
    )

    self._is_closed = False

    observation_spaces = []
    for task_generator in self._vector_task_generators:
        observation_spaces.append(
            task_generator.send((OBSERVATION_SPACE_COMMAND, None))
        )

    if any(space is None for space in observation_spaces):
        raise NotImplementedError(
            "It appears that the `all_observation_spaces_equal`"
            " is not True for some task sampler created by"
            " VectorSampledTasks. This is not currently supported."
        )

    reference_space = observation_spaces[0]
    if any(reference_space != space for space in observation_spaces):
        raise NotImplementedError(
            "It appears that the observation spaces of the samplers"
            " created in VectorSampledTasks are not equal."
            " This is not currently supported."
        )

    self.observation_space = reference_space

    self.action_spaces = []
    for task_generator in self._vector_task_generators:
        self.action_spaces.append(task_generator.send((ACTION_SPACE_COMMAND, None)))

    self._paused: List[Tuple[int, Generator]] = []
@staticmethod
def _task_sampling_loop_generator_fn(
    worker_id: int,
    make_sampler_fn: Callable[..., TaskSampler],
    sampler_fn_args: Dict[str, Any],
    callback_sensor_suite: Optional[SensorSuite],
    auto_resample_when_done: bool,
    should_log: bool,
) -> Generator:
    """Generator for working with Tasks/TaskSampler.

    Creates a task sampler with `make_sampler_fn(**sampler_fn_args)`, samples
    its first task and yields `"started"`. Afterwards every value sent into
    the generator must be a `(command, data)` pair; the generator executes
    the command and yields its result. Sending the close command ends the
    loop, after which the task sampler is closed and the generator finishes.

    # Parameters

    worker_id : Identifier used in log messages.
    make_sampler_fn : Function creating the `TaskSampler`.
    sampler_fn_args : Keyword arguments for `make_sampler_fn`.
    callback_sensor_suite : If given, its observations for a finished task
        are attached to that task's final step result.
    auto_resample_when_done : If `True`, a new task is sampled as soon as the
        current one is done.
    should_log : Whether informational log messages are emitted.

    # Raises

    RuntimeError : If the sampler yields no first task, or no task after a
        reset.
    NotImplementedError : If an unknown command is received.
    """

    task_sampler = make_sampler_fn(**sampler_fn_args)
    current_task = task_sampler.next_task()

    # NOTE: this check happens before the `try` below, so the sampler is not
    # closed by the `finally` clause when it triggers.
    if current_task is None:
        raise RuntimeError(
            "Newly created task sampler had `None` as it's first task. This likely means that"
            " it was not provided with any tasks to generate. This can happen if, e.g., during testing"
            " you have started more processes than you had tasks to test. Currently this is not supported:"
            " every task sampler must be able to generate at least one task."
        )

    try:
        command, data = yield "started"

        while command != CLOSE_COMMAND:
            if command == STEP_COMMAND:
                step_result: RLStepResult = current_task.step(data)
                if current_task.is_done():
                    # Attach end-of-task metrics (if any) to the final step's info.
                    metrics = current_task.metrics()
                    if metrics is not None and len(metrics) != 0:
                        if step_result.info is None:
                            step_result = step_result.clone({"info": {}})
                        step_result.info[COMPLETE_TASK_METRICS_KEY] = metrics

                    # Attach the callback sensors' observations of the finished task.
                    if callback_sensor_suite is not None:
                        task_callback_data = callback_sensor_suite.get_observations(
                            env=current_task.env, task=current_task
                        )
                        if step_result.info is None:
                            step_result = step_result.clone({"info": {}})
                        step_result.info[COMPLETE_TASK_CALLBACK_KEY] = (
                            task_callback_data
                        )

                    # Replace the returned observation with the first observation
                    # of the next task (or `None` if the sampler has no more tasks).
                    if auto_resample_when_done:
                        current_task = task_sampler.next_task()
                        if current_task is None:
                            step_result = step_result.clone({"observation": None})
                        else:
                            step_result = step_result.clone(
                                {"observation": current_task.get_observations()}
                            )

                command, data = yield step_result

            elif command == NEXT_TASK_COMMAND:
                if data is not None:
                    current_task = task_sampler.next_task(**data)
                else:
                    current_task = task_sampler.next_task()
                # NOTE(review): unlike the step branch above, a `None` task is not
                # handled here; `get_observations` would raise AttributeError.
                observations = current_task.get_observations()

                command, data = yield observations

            elif command == RENDER_COMMAND:
                # `data` is an `(args, kwargs)` pair for the task's render method.
                command, data = yield current_task.render(*data[0], **data[1])

            elif (
                command == OBSERVATION_SPACE_COMMAND
                or command == ACTION_SPACE_COMMAND
            ):
                # The command string doubles as the name of the task attribute to read.
                res = getattr(current_task, command)
                command, data = yield res

            elif command == CALL_COMMAND:
                # Call a method of the current task by name.
                function_name, function_args = data
                if function_args is None or len(function_args) == 0:
                    result = getattr(current_task, function_name)()
                else:
                    result = getattr(current_task, function_name)(*function_args)
                command, data = yield result

            elif command == SAMPLER_COMMAND:
                # Call a method of the task sampler by name.
                function_name, function_args = data
                if function_args is None or len(function_args) == 0:
                    result = getattr(task_sampler, function_name)()
                else:
                    result = getattr(task_sampler, function_name)(*function_args)
                command, data = yield result

            elif command == ATTR_COMMAND:
                # Read an attribute of the current task by name.
                property_name = data
                result = getattr(current_task, property_name)
                command, data = yield result

            elif command == SAMPLER_ATTR_COMMAND:
                # Read an attribute of the task sampler by name.
                property_name = data
                result = getattr(task_sampler, property_name)
                command, data = yield result

            elif command == RESET_COMMAND:
                task_sampler.reset()
                current_task = task_sampler.next_task()
                if current_task is None:
                    raise RuntimeError(
                        "After resetting the task sampler it seems to have"
                        " no new tasks (the `task_sampler.next_task()` call"
                        " returned `None` after the reset). This suggests that"
                        " the task sampler's reset method was not implemented"
                        f" correctly (task sampler type is {type(task_sampler)})."
                    )
                command, data = yield "done"
            elif command == SEED_COMMAND:
                task_sampler.set_seed(data)
                command, data = yield "done"
            else:
                raise NotImplementedError()

    except KeyboardInterrupt:
        # Swallowed on purpose; the sampler is still closed in `finally`.
        if should_log:
            get_logger().info(
                "SingleProcessVectorSampledTask {} KeyboardInterrupt".format(
                    worker_id
                )
            )
    except Exception as e:
        get_logger().error(traceback.format_exc())
        raise e
    finally:
        if should_log:
            get_logger().info(
                "SingleProcessVectorSampledTask {} closing.".format(worker_id)
            )
        task_sampler.close()
def close(self) -> None:
    """Send the close command to every task generator (paused ones included).

    Calling this more than once is a no-op. A generator that has already
    finished raises `StopIteration` on `send`; that, as well as a
    `KeyboardInterrupt` arriving while closing, is ignored so that the
    remaining generators are still closed.
    """
    if self._is_closed:
        return

    # `pause_at` moves generators out of `_vector_task_generators` and into
    # `_paused` as `(index, generator)` pairs. They must be closed as well,
    # otherwise they never receive the close command and are only cleaned
    # up whenever the generator object happens to be garbage collected.
    paused_generators = [generator for _, generator in self._paused]

    for g in [*self._vector_task_generators, *paused_generators]:
        try:
            g.send((CLOSE_COMMAND, None))
        except (StopIteration, KeyboardInterrupt):
            pass

    self._is_closed = True
""" generator = self._vector_task_generators.pop(sampler_index) self._paused.append((sampler_index, generator)) def resume_all(self) -> None: """Resumes any paused processes.""" for index, generator in reversed(self._paused): self._vector_task_generators.insert(index, generator) self._paused = [] def command_at( self, sampler_index: int, command: str, data: Optional[Any] = None ) -> Any: """Calls a function (which is passed by name) on the selected task and returns the result. # Parameters index : Which task to call the function on. function_name : The name of the function to call on the task. function_args : Optional function args. # Returns Result of calling the function. """ return self._vector_task_generators[sampler_index].send((command, data)) def command( self, commands: Union[List[str], str], data_list: Optional[List] ) -> List[Any]: """""" if isinstance(commands, str): commands = [commands] * self.num_unpaused_tasks if data_list is None: data_list = [None] * self.num_unpaused_tasks return [ g.send((command, data)) for g, command, data in zip( self._vector_task_generators, commands, data_list ) ] def call_at( self, sampler_index: int, function_name: str, function_args: Optional[List[Any]] = None, ) -> Any: """Calls a function (which is passed by name) on the selected task and returns the result. # Parameters index : Which task to call the function on. function_name : The name of the function to call on the task. function_args : Optional function args. # Returns Result of calling the function. """ return self._vector_task_generators[sampler_index].send( (CALL_COMMAND, (function_name, function_args)) ) def call( self, function_names: Union[str, List[str]], function_args_list: Optional[List[Any]] = None, ) -> List[Any]: """Calls a list of functions (which are passed by name) on the corresponding task (by index). # Parameters function_names : The name of the functions to call on the tasks. function_args_list : List of function args for each function. 
def attr_at(self, sampler_index: int, attr_name: str) -> Any:
    """Gets the attribute (specified by name) of the selected task.

    # Parameters

    sampler_index : Position, among the unpaused generators, of the task
        whose attribute should be read.
    attr_name : The name of the attribute to read.

    # Returns

    The value the selected generator yields in response to the request.
    """
    generator = self._vector_task_generators[sampler_index]
    request = (ATTR_COMMAND, attr_name)
    return generator.send(request)
""" if isinstance(attr_names, str): attr_names = [attr_names] * self.num_unpaused_tasks return [ g.send((ATTR_COMMAND, attr_name)) for g, attr_name in zip(self._vector_task_generators, attr_names) ] def render( self, mode: str = "human", *args, **kwargs ) -> Union[np.ndarray, None, List[np.ndarray]]: """Render observations from all Tasks in a tiled image or a list of images.""" images = [ g.send((RENDER_COMMAND, (args, {"mode": "rgb", **kwargs}))) for g in self._vector_task_generators ] if mode == "raw_rgb_list": return images for index, _ in reversed(self._paused): images.insert(index, np.zeros_like(images[0])) tile = tile_images(images) if mode == "human": import cv2 cv2.imshow("vectask", tile[:, :, ::-1]) cv2.waitKey(1) return None elif mode == "rgb_array": return tile else: raise NotImplementedError def __del__(self): self.close() def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): self.close() ================================================ FILE: allenact/base_abstractions/__init__.py ================================================ ================================================ FILE: allenact/base_abstractions/callbacks.py ================================================ import abc from typing import List, Dict, Any, Sequence, Optional from allenact.base_abstractions.experiment_config import ExperimentConfig from allenact.base_abstractions.sensor import Sensor try: from typing import Literal except ImportError: from typing_extensions import Literal class Callback(abc.ABC): def setup( self, name: str, config: ExperimentConfig, mode: Literal["train", "valid", "test"], **kwargs, ) -> None: """Called once before training begins.""" def on_train_log( self, *, metrics: List[Dict[str, Any]], metric_means: Dict[str, float], tasks_data: List[Any], step: int, scalar_name_to_total_experiences_key: Dict[str, str], checkpoint_file_name: str, **kwargs, ) -> None: """Called once train is supposed to log.""" def on_valid_log( self, *, metrics: 
Dict[str, Any], metric_means: Dict[str, float], tasks_data: List[Any], step: int, scalar_name_to_total_experiences_key: Dict[str, str], checkpoint_file_name: str, **kwargs, ) -> None: """Called after validation ends.""" def on_test_log( self, *, metrics: Dict[str, Any], metric_means: Dict[str, float], tasks_data: List[Any], step: int, scalar_name_to_total_experiences_key: Dict[str, str], checkpoint_file_name: str, **kwargs, ) -> None: """Called after test ends.""" def after_save_project_state(self, base_dir: str) -> None: """Called after saving the project state in base_dir.""" def callback_sensors(self) -> Optional[Sequence[Sensor]]: """Determines the data returned to the `tasks_data` parameter in the above *_log functions.""" ================================================ FILE: allenact/base_abstractions/distributions.py ================================================ import abc from collections import OrderedDict from typing import Any, Union, Callable, TypeVar, Dict, Optional, cast, Protocol import gym import torch import torch.nn as nn from torch.distributions.utils import lazy_property from allenact.algorithms.onpolicy_sync.misc import TrackingInfoType from allenact.base_abstractions.sensor import AbstractExpertActionSensor as Expert from allenact.utils import spaces_utils as su from allenact.utils.misc_utils import all_unique TeacherForcingAnnealingType = TypeVar("TeacherForcingAnnealingType") """ Modify standard PyTorch distributions so they are compatible with this code. """ class Distr(abc.ABC): @abc.abstractmethod def log_prob(self, actions: Any): """Return the log probability/ies of the provided action/s.""" raise NotImplementedError() @abc.abstractmethod def entropy(self): """Return the entropy or entropies.""" raise NotImplementedError() @abc.abstractmethod def sample(self, sample_shape=torch.Size()): """Sample actions.""" raise NotImplementedError() def mode(self): """If available, return the action(s) with highest probability. 
class CategoricalDistr(torch.distributions.Categorical, Distr):
    """PyTorch's `Categorical` with strict shape checks and a `mode`.

    probs or logits are assumed to be passed with step and sampler
    dimensions as in: [step, samplers, ...]
    """

    def mode(self):
        """Return the index of the largest parameter along the last axis."""
        # The reduced axis is dropped so the result has sample()'s shape.
        return torch.argmax(self._param, dim=-1, keepdim=False)

    def log_prob(self, value: torch.Tensor):
        """Return the log probability of `value` without broadcasting.

        `value` must either have the batch shape of the logits, or that
        shape followed by a trailing singleton axis (which is then kept in
        the result).

        # Raises

        NotImplementedError : If `value` has any other shape.
        """
        batch_shape = self.logits.shape[:-1]

        if value.shape == batch_shape:
            return super().log_prob(value=value)

        if value.shape == batch_shape + (1,):
            squeezed_log_prob = super().log_prob(value=value.squeeze(-1))
            return squeezed_log_prob.unsqueeze(-1)

        raise NotImplementedError(
            "Broadcasting in categorical distribution is disabled as it often leads"
            f" to unexpected results. We have that `value.shape == {value.shape}` but"
            f" expected a shape of "
            f" `self.logits.shape[:-1] == {self.logits.shape[:-1]}` or"
            f" `self.logits.shape[:-1] + (1,) == {self.logits.shape[:-1] + (1,)}`"
        )

    @lazy_property
    def log_probs_tensor(self):
        """Log-softmax of the logits over the last axis (computed once)."""
        return self.logits.log_softmax(dim=-1)

    @lazy_property
    def probs_tensor(self):
        """Softmax of the logits over the last axis (computed once)."""
        return self.logits.softmax(dim=-1)
def condition_on_input(self, **ready_actions):
    """Instantiate the wrapped distribution from already sampled actions.

    Does nothing when a distribution is already available. Otherwise the
    stored callable is invoked with the stored positional and keyword
    arguments plus `ready_actions`, and its result becomes `self.distr`.

    # Parameters

    ready_actions : Previously sampled actions, keyed by action group name.
        None of these names may clash with the stored keyword arguments.
    """
    if self.distr is not None:
        return

    fixed_kwargs = self.distr_conditioned_on_input_kwargs
    assert not any(name in fixed_kwargs for name in ready_actions)

    self.distr = self.distr_conditioned_on_input_fn(
        *self.distr_conditioned_on_input_args,
        **fixed_kwargs,
        **ready_actions,
    )
def log_prob(
    self, actions: Dict[str, Any], return_dict: bool = False
) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
    """Log probability of `actions` under the chain of conditional distributions.

    Each conditional distribution is first conditioned on `actions` and then
    evaluated on the entry of `actions` stored under its own group name.

    # Parameters

    actions : One entry per conditional distribution, keyed by group name.
    return_dict : If `True`, return the per-group log probabilities in an
        `OrderedDict` instead of their sum.

    # Returns

    The sum of the per-group log probabilities, or the per-group values
    themselves when `return_dict` is `True`.
    """
    num_distrs = len(self.conditional_distrs)
    assert (
        len(actions) == num_distrs
    ), f"{len(self.conditional_distrs)} conditional distributions for {len(actions)} action groups"

    per_group = OrderedDict() if return_dict else None
    total = 0

    for cond in self.conditional_distrs:
        cond.condition_on_input(**actions)
        group_log_prob = cond.log_prob(actions[cond.action_group_name])
        if per_group is None:
            total = total + group_log_prob
        else:
            per_group[cond.action_group_name] = group_log_prob

    return total if per_group is None else per_group
class TeacherForcingDistr(Distr):
    """Distribution wrapper that can replace sampled actions by expert actions.

    Log probabilities and entropies are those of the wrapped distribution;
    only `sample` is affected by teacher forcing.
    """

    def __init__(
        self,
        distr: Distr,
        obs: Dict[str, Any],
        action_space: gym.spaces.Space,
        num_active_samplers: Optional[int],
        approx_steps: Optional[int],
        teacher_forcing: Optional[TeacherForcingAnnealingType],
        tracking_callback: Optional[TrackingCallback],
        always_enforce: bool = False,
    ):
        """Initializer.

        # Parameters

        distr : The wrapped distribution.
        obs : Observations; must contain the key `"expert_action"`.
        action_space : The action space (a `gym.spaces.Dict` when `distr` is
            a `SequentialDistr`).
        num_active_samplers : If not `None`, the expected size of the sampler
            dimension of sampled actions; also passed to `tracking_callback`.
        approx_steps : Value passed to `teacher_forcing` to obtain the
            current enforcing probability.
        teacher_forcing : Callable mapping `approx_steps` to the probability
            with which the expert action is enforced.
        tracking_callback : If not `None` (and `num_active_samplers` is not
            `None`), called after every `sample` with the teacher forcing info.
        always_enforce : If `True`, the expert action is used whenever the
            expert reports success, ignoring `teacher_forcing`.
        """
        self.distr = distr
        self.is_sequential = isinstance(self.distr, SequentialDistr)

        # action_space is a gym.spaces.Dict for SequentialDistr, or any gym.Space for other Distr
        self.action_space = action_space
        self.num_active_samplers = num_active_samplers
        self.approx_steps = approx_steps
        self.teacher_forcing = teacher_forcing
        self.tracking_callback = tracking_callback
        self.always_enforce = always_enforce

        assert (
            "expert_action" in obs
        ), "When using teacher forcing, obs must contain an `expert_action` uuid"

        obs_space = Expert.flagged_space(
            self.action_space, use_dict_as_groups=self.is_sequential
        )
        self.expert = su.unflatten(obs_space, obs["expert_action"])

    def enforce(
        self,
        sample: Any,
        action_space: gym.spaces.Space,
        teacher: OrderedDict,
        teacher_force_info: Optional[Dict[str, Any]],
        action_name: Optional[str] = None,
    ):
        """Overwrite entries of `sample` with the expert's actions.

        # Parameters

        sample : Actions sampled from the wrapped distribution.
        action_space : The space `sample` belongs to.
        teacher : Expert data holding the expert's action and success flag.
        teacher_force_info : If not `None`, the fraction of enforced actions
            is written into it.
        action_name : Optional suffix for the key written to
            `teacher_force_info`.

        # Returns

        The actions, in the structure of `action_space`, after enforcing.
        """
        actions = su.flatten(action_space, sample)

        assert (
            len(actions.shape) == 3
        ), f"Got flattened actions with shape {actions.shape} (it should be [1 x `samplers` x `flatdims`])"

        if self.num_active_samplers is not None:
            assert actions.shape[1] == self.num_active_samplers

        expert_actions = su.flatten(action_space, teacher[Expert.ACTION_POLICY_LABEL])
        assert (
            expert_actions.shape == actions.shape
        ), f"expert actions shape {expert_actions.shape} doesn't match the model's {actions.shape}"

        # expert_success is 0 if the expert action could not be computed and otherwise equals 1.
        expert_action_exists_mask = teacher[Expert.EXPERT_SUCCESS_LABEL]

        if not self.always_enforce:
            # Enforce with the current teacher forcing probability, but only
            # where an expert action actually exists.
            teacher_forcing_mask = (
                torch.distributions.bernoulli.Bernoulli(
                    torch.tensor(self.teacher_forcing(self.approx_steps))
                )
                .sample(expert_action_exists_mask.shape)
                .long()
                .to(actions.device)
            ) * expert_action_exists_mask
        else:
            teacher_forcing_mask = expert_action_exists_mask

        if teacher_force_info is not None:
            teacher_force_info[
                "teacher_ratio/sampled{}".format(
                    f"_{action_name}" if action_name is not None else ""
                )
            ] = (teacher_forcing_mask.float().mean().item())

        # Append singleton axes so the mask broadcasts against `actions`.
        extended_shape = teacher_forcing_mask.shape + (1,) * (
            len(actions.shape) - len(teacher_forcing_mask.shape)
        )

        # `torch.where` expects a boolean condition; uint8 (`.byte()`)
        # conditions are deprecated. The mask only holds 0/1 values, so the
        # selection is unchanged.
        actions = torch.where(
            teacher_forcing_mask.bool().view(extended_shape), expert_actions, actions
        )

        return su.unflatten(action_space, actions)

    def log_prob(self, actions: Any):
        """Return the wrapped distribution's log probability of `actions`."""
        return self.distr.log_prob(actions)

    def entropy(self):
        """Return the wrapped distribution's entropy."""
        return self.distr.entropy()

    def conditional_entropy(self):
        """Return the wrapped distribution's conditional entropy.

        # Raises

        NotImplementedError : If the wrapped distribution does not define
            `conditional_entropy`.
        """
        if hasattr(self.distr, "conditional_entropy"):
            return self.distr.conditional_entropy()

        raise NotImplementedError(
            f"`conditional_entropy` is not defined for {self.distr}."
        )

    def sample(self, sample_shape=torch.Size()):
        """Sample actions and enforce expert actions on them.

        For a `SequentialDistr` each action group is sampled, enforced and
        then used to condition the following groups. Afterwards the tracking
        callback (if any) is called with the collected teacher forcing info.
        """
        teacher_force_info: Optional[Dict[str, Any]] = None
        if self.approx_steps is not None:
            teacher_force_info = {
                "teacher_ratio/enforced": self.teacher_forcing(self.approx_steps),
            }

        if self.is_sequential:
            res = OrderedDict()
            for cd in cast(SequentialDistr, self.distr).conditional_distrs:
                # Condition on the (already enforced) earlier action groups.
                cd.condition_on_input(**res)
                action_group_name = cd.action_group_name
                res[action_group_name] = self.enforce(
                    cd.sample(sample_shape)[action_group_name],
                    cast(gym.spaces.Dict, self.action_space)[action_group_name],
                    self.expert[action_group_name],
                    teacher_force_info,
                    action_group_name,
                )
        else:
            res = self.enforce(
                self.distr.sample(sample_shape),
                self.action_space,
                self.expert,
                teacher_force_info,
            )

        if self.tracking_callback is not None and self.num_active_samplers is not None:
            self.tracking_callback(
                type=TrackingInfoType.TEACHER_FORCING,
                info=teacher_force_info,
                n=self.num_active_samplers,
            )

        return res
""" super(AddBias, self).__init__() self._bias = nn.Parameter(bias.unsqueeze(1), requires_grad=True) def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: # type: ignore """Adds the stored bias parameters to `x`.""" assert x.dim() in [2, 4] if x.dim() == 2: bias = self._bias.t().view(1, -1) else: bias = self._bias.t().view(1, -1, 1, 1) return x + bias # type:ignore ================================================ FILE: allenact/base_abstractions/experiment_config.py ================================================ """Defines the `ExperimentConfig` abstract class used as the basis of all experiments.""" import abc from typing import Dict, Any, Optional, List, Union, Sequence, Tuple, cast import torch import torch.nn as nn from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph from allenact.base_abstractions.task import TaskSampler from allenact.utils.experiment_utils import TrainingPipeline, Builder from allenact.utils.system import get_logger from allenact.utils.viz_utils import VizSuite def split_processes_onto_devices(nprocesses: int, ndevices: int): assert ( nprocesses == 0 or nprocesses >= ndevices ), "NUM_PROCESSES {} < ndevices {}".format(nprocesses, ndevices) res = [0] * ndevices for it in range(nprocesses): res[it % ndevices] += 1 return res class MachineParams(object): def __init__( self, nprocesses: Union[int, Sequence[int]], devices: Union[ None, int, str, torch.device, Sequence[Union[int, str, torch.device]] ] = None, sensor_preprocessor_graph: Optional[ Union[SensorPreprocessorGraph, Builder[SensorPreprocessorGraph]] ] = None, sampler_devices: Union[ None, int, str, torch.device, Sequence[Union[int, str, torch.device]] ] = None, visualizer: Optional[Union[VizSuite, Builder[VizSuite]]] = None, gpu_ids: Union[int, Sequence[int]] = None, local_worker_ids: Optional[List[int]] = None, ): assert ( gpu_ids is None or devices is None ), "only one of `gpu_ids` or `devices` should be set." 
def set_local_worker_ids(self, local_worker_ids: Optional[List[int]]):
    """Store the ids of the workers that run locally.

    # Parameters

    local_worker_ids : The worker ids to use. When `None` (or empty), all
        worker ids `0, ..., len(self.devices) - 1` are used. Every id must
        be a valid index into `self.devices`.
    """
    if local_worker_ids:
        self.local_worker_ids = local_worker_ids
    else:
        self.local_worker_ids = list(range(len(self.devices)))

    assert all(
        0 <= worker_id < len(self.devices) for worker_id in self.local_worker_ids
    ), (
        f"Passed {len(self.local_worker_ids)} local worker ids {self.local_worker_ids}"
        f" for {len(self.devices)} total devices (workers)"
    )
class FrozenClassVariables(abc.ABCMeta):
    """Metaclass for ExperimentConfig.

    Rejects assignments to class-level attributes of the classes it creates,
    while leaving attribute assignment on their instances untouched.
    """

    def __setattr__(cls, attr, value):
        """Refuse to set `attr` on the class, except for `abc` bookkeeping.

        # Raises

        RuntimeError : When attempting to set any attribute other than
            `__abstractmethods__` or one whose name starts with `_abc_`.
        """
        # These names are written by the `abc` machinery itself.
        abc_bookkeeping = attr == "__abstractmethods__" or attr.startswith("_abc_")

        if abc_bookkeeping or not isinstance(cls, type):
            super().__setattr__(attr, value)
            return

        raise RuntimeError(
            "Cannot edit class-level attributes.\n"
            "Changing the values of class-level attributes is disabled in ExperimentConfig classes.\n"
            "This is to prevent problems that can occur otherwise when using multiprocessing.\n"
            "If you wish to change the value of a configuration, please do so for an instance of that"
            " configuration.\nTriggered by attempting to modify {}".format(
                cls.__name__
            )
        )
""" raise NotImplementedError() @abc.abstractmethod def create_model(self, **kwargs) -> nn.Module: """Create the neural model.""" raise NotImplementedError() @abc.abstractmethod def make_sampler_fn(self, **kwargs) -> TaskSampler: """Create the TaskSampler given keyword arguments. These `kwargs` will be generated by one of `ExperimentConfig.train_task_sampler_args`, `ExperimentConfig.valid_task_sampler_args`, or `ExperimentConfig.test_task_sampler_args` depending on whether the user has chosen to train, validate, or test. """ raise NotImplementedError() def train_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: """Specifies the training parameters for the `process_ind`th training process. These parameters are meant be passed as keyword arguments to `ExperimentConfig.make_sampler_fn` to generate a task sampler. # Parameters process_ind : The unique index of the training process (`0 ≤ process_ind < total_processes`). total_processes : The total number of training processes. devices : Gpu devices (if any) to use. seeds : The seeds to use, if any. deterministic_cudnn : Whether or not to use deterministic cudnn. # Returns The parameters for `make_sampler_fn` """ raise NotImplementedError() def valid_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: """Specifies the validation parameters for the `process_ind`th validation process. See `ExperimentConfig.train_task_sampler_args` for parameter definitions. 
""" raise NotImplementedError() def test_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: """Specifies the test parameters for the `process_ind`th test process. See `ExperimentConfig.train_task_sampler_args` for parameter definitions. """ raise NotImplementedError() ================================================ FILE: allenact/base_abstractions/misc.py ================================================ import abc from typing import ( Dict, Any, TypeVar, Sequence, NamedTuple, Optional, List, Union, Generic, ) import attr import torch EnvType = TypeVar("EnvType") DistributionType = TypeVar("DistributionType") ModelType = TypeVar("ModelType") ObservationType = Dict[str, Union[torch.Tensor, Dict[str, Any]]] class RLStepResult(NamedTuple): observation: Optional[Any] reward: Optional[Union[float, List[float]]] done: Optional[bool] info: Optional[Dict[str, Any]] def clone(self, new_info: Dict[str, Any]): return RLStepResult( observation=( self.observation if "observation" not in new_info else new_info["observation"] ), reward=self.reward if "reward" not in new_info else new_info["reward"], done=self.done if "done" not in new_info else new_info["done"], info=self.info if "info" not in new_info else new_info["info"], ) def merge(self, other: "RLStepResult"): return RLStepResult( observation=( self.observation if other.observation is None else other.observation ), reward=self.reward if other.reward is None else other.reward, done=self.done if other.done is None else other.done, info={ **(self.info if self.info is not None else {}), **(other.info if other is not None else {}), }, ) class ActorCriticOutput(tuple, Generic[DistributionType]): distributions: DistributionType values: torch.FloatTensor extras: Dict[str, Any] # noinspection PyTypeChecker def __new__( cls, distributions: DistributionType, values: torch.FloatTensor, extras: 
def check_append(
    self, key: str, tensor: torch.Tensor, sampler_dim: int
) -> "Memory":
    """Validates and stores a new memory type.

    # Parameters

    key: string identifier of the memory type (must not already be present)
    tensor: memory tensor
    sampler_dim: index of the sampler dimension of `tensor`; must be a valid
        (non-negative) axis of `tensor`

    # Returns

    Updated Memory (i.e. `self`), now mapping `key` to `(tensor, sampler_dim)`
    """
    assert isinstance(key, str), f"key {key} must be str"
    assert isinstance(tensor, torch.Tensor), f"tensor {tensor} must be torch.Tensor"
    assert isinstance(sampler_dim, int), f"sampler_dim {sampler_dim} must be int"

    assert key not in self, f"Reused key {key}"

    assert (
        0 <= sampler_dim < len(tensor.shape)
    ), f"Got sampler_dim {sampler_dim} for tensor with shape {tensor.shape}"

    entry = (tensor, sampler_dim)
    self[key] = entry

    return self
# Parameters keep: a list of sampler indices to keep # Returns Selected memory """ res = Memory() valid = False for name in self: sampler_dim = self.sampler_dim(name) tensor = self.tensor(name) assert len(keep) == 0 or ( 0 <= min(keep) and max(keep) < tensor.shape[sampler_dim] ), "Got min(keep)={} max(keep)={} for memory type {} with shape {}, dim {}".format( min(keep), max(keep), name, tensor.shape, sampler_dim ) if tensor.shape[sampler_dim] > len(keep): tensor = tensor.index_select( dim=sampler_dim, index=torch.as_tensor( list(keep), dtype=torch.int64, device=tensor.device ), ) res.check_append(name, tensor, sampler_dim) valid = True if valid: return res return self def set_tensor(self, key: str, tensor: torch.Tensor) -> "Memory": """Replaces tensor for given key with an updated version. # Parameters key: memory type identifier to update tensor: updated tensor # Returns Updated memory """ assert key in self, "Missing key {}".format(key) assert ( tensor.shape == self[key][0].shape ), "setting tensor with shape {} for former {}".format( tensor.shape, self[key][0].shape ) self[key] = (tensor, self[key][1]) return self def step_select(self, step: int) -> "Memory": """Equivalent to slicing with length 1 for the `step` (i.e first) dimension in rollouts storage. # Parameters step: step to keep # Returns Sliced memory with a single step """ res = Memory() for key in self: tensor = self.tensor(key) assert ( tensor.shape[0] > step ), "attempting to access step {} for memory type {} of shape {}".format( step, key, tensor.shape ) if step != -1: res.check_append( key, self.tensor(key)[step : step + 1, ...], self.sampler_dim(key) ) else: res.check_append( key, self.tensor(key)[step:, ...], self.sampler_dim(key) ) return res def step_squeeze(self, step: int) -> "Memory": """Equivalent to simple indexing for the `step` (i.e first) dimension in rollouts storage. 
# Parameters step: step to keep # Returns Sliced memory with a single step (and squeezed step dimension) """ res = Memory() for key in self: tensor = self.tensor(key) assert ( tensor.shape[0] > step ), "attempting to access step {} for memory type {} of shape {}".format( step, key, tensor.shape ) res.check_append( key, self.tensor(key)[step, ...], self.sampler_dim(key) - 1 ) return res def slice( self, dim: int, start: Optional[int] = None, stop: Optional[int] = None, step: int = 1, ) -> "Memory": """Slicing for dimensions that have same extents in all memory types. It also accepts negative indices. # Parameters dim: the dimension to slice start: the index of the first item to keep if given (default 0 if None) stop: the index of the first item to discard if given (default tensor size along `dim` if None) step: the increment between consecutive indices (default 1) # Returns Sliced memory """ checked = False total: Optional[int] = None res = Memory() for key in self: tensor = self.tensor(key) assert ( len(tensor.shape) > dim ), f"attempting to access dim {dim} for memory type {key} of shape {tensor.shape}" if not checked: total = tensor.shape[dim] checked = True assert ( total == tensor.shape[dim] ), f"attempting to slice along non-uniform dimension {dim}" if start is not None or stop is not None or step != 1: slice_tuple = ( (slice(None),) * dim + (slice(start, stop, step),) + (slice(None),) * (len(tensor.shape) - (1 + dim)) ) sliced_tensor = tensor[slice_tuple] res.check_append( key=key, tensor=sliced_tensor, sampler_dim=self.sampler_dim(key), ) else: res.check_append( key, tensor, self.sampler_dim(key), ) return res def to(self, device: torch.device) -> "Memory": for key in self: tensor = self.tensor(key) if tensor.device != device: self.set_tensor(key, tensor.to(device)) return self class Loss(abc.ABC): pass @attr.s(kw_only=True) class LossOutput: value: torch.Tensor = attr.ib() info: Dict[str, Union[float, int]] = attr.ib() per_epoch_info: Dict[str, Union[float, 
int]] = attr.ib() batch_memory: Memory = attr.ib() stream_memory: Memory = attr.ib() bsize: int = attr.ib() class GenericAbstractLoss(Loss): # noinspection PyMethodOverriding @abc.abstractmethod def loss( # type: ignore self, *, # No positional arguments model: ModelType, batch: ObservationType, batch_memory: Memory, stream_memory: Memory, ) -> LossOutput: """Computes the loss. Loss after processing a batch of data with (part of) a model (possibly with memory). We support two different types of memory: `batch_memory` and `stream_memory` that can be used to compute losses and share computation. ## `batch_memory` During the update phase of training, the following steps happen in order: 1. A `batch` of data is sampled from an `ExperienceStorage` (which stores data possibly collected during previous rollout steps). 2. This `batch` is passed to each of the specified `GenericAbstractLoss`'s and is used, along with the `model`, to compute each such loss. 3. The losses are summed together, gradients are computed by backpropagation, and an update step is taken. 4. The process loops back to (1) with a new batch until. Now supposed that the computation used by a `GenericAbstractLoss` (`LossA`) can be shared across multiple of the `GenericAbstractLoss`'s (`LossB`, ...). For instance, `LossA` might run the visual encoder of `model` across all the images contained in `batch` so that it can compute a classification loss while `LossB` would like to run the same visual encoder on the same images to compute a depth-prediction loss. Without having some sort of memory, you would need to rerun this visual encoder on all images multiple times, wasting computational resources. This is where `batch_memory` comes in: `LossA` is can store the visual representations it computed in `batch_memory` and then `LossB` can access them. Note that the `batch_memory` will be reinitialized after each new `batch` is sampled. 
## `stream_memory` As described above, `batch_memory` treats each batch as its own independent collection of data. But what if your `ExperienceStorage` samples its batches in a streaming fashion? E.g. your `ExperienceStorage` might be a fixed collection of expert trajectories for use with imitation learning. In this case you can't simply treat each batch independently: you might want to save information from one batch to use in another. The simplest case of this would be if your agent `model` uses an RNN and produces a recurrent hidden state. In this case, the hidden state from the end of one batch should be used at the start of computations for the next batch. To allow for this, you can use the `stream_memory`. `stream_memory` is not cleared across batches but, **importantly**, `stream_memory` is detached from the computation graph after each backpropagation step so that the size of the computation graph does not grow unboundedly. # Parameters model: model to run on data batch (both assumed to be on the same device) batch: data to use as input for model (already on the same device as model) batch_memory: See above. stream_memory: See above. # Returns A tuple with: current_loss: total loss current_info: additional information about the current loss batch_memory: `batch_memory` memory after processing current data batch, see above. stream_memory: `stream_memory` memory after processing current data batch, see above. 
class SensorPreprocessorGraph:
    """Represents a graph of preprocessors, with each preprocessor being
    identified through a universally unique id.

    Allows for the construction of observations that are a function of
    sensor readings. For instance, perhaps rather than giving your agent
    a raw RGB image, you'd rather first pass that image through a pre-trained
    convolutional network and only give your agent the resulting features
    (see e.g. the `ResNetPreprocessor` class).

    # Attributes

    preprocessors : Dictionary mapping each preprocessor's (unique) output
        uuid to the preprocessor.
    observation_spaces : The observation spaces of the values returned when
        calling `get_observations`. By default (see the
        `additional_output_uuids` parameter to change this default) the
        observations returned by the `SensorPreprocessorGraph` **include
        only the sink nodes** of the graph (i.e. those that are not used by
        any other preprocessor). Thus if one of the input preprocessors
        takes as input the `'YOUR_SENSOR_UUID'` sensor, then
        `'YOUR_SENSOR_UUID'` will not be returned when calling
        `get_observations`.
    device : The `torch.device` upon which the preprocessors are run.
    compute_order : Uuids of all graph nodes in a topological order, so
        that every node appears after all of the nodes it takes as input.
    """

    preprocessors: Dict[str, Preprocessor]
    observation_spaces: SpaceDict
    device: torch.device

    def __init__(
        self,
        source_observation_spaces: SpaceDict,
        preprocessors: Sequence[Union[Preprocessor, Builder[Preprocessor]]],
        additional_output_uuids: Sequence[str] = tuple(),
    ) -> None:
        """Initializer.

        # Parameters

        source_observation_spaces : The observation spaces of all sensors
            before preprocessing. This generally should be the output of
            `SensorSuite.observation_spaces`.
        preprocessors : The preprocessors that will be included in the
            graph. `Builder` instances are called to obtain the
            preprocessor.
        additional_output_uuids : As described in the documentation for this
            class, the observations returned when calling `get_observations`
            only include, by default, those observations that are not
            processed by any preprocessor. If you'd like to include
            observations that would otherwise not be included, the uuids of
            these sensors should be included as a sequence of strings here.
        """
        self.device: torch.device = torch.device("cpu")

        obs_spaces: Dict[str, gym.Space] = {
            k: source_observation_spaces[k] for k in source_observation_spaces
        }

        self.preprocessors: Dict[str, Preprocessor] = {}
        for preprocessor in preprocessors:
            if isinstance(preprocessor, Builder):
                preprocessor = preprocessor()

            assert (
                preprocessor.uuid not in self.preprocessors
            ), "'{}' is duplicated preprocessor uuid".format(preprocessor.uuid)

            self.preprocessors[preprocessor.uuid] = preprocessor
            obs_spaces[preprocessor.uuid] = preprocessor.observation_space

        # Dependency graph: an edge j -> k means preprocessor k consumes j.
        g = nx.DiGraph()
        for k in obs_spaces:
            g.add_node(k)
        for k in self.preprocessors:
            for j in self.preprocessors[k].input_uuids:
                g.add_edge(j, k)

        assert nx.is_directed_acyclic_graph(
            g
        ), "preprocessors do not form a direct acyclic graph"

        # noinspection PyCallingNonCallable
        self.observation_spaces = SpaceDict(
            spaces={
                uuid: obs_spaces[uuid]
                for uuid in obs_spaces
                if uuid in additional_output_uuids or g.out_degree(uuid) == 0
            }
        )

        # Ensure dependencies are precomputed. A DFS preorder is NOT enough:
        # a node with several inputs could be visited right after its first
        # input and before another (preprocessor) input had been computed.
        # A topological order places every node after all of its inputs.
        self.compute_order = list(nx.topological_sort(g))

    def get(self, uuid: str) -> Preprocessor:
        """Return preprocessor with the given `uuid`.

        # Parameters

        uuid : The unique id of the preprocessor.

        # Returns

        The preprocessor with unique id `uuid`.
        """
        return self.preprocessors[uuid]

    def to(self, device: torch.device) -> "SensorPreprocessorGraph":
        """Moves every preprocessor to `device` (via its own `to` method),
        records the device, and returns this graph."""
        for k, v in self.preprocessors.items():
            self.preprocessors[k] = v.to(device)
        self.device = device
        return self

    def get_observations(
        self, obs: Dict[str, Any], *args: Any, **kwargs: Any
    ) -> Dict[str, Any]:
        """Get processed observations.

        Nodes are visited in `compute_order`; any uuid not already present
        in `obs` is computed by the corresponding preprocessor and stored
        back into `obs` (i.e. `obs` is modified in place).

        # Parameters

        obs : Dict with the available (sensor) observations.

        # Returns

        Dict containing the observations for the uuids in
        `observation_spaces`.
        """
        for uuid in self.compute_order:
            if uuid not in obs:
                obs[uuid] = self.preprocessors[uuid].process(obs)

        return {uuid: obs[uuid] for uuid in self.observation_spaces}
""" uuid: str observation_space: gym.Space def __init__(self, uuid: str, observation_space: gym.Space, **kwargs: Any) -> None: self.uuid = uuid self.observation_space = observation_space def get_observation( self, env: EnvType, task: Optional[SubTaskType], *args: Any, **kwargs: Any ) -> Any: """Returns observations from the environment (or task). # Parameters env : The environment the sensor is used upon. task : (Optionally) a Task from which the sensor should get data. # Returns Current observation for Sensor. """ raise NotImplementedError() class SensorSuite(Generic[EnvType]): """Represents a set of sensors, with each sensor being identified through a unique id. # Attributes sensors: list containing sensors for the environment, uuid of each sensor must be unique. """ sensors: Dict[str, Sensor[EnvType, Any]] observation_spaces: gyms.Dict def __init__(self, sensors: Sequence[Sensor]) -> None: """Initializer. # Parameters param sensors: the sensors that will be included in the suite. """ self.sensors = OrderedDict() spaces: OrderedDict[str, gym.Space] = OrderedDict() for sensor in sensors: assert ( sensor.uuid not in self.sensors ), "'{}' is duplicated sensor uuid".format(sensor.uuid) self.sensors[sensor.uuid] = sensor spaces[sensor.uuid] = sensor.observation_space self.observation_spaces = SpaceDict(spaces=spaces) def get(self, uuid: str) -> Sensor: """Return sensor with the given `uuid`. # Parameters uuid : The unique id of the sensor # Returns The sensor with unique id `uuid`. """ return self.sensors[uuid] def get_observations( self, env: EnvType, task: Optional[SubTaskType], **kwargs: Any ) -> Dict[str, Any]: """Get all observations corresponding to the sensors in the suite. # Parameters env : The environment from which to get the observation. task : (Optionally) the task from which to get the observation. # Returns Data from all sensors packaged inside a Dict. 
""" return { uuid: sensor.get_observation(env=env, task=task, **kwargs) # type: ignore for uuid, sensor in self.sensors.items() } class AbstractExpertSensor(Sensor[EnvType, SubTaskType], abc.ABC): """Base class for sensors that obtain the expert action for a given task (if available).""" ACTION_POLICY_LABEL: str = "action_or_policy" EXPERT_SUCCESS_LABEL: str = "expert_success" _NO_GROUPS_LABEL: str = "__dummy_expert_group__" def __init__( self, action_space: Optional[Union[gym.Space, int]] = None, uuid: str = "expert_sensor_type_uuid", expert_args: Optional[Dict[str, Any]] = None, nactions: Optional[int] = None, use_dict_as_groups: bool = True, **kwargs: Any, ) -> None: """Initialize an `ExpertSensor`. # Parameters action_space : The action space of the agent. This is necessary in order for this sensor to know what its output observation space is. uuid : A string specifying the unique ID of this sensor. expert_args : This sensor obtains an expert action from the task by calling the `query_expert` method of the task. `expert_args` are any keyword arguments that should be passed to the `query_expert` method when called. nactions : [DEPRECATED] The number of actions available to the agent, corresponds to an `action_space` of `gym.spaces.Discrete(nactions)`. use_dict_as_groups : Whether to use the top-level action_space of type `gym.spaces.Dict` as action groups. """ if isinstance(action_space, int): action_space = gym.spaces.Discrete(action_space) elif action_space is None: assert ( nactions is not None ), "One of `action_space` or `nactions` must be not `None`." get_logger().warning( "The `nactions` parameter to `AbstractExpertSensor` is deprecated and will be removed, please use" " the `action_space` parameter instead." 
) action_space = gym.spaces.Discrete(nactions) self.action_space = action_space self.use_groups = ( isinstance(action_space, gym.spaces.Dict) and use_dict_as_groups ) self.group_spaces = ( self.action_space if self.use_groups else OrderedDict( [ ( self._NO_GROUPS_LABEL, self.action_space, ) ] ) ) self.expert_args: Dict[str, Any] = expert_args or {} assert ( "expert_sensor_group_name" not in self.expert_args ), "`expert_sensor_group_name` is reserved for `AbstractExpertSensor`" observation_space = self._get_observation_space() super().__init__(**prepare_locals_for_super(locals())) @classmethod @abc.abstractmethod def flagged_group_space(cls, group_space: gym.spaces.Space) -> gym.spaces.Dict: """gym space resulting from wrapping the given action space (or a derived space, as in `AbstractExpertPolicySensor`) together with a binary action space corresponding to an expert success flag, in a Dict space. # Parameters group_space : The source action space to be (optionally used to derive a policy space,) flagged and wrapped """ raise NotImplementedError @classmethod def flagged_space( cls, action_space: gym.spaces.Space, use_dict_as_groups: bool = True ) -> gym.spaces.Dict: """gym space resulting from wrapping the given action space (or every highest-level entry in a Dict action space), together with binary action space corresponding to an expert success flag, in a Dict space. # Parameters action_space : The agent's action space (to be flagged and wrapped) use_dict_as_groups : Flag enabling every highest-level entry in a Dict action space to be independently flagged. """ use_groups = isinstance(action_space, gym.spaces.Dict) and use_dict_as_groups if not use_groups: return cls.flagged_group_space(action_space) else: return gym.spaces.Dict( [ ( group_space, cls.flagged_group_space(action_space[group_space]), ) for group_space in cast(gym.spaces.Dict, action_space) ] ) def _get_observation_space(self) -> gym.spaces.Dict: """The observation space of the expert sensor. 
For the most basic discrete agent's ExpertActionSensor, it will equal `gym.spaces.Dict([ (self.ACTION_POLICY_LABEL, self.action_space), (self.EXPERT_SUCCESS_LABEL, gym.spaces.Discrete(2))])`, where the first entry hosts the expert action index and the second equals 0 if and only if the expert failed to generate a true expert action. """ return self.flagged_space(self.action_space, use_dict_as_groups=self.use_groups) @lazy_property def _zeroed_observation(self) -> Union[OrderedDict, Tuple]: # AllenAct-style flattened space (to easily generate an all-zeroes action as an array) flat_space = su.flatten_space(self.observation_space) # torch point to correctly unflatten `Discrete` for zeroed output flat_zeroed = su.torch_point(flat_space, np.zeros_like(flat_space.sample())) # unflatten zeroed output and convert to numpy return su.numpy_point( self.observation_space, su.unflatten(self.observation_space, flat_zeroed) ) def flatten_output(self, unflattened): return ( su.flatten( self.observation_space, su.torch_point(self.observation_space, unflattened), ) .cpu() .numpy() ) @abc.abstractmethod def query_expert( self, task: SubTaskType, expert_sensor_group_name: Optional[str], ) -> Tuple[Any, bool]: """Query the expert for the given task (and optional group name). # Returns A tuple (x, y) where x is the expert action or policy and y is False \ if the expert could not determine the optimal action (otherwise True). Here y \ is used for masking. Even when y is False, x should still lie in the space of \ possible values (e.g. if x is the expert policy then x should be the correct length, \ sum to 1, and have non-negative entries). """ raise NotImplementedError def get_observation( self, env: EnvType, task: SubTaskType, *args: Any, **kwargs: Any ) -> Union[OrderedDict, Tuple]: # If the task is completed, we needn't (perhaps can't) find the expert # action from the (current) terminal state. 
if task.is_done(): return self.flatten_output(self._zeroed_observation) actions_or_policies = OrderedDict() for group_name in self.group_spaces: action_or_policy, expert_was_successful = self.query_expert( task=task, expert_sensor_group_name=group_name ) actions_or_policies[group_name] = OrderedDict( [ (self.ACTION_POLICY_LABEL, action_or_policy), (self.EXPERT_SUCCESS_LABEL, expert_was_successful), ] ) return self.flatten_output( actions_or_policies if self.use_groups else actions_or_policies[self._NO_GROUPS_LABEL] ) class AbstractExpertActionSensor(AbstractExpertSensor, abc.ABC): def __init__( self, action_space: Optional[Union[gym.Space, int]] = None, uuid: str = "expert_action", expert_args: Optional[Dict[str, Any]] = None, nactions: Optional[int] = None, use_dict_as_groups: bool = True, **kwargs: Any, ) -> None: super().__init__(**prepare_locals_for_super(locals())) @classmethod def flagged_group_space(cls, group_space: gym.spaces.Space) -> gym.spaces.Dict: """gym space resulting from wrapping the given action space, together with a binary action space corresponding to an expert success flag, in a Dict space. 
class ExpertActionSensor(AbstractExpertActionSensor):
    """(Deprecated) Expert action sensor that delegates to the task's own
    `query_expert` method (if available)."""

    def query_expert(
        self, task: SubTaskType, expert_sensor_group_name: Optional[str]
    ) -> Tuple[Any, bool]:
        """Forwards `self.expert_args` and the group name to
        `task.query_expert` and returns its result unchanged."""
        expert_output = task.query_expert(
            **self.expert_args, expert_sensor_group_name=expert_sensor_group_name
        )
        return expert_output
# Parameters group_space : The source action space to be used to derive a policy space, flagged and wrapped """ return gym.spaces.Dict( [ (cls.ACTION_POLICY_LABEL, su.policy_space(group_space)), (cls.EXPERT_SUCCESS_LABEL, gym.spaces.Discrete(2)), ] ) class ExpertPolicySensor(AbstractExpertPolicySensor): """(Deprecated) A sensor that obtains the expert policy from a given task (if available).""" def query_expert( self, task: SubTaskType, expert_sensor_group_name: Optional[str] ) -> Tuple[Any, bool]: return task.query_expert( **self.expert_args, expert_sensor_group_name=expert_sensor_group_name ) ================================================ FILE: allenact/base_abstractions/task.py ================================================ # Original work Copyright (c) Facebook, Inc. and its affiliates. # Modified work Copyright (c) Allen Institute for AI # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. """Defines the primary data structures by which agents interact with their environment.""" import abc from typing import Any, Dict, Generic, List, Optional, Sequence, Tuple, TypeVar, Union import gym import numpy as np from gym.spaces.dict import Dict as SpaceDict from allenact.base_abstractions.misc import RLStepResult from allenact.base_abstractions.sensor import Sensor, SensorSuite from allenact.utils.misc_utils import deprecated EnvType = TypeVar("EnvType") class Task(Generic[EnvType]): """An abstract class defining a, goal directed, 'task.' Agents interact with their environment through a task by taking a `step` after which they receive new observations, rewards, and (potentially) other useful information. A Task is a helpful generalization of the OpenAI gym's `Env` class and allows for multiple tasks (e.g. point and object navigation) to be defined on a single environment (e.g. AI2-THOR). # Attributes env : The environment. 
sensor_suite: Collection of sensors formed from the `sensors` argument in the initializer. task_info : Dictionary of (k, v) pairs defining task goals and other task information. max_steps : The maximum number of steps an agent can take an in the task before it is considered failed. observation_space: The observation space returned on each step from the sensors. """ env: EnvType sensor_suite: SensorSuite[EnvType] task_info: Dict[str, Any] max_steps: int observation_space: SpaceDict def __init__( self, env: EnvType, sensors: Union[SensorSuite, Sequence[Sensor]], task_info: Dict[str, Any], max_steps: int, **kwargs ) -> None: self.env = env self.sensor_suite = ( SensorSuite(sensors) if not isinstance(sensors, SensorSuite) else sensors ) self.task_info = task_info self.max_steps = max_steps self.observation_space = self.sensor_suite.observation_spaces self._num_steps_taken = 0 self._total_reward: Union[float, List[float]] = 0.0 def get_observations(self, **kwargs) -> Any: return self.sensor_suite.get_observations(env=self.env, task=self, **kwargs) @property @abc.abstractmethod def action_space(self) -> gym.Space: """Task's action space. # Returns The action space for the task. """ raise NotImplementedError() @abc.abstractmethod def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray: """Render the current task state. Rendered task state can come in any supported modes. # Parameters mode : The mode in which to render. For example, you might have a 'rgb' mode that renders the agent's egocentric viewpoint or a 'dev' mode returning additional information. args : Extra args. kwargs : Extra kwargs. # Returns An numpy array corresponding to the requested render. """ raise NotImplementedError() def _increment_num_steps_taken(self) -> None: """Helper function that increases the number of steps counter by one.""" self._num_steps_taken += 1 def step(self, action: Any) -> RLStepResult: """Take an action in the environment (one per agent). 
Takes the action in the environment and returns observations (& rewards and any additional information) corresponding to the agent's new state. Note that this function should not be overwritten without care (instead implement the `_step` function). # Parameters action : The action to take, should be of the same form as specified by `self.action_space`. # Returns A `RLStepResult` object encoding the new observations, reward, and (possibly) additional information. """ assert not self.is_done() sr = self._step(action=action) # If reward is Sequence, it's assumed to follow the same order imposed by spaces' flatten operation if isinstance(sr.reward, Sequence): if isinstance(self._total_reward, Sequence): for it, rew in enumerate(sr.reward): self._total_reward[it] += float(rew) else: self._total_reward = [float(r) for r in sr.reward] else: self._total_reward += float(sr.reward) # type:ignore self._increment_num_steps_taken() # TODO: We need a better solution to the below. It's not a good idea # to pre-increment the step counter as this might play poorly with `_step` # if it relies on some aspect of the current number of steps taken. return sr.clone({"done": sr.done or self.is_done()}) @abc.abstractmethod def _step(self, action: Any) -> RLStepResult: """Helper function called by `step` to take a step by each agent in the environment. Takes the action in the environment and returns observations (& rewards and any additional information) corresponding to the agent's new state. This function is called by the (public) `step` function and is what should be implemented when defining your new task. Having separate `_step` be separate from `step` is useful as this allows the `step` method to perform bookkeeping (e.g. keeping track of the number of steps), without having `_step` as a separate method, everyone implementing `step` would need to copy this bookkeeping code. # Parameters action : The action to take. 
# Returns A `RLStepResult` object encoding the new observations, reward, and (possibly) additional information. """ raise NotImplementedError() def reached_max_steps(self) -> bool: """Has the agent reached the maximum number of steps.""" return self.num_steps_taken() >= self.max_steps @abc.abstractmethod def reached_terminal_state(self) -> bool: """Has the agent reached a terminal state (excluding reaching the maximum number of steps).""" raise NotImplementedError() def is_done(self) -> bool: """Did the agent reach a terminal state or performed the maximum number of steps.""" return self.reached_terminal_state() or self.reached_max_steps() def num_steps_taken(self) -> int: """Number of steps taken by the agent in the task so far.""" return self._num_steps_taken @deprecated def action_names(self) -> Tuple[str, ...]: """Action names of the Task instance. This function has been deprecated and will be removed. This function is a hold-over from when the `Task` abstraction only considered `gym.space.Discrete` action spaces (in which case it makes sense name these actions). This implementation of `action_names` requires that a `class_action_names` method has been defined. This method should be overwritten if `class_action_names` requires key word arguments to determine the number of actions. """ if hasattr(self, "class_action_names"): return self.class_action_names() else: raise NotImplementedError( "`action_names` requires that a function `class_action_names` be defined." " This said, please do not use this functionality as it has been deprecated and will be removed." " If you would like an `action_names` function for your task, feel free to define one" " with the knowledge that the AllenAct internals will ignore it." 
def metrics(self) -> Dict[str, Any]:
    """Collect the summary metrics of a finished task.

    By default this function is automatically called during training and
    the reported metrics logged to tensorboard.

    # Returns

    A dictionary mapping metric names (strings) to their values. It holds
    the episode length (`"ep_length"`), the cumulative reward (`"reward"`)
    and the task's `task_info` object (`"task_info"`), in that order.
    """
    summary: Dict[str, Any] = {}
    summary["ep_length"] = self.num_steps_taken()
    summary["reward"] = self.cumulative_reward
    summary["task_info"] = self.task_info
    return summary
""" raise NotImplementedError() @property @abc.abstractmethod def last_sampled_task(self) -> Optional[Task]: """Get the most recently sampled Task. # Returns The most recently sampled Task. """ raise NotImplementedError() @abc.abstractmethod def next_task(self, force_advance_scene: bool = False) -> Optional[Task]: """Get the next task in the sampler's stream. # Parameters force_advance_scene : Used to (if applicable) force the task sampler to use a new scene for the next task. This is useful if, during training, you would like to train with one scene for some number of steps and then explicitly control when you begin training with the next scene. # Returns The next Task in the sampler's stream if a next task exists. Otherwise None. """ raise NotImplementedError() @abc.abstractmethod def close(self) -> None: """Closes any open environments or streams. Should be run when done sampling. """ raise NotImplementedError() @property @abc.abstractmethod def all_observation_spaces_equal(self) -> bool: """Checks if all observation spaces of tasks that can be sampled are equal. This will almost always simply return `True`. A case in which it should return `False` includes, for example, a setting where you design a `TaskSampler` that can generate different types of tasks, i.e. point navigation tasks and object navigation tasks. In this case, these different tasks may output different types of observations. # Returns True if all Tasks that can be sampled by this sampler have the same observation space. Otherwise False. """ raise NotImplementedError() @abc.abstractmethod def reset(self) -> None: """Resets task sampler to its original state (except for any seed).""" raise NotImplementedError() @abc.abstractmethod def set_seed(self, seed: int) -> None: """Sets new RNG seed. # Parameters seed : New seed. 
""" raise NotImplementedError() ================================================ FILE: allenact/embodiedai/__init__.py ================================================ ================================================ FILE: allenact/embodiedai/aux_losses/__init__.py ================================================ ================================================ FILE: allenact/embodiedai/aux_losses/losses.py ================================================ # Original work Copyright (c) Facebook, Inc. and its affiliates. # Modified work Copyright (c) Allen Institute for AI # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. """Defining the auxiliary loss for actor critic type models. Several of the losses defined in this file are modified versions of those found in https://github.com/joel99/habitat-pointnav-aux/blob/master/habitat_baselines/ """ import abc from typing import Dict, cast, Tuple, Sequence import numpy as np import torch import torch.nn as nn import torch.nn.functional as F from allenact.algorithms.onpolicy_sync.losses.abstract_loss import ( AbstractActorCriticLoss, ObservationType, ) from allenact.base_abstractions.distributions import CategoricalDistr from allenact.base_abstractions.misc import ActorCriticOutput def _bernoulli_subsample_mask_like(masks, p=0.1): return (torch.rand_like(masks) <= p).float() class MultiAuxTaskNegEntropyLoss(AbstractActorCriticLoss): """Used in multiple auxiliary tasks setting. Add a negative entropy loss over all the task weights. 
def loss(  # type: ignore
    self,
    step_count: int,
    batch: ObservationType,
    actor_critic_output: ActorCriticOutput[CategoricalDistr],
    *args,
    **kwargs,
) -> Tuple[torch.FloatTensor, Dict[str, float]]:
    """Negative-entropy loss over the auxiliary-task weights.

    The weights are read from `actor_critic_output.extras[self.UUID]`,
    flattened to rows of `self.num_tasks` entries and wrapped in a
    `CategoricalDistr`; the loss is the mean of the negated entropy of
    those rows. `step_count` and `batch` are not used.

    # Returns

    A tuple `(avg_loss, outputs)` where `outputs` holds the scalar loss
    under `"entropy_loss"` plus, for every task, the batch-averaged weight
    under `"weight_<task name>"`.
    """
    flat_weights = actor_critic_output.extras[self.UUID].view(-1, self.num_tasks)

    neg_entropy = -CategoricalDistr(flat_weights).entropy()
    avg_loss = neg_entropy.mean()

    # Mean weight given to each task across all rows, shape (K,), pulled to
    # the host once as plain Python floats.
    per_task_mean = flat_weights.mean(dim=0).tolist()

    outputs = {"entropy_loss": cast(torch.Tensor, avg_loss).item()}
    for task_index in range(self.num_tasks):
        outputs["weight_" + self.task_names[task_index]] = per_task_mean[task_index]

    return avg_loss, outputs
""" def __init__(self, auxiliary_uuid: str, *args, **kwargs): super().__init__(*args, **kwargs) self.auxiliary_uuid = auxiliary_uuid def loss( # type: ignore self, step_count: int, batch: ObservationType, actor_critic_output: ActorCriticOutput[CategoricalDistr], *args, **kwargs, ) -> Tuple[torch.Tensor, Dict[str, float]]: # auxiliary loss return self.get_aux_loss( **actor_critic_output.extras[self.auxiliary_uuid], observations=batch["observations"], actions=batch["actions"], masks=batch["masks"], ) @abc.abstractmethod def get_aux_loss( self, aux_model: nn.Module, observations: ObservationType, obs_embeds: torch.Tensor, actions: torch.Tensor, beliefs: torch.Tensor, masks: torch.Tensor, *args, **kwargs, ): raise NotImplementedError() def _propagate_final_beliefs_to_all_steps( beliefs: torch.Tensor, masks: torch.Tensor, num_sampler: int, num_steps: int, ): final_beliefs = torch.zeros_like(beliefs) # (T, B, *) start_locs_list = [] end_locs_list = [] for i in range(num_sampler): # right shift: to locate the 1 before 0 and ignore the 1st element end_locs = torch.where(masks[1:, i] == 0)[0] # maybe [], dtype=torch.Long start_locs = torch.cat( [torch.tensor([0]).to(end_locs), end_locs + 1] ) # add the first element start_locs_list.append(start_locs) end_locs = torch.cat( [end_locs, torch.tensor([num_steps - 1]).to(end_locs)] ) # add the last element end_locs_list.append(end_locs) for st, ed in zip(start_locs, end_locs): final_beliefs[st : ed + 1, i] = beliefs[ed, i] return final_beliefs, start_locs_list, end_locs_list class InverseDynamicsLoss(AuxiliaryLoss): """Auxiliary task of Inverse Dynamics from Auxiliary Tasks Speed Up Learning PointGoal Navigation (Ye, 2020) https://arxiv.org/abs/2007.04561 originally from Curiosity-driven Exploration by Self-supervised Prediction (Pathak, 2017) https://arxiv.org/abs/1705.05363.""" UUID = "InvDyn" def __init__( self, subsample_rate: float = 0.2, subsample_min_num: int = 10, *args, **kwargs ): """Subsample the valid samples by the 
rate of `subsample_rate`, if the total num of the valid samples is larger than `subsample_min_num`.""" super().__init__(auxiliary_uuid=self.UUID, *args, **kwargs) self.cross_entropy_loss = nn.CrossEntropyLoss(reduction="none") self.subsample_rate = subsample_rate self.subsample_min_num = subsample_min_num def get_aux_loss( self, aux_model: nn.Module, observations: ObservationType, obs_embeds: torch.FloatTensor, actions: torch.FloatTensor, beliefs: torch.FloatTensor, masks: torch.FloatTensor, *args, **kwargs, ): ## we discard the last action in the batch num_steps, num_sampler = actions.shape # T, B actions = cast(torch.LongTensor, actions) actions = actions[:-1] # (T-1, B) ## find the final belief state based on masks # we did not compute loss here as model.forward is compute-heavy masks = masks.squeeze(-1) # (T, B) final_beliefs, _, _ = _propagate_final_beliefs_to_all_steps( beliefs, masks, num_sampler, num_steps, ) ## compute CE loss decoder_in = torch.cat( [obs_embeds[:-1], obs_embeds[1:], final_beliefs[:-1]], dim=2 ) # (T-1, B, *) preds = aux_model(decoder_in) # (T-1, B, A) # cross entropy loss require class dim at 1 loss = self.cross_entropy_loss( preds.view((num_steps - 1) * num_sampler, -1), # ((T-1)*B, A) actions.flatten(), # ((T-1)*B,) ) loss = loss.view(num_steps - 1, num_sampler) # (T-1, B) # def vanilla_valid_losses(loss, num_sampler, end_locs_batch): # ## this is just used to verify the vectorized version works correctly. 
def get_aux_loss(
    self,
    aux_model: nn.Module,
    observations: ObservationType,
    obs_embeds: torch.FloatTensor,
    actions: torch.FloatTensor,
    beliefs: torch.FloatTensor,
    masks: torch.FloatTensor,
    *args,
    **kwargs,
):
    """Temporal-distance auxiliary loss.

    For every episode found in the rollout, `self.num_pairs` pairs of
    steps are sampled uniformly (with the global NumPy RNG) from inside the
    episode. `aux_model` receives the observation embeddings of both steps
    together with the episode's final belief and has to predict the signed
    step difference `second - first`. Errors are scaled by
    `1 / (episode end - episode start)`; episodes for which that span is
    not strictly larger than `self.epsiode_len_min` get a scale of 0 and so
    contribute nothing (but still count in the final mean).

    # Parameters

    aux_model : Module called on features of size `2 * H1 + H2`; its output
        is squeezed on the last dimension.
    observations : Unused.
    obs_embeds : Observation embeddings, indexed as `[step, sampler, H1]`.
    actions : Only its shape `(T, B)` is read.
    beliefs : Beliefs, indexed as `[step, sampler, H2]`.
    masks : Episode masks; the trailing dimension is squeezed away to give
        `(T, B)`.

    # Returns

    A tuple `(avg_loss, {"total": float})`.
    """
    # `actions` is only used to recover the rollout dimensions.
    num_steps, num_sampler = actions.shape  # T, B

    ## find the final belief state based on masks
    # we did not compute loss here as model.forward is compute-heavy
    masks = masks.squeeze(-1)  # (T, B)

    (
        final_beliefs,
        start_locs_list,
        end_locs_list,
    ) = _propagate_final_beliefs_to_all_steps(
        beliefs,
        masks,
        num_sampler,
        num_steps,
    )

    ## also find the locs_batch of shape (M, 3)
    # the last dim: [0] is the sampler index, [1] and [2] are the start and end
    # steps of one episode.
    # In other words: sampler locs_batch[m, 0] contains one episode that
    # starts at locs_batch[m, 1] and ends at locs_batch[m, 2] (inclusive).
    locs_batch = []
    for i in range(num_sampler):
        locs_batch.append(
            torch.stack(
                [
                    i * torch.ones_like(start_locs_list[i]),
                    start_locs_list[i],
                    end_locs_list[i],
                ],
                dim=-1,
            )
        )  # shape (M[i], 3)
    locs_batch = torch.cat(locs_batch)  # shape (M, 3)

    temporal_dist_max = (
        locs_batch[:, 2] - locs_batch[:, 1]
    ).float()  # end - start, (M)
    # Normalizer is 1 / (end - start) for sufficiently long episodes and 0 for
    # episodes that are too short (which therefore produce zero error below).
    normalizer = torch.where(
        temporal_dist_max > self.epsiode_len_min,
        1.0 / temporal_dist_max,
        torch.tensor([0]).to(temporal_dist_max),
    )  # (M)

    # sample valid pairs: sampled_pairs has shape (M, num_pairs, 2)
    # where M is the total number of episodes in the batch. Both entries of a
    # pair are drawn independently from [start, end], so the second may precede
    # the first (giving a negative target distance).
    locs = locs_batch.cpu().numpy()  # as torch.randint only support int, not tensor
    sampled_pairs = np.random.randint(
        np.repeat(locs[:, [1]], 2 * self.num_pairs, axis=-1),  # (M, 2*k)
        np.repeat(locs[:, [2]] + 1, 2 * self.num_pairs, axis=-1),  # (M, 2*k)
    ).reshape(
        (-1, self.num_pairs, 2)
    )  # (M, k, 2)
    sampled_pairs_batch = torch.from_numpy(sampled_pairs).to(
        locs_batch
    )  # (M, k, 2)

    # Sampler index of every sampled step, laid out like `sampled_pairs_batch`
    # so the two can be used together for advanced indexing.
    num_sampler_batch = locs_batch[:, [0]].expand(
        -1, 2 * self.num_pairs
    )  # (M, 1) -> (M, 2*k)
    num_sampler_batch = num_sampler_batch.reshape(
        -1, self.num_pairs, 2
    )  # (M, k, 2)

    sampled_obs_embeds = obs_embeds[
        sampled_pairs_batch, num_sampler_batch
    ]  # (M, k, 2, H1)
    sampled_final_beliefs = final_beliefs[
        sampled_pairs_batch, num_sampler_batch
    ]  # (M, k, 2, H2)
    # Both steps of a pair lie in the same episode, so the final belief of the
    # first step is also that of the second; only index 0 is used.
    features = torch.cat(
        [
            sampled_obs_embeds[:, :, 0],
            sampled_obs_embeds[:, :, 1],
            sampled_final_beliefs[:, :, 0],
        ],
        dim=-1,
    )  # (M, k, 2*H1 + H2)

    pred_temp_dist = aux_model(features).squeeze(-1)  # (M, k)
    true_temp_dist = (
        sampled_pairs_batch[:, :, 1] - sampled_pairs_batch[:, :, 0]
    ).float()  # (M, k)

    # Scale the error per episode, then take half the squared error.
    pred_error = (pred_temp_dist - true_temp_dist) * normalizer.unsqueeze(1)
    loss = 0.5 * (pred_error).pow(2)
    avg_loss = loss.mean()

    return (
        avg_loss,
        {
            "total": cast(torch.Tensor, avg_loss).item(),
        },
    )
action_padding = torch.zeros( self.planning_steps - 1, num_sampler, action_embed_size ).to( action_embedding ) # (k-1, N, -1) action_padded = torch.cat( (action_embedding, action_padding), dim=0 ) # (T+k-1, N, -1) ## unfold function will create consecutive action sequences action_seq = ( action_padded.unfold(dimension=0, size=self.planning_steps, step=1) .permute(3, 0, 1, 2) .view(self.planning_steps, num_steps * num_sampler, action_embed_size) ) # (k, T*N, -1) ## beliefs GRU output beliefs = beliefs.view(num_steps * num_sampler, -1).unsqueeze(0) # (1, T*N, -1) # get future contexts c_{t+1:t+k} = GRU(b_t, a_{t:t+k-1}) future_contexts_all, _ = aux_model.context_model( action_seq, beliefs ) # (k, T*N, -1) ## NOTE: future_contexts_all starting from next step t+1 to t+k, not t to t+k-1 future_contexts_all = future_contexts_all.view( self.planning_steps, num_steps, num_sampler, -1 ).permute( 1, 0, 2, 3 ) # (k, T, N, -1) # get all the classifier scores I(c_{t+1:t+k}; z_{t+1:t+k}) positives_padding = torch.zeros( self.planning_steps, num_sampler, obs_embed_size ).to( positives ) # (k, N, -1) positives_padded = torch.cat( (positives[1:], positives_padding), dim=0 ) # (T+k-1, N, -1) positives_expanded = positives_padded.unfold( dimension=0, size=self.planning_steps, step=1 ).permute( 0, 3, 1, 2 ) # (T, k, N, -1) positives_logits = aux_model.classifier( torch.cat([positives_expanded, future_contexts_all], -1) ) # (T, k, N, 1) positive_loss = self.cross_entropy_loss( positives_logits, torch.ones_like(positives_logits) ) # (T, k, N, 1) negatives_padding = torch.zeros( self.planning_steps, num_sampler, obs_embed_size ).to( negatives ) # (k, N, -1) negatives_padded = torch.cat( (negatives[1:], negatives_padding), dim=0 ) # (T+k-1, N, -1) negatives_expanded = negatives_padded.unfold( dimension=0, size=self.planning_steps, step=1 ).permute( 0, 3, 1, 2 ) # (T, k, N, -1) negatives_logits = aux_model.classifier( torch.cat([negatives_expanded, future_contexts_all], -1) ) # (T, k, N, 
1) negative_loss = self.cross_entropy_loss( negatives_logits, torch.zeros_like(negatives_logits) ) # (T, k, N, 1) # Masking to get valid scores ## masks: Note which timesteps [1, T+k+1] could have valid queries, at distance (k) (note offset by 1) ## we will extract the **diagonals** as valid_masks from masks later as below ## the vertical axis is (absolute) real timesteps, the horizontal axis is (relative) planning timesteps ## | - - - - - | ## | . | ## | , . | ## | . , . | ## | , . , . | ## | , . , . | ## | , . , | ## | , . | ## | , | ## | - - - - - | masks = masks.squeeze(-1) # (T, N) pred_masks = torch.ones( num_steps + self.planning_steps, self.planning_steps, num_sampler, 1, dtype=torch.bool, ).to( beliefs.device ) # (T+k, k, N, 1) pred_masks[num_steps - 1 :] = ( False # GRU(b_t, a_{t:t+k-1}) is invalid when t >= T, as we don't have real z_{t+1} ) for j in range(1, self.planning_steps + 1): # for j-step predictions pred_masks[: j - 1, j - 1] = ( False # Remove the upper triangle above the diagnonal (but I think this is unnecessary for valid_masks) ) for n in range(num_sampler): has_zeros_batch = torch.where(masks[:, n] == 0)[0] # in j-step prediction, timesteps z -> z + j are disallowed as those are the first j timesteps of a new episode # z-> z-1 because of pred_masks being offset by 1 for z in has_zeros_batch: pred_masks[z - 1 : z - 1 + j, j - 1, n] = ( False # can affect j timesteps ) # instead of the whole range, we actually are only comparing a window i:i+k for each query/target i - for each, select the appropriate k # we essentially gather diagonals from this full mask, t of them, k long valid_diagonals = [ torch.diagonal(pred_masks, offset=-i) for i in range(num_steps) ] # pull the appropriate k per timestep valid_masks = ( torch.stack(valid_diagonals, dim=0).permute(0, 3, 1, 2).float() ) # (T, N, 1, k) -> (T, k, N, 1) # print(valid_masks.int().squeeze(-1)); print(masks) # verify its correctness loss_masks = valid_masks * _bernoulli_subsample_mask_like( 
def get_aux_loss(
    self,
    aux_model: nn.Module,
    observations: ObservationType,
    obs_embeds: torch.Tensor,
    actions: torch.Tensor,
    beliefs: torch.Tensor,
    masks: torch.Tensor,
    *args,
    **kwargs,
):
    """CPC|A auxiliary loss with a multi-class softmax over the batch.

    For every step `t` the belief `b_t` is rolled forward through
    `aux_model.context_model` with the next `self.planning_steps` action
    embeddings. Each resulting context is scored (dot product) against the
    projected observation embeddings of *all* `(step, sampler)` entries in
    the batch, and a cross-entropy loss asks it to pick the embedding that
    was actually observed at the predicted step. Predictions that would
    cross an episode boundary (a zero in `masks`) or run past the end of
    the rollout are masked out.

    # Parameters

    aux_model : Module providing `action_embedder`, `visual_mlp`,
        `context_model` (returning a tuple whose first element holds the
        per-step contexts) and `belief_mlp`. The outputs of `visual_mlp`
        and `belief_mlp` must share their last dimension.
    observations : Unused.
    obs_embeds : Observation embeddings of shape `(T, N, H_O)`.
    actions : Actions, passed to `aux_model.action_embedder`.
    beliefs : Beliefs, reshaped to `(1, T*N, -1)`.
    masks : Episode masks; `masks[t, n]` must hold exactly one element and
        a value of 0 marks the first step of a new episode.

    # Returns

    A tuple `(avg_loss, {"total": float})`. If `self.planning_steps` is not
    in `[1, T]` and `self.allow_skipping` is set, `(0, {})` is returned
    instead.

    # Raises

    RuntimeError : If `self.planning_steps` is not in `[1, T]` and
        `self.allow_skipping` is false.
    """
    # prepare for autoregressive inputs: c_{t+1:t+k} = GRU(b_t, a_{t:t+k-1}) <-> z_{t+k}
    ## where b_t = RNN(b_{t-1}, z_t, a_{t-1}), prev action is optional
    num_steps, num_samplers, obs_embed_size = obs_embeds.shape  # T, N, H_O

    if not (0 < self.planning_steps <= num_steps):
        if self.allow_skipping:
            return 0, {}
        else:
            raise RuntimeError(
                f"Insufficient planning steps: self.planning_steps {self.planning_steps} must"
                f" be greater than zero and less than or equal to num_steps {num_steps}."
            )

    ## prepare action sequences and initial beliefs
    action_embedding = aux_model.action_embedder(actions)  # (T, N, -1)
    action_embed_size = action_embedding.size(-1)
    action_padding = torch.zeros(
        self.planning_steps - 1,
        num_samplers,
        action_embed_size,
        device=action_embedding.device,
    )  # (k-1, N, -1)
    action_padded = torch.cat(
        (action_embedding, action_padding), dim=0
    )  # (T+k-1, N, -1)
    ## unfold function will create consecutive action sequences
    action_seq = (
        action_padded.unfold(dimension=0, size=self.planning_steps, step=1)
        .permute(3, 0, 1, 2)
        .view(self.planning_steps, num_steps * num_samplers, action_embed_size)
    )  # (k, T*N, -1)

    ## project the observation embeddings into the scoring space
    obs_embeds = aux_model.visual_mlp(obs_embeds)  # (T, N, D)

    beliefs = beliefs.view(1, num_steps * num_samplers, -1)  # (1, T*N, -1)

    # get future contexts c_{t+1:t+k} = GRU(b_t, a_{t:t+k-1})
    future_contexts_all, _ = aux_model.context_model(
        action_seq, beliefs
    )  # (k, T*N, -1)

    future_contexts_all = aux_model.belief_mlp(future_contexts_all)  # (k, T*N, D)
    # Flatten using the projection's real width rather than assuming 128, so
    # auxiliary heads with a different hidden size work as well.
    future_contexts_all = future_contexts_all.view(
        -1, future_contexts_all.shape[-1]
    )  # (k*T*N, D)

    obs_embeds = obs_embeds.view(
        num_steps * num_samplers, obs_embeds.shape[-1]
    ).permute(
        1, 0
    )  # (D, T*N)

    # Score every context against every observation embedding in the batch.
    visual_logits = torch.matmul(future_contexts_all, obs_embeds)
    visual_log_probs = F.log_softmax(visual_logits, dim=1)  ## (k*T*N, T*N)

    target = torch.zeros(
        (self.planning_steps, num_steps, num_samplers),
        dtype=torch.long,
        device=beliefs.device,
    )  # (k, T, N)
    loss_mask = torch.zeros(
        (self.planning_steps, num_steps, num_samplers), device=beliefs.device
    )  # (k, T, N)

    # Read the masks on the host once instead of issuing one `.item()` call
    # (a device synchronisation on GPU) per (step, sampler) entry.
    mask_rows = (
        masks[:num_steps, :num_samplers].reshape(num_steps, num_samplers).tolist()
    )

    num_valid_before = 0
    for j in range(num_samplers):
        for i in range(num_steps):
            # Column of `visual_logits` holding the embedding of (step i, sampler j).
            index = i * num_samplers + j

            # Step 0 and the first step of any episode have no in-episode
            # predecessor; this also resets the counter between samplers.
            if i == 0 or mask_rows[i][j] == 0:
                num_valid_before = 0
                continue

            num_valid_before += 1
            # The context predicted `back + 1` steps ahead from step
            # `i - (back + 1)` should select the embedding observed at step i.
            for back in range(min(num_valid_before, self.planning_steps)):
                target[back, i - (back + 1), j] = index
                loss_mask[back, i - (back + 1), j] = 1.0

    target = target.view(-1)  # (k*T*N,)

    loss_value = self.cross_entropy_loss(visual_log_probs, target)
    loss_value = loss_value.view(
        self.planning_steps, num_steps, num_samplers, 1
    )  # (k, T, N, 1)

    loss_mask = loss_mask.unsqueeze(-1)  # (k, T, N, 1)
    loss_valid_masks = loss_mask * _bernoulli_subsample_mask_like(
        loss_mask, self.subsample_rate
    )  # (k, T, N, 1)

    # Average over the entries that survived masking and subsampling; the
    # clamp avoids a division by zero when none did.
    num_valid_losses = torch.count_nonzero(loss_valid_masks)
    avg_multi_class_loss = (loss_value * loss_valid_masks).sum() / torch.clamp(
        num_valid_losses, min=1.0
    )

    return (
        avg_multi_class_loss,
        {
            "total": cast(torch.Tensor, avg_multi_class_loss).item(),
        },
    )
class CPCA4Loss(CPCALoss):
    """`CPCALoss` preset with `planning_steps` fixed to 4 and UUID `"CPCA_4"`."""

    UUID = "CPCA_4"

    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):
        # NOTE(review): `planning_steps` is passed by keyword while `*args` is
        # forwarded positionally, so any extra positional argument would be
        # bound to `planning_steps` a second time -- confirm callers only use
        # keyword arguments.
        super().__init__(
            planning_steps=4, subsample_rate=subsample_rate, *args, **kwargs
        )


class CPCA8Loss(CPCALoss):
    """`CPCALoss` preset with `planning_steps` fixed to 8 and UUID `"CPCA_8"`."""

    UUID = "CPCA_8"

    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):
        super().__init__(
            planning_steps=8, subsample_rate=subsample_rate, *args, **kwargs
        )


class CPCA16Loss(CPCALoss):
    """`CPCALoss` preset with `planning_steps` fixed to 16 and UUID `"CPCA_16"`."""

    UUID = "CPCA_16"

    def __init__(self, subsample_rate: float = 0.2, *args, **kwargs):
        super().__init__(
            planning_steps=16, subsample_rate=subsample_rate, *args, **kwargs
        )
def loss(  # type: ignore
    self,
    step_count: int,
    batch: ObservationType,
    actor_critic_output: ActorCriticOutput[CategoricalDistr],
    *args,
    **kwargs,
):
    """Binary cross entropy between predicted and observed egocentric maps.

    The ground truth is read from
    `batch["observations"][self.binned_pc_uuid]["egocentric_update"]`
    (channels last), thresholded at 0.5, and compared against the logits
    found in `actor_critic_output.extras[self.map_logits_uuid]` (channels
    first, square spatial extent). All leading dimensions of both tensors
    are flattened into a single batch dimension. `step_count` is not used.

    # Returns

    A tuple `(total_loss, {"binned_pc_map_ce": float})`.
    """
    target = batch["observations"][self.binned_pc_uuid]["egocentric_update"].float()
    height, width, channels = target.shape[-3:]
    # (..., h, w, c) -> (B, c, h, w)
    target = target.view(-1, height, width, channels).permute(0, 3, 1, 2).contiguous()

    logits = actor_critic_output.extras[self.map_logits_uuid]
    side = logits.shape[-1]
    logits = logits.view(-1, channels, side, side)

    assert target.shape == logits.shape

    occupied = (target > 0.5).float()
    ce = F.binary_cross_entropy_with_logits(logits, occupied)

    return ce, {"binned_pc_map_ce": ce.item()}
def loss(  # type: ignore
    self,
    step_count: int,
    batch: ObservationType,
    actor_critic_output: ActorCriticOutput[CategoricalDistr],
    *args,
    **kwargs,
):
    """Focal loss between predicted and observed egocentric semantic maps.

    The ground truth is read from
    `batch["observations"][self.semantic_map_uuid]["egocentric_update"]`
    (channels last) and compared against the logits stored in
    `actor_critic_output.extras[self.map_logits_uuid]` (channels first).
    All dimensions before the last three are flattened into one batch
    dimension. With `self.gamma == 0` the value equals the mean binary
    cross entropy. `step_count` is not used.

    # Returns

    A tuple `(total_loss, {"sem_map_focal_loss": float})`.
    """
    target = batch["observations"][self.semantic_map_uuid]["egocentric_update"]
    target_tail = tuple(target.shape[-3:])
    # (..., h, w, c) -> (B, c, h, w)
    target = target.view(-1, *target_tail).permute(0, 3, 1, 2).contiguous()

    logits = actor_critic_output.extras[self.map_logits_uuid]
    logits_tail = tuple(logits.shape[-3:])
    logits = logits.view(-1, *logits_tail)

    assert target.shape == logits.shape

    # Probabilities and log-probabilities of the positive / negative class,
    # each computed directly from the logits for numerical stability.
    p = torch.sigmoid(logits)
    one_minus_p = torch.sigmoid(-logits)
    log_p = F.logsigmoid(logits)
    log_one_minus_p = F.logsigmoid(-logits)

    target = target.float()
    positive_term = target * (log_p * (one_minus_p**self.gamma))
    negative_term = (1 - target) * (log_one_minus_p * (p**self.gamma))
    focal = -(positive_term + negative_term).mean()

    return focal, {"sem_map_focal_loss": focal.item()}
range(ego_map_gt.shape[0]): # pred_sem_map = torch.sigmoid(ego_map_logits)[i].permute(1, 2, 0).flip(0).detach() # a = SemanticMapBuilder.randomly_color_semantic_map(ego_map_gt[i].permute(1, 2, 0).flip(0).detach()) # b = SemanticMapBuilder.randomly_color_semantic_map(pred_sem_map) # imageio.imwrite( # f"z_semantic_maps/{i}.png", # np.concatenate((a, 255 + a[:, :10] * 0, b), axis=1), # ) # ================================================ FILE: allenact/embodiedai/mapping/mapping_models/__init__.py ================================================ ================================================ FILE: allenact/embodiedai/mapping/mapping_models/active_neural_slam.py ================================================ # MIT License # # Original Copyright (c) 2020 Devendra Chaplot # # Modified work Copyright (c) 2021 Allen Institute for Artificial Intelligence # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
DEGREES_TO_RADIANS = np.pi / 180.0
RADIANS_TO_DEGREES = 180.0 / np.pi


def _inv_sigmoid(x: torch.Tensor):
    """Return the logit `log(x) - log(1 - x)` of `x` (elementwise)."""
    return torch.log(x) - torch.log1p(-x)


class ActiveNeuralSLAM(nn.Module):
    """Active Neural SLAM module.

    This is an implementation of the Active Neural SLAM module
    from:
    ```
    Chaplot, D.S., Gandhi, D., Gupta, S., Gupta, A. and Salakhutdinov, R., 2020.
    Learning To Explore Using Active Neural SLAM.
    In International Conference on Learning Representations (ICLR).
    ```
    Note that this is purely the mapping component and does not include the planning
    components from the above paper.

    This implementation is adapted from `https://github.com/devendrachaplot/Neural-SLAM`,
    we have extended this implementation to allow for an arbitrary number of output map
    channels (enabling semantic mapping).

    At a high level, this model takes as input RGB egocentric images and outputs metric
    map tensors of shape (# channels) x height x width where height/width correspond to the
    ground plane of the environment.
    """

    def __init__(
        self,
        frame_height: int,
        frame_width: int,
        n_map_channels: int,
        resolution_in_cm: int = 5,
        map_size_in_cm: int = 2400,
        vision_range_in_cm: int = 300,
        use_pose_estimation: bool = False,
        pretrained_resnet: bool = True,
        freeze_resnet_batchnorm: bool = True,
        use_resnet_layernorm: bool = False,
    ):
        """Initialize an Active Neural SLAM module.

        # Parameters

        frame_height : The height of the RGB images given to this module on calls to `forward`.
        frame_width : The width of the RGB images given to this module on calls to `forward`.
        n_map_channels : The number of output channels in the output maps.
        resolution_in_cm : The resolution of the output map, see `map_size_in_cm`.
        map_size_in_cm : The height & width of the map in centimeters. The size of the map
            tensor returned on calls to forward will be `map_size_in_cm/resolution_in_cm`. Note
            that `map_size_in_cm` must be divisible by `resolution_in_cm`.
        vision_range_in_cm : Given an RGB image input, this module will transform this image into
            an "egocentric map" with height and width equaling `vision_range_in_cm/resolution_in_cm`.
            This egocentric map corresponds to the area of the world directly in front of the agent.
            This "egocentric map" will be rotated/translated into the allocentric reference frame and
            used to update the larger, allocentric, map whose
            height and width equal `map_size_in_cm/resolution_in_cm`. Thus this parameter controls
            how much of the map will be updated on every step.
            `vision_range_in_cm/resolution_in_cm` must be divisible by 8.
        use_pose_estimation : Whether or not we should estimate the agent's change in position/rotation.
            If `False`, you'll need to provide the ground truth changes in position/rotation.
        pretrained_resnet : Whether or not to use ImageNet pre-trained model weights for the ResNet18
            backbone.
        freeze_resnet_batchnorm : Whether or not the batch normalization layers in the ResNet18 backbone
            should be frozen and batchnorm updates disabled. You almost certainly want this to be `True`
            as using batch normalization during RL training results in all sorts of issues unless you're
            very careful.
        use_resnet_layernorm : If you've enabled `freeze_resnet_batchnorm` (recommended) you'll likely want
            to normalize the output from the ResNet18 model as we've found that these values can otherwise
            grow quite large harming learning.
        """
        super(ActiveNeuralSLAM, self).__init__()
        self.frame_height = frame_height
        self.frame_width = frame_width
        self.n_map_channels = n_map_channels
        self.resolution_in_cm = resolution_in_cm
        self.map_size_in_cm = map_size_in_cm
        self.input_channels = 3
        self.vision_range_in_cm = vision_range_in_cm
        self.dropout = 0.5
        self.use_pose_estimation = use_pose_estimation
        self.freeze_resnet_batchnorm = freeze_resnet_batchnorm

        self.max_abs_map_logit_value = 20

        # Visual Encoding
        resnet = models.resnet18(pretrained=pretrained_resnet)
        # Keep everything up to (and including) the last residual stage, i.e. drop the
        # average pooling and classification head.
        self.resnet_l5 = nn.Sequential(*list(resnet.children())[0:8])
        self.conv = nn.Sequential(
            nn.Conv2d(512, 64, (1, 1), stride=(1, 1)),
            nn.ReLU(),
        )
        self.bn_modules = [
            module
            for module in self.resnet_l5.modules()
            if "BatchNorm" in type(module).__name__
        ]
        if freeze_resnet_batchnorm:
            # With zero momentum the running statistics are never moved away from their
            # current values (see also `train` which keeps these modules in eval mode).
            for bn in self.bn_modules:
                bn.momentum = 0

        # Layernorm (if requested)
        self.use_resnet_layernorm = use_resnet_layernorm
        if self.use_resnet_layernorm:
            assert (
                self.freeze_resnet_batchnorm
            ), "When using layernorm, we require that set `freeze_resnet_batchnorm` to True."
            # NOTE(review): the normalized shape is hard-coded to 512 x 7 x 7 which presumably
            # corresponds to 224 x 224 input frames -- confirm before using other frame sizes.
            self.resnet_normalizer = nn.Sequential(
                nn.Conv2d(512, 512, 1),
                nn.LayerNorm(
                    normalized_shape=[512, 7, 7],
                    elementwise_affine=True,
                ),
            )
            self.resnet_normalizer.apply(simple_conv_and_linear_weights_init)
        else:
            self.resnet_normalizer = nn.Identity()

        # convolution output size
        input_test = torch.randn(
            1, self.input_channels, self.frame_height, self.frame_width
        )
        # Have to explicitly call .forward to get past LGTM checks as it thinks nn.Sequential isn't callable
        conv_output = self.conv.forward(self.resnet_l5.forward(input_test))

        self.conv_output_size = conv_output.view(-1).size(0)

        # projection layer
        self.proj1 = nn.Linear(self.conv_output_size, 1024)
        # The three stride-2 transposed convolutions below upsample by a factor of 8.
        assert self.vision_range % 8 == 0
        self.deconv_in_height = self.vision_range // 8
        self.deconv_in_width = self.deconv_in_height
        self.n_input_channels_for_deconv = 64
        proj2_out_size = (
            self.n_input_channels_for_deconv
            * self.deconv_in_height
            * self.deconv_in_width
        )
        self.proj2 = nn.Linear(1024, proj2_out_size)

        if self.dropout > 0:
            self.dropout1 = nn.Dropout(self.dropout)
            self.dropout2 = nn.Dropout(self.dropout)

        # Deconv layers to predict map
        self.deconv = nn.Sequential(
            nn.ConvTranspose2d(
                self.n_input_channels_for_deconv,
                32,
                (4, 4),
                stride=(2, 2),
                padding=(1, 1),
            ),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 16, (4, 4), stride=(2, 2), padding=(1, 1)),
            nn.ReLU(),
            nn.ConvTranspose2d(
                16, self.n_map_channels, (4, 4), stride=(2, 2), padding=(1, 1)
            ),
        )

        # Pose Estimator
        self.pose_conv = nn.Sequential(
            nn.Conv2d(2 * self.n_map_channels, 64, (4, 4), stride=(2, 2)),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 32, (4, 4), stride=(2, 2)),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 16, (3, 3), stride=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Flatten(),
        )

        self.pose_conv_output_dim = (
            self.pose_conv.forward(
                torch.zeros(
                    1, 2 * self.n_map_channels, self.vision_range, self.vision_range
                )
            )
            .view(-1)
            .size(0)
        )

        # projection layer
        self.pose_proj1 = nn.Linear(self.pose_conv_output_dim, 1024)
        self.pose_proj2_x = nn.Linear(1024, 128)
        self.pose_proj2_z = nn.Linear(1024, 128)
        self.pose_proj2_o = nn.Linear(1024, 128)
        self.pose_proj3_x = nn.Linear(128, 1)
        # This layer predicts dz (see `estimate_egocentric_dx_dz_dr`); the attribute name is
        # kept as is since renaming it would change the keys of the module's state dict.
        self.pose_proj3_y = nn.Linear(128, 1)
        self.pose_proj3_o = nn.Linear(128, 1)

        if self.dropout > 0:
            self.pose_dropout1 = nn.Dropout(self.dropout)

        self.train()

    @property
    def device(self):
        """The device on which the weights of `self.pose_proj1` live."""
        return self.pose_proj1.weight.device

    def train(self, mode: bool = True):
        """Set the training mode, keeping the frozen batchnorm modules in eval mode.

        # Returns
        `self`, matching the contract of `torch.nn.Module.train` (and hence also
        of `torch.nn.Module.eval` which delegates to this method).
        """
        super().train(mode=mode)
        if mode and self.freeze_resnet_batchnorm:
            for module in self.bn_modules:
                module.eval()
        return self

    @property
    def map_size(self):
        """Height (== width) of the allocentric map in map cells."""
        return self.map_size_in_cm // self.resolution_in_cm

    @property
    def vision_range(self):
        """Height (== width) of the egocentric map in map cells."""
        return self.vision_range_in_cm // self.resolution_in_cm

    def image_to_egocentric_map_logits(
        self,
        images: Optional[torch.Tensor],
        resnet_image_features: Optional[torch.Tensor] = None,
    ):
        """Predict egocentric map logits from RGB images (or precomputed ResNet features).

        # Parameters
        images : Batch of images, only the first 3 channels are used. Ignored (and can
            be `None`) if `resnet_image_features` is given.
        resnet_image_features : Optional precomputed (and already normalized) output of
            `self.resnet_l5`.

        # Returns
        The output of the deconvolution stack: one map of logits per batch element.
        """
        if resnet_image_features is None:
            bs, _, _, _ = images.size()
            resnet_image_features = self.resnet_normalizer(
                self.resnet_l5(images[:, :3, :, :])
            )
        else:
            bs = resnet_image_features.shape[0]

        conv_output = self.conv(resnet_image_features)

        proj1 = F.relu(self.proj1(conv_output.reshape(-1, self.conv_output_size)))
        if self.dropout > 0:
            proj1 = self.dropout1(proj1)
        proj2 = F.relu(self.proj2(proj1))

        deconv_input = proj2.view(
            bs,
            self.n_input_channels_for_deconv,
            self.deconv_in_height,
            self.deconv_in_width,
        )
        deconv_output = self.deconv(deconv_input)
        return deconv_output

    def allocentric_map_to_egocentric_view(
        self, allocentric_map: torch.Tensor, xzr: torch.Tensor, padding_mode: str
    ):
        """Crop the egocentric view at the given poses out of an allocentric map.

        # Parameters
        allocentric_map : (# batches) x (# channels) x `map_size` x `map_size` tensor.
        xzr : (# batches) x 3 tensor of x / z positions (in meters, they are rescaled
            by `100 / resolution_in_cm` below) and rotations (in degrees).
        padding_mode : Forwarded to `torch.nn.functional.grid_sample`.

        # Returns
        A (# batches) x (# channels) x `vision_range` x `vision_range` tensor computed
        without tracking gradients.
        """
        # Index the egocentric viewpoints at the given xzr locations
        with torch.no_grad():
            allocentric_map = allocentric_map.float()
            xzr = xzr.float()

            theta = xzr[:, 2].float() * float(np.pi / 180)

            # Here form the rotation matrix
            cos_theta = torch.cos(theta)
            sin_theta = torch.sin(theta)
            rot_mat = torch.stack(
                (
                    torch.stack((cos_theta, -sin_theta), -1),
                    torch.stack((sin_theta, cos_theta), -1),
                ),
                1,
            )

            # Maps meters to the [-1, 1] normalized coordinates used by `affine_grid`
            scaler = 2 * (100 / (self.resolution_in_cm * self.map_size))
            offset_to_center_the_agent = scaler * xzr[:, :2].unsqueeze(-1) - 1

            offset_to_top_of_image = rot_mat @ torch.tensor(
                [[0.0], [1.0]], dtype=torch.float32, device=self.device
            )
            rotation_and_translate_mat = torch.cat(
                (
                    rot_mat,
                    offset_to_top_of_image + offset_to_center_the_agent,
                ),
                dim=-1,
            )

            # `align_corners` is given explicitly so that the sampling grid is built with
            # the same convention as is used by `grid_sample`.
            ego_map = F.grid_sample(
                allocentric_map,
                F.affine_grid(
                    rotation_and_translate_mat.to(self.device),
                    allocentric_map.shape,
                    align_corners=False,
                ),
                padding_mode=padding_mode,
                align_corners=False,
            )

            vr = self.vision_range
            half_vr = vr // 2
            center = self.map_size_in_cm // (2 * self.resolution_in_cm)
            cropped = ego_map[:, :, :vr, (center - half_vr) : (center + half_vr)]
            return cropped

    def estimate_egocentric_dx_dz_dr(
        self,
        map_probs_egocentric: torch.Tensor,
        last_map_probs_egocentric: torch.Tensor,
    ):
        """Predict the agent's egocentric change in x, z, and rotation from two map views.

        Both inputs are detached, so no gradients flow from the pose estimate back
        into the map predictions.

        # Returns
        A (# batches) x 3 tensor with columns (dx, dz, dr).
        """
        assert last_map_probs_egocentric.shape == map_probs_egocentric.shape

        pose_est_input = torch.cat(
            (map_probs_egocentric.detach(), last_map_probs_egocentric.detach()), dim=1
        )
        pose_conv_output = self.pose_conv(pose_est_input)

        proj1 = F.relu(self.pose_proj1(pose_conv_output))

        if self.dropout > 0:
            proj1 = self.pose_dropout1(proj1)

        proj2_x = F.relu(self.pose_proj2_x(proj1))
        pred_dx = self.pose_proj3_x(proj2_x)

        proj2_z = F.relu(self.pose_proj2_z(proj1))
        pred_dz = self.pose_proj3_y(proj2_z)

        proj2_o = F.relu(self.pose_proj2_o(proj1))
        pred_do = self.pose_proj3_o(proj2_o)

        return torch.cat((pred_dx, pred_dz, pred_do), dim=1)

    @staticmethod
    def update_allocentric_xzrs_with_egocentric_movement(
        last_xzrs_allocentric: torch.Tensor,
        dx_dz_drs_egocentric: torch.Tensor,
    ):
        """Apply an egocentric movement to an allocentric pose.

        The (dx, dz) offsets are rotated by the agent's previous rotation before being
        added to the previous position. The new rotation is wrapped into [-180, 180).

        # Returns
        A new (# batches) x 3 tensor, the inputs are not modified.
        """
        new_xzrs_allocentric = last_xzrs_allocentric.clone()

        theta = new_xzrs_allocentric[:, 2] * DEGREES_TO_RADIANS
        sin_theta = torch.sin(theta)
        cos_theta = torch.cos(theta)
        new_xzrs_allocentric[:, :2] += torch.matmul(
            torch.stack([cos_theta, -sin_theta, sin_theta, cos_theta], dim=-1).view(
                -1, 2, 2
            ),
            dx_dz_drs_egocentric[:, :2].unsqueeze(-1),
        ).squeeze(-1)

        new_xzrs_allocentric[:, 2] += dx_dz_drs_egocentric[:, 2]
        # `torch.fmod` keeps the sign of its first argument, the first wrap guarantees a
        # value > -180 so that the second wrap ends up in [-180, 180).
        new_xzrs_allocentric[:, 2] = (
            torch.fmod(new_xzrs_allocentric[:, 2] - 180.0, 360.0) + 180.0
        )
        new_xzrs_allocentric[:, 2] = (
            torch.fmod(new_xzrs_allocentric[:, 2] + 180.0, 360.0) - 180.0
        )

        return new_xzrs_allocentric

    def forward(
        self,
        images: Optional[torch.Tensor],
        last_map_probs_allocentric: Optional[torch.Tensor],
        last_xzrs_allocentric: Optional[torch.Tensor],
        dx_dz_drs_egocentric: Optional[torch.Tensor],
        last_map_logits_egocentric: Optional[torch.Tensor],
        return_allocentric_maps=True,
        resnet_image_features: Optional[torch.Tensor] = None,
    ) -> Dict[str, Any]:
        """Create allocentric/egocentric maps predictions given RGB image
        inputs.

        Here it is assumed that `last_xzrs_allocentric` has been re-centered so that (x, z) == (0,0)
        corresponds to the top left of the returned map (with increasing x/z moving to the bottom right of the map).

        Note that all maps are oriented so that:
        * **Increasing x values** correspond to **increasing columns** in the map(s).
        * **Increasing z values** correspond to **increasing rows** in the map(s).
        Note that this may seem a bit weird as:
        * "north" is pointing downwards in the map,
        * if you picture yourself as the agent facing north (i.e. down) in the map, then moving to the right from
            the agent's perspective will correspond to **increasing** which column the agent is at:
        ```
        agent facing downwards - - > (dir. to the right of the agent, i.e. moving right corresponds to +cols)
                |
                |
                v (dir. agent faces, i.e. moving ahead corresponds to +rows)
        ```
        This may be the opposite of what you expect.

        # Parameters
        images : A (# batches) x 3 x height x width tensor of RGB images. These should be
            normalized for use with a resnet model. See [here](https_DOC_COLON_//pytorch.org/vision/stable/models.html)
            for information (see also the `use_resnet_normalization` parameter of the
            `allenact.base_abstractions.sensor.RGBSensor` sensor).
        last_map_probs_allocentric : A (# batches) x (map channels) x (map height) x (map width)
            tensor representing the collection of allocentric maps to be updated.
        last_xzrs_allocentric : A (# batches) x 3 tensor where `last_xzrs_allocentric[_DOC_COLON_, 0]`
            are the agent's (allocentric) x-coordinates on the previous step,
            `last_xzrs_allocentric[_DOC_COLON_, 1]` are the agent's (allocentric) z-coordinates from the previous
            step, and `last_xzrs_allocentric[_DOC_COLON_, 2]` are the agent's rotations (allocentric, in degrees)
            from the previous step.
        dx_dz_drs_egocentric : A (# batches) x 3 tensor representing the agent's change in x (in meters), z (in meters),
            and rotation (in degrees) from the previous step. Note that these changes are "egocentric" so that if the
            agent moved 1 meter ahead from its perspective this should correspond to a dz of +1.0 regardless of
            the agent's orientation (similarly moving right would result in a dx of +1.0). This
            is ignored (and thus can be `None`) if you are using pose estimation
            (i.e. `self.use_pose_estimation` is `True`). It can also be `None` if `return_allocentric_maps`
            is `False`, in which case no allocentric pose is computed.
        last_map_logits_egocentric : The "egocentric_update" output when calling this function
            on the last agent's step. I.e. this should be the egocentric map view of the agent
            from the last step. This is used to compute the change in the agent's position rotation.
            This is ignored (and thus can be `None`) if you do not wish to estimate
            the agent's pose (i.e. `self.use_pose_estimation` is `False`); it must not be `None`
            when `self.use_pose_estimation` is `True`.
        return_allocentric_maps : Whether or not to generate new allocentric maps given `last_map_probs_allocentric`
            and the new map estimates. Creating these new allocentric maps is expensive so better avoided when
            not needed.
        resnet_image_features : Sometimes you may wish to compute the ResNet image features yourself for use
            in another part of your model. Rather than having to recompute them multiple times, you can
            instead compute them once and pass them into this forward call (in this case the input `images`
            parameter is ignored). Note that if you're using the `self.resnet_l5` module to compute these
            features, be sure to also normalize them with `self.resnet_normalizer` if you have opted to
            `use_resnet_layernorm` when initializing this module).

        # Returns
        A dictionary with keys/values:
        * "egocentric_update" - The egocentric map view (logits) for the given RGB image. This is what should
            be used for computing losses in general.
        * "map_probs_allocentric_update_no_grad" - The egocentric map view (as probabilities) after it has been
            rotated, translated, and moved into a full-sized allocentric map. This map has been
            detached from the computation graph and so should not be used for gradient computations.
            This will be `None` if `return_allocentric_maps` was `False`.
        * "map_probs_allocentric_no_grad" - The newly updated allocentric map, this corresponds to
            performing a pointwise maximum between `last_map_probs_allocentric` and the
            above returned `map_probs_allocentric_update_no_grad`.
            This will be `None` if `return_allocentric_maps` was `False`.
        * "dx_dz_dr_egocentric_preds" - The predicted change in x, z, and rotation of the agent (from the
            egocentric perspective of the agent). This will be `None` if `last_map_logits_egocentric`
            was `None`.
        * "xzr_allocentric_preds" - The (predicted if `self.use_pose_estimation == True`)
            allocentric (x, z) position and rotation of the agent. This will equal `None` if
            `self.use_pose_estimation == False` and `dx_dz_drs_egocentric` is `None`.
        """
        # TODO: For consistency we should update things so that:
        #  "Furthermore, the rotation component of `last_xzrs_allocentric` and `dx_dz_drs_egocentric`
        #   should be specified in **degrees** with positive rotation corresponding to a **CLOCKWISE**
        #   rotation (this is the default used by the many game engines)."
        map_logits_egocentric = self.image_to_egocentric_map_logits(
            images=images, resnet_image_features=resnet_image_features
        )
        map_probs_egocentric = torch.sigmoid(map_logits_egocentric)

        dx_dz_dr_egocentric_preds = None
        if last_map_logits_egocentric is not None:
            dx_dz_dr_egocentric_preds = self.estimate_egocentric_dx_dz_dr(
                map_probs_egocentric=map_probs_egocentric,
                last_map_probs_egocentric=torch.sigmoid(last_map_logits_egocentric),
            )

        if self.use_pose_estimation:
            updated_xzrs_allocentric = (
                self.update_allocentric_xzrs_with_egocentric_movement(
                    last_xzrs_allocentric=last_xzrs_allocentric,
                    dx_dz_drs_egocentric=dx_dz_dr_egocentric_preds,
                )
            )
        elif dx_dz_drs_egocentric is not None:
            updated_xzrs_allocentric = (
                self.update_allocentric_xzrs_with_egocentric_movement(
                    last_xzrs_allocentric=last_xzrs_allocentric,
                    dx_dz_drs_egocentric=dx_dz_drs_egocentric,
                )
            )
        else:
            updated_xzrs_allocentric = None

        if return_allocentric_maps:
            # Aggregate egocentric map prediction in the allocentric map
            # using the predicted pose (if `self.use_pose_estimation`) or the ground
            # truth pose (if not `self.use_pose_estimation`)
            with torch.no_grad():
                # Rotate and translate the egocentric map view, we do this grid sampling
                # at the level of probabilities as bad results can occur at the logit level
                full_size_allocentric_map_probs_update = (
                    _move_egocentric_map_view_into_allocentric_position(
                        map_probs_egocentric=map_probs_egocentric,
                        xzrs_allocentric=updated_xzrs_allocentric,
                        allocentric_map_height_width=(self.map_size, self.map_size),
                        resolution_in_cm=self.resolution_in_cm,
                    )
                )

                map_probs_allocentric = torch.max(
                    last_map_probs_allocentric, full_size_allocentric_map_probs_update
                )
        else:
            full_size_allocentric_map_probs_update = None
            map_probs_allocentric = None

        return {
            "egocentric_update": map_logits_egocentric,
            "map_probs_allocentric_update_no_grad": full_size_allocentric_map_probs_update,
            "map_probs_allocentric_no_grad": map_probs_allocentric,
            "dx_dz_dr_egocentric_preds": dx_dz_dr_egocentric_preds,
            "xzr_allocentric_preds": updated_xzrs_allocentric,
        }
def _move_egocentric_map_view_into_allocentric_position(
    map_probs_egocentric: torch.Tensor,
    xzrs_allocentric: torch.Tensor,
    allocentric_map_height_width: Tuple[int, int],
    resolution_in_cm: float,
):
    """Translate/rotate an egocentric map view into an allocentric map.

    Let's say you have a collection of egocentric maps in a tensor of shape
    `(# batches) x (# channels) x (# ego rows) x (# ego columns)`
    where these are "egocentric" as we assume the agent is always
    at the top-center of the map and facing "downwards", namely
    * **ahead** of the agent should correspond to **increasing rows** in the map(s).
    * **right** of the agent should correspond to **increasing columns** in the map(s).
    Note that the above is a bit weird as, if you picture yourself as the agent facing
    downwards in the map, then moving to the right from the agent's perspective corresponds
    to **increasing** which column the agent is at.

    Here's how things should look if you plotted one of these egocentric maps:
    ```
    top-center of map - - > (dir. to the right of the agent, i.e. moving right corresponds to +cols)
            |
            |
            v (dir. agent faces, i.e. moving ahead corresponds to +rows)
    ```

    This function is used to translate/rotate the above ego maps so that
    they are in the right position/rotation in an allocentric map of size
    `(# batches) x (# channels) x (# allocentric_map_height_width[0]) x (# allocentric_map_height_width[1])`.

    Adapted from the get_grid function in https://github.com/devendrachaplot/Neural-SLAM.

    # Parameters
    map_probs_egocentric : Egocentric map views.
    xzrs_allocentric : (# batches)x3 tensor with `xzrs_allocentric[:, 0]` being the x-coordinates (in meters),
        `xzrs_allocentric[:, 1]` being the z-coordinates (in meters), and `xzrs_allocentric[:, 2]` being the rotation
        (in degrees) of the agent in the allocentric reference frame. Here it is assumed that `xzrs_allocentric` has
        been re-centered so that (x, z) == (0,0) corresponds to the top left of the returned map (with increasing
        x/z moving to the bottom right of the map). Note that positive rotations are in the counterclockwise direction.
    allocentric_map_height_width : Height/width of the allocentric map to be returned
    resolution_in_cm : Resolution (in cm) of map to be returned (and of map_probs_egocentric). I.e.
        `map_probs_egocentric[0,0,0:1,0:1]` should correspond to a `resolution_in_cm x resolution_in_cm`
        square on the ground plane in the world.

    # Returns
    `(# batches) x (# channels) x (# allocentric_map_height_width[0]) x (# allocentric_map_height_width[1])`
    tensor where the input `map_probs_egocentric` maps have been rotated/translated so that they
    are in the positions specified by `xzrs_allocentric`.

    # Raises
    NotImplementedError : If the egocentric view is too large to be rotated inside of the
        allocentric map without losing information.
    AssertionError : If any of the map heights/widths is odd.
    """
    # TODO: For consistency we should update the rotations so they are in the clockwise direction.

    # First we place the egocentric map view into the center
    # of a map that has the same size as the allocentric map

    nbatch, c, ego_h, ego_w = cast(
        Tuple[int, int, int, int], map_probs_egocentric.shape
    )
    allo_h, allo_w = allocentric_map_height_width

    # The point of the egocentric view that is furthest from the agent is one of the
    # two far corners (the agent sits at the middle of the first row).
    max_view_range = math.sqrt((ego_w / 2.0) ** 2 + ego_h**2)
    if min(allo_h, allo_w) / 2.0 < max_view_range:
        raise NotImplementedError(
            f"The shape of your egocentric view (ego_h, ego_w)==({ego_h}, {ego_w})"
            f" is too large relative the size of the allocentric map (allo_h, allo_w)==({allo_h}, {allo_w})."
            f" The height/width of your allocentric map should be at least {2 * max_view_range} to allow"
            f" for no information to be lost when rotating the egocentric map."
        )

    full_size_ego_map_update_probs = map_probs_egocentric.new_zeros(
        (nbatch, c, allo_h, allo_w)
    )

    assert (ego_h % 2, ego_w % 2, allo_h % 2, allo_w % 2) == (
        0,
    ) * 4, "All map heights/widths should be divisible by 2."

    # Columns are centered, rows start at the center of the full-size map (so that the
    # agent, at the top-center of the egocentric view, is at the center of this map).
    x1 = allo_w // 2 - ego_w // 2
    x2 = x1 + ego_w
    z1 = allo_h // 2
    z2 = z1 + ego_h
    full_size_ego_map_update_probs[:, :, z1:z2, x1:x2] = map_probs_egocentric

    # Now we'll rotate and translate `full_size_ego_map_update_probs`
    # so that the egocentric map view is positioned where it should be
    # in the allocentric coordinate frame

    # To do this we first need to rescale our allocentric xz coordinates
    # so that the center of the map is (0,0) and the top left corner is (-1, -1)
    # as this is what's expected by the `affine_grid` function below.
    rescaled_xzrs_allocentric = xzrs_allocentric.clone().detach().float()
    rescaled_xzrs_allocentric[:, :2] *= (
        100.0 / resolution_in_cm
    )  # Put x / z into map units rather than meters
    rescaled_xzrs_allocentric[:, 0] /= allo_w / 2  # x corresponds to columns
    rescaled_xzrs_allocentric[:, 1] /= allo_h / 2  # z corresponds to rows
    rescaled_xzrs_allocentric[:, :2] -= 1.0  # Re-center

    x = rescaled_xzrs_allocentric[:, 0]
    z = rescaled_xzrs_allocentric[:, 1]
    theta = (
        -rescaled_xzrs_allocentric[:, 2] * DEGREES_TO_RADIANS
    )  # Notice the negative sign

    cos_theta = theta.cos()
    sin_theta = theta.sin()
    zeroes = torch.zeros_like(cos_theta)
    ones = torch.ones_like(cos_theta)

    # Pure rotation (about the center of the map) ...
    theta11 = torch.stack([cos_theta, -sin_theta, zeroes], 1)
    theta12 = torch.stack([sin_theta, cos_theta, zeroes], 1)
    theta1 = torch.stack([theta11, theta12], 1)

    # ... followed by a pure translation.
    theta21 = torch.stack([ones, zeroes, x], 1)
    theta22 = torch.stack([zeroes, ones, z], 1)
    theta2 = torch.stack([theta21, theta22], 1)

    grid_size = [nbatch, c, allo_h, allo_w]
    # `align_corners` is passed explicitly so that the grids are built with the same
    # convention as is used when sampling from them below.
    rot_grid = F.affine_grid(theta1, grid_size, align_corners=False)
    trans_grid = F.affine_grid(theta2, grid_size, align_corners=False)

    return F.grid_sample(
        F.grid_sample(
            full_size_ego_map_update_probs,
            rot_grid,
            padding_mode="zeros",
            align_corners=False,
        ),
        trans_grid,
        padding_mode="zeros",
        align_corners=False,
    )
class BinnedPointCloudMapBuilder(object):
    """Class used to iteratively construct a map of "free space" based on
    input depth maps (i.e. pointclouds).

    Adapted from https://github.com/devendrachaplot/Neural-SLAM

    This class can be used to (iteratively) construct a metric map of free space in an environment as
    an agent moves around. After every step the agent takes, you should call the `update` function and
    pass the agent's egocentric depth image along with the agent's new position. This depth map will
    be converted into a pointcloud, binned along the up/down axis, and then projected
    onto a 3-dimensional tensor of shape (HxWxC) where HxW represent the ground plane
    and where C equals the number of bins the up-down coordinate was binned into. This 3d map counts the
    number of points in each bin. Thus a lack of points within a region can be used to infer that
    that region is free space.

    # Attributes

    fov : FOV of the camera used to produce the depth images given when calling `update`.
    vision_range_in_map_units : The maximum distance (in number of rows/columns) that will
        be updated when calling `update`, points outside of this map vision range are ignored.
    map_size_in_cm : Total map size in cm.
    resolution_in_cm : Number of cm per row/column in the map.
    height_bins : The bins used to bin the up-down coordinate (for us the y-coordinate). For example,
        if `height_bins = [0.1, 1]` then
        all y-values < 0.1 will be mapped to 0, all y values in [0.1, 1) will be mapped to 1, and
        all y-values >= 1 will be mapped to 2.
        **Importantly:** these y-values will first be recentered by the `min_xyz` value passed when
        calling `reset(...)`.
    device : A `torch.device` on which to run computations. If this device is a GPU you can potentially
        obtain significant speed-ups.
    """

    def __init__(
        self,
        fov: float,
        vision_range_in_cm: int,
        map_size_in_cm: int,
        resolution_in_cm: int,
        height_bins: Sequence[float],
        return_egocentric_local_context: bool = False,
        device: torch.device = torch.device("cpu"),
    ):
        """Initializer.

        See the class documentation for the meaning of the parameters, additionally:

        # Parameters
        vision_range_in_cm : Maximum distance (in cm) that is updated when calling `update`,
            must be divisible by `resolution_in_cm`.
        return_egocentric_local_context : If `True`, `update` additionally returns
            an `"egocentric_local_context"` entry.
        """
        assert vision_range_in_cm % resolution_in_cm == 0

        self.fov = fov
        self.vision_range_in_map_units = vision_range_in_cm // resolution_in_cm
        self.map_size_in_cm = map_size_in_cm
        self.resolution_in_cm = resolution_in_cm
        self.height_bins = height_bins
        self.device = device
        self.return_egocentric_local_context = return_egocentric_local_context

        self.binned_point_cloud_map = np.zeros(
            (
                self.map_size_in_cm // self.resolution_in_cm,
                self.map_size_in_cm // self.resolution_in_cm,
                len(self.height_bins) + 1,
            ),
            dtype=np.float32,
        )

        # Set by `reset`, `update` cannot be called before this is available
        self.min_xyz: Optional[np.ndarray] = None

    def update(
        self,
        depth_frame: np.ndarray,
        camera_xyz: np.ndarray,
        camera_rotation: float,
        camera_horizon: float,
    ) -> Dict[str, np.ndarray]:
        """Updates the map with the input depth frame from the agent.

        See the `allenact.embodiedai.mapping.mapping_utils.point_cloud_utils.project_point_cloud_to_map`
        function for more information input parameter definitions. **We assume that the input
        `depth_frame` has depths recorded in meters**. The input `depth_frame` array is not modified.

        # Returns
        Let `map_size = self.map_size_in_cm // self.resolution_in_cm`. Returns a dictionary with keys-values:

        * `"egocentric_update"` - A tensor of shape
            `(vision_range_in_map_units)x(vision_range_in_map_units)x(len(self.height_bins) + 1)` corresponding
            to the binned pointcloud after having been centered on the agent and rotated so that
            points ahead of the agent correspond to larger row indices and points further to the right of the agent
            correspond to larger column indices. Note that by "centered" we mean that one can picture
             the agent as being positioned at (0, vision_range_in_map_units/2) and facing downward. Each entry in this tensor
             is a count equaling the number of points in the pointcloud that, once binned, fell into this
            entry. This is likely the output you want to use if you want to build a model to predict free space
            from an image.
        * `"allocentric_update"` - A `(map_size)x(map_size)x(len(self.height_bins) + 1)` corresponding
            to `"egocentric_update"` but rotated to the world-space coordinates. This `allocentric_update`
             is what is used to update the internally stored representation of the map.
        *  `"map"` -  A `(map_size)x(map_size)x(len(self.height_bins) + 1)` tensor corresponding
            to the sum of all `"allocentric_update"` values since the last `reset()`.
        *  `"egocentric_local_context"` - Only present if `self.return_egocentric_local_context` is `True`:
            the region of the (accumulated) `"map"` that corresponds to the agent's egocentric view.
        """
        with torch.no_grad():
            assert self.min_xyz is not None, "Please call `reset` before `update`."

            camera_xyz = (
                torch.from_numpy(camera_xyz - self.min_xyz).float().to(self.device)
            )

            try:
                depth_frame = torch.from_numpy(depth_frame).to(self.device)
            except ValueError:
                depth_frame = torch.from_numpy(depth_frame.copy()).to(self.device)

            # Mark everything beyond the vision range as missing. This is done out-of-place as,
            # on the CPU, the above tensor shares its memory with the caller's numpy array.
            max_depth_in_meters = (
                self.vision_range_in_map_units * self.resolution_in_cm / 100
            )
            depth_frame = depth_frame.masked_fill(
                depth_frame > max_depth_in_meters, float("nan")
            )

            world_space_point_cloud = depth_frame_to_world_space_xyz(
                depth_frame=depth_frame,
                camera_world_xyz=camera_xyz,
                rotation=camera_rotation,
                horizon=camera_horizon,
                fov=self.fov,
            )

            world_binned_map_update = project_point_cloud_to_map(
                xyz_points=world_space_point_cloud,
                bin_axis="y",
                bins=self.height_bins,
                map_size=self.binned_point_cloud_map.shape[0],
                resolution_in_cm=self.resolution_in_cm,
                flip_row_col=True,
            )

            # Center the cloud on the agent
            recentered_point_cloud = world_space_point_cloud - (
                torch.FloatTensor([1.0, 0.0, 1.0]).to(self.device) * camera_xyz
            ).reshape((1, 1, 3))
            # Rotate the cloud so that positive-z is the direction the agent is looking
            theta = (
                np.pi * camera_rotation / 180
            )  # No negative since THOR rotations are already backwards
            cos_theta = np.cos(theta)
            sin_theta = np.sin(theta)
            rotation_transform = torch.FloatTensor(
                [
                    [cos_theta, 0, -sin_theta],
                    [0, 1, 0],  # unchanged
                    [sin_theta, 0, cos_theta],
                ]
            ).to(self.device)
            rotated_point_cloud = recentered_point_cloud @ rotation_transform.T
            # Shift the agent to the middle column of the map
            xoffset = (self.map_size_in_cm / 100) / 2
            agent_centric_point_cloud = rotated_point_cloud + torch.FloatTensor(
                [xoffset, 0, 0]
            ).to(self.device)

            allocentric_update_numpy = world_binned_map_update.cpu().numpy()
            self.binned_point_cloud_map = (
                self.binned_point_cloud_map + allocentric_update_numpy
            )

            agent_centric_binned_map = project_point_cloud_to_map(
                xyz_points=agent_centric_point_cloud,
                bin_axis="y",
                bins=self.height_bins,
                map_size=self.binned_point_cloud_map.shape[0],
                resolution_in_cm=self.resolution_in_cm,
                flip_row_col=True,
            )
            vr = self.vision_range_in_map_units
            vr_div_2 = self.vision_range_in_map_units // 2
            width_div_2 = agent_centric_binned_map.shape[1] // 2
            agent_centric_binned_map = agent_centric_binned_map[
                :vr, (width_div_2 - vr_div_2) : (width_div_2 + vr_div_2), :
            ]

            to_return = {
                "egocentric_update": agent_centric_binned_map.cpu().numpy(),
                "allocentric_update": allocentric_update_numpy,
                "map": self.binned_point_cloud_map,
            }

            if self.return_egocentric_local_context:
                # See the update function of the semantic map sensor for in depth comments regarding the below
                # Essentially we are simply rotating the full map into the orientation of the agent and then
                # selecting a smaller region around the agent.
                theta = -np.pi * camera_rotation / 180
                cos_theta = np.cos(theta)
                sin_theta = np.sin(theta)
                rot_mat = torch.FloatTensor(
                    [[cos_theta, -sin_theta], [sin_theta, cos_theta]]
                ).to(self.device)

                move_back_offset = (
                    -0.5
                    * (self.vision_range_in_map_units * self.resolution_in_cm / 100)
                ) * (
                    rot_mat
                    @ torch.tensor(
                        [0, 1], dtype=torch.float, device=self.device
                    ).unsqueeze(-1)
                )

                map_size = self.binned_point_cloud_map.shape[0]
                scaler = 2 * (100 / (self.resolution_in_cm * map_size))
                offset_to_center_the_agent = (
                    scaler
                    * (
                        torch.tensor(
                            [
                                camera_xyz[0],
                                camera_xyz[2],
                            ],
                            dtype=torch.float,
                            device=self.device,
                        ).unsqueeze(-1)
                        + move_back_offset
                    )
                    - 1
                )
                offset_to_top_of_image = rot_mat @ torch.FloatTensor(
                    [0, 1.0]
                ).unsqueeze(1).to(self.device)
                rotation_and_translate_mat = torch.cat(
                    (
                        rot_mat,
                        offset_to_top_of_image + offset_to_center_the_agent,
                    ),
                    dim=1,
                )

                full_map_tensor = (
                    torch.tensor(
                        self.binned_point_cloud_map,
                        dtype=torch.float,
                        device=self.device,
                    )
                    .unsqueeze(0)
                    .permute(0, 3, 1, 2)
                )
                full_ego_map = (
                    F.grid_sample(
                        full_map_tensor,
                        F.affine_grid(
                            rotation_and_translate_mat.to(self.device).unsqueeze(0),
                            full_map_tensor.shape,
                            align_corners=False,
                        ),
                        align_corners=False,
                    )
                    .squeeze(0)
                    .permute(1, 2, 0)
                )

                egocentric_local_context = full_ego_map[
                    :vr, (width_div_2 - vr_div_2) : (width_div_2 + vr_div_2), :
                ]

                to_return["egocentric_local_context"] = (
                    egocentric_local_context.cpu().numpy()
                )

            return to_return

    def reset(self, min_xyz: np.ndarray):
        """Reset the map.

        Resets the internally stored map.

        # Parameters
        min_xyz : An array of size (3,) corresponding to the minimum possible x, y, and z values that will be observed
            as a point in a pointcloud when calling `.update(...)`. The (world-space) maps returned by calls to `update`
            will have been normalized so the (0,0,:) entry corresponds to these minimum values.
        """
        self.min_xyz = min_xyz
        self.binned_point_cloud_map = np.zeros_like(self.binned_point_cloud_map)
""" self.min_xyz = min_xyz self.binned_point_cloud_map = np.zeros_like(self.binned_point_cloud_map) class ObjectHull2d: def __init__( self, object_id: str, object_type: str, hull_points: Union[np.ndarray, Sequence[Sequence[float]]], ): """A class used to represent 2d convex hulls of objects when projected to the ground plane. # Parameters object_id : A unique id for the object. object_type : The type of the object. hull_points : A Nx2 matrix with `hull_points[:, 0]` being the x coordinates and `hull_points[:, 1]` being the `z` coordinates (this is using the Unity game engine conventions where the `y` axis is up/down). """ self.object_id = object_id self.object_type = object_type self.hull_points = ( hull_points if isinstance(hull_points, np.ndarray) else np.array(hull_points) ) class SemanticMapBuilder(object): """Class used to iteratively construct a semantic map based on input depth maps (i.e. pointclouds). Adapted from https://github.com/devendrachaplot/Neural-SLAM This class can be used to (iteratively) construct a semantic map of objects in the environment. This map is similar to that generated by `BinnedPointCloudMapBuilder` (see its documentation for more information) but the various channels correspond to different object types. Thus if the `(i,j,k)` entry of a map generated by this function is `True`, this means that an object of type `k` is present in position `i,j` in the map. In particular, by "present" we mean that, after projecting the object to the ground plane and taking the convex hull of the resulting 2d object, a non-trivial portion of this convex hull overlaps the `i,j` position. For attribute information, see the documentation of the `BinnedPointCloudMapBuilder` class. 
The only attribute present in this class that is not present in `BinnedPointCloudMapBuilder` is `ordered_object_types` which corresponds to a list of unique object types where object type `ordered_object_types[i]` will correspond to the `i`th channel of the map generated by this class. """ def __init__( self, fov: float, vision_range_in_cm: int, map_size_in_cm: int, resolution_in_cm: int, ordered_object_types: Sequence[str], device: torch.device = torch.device("cpu"), ): self.fov = fov self.vision_range_in_map_units = vision_range_in_cm // resolution_in_cm self.map_size_in_cm = map_size_in_cm self.resolution_in_cm = resolution_in_cm self.ordered_object_types = tuple(ordered_object_types) self.device = device self.object_type_to_index = { ot: i for i, ot in enumerate(self.ordered_object_types) } self.ground_truth_semantic_map = np.zeros( ( self.map_size_in_cm // self.resolution_in_cm, self.map_size_in_cm // self.resolution_in_cm, len(self.ordered_object_types), ), dtype=np.uint8, ) self.explored_mask = np.zeros( ( self.map_size_in_cm // self.resolution_in_cm, self.map_size_in_cm // self.resolution_in_cm, 1, ), dtype=bool, ) self.min_xyz: Optional[np.ndarray] = None @staticmethod def randomly_color_semantic_map( map: Union[np.ndarray, torch.Tensor], threshold: float = 0.5, seed: int = 1 ) -> np.ndarray: if not isinstance(map, np.ndarray): map = np.array(map) rnd = random.Random(seed) semantic_int_mat = ( (map >= threshold) * np.array(list(range(1, map.shape[-1] + 1))).reshape((1, 1, -1)) ).max(-1) # noinspection PyTypeChecker return np.uint8( np.array( [(0, 0, 0)] + [ tuple(rnd.randint(0, 256) for _ in range(3)) for _ in range(map.shape[-1]) ] )[semantic_int_mat] ) def _xzs_to_colrows(self, xzs: np.ndarray): height, width, _ = self.ground_truth_semantic_map.shape return np.clip( np.int32( ( (100 / self.resolution_in_cm) * (xzs - np.array([[self.min_xyz[0], self.min_xyz[2]]])) ) ), a_min=0, a_max=np.array( [width - 1, height - 1] ), # width then height as we're 
returns cols then rows ) def build_ground_truth_map(self, object_hulls: Sequence[ObjectHull2d]): self.ground_truth_semantic_map.fill(0) height, width, _ = self.ground_truth_semantic_map.shape for object_hull in object_hulls: ot = object_hull.object_type if ot in self.object_type_to_index: ind = self.object_type_to_index[ot] self.ground_truth_semantic_map[:, :, ind : (ind + 1)] = ( cv2.fillConvexPoly( img=np.array( self.ground_truth_semantic_map[:, :, ind : (ind + 1)], dtype=np.uint8, ), points=self._xzs_to_colrows(np.array(object_hull.hull_points)), color=255, ) ) def update( self, depth_frame: np.ndarray, camera_xyz: np.ndarray, camera_rotation: float, camera_horizon: float, ) -> Dict[str, np.ndarray]: """Updates the map with the input depth frame from the agent. See the documentation for `BinnedPointCloudMapBuilder.update`, the inputs and outputs are similar except that channels are used to represent the presence/absence of objects of given types. Unlike `BinnedPointCloudMapBuilder.update`, this function also returns two masks with keys `"egocentric_mask"` and `"mask"` that can be used to determine what portions of the map have been observed by the agent so far in the egocentric and world-space reference frames respectively. 
""" with torch.no_grad(): assert self.min_xyz is not None camera_xyz = torch.from_numpy(camera_xyz - self.min_xyz).to(self.device) map_size = self.ground_truth_semantic_map.shape[0] depth_frame = torch.from_numpy(depth_frame).to(self.device) depth_frame[ depth_frame > self.vision_range_in_map_units * self.resolution_in_cm / 100 ] = np.NaN world_space_point_cloud = depth_frame_to_world_space_xyz( depth_frame=depth_frame, camera_world_xyz=camera_xyz, rotation=camera_rotation, horizon=camera_horizon, fov=self.fov, ) world_newly_explored = ( project_point_cloud_to_map( xyz_points=world_space_point_cloud, bin_axis="y", bins=[], map_size=map_size, resolution_in_cm=self.resolution_in_cm, flip_row_col=True, ) > 0.001 ) world_update_and_mask = torch.cat( ( torch.logical_and( torch.from_numpy(self.ground_truth_semantic_map).to( self.device ), world_newly_explored, ), world_newly_explored, ), dim=-1, ).float() world_update_and_mask_for_sample = world_update_and_mask.unsqueeze( 0 ).permute(0, 3, 1, 2) # We now use grid sampling to rotate world_update_for_sample into the egocentric coordinate # frame of the agent so that the agent's forward direction is downwards in the tensor # (and it's right side is to the right in the image, this means that right/left # when taking the perspective of the agent in the image). This convention aligns with # what's expected by grid_sample where +x corresponds to +cols and +z corresponds to +rows. # Here also the rows/cols have been normalized so that the center of the image is at (0,0) # and the bottom right is at (1,1). # Mentally you can think of the output from the F.affine_grid function as you wanting # rotating/translating an axis-aligned square on the image-to-be-sampled and then # copying whatever is in this square to a new image. Note that the translation always # happens in the global reference frame after the rotation. We'll start by rotating # the square so that the the agent's z direction is downwards in the image. 
# Since the global axis of the map and the grid sampling are aligned, this requires # rotating the square by the rotation of the agent. As rotation is negative the usual # standard in THOR, we need to negate the rotation of the agent. theta = -np.pi * camera_rotation / 180 # Here form the rotation matrix cos_theta = np.cos(theta) sin_theta = np.sin(theta) rot_mat = torch.FloatTensor( [[cos_theta, -sin_theta], [sin_theta, cos_theta]] ).to(self.device) # Now we need to figure out the translation. For an intuitive understanding, we break this # translation into two different "offsets". The first offset centers the square on the # agent's current location: scaler = 2 * (100 / (self.resolution_in_cm * map_size)) offset_to_center_the_agent = ( scaler * torch.FloatTensor([camera_xyz[0], camera_xyz[2]]) .unsqueeze(-1) .to(self.device) - 1 ) # The second offset moves the square in the direction of the agent's z direction # so that the output image will have the agent's view starting directly at the # top of the image. offset_to_top_of_image = rot_mat @ torch.FloatTensor([0, 1.0]).unsqueeze( 1 ).to(self.device) rotation_and_translate_mat = torch.cat( ( rot_mat, offset_to_top_of_image + offset_to_center_the_agent, ), dim=1, ) ego_update_and_mask = F.grid_sample( world_update_and_mask_for_sample.to(self.device), F.affine_grid( rotation_and_translate_mat.to(self.device).unsqueeze(0), world_update_and_mask_for_sample.shape, align_corners=False, ), align_corners=False, ) # All that's left now is to crop out the portion of the transformed tensor that we actually # care about (i.e. the portion corresponding to the agent's `self.vision_range_in_map_units`. 
vr = self.vision_range_in_map_units half_vr = vr // 2 center = self.map_size_in_cm // (2 * self.resolution_in_cm) cropped = ego_update_and_mask[ :, :, :vr, (center - half_vr) : (center + half_vr) ] np.logical_or( self.explored_mask, world_newly_explored.cpu().numpy(), out=self.explored_mask, ) return { "egocentric_update": cropped[0, :-1].permute(1, 2, 0).cpu().numpy(), "egocentric_mask": (cropped[0, -1:].view(vr, vr, 1) > 0.001) .cpu() .numpy(), "explored_mask": np.array(self.explored_mask), "map": np.logical_and( self.explored_mask, (self.ground_truth_semantic_map > 0) ), } def reset(self, min_xyz: np.ndarray, object_hulls: Sequence[ObjectHull2d]): """Reset the map. Resets the internally stored map. # Parameters min_xyz : An array of size (3,) corresponding to the minimum possible x, y, and z values that will be observed as a point in a pointcloud when calling `.update(...)`. The (world-space) maps returned by calls to `update` will have been normalized so the (0,0,:) entry corresponds to these minimum values. object_hulls : The object hulls corresponding to objects in the scene. These will be used to construct the map. 
""" self.min_xyz = min_xyz self.build_ground_truth_map(object_hulls=object_hulls) ================================================ FILE: allenact/embodiedai/mapping/mapping_utils/point_cloud_utils.py ================================================ # MIT License # # Original Copyright (c) 2020 Devendra Chaplot # # Modified work Copyright (c) 2021 Allen Institute for Artificial Intelligence # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import math from typing import Optional, Sequence, cast import numpy as np import torch from allenact_plugins.ithor_plugin.ithor_util import vertical_to_horizontal_fov def camera_space_xyz_to_world_xyz( camera_space_xyzs: torch.Tensor, camera_world_xyz: torch.Tensor, rotation: float, horizon: float, ) -> torch.Tensor: """Transforms xyz coordinates in the camera's coordinate frame to world- space (global) xyz frame. This code has been adapted from https://github.com/devendrachaplot/Neural-SLAM. 
**IMPORTANT:** We use the conventions from the Unity game engine. In particular: * A rotation of 0 corresponds to facing north. * Positive rotations correspond to CLOCKWISE rotations. That is a rotation of 90 degrees corresponds to facing east. **THIS IS THE OPPOSITE CONVENTION OF THE ONE GENERALLY USED IN MATHEMATICS.** * When facing NORTH (rotation==0) moving ahead by 1 meter results in the the z coordinate increasing by 1. Moving to the right by 1 meter corresponds to increasing the x coordinate by 1. Finally moving upwards by 1 meter corresponds to increasing the y coordinate by 1. **Having x,z as the ground plane in this way is common in computer graphics but is different than the usual mathematical convention of having z be "up".** * The horizon corresponds to how far below the horizontal the camera is facing. I.e. a horizon of 30 corresponds to the camera being angled downwards at an angle of 30 degrees. # Parameters camera_space_xyzs : A 3xN matrix of xyz coordinates in the camera's reference frame. Here `x, y, z = camera_space_xyzs[:, i]` should equal the xyz coordinates for the ith point. camera_world_xyz : The camera's xyz position in the world reference frame. rotation : The world-space rotation (in degrees) of the camera. horizon : The horizon (in degrees) of the camera. # Returns 3xN tensor with entry [:, i] is the xyz world-space coordinate corresponding to the camera-space coordinate camera_space_xyzs[:, i] """ # Adapted from https://github.com/devendrachaplot/Neural-SLAM. 
def depth_frame_to_camera_space_xyz(
    depth_frame: torch.Tensor, mask: Optional[torch.Tensor], fov: float = 90
) -> torch.Tensor:
    """Convert a depth map into a point cloud in the camera's coordinate
    frame.

    # Parameters
    depth_frame : A depth map whose first two dimensions are (height, width);
        entry `depth_frame[i, j]` is the distance from the camera to the
        nearest surface at pixel (i, j).
    mask : An optional boolean mask of the same size as the input depth. Only
        pixels where this mask is true are included in the returned points.
        If `None`, every pixel is kept.
    fov: The field of view of the camera in degrees. It is applied directly
        along the image rows; for the columns it is first passed through
        `vertical_to_horizontal_fov` together with the frame's height/width.

    # Returns

    A 3xN tensor whose column [:, i] holds the xyz coordinates (in the
    camera's coordinate frame) of the i-th kept pixel.
    """
    height, width = depth_frame.shape[:2]

    keep = torch.ones_like(depth_frame, dtype=torch.bool) if mask is None else mask

    # (row, col) indices of the kept pixels, moved by 0.5 to the pixel centers.
    yx_centers = torch.stack(torch.where(keep)) + 0.5

    # Rows: measure from the image center and make "up" positive.
    y_offsets = (yx_centers[:1] - height / 2.0) * -1
    # Columns: measure from the image center.
    x_offsets = yx_centers[1:] - width / 2.0

    # Scale the offsets so that the points lie on the clipping plane (z == 1).
    y_offsets = y_offsets * ((2.0 / height) * math.tan((fov / 2) / 180 * math.pi))
    x_offsets = x_offsets * (
        (2.0 / width)
        * math.tan(
            (vertical_to_horizontal_fov(fov, height=height, width=width) / 2)
            / 180
            * math.pi
        )
    )

    unit_depth_xyz = torch.cat(
        [x_offsets, y_offsets, torch.ones_like(y_offsets)],
        dim=0,
    )

    # Push each unit-depth point out to its measured depth.
    return unit_depth_xyz * depth_frame[keep][None, :]
def project_point_cloud_to_map(
    xyz_points: torch.Tensor,
    bin_axis: str,
    bins: Sequence[float],
    map_size: int,
    resolution_in_cm: int,
    flip_row_col: bool,
):
    """Count the points of a point cloud falling into every cell of a map,
    with one channel per bin along `bin_axis`.

    This code has been adapted from https://github.com/devendrachaplot/Neural-SLAM.

    # Parameters
    xyz_points : (x,y,z) pointcloud(s) as a torch.Tensor of shape (... x height x width x 3).
        All operations are vectorized across the `...` dimensions.
    bin_axis : Either "x", "y", or "z", the axis which should be binned by the values in `bins`.
    bins: The boundaries used to bin values along `bin_axis` (passed to `torch.bucketize`).
    map_size : The two axes other than `bin_axis` are divided by `resolution_in_cm / 100` and
        rounded to the nearest integer; points whose resulting indices fall outside
        `[0, ..., map_size - 1]` are not counted.
    resolution_in_cm: The side length, in cm, of a single grid square of the output map.
    flip_row_col: If true, the two non-binned axes are taken in reversed order (e.g. (z, x)
        rather than (x, z) when binning along "y").

    # Returns
    A tensor of counts of shape (... x map_size x map_size x (len(bins)+1)). Points whose
    x coordinate is NaN are not counted.
    """
    bin_dim = ["x", "y", "z"].index(bin_axis)

    leading_shape = xyz_points.shape[:-3]
    clouds = xyz_points.reshape([-1, *xyz_points.shape[-3:]])
    num_clouds, h, w, _ = clouds.shape

    # Order of the axes in the output: the two map axes first, the binned axis last.
    candidate_axes = [2, 1, 0] if flip_row_col else [0, 1, 2]
    new_order = [axis for axis in candidate_axes if axis != bin_dim]
    new_order.append(bin_dim)

    uvw = cast(
        torch.Tensor, torch.stack([clouds[..., axis] for axis in new_order], dim=-1)
    )

    num_bins = len(bins) + 1

    # Integer map cell of every point and the bin of its `bin_axis` value.
    cell_indices = torch.round(100 * uvw[..., :-1] / resolution_in_cm).long()
    bin_indices = torch.bucketize(
        uvw[..., -1:].contiguous(), boundaries=uvw.new(bins)
    )
    uvw_binned: torch.Tensor = torch.cat((cell_indices, bin_indices), dim=-1)

    upper_bounds = (
        clouds.new()
        .long()
        .new([map_size, map_size, num_bins])
        .reshape((1, 1, 1, 3))
    )

    within_map = torch.logical_and(
        (uvw_binned >= 0).all(-1),
        (uvw_binned < upper_bounds).all(-1),
    )
    isvalid = torch.logical_and(within_map, ~torch.isnan(clouds[..., 0]))
    invalid_flat = ~isvalid.reshape(-1)

    # One row per point: (cloud index, u, v, bin).
    cloud_index = torch.repeat_interleave(
        torch.arange(0, num_clouds).to(clouds.device), h * w
    ).reshape(-1, 1)
    indexed = torch.cat((cloud_index, uvw_binned.reshape(-1, 3)), dim=1)
    indexed[invalid_flat, :] = 0

    # Flatten (cloud, u, v, bin) into a single index (Horner form).
    ind = (
        (indexed[:, 0] * map_size + indexed[:, 1]) * map_size + indexed[:, 2]
    ) * num_bins + indexed[:, 3]
    ind[invalid_flat] = 0

    # Invalid points carry weight 0, so they add nothing to cell 0.
    count = torch.bincount(
        ind.view(-1),
        isvalid.view(-1).long(),
        minlength=num_clouds * map_size * map_size * num_bins,
    )

    return count.view(*leading_shape, map_size, map_size, num_bins)
def _cpu_only_camera_space_xyz_to_world_xyz(
    camera_space_xyzs: np.ndarray,
    camera_world_xyz: np.ndarray,
    rotation: float,
    horizon: float,
):
    """NumPy-only transform of camera-space xyz points into world space.

    Adapted from https://github.com/devendrachaplot/Neural-SLAM.

    # Parameters
    camera_space_xyzs : A 3xN array of points in the camera's reference frame.
    camera_world_xyz : The camera's (3,) xyz position in the world frame.
        NOTE: the camera position is not equal to the agent position!
    rotation : The rotation (in degrees) of the camera about the y-axis.
    horizon : The horizon (in degrees) of the camera.

    # Returns
    A 3xN array of world-space points.
    """

    def _cos_sin_of_negated(degrees: float):
        # Both angles enter the transforms negated.
        radians = -degrees * np.pi / 180
        return np.cos(radians), np.sin(radians)

    # Transformation that points undergo due to the camera's horizon.
    cos_h, sin_h = _cos_sin_of_negated(horizon)
    horizon_matrix = np.array(
        [
            [1, 0, 0],  # x is unchanged
            [0, cos_h, sin_h],
            [0, -sin_h, cos_h],
        ],
        np.float64,
    )

    # Transformation that points undergo due to the rotation about the y-axis.
    cos_r, sin_r = _cos_sin_of_negated(rotation)
    rotation_matrix = np.array(
        [
            [cos_r, 0, -sin_r],
            [0, 1, 0],  # y is unchanged
            [sin_r, 0, cos_r],
        ],
        np.float64,
    )

    # Horizon first, then rotation, then translate by the camera's position.
    combined = rotation_matrix @ horizon_matrix
    return combined @ camera_space_xyzs + camera_world_xyz[:, None]
def _cpu_only_depth_frame_to_world_space_xyz(
    depth_frame: np.ndarray,
    camera_world_xyz: np.ndarray,
    rotation: float,
    horizon: float,
    fov: float,
):
    """NumPy-only conversion of a depth frame into world-space xyz points.

    Chains `_cpu_only_depth_frame_to_camera_space_xyz` (with no mask) and
    `_cpu_only_camera_space_xyz_to_world_xyz`.

    # Returns
    An array of shape `(*depth_frame.shape, 3)` whose last axis holds the
    xyz coordinates computed for each pixel.
    """
    points_in_camera_frame = _cpu_only_depth_frame_to_camera_space_xyz(
        depth_frame=depth_frame, mask=None, fov=fov
    )

    points_in_world_frame = _cpu_only_camera_space_xyz_to_world_xyz(
        camera_space_xyzs=points_in_camera_frame,
        camera_world_xyz=camera_world_xyz,
        rotation=rotation,
        horizon=horizon,
    )

    # 3xN -> 3 x (frame shape), then move the coordinate axis to the end.
    per_pixel_xyz = points_in_world_frame.reshape((3, *depth_frame.shape))
    return per_pixel_xyz.transpose((1, 2, 0))
They are then expected to have their values within the interval [0, ..., map_size - 1]. resolution_in_cm: The resolution_in_cm, in cm, of the map output from this function. Every grid square of the map corresponds to a (`resolution_in_cm`x`resolution_in_cm`) square in space. flip_row_col: Should the rows/cols of the map be flipped # Returns A collection of maps of shape (... x map_size x map_size x (len(bins)+1)), note that bin_axis has been moved to the last index of this returned map, the other two axes stay in their original order unless `flip_row_col` has been called in which case they are reversed (useful if you give points as often rows should correspond to y or z instead of x). """ bin_dim = ["x", "y", "z"].index(bin_axis) start_shape = xyz_points.shape xyz_points = xyz_points.reshape([-1, *start_shape[-3:]]) num_clouds, h, w, _ = xyz_points.shape if not flip_row_col: new_order = [i for i in [0, 1, 2] if i != bin_dim] + [bin_dim] else: new_order = [i for i in [2, 1, 0] if i != bin_dim] + [bin_dim] uvw_points: np.ndarray = np.stack([xyz_points[..., i] for i in new_order], axis=-1) num_bins = len(bins) + 1 isnotnan = ~np.isnan(xyz_points[..., 0]) uvw_points_binned = np.concatenate( ( np.round(100 * uvw_points[..., :-1] / resolution_in_cm).astype(np.int32), np.digitize(uvw_points[..., -1:], bins=bins).astype(np.int32), ), axis=-1, ) maxes = np.array([map_size, map_size, num_bins]).reshape((1, 1, 1, 3)) isvalid = np.logical_and.reduce( ( (uvw_points_binned >= 0).all(-1), (uvw_points_binned < maxes).all(-1), isnotnan, ) ) uvw_points_binned_with_index_mat = np.concatenate( ( np.repeat(np.arange(0, num_clouds), h * w).reshape(-1, 1), uvw_points_binned.reshape(-1, 3), ), axis=1, ) uvw_points_binned_with_index_mat[~isvalid.reshape(-1), :] = 0 ind = np.ravel_multi_index( uvw_points_binned_with_index_mat.transpose(), (num_clouds, map_size, map_size, num_bins), ) ind[~isvalid.reshape(-1)] = 0 count = np.bincount( ind.ravel(), isvalid.ravel().astype(np.int32), 
class AuxiliaryModel(nn.Module):
    """Container for the networks used by the self-supervised auxiliary
    tasks.

    Which sub-modules are created depends on `aux_uuid`, see
    `initialize_model_given_aux_uuid`.
    """

    def __init__(
        self,
        aux_uuid: str,
        action_dim: int,
        obs_embed_dim: int,
        belief_dim: int,
        action_embed_size: int = 4,
        cpca_classifier_hidden_dim: int = 32,
        cpca_softmax_dim: int = 128,
    ):
        super().__init__()

        self.aux_uuid = aux_uuid
        self.action_dim = action_dim
        self.obs_embed_dim = obs_embed_dim
        self.belief_dim = belief_dim
        self.action_embed_size = action_embed_size
        self.cpca_classifier_hidden_dim = cpca_classifier_hidden_dim
        self.cpca_softmax_dim = cpca_softmax_dim

        self.initialize_model_given_aux_uuid(self.aux_uuid)

    @staticmethod
    def _two_layer_mlp(in_dim: int, hidden_dim: int, out_dim: int) -> nn.Sequential:
        """Linear -> ReLU -> Linear."""
        return nn.Sequential(
            nn.Linear(in_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, out_dim),
        )

    def initialize_model_given_aux_uuid(self, aux_uuid: str):
        """Create the sub-modules required by the auxiliary task `aux_uuid`.

        Raises a `ValueError` if `aux_uuid` matches none of the known losses.
        """
        if aux_uuid == InverseDynamicsLoss.UUID:
            self.init_inverse_dynamics()
            return
        if aux_uuid == TemporalDistanceLoss.UUID:
            self.init_temporal_distance()
            return
        # Substring match: the CPCA family comes with various k
        if CPCALoss.UUID in aux_uuid:
            self.init_cpca()
            return
        if CPCASoftMaxLoss.UUID in aux_uuid:
            self.init_cpca_softmax()
            return
        raise ValueError("Unknown Auxiliary Loss UUID")

    def init_inverse_dynamics(self):
        """Linear decoder producing `action_dim` outputs."""
        decoder_input_dim = 2 * self.obs_embed_dim + self.belief_dim
        self.decoder = nn.Linear(decoder_input_dim, self.action_dim)

    def init_temporal_distance(self):
        """Linear decoder producing a single output."""
        decoder_input_dim = 2 * self.obs_embed_dim + self.belief_dim
        self.decoder = nn.Linear(decoder_input_dim, 1)

    def init_cpca(self):
        """Action embedder, GRU context model and a classifier MLP."""
        ## Auto-regressive model to predict future context
        # NOTE: add extra 1 in embedding dict cuz we will pad zero actions?
        num_embeddings = self.action_dim + 1
        self.action_embedder = FeatureEmbedding(num_embeddings, self.action_embed_size)
        self.context_model = nn.GRU(self.action_embed_size, self.belief_dim)

        ## Classifier to estimate mutual information
        self.classifier = self._two_layer_mlp(
            self.belief_dim + self.obs_embed_dim,
            self.cpca_classifier_hidden_dim,
            1,
        )

    def init_cpca_softmax(self):
        """Same action embedder and context model as `init_cpca`, with two
        MLPs (visual and belief) for the contrastive losses."""
        # NOTE: add extra 1 in embedding dict cuz we will pad zero actions?
        num_embeddings = self.action_dim + 1
        self.action_embedder = FeatureEmbedding(num_embeddings, self.action_embed_size)
        self.context_model = nn.GRU(self.action_embed_size, self.belief_dim)

        # Both MLPs map into a space of size `cpca_softmax_dim`.
        self.visual_mlp = self._two_layer_mlp(
            self.obs_embed_dim,
            self.cpca_classifier_hidden_dim,
            self.cpca_softmax_dim,
        )
        self.belief_mlp = self._two_layer_mlp(
            self.belief_dim,
            self.cpca_classifier_hidden_dim,
            self.cpca_softmax_dim,
        )

    def forward(self, features: torch.FloatTensor):
        """Apply the decoder to `features`.

        Only supported for the inverse-dynamics and temporal-distance tasks;
        any other `aux_uuid` raises `NotImplementedError`.
        """
        decodable_uuids = (InverseDynamicsLoss.UUID, TemporalDistanceLoss.UUID)
        if self.aux_uuid not in decodable_uuids:
            raise NotImplementedError(
                f"Auxiliary model with UUID {self.aux_uuid} does not support `forward` call."
            )
        return self.decoder(features)
""" super().__init__() self.rgb_uuid = rgb_uuid if self.rgb_uuid is not None: assert self.rgb_uuid in observation_space.spaces self._n_input_rgb = observation_space.spaces[self.rgb_uuid].shape[2] assert self._n_input_rgb >= 0 else: self._n_input_rgb = 0 self.depth_uuid = depth_uuid if self.depth_uuid is not None: assert self.depth_uuid in observation_space.spaces self._n_input_depth = observation_space.spaces[self.depth_uuid].shape[2] assert self._n_input_depth >= 0 else: self._n_input_depth = 0 if not self.is_blind: # hyperparameters for layers self._cnn_layers_channels = list(layer_channels) self._cnn_layers_kernel_size = list(kernel_sizes) self._cnn_layers_stride = list(layers_stride) self._cnn_layers_paddings = list(paddings) self._cnn_layers_dilations = list(dilations) if self._n_input_rgb > 0: input_rgb_cnn_dims = np.array( observation_space.spaces[self.rgb_uuid].shape[:2], dtype=np.float32 ) self.rgb_cnn = self.make_cnn_from_params( output_size=output_size, input_dims=input_rgb_cnn_dims, input_channels=self._n_input_rgb, flatten=flatten, output_relu=output_relu, ) if self._n_input_depth > 0: input_depth_cnn_dims = np.array( observation_space.spaces[self.depth_uuid].shape[:2], dtype=np.float32, ) self.depth_cnn = self.make_cnn_from_params( output_size=output_size, input_dims=input_depth_cnn_dims, input_channels=self._n_input_depth, flatten=flatten, output_relu=output_relu, ) def make_cnn_from_params( self, output_size: int, input_dims: np.ndarray, input_channels: int, flatten: bool, output_relu: bool, ) -> nn.Module: output_dims = input_dims for kernel_size, stride, padding, dilation in zip( self._cnn_layers_kernel_size, self._cnn_layers_stride, self._cnn_layers_paddings, self._cnn_layers_dilations, ): # noinspection PyUnboundLocalVariable output_dims = self._conv_output_dim( dimension=output_dims, padding=np.array(padding, dtype=np.float32), dilation=np.array(dilation, dtype=np.float32), kernel_size=np.array(kernel_size, dtype=np.float32), 
stride=np.array(stride, dtype=np.float32), ) # noinspection PyUnboundLocalVariable cnn = make_cnn( input_channels=input_channels, layer_channels=self._cnn_layers_channels, kernel_sizes=self._cnn_layers_kernel_size, strides=self._cnn_layers_stride, paddings=self._cnn_layers_paddings, dilations=self._cnn_layers_dilations, output_height=output_dims[0], output_width=output_dims[1], output_channels=output_size, flatten=flatten, output_relu=output_relu, ) self.layer_init(cnn) return cnn @staticmethod def _conv_output_dim( dimension: Sequence[int], padding: Sequence[int], dilation: Sequence[int], kernel_size: Sequence[int], stride: Sequence[int], ) -> Tuple[int, ...]: """Calculates the output height and width based on the input height and width to the convolution layer. For parameter definitions see. [here](https://pytorch.org/docs/master/nn.html#torch.nn.Conv2d). # Parameters dimension : See above link. padding : See above link. dilation : See above link. kernel_size : See above link. stride : See above link. 
""" assert len(dimension) == 2 out_dimension = [] for i in range(len(dimension)): out_dimension.append( int( np.floor( ( ( dimension[i] + 2 * padding[i] - dilation[i] * (kernel_size[i] - 1) - 1 ) / stride[i] ) + 1 ) ) ) return tuple(out_dimension) @staticmethod def layer_init(cnn) -> None: """Initialize layer parameters using Kaiming normal.""" for layer in cnn: if isinstance(layer, (nn.Conv2d, nn.Linear)): nn.init.kaiming_normal_(layer.weight, nn.init.calculate_gain("relu")) if layer.bias is not None: nn.init.constant_(layer.bias, val=0) @property def is_blind(self): """True if the observation space doesn't include `self.rgb_uuid` or `self.depth_uuid`.""" return self._n_input_rgb + self._n_input_depth == 0 def forward(self, observations: Dict[str, torch.Tensor]): # type: ignore if self.is_blind: return None def check_use_agent(new_setting): if use_agent is not None: assert ( use_agent is new_setting ), "rgb and depth must both use an agent dim or none" return new_setting cnn_output_list: List[torch.Tensor] = [] use_agent: Optional[bool] = None if self.rgb_uuid is not None: use_agent = check_use_agent(len(observations[self.rgb_uuid].shape) == 6) cnn_output_list.append( compute_cnn_output(self.rgb_cnn, observations[self.rgb_uuid]) ) if self.depth_uuid is not None: use_agent = check_use_agent(len(observations[self.depth_uuid].shape) == 6) cnn_output_list.append( compute_cnn_output(self.depth_cnn, observations[self.depth_uuid]) ) if use_agent: channels_dim = 3 # [step, sampler, agent, channel (, height, width)] else: channels_dim = 2 # [step, sampler, channel (, height, width)] return torch.cat(cnn_output_list, dim=channels_dim) class RNNStateEncoder(nn.Module): """A simple RNN-based model playing a role in many baseline embodied- navigation agents. See `seq_forward` for more details of how this model is used. 
""" def __init__( self, input_size: int, hidden_size: int, num_layers: int = 1, rnn_type: str = "GRU", trainable_masked_hidden_state: bool = False, ): """An RNN for encoding the state in RL. Supports masking the hidden state during various timesteps in the forward lass. # Parameters input_size : The input size of the RNN. hidden_size : The hidden size. num_layers : The number of recurrent layers. rnn_type : The RNN cell type. Must be GRU or LSTM. trainable_masked_hidden_state : If `True` the initial hidden state (used at the start of a Task) is trainable (as opposed to being a vector of zeros). """ super().__init__() self._num_recurrent_layers = num_layers self._rnn_type = rnn_type self.rnn = getattr(torch.nn, rnn_type)( input_size=input_size, hidden_size=hidden_size, num_layers=num_layers ) self.trainable_masked_hidden_state = trainable_masked_hidden_state if trainable_masked_hidden_state: self.init_hidden_state = nn.Parameter( 0.1 * torch.randn((num_layers, 1, hidden_size)), requires_grad=True ) self.layer_init() def layer_init(self): """Initialize the RNN parameters in the model.""" for name, param in self.rnn.named_parameters(): if "weight" in name: nn.init.orthogonal_(param) elif "bias" in name: nn.init.constant_(param, 0) @property def num_recurrent_layers(self) -> int: """The number of recurrent layers in the network.""" return self._num_recurrent_layers * (2 if "LSTM" in self._rnn_type else 1) def _pack_hidden( self, hidden_states: Union[torch.FloatTensor, Sequence[torch.FloatTensor]] ) -> torch.FloatTensor: """Stacks hidden states in an LSTM together (if using a GRU rather than an LSTM this is just the identity). # Parameters hidden_states : The hidden states to (possibly) stack. 
""" if "LSTM" in self._rnn_type: hidden_states = cast( torch.FloatTensor, torch.cat([hidden_states[0], hidden_states[1]], dim=0), ) return cast(torch.FloatTensor, hidden_states) def _unpack_hidden( self, hidden_states: torch.FloatTensor ) -> Union[torch.FloatTensor, Tuple[torch.FloatTensor, torch.FloatTensor]]: """Partial inverse of `_pack_hidden` (exact if there are 2 hidden layers).""" if "LSTM" in self._rnn_type: new_hidden_states = ( hidden_states[0 : self._num_recurrent_layers], hidden_states[self._num_recurrent_layers :], ) return cast(Tuple[torch.FloatTensor, torch.FloatTensor], new_hidden_states) return cast(torch.FloatTensor, hidden_states) def _mask_hidden( self, hidden_states: Union[Tuple[torch.FloatTensor, ...], torch.FloatTensor], masks: torch.FloatTensor, ) -> Union[Tuple[torch.FloatTensor, ...], torch.FloatTensor]: """Mask input hidden states given `masks`. Useful when masks represent steps on which a task has completed. # Parameters hidden_states : The hidden states. masks : Masks to apply to hidden states (see seq_forward). # Returns Masked hidden states. Here masked hidden states will be replaced with either all zeros (if `trainable_masked_hidden_state` was False) and will otherwise be a learnable collection of parameters. 
""" if not self.trainable_masked_hidden_state: if isinstance(hidden_states, tuple): hidden_states = tuple( cast(torch.FloatTensor, v * masks) for v in hidden_states ) else: hidden_states = cast(torch.FloatTensor, masks * hidden_states) else: if isinstance(hidden_states, tuple): # noinspection PyTypeChecker hidden_states = tuple( v * masks # type:ignore + (1.0 - masks) * (self.init_hidden_state.repeat(1, v.shape[1], 1)) # type: ignore for v in hidden_states # type:ignore ) # type: ignore else: # noinspection PyTypeChecker hidden_states = masks * hidden_states + (1 - masks) * ( # type: ignore self.init_hidden_state.repeat(1, hidden_states.shape[1], 1) ) return hidden_states def single_forward( self, x: torch.FloatTensor, hidden_states: torch.FloatTensor, masks: torch.FloatTensor, ) -> Tuple[ torch.FloatTensor, Union[torch.FloatTensor, Tuple[torch.FloatTensor, ...]] ]: """Forward for a single-step input.""" ( x, hidden_states, masks, mem_agent, obs_agent, nsteps, nsamplers, nagents, ) = self.adapt_input(x, hidden_states, masks) unpacked_hidden_states = self._unpack_hidden(hidden_states) x, unpacked_hidden_states = self.rnn( x, self._mask_hidden( unpacked_hidden_states, cast(torch.FloatTensor, masks[0].view(1, -1, 1)) ), ) return self.adapt_result( x, self._pack_hidden(unpacked_hidden_states), mem_agent, obs_agent, nsteps, nsamplers, nagents, ) def adapt_input( self, x: torch.FloatTensor, hidden_states: torch.FloatTensor, masks: torch.FloatTensor, ) -> Tuple[ torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, bool, bool, int, int, int, ]: nsteps, nsamplers = masks.shape[:2] assert len(hidden_states.shape) in [ 3, 4, ], "hidden_states must be [layer, sampler, hidden] or [layer, sampler, agent, hidden]" assert len(x.shape) in [ 3, 4, ], "observations must be [step, sampler, data] or [step, sampler, agent, data]" nagents = 1 mem_agent: bool if len(hidden_states.shape) == 4: # [layer, sampler, agent, hidden] mem_agent = True nagents = hidden_states.shape[2] else: # 
def adapt_result(
    self,
    outputs: torch.FloatTensor,
    hidden_states: torch.FloatTensor,
    mem_agent: bool,
    obs_agent: bool,
    nsteps: int,
    nsamplers: int,
    nagents: int,
) -> Tuple[
    torch.FloatTensor,
    torch.FloatTensor,
]:
    """Reshape RNN results from the flattened layout back to the caller's.

    `outputs` becomes `[nsteps, nsamplers, (nagents,) -1]`, with the agent
    dim only when `obs_agent` is set. `hidden_states` becomes
    `[num_recurrent_layers, nsamplers, (nagents,) -1]`, with the agent dim
    only when `mem_agent` is set.
    """

    def trailing_dims(with_agent_dim: bool) -> tuple:
        return (nagents, -1) if with_agent_dim else (-1,)

    out_shape = (nsteps, nsamplers, *trailing_dims(obs_agent))
    mem_shape = (self.num_recurrent_layers, nsamplers, *trailing_dims(mem_agent))

    reshaped_outputs = cast(torch.FloatTensor, outputs.view(*out_shape))
    reshaped_hidden = cast(torch.FloatTensor, hidden_states.view(*mem_shape))
    return reshaped_outputs, reshaped_hidden
def forward(  # type: ignore
    self,
    x: torch.FloatTensor,
    hidden_states: torch.FloatTensor,
    masks: torch.FloatTensor,
) -> Tuple[
    torch.FloatTensor, Union[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]
]:
    """Route to the single-step or the sequence implementation.

    The number of steps is read from the first dim of `masks`: exactly
    one step goes to `single_forward`, anything else to `seq_forward`.
    """
    is_single_step = masks.shape[0] == 1
    run = self.single_forward if is_single_step else self.seq_forward
    return run(x, hidden_states, masks)
class RNNActorCritic(ActorCriticModel[Distr]):
    """Actor-critic made of an `RNNStateEncoder` followed by a head model.

    A single 1-D `Box` observation (selected by `input_uuid`) is encoded by
    the recurrent state encoder; the encoder output is then passed, under
    the uuid `"rnn_<input_uuid>"`, to a non-recurrent actor-critic head
    created from `head_type`.
    """

    def __init__(
        self,
        input_uuid: str,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        hidden_size: int = 128,
        num_layers: int = 1,
        rnn_type: str = "GRU",
        head_type: Callable[..., ActorCriticModel[Distr]] = LinearActorCritic,
    ):
        """Initializer.

        # Parameters

        input_uuid : Key of the observation fed to the RNN. Must be in
            `observation_space` and correspond to a 1-D `Box` space.
        action_space : Action space, forwarded to the base class and head.
        observation_space : Observation space of the agent.
        hidden_size : Hidden size of the RNN; also the head's input size.
        num_layers : Number of recurrent layers.
        rnn_type : RNN cell type passed to `RNNStateEncoder`.
        head_type : Callable building the non-recurrent head. It is called
            with `input_uuid`, `action_space` and `observation_space`.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)
        self.hidden_size = hidden_size
        self.rnn_type = rnn_type

        # The message previously named `LinearActorCritic` (copy-paste).
        assert (
            input_uuid in observation_space.spaces
        ), "RNNActorCritic expects only a single observational input."
        self.input_uuid = input_uuid

        box_space: gym.spaces.Box = observation_space[self.input_uuid]

        # The two literals are concatenated, so the first one needs a
        # trailing space (it used to read "requires thatobservation").
        assert isinstance(box_space, gym.spaces.Box), (
            "RNNActorCritic requires that "
            "observation space corresponding to the input uuid is a Box space."
        )
        assert len(box_space.shape) == 1
        self.in_dim = box_space.shape[0]

        self.state_encoder = RNNStateEncoder(
            input_size=self.in_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            rnn_type=rnn_type,
            trainable_masked_hidden_state=True,
        )

        self.head_uuid = "{}_{}".format("rnn", input_uuid)

        # The head sees a single observation: the RNN output vector.
        self.ac_nonrecurrent_head: ActorCriticModel[Distr] = head_type(
            input_uuid=self.head_uuid,
            action_space=action_space,
            observation_space=SpaceDict(
                {
                    self.head_uuid: gym.spaces.Box(
                        low=np.float32(0.0), high=np.float32(1.0), shape=(hidden_size,)
                    )
                }
            ),
        )

        self.memory_key = "rnn"

    @property
    def recurrent_hidden_state_size(self) -> int:
        """Size of the recurrent hidden state (equal to `hidden_size`)."""
        return self.hidden_size

    @property
    def num_recurrent_layers(self) -> int:
        """Number of recurrent layers as reported by the state encoder."""
        return self.state_encoder.num_recurrent_layers

    def _recurrent_memory_specification(self):
        """Describe the single memory tensor stored under `memory_key`.

        Its dims are (layer, sampler, hidden) and its dtype is float32.
        """
        return {
            self.memory_key: (
                (
                    ("layer", self.num_recurrent_layers),
                    ("sampler", None),
                    ("hidden", self.recurrent_hidden_state_size),
                ),
                torch.float32,
            )
        }

    def forward(  # type:ignore
        self,
        observations: Dict[str, Union[torch.FloatTensor, Dict[str, Any]]],
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Encode the observation with the RNN and evaluate the head.

        If `memory` has no entry for `memory_key`, a warning is logged and
        an all-zero float32 tensor of shape
        `(num_recurrent_layers, obs.shape[1], recurrent_hidden_state_size)`
        is appended first.

        # Returns

        The head's actor-critic output and `memory` with the entry for
        `memory_key` replaced by the new hidden state.
        """
        if self.memory_key not in memory:
            get_logger().warning(
                f"Key {self.memory_key} not found in memory,"
                f" initializing this as all zeros."
            )

            obs = observations[self.input_uuid]
            # Zero float32 tensor on the observation's device; replaces
            # the legacy `obs.new(...).float().zero_()` chain.
            memory.check_append(
                key=self.memory_key,
                tensor=obs.new_zeros(
                    (
                        self.num_recurrent_layers,
                        obs.shape[1],
                        self.recurrent_hidden_state_size,
                    ),
                    dtype=torch.float32,
                ),
                sampler_dim=1,
            )

        rnn_out, mem_return = self.state_encoder(
            x=observations[self.input_uuid],
            hidden_states=memory.tensor(self.memory_key),
            masks=masks,
        )

        # noinspection PyCallingNonCallable
        out, _ = self.ac_nonrecurrent_head(
            observations={self.head_uuid: rnn_out},
            memory=None,
            prev_actions=prev_actions,
            masks=masks,
        )

        # noinspection PyArgumentList
        return (
            out,
            memory.set_tensor(self.memory_key, mem_return),
        )
class AverageFusion(Fusion):
    """Fusion that gives every task the same weight, `1 / num_tasks`."""

    UUID = "avg"

    def get_belief_weights(
        self,
        all_beliefs: torch.FloatTensor,  # (T*N, H, K)
        obs_embeds: torch.FloatTensor,  # (T*N, Z)
    ) -> torch.FloatTensor:  # (T*N, K)
        """Return uniform fusion weights.

        # Parameters

        all_beliefs : Stacked beliefs; only its first dim, dtype and
            device are used.
        obs_embeds : Unused; present to match the `Fusion` interface.

        # Returns

        A `(all_beliefs.shape[0], num_tasks)` tensor filled with
        `1 / num_tasks`, with the dtype and device of `all_beliefs`.
        """
        batch_size = all_beliefs.shape[0]
        # Allocate directly with the dtype/device of `all_beliefs` rather
        # than building a default CPU tensor and converting it on each call.
        weights = all_beliefs.new_ones((batch_size, self.num_tasks))
        weights /= self.num_tasks
        return weights
def conv3x3(in_planes, out_planes, stride=1, groups=1):
    """Return a bias-free 3x3 `nn.Conv2d` with padding 1.

    `stride` and `groups` are forwarded unchanged to the convolution.
    """
    conv_settings = dict(
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
        groups=groups,
    )
    return nn.Conv2d(in_planes, out_planes, **conv_settings)
class SE(nn.Module):
    """Squeeze-and-excitation gate.

    The input is average-pooled to one value per channel, sent through a
    two-layer MLP with a bottleneck of `int(planes / r)` units and a final
    sigmoid, and returned as a `(batch, channels, 1, 1)` tensor.
    """

    def __init__(self, planes, r=16):
        """Build the pooling layer and the gating MLP.

        # Parameters

        planes : Number of input (and output) channels.
        r : Reduction ratio of the MLP bottleneck.
        """
        super().__init__()
        bottleneck = int(planes / r)
        self.squeeze = nn.AdaptiveAvgPool2d(1)
        self.excite = nn.Sequential(
            nn.Linear(planes, bottleneck),
            nn.ReLU(True),
            nn.Linear(bottleneck, planes),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Compute per-channel gates for a 4-D input."""
        batch, channels, _height, _width = x.size()
        pooled = self.squeeze(x).view(batch, channels)
        gates = self.excite(pooled)
        return gates.view(batch, channels, 1, 1)
class GroupNormResNet(nn.Module):
    """ResNet-style backbone that normalises with `nn.GroupNorm`.

    The network is a 7x7 stride-2 stem, a stride-2 max-pool and four
    stages built from `block`. Stages 2-4 use stride 2.

    After construction, `final_channels` holds the channel count of the
    last stage and `final_spatial_compress` the overall spatial scale.
    """

    def __init__(self, in_channels, base_planes, ngroups, block, layers, cardinality=1):
        """Build the stem and the four stages.

        # Parameters

        in_channels : Channels of the input image.
        base_planes : Width of the stem; doubled for the stages when
            `block.resneXt` is true.
        ngroups : Number of groups for every `nn.GroupNorm`.
        block : Block class; must expose `expansion` and `resneXt`.
        layers : Number of blocks in each of the four stages.
        cardinality : Passed to the first block of every stage.
        """
        super(GroupNormResNet, self).__init__()
        stem_conv = nn.Conv2d(
            in_channels,
            base_planes,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
        )
        self.conv1 = nn.Sequential(
            stem_conv,
            nn.GroupNorm(ngroups, base_planes),
            nn.ReLU(True),
        )
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.cardinality = cardinality

        # `inplanes` tracks the channel count entering the next block and
        # is updated by `_make_layer`.
        self.inplanes = base_planes
        stage_planes = base_planes * 2 if block.resneXt else base_planes

        self.layer1 = self._make_layer(block, ngroups, stage_planes, layers[0])
        self.layer2 = self._make_layer(
            block, ngroups, stage_planes * 2, layers[1], stride=2
        )
        self.layer3 = self._make_layer(
            block, ngroups, stage_planes * 2 * 2, layers[2], stride=2
        )
        self.layer4 = self._make_layer(
            block, ngroups, stage_planes * 2 * 2 * 2, layers[3], stride=2
        )

        self.final_channels = self.inplanes
        # Five stride-2 steps: stem, max-pool and stages 2-4.
        self.final_spatial_compress = 1.0 / 32

    def _make_layer(self, block, ngroups, planes, blocks, stride=1):
        """Create one stage of `blocks` blocks and update `self.inplanes`.

        A 1x1-conv + GroupNorm projection is added to the first block's
        shortcut whenever the stride or the channel count changes.

        NOTE(review): only the first block of the stage receives
        `cardinality`; the following ones use the block class's default.
        Kept as-is because changing it would change the built network -
        confirm whether this is intended.
        """
        out_planes = planes * block.expansion
        shortcut = None
        if stride != 1 or self.inplanes != out_planes:
            shortcut = nn.Sequential(
                conv1x1(self.inplanes, out_planes, stride),
                nn.GroupNorm(ngroups, out_planes),
            )

        stage = [
            block(
                self.inplanes,
                planes,
                ngroups,
                stride,
                shortcut,
                cardinality=self.cardinality,
            )
        ]
        self.inplanes = out_planes
        stage.extend(block(self.inplanes, planes, ngroups) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Apply the stem, the max-pool and the four stages in order."""
        pipeline = (
            self.conv1,
            self.maxpool,
            self.layer1,
            self.layer2,
            self.layer3,
            self.layer4,
        )
        for stage in pipeline:
            x = stage(x)
        return x
class GroupNormResNetEncoder(nn.Module):
    """Visual encoder: GroupNorm ResNet backbone, compression conv, linear head.

    RGB and/or depth observations are concatenated along the channel dim,
    downsampled by 2, and passed through the backbone, a 3x3 compression
    convolution and a flatten + linear + ReLU head of size `output_size`.
    When neither `rgb_uuid` nor `depth_uuid` contributes channels the
    encoder is "blind": no sub-module is created and `forward` returns
    `None`.
    """

    def __init__(
        self,
        observation_space: SpaceDict,
        rgb_uuid: Optional[str],
        depth_uuid: Optional[str],
        output_size: int,
        baseplanes=32,
        ngroups=32,
        make_backbone=None,
    ):
        """Initializer.

        # Parameters

        observation_space : Space containing the used image observations.
        rgb_uuid : Key of the RGB observation, or `None`.
        depth_uuid : Key of the depth observation, or `None`.
        output_size : Size of the produced embedding.
        baseplanes : Forwarded to `make_backbone`.
        ngroups : Forwarded to `make_backbone`.
        make_backbone : Callable `(input_channels, baseplanes, ngroups)`
            returning the backbone. It is called whenever the encoder is
            not blind, so it must be provided in that case.
        """
        super().__init__()

        self._inputs = []

        def register_input(uuid):
            """Validate one input and return its channel count (0 if unused)."""
            if uuid is None:
                return 0
            assert uuid in observation_space.spaces
            n_channels = observation_space.spaces[uuid].shape[2]
            assert n_channels >= 0
            self._inputs.append(uuid)
            return n_channels

        self.rgb_uuid = rgb_uuid
        self._n_input_rgb = register_input(self.rgb_uuid)

        self.depth_uuid = depth_uuid
        self._n_input_depth = register_input(self.depth_uuid)

        if self.is_blind:
            return

        # Height of the first input after the 2x pooling done in `forward`.
        # The original comment, "H (=W) / 2", indicates square images are
        # assumed.
        first_space = observation_space.spaces[self._inputs[0]]
        spatial_size = first_space.shape[0] // 2

        # RGB and depth are fed to one model as stacked channels.
        input_channels = self._n_input_rgb + self._n_input_depth

        self.backbone = make_backbone(input_channels, baseplanes, ngroups)

        # `ceil` rather than `int()`: per the original comment this fixes
        # a bug in the habitat implementation.
        final_spatial = int(
            np.ceil(spatial_size * self.backbone.final_spatial_compress)
        )
        after_compression_flat_size = 2048
        num_compression_channels = int(
            round(after_compression_flat_size / (final_spatial**2))
        )
        self.compression = nn.Sequential(
            nn.Conv2d(
                self.backbone.final_channels,
                num_compression_channels,
                kernel_size=3,
                padding=1,
                bias=False,
            ),
            nn.GroupNorm(1, num_compression_channels),
            nn.ReLU(True),
        )

        self.output_shape = (
            num_compression_channels,
            final_spatial,
            final_spatial,
        )

        self.head = nn.Sequential(
            Flatten(),
            nn.Linear(np.prod(self.output_shape), output_size),
            nn.ReLU(True),
        )

        self.layer_init()

    @property
    def is_blind(self):
        """True when neither RGB nor depth contributes any channel."""
        return self._n_input_rgb + self._n_input_depth == 0

    def layer_init(self):
        """Re-initialise all `Conv2d` / `Linear` sub-modules.

        Weights use `kaiming_normal_` with the ReLU gain passed as its
        second positional argument; biases, when present, are zeroed.
        """
        second_positional_arg = nn.init.calculate_gain("relu")
        for module in self.modules():
            if not isinstance(module, (nn.Conv2d, nn.Linear)):
                continue
            nn.init.kaiming_normal_(module.weight, second_positional_arg)
            if module.bias is not None:
                nn.init.constant_(module.bias, val=0)
        get_logger().debug("Initializing resnet encoder")

    def forward(self, observations):
        """Embed the image observations.

        Each used observation must have shape
        `[STEP, SAMPLER, (AGENT,) dim1, dim2, dim3]`. The result has shape
        `[STEP, SAMPLER, (AGENT,) output_size]`, or is `None` for a blind
        encoder.
        """
        if self.is_blind:
            return None

        # TODO: the reshape follows compute_cnn_output()
        # but it's hard to make the forward as a nn.Module as cnn param
        nagents: Optional[int] = None
        nsteps: Optional[int] = None
        nsamplers: Optional[int] = None

        assert len(self._inputs) > 0

        flattened_inputs = []
        for uuid in self._inputs:
            obs = observations[uuid]
            rank = len(obs.shape)
            assert rank in [
                5,
                6,
            ], "CNN input must have shape [STEP, SAMPLER, (AGENT,) dim1, dim2, dim3]"
            if rank == 6:
                nsteps, nsamplers, nagents = obs.shape[:3]
            else:
                nsteps, nsamplers = obs.shape[:2]

            # Make FLAT_BATCH = nsteps * nsamplers (* nagents)
            n_leading = 2 + int(nagents is not None)
            obs = obs.view((-1,) + obs.shape[n_leading:])
            # permute tensor to dimension [BATCH x CHANNEL x HEIGHT X WIDTH]
            flattened_inputs.append(obs.permute(0, 3, 1, 2))

        x = torch.cat(flattened_inputs, dim=1)
        x = F.avg_pool2d(x, 2)  # 2x downsampling

        for stage in (self.backbone, self.compression, self.head):
            x = stage(x)

        # Restore the leading [STEP, SAMPLER, (AGENT,)] dims.
        if nagents is not None:
            leading = (nsteps, nsamplers, nagents)
        else:
            leading = (nsteps, nsamplers)
        return x.reshape(leading + x.shape[1:])
""" action_space: gym.spaces.Discrete def __init__( self, action_space: gym.spaces.Discrete, observation_space: SpaceDict, hidden_size=512, multiple_beliefs=False, beliefs_fusion: Optional[FusionType] = None, auxiliary_uuids: Optional[List[str]] = None, auxiliary_model_class=AuxiliaryModel, ): super().__init__(action_space=action_space, observation_space=observation_space) self._hidden_size = hidden_size assert multiple_beliefs == (beliefs_fusion is not None) self.multiple_beliefs = multiple_beliefs self.beliefs_fusion = beliefs_fusion self.auxiliary_uuids = auxiliary_uuids if isinstance(self.auxiliary_uuids, list) and len(self.auxiliary_uuids) == 0: self.auxiliary_uuids = None # Define the placeholders in init function self.state_encoders: Optional[nn.ModuleDict] = None self.aux_models: Optional[nn.ModuleDict] = None self.actor: Optional[LinearActorHead] = None self.critic: Optional[LinearCriticHead] = None self.prev_action_embedder: Optional[FeatureEmbedding] = None self.fusion_model: Optional[nn.Module] = None self.belief_names: Optional[Sequence[str]] = None self.auxiliary_model_class = auxiliary_model_class def create_state_encoders( self, obs_embed_size: int, prev_action_embed_size: int, num_rnn_layers: int, rnn_type: str, add_prev_actions: bool, add_prev_action_null_token: bool, trainable_masked_hidden_state=False, ): rnn_input_size = obs_embed_size self.prev_action_embedder = FeatureEmbedding( input_size=int(add_prev_action_null_token) + self.action_space.n, output_size=prev_action_embed_size if add_prev_actions else 0, ) if add_prev_actions: rnn_input_size += prev_action_embed_size state_encoders = OrderedDict() # perserve insertion order in py3.6 if self.multiple_beliefs: # multiple belief model for aux_uuid in self.auxiliary_uuids: state_encoders[aux_uuid] = RNNStateEncoder( rnn_input_size, self._hidden_size, num_layers=num_rnn_layers, rnn_type=rnn_type, trainable_masked_hidden_state=trainable_masked_hidden_state, ) # create fusion model 
self.fusion_model = self.beliefs_fusion( hidden_size=self._hidden_size, obs_embed_size=obs_embed_size, num_tasks=len(self.auxiliary_uuids), ) else: # single belief model state_encoders["single_belief"] = RNNStateEncoder( rnn_input_size, self._hidden_size, num_layers=num_rnn_layers, rnn_type=rnn_type, trainable_masked_hidden_state=trainable_masked_hidden_state, ) self.state_encoders = nn.ModuleDict(state_encoders) self.belief_names = list(self.state_encoders.keys()) get_logger().info( "there are {} belief models: {}".format( len(self.belief_names), self.belief_names ) ) def load_state_dict(self, state_dict, **kwargs): new_state_dict = OrderedDict() for key in state_dict.keys(): if "state_encoder." in key: # old key name new_key = key.replace("state_encoder.", "state_encoders.single_belief.") elif "goal_visual_encoder.embed_class" in key: new_key = key.replace( "goal_visual_encoder.embed_class", "goal_visual_encoder.embed_goal" ) else: new_key = key new_state_dict[new_key] = state_dict[key] return super().load_state_dict(new_state_dict, **kwargs) # compatible in keys def create_actorcritic_head(self): self.actor = LinearActorHead(self._hidden_size, self.action_space.n) self.critic = LinearCriticHead(self._hidden_size) def create_aux_models(self, obs_embed_size: int, action_embed_size: int): if self.auxiliary_uuids is None: return aux_models = OrderedDict() for aux_uuid in self.auxiliary_uuids: aux_models[aux_uuid] = self.auxiliary_model_class( aux_uuid=aux_uuid, action_dim=self.action_space.n, obs_embed_dim=obs_embed_size, belief_dim=self._hidden_size, action_embed_size=action_embed_size, ) self.aux_models = nn.ModuleDict(aux_models) @property def num_recurrent_layers(self): """Number of recurrent hidden layers.""" return list(self.state_encoders.values())[0].num_recurrent_layers @property def recurrent_hidden_state_size(self): """The recurrent hidden state size of a single model.""" return self._hidden_size def _recurrent_memory_specification(self): return { 
memory_key: ( ( ("layer", self.num_recurrent_layers), ("sampler", None), ("hidden", self.recurrent_hidden_state_size), ), torch.float32, ) for memory_key in self.belief_names } def forward_encoder(self, observations: ObservationType) -> torch.FloatTensor: raise NotImplementedError("Obs Encoder Not Implemented") def fuse_beliefs( self, beliefs_dict: Dict[str, torch.FloatTensor], obs_embeds: torch.FloatTensor, ) -> Tuple[torch.FloatTensor, Optional[torch.FloatTensor]]: all_beliefs = torch.stack(list(beliefs_dict.values()), dim=-1) # (T, N, H, k) if self.multiple_beliefs: # call the fusion model return self.fusion_model(all_beliefs=all_beliefs, obs_embeds=obs_embeds) # single belief beliefs = all_beliefs.squeeze(-1) # (T,N,H) return beliefs, None def forward( # type:ignore self, observations: ObservationType, memory: Memory, prev_actions: torch.Tensor, masks: torch.FloatTensor, ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]: """Processes input batched observations to produce new actor and critic values. Processes input batched observations (along with prior hidden states, previous actions, and masks denoting which recurrent hidden states should be masked) and returns an `ActorCriticOutput` object containing the model's policy (distribution over actions) and evaluation of the current state (value). # Parameters observations : Batched input observations. memory : `Memory` containing the hidden states from initial timepoints. prev_actions : Tensor of previous actions taken. masks : Masks applied to hidden states. See `RNNStateEncoder`. # Returns Tuple of the `ActorCriticOutput` and recurrent hidden state. """ # 1.1 use perception model (i.e. 
encoder) to get observation embeddings obs_embeds = self.forward_encoder(observations) # 1.2 use embedding model to get prev_action embeddings if self.prev_action_embedder.input_size == self.action_space.n + 1: # In this case we have a unique embedding for the start of an episode prev_actions_embeds = self.prev_action_embedder( torch.where( condition=0 != masks.view(*prev_actions.shape), input=prev_actions + 1, other=torch.zeros_like(prev_actions), ) ) else: prev_actions_embeds = self.prev_action_embedder(prev_actions) joint_embeds = torch.cat((obs_embeds, prev_actions_embeds), dim=-1) # (T, N, *) # 2. use RNNs to get single/multiple beliefs beliefs_dict = {} for key, model in self.state_encoders.items(): beliefs_dict[key], rnn_hidden_states = model( joint_embeds, memory.tensor(key), masks ) memory.set_tensor(key, rnn_hidden_states) # update memory here # 3. fuse beliefs for multiple belief models beliefs, task_weights = self.fuse_beliefs( beliefs_dict, obs_embeds ) # fused beliefs # 4. 
prepare output extras = ( { aux_uuid: { "beliefs": ( beliefs_dict[aux_uuid] if self.multiple_beliefs else beliefs ), "obs_embeds": obs_embeds, "aux_model": ( self.aux_models[aux_uuid] if aux_uuid in self.aux_models else None ), } for aux_uuid in self.auxiliary_uuids } if self.auxiliary_uuids is not None else {} ) if self.multiple_beliefs: extras[MultiAuxTaskNegEntropyLoss.UUID] = task_weights actor_critic_output = ActorCriticOutput( distributions=self.actor(beliefs), values=self.critic(beliefs), extras=extras, ) return actor_critic_output, memory ================================================ FILE: allenact/embodiedai/preprocessors/__init__.py ================================================ ================================================ FILE: allenact/embodiedai/preprocessors/resnet.py ================================================ from typing import List, Callable, Optional, Any, cast, Dict import gym import numpy as np import torch import torch.nn as nn from torchvision import models from allenact.base_abstractions.preprocessor import Preprocessor from allenact.utils.misc_utils import prepare_locals_for_super class ResNetEmbedder(nn.Module): def __init__(self, resnet, pool=True): super().__init__() self.model = resnet self.pool = pool self.eval() def forward(self, x): with torch.no_grad(): x = self.model.conv1(x) x = self.model.bn1(x) x = self.model.relu(x) x = self.model.maxpool(x) x = self.model.layer1(x) x = self.model.layer2(x) x = self.model.layer3(x) x = self.model.layer4(x) if not self.pool: return x else: x = self.model.avgpool(x) x = torch.flatten(x, 1) return x class ResNetPreprocessor(Preprocessor): """Preprocess RGB or depth image using a ResNet model.""" def __init__( self, input_uuids: List[str], output_uuid: str, input_height: int, input_width: int, output_height: int, output_width: int, output_dims: int, pool: bool, torchvision_resnet_model: Callable[..., models.ResNet] = models.resnet18, device: Optional[torch.device] = None, device_ids: 
def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:
    """Run the (single) input observation through the ResNet embedder.

    The observation stored under the first input uuid is moved to
    `self.device`, permuted from channels-last to channels-first and, if it
    has a single channel, repeated to three channels before being passed to
    `self.resnet`.
    """
    source_uuid = self.input_uuids[0]
    frames = obs[source_uuid].to(self.device)
    frames = frames.permute(0, 3, 1, 2)  # bhwc -> bchw

    # A single-channel input (e.g. depth) is tiled across all 3 channels
    single_channel = frames.shape[1] == 1
    if single_channel:
        frames = frames.repeat(1, 3, 1, 1)

    return self.resnet(frames.to(self.device))
class VisionSensor(Sensor[EnvType, SubTaskType]):
    """Base class for image-like sensors.

    Subclasses implement `frame_from_env`; `get_observation` then passes the
    returned frame through `process_img`, which optionally rescales the frame
    and optionally normalizes it with the configured mean / standard deviation.
    """

    def __init__(
        self,
        mean: Union[Sequence[float], np.ndarray, None] = None,
        stdev: Union[Sequence[float], np.ndarray, None] = None,
        height: Optional[int] = None,
        width: Optional[int] = None,
        uuid: str = "vision",
        output_shape: Optional[Tuple[int, ...]] = None,
        output_channels: Optional[int] = None,
        unnormalized_infimum: float = -np.inf,
        unnormalized_supremum: float = np.inf,
        scale_first: bool = True,
        **kwargs: Any
    ):
        """Initializer.

        # Parameters

        mean : The images will be normalized with the given mean. Must be
            given together with `stdev` (or both left as `None`).
        stdev : The images will be normalized with the given standard deviations.
            Must be given together with `mean` (or both left as `None`).
        height : If it's a non-negative integer and `width` is also non-negative integer, the image returned from the
            environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.
        width : If it's a non-negative integer and `height` is also non-negative integer, the image returned from the
            environment will be rescaled to have `height` rows and `width` columns using bilinear sampling.
        uuid : The universally unique identifier for the sensor.
        output_shape : Optional observation space shape (alternative to `output_channels`).
        output_channels : Optional observation space number of channels (alternative to `output_shape`).
        unnormalized_infimum : Lower limit(s) for the observation space range.
        unnormalized_supremum : Upper limit(s) for the observation space range.
        scale_first : Whether to scale image before normalization (if needed).
        kwargs : Extra kwargs. Currently unused.
        """

        self._norm_means = np.array(mean) if mean is not None else None
        self._norm_sds = np.array(stdev) if stdev is not None else None
        assert (self._norm_means is None) == (self._norm_sds is None), (
            "In VisionSensor's config, "
            "either both mean/stdev must be None or neither."
        )
        self._should_normalize = self._norm_means is not None

        self._height = height
        self._width = width
        assert (self._width is None) == (self._height is None), (
            "In VisionSensor's config, "
            "either both height/width must be None or neither."
        )

        self._scale_first = scale_first

        # A scaler is only created when a target size was requested.
        self.scaler: Optional[ScaleBothSides] = None
        if self._width is not None:
            self.scaler = ScaleBothSides(
                width=cast(int, self._width), height=cast(int, self._height)
            )

        self.to_pil = transforms.ToPILImage()  # assumes mode="RGB" for 3 channels

        self._observation_space = self._make_observation_space(
            output_shape=output_shape,
            output_channels=output_channels,
            unnormalized_infimum=unnormalized_infimum,
            unnormalized_supremum=unnormalized_supremum,
        )

        # Only Pillow major version 7 is rejected here.
        assert int(PIL.__version__.split(".")[0]) != 7, (
            "We found that Pillow version >=7.* has broken scaling,"
            " please downgrade to version 6.2.1 or upgrade to >=8.0.0"
        )

        observation_space = self._get_observation_space()

        # NOTE: the local variables of this function are forwarded to the
        # superclass initializer, so adding/renaming locals above changes
        # what is passed on.
        super().__init__(**prepare_locals_for_super(locals()))

    def _make_observation_space(
        self,
        output_shape: Optional[Tuple[int, ...]],
        output_channels: Optional[int],
        unnormalized_infimum: float,
        unnormalized_supremum: float,
    ) -> gym.spaces.Box:
        """Build the `gym.spaces.Box` describing this sensor's observations.

        The shape is `output_shape` when given, otherwise
        `(height, width, output_channels)` when both a target size and a
        channel count are available, otherwise `None`. When normalization is
        enabled (and a multi-dimensional shape is known) the bounds are the
        normalized versions of the unnormalized infimum / supremum.
        """
        assert output_shape is None or output_channels is None, (
            "In VisionSensor's config, "
            "only one of output_shape and output_channels can be not None."
        )

        shape: Optional[Tuple[int, ...]] = None
        if output_shape is not None:
            shape = output_shape
        elif self._height is not None and output_channels is not None:
            shape = (
                cast(int, self._height),
                cast(int, self._width),
                cast(int, output_channels),
            )

        if not self._should_normalize or shape is None or len(shape) == 1:
            return gym.spaces.Box(
                low=np.float32(unnormalized_infimum),
                high=np.float32(unnormalized_supremum),
                shape=shape,
            )
        else:
            # Tile the normalized bounds over every dimension except the last.
            out_shape = shape[:-1] + (1,)
            low = np.tile(
                (unnormalized_infimum - cast(np.ndarray, self._norm_means))
                / cast(np.ndarray, self._norm_sds),
                out_shape,
            )
            high = np.tile(
                (unnormalized_supremum - cast(np.ndarray, self._norm_means))
                / cast(np.ndarray, self._norm_sds),
                out_shape,
            )
            return gym.spaces.Box(low=np.float32(low), high=np.float32(high))

    def _get_observation_space(self):
        """Return the observation space built in the initializer."""
        return self._observation_space

    @property
    def height(self) -> Optional[int]:
        """Height that input image will be rescaled to have.

        # Returns

        The height as a non-negative integer or `None` if no rescaling is done.
        """
        return self._height

    @property
    def width(self) -> Optional[int]:
        """Width that input image will be rescaled to have.

        # Returns

        The width as a non-negative integer or `None` if no rescaling is done.
        """
        return self._width

    @abstractmethod
    def frame_from_env(self, env: EnvType, task: Optional[SubTaskType]) -> np.ndarray:
        """Return the raw frame for this sensor; implemented by subclasses."""
        raise NotImplementedError

    def process_img(self, img: np.ndarray):
        """Rescale and/or normalize a raw frame.

        The frame must be either a 3-channel `np.uint8` image or a
        one-channel `np.float32` image. `np.uint8` frames are converted to
        `np.float32` and divided by 255. Rescaling (when a scaler exists and
        the size differs) happens before or after the conversion /
        normalization depending on `scale_first`.
        """
        assert (
            np.issubdtype(img.dtype, np.float32)
            and (len(img.shape) == 2 or img.shape[-1] == 1)
        ) or (img.shape[-1] == 3 and np.issubdtype(img.dtype, np.uint8)), (
            "Input frame must either have 3 channels and be of"
            " type np.uint8 or have one channel and be of type np.float32"
        )

        if (
            self._scale_first
            and self.scaler is not None
            and img.shape[:2] != (self._height, self._width)
        ):
            img = np.array(self.scaler(self.to_pil(img)), dtype=img.dtype)  # hwc
        elif np.issubdtype(img.dtype, np.float32):
            # Copy so the in-place normalization below does not modify the
            # array that was passed in.
            img = img.copy()

        assert img.dtype in [np.uint8, np.float32]

        if np.issubdtype(img.dtype, np.uint8):
            img = img.astype(np.float32) / 255.0

        if self._should_normalize:
            img -= self._norm_means
            img /= self._norm_sds

        if (
            (not self._scale_first)
            and self.scaler is not None
            and img.shape[:2] != (self._height, self._width)
        ):
            img = np.array(self.scaler(self.to_pil(img)), dtype=np.float32)  # hwc

        return img

    def get_observation(
        self, env: EnvType, task: Optional[SubTaskType], *args: Any, **kwargs: Any
    ) -> Any:
        """Return the processed frame obtained from `frame_from_env`."""
        return self.process_img(self.frame_from_env(env=env, task=task))
mean : The images will be normalized with the given mean if `use_resnet_normalization` is True (default `[0.485, 0.456, 0.406]`, i.e. the standard resnet normalization mean). stdev : The images will be normalized with the given standard deviation if `use_resnet_normalization` is True (default `[0.229, 0.224, 0.225]`, i.e. the standard resnet normalization standard deviation). height: If it's a non-negative integer and `width` is also non-negative integer, the image returned from the environment will be rescaled to have `height` rows and `width` columns using bilinear sampling. width: If it's a non-negative integer and `height` is also non-negative integer, the image returned from the environment will be rescaled to have `height` rows and `width` columns using bilinear sampling. uuid: The universally unique identifier for the sensor. output_shape: Optional observation space shape (alternative to `output_channels`). output_channels: Optional observation space number of channels (alternative to `output_shape`). unnormalized_infimum: Lower limit(s) for the observation space range. unnormalized_supremum: Upper limit(s) for the observation space range. scale_first: Whether to scale image before normalization (if needed). kwargs : Extra kwargs. Currently unused. 
""" if not use_resnet_normalization: mean, stdev = None, None if isinstance(mean, tuple): mean = np.array(mean, dtype=np.float32).reshape((1, 1, len(mean))) if isinstance(stdev, tuple): stdev = np.array(stdev, dtype=np.float32).reshape((1, 1, len(stdev))) super().__init__(**prepare_locals_for_super(locals())) class DepthSensor(VisionSensor[EnvType, SubTaskType], ABC): def __init__( self, use_normalization: bool = False, mean: Optional[Union[np.ndarray, float]] = 0.5, stdev: Optional[Union[np.ndarray, float]] = 0.25, height: Optional[int] = None, width: Optional[int] = None, uuid: str = "depth", output_shape: Optional[Tuple[int, ...]] = None, output_channels: int = 1, unnormalized_infimum: float = 0.0, unnormalized_supremum: float = 5.0, scale_first: bool = True, **kwargs: Any ): """Initializer. # Parameters config : If `config["use_normalization"]` is `True` then the depth images will be normalized with mean 0.5 and standard deviation 0.25. If both `config["height"]` and `config["width"]` are non-negative integers then the depth image returned from the environment will be rescaled to have shape (config["height"], config["width"]) using bilinear sampling. use_normalization : Whether to apply image normalization with the given `mean` and `stdev`. mean : The images will be normalized with the given mean if `use_normalization` is True (default 0.5). stdev : The images will be normalized with the given standard deviation if `use_normalization` is True (default 0.25). height: If it's a non-negative integer and `width` is also non-negative integer, the image returned from the environment will be rescaled to have `height` rows and `width` columns using bilinear sampling. width: If it's a non-negative integer and `height` is also non-negative integer, the image returned from the environment will be rescaled to have `height` rows and `width` columns using bilinear sampling. uuid: The universally unique identifier for the sensor. 
output_shape: Optional observation space shape (alternative to `output_channels`). output_channels: Optional observation space number of channels (alternative to `output_shape`). unnormalized_infimum: Lower limit(s) for the observation space range. unnormalized_supremum: Upper limit(s) for the observation space range. scale_first: Whether to scale image before normalization (if needed). kwargs : Extra kwargs. Currently unused. """ if not use_normalization: mean, stdev = None, None if isinstance(mean, float): mean = np.array(mean, dtype=np.float32).reshape(1, 1) if isinstance(stdev, float): stdev = np.array(stdev, dtype=np.float32).reshape(1, 1) super().__init__(**prepare_locals_for_super(locals())) def get_observation( # type: ignore self, env: EnvType, task: Optional[SubTaskType], *args: Any, **kwargs: Any ) -> Any: depth = super().get_observation(env, task, *args, **kwargs) depth = np.expand_dims(depth, 2) return depth ================================================ FILE: allenact/embodiedai/storage/__init__.py ================================================ ================================================ FILE: allenact/embodiedai/storage/vdr_storage.py ================================================ import math import random from collections import defaultdict from typing import Union, Tuple, Optional, Dict, Callable, cast, Sequence import torch import torch.nn.functional as F from allenact.algorithms.onpolicy_sync.policy import ObservationType from allenact.algorithms.onpolicy_sync.storage import ( MiniBatchStorageMixin, ExperienceStorage, ) from allenact.base_abstractions.misc import ( GenericAbstractLoss, ModelType, Memory, LossOutput, ) from allenact.utils.misc_utils import unzip, partition_sequence def _index_recursive(d: Dict, key: Union[str, Tuple[str, ...]]): if isinstance(key, str): return d[key] for k in key: d = d[k] return d class InverseDynamicsVDRLoss(GenericAbstractLoss): def __init__( self, compute_action_logits_fn: Callable, img0_key: str, 
class DiscreteVisualDynamicsReplayStorage(ExperienceStorage, MiniBatchStorageMixin):
    """Replay storage of (image, action, next image) transitions.

    For every discrete action a bounded list of `(previous image, current
    image)` pairs is kept. `batched_experience_generator` shuffles all stored
    transitions and yields them in batches with keys `"img0"`, `"action"`
    and `"img1"`.
    """

    def __init__(
        self,
        image_uuid: Union[str, Tuple[str, ...]],
        action_success_uuid: Optional[Union[str, Tuple[str, ...]]],
        nactions: int,
        num_to_store_per_action: int,
        max_to_save_per_episode: int,
        target_batch_size: int,
        extra_targets: Optional[Sequence] = None,
    ):
        """Initializer.

        # Parameters

        image_uuid : Key (or tuple of nested keys) of the image in the observations.
        action_success_uuid : Key (or tuple of nested keys) of the "was the last
            action successful" observation, or `None` to treat every action
            as successful.
        nactions : Number of discrete actions.
        num_to_store_per_action : Maximum number of transitions kept per action.
        max_to_save_per_episode : Bound used when deciding whether another
            transition may be saved for the current episode (see `add`).
        target_batch_size : Approximate size of the batches yielded by
            `batched_experience_generator`.
        extra_targets : Stored as given (an empty list if `None`); not
            otherwise used by this class.
        """
        self.image_uuid = image_uuid
        self.action_success_uuid = action_success_uuid
        self.nactions = nactions
        self.num_to_store_per_action = num_to_store_per_action
        self.max_to_save_per_episode = max_to_save_per_episode
        self.target_batch_size = target_batch_size
        self.extra_targets = extra_targets if extra_targets is not None else []

        self._prev_imgs: Optional[torch.Tensor] = None

        self.action_to_saved_transitions = {i: [] for i in range(nactions)}
        self.action_to_num_seen = {i: 0 for i in range(nactions)}
        self.task_sampler_to_actions_already_sampled = defaultdict(set)

        self.device = torch.device("cpu")

        self._total_samples_returned_in_batches = 0

    @property
    def total_experiences(self):
        """Total number of samples that have been yielded in batches so far."""
        return self._total_samples_returned_in_batches

    def set_partition(self, index: int, num_parts: int):
        """Shrink the per-action capacity and batch size to `1 / num_parts`
        (rounded up). `index` is not used."""
        self.num_to_store_per_action = math.ceil(
            self.num_to_store_per_action / num_parts
        )
        self.target_batch_size = math.ceil(self.target_batch_size / num_parts)

    def initialize(self, *, observations: ObservationType, **kwargs):
        """Forget the previous images and record `observations` as the new
        starting images."""
        self._prev_imgs = None
        self.add(observations=observations, actions=None, masks=None)

    def batched_experience_generator(self, num_mini_batch: int):
        """Yield shuffled batches of all stored transitions.

        `num_mini_batch` is not used; the number of batches is derived from
        `target_batch_size`. Nothing is yielded when no transition is stored.
        """
        triples = [
            (i0, a, i1)
            for a, v in self.action_to_saved_transitions.items()
            for (i0, i1) in v
        ]
        random.shuffle(triples)

        if len(triples) == 0:
            return

        parts = partition_sequence(
            triples, math.ceil(len(triples) / self.target_batch_size)
        )
        for part in parts:
            img0s, actions, img1s = unzip(part, n=3)

            img0 = torch.stack([i0.to(self.device) for i0 in img0s], 0)
            action = torch.tensor(actions, device=self.device)
            img1 = torch.stack([i1.to(self.device) for i1 in img1s], 0)

            self._total_samples_returned_in_batches += img0.shape[0]
            yield {"img0": img0, "action": action, "img1": img1}

    def add(
        self,
        *,
        observations: ObservationType,
        actions: Optional[torch.Tensor],
        masks: Optional[torch.Tensor],
        **kwargs,
    ):
        """Record the transitions from the previous images to the current ones.

        On the first call after `initialize` (no previous images) only the
        current images are remembered. Otherwise, for every sampler, the
        transition is stored under its action when the mask is non-zero, the
        action succeeded, and the action has not already been sampled for
        that sampler.
        """
        cur_imgs = cast(
            torch.Tensor, _index_recursive(d=observations, key=self.image_uuid).cpu()
        )

        if self._prev_imgs is not None:
            actions = actions.view(-1).cpu().numpy()
            masks = masks.view(-1).cpu().numpy()

            if self.action_success_uuid is not None:
                # Resolved the same way as `image_uuid` so that a tuple of
                # nested keys (allowed by the type annotation) also works.
                action_successes = (
                    _index_recursive(d=observations, key=self.action_success_uuid)
                    .cpu()
                    .view(-1)
                    .numpy()
                )
            else:
                action_successes = [True] * actions.shape[0]

            nsamplers = actions.shape[0]
            assert nsamplers == masks.shape[0]

            for i, (a, m, action_success) in enumerate(
                zip(actions, masks, action_successes)
            ):
                actions_already_sampled_in_ep = (
                    self.task_sampler_to_actions_already_sampled[i]
                )

                # NOTE(review): with `<=` up to `max_to_save_per_episode + 1`
                # transitions can be saved before the check fails, and the set
                # below is cleared whenever *any* of these conditions fails
                # (not only at the start of an episode). Behaviour kept as-is;
                # confirm this is intended.
                if (
                    m != 0
                    and action_success
                    and (
                        len(actions_already_sampled_in_ep)
                        <= self.max_to_save_per_episode
                    )
                    and a not in actions_already_sampled_in_ep
                ):  # Not the start of a new episode/task -> self._prev_imgs[i] corresponds to cur_imgs[i]
                    saved_transitions = self.action_to_saved_transitions[a]

                    if len(saved_transitions) < self.num_to_store_per_action:
                        saved_transitions.append((self._prev_imgs[i], cur_imgs[i]))
                    else:
                        # Buffer full: overwrite a uniformly chosen old entry.
                        saved_transitions[
                            random.randint(0, len(saved_transitions) - 1)
                        ] = (
                            self._prev_imgs[i],
                            cur_imgs[i],
                        )

                    actions_already_sampled_in_ep.add(a)
                    self.action_to_num_seen[a] += 1
                else:
                    actions_already_sampled_in_ep.clear()

        self._prev_imgs = cur_imgs

    def before_updates(self, **kwargs):
        """No-op hook."""
        pass

    def after_updates(self, **kwargs):
        """No-op hook."""
        pass

    def to(self, device: torch.device):
        """Set the device on which yielded batches are placed."""
        self.device = device
parser.add_argument( "--eval", dest="eval", action="store_true", required=False, help="if you pass the `--eval` flag, AllenAct will run inference on your experiment configuration." " You will need to specify which experiment checkpoints to run evaluation using the `--checkpoint`" " flag.", ) parser.set_defaults(eval=False) parser.add_argument( "--config_kwargs", type=str, default=None, required=False, help="sometimes it is useful to be able to pass additional key-word arguments" " to `__init__` when initializing an experiment configuration. This flag can be used" " to pass such key-word arugments by specifying them with json, e.g." '\n\t--config_kwargs \'{"gpu_id": 0, "my_important_variable": [1,2,3]}\'' "\nTo see which arguments are supported for your experiment see the experiment" " config's `__init__` function. If the value passed to this function is a file path" " then we will try to load this file path as a json object and use this json object" " as key-word arguments.", ) parser.add_argument( "--extra_tag", type=str, default="", required=False, help="Add an extra tag to the experiment when trying out new ideas (will be used" " as a subdirectory of the tensorboard path so you will be able to" " search tensorboard logs using this extra tag). This can also be used to add an extra" " organization when running evaluation (e.g. `--extra_tag running_eval_on_great_idea_12`)", ) parser.add_argument( "-o", "--output_dir", required=False, type=str, default="experiment_output", help="experiment output folder", ) parser.add_argument( "--save_dir_fmt", required=False, type=lambda s: SaveDirFormat[s.upper()], default="flat", help="The file structure to use when saving results from allenact." " See documentation o f`SaveDirFormat` for more details." " Allowed values are ('flat' and 'nested'). 
Default: 'flat'.", ) parser.add_argument( "-s", "--seed", required=False, default=None, type=int, help="random seed", ) parser.add_argument( "-b", "--experiment_base", required=False, default=os.getcwd(), type=str, help="experiment configuration base folder (default: working directory)", ) parser.add_argument( "-c", "--checkpoint", required=False, default=None, type=str, help="optional checkpoint file name to resume training on or run testing with. When testing (see the `--eval` flag) this" " argument can be used very flexibly as:" "\n(1) the path to a particular individual checkpoint file," "\n(2) the path to a directory of checkpoint files all of which you'd like to be evaluated" " (checkpoints are expected to have a `.pt` file extension)," '\n(3) a "glob" pattern (https://tldp.org/LDP/abs/html/globbingref.html) that will be expanded' " using python's `glob.glob` function and should return a collection of checkpoint files." "\nIf you'd like to only evaluate a subset of the checkpoints specified by the above directory/glob" " (e.g. every checkpoint saved after 5mil steps) you'll likely want to use the `--approx_ckpt_step_interval`" " flag.", ) parser.add_argument( "--infer_output_dir", dest="infer_output_dir", action="store_true", required=False, help="applied when evaluating checkpoint(s) in nested save_dir_fmt: if specified, the output dir will be inferred from checkpoint path.", ) parser.add_argument( "--approx_ckpt_step_interval", required=False, default=None, type=float, help="if running tests on a collection of checkpoints (see the `--checkpoint` flag) this argument can be" " used to skip checkpoints. In particular, if this value is specified and equals `n` then we will" " only evaluate checkpoints whose step count is closest to each of `0*n`, `1*n`, `2*n`, `3*n`, ... " " n * ceil(max training steps in ckpts / n). 
Note that 'closest to' is important here as AllenAct does" " not generally save checkpoints at exact intervals (doing so would result in performance degregation" " in distributed training).", ) parser.add_argument( "-r", "--restart_pipeline", dest="restart_pipeline", action="store_true", required=False, help="for training, if checkpoint is specified, DO NOT continue the training pipeline from where" " training had previously ended. Instead restart the training pipeline from scratch but" " with the model weights from the checkpoint.", ) parser.set_defaults(restart_pipeline=False) parser.add_argument( "-d", "--deterministic_cudnn", dest="deterministic_cudnn", action="store_true", required=False, help="sets CuDNN to deterministic mode", ) parser.set_defaults(deterministic_cudnn=False) parser.add_argument( "-m", "--max_sampler_processes_per_worker", required=False, default=None, type=int, help="maximal number of sampler processes to spawn for each worker", ) parser.add_argument( "-e", "--deterministic_agents", dest="deterministic_agents", action="store_true", required=False, help="enable deterministic agents (i.e. always taking the mode action) during validation/testing", ) parser.set_defaults(deterministic_agents=False) parser.add_argument( "-l", "--log_level", default="info", type=str, required=False, help="sets the log_level. 
it must be one of {}.".format( ", ".join(HUMAN_LOG_LEVELS) ), ) parser.add_argument( "-i", "--disable_tensorboard", dest="disable_tensorboard", action="store_true", required=False, help="disable tensorboard logging", ) parser.set_defaults(disable_tensorboard=False) parser.add_argument( "-a", "--disable_config_saving", dest="disable_config_saving", action="store_true", required=False, help="disable saving the used config in the output directory", ) parser.set_defaults(disable_config_saving=False) parser.add_argument( "--collect_valid_results", dest="collect_valid_results", action="store_true", required=False, help="enables returning and saving valid results during training", ) parser.set_defaults(collect_valid_results=False) parser.add_argument( "--valid_on_initial_weights", dest="valid_on_initial_weights", action="store_true", required=False, help="enables running validation on the model with initial weights", ) parser.set_defaults(valid_on_initial_weights=False) parser.add_argument( "--test_expert", dest="test_expert", action="store_true", required=False, help="use expert during test", ) parser.set_defaults(test_expert=False) parser.add_argument( "--version", action="version", version=f"allenact {__version__}" ) parser.add_argument( "--distributed_ip_and_port", dest="distributed_ip_and_port", required=False, type=str, default="127.0.0.1:0", help="IP address and port of listener for distributed process with rank 0." " Port number 0 lets runner choose a free port. For more details, please follow the" " tutorial https://allenact.org/tutorials/distributed-objectnav-tutorial/.", ) parser.add_argument( "--machine_id", dest="machine_id", required=False, type=int, default=0, help="ID for machine in distributed runs. 
For more details, please follow the" " tutorial https://allenact.org/tutorials/distributed-objectnav-tutorial/", ) parser.add_argument( "--save_ckpt_at_every_host", dest="save_ckpt_at_every_host", action="store_true", required=False, help="if you pass the `--save_ckpt_at_every_host` flag, AllenAct will save checkpoints at every host as the" " the training progresses in distributed training mode.", ) parser.set_defaults(save_ckpt_at_every_host=False) parser.add_argument( "--callbacks", dest="callbacks", required=False, type=str, default="", help="Comma-separated list of files with Callback classes to use.", ) parser.add_argument( "--enable_crash_recovery", dest="enable_crash_recovery", default=False, action="store_true", required=False, help="Whether or not to try recovering when a task crashes (use at your own risk).", ) ### DEPRECATED FLAGS parser.add_argument( "-t", "--test_date", default=None, type=str, required=False, help="`--test_date` has been deprecated. Please use `--eval` instead.", ) parser.add_argument( "--approx_ckpt_steps_count", required=False, default=None, type=float, help="`--approx_ckpt_steps_count` has been deprecated." " Please specify the checkpoint directly using the '--checkpoint' flag.", ) parser.add_argument( "-k", "--skip_checkpoints", required=False, default=0, type=int, help="`--skip_checkpoints` has been deprecated. 
def find_sub_modules(path: str, module_list: Optional[List] = None):
    """Collect absolute paths of `.py` files reachable from `path`.

    A path ending in `.py` is recorded directly. A directory is only
    descended into when it directly contains an `__init__.py` or a
    `setup.py`; every entry of such a directory is then visited recursively.

    # Parameters

    path : File or directory to inspect (made absolute before use).
    module_list : Optional list to which results are appended in place. A new
        list is created when this is `None`.

    # Returns

    The list (either `module_list` or the newly created one) holding all
    discovered paths.
    """
    found = [] if module_list is None else module_list
    abs_path = os.path.abspath(path)

    if abs_path.endswith(".py"):
        found.append(abs_path)
        return found

    if not os.path.isdir(abs_path):
        return found

    entries = os.listdir(abs_path)
    if "__init__.py" in entries or "setup.py" in entries:
        for entry in entries:
            find_sub_modules(os.path.join(abs_path, entry), found)

    return found
f"{rel_base_dot_path}.{exp_dot_path}" if len(rel_base_dot_path) != 0 else exp_dot_path ) try: importlib.invalidate_caches() module = importlib.import_module(module_path) except ModuleNotFoundError as e: if not any(isinstance(arg, str) and module_path in arg for arg in e.args): raise e all_sub_modules = set(find_sub_modules(os.getcwd())) desired_config_name = module_path.split(".")[-1] relevant_submodules = [ sm for sm in all_sub_modules if desired_config_name in os.path.basename(sm) ] raise ModuleNotFoundError( f"Could not import experiment '{module_path}', are you sure this is the right path?" f" Possibly relevant files include {relevant_submodules}." f" Note that the experiment must be reachable along your `PYTHONPATH`, it might" f" be helpful for you to run `export PYTHONPATH=$PYTHONPATH:$PWD` in your" f" project's top level directory." ) from e experiments = [ m[1] for m in inspect.getmembers(module, inspect.isclass) if m[1].__module__ == module.__name__ and issubclass(m[1], ExperimentConfig) ] assert ( len(experiments) == 1 ), "Too many or two few experiments defined in {}".format(module_path) config_kwargs = {} if args.config_kwargs is not None: if os.path.exists(args.config_kwargs): with open(args.config_kwargs, "r") as f: config_kwargs = json.load(f) else: try: config_kwargs = json.loads(args.config_kwargs) except json.JSONDecodeError: get_logger().warning( f"The input for --config_kwargs ('{args.config_kwargs}')" f" does not appear to be valid json. Often this is due to" f" json requiring very specific syntax (e.g. double quoted strings)" f" we'll try to get around this by evaluating with `ast.literal_eval`" f" (a safer version of the standard `eval` function)." ) config_kwargs = ast.literal_eval(args.config_kwargs) assert isinstance( config_kwargs, Dict ), "`--config_kwargs` must be a json string (or a path to a .json file) that evaluates to a dictionary." 
def main():
    """Command line entry point: parse arguments, load the experiment
    configuration, and start training or evaluation with an `OnPolicyRunner`.

    Training is run unless the `--eval` flag was passed, in which case the
    runner is created in `"test"` mode and `start_test` is called instead.
    """
    args = get_args()

    init_logging(args.log_level)

    get_logger().info("Running with args {}".format(args))

    # `--eval` is a `store_true` flag, so `args.eval` is always a bool (never
    # `None`); test its truthiness to pick the process title.
    ptitle("Master: {}".format("Evaluation" if args.eval else "Training"))

    cfg, srcs = load_config(args)

    # Both modes construct the runner with the same arguments except `mode`.
    runner = OnPolicyRunner(
        config=cfg,
        output_dir=args.output_dir,
        save_dir_fmt=args.save_dir_fmt,
        loaded_config_src_files=srcs,
        seed=args.seed,
        mode="test" if args.eval else "train",
        deterministic_cudnn=args.deterministic_cudnn,
        deterministic_agents=args.deterministic_agents,
        extra_tag=args.extra_tag,
        disable_tensorboard=args.disable_tensorboard,
        disable_config_saving=args.disable_config_saving,
        distributed_ip_and_port=args.distributed_ip_and_port,
        machine_id=args.machine_id,
        callbacks_paths=args.callbacks,
    )

    if not args.eval:
        runner.start_train(
            checkpoint=args.checkpoint,
            restart_pipeline=args.restart_pipeline,
            max_sampler_processes_per_worker=args.max_sampler_processes_per_worker,
            collect_valid_results=args.collect_valid_results,
            valid_on_initial_weights=args.valid_on_initial_weights,
            try_restart_after_task_error=args.enable_crash_recovery,
            # Must be read from the parsed arguments; a bare
            # `save_ckpt_at_every_host` name does not exist in this scope.
            save_ckpt_at_every_host=args.save_ckpt_at_every_host,
        )
    else:
        runner.start_test(
            checkpoint_path_dir_or_pattern=args.checkpoint,
            infer_output_dir=args.infer_output_dir,
            approx_ckpt_step_interval=args.approx_ckpt_step_interval,
            max_sampler_processes_per_worker=args.max_sampler_processes_per_worker,
            inference_expert=args.test_expert,
        )
"allenact.egg-info/requires.txt") ) setup( name="allenact", version=__version__, description="AllenAct framework", long_description=( "AllenAct is a modular and flexible learning framework designed with" " a focus on the unique requirements of Embodied-AI research." ), classifiers=[ "Intended Audience :: Science/Research", "Development Status :: 3 - Alpha", "License :: OSI Approved :: MIT License", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Programming Language :: Python", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", ], keywords=["reinforcement learning", "embodied-AI", "AI", "RL", "SLAM"], url="https://github.com/allenai/allenact", author="Allen Institute for Artificial Intelligence", author_email="lucaw@allenai.org", license="MIT", packages=find_packages(include=["allenact", "allenact.*"]), install_requires=[ "gym==0.17.*", # Newer versions of gym are now broken with updates to setuptools "torch>=1.6.0,!=1.8.0", "torchvision>=0.7.0,<=0.16.2", "tensorboardx>=2.1", "setproctitle", "moviepy>=1.0.3", "filelock", "numpy>=1.19.1", "Pillow>=8.2.0,<10.3.0", "matplotlib>=3.3.1", "networkx", "opencv-python", "wheel>=0.36.2", "attrs>=21.4.0", "scipy>=1.5.4", ], setup_requires=["pytest-runner"], tests_require=["pytest", "pytest-cov", "compress_pickle"], entry_points={"console_scripts": ["allenact=allenact.main:main"]}, extras_require=extras, ) if __name__ == "__main__": _do_setup() ================================================ FILE: allenact/utils/__init__.py ================================================ ================================================ FILE: allenact/utils/cache_utils.py ================================================ import math from typing import Dict, Any, Union, Callable, Optional from allenact.utils.system import get_logger def pos_to_str_for_cache(pos: Dict[str, 
def get_distance(
    cache: Dict[str, Any], pos: Dict[str, float], target: Dict[str, float]
) -> float:
    """Look up the cached shortest path distance from `pos` to `target`.

    The position is snapped to a 0.25 grid. The four surrounding grid points
    (ceil/ceil, floor/ceil, ceil/floor, floor/floor on `x`/`z`) are tried in
    that order. If none is cached, the cached point nearest to `pos` is used,
    and finally the cached point nearest to `target` as well.

    # Parameters

    cache : Nested mapping `position string -> target string -> {"distance": ...}`.
    pos : Query position with `"x"`, `"y"`, and `"z"` entries.
    target : Target position with `"x"`, `"y"`, and `"z"` entries.

    # Returns

    The cached distance.

    # Raises

    RuntimeError : If no distance can be found with any of the fallbacks
        (same error as raised by `get_distance_to_object`).
    """
    # Round the *original* position each time. Rounding an already-rounded
    # position would make every fallback identical to the first attempt.
    rounding_pairs = (
        (math.ceil, math.ceil),
        (math.floor, math.ceil),
        (math.ceil, math.floor),
        (math.floor, math.floor),
    )
    for round_x, round_z in rounding_pairs:
        candidate = {
            "x": 0.25 * round_x(pos["x"] / 0.25),
            "y": pos["y"],
            "z": 0.25 * round_z(pos["z"] / 0.25),
        }
        sp = _get_shortest_path_distance_from_cache(cache, candidate, target)
        if sp != -1.0:
            return sp

    # Fall back to the cached point closest to the query position.
    nearest_pos = find_nearest_point_in_cache(cache, pos)
    sp = _get_shortest_path_distance_from_cache(cache, nearest_pos, target)
    if sp != -1.0:
        return sp

    # Last resort: also snap the target to its nearest cached point.
    nearest_target = find_nearest_point_in_cache(cache, target)
    sp = _get_shortest_path_distance_from_cache(cache, nearest_pos, nearest_target)
    if sp != -1.0:
        return sp

    # Raise instead of `print` + `exit()`: `exit()` would terminate the whole
    # process with a success status and hide the failure from callers.
    raise RuntimeError("Your cache is incomplete!")
class DynamicDistanceCache(object):
    """Memoizes distances computed by a caller-supplied distance function.

    Distances are stored in a nested dictionary keyed first by
    `scene_name + position string` and then by the target string. Hit/miss
    counters are kept and a miss-hit ratio is logged (at debug level) every
    1000 accesses.

    # Attributes

    cache : The nested `position -> target -> distance` dictionary.
    rounding : Number of digits positions are rounded to before being used
        as keys. A falsy value (`None` or `0`) leaves positions untouched.
    hits : Number of lookups answered from the cache.
    misses : Number of lookups that required calling the distance function.
    num_accesses : Total number of `find_distance` calls.
    """

    def __init__(self, rounding: Optional[int] = None):
        self.cache: Dict[str, Any] = {}
        self.rounding = rounding
        self.hits = 0
        self.misses = 0
        self.num_accesses = 0

    def find_distance(
        self,
        scene_name: str,
        position: Dict[str, Any],
        target: Union[Dict[str, Any], str],
        native_distance_function: Callable[
            [Dict[str, Any], Union[Dict[str, Any], str]], float
        ],
    ) -> float:
        """Return the distance from `position` to `target`, computing and
        caching it with `native_distance_function` on a cache miss.

        # Parameters

        scene_name : Prefix for the position key so that different scenes do
            not share entries.
        position : Position dictionary; rounded (see `rounding`) for the key
            but passed unrounded to `native_distance_function`.
        target : Either a string (used as the key directly) or a position
            dictionary (converted like `position`).
        native_distance_function : Called as `f(position, target)` on a miss.

        # Returns

        The cached (or freshly computed) distance.
        """
        # Convert the position to its rounded string representation
        position_str = scene_name + self._pos_to_str(position)
        # If the target is also a position, convert it to its rounded string representation
        target_str = target if isinstance(target, str) else self._pos_to_str(target)

        per_position = self.cache.setdefault(position_str, {})
        if target_str not in per_position:
            per_position[target_str] = native_distance_function(position, target)
            self.misses += 1
        else:
            self.hits += 1

        self.num_accesses += 1
        if self.num_accesses % 1000 == 0:
            # Guard against a division by zero when nothing has hit the cache yet.
            ratio = self.misses / self.hits if self.hits > 0 else float("inf")
            get_logger().debug("Cache Miss-Hit Ratio: %.4f" % ratio)
        return per_position[target_str]

    def invalidate(self):
        """Drop all cached distances (the hit/miss counters are kept)."""
        # Must stay a dictionary: `find_distance` indexes it by string keys.
        self.cache = {}

    def _pos_to_str(self, pos: Dict[str, Any]) -> str:
        """Return the string key for `pos`, rounding its values first when
        `self.rounding` is truthy."""
        if self.rounding:
            pos = {k: round(v, self.rounding) for k, v in pos.items()}
        return str(pos)
resnet_fpn_backbone("resnet50", pretrained_backbone) anchor_sizes = ((32,), (64,), (128,), (256,), (512,)) aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes) rpn_anchor_generator = CachelessAnchorGenerator(anchor_sizes, aspect_ratios) model = FasterRCNN( backbone, num_classes, rpn_anchor_generator=rpn_anchor_generator, **kwargs ) # min_size = 300 # max_size = 400 # anchor_sizes = ((12,), (24,), (48,), (96,), (192,)) # aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes) # rpn_anchor_generator = CachelessAnchorGenerator( # anchor_sizes, aspect_ratios # ) # model = FasterRCNN(backbone, num_classes, rpn_anchor_generator=rpn_anchor_generator, min_size=min_size, max_size=max_size, **kwargs) if pretrained: state_dict = load_state_dict_from_url( model_urls["fasterrcnn_resnet50_fpn_coco"], progress=progress ) model.load_state_dict(state_dict) return model ================================================ FILE: allenact/utils/experiment_utils.py ================================================ """Utility classes and functions for running and designing experiments.""" import abc import collections.abc import copy import numbers import random from collections import OrderedDict, defaultdict from typing import ( Callable, NamedTuple, Dict, Any, Union, Iterator, Optional, List, cast, Sequence, TypeVar, Generic, Tuple, ) import attr import numpy as np import torch import torch.optim as optim import wandb import shutil from allenact.algorithms.offpolicy_sync.losses.abstract_offpolicy_loss import Memory from allenact.algorithms.onpolicy_sync.losses.abstract_loss import ( AbstractActorCriticLoss, ) from allenact.algorithms.onpolicy_sync.storage import ( ExperienceStorage, RolloutStorage, RolloutBlockStorage, ) from allenact.base_abstractions.misc import Loss, GenericAbstractLoss from allenact.utils.misc_utils import prepare_locals_for_super from allenact.utils.system import get_logger try: # noinspection PyProtectedMember,PyUnresolvedReferences from torch.optim.lr_scheduler 
def recursive_update(
    original: Union[Dict, collections.abc.MutableMapping],
    update: Union[Dict, collections.abc.MutableMapping],
):
    """Recursively updates original dictionary with entries from update dict.

    Mapping-valued entries of `update` are merged into the corresponding
    entry of `original` key by key; every other value simply replaces
    whatever `original` held under that key. `original` is modified in place.

    # Parameters

    original : Original dictionary to be updated.
    update : Dictionary with additional or replacement entries.

    # Returns

    Updated original dictionary.
    """
    for k, v in update.items():
        if isinstance(v, collections.abc.MutableMapping):
            existing = original.get(k)
            # When the current value is missing or is not itself a mapping
            # (e.g. `None` or a scalar) it cannot be merged into: start from
            # an empty dictionary so the nested update replaces it.
            if not isinstance(existing, collections.abc.MutableMapping):
                existing = {}
            original[k] = recursive_update(existing, v)
        else:
            original[k] = v
    return original
class ScalarMeanTracker(object):
    """Accumulate weighted sums and counts per scalar key so that the mean of
    each key can be reported on demand."""

    def __init__(self) -> None:
        self._sums: Dict[str, float] = OrderedDict()
        self._counts: Dict[str, int] = OrderedDict()

    def add_scalars(
        self, scalars: Dict[str, Union[float, int]], n: Union[int, Dict[str, int]] = 1
    ) -> None:
        """Record new values for the given scalar keys.

        # Parameters

        scalars : A dictionary of `scalar key -> value` pairs.
        n : The weight (count) of each value; either a single integer used
            for every key or a dictionary of `scalar key -> count`.
        """
        weights = cast(
            Dict[str, int],
            (n if isinstance(n, Dict) else defaultdict(lambda: n)),  # type: ignore
        )
        for key in scalars:
            weighted_value = weights[key] * scalars[key]
            if key in self._sums:
                self._sums[key] += weighted_value
                self._counts[key] += weights[key]
            else:
                self._sums[key] = weighted_value
                self._counts[key] = weights[key]

    def pop_and_reset(self) -> Dict[str, float]:
        """Return the current means and then discard all tracked values.

        # Returns

        A dictionary of `scalar key -> current mean` pairs for the values
        added with `add_scalars` since the last reset.
        """
        current_means = self.means()
        self.reset()
        return current_means

    def reset(self):
        """Forget every tracked sum and count."""
        self._sums = OrderedDict()
        self._counts = OrderedDict()

    def sums(self):
        """Return a shallow copy of the per-key weighted sums."""
        return copy.copy(self._sums)

    def counts(self) -> Dict[str, int]:
        """Return a shallow copy of the per-key counts."""
        return copy.copy(self._counts)

    def means(self) -> Dict[str, float]:
        """Return `scalar key -> mean` for all tracked keys (insertion order)."""
        return OrderedDict(
            (key, float(total / self._counts[key]))
            for key, total in self._sums.items()
        )

    @property
    def empty(self):
        """Whether no scalars are currently being tracked."""
        num_sums, num_counts = len(self._sums), len(self._counts)
        assert (
            num_sums == num_counts
        ), "Mismatched length of _sums {} and _counts {}".format(num_sums, num_counts)
        return num_sums == 0
class LinearDecay(object):
    """Linearly decay between two values over some number of steps.

    Obtain the value corresponding to the `i`-th step by calling
    an instance of this class with the value `i`.

    # Parameters

    steps : The number of steps over which to decay.
    startp : The starting value.
    endp : The ending value.
    """

    def __init__(self, steps: int, startp: float = 1.0, endp: float = 0.0) -> None:
        """Initializer.

        See class documentation for parameter definitions.
        """
        self.steps = steps
        self.startp = startp
        self.endp = endp

    def __call__(self, epoch: int) -> float:
        """Get the decayed value for `epoch` number of steps.

        `epoch` is clamped to the range `[0, steps]` before interpolating, so
        values before the start return `startp` and values past the end
        return `endp`.

        # Parameters

        epoch : The number of steps.

        # Returns

        Decayed value for `epoch` number of steps.
        """
        if self.steps == 0:
            # A zero-length decay has nothing to interpolate over (and would
            # otherwise divide by zero): the decay is already complete.
            return self.endp

        epoch = max(min(epoch, self.steps), 0)
        return self.startp + (self.endp - self.startp) * (epoch / float(self.steps))
class EarlyStoppingCriterion(abc.ABC):
    """Interface for callables deciding whether a pipeline stage should end
    its training early."""

    @abc.abstractmethod
    def __call__(
        self,
        stage_steps: int,
        total_steps: int,
        training_metrics: ScalarMeanTracker,
    ) -> bool:
        """Decide whether training should stop early.

        # Parameters

        stage_steps : Total number of steps taken in the current pipeline stage.
        total_steps : Total number of steps taken during training so far
            (includes steps taken in prior pipeline stages).
        training_metrics : Metrics recovered over some fixed number of steps
            (see the `metric_accumulate_interval` attribute in the
            `TrainingPipeline` class) training.

        # Returns

        `True` if training should be stopped early, otherwise `False`.
        """
        raise NotImplementedError()
class TrainingSettings:
    """Class defining parameters used for training (within a stage or the
    entire pipeline).

    A setting left as `None` is looked up in the `TrainingSettings` passed to
    `set_defaults` (if any) when it is read.

    # Attributes

    num_mini_batch : The number of mini-batches to break a rollout into.
    update_repeats : The number of times we will cycle through the mini-batches corresponding
        to a single rollout doing gradient updates.
    max_grad_norm : The maximum "inf" norm of any gradient step (gradients are clipped to not exceed this).
    num_steps : Total number of steps a single agent takes in a rollout.
    gamma : Discount factor applied to rewards (should be in [0, 1]).
    use_gae : Whether or not to use generalized advantage estimation (GAE).
    gae_lambda : The additional parameter used in GAE.
    advance_scene_rollout_period: Optional number of rollouts before enforcing an advance scene in all samplers.
    save_interval : The frequency with which to save (in total agent steps taken). If `None` then *no*
        checkpoints will be saved. Otherwise, in addition to the checkpoints being saved every
        `save_interval` steps, a checkpoint will *always* be saved at the end of each pipeline stage.
        If `save_interval <= 0` then checkpoints will only be saved at the end of each pipeline stage.
    metric_accumulate_interval : The frequency with which training/validation metrics are accumulated
        (in total agent steps). Metrics accumulated in an interval are logged (if `should_log` is `True`)
        and used by the stage's early stopping criterion (if any).
    """

    num_mini_batch: Optional[int]
    update_repeats: Optional[Union[int, Sequence[int]]]
    max_grad_norm: Optional[float]
    num_steps: Optional[int]
    gamma: Optional[float]
    use_gae: Optional[bool]
    gae_lambda: Optional[float]
    advance_scene_rollout_period: Optional[int]
    save_interval: Optional[int]
    metric_accumulate_interval: Optional[int]

    # noinspection PyUnresolvedReferences
    def __init__(
        self,
        num_mini_batch: Optional[int] = None,
        update_repeats: Optional[Union[int, Sequence[int]]] = None,
        max_grad_norm: Optional[float] = None,
        num_steps: Optional[int] = None,
        gamma: Optional[float] = None,
        use_gae: Optional[bool] = None,
        gae_lambda: Optional[float] = None,
        advance_scene_rollout_period: Optional[int] = None,
        save_interval: Optional[int] = None,
        metric_accumulate_interval: Optional[int] = None,
    ):
        """Initializer.

        See class documentation for parameter definitions.
        """
        # NOTE: this must stay the first statement so that `locals()` holds
        # nothing but `self` and the constructor arguments.
        self._key_to_setting = prepare_locals_for_super(locals(), ignore_kwargs=True)
        self._training_setting_keys = tuple(sorted(self._key_to_setting.keys()))

        self._defaults: Optional["TrainingSettings"] = None

    def keys(self) -> Tuple[str, ...]:
        """Returns the sorted names of all settings."""
        return self._training_setting_keys

    def has_key(self, key: str) -> bool:
        """Returns `True` if `key` is the name of a setting."""
        return key in self._key_to_setting

    def set_defaults(self, defaults: "TrainingSettings"):
        """Registers the settings consulted for values that are `None` here.

        May only be called once per instance.
        """
        assert self._defaults is None, "Defaults can only be set once."
        self._defaults = defaults

    def __getattr__(self, item: str):
        """Resolves a setting by name, falling back to the defaults.

        Only invoked when normal attribute lookup fails.

        # Raises

        AttributeError : If `item` is not the name of a setting.
        """
        # Private state is read from `__dict__` directly: going through
        # `self._key_to_setting` would re-enter `__getattr__` (and recurse
        # without bound) on instances that never ran `__init__`, e.g. the
        # empty object created by `copy.deepcopy` or during unpickling.
        key_to_setting = self.__dict__.get("_key_to_setting")
        if key_to_setting is None or item not in key_to_setting:
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{item}'"
            )

        val = key_to_setting[item]
        defaults = self.__dict__.get("_defaults")
        if val is None and defaults is not None:
            val = getattr(defaults, item)
        return val
class PipelineStage:
    """A single stage in a training pipeline, possibly including overrides to
    the global `TrainingSettings` in `TrainingPipeline`.

    # Attributes

    loss_names : A collection of unique names assigned to losses. These will
        reference the `Loss` objects in a `TrainingPipeline` instance.
    max_stage_steps : Either the total number of steps agents should take in this stage or
        a Callable object (e.g. a function)
    loss_weights : A list of floating point numbers describing the relative weights
        applied to the losses referenced by `loss_names`. Should be the same length
        as `loss_names`. If this is `None`, all weights will be assumed to be one.
    teacher_forcing : If applicable, defines the probability an agent will take the
        expert action (as opposed to its own sampled action) at a given time point.
    stage_components : Optional `StageComponent`s registered (in order) via
        `add_stage_component` when the stage is built.
    early_stopping_criterion: An `EarlyStoppingCriterion` object which determines if
        training in this stage should be stopped early. If `None` then no early stopping
        occurs. If `early_stopping_criterion` is not `None` then we do not guarantee
        reproducibility when restarting a model from a checkpoint (as the
         `EarlyStoppingCriterion` object may store internal state which is not
         saved in the checkpoint). Currently, AllenAct only supports using early stopping
         criterion when **not** using distributed training.
    training_settings: Instance of `TrainingSettings`.
    callback_to_change_engine_attributes: Optional mapping from the name of an engine
        attribute to a dictionary with keys `"func"` and `"args"`; see
        `change_engine_attributes`.
    training_settings_kwargs: For backwards compatibility: arguments to instantiate TrainingSettings when
        `training_settings` is `None`.
    """

    def __init__(
        self,
        *,  # Disables positional arguments. Please provide arguments as keyword arguments.
        max_stage_steps: Union[int, Callable],
        loss_names: List[str],
        loss_weights: Optional[Sequence[float]] = None,
        teacher_forcing: Optional[Callable[[int], float]] = None,
        stage_components: Optional[Sequence["StageComponent"]] = None,
        early_stopping_criterion: Optional["EarlyStoppingCriterion"] = None,
        training_settings: Optional["TrainingSettings"] = None,
        callback_to_change_engine_attributes: Optional[Dict[str, Any]] = None,
        **training_settings_kwargs,
    ):
        """Initializer.

        See class documentation for parameter definitions.
        """
        self.callback_to_change_engine_attributes = callback_to_change_engine_attributes

        # Populate TrainingSettings members
        # THIS MUST COME FIRST IN `__init__`: `__setattr__` consults
        # `self.training_settings` for every other attribute that is assigned.
        assert training_settings is None or len(training_settings_kwargs) == 0
        if training_settings is None:
            training_settings = TrainingSettings(**training_settings_kwargs)
        self.training_settings = training_settings
        assert self.training_settings.update_repeats is None or isinstance(
            self.training_settings.update_repeats, numbers.Integral
        ), (
            "`training_settings` passed to `PipelineStage` must have `training_settings.update_repeats`"
            " equal to `None` or an integer. If you'd like to specify per-loss `update_repeats` then please"
            " do so in the training settings of a `StageComponent`."
        )

        self.loss_names = loss_names
        self.max_stage_steps = max_stage_steps

        self.loss_weights = (
            [1.0] * len(loss_names) if loss_weights is None else loss_weights
        )
        assert len(self.loss_weights) == len(self.loss_names)

        self.teacher_forcing = teacher_forcing
        self.early_stopping_criterion = early_stopping_criterion

        self.steps_taken_in_stage: int = 0
        self.rollout_count = 0
        self.early_stopping_criterion_met = False

        self.uuid_to_loss_weight: Dict[str, float] = {
            loss_uuid: loss_weight
            for loss_uuid, loss_weight in zip(loss_names, self.loss_weights)
        }

        self._stage_components: List[StageComponent] = []
        self.uuid_to_stage_component: Dict[str, StageComponent] = {}
        self.storage_uuid_to_steps_taken_in_stage: Dict[str, int] = {}
        self.stage_component_uuid_to_stream_memory: Dict[str, Memory] = {}

        if stage_components is not None:
            for stage_component in stage_components:
                self.add_stage_component(stage_component)

        # Sanity check
        for key in training_settings.keys():
            assert not hasattr(
                self, key
            ), f"`{key}` should be defined in `TrainingSettings`, not in `PipelineStage`."

    def reset(self):
        """Zeroes all step / rollout counters and clears every stream memory."""
        self.steps_taken_in_stage: int = 0
        self.rollout_count = 0
        self.early_stopping_criterion_met = False

        for k in self.storage_uuid_to_steps_taken_in_stage:
            self.storage_uuid_to_steps_taken_in_stage[k] = 0

        for memory in self.stage_component_uuid_to_stream_memory.values():
            memory.clear()

    # TODO: Replace Any with the correct type
    def change_engine_attributes(self, engine: Any):
        """Overwrites attributes of `engine` using the registered callbacks.

        For every `key -> value` entry of `callback_to_change_engine_attributes`
        the attribute `key` of `engine` (which must already exist) is set to
        `value["func"](engine, **value["args"])`.
        """
        if self.callback_to_change_engine_attributes is not None:
            for key, value in self.callback_to_change_engine_attributes.items():
                # check if the engine has the attribute
                assert hasattr(engine, key)

                func = value["func"]
                args = value["args"]
                setattr(engine, key, func(engine, **args))

    @property
    def stage_components(self) -> Tuple["StageComponent", ...]:
        """The registered stage components, in registration order."""
        return tuple(self._stage_components)

    def add_stage_component(self, stage_component: "StageComponent"):
        """Registers `stage_component` with this stage.

        The stage's training settings become the defaults of the component's
        training settings and a fresh `Memory` is created for the component.

        # Raises

        NotImplementedError : If another component of this stage already uses
            the same storage uuid.
        """
        assert stage_component.uuid not in self.uuid_to_stage_component

        # Setting default training settings for the `stage_component`
        sc_ts = stage_component.training_settings
        sc_ts.set_defaults(self.training_settings)

        # Handling the case where different losses should be updated different
        # numbers of times
        stage_update_repeats = self.training_settings.update_repeats
        if stage_update_repeats is not None and sc_ts.update_repeats is None:
            if isinstance(stage_update_repeats, Sequence):
                # Only a sequence can be paired up with the loss names; doing
                # this for a plain integer would raise a `TypeError`.
                loss_to_update_repeats = dict(
                    zip(self.loss_names, stage_update_repeats)
                )
                sc_ts.update_repeats = [
                    loss_to_update_repeats[uuid] for uuid in stage_component.loss_names
                ]
            else:
                sc_ts.update_repeats = stage_update_repeats

        self._stage_components.append(stage_component)
        self.uuid_to_stage_component[stage_component.uuid] = stage_component

        if (
            stage_component.storage_uuid
            not in self.storage_uuid_to_steps_taken_in_stage
        ):
            self.storage_uuid_to_steps_taken_in_stage[stage_component.storage_uuid] = 0
        else:
            raise NotImplementedError(
                "Cannot have multiple stage components which"
                f" use the same storage (reused storage uuid: '{stage_component.storage_uuid}'."
            )

        self.stage_component_uuid_to_stream_memory[stage_component.uuid] = Memory()

    def __setattr__(self, key: str, value: Any):
        """Sets an attribute, refusing names that belong to `TrainingSettings`.

        # Raises

        NotImplementedError : If `key` is the name of a training setting.
        """
        if key not in [
            "training_settings",
            "callback_to_change_engine_attributes",
        ] and self.training_settings.has_key(key):
            # Instances have no `__name__`; the class name must be read from
            # the type or an `AttributeError` would mask this error.
            cls_name = type(self).__name__
            raise NotImplementedError(
                f"Cannot set {key} in {cls_name}, update the"
                f" `training_settings` attribute of {cls_name} instead."
            )
        else:
            return super(PipelineStage, self).__setattr__(key, value)

    @property
    def is_complete(self):
        """`True` once early stopping triggered or `max_stage_steps` was reached."""
        return (
            self.early_stopping_criterion_met
            or self.steps_taken_in_stage >= self.max_stage_steps
        )
training_settings: Instance of `TrainingSettings` training_settings_kwargs: For backwards compatability: arguments to instantiate TrainingSettings when `training_settings` is `None`. """ # noinspection PyUnresolvedReferences def __init__( self, *, named_losses: Dict[str, Union[Loss, Builder[Loss]]], pipeline_stages: List[PipelineStage], optimizer_builder: Builder[optim.Optimizer], # type: ignore named_storages: Optional[ Dict[str, Union[ExperienceStorage, Builder[ExperienceStorage]]] ] = None, rollout_storage_uuid: Optional[str] = None, should_log: bool = True, lr_scheduler_builder: Optional[Builder[_LRScheduler]] = None, # type: ignore training_settings: Optional[TrainingSettings] = None, valid_pipeline_stage: Optional[PipelineStage] = None, test_pipeline_stage: Optional[PipelineStage] = None, **training_settings_kwargs, ): """Initializer. See class docstring for parameter definitions. """ # Populate TrainingSettings members assert training_settings is None or len(training_settings_kwargs) == 0 if training_settings is None: training_settings = TrainingSettings(**training_settings_kwargs) self.training_settings = training_settings assert self.training_settings.update_repeats is None or isinstance( self.training_settings.update_repeats, numbers.Integral ), ( "`training_settings` passed to `TrainingPipeline` must have `training_settings.update_repeats`" " equal to `None` or an integer. If you'd like to specify per-loss `update_repeats` then please" " do so in the training settings of a `StageComponent`." ) self.training_settings = training_settings self.optimizer_builder = optimizer_builder self.lr_scheduler_builder = lr_scheduler_builder self._named_losses = named_losses self._named_storages = self._initialize_named_storages( named_storages=named_storages ) self.rollout_storage_uuid = self._initialize_rollout_storage_uuid( rollout_storage_uuid ) if self.rollout_storage_uuid is None: get_logger().warning( f"No rollout storage was specified in the TrainingPipeline. 
This need not be an issue" f" if you are performing off-policy training but, otherwise, please ensure you have" f" defined a rollout storage in the `named_storages` argument of the TrainingPipeline." ) self.should_log = should_log self.pipeline_stages = pipeline_stages def if_none_then_empty_stage(stage: Optional[PipelineStage]) -> PipelineStage: return ( stage if stage is not None else PipelineStage(max_stage_steps=-1, loss_names=[]) ) self.valid_pipeline_stage = if_none_then_empty_stage(valid_pipeline_stage) self.test_pipeline_stage = if_none_then_empty_stage(test_pipeline_stage) assert ( len(self.pipeline_stages) == len(set(id(ps) for ps in pipeline_stages)) and self.valid_pipeline_stage not in self.pipeline_stages and self.test_pipeline_stage not in self.pipeline_stages ), ( "Duplicate `PipelineStage` object instances found in the pipeline stages input" " to `TrainingPipeline`. `PipelineStage` objects are not immutable, if you'd" " like to have multiple pipeline stages of the same type, please instantiate" " multiple separate instances." ) self._ensure_pipeline_stages_all_have_at_least_one_stage_component() self._current_stage: Optional[PipelineStage] = None self.rollout_count = 0 self._refresh_current_stage(force_stage_search_from_start=True) def _initialize_rollout_storage_uuid( self, rollout_storage_uuid: Optional[str] ) -> str: if rollout_storage_uuid is None: rollout_storage_uuids = self._get_uuids_of_rollout_storages( self._named_storages ) assert len(rollout_storage_uuids) <= 1, ( f"`rollout_storage_uuid` cannot be automatically inferred as there are multiple storages defined" f" (ids: {rollout_storage_uuids}) of type `RolloutStorage`." 
) rollout_storage_uuid = next(iter(rollout_storage_uuids), None) assert ( rollout_storage_uuid is None or rollout_storage_uuid in self._named_storages ) return rollout_storage_uuid def _ensure_pipeline_stages_all_have_at_least_one_stage_component(self): rollout_storages_uuids = self._get_uuids_of_rollout_storages( self._named_storages ) named_pipeline_stages = { f"{i}th": ps for i, ps in enumerate(self.pipeline_stages) } named_pipeline_stages["valid"] = self.valid_pipeline_stage named_pipeline_stages["test"] = self.test_pipeline_stage for stage_name, stage in named_pipeline_stages.items(): # Forward default `TrainingSettings` to all `PipelineStage`s settings: stage.training_settings.set_defaults(defaults=self.training_settings) if len(stage.stage_components) == 0: assert len(rollout_storages_uuids) <= 1, ( f"In {stage_name} pipeline stage: you have several storages specified ({rollout_storages_uuids}) which" f" are subclasses of `RolloutStorage`. This is only allowed when stage components are explicitly" f" defined in every `PipelineStage` instance. You have `PipelineStage`s for which stage components" f" are not specified." 
@classmethod
def _get_uuids_of_rollout_storages(
    cls,
    named_storages: Dict[str, Union[Builder[ExperienceStorage], ExperienceStorage]],
) -> List[str]:
    """Returns the names of all storages that are (or will build) a `RolloutStorage`.

    An entry qualifies if it is a `RolloutStorage` instance or a `Builder`
    whose `class_type` is a subclass of `RolloutStorage`. Names are returned
    in the iteration order of `named_storages`.
    """
    rollout_uuids = []
    for name, candidate in named_storages.items():
        if isinstance(candidate, RolloutStorage):
            rollout_uuids.append(name)
            continue
        # Builders are judged by the class they will instantiate.
        if isinstance(candidate, Builder) and issubclass(
            candidate.class_type, RolloutStorage
        ):
            rollout_uuids.append(name)
    return rollout_uuids
def _refresh_current_stage(
    self, force_stage_search_from_start: bool = False
) -> Optional[PipelineStage]:
    """Updates and returns the stage that training should currently run.

    # Parameters

    force_stage_search_from_start : If `True`, the cached stage is discarded
        and the search starts at the first pipeline stage.

    # Returns

    The cached stage if it is not complete; otherwise the first incomplete
    stage after it (or after the start, if nothing is cached). `None` if no
    such stage exists.
    """
    if force_stage_search_from_start:
        self._current_stage = None

    active = self._current_stage
    if active is not None and not active.is_complete:
        return active

    # Resume the search just after the finished stage (or from the beginning).
    first_candidate = 0 if active is None else self.pipeline_stages.index(active) + 1
    self._current_stage = next(
        (
            stage
            for stage in self.pipeline_stages[first_candidate:]
            if not stage.is_complete
        ),
        None,
    )
    return self._current_stage
def state_dict(self):
    """Returns the progress of the pipeline as a plain dictionary.

    The result has a `"stage_info_list"` entry (one dictionary of counters per
    pipeline stage, in order) and a `"rollout_count"` entry. The per-stage
    `storage_uuid_to_steps_taken_in_stage` dictionaries are referenced, not
    copied.
    """
    stage_info_list = []
    for stage in self.pipeline_stages:
        stage_info_list.append(
            {
                "early_stopping_criterion_met": stage.early_stopping_criterion_met,
                "steps_taken_in_stage": stage.steps_taken_in_stage,
                "storage_uuid_to_steps_taken_in_stage": stage.storage_uuid_to_steps_taken_in_stage,
                "rollout_count": stage.rollout_count,
            }
        )

    return {
        "stage_info_list": stage_info_list,
        "rollout_count": self.rollout_count,
    }
def get_stage_storage(
    self, stage: PipelineStage
) -> "OrderedDict[str, ExperienceStorage]":
    """Returns the storages used by `stage`, keyed by uuid in sorted order.

    The storages of all stage components are included and, when defined, so
    is the storage named by `self.rollout_storage_uuid`. Any entry of
    `self._named_storages` that is still a `Builder` is instantiated and the
    instance is stored back in its place.
    """
    needed_uuids = {component.storage_uuid for component in stage.stage_components}

    # Always include self.rollout_storage_uuid in the current stage storage (when the uuid is defined)
    if self.rollout_storage_uuid is not None:
        needed_uuids.add(self.rollout_storage_uuid)

    ordered_uuids = sorted(needed_uuids)

    # Instantiate lazily-defined storages first, then collect them.
    for uuid in ordered_uuids:
        entry = self._named_storages[uuid]
        if isinstance(entry, Builder):
            self._named_storages[uuid] = entry()

    stage_storage = OrderedDict()
    for uuid in ordered_uuids:
        stage_storage[uuid] = self._named_storages[uuid]
    return stage_storage
def download_checkpoint_from_wandb(
    checkpoint_path_dir_or_pattern, all_ckpt_dir, only_allow_one_ckpt=False
):
    """Downloads checkpoint artifacts from Weights & Biases.

    # Parameters

    checkpoint_path_dir_or_pattern : A string whose `//`-separated pieces are
        read as follows: piece 1 is the run token and the pieces from index 2
        on are checkpoint steps (a trailing empty piece is ignored).
    all_ckpt_dir : Directory the artifacts are downloaded into.
    only_allow_one_ckpt : If `True`, exactly one step must be given and a
        single path is returned instead of a list.

    # Returns

    The path `<all_ckpt_dir>/ckpt-<step>.pt` of the downloaded checkpoint, or
    a list of such paths (one per step) when `only_allow_one_ckpt` is `False`.
    """
    api = wandb.Api()

    pieces = checkpoint_path_dir_or_pattern.split("//")
    run_token = pieces[1]
    ckpt_steps = pieces[2:]
    if ckpt_steps[-1] == "":
        ckpt_steps = ckpt_steps[:-1]

    def fetch(step):
        # Each artifact is downloaded as `ckpt.pt`; renaming it per step keeps
        # several checkpoints from overwriting each other.
        artifact = api.artifact("{}-step-{}:latest".format(run_token, step))
        artifact.download(all_ckpt_dir)
        target = "{}/ckpt-{}.pt".format(all_ckpt_dir, step)
        shutil.move("{}/ckpt.pt".format(all_ckpt_dir), target)
        return target

    if only_allow_one_ckpt:
        assert len(ckpt_steps) == 1
        return fetch(ckpt_steps[0])

    return [fetch(step) for step in ckpt_steps]
@attr.s(kw_only=True)
class InferenceAgent:
    """Wraps an `ActorCriticModel` so that it can be stepped one observation
    at a time outside of the training engine.

    The rollout storage is used with a single sampler to assemble the model
    input for every step.
    """

    actor_critic: ActorCriticModel = attr.ib()
    rollout_storage: RolloutStorage = attr.ib()
    device: torch.device = attr.ib()
    sensor_preprocessor_graph: Optional[SensorPreprocessorGraph] = attr.ib()
    # `rollout_storage.after_updates()` is called every this many steps of a task.
    steps_before_rollout_refresh: int = attr.ib(default=128)

    memory: Optional[Memory] = attr.ib(default=None)
    steps_taken_in_task: int = attr.ib(default=0)
    # Flattened version of the most recently sampled action.
    last_action_flat: Optional[Any] = attr.ib(default=None)
    # Becomes `True` once the rollout storage has been initialized by `act`.
    has_initialized: bool = attr.ib(default=False)

    def __attrs_post_init__(self):
        """Puts the model in eval mode and moves all components to `device`."""
        self.actor_critic.eval()
        self.actor_critic.to(device=self.device)
        if self.memory is not None:
            self.memory.to(device=self.device)
        if self.sensor_preprocessor_graph is not None:
            self.sensor_preprocessor_graph.to(self.device)

        self.rollout_storage.to(self.device)
        self.rollout_storage.set_partition(index=0, num_parts=1)

    @classmethod
    def from_experiment_config(
        cls,
        exp_config: ExperimentConfig,
        device: torch.device,
        checkpoint_path: Optional[str] = None,
        model_state_dict: Optional[Dict[str, Any]] = None,
        mode: str = "test",
    ):
        """Builds an agent from an experiment configuration.

        # Parameters

        exp_config : Configuration providing the training pipeline (for its
            rollout storage), the machine parameters and the model.
        device : Device the agent runs on.
        checkpoint_path : Optional path of a checkpoint whose
            `"model_state_dict"` entry is loaded into the model.
        model_state_dict : Optional state dict to load instead; either the
            weights themselves or a dictionary holding them under the key
            `"model_state_dict"`. Mutually exclusive with `checkpoint_path`.
        mode : Passed to `exp_config.machine_params`.
        """
        assert (
            checkpoint_path is None or model_state_dict is None
        ), "Cannot have `checkpoint_path` and `model_state_dict` both non-None."
        rollout_storage = exp_config.training_pipeline().rollout_storage

        machine_params = exp_config.machine_params(mode)
        if not isinstance(machine_params, MachineParams):
            machine_params = MachineParams(**machine_params)

        sensor_preprocessor_graph = machine_params.sensor_preprocessor_graph

        actor_critic = cast(
            ActorCriticModel,
            exp_config.create_model(
                sensor_preprocessor_graph=sensor_preprocessor_graph
            ),
        )

        if checkpoint_path is not None:
            actor_critic.load_state_dict(
                torch.load(checkpoint_path, map_location="cpu")["model_state_dict"]
            )
        elif model_state_dict is not None:
            actor_critic.load_state_dict(
                model_state_dict
                if "model_state_dict" not in model_state_dict
                else model_state_dict["model_state_dict"]
            )

        return cls(
            actor_critic=actor_critic,
            rollout_storage=rollout_storage,
            device=device,
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    def reset(self):
        """Prepares the agent for a new task (step counter and memory are cleared)."""
        if self.has_initialized:
            self.rollout_storage.after_updates()
        self.steps_taken_in_task = 0
        self.memory = None

    def act(self, observations: ObservationType):
        """Samples the next action for the given observations.

        # Parameters

        observations : The observations of the current step (a single,
            un-batched set of observations).

        # Returns

        The sampled action (first entry of the action list built from the
        flattened action).
        """
        # This class only performs inference: without `no_grad` every forward
        # pass would record an autograd graph that stays reachable through the
        # recurrent memory kept on `self` and in the rollout storage.
        with torch.no_grad():
            # Batch of size 1
            obs_batch = batch_observations([observations], device=self.device)
            if self.sensor_preprocessor_graph is not None:
                obs_batch = self.sensor_preprocessor_graph.get_observations(obs_batch)

            if self.steps_taken_in_task == 0:
                self.has_initialized = True
                self.rollout_storage.initialize(
                    observations=obs_batch,
                    num_samplers=1,
                    recurrent_memory_specification=self.actor_critic.recurrent_memory_specification,
                    action_space=self.actor_critic.action_space,
                )
                self.rollout_storage.after_updates()
            else:
                dummy_val = torch.zeros((1, 1), device=self.device)  # Unused dummy value
                self.rollout_storage.add(
                    observations=obs_batch,
                    memory=self.memory,
                    actions=self.last_action_flat[0],
                    action_log_probs=dummy_val,
                    value_preds=dummy_val,
                    rewards=dummy_val,
                    masks=torch.ones(
                        (1, 1), device=self.device
                    ),  # Always == 1 as we're in a single task until `reset`
                )

            agent_input = self.rollout_storage.agent_input_for_next_step()

            actor_critic_output, self.memory = cast(
                Tuple[ActorCriticOutput[DistributionType], Optional[Memory]],
                self.actor_critic(**agent_input),
            )

            action = actor_critic_output.distributions.sample()
            self.last_action_flat = su.flatten(self.actor_critic.action_space, action)

            self.steps_taken_in_task += 1

            if self.steps_taken_in_task % self.steps_before_rollout_refresh == 0:
                self.rollout_storage.after_updates()

            return su.action_list(
                self.actor_critic.action_space, self.last_action_flat
            )[0]
def deprecated(to_decorate):
    """Decorate a function to note that it has been deprecated.

    The returned wrapper logs a deprecation warning the first time it is
    called and otherwise forwards all arguments to `to_decorate`.
    """
    module_name = inspect.getmodule(to_decorate).__name__
    name = f"{module_name}.{to_decorate.__qualname__}"
    if to_decorate.__name__ == "__init__":
        # Report a deprecated initializer under the name of its class.
        name = name.replace(".__init__", "")

    warned = False

    @functools.wraps(to_decorate)
    def decorated(*args, **kwargs):
        nonlocal warned
        if not warned:
            get_logger().warning(
                f"'{name}' has been deprecated and will soon be removed from AllenAct's API."
                f" Please discontinue your use of this function.",
            )
            warned = True
        return to_decorate(*args, **kwargs)

    return decorated
class HashableDict(dict):
    """A dictionary which is hashable so long as all of its values are
    hashable.

    A HashableDict object will allow setting / deleting of items until
    the first time that `__hash__()` is called on it after which
    attempts to set or delete items will throw `RuntimeError`
    exceptions.

    Only item assignment (`d[k] = v`) and item deletion (`del d[k]`) are
    guarded; the hash and equality are computed from the items sorted by
    key, so keys must be mutually orderable.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Flipped to True by the first `hash(...)`; freezes the dict from then on.
        self._hash_has_been_called = False

    def __key(self):
        """Return the canonical `(key, value)` tuple, ordered by key."""
        return tuple((k, self[k]) for k in sorted(self))

    def __hash__(self):
        self._hash_has_been_called = True
        return hash(self.__key())

    def __eq__(self, other):
        # `other.__key()` only exists on HashableDict instances (the name is
        # mangled), so defer to Python's fallback comparison for anything
        # else instead of raising an AttributeError.
        if not isinstance(other, HashableDict):
            return NotImplemented
        return self.__key() == other.__key()

    def __setitem__(self, *args, **kwargs):
        if not self._hash_has_been_called:
            return super(HashableDict, self).__setitem__(*args, **kwargs)
        raise RuntimeError("Cannot set item in HashableDict after having called hash.")

    def __delitem__(self, *args, **kwargs):
        if not self._hash_has_been_called:
            return super(HashableDict, self).__delitem__(*args, **kwargs)
        raise RuntimeError(
            "Cannot delete item in HashableDict after having called hash."
        )
def uninterleave(seq: Sequence, parts: int) -> List:
    """Split `seq` into `parts` lists by dealing its elements out round-robin.

    The `i`-th returned list holds the elements at positions
    `i, i + parts, i + 2 * parts, ...` of `seq`.

    # Parameters

    seq : The sequence to split.
    parts : Number of lists to produce; must satisfy `0 < parts <= len(seq)`.

    # Returns

    A list of `parts` lists.
    """
    assert 0 < parts <= len(seq)

    total = len(seq)
    return [
        [seq[position] for position in range(offset, total, parts)]
        for offset in range(parts)
    ]
class ForkedPdb(pdb.Pdb):
    """A Pdb subclass that may be used from a forked multiprocessing child."""

    def interaction(self, *args, **kwargs):
        """Run the regular Pdb interaction with `sys.stdin` bound to `/dev/stdin`.

        `sys.stdin` is temporarily replaced by a freshly opened `/dev/stdin`
        for the duration of the interaction. The previous `sys.stdin` is
        always restored and the temporary handle is closed afterwards, so
        repeated breakpoints do not accumulate open file objects.
        """
        _stdin = sys.stdin
        try:
            with open("/dev/stdin") as forked_stdin:
                sys.stdin = forked_stdin
                pdb.Pdb.interaction(self, *args, **kwargs)
        finally:
            sys.stdin = _stdin
def grad_norm(parameters, norm_type=2):
    """Compute the overall norm of the gradients of a set of parameters.

    # Parameters

    parameters : A single tensor or an iterable of tensors. Entries whose
        `.grad` is `None` are ignored.
    norm_type : Order of the norm; anything accepted by `float(...)`,
        including `"inf"` / `float("inf")` for the infinity norm.

    # Returns

    The norm as a Python float (`0.0` when no parameter has a gradient).
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    grads = [p.grad.data for p in parameters if p.grad is not None]

    norm_type = float(norm_type)
    # Compare against the float value: after the conversion above `norm_type`
    # can never equal the string "inf".
    if norm_type == float("inf"):
        return max((g.abs().max().item() for g in grads), default=0.0)

    total_norm = 0.0
    for g in grads:
        total_norm += g.norm(norm_type).item() ** norm_type
    return total_norm ** (1.0 / norm_type)
def compute_cnn_output(
    cnn: nn.Module,
    cnn_input: torch.Tensor,
    permute_order: Optional[Tuple[int, ...]] = (
        0,  # FLAT_BATCH (flattening steps, samplers and agents)
        3,  # CHANNEL
        1,  # ROW
        2,  # COL
    ),  # from [FLAT_BATCH x ROW x COL x CHANNEL] flattened input
):
    """Computes CNN outputs for given inputs.

    The leading step / sampler (/ agent) dimensions are merged into a single
    batch dimension before calling `cnn` and restored on its output.

    # Parameters

    cnn : A torch CNN.
    cnn_input: A torch Tensor with inputs of shape
        [STEP, SAMPLER, (AGENT,) dim1, dim2, dim3]. It does not need to be
        contiguous.
    permute_order: A permutation Tuple to provide PyTorch dimension order, default (0, 3, 1, 2), where 0 corresponds to
                   the flattened batch dimensions (combining step, sampler and agent). Pass `None` to skip the
                   permutation.

    # Returns

    CNN output with dimensions [STEP, SAMPLER, AGENT, CHANNEL, (HEIGHT, WIDTH)].
    """
    assert len(cnn_input.shape) in [
        5,
        6,
    ], "CNN input must have shape [STEP, SAMPLER, (AGENT,) dim1, dim2, dim3]"

    has_agent_dim = len(cnn_input.shape) == 6
    num_leading = 3 if has_agent_dim else 2
    leading_shape = tuple(cnn_input.shape[:num_leading])

    # Make FLAT_BATCH = nsteps * nsamplers (* nagents). `reshape` behaves like
    # `view` for contiguous tensors but also accepts non-contiguous ones
    # (e.g. the result of a transpose), where `view` would raise.
    cnn_input = cnn_input.reshape((-1,) + tuple(cnn_input.shape[num_leading:]))

    if permute_order is not None:
        cnn_input = cnn_input.permute(*permute_order)
    cnn_output = cnn(cnn_input)

    # Restore the leading [STEP, SAMPLER, (AGENT)] dimensions.
    return cnn_output.reshape(leading_shape + tuple(cnn_output.shape[1:]))
class MultiTrajectoryViz(TrajectoryViz):
    """Trajectory visualizer that draws one path per agent on a shared axis.

    Each agent's trajectory is looked up in the episode under
    `path_to_trajectory_prefix` with the agent's suffix appended to the last
    path element, and is drawn with its own colormap and marker color.
    """

    def __init__(
        self,
        path_to_trajectory_prefix: Sequence[str] = ("task_info", "followed_path"),
        agent_suffixes: Sequence[str] = ("1", "2"),
        label: str = "trajectories",
        trajectory_plt_colormaps: Sequence[str] = ("cool", "spring"),
        marker_plt_colors: Sequence[Any] = ("blue", "orange"),
        axes_equal: bool = True,
        **other_base_kwargs,
    ):
        """Store the per-agent plotting configuration.

        # Parameters

        path_to_trajectory_prefix : Path (sequence of keys) to the trajectory; the last element is
            completed with each agent suffix in `make_fig`.
        agent_suffixes : One suffix per agent.
        label : Forwarded to the base class constructor.
        trajectory_plt_colormaps : One colormap name per agent, used for the path line.
        marker_plt_colors : One color per agent, used for the start / stop markers.
        axes_equal : Whether to force an equal aspect ratio on the axes.
        other_base_kwargs : Remaining keyword arguments, forwarded to the base class constructor.
        """
        super().__init__(label=label, **other_base_kwargs)
        self.path_to_trajectory_prefix = list(path_to_trajectory_prefix)
        self.agent_suffixes = list(agent_suffixes)
        self.trajectory_plt_colormaps = list(trajectory_plt_colormaps)
        self.marker_plt_colors = marker_plt_colors
        self.axes_equal = axes_equal

    def make_fig(self, episode, episode_id):
        """Build a figure with the trajectories of all agents for one episode.

        NOTE(review): `self._access`, `self.x`, `self.y`, `self.figsize`,
        `self.fontsize`, `self.start_marker_shape`, `self.start_marker_scale`,
        `self.path_to_rot_degrees` and `self.adapt_rotation` are not defined in
        this class; presumably they come from `TrajectoryViz` - confirm there.

        # Parameters

        episode : Episode data from which each agent's trajectory is read.
        episode_id : Used as the title of the figure.

        # Returns

        The matplotlib figure.
        """

        # From https://nbviewer.jupyter.org/github/dpsanders/matplotlib-examples/blob/master/colorline.ipynb
        def colorline(
            x,
            y,
            z=None,
            cmap=plt.get_cmap("cool"),
            norm=plt.Normalize(0.0, 1.0),
            linewidth=2,
            alpha=1.0,
            zorder=1,
        ):
            """Plot a colored line with coordinates x and y.

            Optionally specify colors in the array z

            Optionally specify a colormap, a norm function and a line width.

            The line is added to the current pyplot axes (`plt.gca()`) and the
            created `LineCollection` is returned.
            """

            def make_segments(x, y):
                """Create list of line segments from x and y coordinates, in
                the correct format for LineCollection:

                an array of the form  numlines x (points per line) x 2 (x and y) array
                """
                points = np.array([x, y]).T.reshape(-1, 1, 2)
                # Pair every point with its successor to form one segment each.
                segments = np.concatenate([points[:-1], points[1:]], axis=1)
                return segments

            # Default colors equally spaced on [0,1]:
            if z is None:
                z = np.linspace(0.0, 1.0, len(x))

            # Special case if a single number:
            if not hasattr(
                z, "__iter__"
            ):  # to check for numerical input -- this is a hack
                z = np.array([z])

            z = np.asarray(z)

            segments = make_segments(x, y)
            lc = LineCollection(
                segments,
                array=z,
                cmap=cmap,
                norm=norm,
                linewidth=linewidth,
                alpha=alpha,
                zorder=zorder,
            )

            # Draws on the current axes rather than on an explicitly passed one.
            ax = plt.gca()
            ax.add_collection(lc)

            return lc

        fig, ax = plt.subplots(figsize=self.figsize)
        for agent, cmap, marker_color in zip(
            self.agent_suffixes, self.trajectory_plt_colormaps, self.marker_plt_colors
        ):
            # Copy the prefix so that appending the suffix does not modify the stored path.
            path = self.path_to_trajectory_prefix[:]
            path[-1] = path[-1] + agent
            trajectory = self._access(episode, path)

            x, y = [], []
            for xy in trajectory:
                x.append(float(self._access(xy, self.x)))
                y.append(float(self._access(xy, self.y)))

            colorline(x, y, zorder=1, cmap=cmap)

            start_marker = markers.MarkerStyle(marker=self.start_marker_shape)
            if self.path_to_rot_degrees is not None:
                rot_degrees = float(
                    self._access(trajectory[0], self.path_to_rot_degrees)
                )
                if self.adapt_rotation is not None:
                    rot_degrees = self.adapt_rotation(rot_degrees)
                # Rotates the marker by overwriting a private MarkerStyle attribute.
                start_marker._transform = start_marker.get_transform().rotate_deg(
                    rot_degrees
                )

            # Start marker; indexing `x[0]` / `y[0]` requires a non-empty trajectory.
            ax.scatter(
                [x[0]],
                [y[0]],
                marker=start_marker,
                zorder=2,
                s=self.start_marker_scale,
                color=marker_color,
            )
            ax.scatter(
                [x[-1]], [y[-1]], marker="s", color=marker_color
            )  # stop (square)

        if self.axes_equal:
            ax.set_aspect("equal", "box")
        ax.set_title(episode_id, fontsize=self.fontsize)
        ax.tick_params(axis="x", labelsize=self.fontsize)
        ax.tick_params(axis="y", labelsize=self.fontsize)

        return fig
def flatdim(space):
    """Return the size of the last dimension of a flattened point of `space`.

    Discrete spaces count as a single entry (no one-hot expansion) and
    composite spaces (Tuple / Dict) as the sum over their sub-spaces.

    Accepts a space and returns an integer. Raises ``NotImplementedError`` if
    the space is not one of the handled ``gym.spaces`` types.
    """
    if isinstance(space, gym.Box):
        return int(np.prod(space.shape))
    if isinstance(space, gym.Discrete):
        # we do not expand to one-hot
        return 1
    if isinstance(space, gym.Tuple):
        return int(sum(flatdim(subspace) for subspace in space.spaces))
    if isinstance(space, gym.Dict):
        return int(sum(flatdim(subspace) for subspace in space.spaces.values()))
    if isinstance(space, gym.MultiBinary):
        return int(space.n)
    if isinstance(space, gym.MultiDiscrete):
        return int(np.prod(space.shape))
    raise NotImplementedError
def unflatten(space, torch_x):
    """Unflatten a concatenated data points tensor from a space.

    The last dimension of `torch_x` is interpreted according to `space`:
    simple spaces get the space's shape restored (and a dtype cast), while
    Tuple / Dict spaces are split along the last dimension and unflattened
    recursively. Raises ``NotImplementedError`` for unhandled space types.
    """

    def restore_shape():
        # Replace the flat last dimension by the shape of the space.
        return torch_x.view(torch_x.shape[:-1] + space.shape)

    if isinstance(space, gym.Box):
        return restore_shape().float()

    if isinstance(space, gym.Discrete):
        index = restore_shape().long()
        if len(index.shape) > 0:
            return index
        # A zero-dimensional result is returned as a plain Python number.
        return index.item()

    if isinstance(space, gym.Tuple):
        widths = [flatdim(subspace) for subspace in space.spaces]
        chunks = torch.split(torch_x, widths, dim=-1)
        return tuple(
            unflatten(subspace, chunk) for chunk, subspace in zip(chunks, space.spaces)
        )

    if isinstance(space, gym.Dict):
        widths = [flatdim(subspace) for subspace in space.spaces.values()]
        chunks = torch.split(torch_x, widths, dim=-1)
        return OrderedDict(
            (key, unflatten(subspace, chunk))
            for chunk, (key, subspace) in zip(chunks, space.spaces.items())
        )

    if isinstance(space, gym.MultiBinary):
        return restore_shape().byte()

    if isinstance(space, gym.MultiDiscrete):
        return restore_shape().long()

    raise NotImplementedError
def flatten_space(space: gym.Space):
    """Return a `gym.Box` describing flattened points of `space`.

    Composite spaces (Tuple / Dict) are flattened recursively and their
    bounds concatenated. Raises ``NotImplementedError`` for unhandled space
    types.
    """

    def merge(boxes):
        # Concatenate the bounds of already-flattened sub-spaces, in order.
        return gym.Box(
            low=np.concatenate([box.low for box in boxes]),
            high=np.concatenate([box.high for box in boxes]),
        )

    if isinstance(space, gym.Box):
        return gym.Box(space.low.flatten(), space.high.flatten())
    elif isinstance(space, gym.Discrete):
        # NOTE(review): the upper bound is `space.n`, not `space.n - 1` - confirm this is intended.
        return gym.Box(low=0, high=space.n, shape=(1,))
    elif isinstance(space, gym.Tuple):
        return merge([flatten_space(subspace) for subspace in space.spaces])
    elif isinstance(space, gym.Dict):
        return merge([flatten_space(subspace) for subspace in space.spaces.values()])
    elif isinstance(space, gym.MultiBinary):
        return gym.Box(low=0, high=1, shape=(space.n,))
    elif isinstance(space, gym.MultiDiscrete):
        return gym.Box(
            low=np.zeros_like(space.nvec),
            high=space.nvec,
        )
    else:
        raise NotImplementedError
def action_list(
    action_space: gym.Space, flat_actions: torch.Tensor
) -> List[ActionType]:
    """Convert flattened actions to list.

    Assumes `flat_actions` are of shape `[step, sampler, flatdim]`. Only the
    first step (`flat_actions[0]`) is converted, yielding one entry per
    sampler.

    # Parameters

    action_space : The space the flattened actions belong to.
    flat_actions : The flattened actions.

    # Returns

    A list with one unflattened action per sampler, in which every tensor has
    been converted to (nested) Python lists / numbers.
    """

    def tolist(action):
        if isinstance(action, torch.Tensor):
            return action.tolist()
        # Check against the builtin `tuple`; the `typing.Tuple` alias is meant
        # for annotations, not for runtime `isinstance` checks.
        if isinstance(action, tuple):
            return tuple(tolist(ac) for ac in action)
        if isinstance(action, OrderedDict):
            return OrderedDict((key, tolist(value)) for key, value in action.items())
        # else, it's a scalar
        return action

    return [tolist(unflatten(action_space, ac)) for ac in flat_actions[0]]
class ColoredFormatter(logging.Formatter):
    """Format a log string with colors.

    This implementation taken (with modifications) from
    https://stackoverflow.com/a/384125.
    """

    BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)

    RESET_SEQ = "\033[0m"
    COLOR_SEQ = "\033[1;%dm"
    BOLD_SEQ = "\033[1m"

    # Color used for the level name of each standard log level.
    COLORS = {
        "WARNING": YELLOW,
        "INFO": GREEN,
        "DEBUG": BLUE,
        "ERROR": RED,
        "CRITICAL": MAGENTA,
    }

    def __init__(self, fmt: str, datefmt: Optional[str] = None, use_color=True):
        """Create the formatter.

        # Parameters

        fmt : Format string, as for `logging.Formatter`.
        datefmt : Date format string, as for `logging.Formatter`.
        use_color : Whether level names should be wrapped in ANSI color codes.
        """
        super().__init__(fmt=fmt, datefmt=datefmt)
        self.use_color = use_color

    def format(self, record: logging.LogRecord) -> str:
        """Format `record`, coloring its level name when enabled.

        The record's `levelname` is only modified for the duration of the
        formatting and is restored afterwards - even if formatting raises -
        as `record` might be used elsewhere (e.g. by other handlers).
        """
        levelname = record.levelname
        if not (self.use_color and levelname in self.COLORS):
            return logging.Formatter.format(self, record)

        record.levelname = (
            self.COLOR_SEQ % (30 + self.COLORS[levelname])
            + levelname
            + self.RESET_SEQ
        )
        try:
            return logging.Formatter.format(self, record)
        finally:
            record.levelname = levelname
def _human_log_level_to_int(human_log_level):
    """Translate a human readable log level into its `logging` integer value.

    The input is lower-cased and stripped before being validated against
    `HUMAN_LOG_LEVELS`. The level `"none"` maps to a value above
    `logging.CRITICAL`.
    """
    human_log_level = human_log_level.lower().strip()
    assert human_log_level in HUMAN_LOG_LEVELS, "unknown human_log_level {}".format(
        human_log_level
    )

    level_by_name = {
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warning": logging.WARNING,
        "error": logging.ERROR,
        "none": logging.CRITICAL + 1,
    }
    if human_log_level not in level_by_name:
        raise NotImplementedError(f"Unknown log level {human_log_level}.")
    return level_by_name[human_log_level]
def _new_logger(log_level: Optional[int] = None):
    """Make sure the module-level logger exists, optionally setting its level.

    # Parameters

    log_level : If not `None`, the level to set on the logger.

    # Returns

    `True` if the logger was created by this call, `False` if it already
    existed.
    """
    global _LOGGER

    created = _LOGGER is None
    if created:
        _LOGGER = mp.get_logger()
    if log_level is not None:
        get_logger().setLevel(log_level)
    return created
class ImportChecker:
    """Context manager appending a hint to `ModuleNotFoundError` messages.

    Any exception raised inside the `with` block is propagated; if it is a
    `ModuleNotFoundError` (or a subclass) and a `msg` was given, `msg` is
    appended to the exception's message first.
    """

    def __init__(self, msg=None):
        """Store the text to append (`None` disables the rewriting)."""
        self.msg = msg

    def __enter__(self):
        pass

    def __exit__(self, exc_type, value, traceback):
        if (
            exc_type is not None
            and issubclass(exc_type, ModuleNotFoundError)
            and self.msg is not None
        ):
            # The exception may carry no message (`msg is None`); extending it
            # directly would raise a TypeError that hides the import error.
            original_msg = "" if value.msg is None else str(value.msg)
            value.msg = original_msg + self.msg
        # Never suppress exceptions raised inside the `with` block.
        return exc_type is None
def detach_recursively(input: Any, inplace=True):
    """Recursively detaches tensors in some data structure from their
    computation graph.

    # Parameters

    input : `None`, a tensor, a (nested) tuple / list / dict / set of
        supported values, a numpy array, a scalar, a string, or any object
        exposing a `detach_recursively(inplace=...)` method.
    inplace : If `True`, lists, dicts and sets are modified in place and
        returned; otherwise new containers are built. Tuples are always
        rebuilt.

    # Returns

    The structure with every tensor replaced by its detached version.

    # Raises

    NotImplementedError : If a value of an unsupported type is encountered.
    """
    if input is None:
        return input
    elif isinstance(input, torch.Tensor):
        return input.detach()
    elif isinstance(input, tuple):
        return tuple(
            detach_recursively(input=subinput, inplace=inplace) for subinput in input
        )
    elif isinstance(input, list):
        if inplace:
            for i, subinput in enumerate(input):
                input[i] = detach_recursively(subinput, inplace=inplace)
            return input
        else:
            return [
                detach_recursively(input=subinput, inplace=inplace)
                for subinput in input
            ]
    elif isinstance(input, dict):
        if inplace:
            for key in input:
                input[key] = detach_recursively(input[key], inplace=inplace)
            return input
        else:
            return {k: detach_recursively(input[k], inplace=inplace) for k in input}
    elif isinstance(input, set):
        if inplace:
            # Iterate over a snapshot as the set is modified while looping.
            for element in list(input):
                input.remove(element)
                input.add(detach_recursively(element, inplace=inplace))
            # Return the updated set, like the list and dict branches do.
            return input
        else:
            return set(detach_recursively(k, inplace=inplace) for k in input)
    elif isinstance(input, np.ndarray) or np.isscalar(input) or isinstance(input, str):
        return input
    elif hasattr(input, "detach_recursively"):
        # noinspection PyCallingNonCallable
        return input.detach_recursively(inplace=inplace)
    else:
        raise NotImplementedError(
            "Sorry, hidden state of type {} is not supported.".format(type(input))
        )
def tile_images(images: List[np.ndarray]) -> np.ndarray:
    """Tile multiple images into single image.

    The images are laid out row by row on a near-square grid with
    `ceil(sqrt(n))` rows; grid cells without an image are filled with zeros.

    # Parameters

    images : Sequence (list, tuple, or stacked array) of images where each
        image has dimension (height x width x channels). All images must
        share the same shape.

    # Returns

    Tiled image of shape
    (rows * height x columns * width x channels).
    """
    assert len(images) > 0, "empty list of images"
    np_images = np.asarray(images)
    n_images, height, width, n_channels = np_images.shape
    new_height = int(np.ceil(np.sqrt(n_images)))
    new_width = int(np.ceil(float(n_images) / new_height))

    # Pad with empty images to complete the rectangle. Padding the stacked
    # array (rather than concatenating Python lists) keeps this working for
    # tuples and ndarrays as well as lists.
    n_missing = new_height * new_width - n_images
    if n_missing > 0:
        blank = np.zeros(
            (n_missing, height, width, n_channels), dtype=np_images.dtype
        )
        np_images = np.concatenate([np_images, blank], axis=0)

    # img_HWhwc
    out_image = np_images.reshape((new_height, new_width, height, width, n_channels))
    # img_HhWwc
    out_image = out_image.transpose(0, 2, 1, 3, 4)
    # img_Hh_Ww_c
    out_image = out_image.reshape((new_height * height, new_width * width, n_channels))
    return out_image
def convert_to_HWC(tensor, input_format):  # tensor: numpy array
    """Convert an image array with the given axis layout to `HWC` layout.

    # Parameters

    tensor : Numpy array holding a single image (2 or 3 axes) or a batch of
        images (4 axes).
    input_format : Case-insensitive axis layout of `tensor`; must be a
        permutation of `"NCHW"`, `"HWC"`, or `"HW"`.

    # Returns

    A `height x width x channels` array. Batches (4 axes) are first tiled
    into a single image with `make_grid`; `HW` inputs and `HWC` inputs with a
    single channel are replicated to three channels.

    # Raises

    AssertionError : If `input_format` repeats an axis, does not match the
        number of axes of `tensor`, or is not a permutation of a supported
        layout.
    """
    assert len(set(input_format)) == len(
        input_format
    ), "You can not use the same dimension shorthand twice. input_format: {}".format(
        input_format
    )
    assert len(tensor.shape) == len(input_format), (
        "size of input tensor and input format are different. "
        "tensor shape: {}, input_format: {}".format(tensor.shape, input_format)
    )
    input_format = input_format.upper()

    # Validate the layout up front: `str.find` returns -1 for a missing
    # letter, which `transpose` would silently interpret as "last axis".
    target = {4: "NCHW", 3: "HWC", 2: "HW"}.get(len(input_format))
    assert target is not None and sorted(input_format) == sorted(target), (
        "Unsupported input_format: {} (expected a permutation of "
        "'NCHW', 'HWC', or 'HW').".format(input_format)
    )

    index = [input_format.find(c) for c in target]
    tensor = tensor.transpose(index)

    if target == "NCHW":
        tensor_CHW = make_grid(tensor)
        # noinspection PyTypeChecker
        return tensor_CHW.transpose(1, 2, 0)

    if target == "HWC":
        if tensor.shape[2] == 1:
            # Replicate the single channel to obtain a 3-channel image.
            tensor = np.concatenate([tensor, tensor, tensor], 2)
        return tensor

    # target == "HW": replicate into three channels.
    return np.stack([tensor, tensor, tensor], 2)


def make_grid(I, ncols=8):
    """Tile a batch of images into a single image, row by row.

    # Parameters

    I : Numpy array of shape `N x C x H x W` with `C` in {1, 3, 4}.
        Single-channel batches are replicated to three channels.
    ncols : Maximum number of images per grid row.

    # Returns

    Array of shape `C x (H * nrows) x (W * ncols)` with the same dtype as
    `I`; unused grid cells are zero.

    # Raises

    AssertionError : If `I` is not a 4-axis numpy array with a supported
        number of channels.
    """
    # I: N1HW or N3HW
    assert isinstance(I, np.ndarray), "plugin error, should pass numpy array here"
    assert I.ndim == 4, "expected a 4-D (NCHW) array, got shape {}".format(I.shape)
    if I.shape[1] == 1:
        I = np.concatenate([I, I, I], 1)
    # Checked separately from `ndim`: `a and b or c` parses as `(a and b) or c`
    # and would let arrays of the wrong rank through.
    assert I.shape[1] in (3, 4), "expected 1, 3 or 4 channels, got shape {}".format(
        I.shape
    )
    nimg, n_channels, H, W = I.shape
    ncols = min(nimg, ncols)
    nrows = int(np.ceil(float(nimg) / ncols))
    canvas = np.zeros((n_channels, H * nrows, W * ncols), dtype=I.dtype)
    for i in range(nimg):
        y, x = divmod(i, ncols)
        canvas[:, y * H : (y + 1) * H, x * W : (x + 1) * W] = I[i]
    return canvas
def process_video(render, max_clip_len=500, max_video_len=-1, fps=4):
    """Encode a sequence of frames as a single video summary.

    The frames are split into chunks of at most `max_clip_len` frames, each
    chunk is turned into a clip with `tensor_to_clip`, and all clips are then
    joined with `clips_to_video`.

    # Parameters

    render : Sequence of frames, each stacked along a new leading time axis
        (treated as height x width x channels when transposed).
    max_clip_len : Maximum number of frames converted into a clip at once.
    max_video_len : If positive, only the first `max_video_len` frames are
        kept; otherwise the whole sequence is used.
    fps : Frames per second, forwarded to `tensor_to_clip`.

    # Returns

    Whatever `clips_to_video` returns, or `None` if `render` is empty or a
    `MemoryError` was raised while building the video.
    """
    n_frames = len(render)
    if n_frames == 0:
        get_logger().warning("Calling process_video with 0 frames")
        return None

    if n_frames > max_video_len > 0:
        get_logger().warning(
            "Clipping video to first {} frames out of {} original frames".format(
                max_video_len, n_frames
            )
        )
        render = render[:max_video_len]

    clips = []
    dims = None
    for first in range(0, len(render), max_clip_len):
        chunk = render[first : first + max_clip_len]
        try:
            stacked = np.stack(chunk, axis=0)  # T, H, W, C
            stacked = stacked.transpose((0, 3, 1, 2))  # T, C, H, W
            stacked = np.expand_dims(stacked, axis=0)  # 1, T, C, H, W
            encoded, chunk_dims = tensor_to_clip(stacked, fps=fps)

            if dims is None:
                dims = chunk_dims
            else:
                assert (
                    dims == chunk_dims
                ), "Inconsistent clip shape: previous {} current {}".format(
                    dims, chunk_dims
                )

            clips.append(encoded)
        except MemoryError:
            get_logger().error(
                "Skipping video due to memory error with clip of length {}".format(
                    len(chunk)
                )
            )
            return None

    assert len(clips) > 0, "No clips to concatenate"
    assert dims is not None, "No tensor dims assigned"

    try:
        return clips_to_video(clips, *dims)
    except MemoryError:
        get_logger().error("Skipping video due to memory error calling clips_to_video")
        return None
Attributes width: new width height: new height interpolation: Default: PIL.Image.BILINEAR """ def __init__(self, width: int, height: int, interpolation=Image.BILINEAR): self.width = width self.height = height self.interpolation = interpolation def __call__(self, img: PIL.Image) -> PIL.Image: return img.resize((self.width, self.height), self.interpolation) ================================================ FILE: allenact/utils/viz_utils.py ================================================ import abc import json import os import sys from collections import defaultdict from typing import ( Dict, Any, Union, Optional, List, Tuple, Sequence, Callable, cast, Set, ) import numpy as np from allenact.utils.experiment_utils import Builder from allenact.utils.tensor_utils import SummaryWriter, tile_images, process_video try: # Tensorflow not installed for testing from tensorflow.core.util import event_pb2 from tensorflow.python.lib.io import tf_record _TF_AVAILABLE = True except ImportError as _: event_pb2 = None tf_record = None _TF_AVAILABLE = False import matplotlib try: # When debugging we don't want to use the interactive version of matplotlib # as it causes all sorts of problems. 
class AbstractViz:
    """Base class for visualizations logged through a `SummaryWriter`.

    A visualization declares which data it needs (`vector_task_sources`,
    `rollout_sources`, `actor_critic_source`) and implements `log` to render
    that data. `_setup` is used to propagate the mode, the path to the
    episode id, and the episode grouping to each visualization.
    """

    def __init__(
        self,
        label: Optional[str] = None,
        vector_task_sources: Sequence[Tuple[str, Dict[str, Any]]] = (),
        rollout_sources: Sequence[Union[str, Sequence[str]]] = (),
        actor_critic_source: bool = False,
        **kwargs,  # accepts `max_episodes_in_group`
    ):
        self.label = label
        self.vector_task_sources = list(vector_task_sources)
        # Every rollout source is stored as a list of path components.
        self.rollout_sources = [
            [entry] if isinstance(entry, str) else list(entry)
            for entry in rollout_sources
        ]
        self.actor_critic_source = actor_critic_source

        self.mode: Optional[str] = None
        self.path_to_id: Optional[Sequence[str]] = None
        self.episode_ids: Optional[List[Sequence[str]]] = None

        # Remember whether the group size was given explicitly, so that
        # `_setup` only overrides it when forced.
        if "max_episodes_in_group" in kwargs:
            self.max_episodes_in_group = kwargs["max_episodes_in_group"]
            self.assigned_max_eps_in_group = True
        else:
            self.max_episodes_in_group = 8
            self.assigned_max_eps_in_group = False

    @staticmethod
    def _source_to_str(source, is_vector_task):
        """Build the string key under which data from `source` is stored."""
        source_type = "vector_task" if is_vector_task else "rollout_or_actor_critic"
        return "{}__{}".format(
            source_type,
            "__{}_sep__".format(source_type).join(["{}".format(s) for s in source]),
        )

    @staticmethod
    def _access(dictionary, path):
        """Follow `path` (a sequence of keys) through nested containers.

        Works for any sequence of keys (lists and tuples alike) and never
        modifies `path`.
        """
        for key in path:
            dictionary = dictionary[key]
        return dictionary

    def _auto_viz_order(self, task_outputs):
        """Return `(viz_order, all_episodes)` for the given task outputs.

        `all_episodes` maps each episode id (found at `self.path_to_id`) to
        its task output. `viz_order` is `self.episode_ids` if set, otherwise
        all ids split into groups of at most `self.max_episodes_in_group`.
        Returns `(None, None)` if `task_outputs` is `None`.
        """
        if task_outputs is None:
            return None, None

        all_episodes = {
            self._access(episode, self.path_to_id): episode for episode in task_outputs
        }

        if self.episode_ids is None:
            all_episode_keys = list(all_episodes.keys())
            group_size = self.max_episodes_in_group
            viz_order = [
                all_episode_keys[page_start : page_start + group_size]
                for page_start in range(0, len(all_episode_keys), group_size)
            ]
            get_logger().debug("visualizing with order {}".format(viz_order))
        else:
            viz_order = self.episode_ids

        return viz_order, all_episodes

    def _setup(
        self,
        mode: str,
        path_to_id: Sequence[str],
        episode_ids: Optional[Sequence[Union[Sequence[str], str]]],
        max_episodes_in_group: int,
        force: bool = False,
    ):
        """Configure mode, id path, episode groups, and group size.

        Episode ids and the group size already set on this visualization are
        kept unless `force` is `True`. A flat sequence of ids is treated as a
        single group; an empty sequence yields no groups.
        """
        self.mode = mode
        self.path_to_id = list(path_to_id)
        if (self.episode_ids is None or force) and episode_ids is not None:
            # Guard the `[0]` lookup so that an empty sequence does not raise.
            if len(episode_ids) > 0 and isinstance(episode_ids[0], str):
                self.episode_ids = [list(cast(List[str], episode_ids))]
            else:
                self.episode_ids = list(episode_ids)
        if not self.assigned_max_eps_in_group or force:
            self.max_episodes_in_group = max_episodes_in_group

    @abc.abstractmethod
    def log(
        self,
        log_writer: "SummaryWriter",
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        """Write this visualization to `log_writer`; implemented by subclasses."""
        raise NotImplementedError()
Any]]]], num_steps: int, ): viz_order, all_episodes = self._auto_viz_order(task_outputs) if viz_order is None: get_logger().debug("trajectory viz returning without visualizing") return for page, current_ids in enumerate(viz_order): figs = [] for episode_id in current_ids: # assert episode_id in all_episodes if episode_id not in all_episodes: get_logger().warning( "skipping viz for missing episode {}".format(episode_id) ) continue figs.append(self.make_fig(all_episodes[episode_id], episode_id)) if len(figs) == 0: continue log_writer.add_figure( "{}/{}_group{}".format(self.mode, self.label, page), figs, global_step=num_steps, ) plt.close( "all" ) # close all current figures (SummaryWriter already closes all figures we log) def make_fig(self, episode, episode_id): # From https://nbviewer.jupyter.org/github/dpsanders/matplotlib-examples/blob/master/colorline.ipynb def colorline( x, y, z=None, cmap=plt.get_cmap("cool"), norm=plt.Normalize(0.0, 1.0), linewidth=2, alpha=1.0, zorder=1, ): """Plot a colored line with coordinates x and y. Optionally specify colors in the array z Optionally specify a colormap, a norm function and a line width. 
class AgentViewViz(AbstractViz):
    """Logs videos built from frames collected from the vector task.

    The frames of all episodes in a group are tiled side by side, with the
    episode id overlaid on each frame.
    """

    def __init__(
        self,
        label: str = "agent_view",
        max_clip_length: int = 100,  # control memory used when converting groups of images into clips
        max_video_length: int = -1,  # no limit, if > 0, limit the maximum video length (discard last frames)
        vector_task_source: Tuple[str, Dict[str, Any]] = (
            "render",
            {"mode": "raw_rgb_list"},
        ),
        episode_ids: Optional[Sequence[Union[Sequence[str], str]]] = None,
        fps: int = 4,
        max_render_size: int = 400,
        **other_base_kwargs,
    ):
        super().__init__(
            label,
            vector_task_sources=[vector_task_source],
            **other_base_kwargs,
        )
        self.max_clip_length = max_clip_length
        self.max_video_length = max_video_length
        self.fps = fps
        self.max_render_size = max_render_size

        # A flat sequence of ids is treated as a single group. The length
        # check keeps an empty sequence from raising on the `[0]` lookup.
        if episode_ids is None:
            self.episode_ids = None
        elif len(episode_ids) > 0 and isinstance(episode_ids[0], str):
            self.episode_ids = [list(cast(List[str], episode_ids))]
        else:
            self.episode_ids = list(episode_ids)

    def log(
        self,
        log_writer: SummaryWriter,
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        """Log one video per episode group to `log_writer`.

        Does nothing if `render` is `None` or no visualization order can be
        derived from `task_outputs`. Episodes missing from `render` are
        skipped with a warning.
        """
        if render is None:
            return

        datum_id = self._source_to_str(self.vector_task_sources[0], is_vector_task=True)

        viz_order, _ = self._auto_viz_order(task_outputs)
        if viz_order is None:
            get_logger().debug("agent view viz returning without visualizing")
            return

        for page, current_ids in enumerate(viz_order):
            images = []  # list of lists of rgb frames
            for episode_id in current_ids:
                # assert episode_id in render
                if episode_id not in render:
                    get_logger().warning(
                        "skipping viz for missing episode {}".format(episode_id)
                    )
                    continue
                images.append(
                    [
                        self._overlay_label(step[datum_id], episode_id)
                        for step in render[episode_id]
                    ]
                )
            if len(images) == 0:
                continue
            vid = self.make_vid(images)
            if vid is not None:
                log_writer.add_vid(
                    f"{self.mode}/{self.label}_group{page}",
                    vid,
                    global_step=num_steps,
                )

    @staticmethod
    def _overlay_label(
        img,
        text,
        pos=(0, 0),
        bg_color=(255, 255, 255),
        fg_color=(0, 0, 0),
        scale=0.4,
        thickness=1,
        margin=2,
        font_face=cv2.FONT_HERSHEY_SIMPLEX,
    ):
        """Draw `text` on a filled background rectangle directly onto `img`.

        `img` is modified in place and also returned.
        """
        txt_size = cv2.getTextSize(text, font_face, scale, thickness)

        end_x = pos[0] + txt_size[0][0] + margin
        end_y = pos[1]

        # Shift the anchor down by the text height so the text sits inside
        # the background rectangle.
        pos = (pos[0], pos[1] + txt_size[0][1] + margin)

        cv2.rectangle(img, pos, (end_x, end_y), bg_color, cv2.FILLED)
        cv2.putText(
            img=img,
            text=text,
            org=pos,
            fontFace=font_face,
            fontScale=scale,
            color=fg_color,
            thickness=thickness,
            lineType=cv2.LINE_AA,
        )
        return img

    def make_vid(self, images):
        """Tile the frames of several episodes into one video.

        # Parameters

        images : List with one list of frames per episode.

        # Returns

        The result of `process_video` on the tiled frames, or `None` if no
        episode contains any frame.

        Episodes shorter than the longest one are padded: an episode without
        any frame shows a blank frame throughout, any other episode keeps
        showing a grayed-out copy of its last frame once it has ended.
        """
        max_length = max((len(ep) for ep in images), default=0)

        if max_length == 0:
            return None

        # Any available frame; used as the shape/dtype template for blanks.
        valid_im = next(ep[0] for ep in images if len(ep) > 0)

        def padding_frame(ep):
            if len(ep) == max_length:
                return None  # never needs padding
            if len(ep) == 0:
                # Blank at every step (not only the first one), so that an
                # empty episode cannot trigger an IndexError on `ep[-1]`.
                return np.zeros_like(valid_im)
            gray = ep[-1].copy()
            gray[:, :, 0] = gray[:, :, 2] = gray[:, :, 1]
            return gray

        # One padding frame per episode, computed once instead of per step.
        padding = [padding_frame(ep) for ep in images]

        frames = []
        for it in range(max_length):
            current_images = [
                ep[it] if it < len(ep) else pad for ep, pad in zip(images, padding)
            ]
            frames.append(tile_images(current_images))

        return process_video(
            frames, self.max_clip_length, self.max_video_length, fps=self.fps
        )
class TensorViz1D(AbstractTensorViz):
    """Line plot of a rollout source that holds one value per step."""

    def __init__(
        self,
        rollout_source: Union[str, Sequence[str]] = "action_log_probs",
        label: Optional[str] = None,
        figsize: Tuple[float, float] = (3, 3),
        **other_base_kwargs,
    ):
        super().__init__(
            rollout_source=rollout_source,
            label=label,
            figsize=figsize,
            **other_base_kwargs,
        )

    def make_fig(self, episode_src, episode_id):
        """Plot the per-step values of one episode and return the figure."""
        first_step = episode_src[0]
        assert first_step.size == 1

        # Join the steps along axis 0, then drop every singleton dimension.
        stacked = np.concatenate(episode_src, axis=0)
        series = stacked.squeeze()

        fig, ax = plt.subplots(figsize=self.figsize)
        ax.plot(series)
        ax.set_title(episode_id)
        ax.set_aspect("auto")
        plt.tight_layout()

        return fig
class ActorViz(AbstractViz):
    """Logs, per episode, a matrix plot of the collected `actor_probs`."""

    def __init__(
        self,
        label: str = "action_probs",
        action_names_path: Optional[Sequence[str]] = ("task_info", "action_names"),
        figsize: Tuple[float, float] = (1, 5),
        fontsize: float = 5,
        **other_base_kwargs,
    ):
        super().__init__(label, actor_critic_source=True, **other_base_kwargs)
        self.action_names_path: Optional[Sequence[str]] = (
            list(action_names_path) if action_names_path is not None else None
        )
        self.figsize = figsize
        self.fontsize = fontsize
        # Read from the first task output on the first call to `log`.
        self.action_names: Optional[List[str]] = None

    def log(
        self,
        log_writer: SummaryWriter,
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        """Log one group of figures per page of the visualization order.

        Does nothing if `render` is `None` or no visualization order can be
        derived from `task_outputs`. Episodes missing from `render` are
        skipped with a warning.
        """
        if render is None:
            return

        if (
            self.action_names is None
            and task_outputs is not None
            and len(task_outputs) > 0
            and self.action_names_path is not None
        ):
            self.action_names = list(
                self._access(task_outputs[0], self.action_names_path)
            )

        viz_order, _ = self._auto_viz_order(task_outputs)
        if viz_order is None:
            get_logger().debug("actor viz returning without visualizing")
            return

        for page, current_ids in enumerate(viz_order):
            figs = []
            for episode_id in current_ids:
                # assert episode_id in render
                if episode_id not in render:
                    get_logger().warning(
                        "skipping viz for missing episode {}".format(episode_id)
                    )
                    continue
                episode_src = [
                    step["actor_probs"]
                    for step in render[episode_id]
                    if "actor_probs" in step
                ]
                # Every collected step is expected to carry actor probabilities.
                assert len(episode_src) == len(render[episode_id])
                figs.append(self.make_fig(episode_src, episode_id))
            if len(figs) == 0:
                continue
            log_writer.add_figure(
                "{}/{}_group{}".format(self.mode, self.label, page),
                figs,
                global_step=num_steps,
            )
            plt.close(
                "all"
            )  # close all current figures (SummaryWriter already closes all figures we log)

    def make_fig(self, episode_src, episode_id):
        """Build the matrix plot for one episode and return the figure."""
        # Concatenate along step axis (0, reused from kept sampler axis)
        mat = np.concatenate(episode_src, axis=0)

        fig, ax = plt.subplots(figsize=self.figsize)
        ax.matshow(mat)

        if self.action_names is not None:
            assert len(self.action_names) == mat.shape[-1]
            # Fix the tick positions before labelling them. Labelling the
            # automatically chosen ticks (with a leading "" for an assumed
            # tick at -1) misplaces the names whenever the automatic locator
            # does not produce exactly one tick per column.
            ax.set_xticks(np.arange(len(self.action_names)))
            ax.set_xticklabels(self.action_names, rotation="vertical")

        ax.set_xlabel(episode_id, fontsize=self.fontsize)
        ax.tick_params(axis="x", labelsize=self.fontsize)
        ax.tick_params(axis="y", labelsize=self.fontsize)
        ax.tick_params(bottom=False)

        # Gridlines based on minor ticks
        ax.set_yticks(np.arange(-0.5, mat.shape[0], 1), minor=True)
        ax.set_xticks(np.arange(-0.5, mat.shape[1], 1), minor=True)
        ax.grid(which="minor", color="w", linestyle="-", linewidth=0.05)
        ax.tick_params(
            axis="both", which="minor", left=False, top=False, right=False, bottom=False
        )

        ax.set_aspect("auto")
        plt.tight_layout()
        return fig
= [], [] labels = [] actor_critic_source = False new_episodes = [] for v in self.viz: labels.append(v.label) rollout_sources += v.rollout_sources vector_task_sources += v.vector_task_sources actor_critic_source |= v.actor_critic_source if ( v.episode_ids is not None and not self.force_episodes_and_max_episodes_in_group ): cur_episodes = self._episodes_set(v.episode_ids) for ep in cur_episodes: if ( self.all_episode_ids is not None and ep not in self.all_episode_ids ): new_episodes.append(ep) get_logger().info( "Added new episode {} from {}".format(ep, v.label) ) v._setup( mode=self.mode, path_to_id=self.path_to_id, episode_ids=self.episode_ids, max_episodes_in_group=self.max_episodes_in_group, force=self.force_episodes_and_max_episodes_in_group, ) if isinstance(v, AgentViewViz): self.max_render_size = v.max_render_size get_logger().info("Logging labels {}".format(labels)) if len(new_episodes) > 0: get_logger().info("Added new episodes {}".format(new_episodes)) self.episode_ids.append(new_episodes) # new group with all added episodes self.all_episode_ids = self._episodes_set() rol_flat = {json.dumps(src, sort_keys=True): src for src in rollout_sources} vt_flat = {json.dumps(src, sort_keys=True): src for src in vector_task_sources} rol_keys = list(set(rol_flat.keys())) vt_keys = list(set(vt_flat.keys())) return ( [rol_flat[k] for k in rol_keys], [vt_flat[k] for k in vt_keys], actor_critic_source, ) def _episodes_set(self, episode_list=None) -> Optional[Set[str]]: source = self.episode_ids if episode_list is None else episode_list if source is None: return None all_episode_ids: List[str] = [] for group in source: all_episode_ids += group return set(all_episode_ids) def empty(self): return len(self.data) == 0 def _update(self, collected_data): for epid in collected_data: assert epid in self.data self.data[epid][-1].update(collected_data[epid]) def _append(self, vector_task_data): for epid in vector_task_data: if epid in self.data: 
self.data[epid].append(vector_task_data[epid]) else: self.data[epid] = [vector_task_data[epid]] def _collect_actor_critic(self, actor_critic): actor_critic_data = { epid: dict() for epid in self.last_it2epid if self.all_episode_ids is None or epid in self.all_episode_ids } if len(actor_critic_data) > 0 and actor_critic is not None: if self.actor_critic_source: # TODO this code only supports Discrete action spaces! probs = ( actor_critic.distributions.probs ) # step (=1) x sampler x agent (=1) x action values = actor_critic.values # step x sampler x agent x 1 for it, epid in enumerate(self.last_it2epid): if epid in actor_critic_data: # Select current episode (sampler axis will be reused as step axis) prob = ( # probs.narrow(dim=0, start=it, length=1) # works for sampler x action probs.narrow( dim=1, start=it, length=1 ) # step x sampler x agent x action -> step x 1 x agent x action .squeeze( 0 ) # step x 1 x agent x action -> 1 x agent x action # .squeeze(-2) # 1 x agent x action -> 1 x action .to("cpu") .detach() .numpy() ) assert "actor_probs" not in actor_critic_data[epid] actor_critic_data[epid]["actor_probs"] = prob val = ( # values.narrow(dim=0, start=it, length=1) # works for sampler x 1 values.narrow( dim=1, start=it, length=1 ) # step x sampler x agent x 1 -> step x 1 x agent x 1 .squeeze(0) # step x 1 x agent x 1 -> 1 x agent x 1 # .squeeze(-2) # 1 x agent x 1 -> 1 x 1 .to("cpu") .detach() .numpy() ) assert "critic_value" not in actor_critic_data[epid] actor_critic_data[epid]["critic_value"] = val self._update(actor_critic_data) def _collect_rollout(self, rollout, alive): alive_set = set(alive) assert len(alive_set) == len(alive) alive_it2epid = [ epid for it, epid in enumerate(self.last_it2epid) if it in alive_set ] rollout_data = { epid: dict() for epid in alive_it2epid if self.all_episode_ids is None or epid in self.all_episode_ids } if len(rollout_data) > 0 and rollout is not None: for source in self.rollout_sources: datum_id = 
self._source_to_str(source, is_vector_task=False) storage, path = source[0], source[1:] # Access storage res = getattr(rollout, storage) episode_dim = rollout.dim_names.index("sampler") # Access sub-storage if path not empty if len(path) > 0: if storage == "memory_first_last": storage = "memory" flattened_name = rollout.unflattened_to_flattened[storage][ tuple(path) ] # for path_step in path: # res = res[path_step] res = res[flattened_name] res, episode_dim = res if rollout.step > 0: if rollout.step > res.shape[0]: # e.g. rnn with only latest memory saved rollout_step = res.shape[0] - 1 else: rollout_step = rollout.step - 1 else: if rollout.num_steps - 1 < res.shape[0]: rollout_step = rollout.num_steps - 1 else: # e.g. rnn with only latest memory saved rollout_step = res.shape[0] - 1 # Select latest step res = res.narrow( dim=0, start=rollout_step, length=1, # step dimension ) # 1 x ... x sampler x ... # get_logger().debug("basic collect h {}".format(res[..., 0])) for it, epid in enumerate(alive_it2epid): if epid in rollout_data: # Select current episode and remove episode/sampler axis datum = ( res.narrow(dim=episode_dim, start=it, length=1) .squeeze(axis=episode_dim) .to("cpu") .detach() .numpy() ) # 1 x ... (no sampler dim) # get_logger().debug("basic collect ep {} h {}".format(epid, res[..., 0])) assert datum_id not in rollout_data[epid] rollout_data[epid][ datum_id ] = datum.copy() # copy needed when running on CPU! 
self._update(rollout_data) def _collect_vector_task(self, vector_task): it2epid = [ self._access(info, self.path_to_id[1:]) for info in vector_task.attr("task_info") ] # get_logger().debug("basic epids {}".format(it2epid)) def limit_spatial_res(data: np.ndarray, max_size=400): if data.shape[0] <= max_size and data.shape[1] <= max_size: return data else: f = float(max_size) / max(data.shape[0], data.shape[1]) size = (int(data.shape[1] * f), int(data.shape[0] * f)) return cv2.resize(data, size, 0, 0, interpolation=cv2.INTER_AREA) vector_task_data = { epid: dict() for epid in it2epid if self.all_episode_ids is None or epid in self.all_episode_ids } if len(vector_task_data) > 0: for ( source ) in self.vector_task_sources: # these are observations for next step! datum_id = self._source_to_str(source, is_vector_task=True) method, kwargs = source res = getattr(vector_task, method)(**kwargs) if not isinstance(res, Sequence): assert len(it2epid) == 1 res = [res] if method == "render": res = [limit_spatial_res(r, self.max_render_size) for r in res] assert len(res) == len(it2epid) for datum, epid in zip(res, it2epid): if epid in vector_task_data: assert datum_id not in vector_task_data[epid] vector_task_data[epid][datum_id] = datum self._append(vector_task_data) return it2epid # to be called by engine def collect(self, vector_task=None, alive=None, rollout=None, actor_critic=None): if actor_critic is not None: # in phase with last_it2epid try: self._collect_actor_critic(actor_critic) except (AssertionError, RuntimeError): get_logger().debug( msg=f"Failed collect (actor_critic) for viz due to exception:", exc_info=sys.exc_info(), ) get_logger().error(f"Failed collect (actor_critic) for viz") if alive is not None and rollout is not None: # in phase with last_it2epid that stay alive try: self._collect_rollout(rollout=rollout, alive=alive) except (AssertionError, RuntimeError): get_logger().debug( msg=f"Failed collect (rollout) for viz due to exception:", exc_info=sys.exc_info(), 
class TensorboardSummarizer:
    """Re-emit tensorboard scalars of several experiments aligned on training wall time.

    Event files of each experiment are read once at construction time; calling
    `make_tensorboard_summary` then writes one new tensorboard log per
    experiment in which evaluation scores carry the wall time of the matching
    training step.

    Assumption: tensorboard tags/labels include a valid/test/train substr
    indicating the data modality
    """

    def __init__(
        self,
        experiment_to_train_events_paths_map: Dict[str, Sequence[str]],
        experiment_to_test_events_paths_map: Dict[str, Sequence[str]],
        eval_min_mega_steps: Optional[Sequence[float]] = None,
        tensorboard_tags_to_labels_map: Optional[Dict[str, str]] = None,
        tensorboard_output_summary_folder: str = "tensorboard_plotter_output",
    ):
        """Validate the configuration and read all event files.

        # Parameters

        experiment_to_train_events_paths_map : Experiment name -> paths of training event files.
        experiment_to_test_events_paths_map : Experiment name -> paths of test event files.
        eval_min_mega_steps : Optional thresholds (in millions of steps) at which evaluation
            scores are sampled; derived from the evaluation data when `None`.
        tensorboard_tags_to_labels_map : Optional mapping from tensorboard tag to output label;
            tags missing from the mapping are dropped. Each label must contain exactly one of
            "train", "valid" or "test".
        tensorboard_output_summary_folder : Folder where the new summaries are written.

        # Raises

        ImportError : If tensorflow is not available.
        AssertionError : If the experiment keys or the labels are inconsistent.
        """
        if not _TF_AVAILABLE:
            raise ImportError(
                "Please install tensorflow e.g. with `pip install tensorflow` to enable TensorboardSummarizer"
            )

        self.experiment_to_train_events_paths_map = experiment_to_train_events_paths_map
        self.experiment_to_test_events_paths_map = experiment_to_test_events_paths_map
        train_experiments = set(experiment_to_train_events_paths_map)
        test_experiments = set(experiment_to_test_events_paths_map)
        assert (train_experiments - test_experiments) in [
            set(),
            train_experiments,
        ], (
            f"`experiment_to_test_events_paths_map` must have identical keys (experiment names) to those"
            f" in `experiment_to_train_events_paths_map`, or be empty."
            f" Got {train_experiments} train keys and {test_experiments} test keys."
        )

        self.eval_min_mega_steps = eval_min_mega_steps
        self.tensorboard_tags_to_labels_map = tensorboard_tags_to_labels_map
        if self.tensorboard_tags_to_labels_map is not None:
            for tag, label in self.tensorboard_tags_to_labels_map.items():
                assert ("valid" in label) + ("train" in label) + (
                    "test" in label
                ) == 1, (
                    f"One (and only one) of {'train', 'valid', 'test'} must be part of the label for"
                    f" tag {tag} ({label} given)."
                )
        self.tensorboard_output_summary_folder = tensorboard_output_summary_folder

        self.train_data = self._read_tensorflow_experiment_events(
            self.experiment_to_train_events_paths_map
        )
        self.test_data = self._read_tensorflow_experiment_events(
            self.experiment_to_test_events_paths_map
        )

    def _read_tensorflow_experiment_events(
        self, experiment_to_events_paths_map, skip_map=False
    ):
        """Read scalar events into `{experiment: {label: [dict(score, time, steps), ...]}}`.

        When `skip_map` is true (or no tag-to-label mapping was configured) the
        raw tensorboard tag is used as label; otherwise unmapped tags are skipped.
        """

        def my_summary_iterator(path):
            # An IOError while reading only ends the iteration for this file.
            try:
                for r in tf_record.tf_record_iterator(path):
                    yield event_pb2.Event.FromString(r)
            except IOError:
                get_logger().debug(f"IOError for path {path}")

        tags_to_labels = None if skip_map else self.tensorboard_tags_to_labels_map

        collected_data = {}
        for experiment_name, path_list in experiment_to_events_paths_map.items():
            experiment_data = defaultdict(list)
            for filename_path in path_list:
                for event in my_summary_iterator(filename_path):
                    for value in event.summary.value:
                        if tags_to_labels is None:
                            label = value.tag
                        elif value.tag in tags_to_labels:
                            label = tags_to_labels[value.tag]
                        else:
                            continue
                        experiment_data[label].append(
                            dict(
                                score=value.simple_value,
                                time=event.wall_time,
                                steps=event.step,
                            )
                        )
            collected_data[experiment_name] = experiment_data

        return collected_data

    def _eval_vs_train_time_steps(self, eval_data, train_data):
        """Pair evaluation scores with the training wall time at each step threshold.

        For every threshold (in millions of steps; `self.eval_min_mega_steps`,
        or `(steps - 1) / 1e6` of each evaluation entry when that is `None`)
        the first evaluation entry and the first training entry reaching the
        threshold are located. The evaluation score is reported with the
        training entry's time. The first training entry is prepended with
        step 0.

        Compared to the previous implementation this method
        * always terminates (the threshold index used to be tied to the number
          of emitted entries, so re-using an evaluation entry looped forever),
        * treats non-positive thresholds as already reached instead of
          dividing by them,
        * stops once no evaluation entry reaches a threshold, and uses the last
          training entry when training data ends early, instead of raising
          `IndexError`,
        * returns three empty lists when `train_data` is empty.

        # Returns

        Tuple `(scores, times, steps)` of equally long lists.
        """
        if not train_data:
            return [], [], []

        min_mega_steps = self.eval_min_mega_steps
        if min_mega_steps is None:
            min_mega_steps = [(item["steps"] - 1) / 1e6 for item in eval_data]

        def reached(num_steps, mega_steps):
            # Same arithmetic as before (`steps / threshold / 1e6`), guarded
            # against zero/negative thresholds.
            return mega_steps <= 0 or num_steps / mega_steps / 1e6 >= 1

        scores, times, steps = [], [], []

        i, t, last_i = 0, 0, -1
        last_train = len(train_data) - 1
        for threshold in min_mega_steps:
            while i < len(eval_data) and not reached(eval_data[i]["steps"], threshold):
                i += 1
            if i == len(eval_data):
                # No (further) evaluation entry reaches this or any later threshold.
                break
            while t < last_train and not reached(train_data[t]["steps"], threshold):
                t += 1

            # step might be missing in valid! (and would duplicate future value at previous steps!)
            # solution: move forward last entry's time if no change in i (instead of new entry)
            if i == last_i:
                times[-1] = train_data[t]["time"]
            else:
                scores.append(eval_data[i]["score"])
                times.append(train_data[t]["time"])
                steps.append(eval_data[i]["steps"])

            last_i = i

        scores.insert(0, train_data[0]["score"])
        times.insert(0, train_data[0]["time"])
        steps.insert(0, 0)

        return scores, times, steps

    def _train_vs_time_steps(self, train_data):
        """Return training `(scores, times, steps)` up to the last evaluation threshold.

        Entries are taken in order until one reaches
        `self.eval_min_mega_steps[-1] * 1e6` steps (all entries when no
        thresholds are configured). Empty input yields three empty lists.
        """
        if not train_data:
            return [], [], []

        last_eval_step = (
            self.eval_min_mega_steps[-1] * 1e6
            if self.eval_min_mega_steps is not None
            else float("inf")
        )

        scores = [train_data[0]["score"]]
        times = [train_data[0]["time"]]
        steps = [train_data[0]["steps"]]

        t = 1
        while steps[-1] < last_eval_step and t < len(train_data):
            scores.append(train_data[t]["score"])
            times.append(train_data[t]["time"])
            steps.append(train_data[t]["steps"])
            t += 1

        return scores, times, steps

    @staticmethod
    def _add_scalars(summary_writer, label, scores, times, steps):
        """Write one scalar per `(score, time, step)` triple under `label`."""
        for score, t, step in zip(scores, times, steps):
            summary_writer.add_scalar(label, score, global_step=step, walltime=t)

    def _write_experiment_summary(self, summary_writer, experiment_name):
        """Write the test, valid and train curves of one experiment."""
        experiment_train_data = self.train_data[experiment_name]
        # The constructor allows test experiments that do not include this one.
        experiment_test_data = self.test_data.get(experiment_name, {})

        for test_label in sorted(experiment_test_data):
            train_label = test_label.replace("valid", "test").replace("test", "train")
            if train_label not in experiment_train_data:
                print(
                    f"Missing matching 'train' label {train_label} for eval label {test_label}. Skipping"
                )
                continue
            scores, times, steps = self._eval_vs_train_time_steps(
                experiment_test_data[test_label], experiment_train_data[train_label]
            )
            self._add_scalars(summary_writer, test_label, scores, times, steps)

        valid_labels = sorted(key for key in experiment_train_data if "valid" in key)
        for valid_label in valid_labels:
            train_label = valid_label.replace("valid", "train")
            assert (
                train_label in experiment_train_data
            ), f"Missing matching 'train' label {train_label} for valid label {valid_label}"
            scores, times, steps = self._eval_vs_train_time_steps(
                experiment_train_data[valid_label], experiment_train_data[train_label]
            )
            self._add_scalars(summary_writer, valid_label, scores, times, steps)

        train_labels = sorted(key for key in experiment_train_data if "train" in key)
        for train_label in train_labels:
            scores, times, steps = self._train_vs_time_steps(
                experiment_train_data[train_label]
            )
            self._add_scalars(summary_writer, train_label, scores, times, steps)

    def make_tensorboard_summary(self):
        """Write one re-aligned tensorboard summary per training experiment.

        Each summary is written to
        `<tensorboard_output_summary_folder>/<experiment_name>`; the writer is
        closed even if writing fails.
        """
        for experiment_name in list(self.experiment_to_train_events_paths_map.keys()):
            summary_writer = SummaryWriter(
                os.path.join(self.tensorboard_output_summary_folder, experiment_name)
            )
            try:
                self._write_experiment_summary(summary_writer, experiment_name)
            finally:
                summary_writer.close()
def __init__(
    self,
    obs_space: Dict[str, int],
    action_space: gym.spaces.Discrete,
    image_dim=128,
    memory_dim=128,
    instr_dim=128,
    use_instr=False,
    lang_model="gru",
    use_memory=False,
    arch="cnn1",
    aux_info=None,
    include_auxiliary_head: bool = False,
):
    """Build the wrapped BabyAI actor-critic model.

    All arguments except `include_auxiliary_head` are forwarded to
    `babyai.model.ACModel.__init__`; the one exception is `arch == "cnn2"`,
    which is handled here: the parent is built with `"cnn1"` and its first
    convolution is then replaced (see below).

    # Parameters

    obs_space : Observation space description passed to the parent model.
    action_space : Discrete action space; `action_space.n` sizes the auxiliary head.
    include_auxiliary_head : If `True`, an extra `self.aux` head mapping
        `memory_dim` features to `action_space.n` logits is created.
    """
    # Recorded before the parent constructor runs so it can select the
    # architecture actually passed to the parent.
    self.use_cnn2 = arch == "cnn2"
    super().__init__(
        obs_space=obs_space,
        action_space=action_space,
        image_dim=image_dim,
        memory_dim=memory_dim,
        instr_dim=instr_dim,
        use_instr=use_instr,
        lang_model=lang_model,
        use_memory=use_memory,
        arch="cnn1" if self.use_cnn2 else arch,
        aux_info=aux_info,
    )

    self.semantic_embedding = None
    if self.use_cnn2:
        # "cnn2": image entries are embedded (8 dims each) before the
        # convolutions. `forward` reshapes the embedded image to 24 channels,
        # hence the replacement first convolution with 24 input channels.
        # NOTE(review): 33 embeddings presumably = 3 channels x 11 values
        # (forward offsets the channels by 0, 11, 22) — confirm.
        self.semantic_embedding = nn.Embedding(33, embedding_dim=8)
        self.image_conv = nn.Sequential(
            nn.Conv2d(in_channels=24, out_channels=16, kernel_size=(2, 2)),
            *self.image_conv[1:]  # type:ignore
        )
        # Only the newly created layer is (re-)initialized.
        self.image_conv[0].apply(babyai.model.initialize_parameters)

    self.include_auxiliary_head = include_auxiliary_head
    if self.use_memory and self.lang_model == "gru":
        # `forward` feeds whole sub-sequences of image embeddings to this
        # module, so a sequence-level LSTM is installed as `memory_rnn`.
        self.memory_rnn = nn.LSTM(self.image_dim, self.memory_dim)

    if self.include_auxiliary_head:
        self.aux = nn.Sequential(
            nn.Linear(self.memory_dim, 64),
            nn.Tanh(),
            nn.Linear(64, action_space.n),
        )
        self.aux.apply(babyai.model.initialize_parameters)

    # Put the module in training mode.
    self.train()
def forward_loop(
    self,
    observations: ObservationType,
    recurrent_hidden_states: torch.FloatTensor,
    prev_actions: torch.Tensor,
    masks: torch.FloatTensor,
):
    """Run the model one time step at a time via `forward_once`.

    Inputs are expected in the flattened layout produced by `adapt_inputs`
    (batch dimension = `rollouts_len * nsamplers`). Instruction embeddings are
    computed once for every (step, sampler) position whose mask differs from 1
    (plus all samplers at step 0) and re-used until the next such position.

    # Parameters

    observations : Must contain `"minigrid_ego_image"`; `"minigrid_mission"`
        is required when `self.use_instr` is set.
    recurrent_hidden_states : Tensor whose first dimension must be 1 and
        whose second dimension is the number of samplers.
    prev_actions : Unused.
    masks : Flattened masks; the memory is multiplied by the mask of each step.

    # Returns

    Tuple of the `ActorCriticOutput` computed over all steps and the memories
    of all steps stacked along a new leading dimension.
    """
    results = []
    images = cast(torch.FloatTensor, observations["minigrid_ego_image"]).float()
    instrs: Optional[torch.Tensor] = None
    if "minigrid_mission" in observations:
        instrs = cast(torch.Tensor, observations["minigrid_mission"])

    _, nsamplers, _ = recurrent_hidden_states.shape
    rollouts_len = images.shape[0] // nsamplers

    obs = babyai.rl.DictList()

    images = images.view(rollouts_len, nsamplers, *images.shape[1:])
    masks = masks.view(rollouts_len, nsamplers, *masks.shape[1:])  # type:ignore

    if instrs is not None:
        instrs = instrs.view(rollouts_len, nsamplers, instrs.shape[-1])

    # Positions where the instruction embedding has to be (re)computed.
    needs_instr_reset_mask = masks != 1.0
    needs_instr_reset_mask[0] = 1
    needs_instr_reset_mask = needs_instr_reset_mask.squeeze(-1)
    instr_embeddings: Optional[torch.Tensor] = None
    if self.use_instr:
        instr_reset_multi_inds = list(
            (int(a), int(b))
            for a, b in zip(*np.where(needs_instr_reset_mask.cpu().numpy()))
        )
        time_ind_to_which_need_instr_reset: List[List] = [
            [] for _ in range(rollouts_len)
        ]
        reset_multi_ind_to_index = {
            mi: i for i, mi in enumerate(instr_reset_multi_inds)
        }
        for a, b in instr_reset_multi_inds:
            time_ind_to_which_need_instr_reset[a].append(b)

        unique_instr_embeddings = self._get_instr_embedding(
            instrs[needs_instr_reset_mask]
        )

        # All samplers are marked at step 0, so the first `nsamplers`
        # embeddings belong to step 0.
        instr_embeddings_list = [unique_instr_embeddings[:nsamplers]]
        current_instr_embeddings_list = list(instr_embeddings_list[-1])

        for time_ind in range(1, rollouts_len):
            if len(time_ind_to_which_need_instr_reset[time_ind]) == 0:
                instr_embeddings_list.append(instr_embeddings_list[-1])
            else:
                for sampler_needing_reset_ind in time_ind_to_which_need_instr_reset[
                    time_ind
                ]:
                    current_instr_embeddings_list[sampler_needing_reset_ind] = (
                        unique_instr_embeddings[
                            reset_multi_ind_to_index[
                                (time_ind, sampler_needing_reset_ind)
                            ]
                        ]
                    )

                instr_embeddings_list.append(
                    torch.stack(current_instr_embeddings_list, dim=0)
                )

        instr_embeddings = torch.stack(instr_embeddings_list, dim=0)

    assert recurrent_hidden_states.shape[0] == 1
    memory = recurrent_hidden_states[0]
    for i in range(rollouts_len):
        obs.image = images[i]
        if "minigrid_mission" in observations:
            obs.instr = instrs[i]

        results.append(
            self.forward_once(
                obs,
                memory=memory * masks[i],
                # Without instructions there is nothing to index; subscripting
                # `None` here used to raise a TypeError.
                instr_embedding=(
                    None if instr_embeddings is None else instr_embeddings[i]
                ),
            )
        )
        memory = results[-1]["memory"]

    embedding = torch.cat([r["embedding"] for r in results], dim=0)

    extra_predictions_list = [r["extra_predictions"] for r in results]
    extra_predictions = {
        key: torch.cat([ep[key] for ep in extra_predictions_list], dim=0)
        for key in extra_predictions_list[0]
    }
    return (
        ActorCriticOutput(
            distributions=CategoricalDistr(
                logits=self.actor(embedding),
            ),
            values=self.critic(embedding),
            extras=(
                extra_predictions
                if not self.include_auxiliary_head
                else {
                    **extra_predictions,
                    "auxiliary_distributions": cast(
                        Any, CategoricalDistr(logits=self.aux(embedding))
                    ),
                }
            ),
        ),
        torch.stack([r["memory"] for r in results], dim=0),
    )
instrs.view(rollouts_len, nsamplers, instrs.shape[-1]) needs_instr_reset_mask = masks != 1.0 needs_instr_reset_mask[0] = 1 needs_instr_reset_mask = needs_instr_reset_mask.squeeze(-1) blocking_inds: List[int] = np.where( needs_instr_reset_mask.view(rollouts_len, -1).any(-1).cpu().numpy() )[0].tolist() blocking_inds.append(rollouts_len) instr_embeddings: Optional[torch.Tensor] = None if self.use_instr: instr_reset_multi_inds = list( (int(a), int(b)) for a, b in zip(*np.where(needs_instr_reset_mask.cpu().numpy())) ) time_ind_to_which_need_instr_reset: List[List] = [ [] for _ in range(rollouts_len) ] reset_multi_ind_to_index = { mi: i for i, mi in enumerate(instr_reset_multi_inds) } for a, b in instr_reset_multi_inds: time_ind_to_which_need_instr_reset[a].append(b) unique_instr_embeddings = self._get_instr_embedding( instrs[needs_instr_reset_mask] ) instr_embeddings_list = [unique_instr_embeddings[:nsamplers]] current_instr_embeddings_list = list(instr_embeddings_list[-1]) for time_ind in range(1, rollouts_len): if len(time_ind_to_which_need_instr_reset[time_ind]) == 0: instr_embeddings_list.append(instr_embeddings_list[-1]) else: for sampler_needing_reset_ind in time_ind_to_which_need_instr_reset[ time_ind ]: current_instr_embeddings_list[sampler_needing_reset_ind] = ( unique_instr_embeddings[ reset_multi_ind_to_index[ (time_ind, sampler_needing_reset_ind) ] ] ) instr_embeddings_list.append( torch.stack(current_instr_embeddings_list, dim=0) ) instr_embeddings = torch.stack(instr_embeddings_list, dim=0) # The following code can be used to compute the instr_embeddings in another way # and thus verify that the above logic is (more likely to be) correct # needs_instr_reset_mask = (masks != 1.0) # needs_instr_reset_mask[0] *= 0 # needs_instr_reset_inds = needs_instr_reset_mask.view(nrollouts, -1).any(-1).cpu().numpy() # # # Get inds where a new task has started # blocking_inds: List[int] = np.where(needs_instr_reset_inds)[0].tolist() # 
blocking_inds.append(needs_instr_reset_inds.shape[0]) # if nrollouts != 1: # pdb.set_trace() # if blocking_inds[0] != 0: # blocking_inds.insert(0, 0) # if self.use_instr: # instr_embeddings_list = [] # for ind0, ind1 in zip(blocking_inds[:-1], blocking_inds[1:]): # instr_embeddings_list.append( # self._get_instr_embedding(instrs[ind0]) # .unsqueeze(0) # .repeat(ind1 - ind0, 1, 1) # ) # tmp_instr_embeddings = torch.cat(instr_embeddings_list, dim=0) # assert (instr_embeddings - tmp_instr_embeddings).abs().max().item() < 1e-6 # Embed images # images = images.view(nrollouts, nsamplers, *images.shape[1:]) image_embeddings = self.image_conv(images) if self.arch.startswith("expert_filmcnn"): instr_embeddings_flatter = instr_embeddings.view( -1, *instr_embeddings.shape[2:] ) for controller in self.controllers: image_embeddings = controller( image_embeddings, instr_embeddings_flatter ) image_embeddings = F.relu(self.film_pool(image_embeddings)) image_embeddings = image_embeddings.view(rollouts_len, nsamplers, -1) if self.use_instr and self.lang_model == "attgru": raise NotImplementedError("Currently attgru is not implemented.") memory = None if self.use_memory: assert recurrent_hidden_states.shape[0] == 1 hidden = ( recurrent_hidden_states[:, :, : self.semi_memory_size], recurrent_hidden_states[:, :, self.semi_memory_size :], ) embeddings_list = [] for ind0, ind1 in zip(blocking_inds[:-1], blocking_inds[1:]): hidden = (hidden[0] * masks[ind0], hidden[1] * masks[ind0]) rnn_out, hidden = self.memory_rnn(image_embeddings[ind0:ind1], hidden) embeddings_list.append(rnn_out) # embedding = hidden[0] embedding = torch.cat(embeddings_list, dim=0) memory = torch.cat(hidden, dim=-1) else: embedding = image_embeddings if self.use_instr and not "filmcnn" in self.arch: embedding = torch.cat((embedding, instr_embeddings), dim=-1) if hasattr(self, "aux_info") and self.aux_info: extra_predictions = { info: self.extra_heads[info](embedding) for info in self.extra_heads } else: 
extra_predictions = dict() embedding = embedding.view(rollouts_len * nsamplers, -1) ac_output = ActorCriticOutput( distributions=CategoricalDistr( logits=self.actor(embedding), ), values=self.critic(embedding), extras=( extra_predictions if not self.include_auxiliary_head else { **extra_predictions, "auxiliary_distributions": CategoricalDistr( logits=self.aux(embedding) ), } ), ) hidden_states = memory return self.adapt_result( ac_output, hidden_states, num_steps, num_samplers, num_agents, num_layers, observations, ) @staticmethod def adapt_inputs( # type: ignore observations: ObservationType, recurrent_hidden_states: torch.FloatTensor, prev_actions: torch.Tensor, masks: torch.FloatTensor, ): # INPUTS # observations are of shape [num_steps, num_samplers, ...] # recurrent_hidden_states are of shape [num_layers, num_samplers, (num_agents,) num_dims] # prev_actions are of shape [num_steps, num_samplers, ...] # masks are of shape [num_steps, num_samplers, 1] # num_agents is assumed to be 1 num_steps, num_samplers = masks.shape[:2] num_layers = recurrent_hidden_states.shape[0] num_agents = 1 # Flatten all observation batch dims def recursively_adapt_observations(obs): for entry in obs: if isinstance(obs[entry], Dict): recursively_adapt_observations(obs[entry]) else: assert isinstance(obs[entry], torch.Tensor) if entry in ["minigrid_ego_image", "minigrid_mission"]: final_dims = obs[entry].shape[2:] obs[entry] = obs[entry].view( num_steps * num_samplers, *final_dims ) # Old-style inputs need to be # observations [num_steps * num_samplers, ...] 
# recurrent_hidden_states [num_layers, num_samplers (* num_agents), num_dims] # prev_actions [num_steps * num_samplers, -1] # masks [num_steps * num_samplers, 1] recursively_adapt_observations(observations) recurrent_hidden_states = cast( torch.FloatTensor, recurrent_hidden_states.view(num_layers, num_samplers * num_agents, -1), ) if prev_actions is not None: prev_actions = prev_actions.view( # type:ignore num_steps * num_samplers, -1 ) masks = masks.view(num_steps * num_samplers, 1) # type:ignore return ( observations, recurrent_hidden_states, prev_actions, masks, num_steps, num_samplers, num_agents, num_layers, ) @staticmethod def adapt_result(ac_output, hidden_states, num_steps, num_samplers, num_agents, num_layers, observations): # type: ignore distributions = CategoricalDistr( logits=ac_output.distributions.logits.view(num_steps, num_samplers, -1), ) values = ac_output.values.view(num_steps, num_samplers, num_agents) extras = ac_output.extras # ignore shape # TODO confirm the shape of the auxiliary distribution is the same as the actor's if "auxiliary_distributions" in extras: extras["auxiliary_distributions"] = CategoricalDistr( logits=extras["auxiliary_distributions"].logits.view( num_steps, num_samplers, -1 # assume single-agent ), ) hidden_states = hidden_states.view(num_layers, num_samplers * num_agents, -1) # Unflatten all observation batch dims def recursively_adapt_observations(obs): for entry in obs: if isinstance(obs[entry], Dict): recursively_adapt_observations(obs[entry]) else: assert isinstance(obs[entry], torch.Tensor) if entry in ["minigrid_ego_image", "minigrid_mission"]: final_dims = obs[entry].shape[ 1: ] # assumes no agents dim in observations! 
obs[entry] = obs[entry].view( num_steps, num_samplers * num_agents, *final_dims ) recursively_adapt_observations(observations) return ( ActorCriticOutput( distributions=distributions, values=values, extras=extras ), hidden_states, ) class BabyAIRecurrentACModel(ActorCriticModel[CategoricalDistr]): def __init__( self, action_space: gym.spaces.Discrete, observation_space: SpaceDict, image_dim=128, memory_dim=128, instr_dim=128, use_instr=False, lang_model="gru", use_memory=False, arch="cnn1", aux_info=None, include_auxiliary_head: bool = False, ): super().__init__(action_space=action_space, observation_space=observation_space) assert "minigrid_ego_image" in observation_space.spaces assert not use_instr or "minigrid_mission" in observation_space.spaces self.memory_dim = memory_dim self.include_auxiliary_head = include_auxiliary_head self.baby_ai_model = BabyAIACModelWrapped( obs_space={ "image": 7 * 7 * 3, "instr": 100, }, action_space=action_space, image_dim=image_dim, memory_dim=memory_dim, instr_dim=instr_dim, use_instr=use_instr, lang_model=lang_model, use_memory=use_memory, arch=arch, aux_info=aux_info, include_auxiliary_head=self.include_auxiliary_head, ) self.memory_key = "rnn" @property def recurrent_hidden_state_size(self) -> int: return 2 * self.memory_dim @property def num_recurrent_layers(self): return 1 def _recurrent_memory_specification(self): return { self.memory_key: ( ( ("layer", self.num_recurrent_layers), ("sampler", None), ("hidden", self.recurrent_hidden_state_size), ), torch.float32, ) } def forward( # type:ignore self, observations: ObservationType, memory: Memory, prev_actions: torch.Tensor, masks: torch.FloatTensor, ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]: out, recurrent_hidden_states = self.baby_ai_model.forward( observations=observations, recurrent_hidden_states=cast( torch.FloatTensor, memory.tensor(self.memory_key) ), prev_actions=prev_actions, masks=masks, ) return out, memory.set_tensor(self.memory_key, 
class BabyAITask(Task[MiniGridEnv]):
    """An AllenAct task wrapping a single episode of a BabyAI/MiniGrid environment.

    The task forwards actions to the environment, records whether the episode
    ended with a positive reward, and can query the BabyAI bot for expert
    actions.
    """

    def __init__(
        self,
        env: MiniGridEnv,
        sensors: Union[SensorSuite, List[Sensor]],
        task_info: Dict[str, Any],
        expert_view_size: int = 7,
        expert_can_see_through_walls: bool = False,
        **kwargs,
    ):
        """Create the task.

        # Parameters

        env : The (already reset) environment; its `max_steps` bounds the task.
        sensors : Sensors used to produce observations.
        task_info : Free-form information about the task.
        expert_view_size : `agent_view_size` used while the expert bot plans.
        expert_can_see_through_walls : `see_through_walls` used while the expert bot plans.
        kwargs : Forwarded to the parent `Task`.
        """
        super().__init__(
            env=env,
            sensors=sensors,
            task_info=task_info,
            max_steps=env.max_steps,
            **kwargs,
        )
        self._was_successful: bool = False
        self.bot: Optional[babyai.bot.Bot] = None
        self._bot_died = False
        self.expert_view_size = expert_view_size
        self.expert_can_see_through_walls = expert_can_see_through_walls
        self._last_action: Optional[int] = None

        # The task was given the original `max_steps` above; the environment's
        # own limit is raised by one.
        env.max_steps = env.max_steps + 1

    @property
    def action_space(self) -> gym.spaces.Discrete:
        """The action space of the wrapped environment."""
        return self.env.action_space

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Render the environment in the given mode (extra arguments are ignored)."""
        return self.env.render(mode=mode)

    def _step(self, action: int) -> RLStepResult:
        """Apply `action` to the environment and package the outcome."""
        assert isinstance(action, int)

        minigrid_obs, reward, done, info = self.env.step(action=action)
        self._last_action = action

        # Success means the environment finished with a positive reward.
        self._was_successful = done and reward > 0

        return RLStepResult(
            observation=self.get_observations(minigrid_output_obs=minigrid_obs),
            reward=reward,
            done=self.is_done(),
            info=info,
        )

    def get_observations(
        self, *args, minigrid_output_obs: Optional[Dict[str, Any]] = None, **kwargs
    ) -> Any:
        """Query the sensor suite, passing along the raw environment observation."""
        return self.sensor_suite.get_observations(
            env=self.env, task=self, minigrid_output_obs=minigrid_output_obs
        )

    def reached_terminal_state(self) -> bool:
        """Whether the last step ended the episode successfully."""
        return self._was_successful

    @classmethod
    def class_action_names(cls, **kwargs) -> Tuple[str, ...]:
        """Names of all `MiniGridEnv.Actions`, ordered by their integer value."""
        return tuple(
            x
            for x, _ in sorted(
                [(str(a), a.value) for a in MiniGridEnv.Actions], key=lambda x: x[1]
            )
        )

    def close(self) -> None:
        """Nothing to release; the environment is not owned by the task."""
        pass

    def _expert_timeout_hander(self, signum, frame):
        """SIGALRM handler aborting an expert query that takes too long."""
        raise TimeoutError

    def query_expert(self, **kwargs) -> Tuple[Any, bool]:
        """Ask the BabyAI bot for the next action.

        While the bot plans, the environment temporarily uses the expert's
        `agent_view_size` and `see_through_walls` settings; the previous
        values are restored afterwards. Planning is limited by a SIGALRM
        timeout (`kwargs["timeout"]` seconds, defaulting to 4 for the first
        step and 2 otherwise). After a timeout the bot is considered dead and
        no further queries are attempted.

        # Returns

        `(action, True)` on success, `(0, False)` when no expert action is available.
        """
        see_through_walls = self.env.see_through_walls
        agent_view_size = self.env.agent_view_size

        if self._bot_died:
            return 0, False

        try:
            self.env.agent_view_size = self.expert_view_size
            # Fix: this used to assign `env.expert_can_see_through_walls`, an
            # attribute that is neither saved above nor restored below, so the
            # expert visibility setting had no effect.
            self.env.see_through_walls = self.expert_can_see_through_walls

            if self.bot is None:
                self.bot = babyai.bot.Bot(self.env)

            signal.signal(signal.SIGALRM, self._expert_timeout_hander)
            signal.alarm(kwargs.get("timeout", 4 if self.num_steps_taken() == 0 else 2))
            return self.bot.replan(self._last_action), True
        except TimeoutError as _:
            self._bot_died = True
            return 0, False
        finally:
            # Always cancel the pending alarm and restore the agent's own view settings.
            signal.alarm(0)
            self.env.see_through_walls = see_through_walls
            self.env.agent_view_size = agent_view_size

    def metrics(self) -> Dict[str, Any]:
        """Parent metrics extended with a `"success"` entry (1.0 or 0.0)."""
        metrics = {
            **super(BabyAITask, self).metrics(),
            "success": 1.0 * (self.reached_terminal_state()),
        }
        return metrics
Optional[int] = None self._last_task: Optional[BabyAITask] = None assert (self.num_unique_seeds is None) or ( 0 < self.num_unique_seeds ), "`num_unique_seeds` must be a positive integer." self.num_unique_seeds = num_unique_seeds self.task_seeds_list = task_seeds_list if self.task_seeds_list is not None: if self.num_unique_seeds is not None: assert self.num_unique_seeds == len( self.task_seeds_list ), "`num_unique_seeds` must equal the length of `task_seeds_list` if both specified." self.num_unique_seeds = len(self.task_seeds_list) elif self.num_unique_seeds is not None: self.task_seeds_list = list(range(self.num_unique_seeds)) if (not deterministic_sampling) and self.max_tasks: get_logger().warning( "`deterministic_sampling` is `False` but you have specified `max_tasks < inf`," " this might be a mistake when running testing." ) if isinstance(env_builder, str): self.env = gym.make(env_builder) else: self.env = env_builder() self.np_seeded_random_gen, _ = seeding.np_random(random.randint(0, 2**31 - 1)) self.num_tasks_generated = 0 @property def length(self) -> Union[int, float]: return ( float("inf") if self.max_tasks is None else self.max_tasks - self.num_tasks_generated ) @property def total_unique(self) -> Optional[Union[int, float]]: return None if self.num_unique_seeds is None else self.num_unique_seeds @property def last_sampled_task(self) -> Optional[Task]: raise NotImplementedError def next_task(self, force_advance_scene: bool = False) -> Optional[BabyAITask]: if self.length <= 0: return None if self.num_unique_seeds is not None: if self.deterministic_sampling: self._last_env_seed = self.task_seeds_list[ self.num_tasks_generated % len(self.task_seeds_list) ] else: self._last_env_seed = self.np_seeded_random_gen.choice( self.task_seeds_list ) else: self._last_env_seed = self.np_seeded_random_gen.randint(0, 2**31 - 1) self.env.seed(self._last_env_seed) self.env.saved_seed = self._last_env_seed self.env.reset() self.num_tasks_generated += 1 self._last_task = 
BabyAITask(env=self.env, sensors=self.sensors, task_info={}) return self._last_task def close(self) -> None: self.env.close() @property def all_observation_spaces_equal(self) -> bool: return True def reset(self) -> None: self.num_tasks_generated = 0 self.env.reset() def set_seed(self, seed: int) -> None: self.np_seeded_random_gen, _ = seeding.np_random(seed) ================================================ FILE: allenact_plugins/babyai_plugin/configs/__init__.py ================================================ ================================================ FILE: allenact_plugins/babyai_plugin/data/__init__.py ================================================ ================================================ FILE: allenact_plugins/babyai_plugin/extra_environment.yml ================================================ dependencies: - networkx - pip - pip: - "--editable=git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd#egg=babyai" ================================================ FILE: allenact_plugins/babyai_plugin/extra_requirements.txt ================================================ babyai @ git+https://github.com/Lucaweihs/babyai.git@0b450eeb3a2dc7116c67900d51391986bdbb84cd networkx ================================================ FILE: allenact_plugins/babyai_plugin/scripts/__init__.py ================================================ ================================================ FILE: allenact_plugins/babyai_plugin/scripts/download_babyai_expert_demos.py ================================================ import argparse import os import platform from allenact_plugins.babyai_plugin.babyai_constants import ( BABYAI_EXPERT_TRAJECTORIES_DIR, ) LEVEL_TO_TRAIN_VALID_IDS = { "BossLevel": ( "1DkVVpIEVtpyo1LxOXQL_bVyjFCTO3cHD", "1ccEFA_n5RT4SWD0Wa_qO65z2HACJBace", ), "GoToObjMaze": ( "1P1CuMbGDJtZit1f-8hmd-HwweXZMj77T", "1MVlVsIpJUZ0vjrYGXY6Ku4m4vBxtWjRZ", ), "GoTo": ("1ABR1q-TClgjSlbhVdVJjzOBpTmTtlTN1", 
"13DlEx5woi31MIs_dzyLxfi7dPe1g59l2"), "GoToLocal": ( "1U8YWdd3viN2lxOP5BByNUZRPVDKVvDAN", "1Esy-J0t8eJUg6_RT8F4kkegHYDWwqmSl", ), } def get_args(): """Creates the argument parser and parses input arguments.""" # noinspection PyTypeChecker parser = argparse.ArgumentParser( description="download_babyai_expert_demos", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument( "dataset", nargs="?", default="all", help="dataset name (one of {}, or all)".format( ", ".join(LEVEL_TO_TRAIN_VALID_IDS.keys()) ), ) return parser.parse_args() if __name__ == "__main__": args = get_args() if platform.system() == "Linux": download_template = """wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id={}' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p')&id={}" -O {}""" elif platform.system() == "Darwin": download_template = """wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id={}' -O- | gsed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p')&id={}" -O {}""" else: raise NotImplementedError("{} is not supported".format(platform.system())) try: os.makedirs(BABYAI_EXPERT_TRAJECTORIES_DIR, exist_ok=True) if args.dataset == "all": id_items = LEVEL_TO_TRAIN_VALID_IDS else: assert ( args.dataset in LEVEL_TO_TRAIN_VALID_IDS ), "Only {} are valid datasets".format( ", ".join(LEVEL_TO_TRAIN_VALID_IDS.keys()) ) id_items = {args.dataset: LEVEL_TO_TRAIN_VALID_IDS[args.dataset]} for level_name, (train_id, valid_id) in id_items.items(): train_path = os.path.join( BABYAI_EXPERT_TRAJECTORIES_DIR, "BabyAI-{}-v0.pkl".format(level_name) ) if os.path.exists(train_path): print("{} already exists, skipping...".format(train_path)) else: 
def make_small_demos(dir: str):
    """Write a truncated `-small` copy of every training demo file in `dir`.

    Every `*.pkl` file directly inside `dir` whose *file name* contains
    neither "valid" nor "small" is loaded, cut down to its first 1000
    demos, and saved next to the original with `-small` inserted before
    the extension. Files whose `-small` counterpart already exists are
    left untouched.

    # Parameters

    dir : Directory that holds the demo pickle files.
    """
    for file_path in glob.glob(os.path.join(dir, "*.pkl")):
        # Filter on the file name only: testing the whole path would skip
        # every file as soon as a parent directory happens to contain
        # "valid" or "small".
        file_name = os.path.basename(file_path)
        if "valid" in file_name or "small" in file_name:
            continue

        # Only touch the extension; `str.replace` would also rewrite any
        # other ".pkl" occurrence earlier in the path.
        root, extension = os.path.splitext(file_path)
        new_file_path = root + "-small" + extension
        if os.path.exists(new_file_path):
            continue

        print(
            "Saving small version of {} to {}...".format(file_name, new_file_path)
        )
        babyai.utils.save_demos(
            babyai.utils.load_demos(file_path)[:1000], new_file_path
        )
        print("Done.")
    def __init__(
        self,
        rgb_input_uuid: str,
        clip_model_type: str,
        pool: bool,
        device: Optional[torch.device] = None,
        device_ids: Optional[List[torch.device]] = None,
        input_img_height_width: Tuple[int, int] = (224, 224),
        chunk_size: Optional[int] = None,
        **kwargs: Any,
    ):
        """Configure the preprocessor and compute its output observation space.

        # Parameters

        rgb_input_uuid : Key of the observation this preprocessor consumes.
        clip_model_type : Name of the CLIP model; only 'RN50' and 'RN50x16'
            are accepted.
        pool : If true the output shape keeps only the channel dimension;
            only allowed together with a `(224, 224)` input size.
        device : Device to use, CPU when `None`.
        device_ids : Defaults to the indices of all visible CUDA devices.
        input_img_height_width : Input image size, both entries must be
            multiples of 32.
        chunk_size : Stored as `self.chunk_size`.
        kwargs : Remain in `locals()` and are therefore visible to
            `prepare_locals_for_super`.
        """
        assert clip_model_type in clip.available_models()
        assert pool == False or input_img_height_width == (224, 224)
        assert all(iis % 32 == 0 for iis in input_img_height_width)

        # The spatial size of the output is the input size divided by 32.
        output_height_width = tuple(iis // 32 for iis in input_img_height_width)
        if clip_model_type == "RN50":
            output_shape = (2048,) + output_height_width
        elif clip_model_type == "RN50x16":
            output_shape = (3072,) + output_height_width
        else:
            raise NotImplementedError(
                f"Currently `clip_model_type` must be one of 'RN50' or 'RN50x16'"
            )

        # With pooling only the channel dimension remains.
        if pool:
            output_shape = output_shape[:1]

        self.clip_model_type = clip_model_type

        self.pool = pool

        self.device = torch.device("cpu") if device is None else device
        self.device_ids = device_ids or cast(
            List[torch.device], list(range(torch.cuda.device_count()))
        )
        # No model is created here; the attribute starts out empty.
        self._resnet: Optional[ClipResNetEmbedder] = None
        self.chunk_size = chunk_size

        # NOTE: the local variable names below matter because `locals()` is
        # handed to `prepare_locals_for_super` at the end of this method --
        # presumably that helper turns them into keyword arguments for the
        # parent constructor; confirm against its definition before renaming.
        low = -np.inf
        high = np.inf
        shape = output_shape

        input_uuids = [rgb_input_uuid]
        assert (
            len(input_uuids) == 1
        ), "resnet preprocessor can only consume one observation type"

        observation_space = gym.spaces.Box(low=low, high=high, shape=shape)

        super().__init__(**prepare_locals_for_super(locals()))
class ClipViTPreprocessor(Preprocessor):
    """Preprocess RGB or depth image using a ViT model with CLIP model weights."""

    CLIP_RGB_MEANS = (0.48145466, 0.4578275, 0.40821073)
    CLIP_RGB_STDS = (0.26862954, 0.26130258, 0.27577711)

    def __init__(
        self,
        rgb_input_uuid: str,
        clip_model_type: str,
        class_emb_only: bool,
        device: Optional[torch.device] = None,
        device_ids: Optional[List[torch.device]] = None,
        **kwargs: Any,
    ):
        """Configure the preprocessor and compute its output observation space.

        # Parameters

        rgb_input_uuid : Key of the observation this preprocessor consumes.
        clip_model_type : One of 'ViT-B/32', 'ViT-B/16' or 'ViT-L/14'.
        class_emb_only : If true the output shape keeps only the embedding
            width (the token dimension is dropped).
        device : Device to use, CPU when `None`.
        device_ids : Defaults to the indices of all visible CUDA devices.
        kwargs : Remain in `locals()` and are therefore visible to
            `prepare_locals_for_super`.
        """
        assert clip_model_type in clip.available_models()

        # (number of tokens, embedding width) for each supported model.
        if clip_model_type == "ViT-B/32":
            output_shape = (7 * 7 + 1, 768)
        elif clip_model_type == "ViT-B/16":
            output_shape = (14 * 14 + 1, 768)
        elif clip_model_type == "ViT-L/14":
            output_shape = (16 * 16 + 1, 1024)
        else:
            # The message used to advertise 'ViT-B/14', which is not one of
            # the branches handled above.
            raise NotImplementedError(
                "Currently `clip_model_type` must be one of 'ViT-B/32', 'ViT-B/16', or 'ViT-L/14'"
            )

        if class_emb_only:
            output_shape = output_shape[1:]

        self.clip_model_type = clip_model_type

        self.class_emb_only = class_emb_only

        self.device = torch.device("cpu") if device is None else device
        self.device_ids = device_ids or cast(
            List[torch.device], list(range(torch.cuda.device_count()))
        )
        # The model is only built on first access of the `vit` property.
        self._vit: Optional[ClipViTEmbedder] = None

        # NOTE: the local variable names below must stay as they are because
        # `locals()` is passed to `prepare_locals_for_super` at the end.
        low = -np.inf
        high = np.inf
        shape = output_shape

        input_uuids = [rgb_input_uuid]
        assert (
            len(input_uuids) == 1
        ), "ViT preprocessor can only consume one observation type"

        observation_space = gym.spaces.Box(low=low, high=high, shape=shape)

        super().__init__(**prepare_locals_for_super(locals()))

    @property
    def vit(self) -> ClipViTEmbedder:
        """The embedder, loaded on first access and cached afterwards."""
        if self._vit is None:
            self._vit = ClipViTEmbedder(
                model=clip.load(self.clip_model_type, device=self.device)[0],
                class_emb_only=self.class_emb_only,
            ).to(self.device)
            # Zero the momentum of any batch-norm layer that may be present.
            for module in self._vit.modules():
                if "BatchNorm" in type(module).__name__:
                    module.momentum = 0.0
            self._vit.eval()
        return self._vit

    def to(self, device: torch.device) -> "ClipViTPreprocessor":
        """Move the (possibly freshly loaded) model to `device` and return self."""
        self._vit = self.vit.to(device)
        self.device = device
        return self

    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:
        """Embed the observation stored under the configured input uuid.

        The input is permuted from channels-last to channels-first before
        being passed to the model; the result is returned as float.
        """
        x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2)  # bhwc -> bchw
        # If the input is depth, repeat it across all 3 channels
        if x.shape[1] == 1:
            x = x.repeat(1, 3, 1, 1)
        x = self.vit(x).float()
        return x
class GaussianDistr(torch.distributions.Normal, Distr):
    """Normal distribution that additionally offers a `mode()` method."""

    def mode(self) -> torch.FloatTensor:
        """Return the parent distribution's `mean` as the mode."""
        parent_mean = super().mean
        return parent_mean
class MemorylessActorCritic(ActorCriticModel[GaussianDistr]):
    """ActorCriticModel for gym tasks with continuous control in the range [-1,
    1]."""

    def __init__(
        self,
        input_uuid: str,
        action_space: gym.spaces.Box,
        observation_space: gym.spaces.Dict,
        action_std: float = 0.5,
        mlp_hidden_dims: Sequence[int] = (64, 32),
    ):
        """Build the actor and critic MLPs.

        # Parameters

        input_uuid : Key of the (1-dimensional) observation fed to both MLPs.
        action_space : 1-dimensional box action space.
        observation_space : Dict space containing `input_uuid`.
        action_std : Standard deviation used for every action dimension.
        mlp_hidden_dims : Widths of the hidden layers shared in shape (not in
            weights) by the actor and the critic.
        """
        super().__init__(action_space, observation_space)

        self.input_uuid = input_uuid

        assert len(observation_space[self.input_uuid].shape) == 1
        state_dim = observation_space[self.input_uuid].shape[0]
        assert len(action_space.shape) == 1
        action_dim = action_space.shape[0]

        mlp_hidden_dims = (state_dim,) + tuple(mlp_hidden_dims)
        # Width feeding the output heads. This used to be hard-coded to 32,
        # which only matches the default `mlp_hidden_dims`.
        last_hidden_dim = mlp_hidden_dims[-1]

        # action mean range -1 to 1
        self.actor = nn.Sequential(
            *self.make_mlp_hidden(nn.Tanh, *mlp_hidden_dims),
            nn.Linear(last_hidden_dim, action_dim),
            nn.Tanh(),
        )

        # critic
        self.critic = nn.Sequential(
            *self.make_mlp_hidden(nn.Tanh, *mlp_hidden_dims),
            nn.Linear(last_hidden_dim, 1),
        )

        # maximum standard deviation
        self.register_buffer(
            "action_std",
            torch.tensor([action_std] * action_dim).view(1, 1, -1),
            persistent=False,
        )

    @staticmethod
    def make_mlp_hidden(nl, *dims):
        """Return a flat list of `Linear` layers, each followed by `nl()`.

        Consecutive entries of `dims` give the input/output width of each
        linear layer; fewer than two entries yield an empty list.
        """
        res = []
        for it, dim in enumerate(dims[:-1]):
            res.append(
                nn.Linear(dim, dims[it + 1]),
            )
            res.append(nl())
        return res

    def _recurrent_memory_specification(self):
        """This model keeps no recurrent memory."""
        return None

    def forward(  # type:ignore
        self,
        observations: Dict[str, Union[torch.FloatTensor, Dict[str, Any]]],
        memory: Memory,
        prev_actions: Any,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Compute the action distribution and value estimate.

        Only `observations[self.input_uuid]` is read; `memory`,
        `prev_actions` and `masks` are ignored. The returned memory is
        always `None`.
        """
        means = self.actor(observations[self.input_uuid])
        values = self.critic(observations[self.input_uuid])

        return (
            ActorCriticOutput(
                cast(DistributionType, GaussianDistr(loc=means, scale=self.action_std)),
                values,
                {},
            ),
            None,  # no Memory
        )
str, **kwargs: Any): self.gym_env_name = gym_env_name observation_space = self._get_observation_space() super().__init__(**prepare_locals_for_super(locals())) def _get_observation_space(self) -> gym.Space: # observation space for gym MoJoCo if self.gym_env_name == "InvertedPendulum-v2": return gym.spaces.Box(-np.inf, np.inf, shape=(4,), dtype="float32") elif self.gym_env_name == "Ant-v2": return gym.spaces.Box(-np.inf, np.inf, shape=(111,), dtype="float32") elif self.gym_env_name in ["Reacher-v2", "Hopper-v2"]: return gym.spaces.Box(-np.inf, np.inf, shape=(11,), dtype="float32") elif self.gym_env_name == "InvertedDoublePendulum-v2": return gym.spaces.Box(-np.inf, np.inf, (11,), "float32") elif self.gym_env_name in ["HumanoidStandup-v2", "Humanoid-v2"]: return gym.spaces.Box(-np.inf, np.inf, (376,), "float32") elif self.gym_env_name in ["HalfCheetah-v2", "Walker2d-v2"]: return gym.spaces.Box(-np.inf, np.inf, (17,), "float32") elif self.gym_env_name == "Swimmer-v2": return gym.spaces.Box(-np.inf, np.inf, (8,), "float32") # TODO observation space for gym Robotics elif self.gym_env_name == "HandManipulateBlock-v0": return gym.spaces.Dict( dict( desired_goal=gym.spaces.Box( -np.inf, np.inf, shape=(7,), dtype="float32" ), achieved_goal=gym.spaces.Box( -np.inf, np.inf, shape=(7,), dtype="float32" ), observation=gym.spaces.Box( -np.inf, np.inf, shape=(61,), dtype="float32" ), ) ) else: raise NotImplementedError def get_observation( self, env: GymEnvironment, task: Optional[SubTaskType], *args, gym_obs: Optional[np.ndarray] = None, **kwargs: Any ) -> np.ndarray: if gym_obs is not None: return np.array(gym_obs, dtype=np.float32) # coerce to be float32 else: return np.array(env.initial_observation, dtype=np.float32) ================================================ FILE: allenact_plugins/gym_plugin/gym_tasks.py ================================================ import random from typing import Any, List, Dict, Optional, Union, Callable, Sequence, Tuple import gym import numpy as 
class GymTask(Task[gym.Env]):
    """Base class for tasks backed by a gym environment.

    Concrete subclasses still have to provide `class_action_names` and
    `_step`.
    """

    def __init__(
        self,
        env: GymEnvironment,
        sensors: Union[SensorSuite, List[Sensor]],
        task_info: Dict[str, Any],
        **kwargs,
    ):
        """Create the task, taking `max_steps` from the environment's spec."""
        super().__init__(
            env=env,
            sensors=sensors,
            task_info=task_info,
            max_steps=env.spec.max_episode_steps,
            **kwargs,
        )
        self._gym_done = False
        self.task_name: str = self.env.spec.id

    @property
    def action_space(self) -> gym.spaces.Space:
        """The action space of the wrapped environment."""
        return self.env.action_space

    def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
        """Render the environment; the mode "rgb" is passed on as "rgb_array"."""
        render_mode = "rgb_array" if mode == "rgb" else mode
        return self.env.render(mode=render_mode)

    def get_observations(
        self, *args, gym_obs: Optional[Dict[str, Any]] = None, **kwargs
    ) -> Any:
        """Query the sensor suite, handing it the raw gym observation."""
        suite = self.sensor_suite
        return suite.get_observations(env=self.env, task=self, gym_obs=gym_obs)

    def reached_terminal_state(self) -> bool:
        """Whether the last step stored a done flag."""
        return self._gym_done

    def close(self) -> None:
        """Nothing to release."""
        return None

    def metrics(self) -> Dict[str, Any]:
        """Merge parent metrics, environment metrics and a `success` entry."""
        # noinspection PyUnresolvedReferences,PyCallingNonCallable
        if hasattr(self.env, "metrics"):
            env_metrics = self.env.metrics()
        else:
            env_metrics = {}

        merged = {**super().metrics()}
        for metric_name, metric_value in env_metrics.items():
            merged[metric_name] = float(metric_value)

        # Prefer the environment's own success flag; otherwise a positive
        # cumulative reward counts as success.
        if hasattr(self.env, "was_successful"):
            succeeded = self.env.was_successful
        else:
            succeeded = self.cumulative_reward > 0
        merged["success"] = int(succeeded)
        return merged
def default_task_selector(env_name: str) -> type:
    """Pick the task class for `env_name` (used by `GymTaskSampler`).

    Raises `NotImplementedError` when the environment is not listed.
    """
    box2d_env_names = (
        "CarRacing-v0",
        "LunarLanderContinuous-v2",
        "BipedalWalker-v2",
        "BipedalWalkerHardcore-v2",
    )
    mujoco_env_names = (
        "InvertedPendulum-v2",
        "Ant-v2",
        "InvertedDoublePendulum-v2",
        "Humanoid-v2",
        "Reacher-v2",
        "Hopper-v2",
        "HalfCheetah-v2",
        "Swimmer-v2",
        "Walker2d-v2",
    )

    if env_name not in box2d_env_names + mujoco_env_names:
        raise NotImplementedError()
    return GymContinuousTask
not isinstance(sensors, SensorSuite) else sensors ) self.max_tasks = max_tasks self.num_unique_seeds = num_unique_seeds self.deterministic_sampling = deterministic_sampling self.repeat_failed_task_for_min_steps = repeat_failed_task_for_min_steps self.extra_task_kwargs = ( extra_task_kwargs if extra_task_kwargs is not None else {} ) self._last_env_seed: Optional[int] = None self._last_task: Optional[GymTask] = None self._number_of_steps_taken_with_task_seed = 0 assert (not deterministic_sampling) or repeat_failed_task_for_min_steps <= 0, ( "If `deterministic_sampling` is True then we require" " `repeat_failed_task_for_min_steps <= 0`" ) assert (self.num_unique_seeds is None) or ( 0 < self.num_unique_seeds ), "`num_unique_seeds` must be a positive integer." self.num_unique_seeds = num_unique_seeds self.task_seeds_list = task_seeds_list if self.task_seeds_list is not None: if self.num_unique_seeds is not None: assert self.num_unique_seeds == len( self.task_seeds_list ), "`num_unique_seeds` must equal the length of `task_seeds_list` if both specified." self.num_unique_seeds = len(self.task_seeds_list) elif self.num_unique_seeds is not None: self.task_seeds_list = list(range(self.num_unique_seeds)) if num_unique_seeds is not None and repeat_failed_task_for_min_steps > 0: raise NotImplementedError( "`repeat_failed_task_for_min_steps` must be <=0 if number" " of unique seeds is not None." ) assert (not deterministic_sampling) or ( self.num_unique_seeds is not None ), "Cannot use deterministic sampling when `num_unique_seeds` is `None`." if (not deterministic_sampling) and self.max_tasks: get_logger().warning( "`deterministic_sampling` is `False` but you have specified `max_tasks < inf`," " this might be a mistake when running testing." 
    def next_task(self, force_advance_scene: bool = False) -> Optional[GymTask]:
        """Choose a seed, reset the environment and return a new task.

        # Parameters

        force_advance_scene : Not used by this sampler.

        # Returns

        The newly created task, or `None` when `self.length <= 0`.
        """
        if self.length <= 0:
            return None

        repeating = False
        if self.num_unique_seeds is not None:
            # A fixed pool of seeds: either cycle through it in order or
            # draw from it with the sampler's random generator.
            if self.deterministic_sampling:
                self._last_env_seed = self.task_seeds_list[
                    self.num_tasks_generated % len(self.task_seeds_list)
                ]
            else:
                self._last_env_seed = self.np_seeded_random_gen.choice(
                    self.task_seeds_list
                )
        else:
            # Accumulate the steps spent so far on the current seed.
            if self._last_task is not None:
                self._number_of_steps_taken_with_task_seed += (
                    self._last_task.num_steps_taken()
                )

            # Keep the previous seed while fewer than
            # `repeat_failed_task_for_min_steps` steps were spent on it and
            # the last task ended with a cumulative reward of exactly 0.
            if (
                self._last_env_seed is not None
                and self._number_of_steps_taken_with_task_seed
                < self.repeat_failed_task_for_min_steps
                and self._last_task.cumulative_reward == 0
            ):
                repeating = True
            else:
                self._number_of_steps_taken_with_task_seed = 0
                self._last_env_seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)

        task_has_same_seed_reset = hasattr(self.env, "same_seed_reset")

        if repeating and task_has_same_seed_reset:
            # noinspection PyUnresolvedReferences
            self.env.same_seed_reset()
        else:
            # Also taken when repeating without `same_seed_reset`: the
            # environment is then re-seeded with the unchanged seed.
            self.env.seed(self._last_env_seed)
            self.env.saved_seed = self._last_env_seed
            self.env.reset()

        self.num_tasks_generated += 1

        # The id comes from the global `random` module, not from
        # `self.np_seeded_random_gen`.
        task_info = {"id": "random%d" % random.randint(0, 2**63 - 1)}

        self._last_task = self.task_type(
            **dict(env=self.env, sensors=self.sensors, task_info=task_info),
            **self.extra_task_kwargs,
        )

        return self._last_task
self.env.close() @property def all_observation_spaces_equal(self) -> bool: return True def reset(self) -> None: self.num_tasks_generated = 0 self.env.reset() def set_seed(self, seed: int) -> None: self.np_seeded_random_gen, _ = seeding.np_random(seed) if seed is not None: set_seed(seed) ================================================ FILE: allenact_plugins/habitat_plugin/__init__.py ================================================ from allenact.utils.system import ImportChecker with ImportChecker( "\n\nPlease install habitat following\n\n" "https://allenact.org/installation/installation-framework/#installation-of-habitat\n\n" ): import habitat import habitat_sim ================================================ FILE: allenact_plugins/habitat_plugin/data/__init__.py ================================================ ================================================ FILE: allenact_plugins/habitat_plugin/extra_environment.yml ================================================ channels: - defaults - conda-forge - aihabitat dependencies: - habitat-sim=0.1.5 - numba - pip - pip: - "--editable=git+https://github.com/Lucaweihs/habitat-lab.git@99124c785bd5ca51e321ea20462f71071cd43ae2#egg=habitat" - numpy-quaternion - pyquaternion>=0.9.9 ================================================ FILE: allenact_plugins/habitat_plugin/extra_environment_headless.yml ================================================ channels: - defaults - conda-forge - aihabitat dependencies: - habitat-sim=0.1.5 - headless - numba - pip - pip: - "--editable=git+https://github.com/Lucaweihs/habitat-lab.git@99124c785bd5ca51e321ea20462f71071cd43ae2#egg=habitat" - numpy-quaternion - pyquaternion>=0.9.9 ================================================ FILE: allenact_plugins/habitat_plugin/extra_requirements.txt ================================================ habitat @ git+https://github.com/facebookresearch/habitat-lab.git@33654923dc733f5fcea23aea6391034c3f694a67 numpy-quaternion pyquaternion>=0.9.9 numba 
================================================ FILE: allenact_plugins/habitat_plugin/habitat_constants.py ================================================ import os HABITAT_BASE = os.getenv( "HABITAT_BASE_DIR", default=os.path.join(os.getcwd(), "external_projects", "habitat-lab"), ) HABITAT_DATA_BASE = os.path.join( os.getcwd(), "data", ) if (not os.path.exists(HABITAT_BASE)) or (not os.path.exists(HABITAT_DATA_BASE)): raise ImportError( "In order to run properly the Habitat environment makes several assumptions about the file structure of" " the local system. The file structure of the current environment does not seem to respect this required" " file structure. Please see https://allenact.org/installation/installation-framework/#installation-of-habitat" " for details as to how to set up your local environment to make it possible to use the habitat plugin of" " AllenAct." ) HABITAT_DATASETS_DIR = os.path.join(HABITAT_DATA_BASE, "datasets") HABITAT_SCENE_DATASETS_DIR = os.path.join(HABITAT_DATA_BASE, "scene_datasets") HABITAT_CONFIGS_DIR = os.path.join(HABITAT_BASE, "configs") TESTED_HABITAT_COMMIT = "33654923dc733f5fcea23aea6391034c3f694a67" MOVE_AHEAD = "MOVE_FORWARD" ROTATE_LEFT = "TURN_LEFT" ROTATE_RIGHT = "TURN_RIGHT" LOOK_DOWN = "LOOK_DOWN" LOOK_UP = "LOOK_UP" END = "STOP" ================================================ FILE: allenact_plugins/habitat_plugin/habitat_environment.py ================================================ """A wrapper for interacting with the Habitat environment.""" import os from typing import Dict, Union, List, Optional import numpy as np import habitat from allenact.utils.cache_utils import DynamicDistanceCache from allenact.utils.system import get_logger from habitat.config import Config from habitat.core.dataset import Dataset from habitat.core.simulator import Observations, AgentState, ShortestPathPoint from habitat.tasks.nav.nav import NavigationEpisode as HabitatNavigationEpisode class HabitatEnvironment: def __init__(self, 
config: Config, dataset: Dataset, verbose: bool = False) -> None: self.env = habitat.Env(config=config, dataset=dataset) if not verbose: os.environ["GLOG_minloglevel"] = "2" os.environ["MAGNUM_LOG"] = "quiet" # Set the target to a random goal from the provided list for this episode self.goal_index = 0 self.last_geodesic_distance = None self.distance_cache = DynamicDistanceCache(rounding=1) self._current_frame: Optional[np.ndarray] = None @property def scene_name(self) -> str: return self.env.current_episode.scene_id @property def current_frame(self) -> np.ndarray: assert self._current_frame is not None return self._current_frame def step(self, action_dict: Dict[str, Union[str, int]]) -> Observations: obs = self.env.step(action_dict["action"]) self._current_frame = obs return obs def get_location(self) -> Optional[np.ndarray]: return self.env.sim.get_agent_state().position def get_rotation(self) -> Optional[List[float]]: return self.env.sim.get_agent_state().rotation def get_shortest_path( self, source_state: AgentState, target_state: AgentState, ) -> List[ShortestPathPoint]: return self.env.sim.action_space_shortest_path(source_state, [target_state]) def get_current_episode(self) -> HabitatNavigationEpisode: return self.env.current_episode # type: ignore # noinspection PyMethodMayBeStatic def start(self): get_logger().debug("No need to start a habitat_plugin env") def stop(self): self.env.close() def reset(self): self._current_frame = self.env.reset() @property def last_action_success(self) -> bool: # For now we can not have failure of actions return True @property def num_episodes(self) -> int: ep_iterator = self.env.episode_iterator assert isinstance(ep_iterator, habitat.core.dataset.EpisodeIterator) return len(ep_iterator.episodes) ================================================ FILE: allenact_plugins/habitat_plugin/habitat_preprocessors.py ================================================ ================================================ FILE: 
allenact_plugins/habitat_plugin/habitat_sensors.py ================================================ from typing import Any, Optional, Tuple, TYPE_CHECKING import gym import numpy as np from pyquaternion import Quaternion from allenact.base_abstractions.sensor import Sensor from allenact.base_abstractions.task import Task from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor from allenact.utils.misc_utils import prepare_locals_for_super from allenact_plugins.habitat_plugin.habitat_environment import HabitatEnvironment if TYPE_CHECKING: from allenact_plugins.habitat_plugin.habitat_tasks import PointNavTask, ObjectNavTask # type: ignore class RGBSensorHabitat(RGBSensor[HabitatEnvironment, Task[HabitatEnvironment]]): # For backwards compatibility def __init__( self, use_resnet_normalization: bool = False, mean: Optional[np.ndarray] = np.array( [[[0.485, 0.456, 0.406]]], dtype=np.float32 ), stdev: Optional[np.ndarray] = np.array( [[[0.229, 0.224, 0.225]]], dtype=np.float32 ), height: Optional[int] = None, width: Optional[int] = None, uuid: str = "rgb", output_shape: Optional[Tuple[int, ...]] = None, output_channels: int = 3, unnormalized_infimum: float = 0.0, unnormalized_supremum: float = 1.0, scale_first: bool = True, **kwargs: Any ): super().__init__(**prepare_locals_for_super(locals())) def frame_from_env( self, env: HabitatEnvironment, task: Optional[Task[HabitatEnvironment]] ) -> np.ndarray: return env.current_frame["rgb"].copy() class DepthSensorHabitat(DepthSensor[HabitatEnvironment, Task[HabitatEnvironment]]): # For backwards compatibility def __init__( self, use_resnet_normalization: Optional[bool] = None, use_normalization: Optional[bool] = None, mean: Optional[np.ndarray] = np.array([[0.5]], dtype=np.float32), stdev: Optional[np.ndarray] = np.array([[0.25]], dtype=np.float32), height: Optional[int] = None, width: Optional[int] = None, uuid: str = "depth", output_shape: Optional[Tuple[int, ...]] = None, output_channels: int = 1, 
unnormalized_infimum: float = 0.0, unnormalized_supremum: float = 5.0, scale_first: bool = False, **kwargs: Any ): # Give priority to use_normalization, but use_resnet_normalization for backward compat. if not set if use_resnet_normalization is not None and use_normalization is None: use_normalization = use_resnet_normalization elif use_normalization is None: use_normalization = False super().__init__(**prepare_locals_for_super(locals())) def frame_from_env( self, env: HabitatEnvironment, task: Optional[Task[HabitatEnvironment]] ) -> np.ndarray: return env.current_frame["depth"].copy() class TargetCoordinatesSensorHabitat(Sensor[HabitatEnvironment, "PointNavTask"]): def __init__( self, coordinate_dims: int, uuid: str = "target_coordinates_ind", **kwargs: Any ): self.coordinate_dims = coordinate_dims observation_space = self._get_observation_space() super().__init__(**prepare_locals_for_super(locals())) def _get_observation_space(self): # Distance is a non-negative real and angle is normalized to the range (-Pi, Pi] or [-Pi, Pi) return gym.spaces.Box( np.float32(-3.15), np.float32(1000), shape=(self.coordinate_dims,) ) def get_observation( self, env: HabitatEnvironment, task: Optional["PointNavTask"], *args: Any, **kwargs: Any ) -> Any: frame = env.current_frame goal = frame["pointgoal_with_gps_compass"] return goal class TargetObjectSensorHabitat(Sensor[HabitatEnvironment, "ObjectNavTask"]): def __init__(self, num_objects: int, uuid: str = "target_object_id", **kwargs: Any): observation_space = self._get_observation_space(num_objects) super().__init__(**prepare_locals_for_super(locals())) @staticmethod def _get_observation_space(num_objects: int): return gym.spaces.Discrete(num_objects) def get_observation( self, env: HabitatEnvironment, task: Optional["ObjectNavTask"], *args: Any, **kwargs: Any ) -> Any: frame = env.current_frame goal = frame["objectgoal"][0] return goal class AgentCoordinatesSensorHabitat(Sensor[HabitatEnvironment, "PointNavTask"]): def 
__init__(self, uuid: str = "agent_position_and_rotation", **kwargs: Any): observation_space = self._get_observation_space() super().__init__(**prepare_locals_for_super(locals())) @staticmethod def _get_observation_space(): return gym.spaces.Box(np.float32(-1000), np.float32(1000), shape=(4,)) @staticmethod def get_observation( env: HabitatEnvironment, task: Optional["PointNavTask"], *args: Any, **kwargs: Any ) -> Any: position = env.env.sim.get_agent_state().position quaternion = Quaternion(env.env.sim.get_agent_state().rotation.components) return np.array([position[0], position[1], position[2], quaternion.radians]) ================================================ FILE: allenact_plugins/habitat_plugin/habitat_task_samplers.py ================================================ from typing import List, Optional, Union, Callable, Any, Dict, Type import gym import habitat from allenact.base_abstractions.sensor import Sensor from allenact.base_abstractions.task import TaskSampler from allenact.utils.experiment_utils import Builder from allenact_plugins.habitat_plugin.habitat_environment import HabitatEnvironment from allenact_plugins.habitat_plugin.habitat_tasks import PointNavTask, ObjectNavTask # type: ignore from habitat.config import Config class PointNavTaskSampler(TaskSampler): def __init__( self, env_config: Config, sensors: List[Sensor], max_steps: int, action_space: gym.Space, distance_to_goal: float, filter_dataset_func: Optional[ Callable[[habitat.Dataset], habitat.Dataset] ] = None, **task_init_kwargs, ) -> None: self.grid_size = 0.25 self.env: Optional[HabitatEnvironment] = None self.max_tasks: Optional[int] = None self.reset_tasks: Optional[int] = None self.sensors = sensors self.max_steps = max_steps self._action_space = action_space self.env_config = env_config self.distance_to_goal = distance_to_goal self.seed: Optional[int] = None self.filter_dataset_func = filter_dataset_func self._last_sampled_task: Optional[PointNavTask] = None self.task_init_kwargs = 
task_init_kwargs def _create_environment(self) -> HabitatEnvironment: dataset = habitat.make_dataset( self.env_config.DATASET.TYPE, config=self.env_config.DATASET ) if len(dataset.episodes) == 0: raise RuntimeError("Empty input dataset.") if self.filter_dataset_func is not None: dataset = self.filter_dataset_func(dataset) if len(dataset.episodes) == 0: raise RuntimeError("Empty dataset after filtering.") env = HabitatEnvironment(config=self.env_config, dataset=dataset) self.max_tasks = None if self.env_config.MODE == "train" else env.num_episodes self.reset_tasks = self.max_tasks return env @property def length(self) -> Union[int, float]: """ @return: Number of total tasks remaining that can be sampled. Can be float('inf'). """ return float("inf") if self.max_tasks is None else self.max_tasks @property def total_unique(self) -> Union[int, float, None]: return self.env.num_episodes @property def last_sampled_task(self) -> Optional[PointNavTask]: return self._last_sampled_task def close(self) -> None: if self.env is not None: self.env.stop() @property def all_observation_spaces_equal(self) -> bool: """ @return: True if all Tasks that can be sampled by this sampler have the same observation space. Otherwise False. 
""" return True def next_task(self, force_advance_scene=False) -> Optional[PointNavTask]: if self.max_tasks is not None and self.max_tasks <= 0: return None if self.env is not None: self.env.reset() else: self.env = self._create_environment() self.env.reset() ep_info = self.env.get_current_episode() assert len(ep_info.goals) == 1 target = ep_info.goals[0].position task_info = { "target": target, "distance_to_goal": self.distance_to_goal, "episode_id": ep_info.episode_id, "scene_id": ep_info.scene_id.split("/")[-1], **ep_info.info, } self._last_sampled_task = PointNavTask( env=self.env, sensors=self.sensors, task_info=task_info, max_steps=self.max_steps, action_space=self._action_space, **self.task_init_kwargs, ) if self.max_tasks is not None: self.max_tasks -= 1 return self._last_sampled_task def reset(self): self.max_tasks = self.reset_tasks def set_seed(self, seed: int): self.seed = seed if seed is not None: self.env.env.seed(seed) class ObjectNavTaskSampler(TaskSampler): def __init__( self, env_config: Config, sensors: List[Sensor], max_steps: int, action_space: gym.Space, filter_dataset_func: Optional[ Callable[[habitat.Dataset], habitat.Dataset] ] = None, task_kwargs: Dict[str, Any] = None, objectnav_task_type: Union[ Type[ObjectNavTask], Builder[ObjectNavTask] ] = ObjectNavTask, **kwargs, ) -> None: self.grid_size = 0.25 self.env: Optional[HabitatEnvironment] = None self.max_tasks: Optional[int] = None self.reset_tasks: Optional[int] = None self.sensors = sensors self.max_steps = max_steps self._action_space = action_space self.env_config = env_config self.seed: Optional[int] = None self.filter_dataset_func = filter_dataset_func self.objectnav_task_type = objectnav_task_type self.task_kwargs = {} if task_kwargs is None else task_kwargs self._last_sampled_task: Optional[ObjectNavTask] = None def _create_environment(self) -> HabitatEnvironment: dataset = habitat.make_dataset( self.env_config.DATASET.TYPE, config=self.env_config.DATASET ) if 
self.filter_dataset_func is not None: dataset = self.filter_dataset_func(dataset) if len(dataset.episodes) == 0: raise RuntimeError("Empty dataset after filtering.") env = HabitatEnvironment(config=self.env_config, dataset=dataset) self.max_tasks = ( None if self.env_config.MODE == "train" else env.num_episodes ) # mp3d objectnav val -> 2184 self.reset_tasks = self.max_tasks return env @property def length(self) -> Union[int, float]: """ @return: Number of total tasks remaining that can be sampled. Can be float('inf'). """ return float("inf") if self.max_tasks is None else self.max_tasks @property def total_unique(self) -> Union[int, float, None]: return self.env.num_episodes @property def last_sampled_task(self) -> Optional[ObjectNavTask]: return self._last_sampled_task def close(self) -> None: if self.env is not None: self.env.stop() @property def all_observation_spaces_equal(self) -> bool: """ @return: True if all Tasks that can be sampled by this sampler have the same observation space. Otherwise False. 
""" return True def next_task(self, force_advance_scene=False) -> Optional[ObjectNavTask]: if self.max_tasks is not None and self.max_tasks <= 0: return None if self.env is not None: if force_advance_scene: self.env.env._episode_iterator._forced_scene_switch() self.env.env._episode_iterator._set_shuffle_intervals() self.env.reset() else: self.env = self._create_environment() self.env.reset() ep_info = self.env.get_current_episode() target_categories = {g.object_category for g in ep_info.goals} assert len(target_categories) == 1 target_category = list(target_categories)[0] task_info = { "target_category": target_category, "episode_id": ep_info.episode_id, "scene_id": ep_info.scene_id.split("/")[-1], **ep_info.info, } self._last_sampled_task = self.objectnav_task_type( env=self.env, sensors=self.sensors, task_info=task_info, max_steps=self.max_steps, action_space=self._action_space, **self.task_kwargs, ) if self.max_tasks is not None: self.max_tasks -= 1 return self._last_sampled_task def reset(self): self.max_tasks = self.reset_tasks def set_seed(self, seed: int): self.seed = seed if seed is not None: self.env.env.seed(seed) ================================================ FILE: allenact_plugins/habitat_plugin/habitat_tasks.py ================================================ from abc import ABC from typing import Tuple, List, Dict, Any, Optional, Union, Sequence, cast import gym import numpy as np from habitat.sims.habitat_simulator.actions import HabitatSimActions from habitat.sims.habitat_simulator.habitat_simulator import HabitatSim from habitat.tasks.nav.shortest_path_follower import ShortestPathFollower from allenact.base_abstractions.misc import RLStepResult from allenact.base_abstractions.sensor import Sensor from allenact.base_abstractions.task import Task from allenact.utils.system import get_logger from allenact_plugins.habitat_plugin.habitat_constants import ( MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, END, LOOK_UP, LOOK_DOWN, ) from 
allenact_plugins.habitat_plugin.habitat_environment import HabitatEnvironment from allenact_plugins.habitat_plugin.habitat_sensors import ( AgentCoordinatesSensorHabitat, ) class HabitatTask(Task[HabitatEnvironment], ABC): def __init__( self, env: HabitatEnvironment, sensors: List[Sensor], task_info: Dict[str, Any], max_steps: int, **kwargs, ) -> None: super().__init__( env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs ) self._last_action: Optional[str] = None self._last_action_ind: Optional[int] = None self._last_action_success: Optional[bool] = None self._actions_taken: List[str] = [] self._positions = [] pos = self.get_agent_position_and_rotation() self._positions.append( {"x": pos[0], "y": pos[1], "z": pos[2], "rotation": pos[3]} ) ep = self.env.get_current_episode() # Extract the scene name from the scene path and append the episode id to generate # a globally unique episode_id self._episode_id = ep.scene_id.split("/")[-1][:-4] + "_" + ep.episode_id def get_agent_position_and_rotation(self): return AgentCoordinatesSensorHabitat.get_observation(self.env, self) @property def last_action(self): return self._last_action @last_action.setter def last_action(self, value: str): self._last_action = value @property def last_action_success(self): return self._last_action_success @last_action_success.setter def last_action_success(self, value: Optional[bool]): self._last_action_success = value def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray: if mode == "rgb": return self.env.current_frame["rgb"] elif mode == "depth": return self.env.current_frame["depth"] else: raise NotImplementedError() class PointNavTask(Task[HabitatEnvironment]): _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, END) def __init__( self, env: HabitatEnvironment, sensors: List[Sensor], task_info: Dict[str, Any], max_steps: int, failed_end_reward: float = 0.0, **kwargs, ) -> None: super().__init__( env=env, sensors=sensors, task_info=task_info, 
max_steps=max_steps, **kwargs ) self._took_end_action: bool = False self._success: Optional[bool] = False self._subsampled_locations_from_which_obj_visible = None # Get the geodesic distance to target from the environment and make sure it is # a valid value self.last_geodesic_distance = self.current_geodesic_dist_to_target() self.start_distance = self.last_geodesic_distance assert self.last_geodesic_distance is not None # noinspection PyProtectedMember self._shortest_path_follower = ShortestPathFollower( cast(HabitatSim, env.env.sim), env.env._config.TASK.SUCCESS_DISTANCE, False ) self._shortest_path_follower.mode = "geodesic_path" self._rewards: List[float] = [] self._metrics = None self.failed_end_reward = failed_end_reward def current_geodesic_dist_to_target(self) -> Optional[float]: metrics = self.env.env.get_metrics() if metrics["distance_to_goal"] is None: habitat_env = self.env.env habitat_env.task.measurements.update_measures( episode=habitat_env.current_episode, action=None, task=habitat_env.task ) metrics = self.env.env.get_metrics() return metrics["distance_to_goal"] @property def action_space(self): return gym.spaces.Discrete(len(self._actions)) def reached_terminal_state(self) -> bool: return self.env.env.episode_over @classmethod def class_action_names(cls, **kwargs) -> Tuple[str, ...]: return cls._actions def close(self) -> None: self.env.stop() def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult: assert isinstance(action, int) action = cast(int, action) action_str = self.class_action_names()[action] self.env.step({"action": action_str}) if action_str == END: self._took_end_action = True self._success = self._is_goal_in_range() self.last_action_success = self._success else: self.last_action_success = self.env.last_action_success step_result = RLStepResult( observation=self.get_observations(), reward=self.judge(), done=self.is_done(), info={"last_action_success": self.last_action_success}, ) return step_result def render(self, mode: str 
= "rgb", *args, **kwargs) -> np.ndarray: assert mode in ["rgb", "depth"], "only rgb and depth rendering is implemented" return self.env.current_frame["rgb"] def _is_goal_in_range(self) -> bool: return ( self.current_geodesic_dist_to_target() <= self.task_info["distance_to_goal"] ) def judge(self) -> float: reward = -0.01 new_geodesic_distance = self.current_geodesic_dist_to_target() if self.last_geodesic_distance is None: self.last_geodesic_distance = new_geodesic_distance if self.last_geodesic_distance is not None: if ( new_geodesic_distance is None or new_geodesic_distance in [float("-inf"), float("inf")] or np.isnan(new_geodesic_distance) ): new_geodesic_distance = self.last_geodesic_distance delta_distance_reward = self.last_geodesic_distance - new_geodesic_distance reward += delta_distance_reward self.last_geodesic_distance = new_geodesic_distance if self.is_done(): reward += 10.0 if self._success else self.failed_end_reward else: get_logger().warning("Could not get geodesic distance from habitat env.") self._rewards.append(float(reward)) return float(reward) def metrics(self) -> Dict[str, Any]: if not self.is_done(): return {} _metrics = self.env.env.get_metrics() metrics = { **super(PointNavTask, self).metrics(), "success": 1 * self._success, "ep_length": self.num_steps_taken(), "reward": np.sum(self._rewards), "spl": _metrics["spl"] if _metrics["spl"] is not None else 0.0, "dist_to_target": self.current_geodesic_dist_to_target(), } self._rewards = [] return metrics def query_expert(self, **kwargs) -> Tuple[int, bool]: if self._is_goal_in_range(): return self.class_action_names().index(END), True target = self.task_info["target"] habitat_action = self._shortest_path_follower.get_next_action(target) if habitat_action == HabitatSimActions.MOVE_FORWARD: return self.class_action_names().index(MOVE_AHEAD), True elif habitat_action == HabitatSimActions.TURN_LEFT: return self.class_action_names().index(ROTATE_LEFT), True elif habitat_action == 
HabitatSimActions.TURN_RIGHT: return self.class_action_names().index(ROTATE_RIGHT), True else: return 0, False class ObjectNavTask(HabitatTask): _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, END, LOOK_UP, LOOK_DOWN) def __init__( self, env: HabitatEnvironment, sensors: List[Sensor], task_info: Dict[str, Any], max_steps: int, look_constraints: Optional[Tuple[int, int]] = None, **kwargs, ) -> None: super().__init__( env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs ) self.look_constraints = look_constraints self._look_state = 0 self._took_end_action: bool = False self._success: Optional[bool] = False self._subsampled_locations_from_which_obj_visible = None # Get the geodesic distance to target from the environemnt and make sure it is # a valid value self.last_geodesic_distance = self.current_geodesic_dist_to_target() assert not ( self.last_geodesic_distance is None or self.last_geodesic_distance in [float("-inf"), float("inf")] or np.isnan(self.last_geodesic_distance) ), "Bad geodesic distance" self._min_distance_to_goal = self.last_geodesic_distance self._num_invalid_actions = 0 # noinspection PyProtectedMember self._shortest_path_follower = ShortestPathFollower( env.env.sim, env.env._config.TASK.SUCCESS.SUCCESS_DISTANCE, False ) self._shortest_path_follower.mode = "geodesic_path" self._rewards: List[float] = [] self._metrics = None self.task_info["episode_id"] = self._episode_id @property def action_space(self): return gym.spaces.Discrete(len(self._actions)) def reached_terminal_state(self) -> bool: return self.env.env.episode_over @classmethod def class_action_names(cls, **kwargs) -> Tuple[str, ...]: return cls._actions def action_names(self, **kwargs) -> Tuple[str, ...]: return self._actions def close(self) -> None: self.env.stop() def current_geodesic_dist_to_target(self) -> Optional[float]: metrics = self.env.env.get_metrics() if metrics["distance_to_goal"] is None: habitat_env = self.env.env 
habitat_env.task.measurements.update_measures( episode=habitat_env.current_episode, action=None, task=habitat_env.task ) metrics = self.env.env.get_metrics() return metrics["distance_to_goal"] def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult: assert isinstance(action, int) action = cast(int, action) old_pos = self.get_agent_position_and_rotation() action_str = self.action_names()[action] self._actions_taken.append(action_str) skip_action = False if self.look_constraints is not None: max_look_up, max_look_down = self.look_constraints if action_str == LOOK_UP: num_look_ups = self._look_state # assert num_look_ups <= max_look_up skip_action = num_look_ups >= max_look_up self._look_state += 1 if action_str == LOOK_DOWN: num_look_downs = -self._look_state # assert num_look_downs <= max_look_down skip_action = num_look_downs >= max_look_down self._look_state -= 1 self._look_state = min(max(self._look_state, -max_look_down), max_look_up) if not skip_action: self.env.step({"action": action_str}) if action_str == END: self._took_end_action = True self._success = self._is_goal_in_range() self.last_action_success = self._success else: self.last_action_success = self.env.last_action_success step_result = RLStepResult( observation=self.get_observations(), reward=self.judge(), done=self.is_done(), info={"last_action_success": self.last_action_success}, ) new_pos = self.get_agent_position_and_rotation() if np.all(old_pos == new_pos): self._num_invalid_actions += 1 pos = self.get_agent_position_and_rotation() self._positions.append( {"x": pos[0], "y": pos[1], "z": pos[2], "rotation": pos[3]} ) return step_result def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray: assert mode in ["rgb", "depth"], "only rgb and depth rendering is implemented" return self.env.current_frame["rgb"] def _is_goal_in_range(self) -> bool: # The habitat simulator will return an SPL value of 0.0 whenever the goal is not in range return bool(self.env.env.get_metrics()["spl"]) 
def judge(self) -> float: # Set default reward reward = -0.01 # Get geodesic distance reward new_geodesic_distance = self.current_geodesic_dist_to_target() self._min_distance_to_goal = min( new_geodesic_distance, self._min_distance_to_goal ) if ( new_geodesic_distance is None or new_geodesic_distance in [float("-inf"), float("inf")] or np.isnan(new_geodesic_distance) ): new_geodesic_distance = self.last_geodesic_distance delta_distance_reward = self.last_geodesic_distance - new_geodesic_distance reward += delta_distance_reward if self._took_end_action: reward += 10.0 if self._success else 0.0 # Get success reward self._rewards.append(float(reward)) self.last_geodesic_distance = new_geodesic_distance return float(reward) def metrics(self) -> Dict[str, Any]: self.task_info["taken_actions"] = self._actions_taken self.task_info["action_names"] = self.action_names() self.task_info["followed_path"] = self._positions if not self.is_done(): return {} else: _metrics = self.env.env.get_metrics() metrics = { "success": self._success, "ep_length": self.num_steps_taken(), "total_reward": np.sum(self._rewards), "spl": _metrics["spl"] if _metrics["spl"] is not None else 0.0, "min_distance_to_target": self._min_distance_to_goal, "num_invalid_actions": self._num_invalid_actions, "task_info": self.task_info, } self._rewards = [] return metrics def query_expert(self, **kwargs) -> Tuple[int, bool]: if self._is_goal_in_range(): return self.class_action_names().index(END), True target = self.task_info["target"] action = self._shortest_path_follower.get_next_action(target) return action, action is not None ================================================ FILE: allenact_plugins/habitat_plugin/habitat_utils.py ================================================ import os from typing import List import habitat from allenact_plugins.habitat_plugin.habitat_constants import ( HABITAT_BASE, HABITAT_CONFIGS_DIR, ) from habitat import Config def construct_env_configs( config: Config, 
allow_scene_repeat: bool = False, ) -> List[Config]: """Create list of Habitat Configs for training on multiple processes To allow better performance, dataset are split into small ones for each individual env, grouped by scenes. # Parameters config : configs that contain num_processes as well as information necessary to create individual environments. allow_scene_repeat: if `True` and the number of distinct scenes in the dataset is less than the total number of processes this will result in scenes being repeated across processes. If `False`, then if the total number of processes is greater than the number of scenes, this will result in a RuntimeError exception being raised. # Returns List of Configs, one for each process. """ config.freeze() num_processes = config.NUM_PROCESSES configs = [] dataset = habitat.make_dataset(config.DATASET.TYPE) scenes = dataset.get_scenes_to_load(config.DATASET) if len(scenes) > 0: if len(scenes) < num_processes: if not allow_scene_repeat: raise RuntimeError( "reduce the number of processes as there aren't enough number of scenes." 
def construct_env_configs_mp3d(config: Config) -> List[Config]:
    r"""Create one Habitat config per training process for Matterport3D.

    The scenes are hard-coded and spread over the processes with
    `distribute`, so that every process only loads a small group of scenes.

    Args:
        config: base config; `NUM_PROCESSES` and `SIMULATOR_GPU_IDS` are read
            from it and it is frozen as a side effect.

    Returns:
        List of frozen configs, one for each process.

    Note:
        With more than one process the layout assumes 8 GPUs with 3
        processes each: `distribute` writes to split indices up to 23, so
        fewer than 24 processes raises an `IndexError`.
    """
    config.freeze()

    num_processes = config.NUM_PROCESSES
    gpu_ids = config.SIMULATOR_GPU_IDS

    if num_processes == 1:
        scene_splits = [["pRbA3pwrgk9"]]
    else:
        # NOTE(review): six scene ids appear twice in `small`; kept as-is
        # because removing them would change which process gets which scene.
        small = [
            "rPc6DW4iMge",
            "e9zR4mvMWw7",
            "uNb9QFRL6hY",
            "qoiz87JEwZ2",
            "sKLMLpTHeUy",
            "s8pcmisQ38h",
            "759xd9YjKW5",
            "XcA2TqTSSAj",
            "SN83YJsR3w2",
            "8WUmhLawc2A",
            "JeFG25nYj2p",
            "17DRP5sb8fy",
            "Uxmj2M2itWa",
            "XcA2TqTSSAj",
            "SN83YJsR3w2",
            "8WUmhLawc2A",
            "JeFG25nYj2p",
            "17DRP5sb8fy",
            "Uxmj2M2itWa",
            "D7N2EKCX4Sj",
            "b8cTxDM8gDG",
            "sT4fr6TAbpF",
            "S9hNv5qa7GM",
            "82sE5b5pLXE",
            "pRbA3pwrgk9",
            "aayBHfsNo7d",
            "cV4RVeZvu5T",
            "i5noydFURQK",
            "YmJkqBEsHnH",
            "jh4fc5c5qoQ",
            "VVfe2KiqLaN",
            "29hnd4uzFmX",
            "Pm6F8kyY3z2",
            "JF19kD82Mey",
            "GdvgFV5R1Z5",
            "HxpKQynjfin",
            "vyrNrziPKCB",
        ]
        med = [
            "V2XKFyX4ASd",
            "VFuaQ6m2Qom",
            "ZMojNkEp431",
            "5LpN3gDmAk7",
            "r47D5H71a5s",
            "ULsKaCPVFJR",
            "E9uDoFAP3SH",
            "kEZ7cmS4wCh",
            "ac26ZMwG7aT",
            "dhjEzFoUFzH",
            "mJXqzFtmKg4",
            "p5wJjkQkbXX",
            "Vvot9Ly1tCj",
            "EDJbREhghzL",
            "VzqfbhrpDEA",
            "7y3sRwLe3Va",
        ]

        scene_splits = [[] for _ in range(num_processes)]
        # Small scenes go to process slots 1 and 2 of every GPU, two rounds
        # deep; medium scenes go to slot 0 of every GPU.
        distribute(
            small,
            scene_splits,
            num_gpus=8,
            procs_per_gpu=3,
            proc_offset=1,
            scenes_per_process=2,
        )
        distribute(
            med,
            scene_splits,
            num_gpus=8,
            procs_per_gpu=3,
            proc_offset=0,
            scenes_per_process=1,
        )

    configs = []
    for i in range(num_processes):
        task_config = config.clone()
        task_config.defrost()
        task_config.DATASET.CONTENT_SCENES = scene_splits[i]

        if len(gpu_ids) == 0:
            # Same fallback as `construct_env_configs`; previously an empty
            # GPU list crashed with ZeroDivisionError in the modulo below.
            task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = -1
        else:
            task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = gpu_ids[
                i % len(gpu_ids)
            ]

        task_config.freeze()
        configs.append(task_config.clone())

    return configs
def transform_rgb_bgr(image):
    """Return a copy of `image` with channels 0 and 2 of the third axis swapped.

    Only the first three channels are kept, in the order (2, 1, 0), which
    turns an RGB frame into the BGR order passed to `cv2.imshow` (and back).
    """
    channel_order = [2, 1, 0]
    return image[:, :, channel_order]
def generate_maps():
    """Build and save a navigability map for every Gibson PointNav training scene.

    Loads the `tasks/pointnav.yaml` Habitat config, points it at the Gibson
    v1 training split, and then, for each scene the dataset reports, creates
    an environment restricted to that scene and passes it to `make_map`.
    """
    config = get_habitat_config(
        os.path.join(HABITAT_CONFIGS_DIR, "tasks/pointnav.yaml")
    )
    config.defrost()
    config.DATASET.DATA_PATH = os.path.join(
        HABITAT_DATASETS_DIR, "pointnav/gibson/v1/train/train.json.gz"
    )
    config.DATASET.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR
    config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = 0
    config.freeze()

    dataset = habitat.make_dataset(config.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(config.DATASET)

    for scene in scenes:
        print("Making environment for:", scene)
        config.defrost()
        config.DATASET.CONTENT_SCENES = [scene]
        config.freeze()
        env = habitat.Env(config=config)
        try:
            make_map(env, scene)
        finally:
            # Always release the simulator, even when map generation fails,
            # so a failing scene does not leak an environment.
            env.close()
def make_scene_name(type_ind, scene_num):
    """Build the `FloorPlan..._physics` scene name for a scene type and number.

    For `type_ind == 1` the name contains only `scene_num`; for every other
    type index the index is prepended and single-digit scene numbers are
    zero-padded to two digits (e.g. type 2, scene 5 -> `FloorPlan205_physics`).
    """
    if type_ind == 1:
        scene_id = str(scene_num)
    else:
        padding = "0" if scene_num < 10 else ""
        scene_id = "".join((str(type_ind), padding, str(scene_num)))
    return "".join(("FloorPlan", scene_id, "_physics"))
scene_num in range(1, 31) ), ) for type_ind in range(1, NUM_SCENE_TYPES + 1) ] ) SCENES_TYPE_TO_TRAIN_SCENE_NAMES = OrderedDict( (key, scenes[:20]) for key, scenes in SCENES_TYPE_TO_SCENE_NAMES.items() ) SCENES_TYPE_TO_VALID_SCENE_NAMES = OrderedDict( (key, scenes[20:25]) for key, scenes in SCENES_TYPE_TO_SCENE_NAMES.items() ) SCENES_TYPE_TO_TEST_SCENE_NAMES = OrderedDict( (key, scenes[25:30]) for key, scenes in SCENES_TYPE_TO_SCENE_NAMES.items() ) ALL_SCENE_NAMES = sum(SCENES_TYPE_TO_SCENE_NAMES.values(), tuple()) TRAIN_SCENE_NAMES = sum( (scenes for scenes in SCENES_TYPE_TO_TRAIN_SCENE_NAMES.values()), tuple() ) VALID_SCENE_NAMES = sum( (scenes for scenes in SCENES_TYPE_TO_VALID_SCENE_NAMES.values()), tuple() ) TEST_SCENE_NAMES = sum( (scenes for scenes in SCENES_TYPE_TO_TEST_SCENE_NAMES.values()), tuple() ) TRAIN_SCENE_NAMES_SET = set(TRAIN_SCENE_NAMES) VALID_SCENE_NAMES_SET = set(VALID_SCENE_NAMES) TEST_SCENE_NAMES_SET = set(TEST_SCENE_NAMES) _object_type_and_location_tsv = """ AlarmClock bedrooms Apple kitchens ArmChair livingrooms,bedrooms BaseballBat bedrooms BasketBall bedrooms Bathtub bathrooms BathtubBasin bathrooms Bed bedrooms Blinds kitchens,bedrooms Book kitchens,livingrooms,bedrooms Boots livingrooms,bedrooms Bottle kitchens Bowl kitchens,livingrooms,bedrooms Box livingrooms,bedrooms Bread kitchens ButterKnife kitchens Cabinet kitchens,livingrooms,bedrooms,bathrooms Candle livingrooms,bathrooms Cart bathrooms CD bedrooms CellPhone kitchens,livingrooms,bedrooms Chair kitchens,livingrooms,bedrooms Cloth bedrooms,bathrooms CoffeeMachine kitchens CoffeeTable livingrooms,bedrooms CounterTop kitchens,livingrooms,bedrooms,bathrooms CreditCard kitchens,livingrooms,bedrooms Cup kitchens Curtains kitchens,livingrooms,bedrooms Desk bedrooms DeskLamp livingrooms,bedrooms DiningTable kitchens,livingrooms,bedrooms DishSponge kitchens,bathrooms Drawer kitchens,livingrooms,bedrooms,bathrooms Dresser livingrooms,bedrooms,bathrooms Egg kitchens Faucet 
kitchens,bathrooms FloorLamp livingrooms,bedrooms Footstool bedrooms Fork kitchens Fridge kitchens GarbageCan kitchens,livingrooms,bedrooms,bathrooms HandTowel bathrooms HandTowelHolder bathrooms HousePlant kitchens,livingrooms,bedrooms,bathrooms Kettle kitchens KeyChain livingrooms,bedrooms Knife kitchens Ladle kitchens Laptop kitchens,livingrooms,bedrooms LaundryHamper bedrooms LaundryHamperLid bedrooms Lettuce kitchens LightSwitch kitchens,livingrooms,bedrooms,bathrooms Microwave kitchens Mirror kitchens,livingrooms,bedrooms,bathrooms Mug kitchens,bedrooms Newspaper livingrooms Ottoman livingrooms,bedrooms Painting kitchens,livingrooms,bedrooms,bathrooms Pan kitchens PaperTowel kitchens,bathrooms Pen kitchens,livingrooms,bedrooms Pencil kitchens,livingrooms,bedrooms PepperShaker kitchens Pillow livingrooms,bedrooms Plate kitchens,livingrooms Plunger bathrooms Poster bedrooms Pot kitchens Potato kitchens RemoteControl livingrooms,bedrooms Safe kitchens,livingrooms,bedrooms SaltShaker kitchens ScrubBrush bathrooms Shelf kitchens,livingrooms,bedrooms,bathrooms ShowerCurtain bathrooms ShowerDoor bathrooms ShowerGlass bathrooms ShowerHead bathrooms SideTable livingrooms,bedrooms Sink kitchens,bathrooms SinkBasin kitchens,bathrooms SoapBar bathrooms SoapBottle kitchens,bathrooms Sofa livingrooms,bedrooms Spatula kitchens Spoon kitchens SprayBottle bathrooms Statue kitchens,livingrooms,bedrooms StoveBurner kitchens StoveKnob kitchens TeddyBear bedrooms Television livingrooms,bedrooms TennisRacket bedrooms TissueBox livingrooms,bedrooms,bathrooms Toaster kitchens Toilet bathrooms ToiletPaper bathrooms ToiletPaperHanger bathrooms Tomato kitchens Towel bathrooms TowelHolder bathrooms TVStand livingrooms Vase kitchens,livingrooms,bedrooms Watch livingrooms,bedrooms WateringCan livingrooms Window kitchens,livingrooms,bedrooms,bathrooms WineBottle kitchens """ OBJECT_TYPE_TO_SCENE_TYPES = OrderedDict() for ot_tab_scene_types in _object_type_and_location_tsv.split("\n"): if 
ot_tab_scene_types != "": ot, scene_types_csv = ot_tab_scene_types.split("\t") OBJECT_TYPE_TO_SCENE_TYPES[ot] = tuple(sorted(scene_types_csv.split(","))) SCENE_TYPE_TO_OBJECT_TYPES: Dict[str, Set[str]] = OrderedDict( ((k, set()) for k in ORDERED_SCENE_TYPES) ) for ot_tab_scene_types in _object_type_and_location_tsv.split("\n"): if ot_tab_scene_types != "": ot, scene_types_csv = ot_tab_scene_types.split("\t") for scene_type in scene_types_csv.split(","): SCENE_TYPE_TO_OBJECT_TYPES[scene_type].add(ot) ================================================ FILE: allenact_plugins/ithor_plugin/ithor_environment.py ================================================ """A wrapper for engaging with the THOR environment.""" import copy import functools import math import random from typing import Tuple, Dict, List, Set, Union, Any, Optional, Mapping, cast import ai2thor.server import networkx as nx import numpy as np from ai2thor.controller import Controller from scipy.spatial.transform import Rotation from allenact.utils.system import get_logger from allenact_plugins.ithor_plugin.ithor_constants import VISIBILITY_DISTANCE, FOV from allenact_plugins.ithor_plugin.ithor_util import round_to_factor class IThorEnvironment(object): """Wrapper for the ai2thor controller providing additional functionality and bookkeeping. See [here](https://ai2thor.allenai.org/documentation/installation) for comprehensive documentation on AI2-THOR. # Attributes controller : The ai2thor controller. """ def __init__( self, x_display: Optional[str] = None, docker_enabled: bool = False, local_thor_build: Optional[str] = None, visibility_distance: float = VISIBILITY_DISTANCE, fov: float = FOV, player_screen_width: int = 300, player_screen_height: int = 300, quality: str = "Very Low", restrict_to_initially_reachable_points: bool = False, make_agents_visible: bool = True, object_open_speed: float = 1.0, simplify_physics: bool = False, ) -> None: """Initializer. 
# Parameters x_display : The x display into which to launch ai2thor (possibly necessarily if you are running on a server without an attached display). docker_enabled : Whether or not to run thor in a docker container (useful on a server without an attached display so that you don't have to start an x display). local_thor_build : The path to a local build of ai2thor. This is probably not necessary for your use case and can be safely ignored. visibility_distance : The distance (in meters) at which objects, in the viewport of the agent, are considered visible by ai2thor and will have their "visible" flag be set to `True` in the metadata. fov : The agent's camera's field of view. player_screen_width : The width resolution (in pixels) of the images returned by ai2thor. player_screen_height : The height resolution (in pixels) of the images returned by ai2thor. quality : The quality at which to render. Possible quality settings can be found in `ai2thor._quality_settings.QUALITY_SETTINGS`. restrict_to_initially_reachable_points : Whether or not to restrict the agent to locations in ai2thor that were found to be (initially) reachable by the agent (i.e. reachable by the agent after resetting the scene). This can be useful if you want to ensure there are only a fixed set of locations where the agent can go. make_agents_visible : Whether or not the agent should be visible. Most noticable when there are multiple agents or when quality settings are high so that the agent casts a shadow. object_open_speed : How quickly objects should be opened. High speeds mean faster simulation but also mean that opening objects have a lot of kinetic energy and can, possibly, knock other objects away. simplify_physics : Whether or not to simplify physics when applicable. Currently this only simplies object interactions when opening drawers (when simplified, objects within a drawer do not slide around on their own when the drawer is opened or closed, instead they are effectively glued down). 
""" self._start_player_screen_width = player_screen_width self._start_player_screen_height = player_screen_height self._local_thor_build = local_thor_build self.x_display = x_display self.controller: Optional[Controller] = None self._started = False self._quality = quality self._initially_reachable_points: Optional[List[Dict]] = None self._initially_reachable_points_set: Optional[Set[Tuple[float, float]]] = None self._move_mag: Optional[float] = None self._grid_size: Optional[float] = None self._visibility_distance = visibility_distance self._fov = fov self.restrict_to_initially_reachable_points = ( restrict_to_initially_reachable_points ) self.make_agents_visible = make_agents_visible self.object_open_speed = object_open_speed self._always_return_visible_range = False self.simplify_physics = simplify_physics self.start(None) # noinspection PyTypeHints self.controller.docker_enabled = docker_enabled # type: ignore @property def scene_name(self) -> str: """Current ai2thor scene.""" return self.controller.last_event.metadata["sceneName"] @property def current_frame(self) -> np.ndarray: """Returns rgb image corresponding to the agent's egocentric view.""" return self.controller.last_event.frame @property def last_event(self) -> ai2thor.server.Event: """Last event returned by the controller.""" return self.controller.last_event @property def started(self) -> bool: """Has the ai2thor controller been started.""" return self._started @property def last_action(self) -> str: """Last action, as a string, taken by the agent.""" return self.controller.last_event.metadata["lastAction"] @last_action.setter def last_action(self, value: str) -> None: """Set the last action taken by the agent. Doing this is rewriting history, be careful. 
""" self.controller.last_event.metadata["lastAction"] = value @property def last_action_success(self) -> bool: """Was the last action taken by the agent a success?""" return self.controller.last_event.metadata["lastActionSuccess"] @last_action_success.setter def last_action_success(self, value: bool) -> None: """Set whether or not the last action taken by the agent was a success. Doing this is rewriting history, be careful. """ self.controller.last_event.metadata["lastActionSuccess"] = value @property def last_action_return(self) -> Any: """Get the value returned by the last action (if applicable). For an example of an action that returns a value, see `"GetReachablePositions"`. """ return self.controller.last_event.metadata["actionReturn"] @last_action_return.setter def last_action_return(self, value: Any) -> None: """Set the value returned by the last action. Doing this is rewriting history, be careful. """ self.controller.last_event.metadata["actionReturn"] = value def start( self, scene_name: Optional[str], move_mag: float = 0.25, **kwargs, ) -> None: """Starts the ai2thor controller if it was previously stopped. After starting, `reset` will be called with the scene name and move magnitude. # Parameters scene_name : The scene to load. move_mag : The amount of distance the agent moves in a single `MoveAhead` step. kwargs : additional kwargs, passed to reset. """ if self._started: raise RuntimeError( "Trying to start the environment but it is already started." 
) # noinspection PyUnresolvedReferences self.controller = Controller( x_display=self.x_display, width=self._start_player_screen_width, height=self._start_player_screen_height, local_executable_path=self._local_thor_build, quality=self._quality, server_class=ai2thor.fifo_server.FifoServer, ) if ( self._start_player_screen_height, self._start_player_screen_width, ) != self.current_frame.shape[:2]: self.controller.step( { "action": "ChangeResolution", "x": self._start_player_screen_width, "y": self._start_player_screen_height, } ) self._started = True self.reset(scene_name=scene_name, move_mag=move_mag, **kwargs) def stop(self) -> None: """Stops the ai2thor controller.""" try: self.controller.stop() except Exception as e: get_logger().warning(str(e)) finally: self._started = False def reset( self, scene_name: Optional[str], move_mag: float = 0.25, **kwargs, ): """Resets the ai2thor in a new scene. Resets ai2thor into a new scene and initializes the scene/agents with prespecified settings (e.g. move magnitude). # Parameters scene_name : The scene to load. move_mag : The amount of distance the agent moves in a single `MoveAhead` step. kwargs : additional kwargs, passed to the controller "Initialize" action. 
""" self._move_mag = move_mag self._grid_size = self._move_mag if scene_name is None: scene_name = self.controller.last_event.metadata["sceneName"] self.controller.reset(scene_name) self.controller.step( { "action": "Initialize", "gridSize": self._grid_size, "visibilityDistance": self._visibility_distance, "fieldOfView": self._fov, "makeAgentsVisible": self.make_agents_visible, "alwaysReturnVisibleRange": self._always_return_visible_range, **kwargs, } ) if self.object_open_speed != 1.0: self.controller.step( {"action": "ChangeOpenSpeed", "x": self.object_open_speed} ) self._initially_reachable_points = None self._initially_reachable_points_set = None self.controller.step({"action": "GetReachablePositions"}) if not self.controller.last_event.metadata["lastActionSuccess"]: get_logger().warning( "Error when getting reachable points: {}".format( self.controller.last_event.metadata["errorMessage"] ) ) self._initially_reachable_points = self.last_action_return def teleport_agent_to( self, x: float, y: float, z: float, rotation: float, horizon: float, standing: Optional[bool] = None, force_action: bool = False, only_initially_reachable: Optional[bool] = None, verbose=True, ignore_y_diffs=False, ) -> None: """Helper function teleporting the agent to a given location.""" if standing is None: standing = self.last_event.metadata.get( "isStanding", self.last_event.metadata["agent"].get("isStanding") ) original_location = self.get_agent_location() target = {"x": x, "y": y, "z": z} if only_initially_reachable is None: only_initially_reachable = self.restrict_to_initially_reachable_points if only_initially_reachable: reachable_points = self.initially_reachable_points reachable = False for p in reachable_points: if self.position_dist(target, p, ignore_y=ignore_y_diffs) < 0.01: reachable = True break if not reachable: self.last_action = "TeleportFull" self.last_event.metadata["errorMessage"] = ( "Target position was not initially reachable." 
) self.last_action_success = False return self.controller.step( dict( action="TeleportFull", x=x, y=y, z=z, rotation={"x": 0.0, "y": rotation, "z": 0.0}, horizon=horizon, standing=standing, forceAction=force_action, ) ) if not self.last_action_success: agent_location = self.get_agent_location() rot_diff = ( agent_location["rotation"] - original_location["rotation"] ) % 360 new_old_dist = self.position_dist( original_location, agent_location, ignore_y=ignore_y_diffs ) if ( self.position_dist( original_location, agent_location, ignore_y=ignore_y_diffs ) > 1e-2 or min(rot_diff, 360 - rot_diff) > 1 ): get_logger().warning( "Teleportation FAILED but agent still moved (position_dist {}, rot diff {})" " (\nprevious location\n{}\ncurrent_location\n{}\n)".format( new_old_dist, rot_diff, original_location, agent_location ) ) return if force_action: assert self.last_action_success return agent_location = self.get_agent_location() rot_diff = (agent_location["rotation"] - rotation) % 360 if ( self.position_dist(agent_location, target, ignore_y=ignore_y_diffs) > 1e-2 or min(rot_diff, 360 - rot_diff) > 1 ): if only_initially_reachable: self._snap_agent_to_initially_reachable(verbose=False) if verbose: get_logger().warning( "Teleportation did not place agent" " precisely where desired in scene {}" " (\ndesired\n{}\nactual\n{}\n)" " perhaps due to grid snapping." 
" Action is considered failed but agent may have moved.".format( self.scene_name, { "x": x, "y": y, "z": z, "rotation": rotation, "standing": standing, "horizon": horizon, }, agent_location, ) ) self.last_action_success = False return def random_reachable_state(self, seed: int = None) -> Dict: """Returns a random reachable location in the scene.""" if seed is not None: random.seed(seed) xyz = random.choice(self.currently_reachable_points) rotation = random.choice([0, 90, 180, 270]) horizon = random.choice([0, 30, 60, 330]) state = copy.copy(xyz) state["rotation"] = rotation state["horizon"] = horizon return state def randomize_agent_location( self, seed: int = None, partial_position: Optional[Dict[str, float]] = None ) -> Dict: """Teleports the agent to a random reachable location in the scene.""" if partial_position is None: partial_position = {} k = 0 state: Optional[Dict] = None while k == 0 or (not self.last_action_success and k < 10): state = self.random_reachable_state(seed=seed) self.teleport_agent_to(**{**state, **partial_position}) k += 1 if not self.last_action_success: get_logger().warning( ( "Randomize agent location in scene {}" " with seed {} and partial position {} failed in " "10 attempts. Forcing the action." ).format(self.scene_name, seed, partial_position) ) self.teleport_agent_to(**{**state, **partial_position}, force_action=True) # type: ignore assert self.last_action_success assert state is not None return state def object_pixels_in_frame( self, object_id: str, hide_all: bool = True, hide_transparent: bool = False ) -> np.ndarray: """Return an mask for a given object in the agent's current view. # Parameters object_id : The id of the object. hide_all : Whether or not to hide all other objects in the scene before getting the mask. hide_transparent : Whether or not partially transparent objects are considered to occlude the object. # Returns A numpy array of the mask. 
""" # Emphasizing an object turns it magenta and hides all other objects # from view, we can find where the hand object is on the screen by # emphasizing it and then scanning across the image for the magenta pixels. if hide_all: self.step({"action": "EmphasizeObject", "objectId": object_id}) else: self.step({"action": "MaskObject", "objectId": object_id}) if hide_transparent: self.step({"action": "HideTranslucentObjects"}) # noinspection PyShadowingBuiltins filter = np.array([[[255, 0, 255]]]) object_pixels = 1 * np.all(self.current_frame == filter, axis=2) if hide_all: self.step({"action": "UnemphasizeAll"}) else: self.step({"action": "UnmaskObject", "objectId": object_id}) if hide_transparent: self.step({"action": "UnhideAllObjects"}) return object_pixels def object_pixels_on_grid( self, object_id: str, grid_shape: Tuple[int, int], hide_all: bool = True, hide_transparent: bool = False, ) -> np.ndarray: """Like `object_pixels_in_frame` but counts object pixels in a partitioning of the image.""" def partition(n, num_parts): m = n // num_parts parts = [m] * num_parts num_extra = n % num_parts for k in range(num_extra): parts[k] += 1 return parts object_pixels = self.object_pixels_in_frame( object_id=object_id, hide_all=hide_all, hide_transparent=hide_transparent ) # Divide the current frame into a grid and count the number # of hand object pixels in each of the grid squares sums_in_blocks: List[List] = [] frame_shape = self.current_frame.shape[:2] row_inds = np.cumsum([0] + partition(frame_shape[0], grid_shape[0])) col_inds = np.cumsum([0] + partition(frame_shape[1], grid_shape[1])) for i in range(len(row_inds) - 1): sums_in_blocks.append([]) for j in range(len(col_inds) - 1): sums_in_blocks[i].append( np.sum( object_pixels[ row_inds[i] : row_inds[i + 1], col_inds[j] : col_inds[j + 1] ] ) ) return np.array(sums_in_blocks, dtype=np.float32) def object_in_hand(self): """Object metadata for the object in the agent's hand.""" inv_objs = 
self.last_event.metadata["inventoryObjects"] if len(inv_objs) == 0: return None elif len(inv_objs) == 1: return self.get_object_by_id( self.last_event.metadata["inventoryObjects"][0]["objectId"] ) else: raise AttributeError("Must be <= 1 inventory objects.") @property def initially_reachable_points(self) -> List[Dict[str, float]]: """List of {"x": x, "y": y, "z": z} locations in the scene that were reachable after initially resetting.""" assert self._initially_reachable_points is not None return copy.deepcopy(self._initially_reachable_points) # type:ignore @property def initially_reachable_points_set(self) -> Set[Tuple[float, float]]: """Set of (x,z) locations in the scene that were reachable after initially resetting.""" if self._initially_reachable_points_set is None: self._initially_reachable_points_set = set() for p in self.initially_reachable_points: self._initially_reachable_points_set.add( self._agent_location_to_tuple(p) ) return self._initially_reachable_points_set @property def currently_reachable_points(self) -> List[Dict[str, float]]: """List of {"x": x, "y": y, "z": z} locations in the scene that are currently reachable.""" self.step({"action": "GetReachablePositions"}) return self.last_event.metadata["actionReturn"] # type:ignore def get_agent_location(self) -> Dict[str, Union[float, bool]]: """Gets agent's location.""" metadata = self.controller.last_event.metadata location = { "x": metadata["agent"]["position"]["x"], "y": metadata["agent"]["position"]["y"], "z": metadata["agent"]["position"]["z"], "rotation": metadata["agent"]["rotation"]["y"], "horizon": metadata["agent"]["cameraHorizon"], "standing": metadata.get("isStanding", metadata["agent"].get("isStanding")), } return location @staticmethod def _agent_location_to_tuple(p: Dict[str, float]) -> Tuple[float, float]: return round(p["x"], 2), round(p["z"], 2) def _snap_agent_to_initially_reachable(self, verbose=True): agent_location = self.get_agent_location() end_location_tuple = 
self._agent_location_to_tuple(agent_location) if end_location_tuple in self.initially_reachable_points_set: return agent_x = agent_location["x"] agent_z = agent_location["z"] closest_reachable_points = list(self.initially_reachable_points_set) closest_reachable_points = sorted( closest_reachable_points, key=lambda xz: abs(xz[0] - agent_x) + abs(xz[1] - agent_z), ) # In rare cases end_location_tuple might be not considered to be in self.initially_reachable_points_set # even when it is, here we check for such cases. if ( math.sqrt( ( ( np.array(closest_reachable_points[0]) - np.array(end_location_tuple) ) ** 2 ).sum() ) < 1e-6 ): return saved_last_action = self.last_action saved_last_action_success = self.last_action_success saved_last_action_return = self.last_action_return saved_error_message = self.last_event.metadata["errorMessage"] # Thor behaves weirdly when the agent gets off of the grid and you # try to teleport the agent back to the closest grid location. To # get around this we first teleport the agent to random location # and then back to where it should be. 
for point in self.initially_reachable_points: if abs(agent_x - point["x"]) > 0.1 or abs(agent_z - point["z"]) > 0.1: self.teleport_agent_to( rotation=0, horizon=30, **point, only_initially_reachable=False, verbose=False, ) if self.last_action_success: break for p in closest_reachable_points: self.teleport_agent_to( **{**agent_location, "x": p[0], "z": p[1]}, only_initially_reachable=False, verbose=False, ) if self.last_action_success: break teleport_forced = False if not self.last_action_success: self.teleport_agent_to( **{ **agent_location, "x": closest_reachable_points[0][0], "z": closest_reachable_points[0][1], }, force_action=True, only_initially_reachable=False, verbose=False, ) teleport_forced = True self.last_action = saved_last_action self.last_action_success = saved_last_action_success self.last_action_return = saved_last_action_return self.last_event.metadata["errorMessage"] = saved_error_message new_agent_location = self.get_agent_location() if verbose: get_logger().warning( ( "In {}, at location (x,z)=({},{}) which is not in the set " "of initially reachable points;" " attempting to correct this: agent teleported to (x,z)=({},{}).\n" "Teleportation {} forced." 
).format( self.scene_name, agent_x, agent_z, new_agent_location["x"], new_agent_location["z"], "was" if teleport_forced else "wasn't", ) ) def step( self, action_dict: Optional[Dict[str, Union[str, int, float, Dict]]] = None, **kwargs: Union[str, int, float, Dict], ) -> ai2thor.server.Event: """Take a step in the ai2thor environment.""" if action_dict is None: action_dict = dict() action_dict.update(kwargs) action = cast(str, action_dict["action"]) skip_render = "renderImage" in action_dict and not action_dict["renderImage"] last_frame: Optional[np.ndarray] = None if skip_render: last_frame = self.current_frame if self.simplify_physics: action_dict["simplifyPhysics"] = True if "Move" in action and "Hand" not in action: # type: ignore action_dict = { **action_dict, "moveMagnitude": self._move_mag, } # type: ignore start_location = self.get_agent_location() sr = self.controller.step(action_dict) if self.restrict_to_initially_reachable_points: end_location_tuple = self._agent_location_to_tuple( self.get_agent_location() ) if end_location_tuple not in self.initially_reachable_points_set: self.teleport_agent_to(**start_location, force_action=True) # type: ignore self.last_action = action self.last_action_success = False self.last_event.metadata["errorMessage"] = ( "Moved to location outside of initially reachable points." 
@staticmethod
def rotation_dist(a: Dict[str, float], b: Dict[str, float]):
    """Total per-axis angular gap between two rotations.

    For each of the "x", "y" and "z" entries the difference is wrapped
    onto the circle and the shorter way around (at most 180 degrees) is
    taken; the three gaps are then added together.

    # Parameters

    a : Rotation given as a mapping with "x", "y" and "z" angles in degrees.
    b : Rotation in the same format as `a`.

    # Returns

    The sum of the three per-axis gaps, in degrees.
    """
    total = 0
    for axis in ("x", "y", "z"):
        gap = (a[axis] - b[axis]) % 360
        # Going around the circle in the other direction may be shorter.
        total += min(gap, 360 - gap)
    return total
break assert closest_point is not None return closest_point, min_dist @staticmethod def _angle_from_to(a_from: float, a_to: float) -> float: a_from = a_from % 360 a_to = a_to % 360 min_rot = min(a_from, a_to) max_rot = max(a_from, a_to) rot_across_0 = (360 - max_rot) + min_rot rot_not_across_0 = max_rot - min_rot rot_err = min(rot_across_0, rot_not_across_0) if rot_across_0 == rot_err: rot_err *= -1 if a_to > a_from else 1 else: rot_err *= 1 if a_to > a_from else -1 return rot_err def agent_xz_to_scene_xz(self, agent_xz: Dict[str, float]) -> Dict[str, float]: agent_pos = self.get_agent_location() x_rel_agent = agent_xz["x"] z_rel_agent = agent_xz["z"] scene_x = agent_pos["x"] scene_z = agent_pos["z"] rotation = agent_pos["rotation"] if abs(rotation) < 1e-5: scene_x += x_rel_agent scene_z += z_rel_agent elif abs(rotation - 90) < 1e-5: scene_x += z_rel_agent scene_z += -x_rel_agent elif abs(rotation - 180) < 1e-5: scene_x += -x_rel_agent scene_z += -z_rel_agent elif abs(rotation - 270) < 1e-5: scene_x += -z_rel_agent scene_z += x_rel_agent else: raise Exception("Rotation must be one of 0, 90, 180, or 270.") return {"x": scene_x, "z": scene_z} def scene_xz_to_agent_xz(self, scene_xz: Dict[str, float]) -> Dict[str, float]: agent_pos = self.get_agent_location() x_err = scene_xz["x"] - agent_pos["x"] z_err = scene_xz["z"] - agent_pos["z"] rotation = agent_pos["rotation"] if abs(rotation) < 1e-5: agent_x = x_err agent_z = z_err elif abs(rotation - 90) < 1e-5: agent_x = -z_err agent_z = x_err elif abs(rotation - 180) < 1e-5: agent_x = -x_err agent_z = -z_err elif abs(rotation - 270) < 1e-5: agent_x = z_err agent_z = -x_err else: raise Exception("Rotation must be one of 0, 90, 180, or 270.") return {"x": agent_x, "z": agent_z} def all_objects(self) -> List[Dict[str, Any]]: """Return all object metadata.""" return self.controller.last_event.metadata["objects"] def all_objects_with_properties( self, properties: Dict[str, Any] ) -> List[Dict[str, Any]]: """Find all objects 
@staticmethod
def location_for_key(key, y_value=0.0):
    """Expand a 4-element graph key into a location dictionary.

    # Parameters

    key : An `(x, z, rotation, horizon)` sequence of exactly four items.
    y_value : Value stored under "y", as the key carries no height.

    # Returns

    A dict with keys "x", "y", "z", "rotation" and "horizon".
    """
    x_coord, z_coord, heading, tilt = key
    return {
        "x": x_coord,
        "y": y_value,
        "z": z_coord,
        "rotation": heading,
        "horizon": tilt,
    }
def _add_node_to_graph(self, graph: nx.DiGraph, s: Tuple[float, float, int, int]):
    """Insert state `s` into `graph` and connect it to adjacent states.

    Candidate neighbours are produced by adding every offset from
    `possible_neighbor_offsets()` to `s`. For each candidate that is
    already a node, `_add_from_to_edge` is asked to add an edge in both
    directions (it decides whether a single action links the two states).

    # Parameters

    graph : The graph to update in place.
    s : State key of the form `(x, z, rotation, horizon)`.
    """
    if s in graph:
        return

    # Snapshot taken before adding `s`, so `s` is never its own neighbour.
    existing_nodes = set(graph.nodes())
    graph.add_node(s)

    for o in self.possible_neighbor_offsets():
        # Node keys built by `get_key` round x/z to 2 decimals and wrap the
        # rotation and horizon with `% 360`. The candidate must be
        # normalized the same way, otherwise wrap-around neighbours
        # (rotation 0 <-> 270, horizon 0 <-> 330, i.e. -30) and positions
        # affected by float round-off are never found and their edges
        # are silently missing from the graph.
        t = (
            round(s[0] + o[0], 2),
            round(s[1] + o[1], 2),
            (s[2] + o[2]) % 360,
            (s[3] + o[3]) % 360,
        )
        if t in existing_nodes:
            self._add_from_to_edge(graph, s, t)
            self._add_from_to_edge(graph, t, s)
class RGBSensorThor(RGBSensor[THOR_ENV_TYPE, THOR_TASK_TYPE]):
    """RGB image sensor for THOR environments.

    Produces a copy of the current egocentric RGB frame, taken either
    from a raw `ai2thor` controller or from an environment wrapper that
    exposes `current_frame`.
    """

    def frame_from_env(
        self,
        env: THOR_ENV_TYPE,
        task: Optional[THOR_TASK_TYPE],
    ) -> np.ndarray:  # type:ignore
        """Return a copy of the most recent RGB frame held by `env`."""
        # Environment wrappers expose the frame directly; a bare controller
        # only exposes it through its last event.
        if not isinstance(env, ai2thor.controller.Controller):
            return env.current_frame.copy()
        return env.last_event.frame.copy()
class RelativePositionChangeTHORSensor(
    Sensor[RoboThorEnvironment, Task[RoboThorEnvironment]]
):
    """Sensor reporting how the agent's pose changed since the last observation.

    Every observation is a dict with two length-3 arrays:

    * "last_allocentric_position": the `(x, z, rotation)` pose stored at
      the previous observation (the current pose on the first step).
    * "dx_dz_dr": the change from that pose to the current one, as
      computed by `get_relative_position_change`.
    """

    def __init__(self, uuid: str = "rel_position_change", **kwargs: Any):
        # NOTE: `locals()` is handed to `prepare_locals_for_super` below, so
        # avoid introducing additional local variables in this method.
        observation_space = gym.spaces.Dict(
            {
                "last_allocentric_position": gym.spaces.Box(
                    low=np.array([-np.inf, -np.inf, 0], dtype=np.float32),
                    high=np.array([np.inf, np.inf, 360], dtype=np.float32),
                    shape=(3,),
                    dtype=np.float32,
                ),
                "dx_dz_dr": gym.spaces.Box(
                    low=np.array([-np.inf, -np.inf, -360], dtype=np.float32),
                    # The upper bounds for dx/dz must be +inf; using -inf
                    # here would make the space empty in those dimensions.
                    high=np.array([np.inf, np.inf, 360], dtype=np.float32),
                    shape=(3,),
                    dtype=np.float32,
                ),
            }
        )
        super().__init__(**prepare_locals_for_super(locals()))

        # Pose `(x, z, rotation)` recorded at the previous observation.
        self.last_xzr: Optional[np.ndarray] = None

    @staticmethod
    def get_relative_position_change(from_xzr: np.ndarray, to_xzr: np.ndarray):
        """Express the pose change `to_xzr - from_xzr` relative to `from_xzr`.

        # Parameters

        from_xzr : Starting pose as `[x, z, rotation_in_degrees]`.
        to_xzr : Ending pose in the same format.

        # Returns

        An array `[dx, dz, dr]` where `(dx, dz)` is the displacement rotated
        by the starting rotation and `dr` is the rotation change wrapped
        into `[0, 360)`.
        """
        dx_dz_dr = to_xzr - from_xzr

        # Transform dx, dz (in global coordinates) into the relative coordinates
        # given by rotation r0=from_xzr[-1]. This requires rotating everything so that
        # r0 is facing in the positive z direction. Since thor rotations are negative
        # the usual rotation direction this means we want to rotate by r0 degrees.
        theta = np.pi * from_xzr[-1] / 180
        cos_theta = np.cos(theta)
        sin_theta = np.sin(theta)

        dx_dz_dr = (
            np.array(
                [
                    [cos_theta, -sin_theta, 0],
                    [sin_theta, cos_theta, 0],
                    [0, 0, 1],  # Don't change dr
                ]
            )
            @ dx_dz_dr.reshape(-1, 1)
        ).reshape(-1)

        dx_dz_dr[-1] = dx_dz_dr[-1] % 360
        return dx_dz_dr

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Optional[Task[RoboThorEnvironment]],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Return the previous pose and the pose change since it was recorded."""
        agent = env.controller.last_event.metadata["agent"]
        position = agent["position"]
        current_xzr = np.array(
            [position["x"], position["z"], agent["rotation"]["y"] % 360]
        )

        # On the first step of a task (or if no pose was ever recorded) the
        # reference pose is the current one, giving a zero change. A copy
        # keeps the returned array distinct from the stored state.
        if task.num_steps_taken() == 0 or self.last_xzr is None:
            self.last_xzr = current_xzr.copy()

        dx_dz_dr = self.get_relative_position_change(
            from_xzr=self.last_xzr, to_xzr=current_xzr
        )

        to_return = {"last_allocentric_position": self.last_xzr, "dx_dz_dr": dx_dz_dr}

        self.last_xzr = current_xzr

        return to_return
class SceneBoundsTHORSensor(Sensor[RoboThorEnvironment, Task[RoboThorEnvironment]]):
    """Sensor returning the x and z extents of the current scene.

    The extents are read from the "sceneBounds" entry of the last event's
    metadata (its "center" and "size" fields) and returned as a dict with
    "x_range" and "z_range" arrays of the form `[min, max]`.
    """

    def __init__(self, uuid: str = "scene_bounds", **kwargs: Any):
        # NOTE: `locals()` is handed to `prepare_locals_for_super` below, so
        # avoid introducing additional local variables in this method.
        #
        # Bounds are created as float32 to match the declared dtype of the
        # spaces (as `ReachableBoundsTHORSensor` does) rather than relying
        # on an implicit float64 -> float32 cast inside `gym.spaces.Box`.
        observation_space = gym.spaces.Dict(
            {
                "x_range": gym.spaces.Box(
                    low=np.array([-np.inf, -np.inf], dtype=np.float32),
                    high=np.array([np.inf, np.inf], dtype=np.float32),
                    shape=(2,),
                    dtype=np.float32,
                ),
                "z_range": gym.spaces.Box(
                    low=np.array([-np.inf, -np.inf], dtype=np.float32),
                    high=np.array([np.inf, np.inf], dtype=np.float32),
                    shape=(2,),
                    dtype=np.float32,
                ),
            }
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Optional[Task[RoboThorEnvironment]],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Return `{"x_range": [min_x, max_x], "z_range": [min_z, max_z]}`."""
        # Accept a raw controller as well as an environment wrapper, in the
        # same way `ReachableBoundsTHORSensor.get_observation` does.
        if isinstance(env, ai2thor.controller.Controller):
            controller = env
        else:
            controller = env.controller

        scene_bounds = controller.last_event.metadata["sceneBounds"]
        center = scene_bounds["center"]
        size = scene_bounds["size"]

        return {
            "x_range": np.array(
                [center["x"] - size["x"] / 2, center["x"] + size["x"] / 2]
            ),
            "z_range": np.array(
                [center["z"] - size["z"] / 2, center["z"] + size["z"] / 2]
            ),
        }
torch.device = torch.device("cpu"), **kwargs: Any, ): self.fov = fov self.vision_range_in_cm = vision_range_in_cm self.map_size_in_cm = map_size_in_cm self.resolution_in_cm = resolution_in_cm self.height_bins = height_bins self.ego_only = ego_only self.return_egocentric_local_context = return_egocentric_local_context self.exclude_agent = exclude_agent self.binned_pc_map_builder = BinnedPointCloudMapBuilder( fov=fov, vision_range_in_cm=vision_range_in_cm, map_size_in_cm=map_size_in_cm, resolution_in_cm=resolution_in_cm, height_bins=height_bins, return_egocentric_local_context=return_egocentric_local_context, ) self.device = device big_map_space = gym.spaces.Box( low=0, high=np.inf, shape=self.binned_pc_map_builder.binned_point_cloud_map.shape, dtype=np.float32, ) local_map_space = gym.spaces.Box( low=0, high=np.inf, shape=(self.binned_pc_map_builder.vision_range_in_map_units,) * 2 + self.binned_pc_map_builder.binned_point_cloud_map.shape[-1:], dtype=np.float32, ) space_dict = { "egocentric_update": local_map_space, } if self.return_egocentric_local_context: space_dict = { "egocentric_local_context": copy.deepcopy(local_map_space), } if not ego_only: space_dict["allocentric_update"] = copy.deepcopy(big_map_space) space_dict["map"] = copy.deepcopy(big_map_space) observation_space = gym.spaces.Dict(space_dict) super().__init__(**prepare_locals_for_super(locals())) self.map_range_sensor = map_range_sensor @property def device(self): return self.binned_pc_map_builder.device @device.setter def device(self, val: torch.device): self.binned_pc_map_builder.device = torch.device(val) def get_observation( self, env: RoboThorEnvironment, task: Optional[Task[RoboThorEnvironment]], *args: Any, **kwargs: Any, ) -> Any: if isinstance(env, ai2thor.controller.Controller): controller = env else: controller = env.controller e = controller.last_event metadata = e.metadata if task.num_steps_taken() == 0: xz_ranges_dict = self.map_range_sensor.get_observation(env=env, task=task) if 
class SemanticMapTHORSensor(Sensor[RoboThorEnvironment, Task[RoboThorEnvironment]]):
    """Sensor producing boolean semantic-map observations via a `SemanticMapBuilder`.

    The observation always contains "egocentric_update" and
    "egocentric_mask"; when `ego_only` is false it also contains
    "explored_mask" and "map". On the first step of a task the builder is
    reset using 2D object hulls requested from the controller and the
    scene range reported by `map_range_sensor`.
    """

    observation_space = gym.spaces.Dict

    def __init__(
        self,
        fov: float,
        vision_range_in_cm: int,
        map_size_in_cm: int,
        resolution_in_cm: int,
        ordered_object_types: Sequence[str],
        map_range_sensor: Sensor,
        ego_only: bool = True,
        uuid: str = "semantic_map",
        device: torch.device = torch.device("cpu"),
        **kwargs: Any,
    ):
        # NOTE: `locals()` is handed to `prepare_locals_for_super` at the end
        # of this method, so the set of local names here is deliberately
        # left unchanged.
        self.fov = fov
        self.vision_range_in_cm = vision_range_in_cm
        self.map_size_in_cm = map_size_in_cm
        self.resolution_in_cm = resolution_in_cm
        self.ordered_object_types = ordered_object_types
        self.map_range_sensor = map_range_sensor
        self.ego_only = ego_only

        self.semantic_map_builder = SemanticMapBuilder(
            fov=fov,
            vision_range_in_cm=vision_range_in_cm,
            map_size_in_cm=map_size_in_cm,
            resolution_in_cm=resolution_in_cm,
            ordered_object_types=ordered_object_types,
            device=device,
        )

        def get_map_space(nchannels: int, size: int):
            return gym.spaces.Box(
                low=0,
                high=1,
                shape=(size, size, nchannels),
                dtype=np.bool_,
            )

        n = len(self.ordered_object_types)
        # Side length (in map cells) of the egocentric window and of the full map.
        small = self.vision_range_in_cm // self.resolution_in_cm
        big = self.semantic_map_builder.ground_truth_semantic_map.shape[0]

        space_dict = {
            "egocentric_update": get_map_space(
                nchannels=n,
                size=small,
            ),
            "egocentric_mask": get_map_space(
                nchannels=1,
                size=small,
            ),
        }
        if not ego_only:
            space_dict["explored_mask"] = get_map_space(
                nchannels=1,
                size=big,
            )
            space_dict["map"] = get_map_space(
                nchannels=n,
                size=big,
            )

        observation_space = gym.spaces.Dict(space_dict)
        super().__init__(**prepare_locals_for_super(locals()))

    @property
    def device(self):
        """Device used by the underlying semantic map builder."""
        return self.semantic_map_builder.device

    @device.setter
    def device(self, val: torch.device):
        self.semantic_map_builder.device = torch.device(val)

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Optional[Task[RoboThorEnvironment]],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Update the semantic map from the latest depth frame and return it.

        # Returns

        A dict with one boolean array per key of the observation space;
        non-boolean builder outputs are thresholded at 0.001.

        # Raises

        AssertionError : If the `Get2DSemanticHulls` action fails on the
            first step of a task.
        """
        with include_object_data(env.controller):
            # Captured before any further controller steps below so that the
            # depth frame and agent pose belong to the same event.
            last_event = env.controller.last_event
            metadata = last_event.metadata

            if task.num_steps_taken() == 0:
                env.controller.step(
                    "Get2DSemanticHulls", objectTypes=self.ordered_object_types
                )
                hull_metadata = env.controller.last_event.metadata
                # Report the simulator's error message; the success flag
                # itself is always falsy when this assertion fires.
                assert hull_metadata[
                    "lastActionSuccess"
                ], f"Get2DSemanticHulls failed with error '{hull_metadata['errorMessage']}'"

                object_id_to_hull = hull_metadata["actionReturn"]

                xz_ranges_dict = self.map_range_sensor.get_observation(
                    env=env, task=task
                )

                self.semantic_map_builder.reset(
                    min_xyz=np.array(
                        [
                            xz_ranges_dict["x_range"][0],
                            0,  # TODO: Should y be different per scene?
                            xz_ranges_dict["z_range"][0],
                        ]
                    ),
                    object_hulls=[
                        ObjectHull2d(
                            object_id=o["objectId"],
                            object_type=o["objectType"],
                            hull_points=object_id_to_hull[o["objectId"]],
                        )
                        for o in metadata["objects"]
                        if o["objectId"] in object_id_to_hull
                    ],
                )

            map_dict = self.semantic_map_builder.update(
                depth_frame=last_event.depth_frame,
                camera_xyz=np.array(
                    [metadata["cameraPosition"][k] for k in ["x", "y", "z"]]
                ),
                camera_rotation=metadata["agent"]["rotation"]["y"],
                camera_horizon=metadata["agent"]["cameraHorizon"],
            )
            return {
                k: map_dict[k] > 0.001 if map_dict[k].dtype != np.bool_ else map_dict[k]
                for k in self.observation_space.spaces.keys()
            }
def sample_scene(self, force_advance_scene: bool):
    """Choose the scene for the next task and update the sampling state.

    The behaviour depends on `self.scene_period`:

    * `None`: a scene index is drawn uniformly at random.
    * `"manual"`: the scene only changes when `force_advance_scene` is true.
    * an `int`: the current scene is kept until `self.scene_counter`
      reaches the period, then the next scene in `self.scene_order` is
      used (reshuffling the order after the last one).

    If `self.max_tasks` is not `None` it is decremented by one.

    # Parameters

    force_advance_scene : If true, move to the next scene before applying
        the period logic. A warning is logged when this is used with a
        `scene_period` other than `"manual"`.

    # Returns

    The name of the selected scene.

    # Raises

    NotImplementedError : If `self.scene_period` is not `None`,
        `"manual"` or an `int`.
    """
    if force_advance_scene:
        if self.scene_period != "manual":
            get_logger().warning(
                "When sampling scene, have `force_advance_scene == True`"
                " but `self.scene_period` is not equal to 'manual',"
                " this may cause unexpected behavior."
            )
        self.scene_id = (1 + self.scene_id) % len(self.scenes)
        if self.scene_id == 0:
            random.shuffle(self.scene_order)

    if self.scene_period is None:
        # Random scene
        self.scene_id = random.randint(0, len(self.scenes) - 1)
    elif self.scene_period == "manual":
        pass
    elif isinstance(self.scene_period, int):
        # The type is checked before comparing so that an unsupported
        # string period reaches the `NotImplementedError` below instead of
        # failing with a `TypeError` on `int >= str`.
        if self.scene_counter >= self.scene_period:
            if self.scene_id == len(self.scene_order) - 1:
                # Randomize scene order for next iteration
                random.shuffle(self.scene_order)
                # Move to next scene
                self.scene_id = 0
            else:
                # Move to next scene
                self.scene_id += 1
            # Reset scene counter
            self.scene_counter = 1
        else:
            # Stay in current scene
            self.scene_counter += 1
    else:
        raise NotImplementedError(
            "Invalid scene_period {}".format(self.scene_period)
        )

    if self.max_tasks is not None:
        self.max_tasks -= 1

    return self.scenes[int(self.scene_order[self.scene_id])]
def reset(self):
    """Restore the sampler to its initial sampling state.

    Zeroes the scene counter and scene index, installs a freshly shuffled
    ordering over the indices of `self.scenes`, and restores `self.max_tasks`
    from `self.reset_tasks`.
    """
    self.scene_counter = 0
    self.scene_id = 0

    # Build the identity ordering first, then shuffle it in place.
    shuffled_order = [*range(len(self.scenes))]
    random.shuffle(shuffled_order)
    self.scene_order = shuffled_order

    self.max_tasks = self.reset_tasks
def __init__(
    self,
    env: IThorEnvironment,
    sensors: List[Sensor],
    task_info: Dict[str, Any],
    max_steps: int,
    **kwargs,
) -> None:
    """Create the task and initialise its bookkeeping.

    All arguments are forwarded unchanged to the parent initializer; see
    the class documentation for their meaning. Afterwards the agent's
    current location is stored as the first entry of
    `task_info["followed_path"]` and the action names are stored under
    `task_info["action_names"]`.
    """
    super().__init__(
        env=env,
        sensors=sensors,
        task_info=task_info,
        max_steps=max_steps,
        **kwargs,
    )

    # Episode outcome flags, updated when the `End` action is taken.
    self._took_end_action: bool = False
    self._success: Optional[bool] = False

    # Lazily filled by `query_expert`.
    self._subsampled_locations_from_which_obj_visible: Optional[
        List[Tuple[float, float, int, int]]
    ] = None

    start_location = self.env.get_agent_location()
    self.task_info["followed_path"] = [start_location]
    self.task_info["action_names"] = self.class_action_names()
def judge(self) -> float:
    """Return the reward for the step that was just taken.

    Every step costs 0.01, a failed last action costs a further 0.03, and
    once the `End` action has been taken the reward is shifted by +1.0 on
    success or -1.0 on failure.
    """
    total = -0.01  # constant per-step penalty

    if not self.last_action_success:
        total -= 0.03

    if self._took_end_action:
        if self._success:
            total += 1.0
        else:
            total -= 1.0

    return float(total)
locations_from_which_object_is_visible: List[ Tuple[float, float, int, int] ] = [] y = self.env.last_event.metadata["agent"]["position"]["y"] positions_to_check_interactionable_from = [ {"x": x, "y": y, "z": z} for x, z in set((x, z) for x, z, _, _ in self.env.graph.nodes) ] for obj_id in set(obj_ids): self.env.controller.step( { "action": "PositionsFromWhichItemIsInteractable", "objectId": obj_id, "positions": positions_to_check_interactionable_from, } ) assert ( self.env.last_action_success ), "Could not get positions from which item was interactable." returned = self.env.last_event.metadata["actionReturn"] locations_from_which_object_is_visible.extend( ( round(x, 2), round(z, 2), round_to_factor(rot, 90) % 360, round_to_factor(hor, 30) % 360, ) for x, z, rot, hor, standing in zip( returned["x"], returned["z"], returned["rotation"], returned["horizon"], returned["standing"], ) if standing == 1 ) self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE[key] = ( locations_from_which_object_is_visible ) self._subsampled_locations_from_which_obj_visible = ( self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE[key] ) if len(self._subsampled_locations_from_which_obj_visible) > 5: self._subsampled_locations_from_which_obj_visible = random.sample( self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE[key], 5 ) current_loc_key = self.env.get_key(self.env.last_event.metadata["agent"]) paths = [] for goal_key in self._subsampled_locations_from_which_obj_visible: path = self.env.shortest_state_path( source_state_key=current_loc_key, goal_state_key=goal_key ) if path is not None: paths.append(path) if len(paths) == 0: return 0, False shortest_path_ind = int(np.argmin([len(p) for p in paths])) if len(paths[shortest_path_ind]) == 1: get_logger().warning( "Shortest path computations suggest we are at the target but episode does not think so." 
def get_open_x_displays(throw_error_if_empty: bool = False) -> Sequence[str]:
    """List the X display/screen pairs that can currently be opened.

    Candidate displays are taken from the sockets matching
    `/tmp/.X11-unix/X*`. Each candidate is opened with Xlib to count its
    screens; candidates that cannot be parsed or opened are skipped with a
    warning.

    # Parameters

    throw_error_if_empty : If `True`, raise instead of returning an empty
        list when no display could be opened.

    # Returns

    A list of strings of the form `"<display>.<screen>"`.

    # Raises

    AssertionError : If not running on Linux.
    IOError : If `throw_error_if_empty` is `True` and nothing was found.
    """
    assert platform.system() == "Linux", "Can only get X-displays for Linux systems."

    displays = []

    # Socket files are named `X<number>`; drop the leading "X".
    open_display_strs = [
        os.path.basename(s)[1:] for s in glob.glob("/tmp/.X11-unix/X*")
    ]

    for open_display_str in sorted(open_display_strs):
        try:
            open_display_str = str(int(open_display_str))
            display = Xlib.display.Display(f":{open_display_str}")
        except Exception:
            warnings.warn(
                f"Encountered error when attempting to open display :{open_display_str},"
                f" error message:\n{traceback.format_exc()}"
            )
            continue

        try:
            displays.extend(
                f"{open_display_str}.{i}" for i in range(display.screen_count())
            )
        finally:
            # The connection is only needed to count screens; close it so
            # that probing does not leak one X connection per display.
            display.close()

    if throw_error_if_empty and len(displays) == 0:
        raise IOError(
            "Could not find any open X-displays on which to run AI2-THOR processes. "
            " Please see the AI2-THOR installation instructions at"
            " https://allenact.org/installation/installation-framework/#installation-of-ithor-ithor-plugin"
            " for information as to how to start such displays."
        )

    return displays
def __init__(
    self,
    path_to_trajectory: Sequence[str] = ("task_info", "followed_path"),
    label: str = "thor_trajectory",
    figsize: Tuple[float, float] = (8, 8),  # width, height
    fontsize: float = 10,
    scenes: Union[Tuple[str, int, int], Sequence[Tuple[str, int, int]]] = (
        ("FloorPlan{}_physics", 1, 30),
        ("FloorPlan{}_physics", 201, 230),
        ("FloorPlan{}_physics", 301, 330),
        ("FloorPlan{}_physics", 401, 430),
    ),
    viz_rows_cols: Tuple[int, int] = (448, 448),
    single_color: bool = False,
    view_triangle_only_on_last: bool = True,
    disable_view_triangle: bool = False,
    line_opacity: float = 1.0,
    path_to_rot_degrees: Sequence[str] = ("rotation",),
    **kwargs,
):
    """Configure the trajectory visualizer.

    `path_to_trajectory`, `label`, `figsize`, `fontsize`,
    `path_to_rot_degrees` and any extra keyword arguments are forwarded to
    the parent initializer. `scenes` may be a single
    `(name_template, first_index, last_index)` tuple or a sequence of such
    tuples; a single tuple is wrapped into a one-element list. The cache
    directory for top-down views is created if it does not exist. The
    remaining arguments are stored as attributes of the same name.
    """
    super().__init__(
        path_to_trajectory=path_to_trajectory,
        label=label,
        figsize=figsize,
        fontsize=fontsize,
        path_to_rot_degrees=path_to_rot_degrees,
        **kwargs,
    )

    # A lone (template, start, end) tuple is recognised by its first
    # element being a string; normalise it to a list of tuples.
    scene_specs = [scenes] if isinstance(scenes[0], str) else scenes
    self.scenes = cast(List[Tuple[str, int, int]], scene_specs)

    self.room_path = ITHOR_VIZ_CACHED_TOPDOWN_VIEWS_DIR
    os.makedirs(self.room_path, exist_ok=True)

    # Rendering options
    self.viz_rows_cols = viz_rows_cols
    self.single_color = single_color
    self.view_triangle_only_on_last = view_triangle_only_on_last
    self.disable_view_triangle = disable_view_triangle
    self.line_opacity = line_opacity

    # Only needed for rendering
    self.map_data: Optional[Dict[str, Any]] = None
    self.thor_top_downs: Optional[Dict[str, np.ndarray]] = None
    self.controller: Optional[Controller] = None
def crop_viz_image(self, viz_image: np.ndarray) -> np.ndarray:
    """Crop `viz_image` to the configured number of rows and columns.

    The result keeps rows `0 .. viz_rows_cols[0]` and columns
    `0 .. viz_rows_cols[1]` of the input, with all channels.
    """
    n_rows = self.viz_rows_cols[0]
    row_window = slice(int(n_rows * 0), int(n_rows * 1))

    # Columns span from the left edge up to the configured width.
    col_window = slice(0, self.viz_rows_cols[1])

    return viz_image[row_window, col_window, :]
@staticmethod
def add_lines_to_map(
    ps: Sequence[Any],
    frame: np.ndarray,
    pos_translator: ThorPositionTo2DFrameTranslator,
    opacity: float,
    color: Optional[Tuple[int, ...]] = None,
) -> np.ndarray:
    """Draw a single-colour polyline through `ps` on top of `frame`.

    # Parameters

    ps : Positions to connect, in order. Each is passed to `pos_translator`.
    frame : Image array to draw on; it is not modified.
    pos_translator : Callable mapping a position to `(row, col)` pixel
        coordinates.
    opacity : Line opacity in `[0, 1]`, scaled to a 0-255 alpha value.
    color : RGB tuple for the line; defaults to red `(255, 0, 0)`.

    # Returns

    `frame` itself when fewer than two positions are given, otherwise a new
    RGB array with the lines alpha-composited over the frame.
    """
    if len(ps) <= 1:
        return frame
    if color is None:
        color = (255, 0, 0)

    img1 = Image.fromarray(frame.astype("uint8"), "RGB").convert("RGBA")
    # PIL sizes are (width, height) whereas `frame.shape[:-1]` is
    # (rows, cols); reuse the base image's size so that the overlay also
    # matches for non-square frames (alpha_composite requires equal sizes).
    img2 = Image.new("RGBA", img1.size)  # Use RGBA

    opacity = int(round(255 * opacity))  # Define transparency for the lines.
    line_width = int(frame.shape[0] / 100)
    fill = color + (opacity,)

    draw = ImageDraw.Draw(img2)
    for start, end in zip(ps[:-1], ps[1:]):
        # The translator returns (row, col); PIL expects (x, y) == (col, row).
        draw.line(
            tuple(reversed(pos_translator(start)))
            + tuple(reversed(pos_translator(end))),
            fill=fill,
            width=line_width,
        )

    img = Image.alpha_composite(img1, img2)
    return np.array(img.convert("RGB"))
@staticmethod
def add_agent_view_triangle(
    position: Any,
    rotation: float,
    frame: np.ndarray,
    pos_translator: ThorPositionTo2DFrameTranslator,
    scale: float = 1.0,
    opacity: float = 0.1,
) -> np.ndarray:
    """Overlay a translucent white triangle showing the agent's heading.

    # Parameters

    position : Indexable position; elements 0 and 2 are used as the
        horizontal-plane coordinates of the triangle's apex.
    rotation : Agent rotation in degrees.
    frame : Image array to draw on; it is not modified.
    pos_translator : Callable mapping a 2D position to `(row, col)` pixel
        coordinates.
    scale : Size multiplier for the triangle.
    opacity : Triangle opacity in `[0, 1]`, scaled to a 0-255 alpha value.

    # Returns

    A new RGB array with the triangle alpha-composited over the frame.
    """
    # Force a float array: the in-place additions below would otherwise fail
    # when the position components happen to be integers.
    p0 = np.array((position[0], position[2]), dtype=float)
    p1 = copy.copy(p0)
    p2 = copy.copy(p0)

    theta = -2 * math.pi * (rotation / 360.0)
    rotation_mat = np.array(
        [[math.cos(theta), -math.sin(theta)], [math.sin(theta), math.cos(theta)]]
    )
    # Offsets of the two far corners before rotation.
    offset1 = scale * np.array([-1 / 2.0, 1])
    offset2 = scale * np.array([1 / 2.0, 1])

    p1 += np.matmul(rotation_mat, offset1)
    p2 += np.matmul(rotation_mat, offset2)

    img1 = Image.fromarray(frame.astype("uint8"), "RGB").convert("RGBA")
    # PIL sizes are (width, height) whereas `frame.shape[:-1]` is
    # (rows, cols); reuse the base image's size so that the overlay also
    # matches for non-square frames (alpha_composite requires equal sizes).
    img2 = Image.new("RGBA", img1.size)  # Use RGBA

    opacity = int(round(255 * opacity))  # Define transparency for the triangle.
    # The translator returns (row, col); PIL expects (x, y) == (col, row).
    points = [tuple(reversed(pos_translator(p))) for p in [p0, p1, p2]]
    draw = ImageDraw.Draw(img2)
    draw.polygon(points, fill=(255, 255, 255, opacity))

    img = Image.alpha_composite(img1, img2)
    return np.array(img.convert("RGB"))
def make_fig(self, episode: Any, episode_id: str) -> Figure:
    """Render every agent's trajectory for one episode onto a top-down map.

    The room name is taken from the first two underscore-separated parts of
    `episode_id`. For each agent suffix (paired with its start/end colours)
    the trajectory is read from `episode` at the trajectory prefix path
    whose last key has the suffix appended, and drawn on top of the image
    produced for the previous agents.
    """
    if self.thor_top_downs is None:
        self.init_top_down_render()

    roomname = "_".join(episode_id.split("_")[:2])
    canvas = self.thor_top_downs[roomname]

    agents_and_colors = zip(
        self.agent_suffixes, self.trajectory_start_end_color_strs
    )
    for suffix, color_pair in agents_and_colors:
        prefix = self.path_to_trajectory_prefix
        # Same keys as the prefix, with the agent suffix on the last one.
        agent_path = [*prefix[:-1], prefix[-1] + suffix]

        canvas = self.visualize_agent_path(
            self._access(episode, agent_path),
            canvas,
            self.map_data[roomname]["pos_translator"],
            single_color=self.single_color,
            view_triangle_only_on_last=self.view_triangle_only_on_last,
            disable_view_triangle=self.disable_view_triangle,
            line_opacity=self.line_opacity,
            trajectory_start_end_color_str=color_pair,
        )

    fig, ax = plt.subplots(figsize=self.figsize)
    ax.set_title(episode_id, fontsize=self.fontsize)
    # Reverse the channel axis before handing the image to matplotlib.
    ax.imshow(self.crop_viz_image(canvas)[:, :, ::-1])
    ax.axis("off")

    return fig
@lru_cache(1000)
def _get_world_corners(world_dim: int, world_radius: int):
    """Return the corners of the hypercube `[-world_radius, world_radius]^world_dim`.

    The result is a sorted tuple of coordinate tuples, one per combination
    of signs. A radius of zero yields the single all-zero corner.
    """
    if world_radius == 0:
        return ((0,) * world_dim,)

    # Every corner has each coordinate at either -radius or +radius.
    extremes = (-world_radius, world_radius)
    return tuple(sorted(itertools.product(extremes, repeat=world_dim)))
def __init__(self, world_dim: int, world_radius: int, **kwargs):
    """Build a world of the given dimension and radius and reset it.

    # Parameters

    world_dim : Number of dimensions of the world.
    world_radius : Radius of the world; the world tensor has
        `2 * world_radius + 1` cells per dimension.
    kwargs : May contain `"seed"`; when absent a seed is drawn from
        `np.random`.
    """
    self.world_dim = world_dim
    self.world_radius = world_radius

    corner_tuples = _get_world_corners(
        world_dim=world_dim, world_radius=world_radius
    )
    self.world_corners = np.array(corner_tuples, dtype=int)

    self.curses_screen: Optional[Any] = None

    # The cached base tensor is shared between environments, so work on a copy.
    shared_base = _base_world_tensor(world_radius=world_radius, world_dim=world_dim)
    self.world_tensor: np.ndarray = copy.deepcopy(shared_base)

    self.current_position = np.zeros(world_dim, dtype=int)
    self.closest_distance_to_corners = np.full(
        2**world_dim, fill_value=world_radius, dtype=int
    )
    self.positions: List[Tuple[int, ...]] = [tuple(self.current_position)]

    self.goal_position: Optional[np.ndarray] = None
    self.last_action: Optional[int] = None

    self.seed: Optional[int] = None
    self.np_seeded_random_gen: Optional[np.random.RandomState] = None

    # The fallback seed is drawn whether or not a seed was supplied.
    fallback_seed = np.random.randint(0, 2**31 - 1)
    self.set_seed(seed=int(kwargs.get("seed", fallback_seed)))

    self.random_reset()
def step(self, action: int) -> bool:
    """Move the agent one cell along one axis, clamped to the world bounds.

    Actions `0 .. world_dim - 1` move in the positive direction along the
    corresponding axis; actions `world_dim .. 2 * world_dim - 1` move in the
    negative direction.

    # Returns

    `True` if the position changed, `False` if the move was blocked by the
    world boundary. In both cases a position is appended to
    `self.positions`.
    """
    assert 0 <= action < 2 * self.world_dim
    self.last_action = action

    axis = action % self.world_dim
    direction = 1 if action < self.world_dim else -1

    before = self.current_position[axis]
    after = min(max(direction + before, -self.world_radius), self.world_radius)

    if after == before:
        # Blocked: record the unchanged position again.
        self.positions.append(self.positions[-1])
        return False

    self.current_position[axis] = after

    # Largest per-axis gap to each corner, folded into the running minimum.
    gap_to_corners = np.abs(
        self.world_corners - self.current_position.reshape(1, -1)
    ).max(1)
    self.closest_distance_to_corners = np.minimum(
        gap_to_corners, self.closest_distance_to_corners
    )

    self.positions.append(tuple(self.current_position))
    return True
def close(self):
    """Tear down the curses screen, if one was opened.

    Does nothing when no curses screen is active. After the teardown the
    screen handle is cleared, so calling `close` again is a no-op and later
    code that checks `self.curses_screen is not None` does not repeat the
    teardown on an already closed screen (this matches the teardown
    performed in `random_reset`).
    """
    if self.curses_screen is None:
        return

    try:
        curses.nocbreak()
        self.curses_screen.keypad(False)
        curses.echo()
        curses.endwin()
    finally:
        # Drop the handle even if one of the teardown calls failed.
        self.curses_screen = None
def forward(  # type:ignore
    self,
    observations: ObservationType,
    memory: Memory,
    prev_actions: torch.Tensor,
    masks: torch.FloatTensor,
) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
    """Run the single linear layer and split its output into the three heads.

    The last dimension of the linear output is sliced as: the first
    `num_actions` entries are the main actor logits, the next `num_actions`
    entries are the auxiliary actor logits and the final entry is the value.
    `memory`, `prev_actions` and `masks` are not used.

    # Returns

    A tuple `(ActorCriticOutput, None)`; the auxiliary distribution is placed
    in `extras["auxiliary_distributions"]`.
    """
    n_act = self.num_actions
    features = cast(torch.Tensor, observations[self.input_uuid])
    head_out = self.linear(features)

    actor_logits = head_out[..., :n_act]
    advisor_logits = head_out[..., n_act:-1]
    critic_out = head_out[..., -1:]

    # step x sampler x ...
    main_distr = cast(DistributionType, CategoricalDistr(logits=actor_logits))
    # step x sampler x flattened
    flat_values = cast(
        torch.FloatTensor, critic_out.view(critic_out.shape[:2] + (-1,))
    )
    aux_distr = CategoricalDistr(logits=advisor_logits)

    # noinspection PyArgumentList
    ac_output = ActorCriticOutput(
        distributions=main_distr,
        values=flat_values,
        extras={"auxiliary_distributions": aux_distr},
    )
    return ac_output, None
class CornerSensor(Sensor[LightHouseEnvironment, Any]):
    """Sensor returning the corner observation of a `LightHouseEnvironment`.

    The observation itself is produced by `get_corner_observation`; this class
    stores the view radius and caches the per-corner view offsets.
    """

    def __init__(
        self,
        view_radius: int,
        world_dim: int,
        uuid: str = "corner_fixed_radius",
        **kwargs: Any
    ):
        """Initializer.

        # Parameters

        view_radius : View radius passed to `get_corner_observation`.
        world_dim : Dimensionality of the world; determines the observation shape.
        uuid : Identifier of this sensor.
        kwargs : Extra keyword arguments (picked up through `locals()` below).
        """
        self.view_radius = view_radius
        self.world_dim = world_dim
        # Filled in lazily on the first `get_observation` call.
        self.view_corner_offsets: Optional[np.ndarray] = None

        # NOTE: this local must keep its name, it is handed to the parent
        # initializer through `locals()` on the next line.
        observation_space = self._get_observation_space()

        # presumably forwards `uuid`, `observation_space` and `kwargs` to
        # `Sensor.__init__` -- confirm against `prepare_locals_for_super`.
        super().__init__(**prepare_locals_for_super(locals()))

    def _get_observation_space(self) -> gym.spaces.Box:
        """Build a 1D `Box` of length `2**world_dim + 2`.

        Bounds are the minimum and maximum of `LightHouseEnvironment.SPACE_LEVELS`.
        """
        # NOTE(review): the space is declared with `dtype=int` while
        # `get_corner_observation` fills a float32 array -- confirm intended.
        return gym.spaces.Box(
            low=min(LightHouseEnvironment.SPACE_LEVELS),
            high=max(LightHouseEnvironment.SPACE_LEVELS),
            shape=(2**self.world_dim + 2,),
            dtype=int,
        )

    def get_observation(
        self,
        env: LightHouseEnvironment,
        task: Optional[Task],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Return `get_corner_observation` for `env` at this sensor's view radius.

        `task`, `args` and `kwargs` are not used.
        """
        if self.view_corner_offsets is None:
            # Computed once from the first environment seen and then reused:
            # +view_radius where a corner coordinate is positive, -view_radius
            # otherwise.
            self.view_corner_offsets = self.view_radius * (
                2 * (env.world_corners > 0) - 1
            )

        return get_corner_observation(
            env=env,
            view_radius=self.view_radius,
            view_corner_offsets=self.view_corner_offsets,
        )
) self.view_corner_offsets: Optional[np.ndarray] = None # self.world_corners_offset: Optional[List[typing.Tuple[int, ...]]] = None self.corner_sensor = CornerSensor(self.view_radius, self.world_dim) self.variables_and_levels = self._get_variables_and_levels( world_dim=self.world_dim ) self._design_mat_formula = self._create_formula( variables_and_levels=self._get_variables_and_levels( world_dim=self.world_dim ), degree=self.degree, ) self.single_row_df = pd.DataFrame( data=[[0] * len(self.variables_and_levels)], columns=[x[0] for x in self.variables_and_levels], ) self._view_tuple_to_design_array: Dict[Tuple[int, ...], np.ndarray] = {} ( design_matrix, tuple_to_ind, ) = self._create_full_design_matrix_and_tuple_to_ind_dict( variables_and_levels=tuple(self.variables_and_levels), degree=self.degree ) self.design_matrix = design_matrix self.tuple_to_ind = tuple_to_ind observation_space = self._get_observation_space() super().__init__(**prepare_locals_for_super(locals())) def _get_observation_space(self): return gym.spaces.Box( low=min(LightHouseEnvironment.SPACE_LEVELS), high=max(LightHouseEnvironment.SPACE_LEVELS), shape=( len( self.view_tuple_to_design_array( (0,) * len(self.variables_and_levels) ) ), ), dtype=int, ) def view_tuple_to_design_array(self, view_tuple: Tuple): return np.array( self.design_matrix[self.tuple_to_ind[view_tuple], :], dtype=np.float32 ) @classmethod def output_dim(cls, world_dim: int): return ((3 if world_dim == 1 else 4) ** (2**world_dim)) * ( 2 * world_dim + 1 ) ** 2 @classmethod def _create_full_design_matrix_and_tuple_to_ind_dict( cls, variables_and_levels: Sequence[Tuple[str, Sequence[int]]], degree: int ): variables_and_levels = tuple((x, tuple(y)) for x, y in variables_and_levels) key = (variables_and_levels, degree) if key not in cls._DESIGN_MAT_CACHE: all_tuples = [ tuple(x) for x in itertools.product( *[levels for _, levels in variables_and_levels] ) ] tuple_to_ind = {} for i, t in enumerate(all_tuples): tuple_to_ind[t] = i df = 
pd.DataFrame( data=all_tuples, columns=[var_name for var_name, _ in variables_and_levels], ) cls._DESIGN_MAT_CACHE[key] = ( np.array( 1.0 * patsy.dmatrix( cls._create_formula( variables_and_levels=variables_and_levels, degree=degree ), data=df, ), dtype=bool, ), tuple_to_ind, ) return cls._DESIGN_MAT_CACHE[key] @staticmethod def _get_variables_and_levels(world_dim: int): return ( [ ("s{}".format(i), list(range(3 if world_dim == 1 else 4))) for i in range(2**world_dim) ] + [("b{}".format(i), list(range(2 * world_dim + 1))) for i in range(1)] + [("a{}".format(i), list(range(2 * world_dim + 1))) for i in range(1)] ) @classmethod def _create_formula( cls, variables_and_levels: Sequence[Tuple[str, Sequence[int]]], degree: int ): def make_categorial(var_name, levels): return "C({}, levels={})".format(var_name, levels) if degree == -1: return ":".join( make_categorial(var_name, levels) for var_name, levels in variables_and_levels ) else: return "({})**{}".format( "+".join( make_categorial(var_name, levels) for var_name, levels in variables_and_levels ), degree, ) def get_observation( self, env: LightHouseEnvironment, task: Optional[Task], *args: Any, **kwargs: Any ) -> Any: kwargs["as_tuple"] = True view_array = self.corner_sensor.get_observation(env, task, *args, **kwargs) return self.view_tuple_to_design_array(tuple(view_array)) ================================================ FILE: allenact_plugins/lighthouse_plugin/lighthouse_tasks.py ================================================ import abc import string from typing import List, Dict, Any, Optional, Tuple, Union, Sequence, cast import gym import numpy as np from gym.utils import seeding from allenact.base_abstractions.misc import RLStepResult from allenact.base_abstractions.sensor import Sensor, SensorSuite from allenact.base_abstractions.task import Task, TaskSampler from allenact.utils.experiment_utils import set_seed from allenact.utils.system import get_logger from 
class LightHouseTask(Task[LightHouseEnvironment], abc.ABC):
    """Abstract base for embodied tasks played in the light house gridworld.

    # Attributes

    env : The light house environment.
    sensor_suite: Collection of sensors formed from the `sensors` argument in the initializer.
    task_info : Dictionary of (k, v) pairs defining task goals and other task information.
    max_steps : The maximum number of steps an agent can take in the task before it is
        considered failed.
    observation_space: The observation space returned on each step from the sensors.
    """

    def __init__(
        self,
        env: LightHouseEnvironment,
        sensors: Union[SensorSuite, List[Sensor]],
        task_info: Dict[str, Any],
        max_steps: int,
        **kwargs,
    ) -> None:
        """Initializer.

        All arguments are forwarded to the parent initializer; see the class
        documentation for their meaning. No action is recorded initially.
        """
        super().__init__(
            env=env,
            sensors=sensors,
            task_info=task_info,
            max_steps=max_steps,
            **kwargs
        )
        self._last_action: Optional[int] = None

    @property
    def last_action(self) -> int:
        """The action most recently passed to `step` (`None` before any step)."""
        return self._last_action

    @last_action.setter
    def last_action(self, value: int):
        self._last_action = value

    def step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        """Record `action` as the last action, then delegate to the parent `step`.

        Only plain `int` actions are accepted (checked with an assertion).
        """
        assert isinstance(action, int)
        chosen = cast(int, action)

        self.last_action = chosen
        return super().step(action=chosen)

    def render(self, mode: str = "array", *args, **kwargs) -> np.ndarray:
        """Render the environment.

        # Parameters

        mode : `"array"` returns the environment's own array rendering;
            `"rgb"`, `"rgb_array"` and `"human"` index a fixed ten-colour
            palette with that array.
        kwargs : Forwarded to the environment's `render`.

        # Raises

        NotImplementedError : For any other `mode`.
        """
        if mode not in ("array", "rgb", "rgb_array", "human"):
            raise NotImplementedError("Render mode '{}' is not supported.".format(mode))

        if mode == "array":
            return self.env.render(mode, **kwargs)

        grid = self.env.render("array", **kwargs)
        palette = np.array(
            [
                (31, 119, 180),
                (255, 127, 14),
                (44, 160, 44),
                (214, 39, 40),
                (148, 103, 189),
                (140, 86, 75),
                (227, 119, 194),
                (127, 127, 127),
                (188, 189, 34),
                (23, 190, 207),
            ],
            dtype=np.uint8,
        )
        return palette[grid]
self._found_target @classmethod def class_action_names(cls, world_dim: int = 2, **kwargs) -> Tuple[str, ...]: assert 1 <= world_dim <= 26, "Too many dimensions." if world_dim not in cls._CACHED_ACTION_NAMES: action_names = [ "{}(+1)".format(string.ascii_lowercase[i] for i in range(world_dim)) ] action_names.extend( "{}(-1)".format(string.ascii_lowercase[i] for i in range(world_dim)) ) cls._CACHED_ACTION_NAMES[world_dim] = tuple(action_names) return cls._CACHED_ACTION_NAMES[world_dim] def action_names(self) -> Tuple[str, ...]: return self.class_action_names(world_dim=self.env.world_dim) def close(self) -> None: pass def query_expert( self, expert_view_radius: int, return_policy: bool = False, deterministic: bool = False, **kwargs, ) -> Tuple[Any, bool]: view_tuple = get_corner_observation( env=self.env, view_radius=expert_view_radius, view_corner_offsets=None, ) goal = self.env.GOAL wrong = self.env.WRONG_CORNER if self.env.world_dim == 1: left_view, right_view, hitting, last_action = view_tuple left = 1 right = 0 expert_action: Optional[int] = None policy: Optional[np.ndarray] = None if left_view == goal: expert_action = left elif right_view == goal: expert_action = right elif hitting != 2 * self.env.world_dim: expert_action = left if last_action == right else right elif left_view == wrong: expert_action = right elif right_view == wrong: expert_action = left elif last_action == 2 * self.env.world_dim: policy = np.array([0.5, 0.5]) else: expert_action = last_action if policy is None: policy = np.array([expert_action == right, expert_action == left]) elif self.env.world_dim == 2: tl, tr, bl, br, hitting, last_action = view_tuple wall = self.env.WALL d, r, u, l, none = 0, 1, 2, 3, 4 if tr == goal: if hitting != r: expert_action = r else: expert_action = u elif br == goal: if hitting != d: expert_action = d else: expert_action = r elif bl == goal: if hitting != l: expert_action = l else: expert_action = d elif tl == goal: if hitting != u: expert_action = u else: 
class FindGoalLightHouseTaskSampler(TaskSampler):
    """Task sampler producing `FindGoalLightHouseTask`s on one shared environment.

    A single `LightHouseEnvironment` is created in the initializer and reused:
    every call to `next_task` re-seeds it, randomly resets it and wraps it in a
    new task.
    """

    def __init__(
        self,
        world_dim: int,
        world_radius: int,
        sensors: Union[SensorSuite, List[Sensor]],
        max_steps: int,
        max_tasks: Optional[int] = None,
        num_unique_seeds: Optional[int] = None,
        task_seeds_list: Optional[List[int]] = None,
        deterministic_sampling: bool = False,
        seed: Optional[int] = None,
        **kwargs,
    ):
        """Initializer.

        # Parameters

        world_dim : Passed to `LightHouseEnvironment`.
        world_radius : Passed to `LightHouseEnvironment`.
        sensors : A `SensorSuite`, or a list of sensors that is wrapped in one.
        max_steps : `max_steps` given to every sampled task.
        max_tasks : Total number of tasks this sampler may produce (`None` means
            no limit).
        num_unique_seeds : If given without `task_seeds_list`, the environment
            seeds are `range(num_unique_seeds)`. Must be positive and, if both
            are given, equal to `len(task_seeds_list)`.
        task_seeds_list : Explicit list of environment seeds to draw from.
        deterministic_sampling : Cycle through the seeds in order instead of
            drawing them at random; requires a known set of seeds.
        seed : Seed for this sampler's own randomness; drawn at random if `None`.
        kwargs : Ignored.
        """
        self.env = LightHouseEnvironment(world_dim=world_dim, world_radius=world_radius)

        self._last_sampled_task: Optional[FindGoalLightHouseTask] = None
        self.sensors = (
            SensorSuite(sensors) if not isinstance(sensors, SensorSuite) else sensors
        )
        self.max_steps = max_steps
        self.max_tasks = max_tasks
        self.num_tasks_generated = 0
        self.deterministic_sampling = deterministic_sampling

        self.num_unique_seeds = num_unique_seeds
        self.task_seeds_list = task_seeds_list
        assert (self.num_unique_seeds is None) or (
            0 < self.num_unique_seeds
        ), "`num_unique_seeds` must be a positive integer."

        if self.task_seeds_list is not None:
            if self.num_unique_seeds is not None:
                assert self.num_unique_seeds == len(
                    self.task_seeds_list
                ), "`num_unique_seeds` must equal the length of `task_seeds_list` if both specified."
            self.num_unique_seeds = len(self.task_seeds_list)
        elif self.num_unique_seeds is not None:
            self.task_seeds_list = list(range(self.num_unique_seeds))

        assert (not deterministic_sampling) or (
            self.num_unique_seeds is not None
        ), "Cannot use deterministic sampling when `num_unique_seeds` is `None`."

        if (not deterministic_sampling) and self.max_tasks:
            get_logger().warning(
                "`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,"
                " this might be a mistake when running testing."
            )

        self.seed: int = int(
            seed if seed is not None else np.random.randint(0, 2**31 - 1)
        )
        self.np_seeded_random_gen: Optional[np.random.RandomState] = None
        self.set_seed(self.seed)

    @property
    def world_dim(self):
        """Dimensionality of the shared environment."""
        return self.env.world_dim

    @property
    def world_radius(self):
        """Radius of the shared environment."""
        return self.env.world_radius

    @property
    def length(self) -> Union[int, float]:
        """Number of tasks still to be sampled (`inf` when `max_tasks` is `None`)."""
        return (
            float("inf")
            if self.max_tasks is None
            else self.max_tasks - self.num_tasks_generated
        )

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """`2**world_dim`, capped by `num_unique_seeds` when that is set."""
        n = 2**self.world_dim
        return n if self.num_unique_seeds is None else min(n, self.num_unique_seeds)

    @property
    def last_sampled_task(self) -> Optional[Task]:
        """The task most recently returned by `next_task` (`None` before that)."""
        return self._last_sampled_task

    def next_task(self, force_advance_scene: bool = False) -> Optional[Task]:
        """Seed and reset the environment and return a new task on it.

        Returns `None` once `length` has dropped to zero.
        `force_advance_scene` is not used.
        """
        if self.length <= 0:
            return None

        if self.num_unique_seeds is not None:
            if self.deterministic_sampling:
                # Walk through the seed list in order, wrapping around.
                seed = self.task_seeds_list[
                    self.num_tasks_generated % len(self.task_seeds_list)
                ]
            else:
                seed = self.np_seeded_random_gen.choice(self.task_seeds_list)
        else:
            seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)

        self.num_tasks_generated += 1

        self.env.set_seed(seed)
        self.env.random_reset()

        # Remember the task so that `last_sampled_task` reports it; previously
        # the attribute was never updated and the property always gave `None`.
        self._last_sampled_task = FindGoalLightHouseTask(
            env=self.env, sensors=self.sensors, task_info={}, max_steps=self.max_steps
        )
        return self._last_sampled_task

    def close(self) -> None:
        """Nothing to release."""
        pass

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Always `True`."""
        return True

    def reset(self) -> None:
        """Restart the task count and re-seed with the stored seed."""
        self.num_tasks_generated = 0
        self.set_seed(seed=self.seed)

    def set_seed(self, seed: int) -> None:
        """Seed the global generators and this sampler's own random generator."""
        set_seed(seed)
        self.np_seeded_random_gen, _ = seeding.np_random(seed)
        self.seed = seed
self.deviation = deviation self.current_pos = 0 self.has_filled = False self.memory: np.ndarray = np.zeros(min_memory_size) def __call__( self, stage_steps: int, total_steps: int, training_metrics: ScalarMeanTracker, ) -> bool: sums = training_metrics.sums() counts = training_metrics.counts() k = "ep_length" if k in sums: count = counts[k] ep_length_ave = sums[k] / count n = self.memory.shape[0] if count >= n: if count > n: # Increase memory size to fit all of the new values self.memory = np.full(count, fill_value=ep_length_ave) else: # We have exactly as many values as the memory size, # simply set the whole memory to be equal to the new # average ep length. self.memory[:] = ep_length_ave self.current_pos = 0 self.has_filled = True else: self.memory[self.current_pos : (self.current_pos + count)] = ( ep_length_ave ) if self.current_pos + count > n: self.has_filled = True self.current_pos = self.current_pos + count % n self.memory[: self.current_pos] = ep_length_ave if not self.has_filled: return False return self.memory.mean() < self.optimal + self.deviation ================================================ FILE: allenact_plugins/lighthouse_plugin/scripts/__init__.py ================================================ ================================================ FILE: allenact_plugins/manipulathor_plugin/__init__.py ================================================ from allenact.utils.system import ImportChecker with ImportChecker( "Cannot `import ai2thor`, please install `ai2thor` (`pip install ai2thor`)." 
def coord_system_transform(position: Dict, coord_system: str):
    """Express a position dict in the requested coordinate system.

    # Parameters

    position : Mapping with numeric entries `"x"`, `"y"` and `"z"`.
    coord_system : One of
        `"xyz_signed"` (x, y, z),
        `"xyz_unsigned"` (absolute values of x, y, z),
        `"polar_radian"` (radius, elevation / (pi/2), azimuth / pi) or
        `"polar_trigo"` (radius, cos/sin of elevation, cos/sin of azimuth).

    # Returns

    A `torch.Tensor` with 3 entries (5 for `"polar_trigo"`).
    """
    assert coord_system in [
        "xyz_unsigned",
        "xyz_signed",
        "polar_radian",
        "polar_trigo",
    ]

    x, y, z = position["x"], position["y"], position["z"]

    if "xyz" in coord_system:
        cartesian = torch.Tensor([x, y, z])
        return torch.abs(cartesian) if coord_system == "xyz_unsigned" else cartesian

    planar_norm = np.hypot(x, y)
    radius = np.hypot(planar_norm, z)
    elevation = np.arctan2(z, planar_norm)  # elevation angle: [-pi/2, pi/2]
    azimuth = np.arctan2(y, x)  # azimuthal angle: [-pi, pi]

    if coord_system == "polar_radian":
        # Both angles are normalized to [-1, 1].
        components = [radius, elevation / (0.5 * np.pi), azimuth / np.pi]
    else:  # polar_trigo
        components = [
            radius,
            np.cos(elevation),
            np.sin(elevation),
            np.cos(azimuth),
            np.sin(azimuth),
        ]
    return torch.Tensor(components)
def matrix_to_position_rotation(matrix):
    """Split a transform matrix into position and rotation dicts.

    The upper-left 3x3 part of `matrix` is converted to `"xyz"` Euler angles
    in degrees and the first three entries of the fourth column are taken as
    the position.

    # Returns

    `{"position": {"x", "y", "z"}, "rotation": {"x", "y", "z"}}`.
    """
    euler_deg = R.from_matrix(matrix[:3, :3]).as_euler("xyz", degrees=True)
    translation = matrix[:3, 3]

    axes = ("x", "y", "z")
    return {
        "position": dict(zip(axes, translation)),
        "rotation": dict(zip(axes, euler_deg)),
    }
def get_agent_start_positions():
    """Return the contents of the dataset JSON file, loading it on first use.

    The parsed content of `dataset_json_file` is cached in the module-level
    `_ARM_START_POSITIONS`, so the file is read at most once.

    # Raises

    Exception : If the file cannot be opened or parsed.
    """
    global _ARM_START_POSITIONS
    # Load only when nothing is cached yet. The check used to be inverted,
    # so the file was never read and `None` was always returned.
    if _ARM_START_POSITIONS is None:
        try:
            with open(dataset_json_file) as f:
                _ARM_START_POSITIONS = json.load(f)
        except Exception as err:
            raise Exception(f"Dataset not found in {dataset_json_file}") from err
    return _ARM_START_POSITIONS
= "RotateArmWristYawM" ROTATE_WRIST_ROLL_P = "RotateArmWristRollP" ROTATE_WRIST_ROLL_M = "RotateArmWristRollM" ROTATE_ELBOW_P = "RotateArmElbowP" ROTATE_ELBOW_M = "RotateArmElbowM" LOOK_UP = "LookUp" LOOK_DOWN = "LookDown" PICKUP = "PickUpMidLevel" DROP = "DropMidLevel" DONE = "DoneMidLevel" ENV_ARGS = dict( gridSize=0.25, width=224, height=224, visibilityDistance=1.0, agentMode="arm", fieldOfView=100, agentControllerType="mid-level", server_class=ai2thor.fifo_server.FifoServer, useMassThreshold=True, massThreshold=10, autoSimulation=False, autoSyncTransforms=True, ) VALID_OBJECT_LIST = [ "Knife", "Bread", "Fork", "Potato", "SoapBottle", "Pan", "Plate", "Tomato", "Egg", "Pot", "Spatula", "Cup", "Bowl", "SaltShaker", "PepperShaker", "Lettuce", "ButterKnife", "Apple", "DishSponge", "Spoon", "Mug", ] ================================================ FILE: allenact_plugins/manipulathor_plugin/manipulathor_environment.py ================================================ """A wrapper for engaging with the ManipulaTHOR environment.""" import copy import math import warnings from typing import Dict, Union, Any, Optional, cast import ai2thor.server import numpy as np from ai2thor.controller import Controller from allenact.utils.misc_utils import prepare_locals_for_super from allenact_plugins.ithor_plugin.ithor_constants import VISIBILITY_DISTANCE, FOV from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment from allenact_plugins.manipulathor_plugin.armpointnav_constants import ( MOVE_ARM_HEIGHT_CONSTANT, MOVE_ARM_CONSTANT, UNWANTED_MOVE_THR, DISTANCE_MAX, ) from allenact_plugins.manipulathor_plugin.manipulathor_constants import ( ADDITIONAL_ARM_ARGS, ARM_MIN_HEIGHT, ARM_MAX_HEIGHT, ) from allenact_plugins.manipulathor_plugin.manipulathor_constants import ( ROTATE_WRIST_PITCH_P, ROTATE_WRIST_PITCH_M, ROTATE_WRIST_YAW_P, ROTATE_WRIST_YAW_M, ROTATE_ELBOW_P, ROTATE_ELBOW_M, LOOK_UP, LOOK_DOWN, MOVE_AHEAD, ROTATE_RIGHT, ROTATE_LEFT, PICKUP, DONE, ) from 
def rotation_distance(s1: Dict[str, Dict[str, float]], s2: Dict[str, Dict[str, float]]):
    """Sum of per-axis angular differences between two rotations.

    For each of the axes `x`, `y` and `z` the difference in degrees is taken
    modulo 360 and folded so that it is at most 180; the three values are
    added up.

    # Parameters

    s1 : Mapping with a `"rotation"` entry holding `x`, `y`, `z` in degrees.
    s2 : Same layout as `s1`.
    """
    rot_a = s1["rotation"]
    rot_b = s2["rotation"]

    total = 0
    for axis in ("x", "y", "z"):
        delta = (rot_a[axis] - rot_b[axis]) % 360
        # Go around the circle the short way.
        total += min(delta, 360 - delta)
    return total
This is probably not necessary for your use case and can be safely ignored. visibility_distance : The distance (in meters) at which objects, in the viewport of the agent, are considered visible by ai2thor and will have their "visible" flag set to `True` in the metadata. fov : The agent's camera's field of view. player_screen_width : The width resolution (in pixels) of the images returned by ai2thor. player_screen_height : The height resolution (in pixels) of the images returned by ai2thor. quality : The quality at which to render. Possible quality settings can be found in `ai2thor._quality_settings.QUALITY_SETTINGS`. restrict_to_initially_reachable_points : Whether or not to restrict the agent to locations in ai2thor that were found to be (initially) reachable by the agent (i.e. reachable by the agent after resetting the scene). This can be useful if you want to ensure there is only a fixed set of locations where the agent can go. make_agents_visible : Whether or not the agent should be visible. Most noticeable when there are multiple agents or when quality settings are high so that the agent casts a shadow. object_open_speed : How quickly objects should be opened. High speeds mean faster simulation but also mean that opening objects have a lot of kinetic energy and can, possibly, knock other objects away. simplify_physics : Whether or not to simplify physics when applicable. Currently this only simplifies object interactions when opening drawers (when simplified, objects within a drawer do not slide around on their own when the drawer is opened or closed, instead they are effectively glued down).
""" self._verbose = verbose self.env_args = env_args del verbose del env_args super(ManipulaTHOREnvironment, self).__init__( **prepare_locals_for_super(locals()) ) def create_controller(self): controller = Controller(**self.env_args) return controller def start( self, scene_name: Optional[str], move_mag: float = 0.25, **kwargs, ) -> None: """Starts the ai2thor controller if it was previously stopped. After starting, `reset` will be called with the scene name and move magnitude. # Parameters scene_name : The scene to load. move_mag : The amount of distance the agent moves in a single `MoveAhead` step. kwargs : additional kwargs, passed to reset. """ if self._started: raise RuntimeError( "Trying to start the environment but it is already started." ) self.controller = self.create_controller() self._started = True self.reset(scene_name=scene_name, move_mag=move_mag, **kwargs) def reset( self, scene_name: Optional[str], move_mag: float = 0.25, **kwargs, ): self._move_mag = move_mag self._grid_size = self._move_mag if scene_name is None: scene_name = self.controller.last_event.metadata["sceneName"] # self.reset_init_params()#**kwargs) removing this fixes one of the crashing problem # to solve the crash issue # TODO why do we still have this crashing problem? 
try: reset_environment_and_additional_commands(self.controller, scene_name) except Exception as e: print("RESETTING THE SCENE,", scene_name, "because of", str(e)) self.controller = ai2thor.controller.Controller(**self.env_args) reset_environment_and_additional_commands(self.controller, scene_name) if self.object_open_speed != 1.0: self.controller.step( {"action": "ChangeOpenSpeed", "x": self.object_open_speed} ) self._initially_reachable_points = None self._initially_reachable_points_set = None self.controller.step({"action": "GetReachablePositions"}) if not self.controller.last_event.metadata["lastActionSuccess"]: warnings.warn( "Error when getting reachable points: {}".format( self.controller.last_event.metadata["errorMessage"] ) ) self._initially_reachable_points = self.last_action_return self.list_of_actions_so_far = [] def randomize_agent_location( self, seed: int = None, partial_position: Optional[Dict[str, float]] = None ) -> Dict: raise NotImplementedError def is_object_at_low_level_hand(self, object_id): current_objects_in_hand = self.controller.last_event.metadata["arm"][ "heldObjects" ] return object_id in current_objects_in_hand def object_in_hand(self): """Object metadata for the object in the agent's hand.""" inv_objs = self.last_event.metadata["inventoryObjects"] if len(inv_objs) == 0: return None elif len(inv_objs) == 1: return self.get_object_by_id( self.last_event.metadata["inventoryObjects"][0]["objectId"] ) else: raise AttributeError("Must be <= 1 inventory objects.") @classmethod def correct_nan_inf(cls, flawed_dict, extra_tag=""): corrected_dict = copy.deepcopy(flawed_dict) for k, v in corrected_dict.items(): if math.isnan(v) or math.isinf(v): corrected_dict[k] = 0 return corrected_dict def get_object_by_id(self, object_id: str) -> Optional[Dict[str, Any]]: for o in self.last_event.metadata["objects"]: if o["objectId"] == object_id: o["position"] = self.correct_nan_inf(o["position"], "obj id") return o return None def 
def close_enough(self, current_obj_pose, init_obj_pose, threshold):
    """Tell whether two poses agree on every position axis.

    # Parameters

    current_obj_pose : Mapping with a `"position"` entry (`"x"`, `"y"`, `"z"`).
    init_obj_pose : Mapping with the same layout.
    threshold : Largest per-axis absolute difference still considered close.

    # Returns

    `True` when the absolute difference along each of the three axes is at
    most `threshold`; only positions are compared.
    """
    per_axis_ok = []
    for axis in ("x", "y", "z"):
        gap = abs(
            current_obj_pose["position"][axis] - init_obj_pose["position"][axis]
        )
        per_axis_ok.append(gap <= threshold)
    # All three axis checks must pass.
    return sum(per_axis_ok) == len(per_axis_ok)
def get_objects_move_distance(
    self,
    initial_object_locations,
    previous_object_locations,
    current_object_locations,
    target_object_id,
    only_visible: bool = False,
    thres_dict: Optional[Dict] = None,
):
    """Sum, over non-target objects, the change in displacement from start.

    For every object in `current_object_locations` that is not the target
    and also appears in `previous_object_locations`, the distance from its
    initial position is measured for the current and the previous snapshot;
    the difference (current minus previous) is accumulated.

    # Parameters

    initial_object_locations : Object id -> pose at the start.
    previous_object_locations : Object id -> pose at the previous snapshot.
    current_object_locations : Object id -> pose now (needs a `"visible"`
        entry when `only_visible` is set).
    target_object_id : Object excluded from the computation.
    only_visible : Skip objects whose current pose is not visible.
    thres_dict : Optional per-object slack keyed by `"<scene>-<object id>"`,
        subtracted from both distances (each floored at zero).

    # Returns

    The summed displacement change.
    """
    scene_prefix = self.scene_name.split("_")[0]
    displacement_changes = []

    for obj_id, current_pose in current_object_locations.items():
        if obj_id == target_object_id or obj_id not in previous_object_locations:
            continue
        if only_visible and not current_pose["visible"]:
            continue

        start_pose = initial_object_locations[obj_id]
        dist_now = position_distance(current_pose, start_pose, filter_nan=True)
        dist_before = position_distance(
            previous_object_locations[obj_id], start_pose, filter_nan=True
        )

        if thres_dict is None:
            slack = 0.0
        else:
            slack = max(0.0, thres_dict[scene_prefix + "-" + obj_id])

        displacement_changes.append(
            max(0.0, dist_now - slack) - max(0.0, dist_before - slack)
        )

    return sum(displacement_changes)
self.get_pickupable_objects() # if object_id in pickupable_objects: # This version of the task is actually harder # consider making it easier, are we penalizing failed pickup? yes self.step(dict(action="PickupObject")) # we are doing an additional pass here, label is not right and if we fail we will do it twice object_inventory = self.controller.last_event.metadata["arm"][ "heldObjects" ] if ( len(object_inventory) > 0 and object_id not in object_inventory ): self.step(dict(action="ReleaseObject")) action_dict = {"action": "Pass"} elif action in [MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT]: copy_additions = copy.deepcopy(ADDITIONAL_ARM_ARGS) action_dict = {**action_dict, **copy_additions} if action in [MOVE_AHEAD]: action_dict["action"] = "MoveAgent" action_dict["ahead"] = 0.2 elif action in [ROTATE_RIGHT]: action_dict["action"] = "RotateAgent" action_dict["degrees"] = 45 elif action in [ROTATE_LEFT]: action_dict["action"] = "RotateAgent" action_dict["degrees"] = -45 elif "MoveArm" in action: copy_additions = copy.deepcopy(ADDITIONAL_ARM_ARGS) action_dict = {**action_dict, **copy_additions} base_position = self.get_current_arm_state() if "MoveArmHeight" in action: action_dict["action"] = "MoveArmBase" if action == "MoveArmHeightP": base_position["h"] += MOVE_ARM_HEIGHT_CONSTANT if action == "MoveArmHeightM": base_position[ "h" ] -= MOVE_ARM_HEIGHT_CONSTANT # height is pretty big! 
action_dict["y"] = base_position["h"] else: action_dict["action"] = "MoveArm" if action == "MoveArmXP": base_position["x"] += MOVE_ARM_CONSTANT elif action == "MoveArmXM": base_position["x"] -= MOVE_ARM_CONSTANT elif action == "MoveArmYP": base_position["y"] += MOVE_ARM_CONSTANT elif action == "MoveArmYM": base_position["y"] -= MOVE_ARM_CONSTANT elif action == "MoveArmZP": base_position["z"] += MOVE_ARM_CONSTANT elif action == "MoveArmZM": base_position["z"] -= MOVE_ARM_CONSTANT action_dict["position"] = { k: v for (k, v) in base_position.items() if k in ["x", "y", "z"] } elif "RotateArm" in action: copy_additions = copy.deepcopy(ADDITIONAL_ARM_ARGS) action_dict = {**action_dict, **copy_additions} if action == ROTATE_WRIST_PITCH_P: action_dict["action"] = "RotateWristRelative" action_dict["pitch"] = 15 elif action == ROTATE_WRIST_PITCH_M: action_dict["action"] = "RotateWristRelative" action_dict["pitch"] = -15 elif action == ROTATE_WRIST_YAW_P: action_dict["action"] = "RotateWristRelative" action_dict["yaw"] = 15 elif action == ROTATE_WRIST_YAW_M: action_dict["action"] = "RotateWristRelative" action_dict["yaw"] = -15 elif action == ROTATE_ELBOW_P: action_dict["action"] = "RotateElbowRelative" action_dict["degrees"] = 15 elif action == ROTATE_ELBOW_M: action_dict["action"] = "RotateElbowRelative" action_dict["degrees"] = -15 else: raise ValueError("invalid action " + str(action)) elif action in [LOOK_UP, LOOK_DOWN]: copy_additions = copy.deepcopy(ADDITIONAL_ARM_ARGS) action_dict = {**action_dict, **copy_additions} if action == LOOK_UP: action_dict["action"] = LOOK_UP elif action == LOOK_DOWN: action_dict["action"] = LOOK_DOWN # there exists other actions e.g. 
"PlaceObjectAtPoint" sr = self.controller.step(action_dict) self.list_of_actions_so_far.append(action_dict) if self._verbose: print(self.controller.last_event) if self.restrict_to_initially_reachable_points: self._snap_agent_to_initially_reachable() if skip_render: assert last_frame is not None self.last_event.frame = last_frame return sr ================================================ FILE: allenact_plugins/manipulathor_plugin/manipulathor_sensors.py ================================================ """Utility classes and functions for sensory inputs used by the models.""" from typing import Any, Union, Optional import gym import numpy as np from allenact.base_abstractions.sensor import Sensor from allenact.embodiedai.sensors.vision_sensors import DepthSensor, RGBSensor from allenact.base_abstractions.task import Task from allenact.utils.misc_utils import prepare_locals_for_super from allenact_plugins.manipulathor_plugin.arm_calculation_utils import ( world_coords_to_agent_coords, state_dict_to_tensor, diff_position, coord_system_transform, ) from allenact_plugins.manipulathor_plugin.manipulathor_environment import ( ManipulaTHOREnvironment, ) class DepthSensorThor( DepthSensor[ Union[ManipulaTHOREnvironment], Union[Task[ManipulaTHOREnvironment]], ] ): """Sensor for Depth images in THOR. Returns from a running ManipulaTHOREnvironment instance, the current RGB frame corresponding to the agent's egocentric view. """ def frame_from_env( self, env: ManipulaTHOREnvironment, task: Optional[Task] ) -> np.ndarray: return env.controller.last_event.depth_frame.copy() class NoVisionSensorThor( RGBSensor[ Union[ManipulaTHOREnvironment], Union[Task[ManipulaTHOREnvironment]], ] ): """Sensor for RGB images in THOR. Returns from a running ManipulaTHOREnvironment instance, the current RGB frame corresponding to the agent's egocentric view. 
""" def frame_from_env( self, env: ManipulaTHOREnvironment, task: Optional[Task] ) -> np.ndarray: return np.zeros_like(env.current_frame) class AgentRelativeCurrentObjectStateThorSensor(Sensor): def __init__(self, uuid: str = "relative_current_obj_state", **kwargs: Any): observation_space = gym.spaces.Box( low=-100, high=100, shape=(6,), dtype=np.float32 ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) super().__init__(**prepare_locals_for_super(locals())) def get_observation( self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any ) -> Any: object_id = task.task_info["objectId"] current_object_state = env.get_object_by_id(object_id) relative_current_obj = world_coords_to_agent_coords( current_object_state, env.controller.last_event.metadata["agent"] ) result = state_dict_to_tensor( dict( position=relative_current_obj["position"], rotation=relative_current_obj["rotation"], ) ) return result class RelativeObjectToGoalSensor(Sensor): def __init__( self, uuid: str = "relative_obj_to_goal", coord_system: str = "xyz_unsigned", **kwargs: Any ): assert coord_system in [ "xyz_unsigned", "xyz_signed", "polar_radian", "polar_trigo", ] self.coord_system = coord_system if coord_system == "polar_trigo": obs_dim = 5 else: obs_dim = 3 observation_space = gym.spaces.Box( low=-100, high=100, shape=(obs_dim,), dtype=np.float32 ) super().__init__(**prepare_locals_for_super(locals())) def get_observation( self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any ) -> Any: goal_obj_id = task.task_info["objectId"] object_info = env.get_object_by_id(goal_obj_id) target_state = task.task_info["target_location"] agent_state = env.controller.last_event.metadata["agent"] relative_current_obj = world_coords_to_agent_coords(object_info, agent_state) relative_goal_state = world_coords_to_agent_coords(target_state, agent_state) relative_distance = diff_position( relative_current_obj, relative_goal_state, absolute=False, ) result = 
class DistanceObjectToGoalSensor(Sensor):
    """Sensor reporting the scalar distance between the object and its goal.

    Both the current object state and the target location are converted
    relative to the agent, their position difference is taken, and the
    Euclidean norm of that difference is returned as a one-element tensor.
    """

    def __init__(self, uuid: str = "distance_obj_to_goal", **kwargs: Any):
        # `get_observation` returns `.view(1)`, i.e. a single value, so the
        # declared space must have shape (1,) (it previously claimed (3,)).
        # NOTE: every local defined here is forwarded to the parent
        # constructor through `locals()`, so no extra locals may be added.
        observation_space = gym.spaces.Box(
            low=-100, high=100, shape=(1,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return the object-to-goal distance as a tensor of shape (1,)."""
        goal_obj_id = task.task_info["objectId"]
        object_info = env.get_object_by_id(goal_obj_id)
        target_state = task.task_info["target_location"]
        agent_state = env.controller.last_event.metadata["agent"]

        relative_current_obj = world_coords_to_agent_coords(object_info, agent_state)
        relative_goal_state = world_coords_to_agent_coords(target_state, agent_state)
        relative_distance = diff_position(relative_current_obj, relative_goal_state)
        result = state_dict_to_tensor(dict(position=relative_distance))

        # Collapse the per-axis difference into its Euclidean norm.
        result = ((result**2).sum() ** 0.5).view(1)
        return result
class DistanceAgentArmToObjectSensor(Sensor):
    """Sensor reporting the scalar distance between the hand and the object.

    The task object's state and the absolute hand state are converted
    relative to the agent, their position difference is taken, and the
    Euclidean norm of that difference is returned as a one-element tensor.
    """

    def __init__(self, uuid: str = "distance_agent_arm_to_obj", **kwargs: Any):
        # `get_observation` returns `.view(1)`, i.e. a single value, so the
        # declared space must have shape (1,) (it previously claimed (3,)).
        # NOTE: every local defined here is forwarded to the parent
        # constructor through `locals()`, so no extra locals may be added.
        observation_space = gym.spaces.Box(
            low=-100, high=100, shape=(1,), dtype=np.float32
        )
        super().__init__(**prepare_locals_for_super(locals()))

    def get_observation(
        self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
    ) -> Any:
        """Return the hand-to-object distance as a tensor of shape (1,)."""
        goal_obj_id = task.task_info["objectId"]
        object_info = env.get_object_by_id(goal_obj_id)
        hand_state = env.get_absolute_hand_state()

        relative_goal_obj = world_coords_to_agent_coords(
            object_info, env.controller.last_event.metadata["agent"]
        )
        relative_hand_state = world_coords_to_agent_coords(
            hand_state, env.controller.last_event.metadata["agent"]
        )
        relative_distance = diff_position(relative_goal_obj, relative_hand_state)
        result = state_dict_to_tensor(dict(position=relative_distance))

        # Collapse the per-axis difference into its Euclidean norm.
        result = ((result**2).sum() ** 0.5).view(1)
        return result
class AbstractMidLevelArmTaskSampler(TaskSampler):
    """Shared state and bookkeeping for mid-level arm task samplers.

    Stores the sampling configuration, lazily holds a single
    `ManipulaTHOREnvironment` (created by `_create_environment`), and keeps
    the scene-order / task-count counters that `reset` re-initialises.
    """

    # Task class instantiated by concrete samplers.
    _TASK_TYPE = Task

    def __init__(
        self,
        scenes: List[str],
        sensors: List[Sensor],
        max_steps: int,
        env_args: Dict[str, Any],
        action_space: gym.Space,
        rewards_config: Dict,
        objects: List[str],
        scene_period: Optional[Union[int, str]] = None,
        max_tasks: Optional[int] = None,
        num_task_per_scene: Optional[int] = None,
        seed: Optional[int] = None,
        deterministic_cudnn: bool = False,
        fixed_tasks: Optional[List[Dict[str, Any]]] = None,
        visualizers: Optional[List[LoggerVisualizer]] = None,
        *args,
        **kwargs
    ) -> None:
        """Store the sampler configuration and initialise its counters.

        # Parameters

        scenes : Scene names to sample from.
        sensors : Sensors stored for the sampled tasks.
        max_steps : Step budget stored for the sampled tasks.
        env_args : Keyword arguments handed to the environment.
        action_space : Action space stored for the sampled tasks.
        rewards_config : Reward configuration stored for the sampled tasks.
        objects : Object names stored on the sampler.
        scene_period : Stored as given.
        max_tasks : Task budget; kept in `reset_tasks` and copied to
            `max_tasks` by `reset`.
        num_task_per_scene : Stored as given.
        seed : Passed to `set_seed`.
        deterministic_cudnn : When true, `set_deterministic_cudnn()` is called.
        fixed_tasks : Not read by this constructor.
        visualizers : Visualizers; an empty list is used when `None`.
        kwargs : Must contain `"sampler_mode"` and `"cap_training"`
            (a missing key raises `KeyError`).
        """
        self.rewards_config = rewards_config
        self.env_args = env_args
        self.scenes = scenes
        self.grid_size = 0.25
        # Created on demand by subclasses via `_create_environment`.
        self.env: Optional[ManipulaTHOREnvironment] = None
        self.sensors = sensors
        self.max_steps = max_steps
        self._action_space = action_space
        self.objects = objects
        self.num_task_per_scene = num_task_per_scene

        self.scene_counter: Optional[int] = None
        # `reset` fills this with shuffled indices into `self.scenes`.
        self.scene_order: Optional[List[int]] = None
        self.scene_id: Optional[int] = None
        self.scene_period: Optional[Union[str, int]] = (
            scene_period  # default makes a random choice
        )
        self.max_tasks: Optional[int] = None
        self.reset_tasks = max_tasks

        self._last_sampled_task: Optional[Task] = None

        self.seed: Optional[int] = None
        # Seed before `reset`, which shuffles the scene order with `random`.
        self.set_seed(seed)

        if deterministic_cudnn:
            set_deterministic_cudnn()

        self.reset()
        self.visualizers = visualizers if visualizers is not None else []
        self.sampler_mode = kwargs["sampler_mode"]
        self.cap_training = kwargs["cap_training"]

    def _create_environment(self, **kwargs) -> ManipulaTHOREnvironment:
        """Build a `ManipulaTHOREnvironment` from `self.env_args`.

        `kwargs` is accepted but not read.
        """
        env = ManipulaTHOREnvironment(
            make_agents_visible=False,
            object_open_speed=0.05,
            env_args=self.env_args,
        )

        return env

    @property
    def last_sampled_task(self) -> Optional[Task]:
        """The most recently sampled task, or `None` if there is none yet."""
        return self._last_sampled_task

    def close(self) -> None:
        """Stop the environment if one has been created."""
        if self.env is not None:
            self.env.stop()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Check if observation spaces equal.

        # Returns

        True if all Tasks that can be sampled by this sampler have the
            same observation space. Otherwise False.
        """
        return True

    def reset(self):
        """Reshuffle the scene order and restore counters and task budget."""
        self.scene_counter = 0
        self.scene_order = list(range(len(self.scenes)))
        random.shuffle(self.scene_order)
        self.scene_id = 0
        self.sampler_index = 0

        self.max_tasks = self.reset_tasks

    def set_seed(self, seed: Optional[int]):
        """Record `seed` and, unless it is `None`, apply it via `set_seed`."""
        self.seed = seed
        if seed is not None:
            set_seed(seed)
def next_task(
    self, force_advance_scene: bool = False
) -> Optional[AbstractPickUpDropOffTask]:
    """Sample the next task, preparing the scene, object and agent for it.

    # Parameters

    force_advance_scene : Not read by this method.

    # Returns

    A new task of type `self._TASK_TYPE`, or `None` when the task budget
    (`max_tasks`) is exhausted or, outside of training, no deterministic
    data points remain.
    """
    if self.max_tasks is not None and self.max_tasks <= 0:
        return None

    if self.sampler_mode != "train" and self.length <= 0:
        return None

    source_data_point, target_data_point = self.get_source_target_indices()

    scene = source_data_point["scene_name"]

    # Source and target must describe the same object in the same scene.
    assert source_data_point["object_id"] == target_data_point["object_id"]
    assert source_data_point["scene_name"] == target_data_point["scene_name"]

    # The environment is created lazily on the first sampled task.
    if self.env is None:
        self.env = self._create_environment()

    self.env.reset(
        scene_name=scene, agentMode="arm", agentControllerType="mid-level"
    )

    initialize_arm(self.env.controller)

    source_location = source_data_point
    target_location = dict(
        position=target_data_point["object_location"],
        rotation={"x": 0, "y": 0, "z": 0},
    )

    task_info = {
        "objectId": source_location["object_id"],
        "countertop_id": source_location["countertop_id"],
        "source_location": source_location,
        "target_location": target_location,
    }

    # NOTE: despite its name this is the environment wrapper, not the raw
    # ai2thor controller.
    this_controller = self.env

    # Place the object at its source location before positioning the agent.
    transport_wrapper(
        this_controller,
        source_location["object_id"],
        source_location["object_location"],
    )
    agent_state = source_location["agent_pose"]

    this_controller.step(
        dict(
            action="TeleportFull",
            standing=True,
            x=agent_state["position"]["x"],
            y=agent_state["position"]["y"],
            z=agent_state["position"]["z"],
            rotation=dict(
                x=agent_state["rotation"]["x"],
                y=agent_state["rotation"]["y"],
                z=agent_state["rotation"]["z"],
            ),
            horizon=agent_state["cameraHorizon"],
        )
    )

    # Only attach the raw data points when an image visualizer is present.
    should_visualize_goal_start = [
        x for x in self.visualizers if issubclass(type(x), ImageVisualizer)
    ]
    if len(should_visualize_goal_start) > 0:
        task_info["visualization_source"] = source_data_point
        task_info["visualization_target"] = target_data_point

    self._last_sampled_task = self._TASK_TYPE(
        env=self.env,
        sensors=self.sensors,
        task_info=task_info,
        max_steps=self.max_steps,
        action_space=self._action_space,
        visualizers=self.visualizers,
        reward_configs=self.rewards_config,
    )

    return self._last_sampled_task
""" return ( self.total_unique - self.sampler_index if self.sampler_mode != "train" else (float("inf") if self.max_tasks is None else self.max_tasks) ) def get_source_target_indices(self): if self.sampler_mode == "train": valid_countertops = [ k for (k, v) in self.countertop_object_to_data_id.items() if len(v) > 1 ] countertop_id = random.choice(valid_countertops) indices = random.sample(self.countertop_object_to_data_id[countertop_id], 2) result = ( self.all_possible_points[indices[0]], self.all_possible_points[indices[1]], ) else: result = self.deterministic_data_list[self.sampler_index]["datapoint"] self.sampler_index += 1 return result def calc_possible_trajectories(self, all_possible_points): object_to_data_id = {} for i in range(len(all_possible_points)): object_id = all_possible_points[i]["object_id"] object_to_data_id.setdefault(object_id, []) object_to_data_id[object_id].append(i) return object_to_data_id class ArmPointNavTaskSampler(SimpleArmPointNavGeneralSampler): _TASK_TYPE = ArmPointNavTask def __init__(self, **kwargs) -> None: super(ArmPointNavTaskSampler, self).__init__(**kwargs) possible_initial_locations = ( "datasets/apnd-dataset/valid_agent_initial_locations.json" ) if self.sampler_mode == "test": possible_initial_locations = ( "datasets/apnd-dataset/deterministic_valid_agent_initial_locations.json" ) with open(possible_initial_locations) as f: self.possible_agent_reachable_poses = json.load(f) def next_task( self, force_advance_scene: bool = False ) -> Optional[AbstractPickUpDropOffTask]: if self.max_tasks is not None and self.max_tasks <= 0: return None if self.sampler_mode != "train" and self.length <= 0: return None source_data_point, target_data_point = self.get_source_target_indices() scene = source_data_point["scene_name"] assert source_data_point["object_id"] == target_data_point["object_id"] assert source_data_point["scene_name"] == target_data_point["scene_name"] if self.env is None: self.env = self._create_environment() 
def get_source_target_indices(self):
    """Pick a (source, target) datapoint pair and attach an agent start pose.

    In ``"train"`` mode a countertop/object group with more than one
    datapoint is chosen at random, two distinct datapoints are sampled from
    it, and the agent start location is drawn at random from
    ``self.possible_agent_reachable_poses`` for the source's scene.

    In every other mode the pair is read from
    ``self.deterministic_data_list[self.sampler_index]``, the agent start
    location is looked up with that entry's ``"scene"`` and ``"index"``
    (so evaluation is repeatable), and ``self.sampler_index`` is advanced.

    Returns:
        The datapoint pair; its first element has been given an
        ``"initial_agent_pose"`` entry (the datapoint dict is updated in
        place).
    """
    is_train = self.sampler_mode == "train"

    if is_train:
        valid_countertops = [
            k for (k, v) in self.countertop_object_to_data_id.items() if len(v) > 1
        ]
        countertop_id = random.choice(valid_countertops)
        indices = random.sample(self.countertop_object_to_data_id[countertop_id], 2)
        result = (
            self.all_possible_points[indices[0]],
            self.all_possible_points[indices[1]],
        )
        scene_name = result[0]["scene_name"]
        selected_agent_init_loc = random.choice(
            self.possible_agent_reachable_poses[scene_name]
        )
    else:
        # agent init location needs to be fixed, therefore we load a fixed
        # valid agent init that is previously randomized
        entry = self.deterministic_data_list[self.sampler_index]
        result = entry["datapoint"]
        selected_agent_init_loc = self.possible_agent_reachable_poses[
            entry["scene"]
        ][entry["index"]]

    # The pose layout is identical for both modes, so build it in one place.
    result[0]["initial_agent_pose"] = {
        "name": "agent",
        "position": {
            "x": selected_agent_init_loc["x"],
            "y": selected_agent_init_loc["y"],
            "z": selected_agent_init_loc["z"],
        },
        "rotation": {
            "x": -0.0,
            "y": selected_agent_init_loc["rotation"],
            "z": 0.0,
        },
        "cameraHorizon": selected_agent_init_loc["horizon"],
        "isStanding": True,
    }

    if not is_train:
        # Only advance once the datapoint has been fully prepared.
        self.sampler_index += 1

    return result
range(len(list) - 1): for second_ind in range(first_ind + 1, len(list)): result.append([list[first_ind], list[second_ind]]) return result ================================================ FILE: allenact_plugins/manipulathor_plugin/manipulathor_tasks.py ================================================ """Task Definions for the task of ArmPointNav.""" import copy from typing import Dict, Tuple, List, Any, Optional import gym import numpy as np from allenact.base_abstractions.misc import RLStepResult from allenact.base_abstractions.sensor import Sensor from allenact.base_abstractions.task import Task from allenact_plugins.manipulathor_plugin.armpointnav_constants import ( MOVE_ARM_CONSTANT, DISTANCE_EPS, ) from allenact_plugins.manipulathor_plugin.manipulathor_constants import ( MOVE_ARM_HEIGHT_P, MOVE_ARM_HEIGHT_M, MOVE_ARM_X_P, MOVE_ARM_X_M, MOVE_ARM_Y_P, MOVE_ARM_Y_M, MOVE_ARM_Z_P, MOVE_ARM_Z_M, ROTATE_WRIST_PITCH_P, ROTATE_WRIST_PITCH_M, ROTATE_WRIST_YAW_P, ROTATE_WRIST_YAW_M, ROTATE_ELBOW_P, ROTATE_ELBOW_M, LOOK_UP, LOOK_DOWN, MOVE_AHEAD, ROTATE_RIGHT, ROTATE_LEFT, PICKUP, DONE, ) from allenact_plugins.manipulathor_plugin.manipulathor_environment import ( ManipulaTHOREnvironment, position_distance, ) from allenact_plugins.manipulathor_plugin.manipulathor_viz import LoggerVisualizer class AbstractPickUpDropOffTask(Task[ManipulaTHOREnvironment]): _actions = ( MOVE_ARM_HEIGHT_P, MOVE_ARM_HEIGHT_M, MOVE_ARM_X_P, MOVE_ARM_X_M, MOVE_ARM_Y_P, MOVE_ARM_Y_M, MOVE_ARM_Z_P, MOVE_ARM_Z_M, MOVE_AHEAD, ROTATE_RIGHT, ROTATE_LEFT, ) # New commit of AI2THOR has some issue that the objects will vibrate a bit # without any external force. To eliminate the vibration effect, we have to # introduce _vibration_dist_dict when checking the disturbance, from an external csv file. # By default it is None, i.e. we assume there is no vibration. 
def __init__(
    self,
    env: ManipulaTHOREnvironment,
    sensors: List[Sensor],
    task_info: Dict[str, Any],
    max_steps: int,
    visualizers: Optional[List[LoggerVisualizer]] = None,
    **kwargs
) -> None:
    """Initializer.

    See class documentation for parameter definitions. ``kwargs`` must
    contain ``"reward_configs"``; all of ``kwargs`` is also forwarded to the
    parent initializer.
    """
    super().__init__(
        env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
    )
    self._took_end_action: bool = False
    self._success: Optional[bool] = False
    self._subsampled_locations_from_which_obj_visible: Optional[
        List[Tuple[float, float, int, int]]
    ] = None
    self.action_sequence_and_success = []

    # in allenact initialization is with 0.2
    self.last_obj_to_goal_distance = None
    self.last_arm_to_obj_distance = None
    self.object_picked_up = False
    self.got_reward_for_pickup = False
    self.reward_configs = kwargs["reward_configs"]

    # All episode state above must exist before the visualizers are started:
    # when a visualizer still holds frames from an unfinished episode,
    # `start_visualize` hands this task to `finish_episode`, which reads
    # attributes such as `object_picked_up` and `_success`.
    self.visualizers = visualizers if visualizers is not None else []
    self.start_visualize()

    self.initial_object_locations = self.env.get_current_object_locations()
def calc_action_stat_metrics(self) -> Dict[str, Any]:
    """Summarise how often each action was taken and how often it succeeded.

    Reads ``self.action_sequence_and_success`` (a list of
    ``(action_name, success)`` pairs) and ``self._actions``.

    Returns:
        A dict with, for every action in ``self._actions``:

        * ``"action_stat/<action>"``: fraction of recorded steps that used
          the action;
        * ``"action_success/<action>"``: mean success of that action (0.0
          when it was never taken);

        plus ``"action_success/total"``, the mean success over all recorded
        steps. When no step has been recorded every value is 0.0.
    """
    action_stat = {"action_stat/" + action_str: 0.0 for action_str in self._actions}
    action_success_stat = {
        "action_success/" + action_str: 0.0 for action_str in self._actions
    }
    action_success_stat["action_success/total"] = 0.0

    for action_name, action_success in self.action_sequence_and_success:
        action_stat["action_stat/" + action_name] += 1.0
        action_success_stat["action_success/{}".format(action_name)] += action_success
        action_success_stat["action_success/total"] += action_success

    # An episode may finish before any action is recorded; clamp the
    # denominator so that case yields zeros instead of ZeroDivisionError.
    seq_len = max(len(self.action_sequence_and_success), 1)
    action_success_stat["action_success/total"] /= seq_len

    for action_name in self._actions:
        # Per-action success must be normalised by the raw count, i.e.
        # before the count itself is turned into a frequency below.
        action_success_stat["action_success/{}".format(action_name)] /= max(
            action_stat["action_stat/" + action_name], 1.0
        )
        action_stat["action_stat/" + action_name] /= seq_len

    return {**action_stat, **action_success_stat}
goal object metrics final_obj_distance_from_goal = self.obj_distance_from_goal() result["average/final_obj_distance_from_goal"] = ( final_obj_distance_from_goal ) final_arm_distance_from_obj = self.arm_distance_from_obj() result["average/final_arm_distance_from_obj"] = final_arm_distance_from_obj final_obj_pickup = 1 if self.object_picked_up else 0 result["average/final_obj_pickup"] = final_obj_pickup original_distance = self.get_original_object_distance() + DISTANCE_EPS result["average/original_distance"] = original_distance # this ratio can be more than 1 if self.object_picked_up: ratio_distance_left = final_obj_distance_from_goal / original_distance result["average/ratio_distance_left"] = ratio_distance_left result["average/eplen_pickup"] = self.eplen_pickup # 2. disturbance with other objects current_object_locations = self.env.get_current_object_locations() objects_moved = self.env.get_objects_moved( self.initial_object_locations, current_object_locations, self.task_info["objectId"], self._vibration_dist_dict, ) result["disturbance/objects_moved_num"] = len(objects_moved) # 3. 
def judge(self) -> float:
    """Compute the reward after having taken a step.

    This base implementation is only a placeholder; concrete tasks are
    expected to override it.

    Raises:
        NotImplementedError: Always. (A subclass of ``Exception``, so code
            that caught the previous generic ``Exception`` keeps working.)
    """
    raise NotImplementedError("Not implemented")
def visualize(self, action_str=None, **kwargs):
    """Forward the latest step to every attached visualizer.

    Args:
        action_str: Name of the action that was just executed. May be given
            positionally (the signature used by the parent class) or by
            keyword. When omitted, no ``action_str`` is forwarded and each
            visualizer falls back to its own default.
        **kwargs: Extra keyword arguments passed through unchanged to
            ``visualizer.log`` (e.g. ``disturbance_str``).
    """
    if action_str is not None:
        kwargs["action_str"] = action_str
    for vizualizer in self.visualizers:
        vizualizer.log(self.env, **kwargs)
self.last_action_success}, ) return step_result def judge(self) -> float: """Compute the reward after having taken a step.""" reward = self.reward_configs["step_penalty"] if not self.last_action_success or ( self._last_action_str == PICKUP and not self.object_picked_up ): reward += self.reward_configs["failed_action_penalty"] if self._took_end_action: reward += ( self.reward_configs["goal_success_reward"] if self._success else self.reward_configs["failed_stop_reward"] ) # increase reward if object pickup and only do it once if not self.got_reward_for_pickup and self.object_picked_up: reward += self.reward_configs["pickup_success_reward"] self.got_reward_for_pickup = True current_obj_to_arm_distance = self.arm_distance_from_obj() if self.last_arm_to_obj_distance is None: delta_arm_to_obj_distance_reward = 0 else: delta_arm_to_obj_distance_reward = ( self.last_arm_to_obj_distance - current_obj_to_arm_distance ) self.last_arm_to_obj_distance = current_obj_to_arm_distance reward += delta_arm_to_obj_distance_reward current_obj_to_goal_distance = self.obj_distance_from_goal() if self.last_obj_to_goal_distance is None: delta_obj_to_goal_distance_reward = 0 else: delta_obj_to_goal_distance_reward = ( self.last_obj_to_goal_distance - current_obj_to_goal_distance ) self.last_obj_to_goal_distance = current_obj_to_goal_distance reward += delta_obj_to_goal_distance_reward # add disturbance cost ## here we measure disturbance by the sum of moving distance of all objects ## note that collided object may move for a while wo external force due to inertia ## and we may also consider mass current_object_locations = self.env.get_current_object_locations() disturb_distance_visible = self.env.get_objects_move_distance( initial_object_locations=self.initial_object_locations, previous_object_locations=self.previous_object_locations, current_object_locations=current_object_locations, target_object_id=self.task_info["objectId"], only_visible=True, thres_dict=self._vibration_dist_dict, ) 
def _step(self, action: int) -> RLStepResult:
    """Execute one action, grasping the target automatically when possible.

    After the chosen action is sent to the environment, the task:

    * records ``(action_name, success)`` in
      ``self.action_sequence_and_success`` and notifies the visualizers;
    * if the object has not been picked up yet and is listed as pickupable,
      issues ``PickupObject`` (releasing again if something else ended up in
      the hand) and marks the pickup once the object is in the hand;
    * once the object is held, ends the episode successfully as soon as the
      object is close enough to the target location.

    Args:
        action: Index into ``self.class_action_names()``.

    Returns:
        The ``RLStepResult`` for this step.
    """
    action_str = self.class_action_names()[action]

    self._last_action_str = action_str
    action_dict = {"action": action_str}
    object_id = self.task_info["objectId"]
    if action_str == PICKUP:
        action_dict = {**action_dict, "object_id": object_id}
    self.env.step(action_dict)
    self.last_action_success = self.env.last_action_success

    last_action_name = self._last_action_str
    last_action_success = float(self.last_action_success)
    self.action_sequence_and_success.append((last_action_name, last_action_success))
    # Keyword form keeps this call valid whether `visualize` takes
    # `action_str` positionally or only through **kwargs.
    self.visualize(action_str=last_action_name)

    # If the object has not been picked up yet and it was picked up in the
    # previous step update parameters to integrate it into reward
    if not self.object_picked_up:
        if (
            object_id
            in self.env.controller.last_event.metadata["arm"]["pickupableObjects"]
        ):
            self.env.step(dict(action="PickupObject"))
            # we are doing an additional pass here, label is not right and
            # if we fail we will do it twice
            object_inventory = self.env.controller.last_event.metadata["arm"][
                "heldObjects"
            ]
            if len(object_inventory) > 0 and object_id not in object_inventory:
                self.env.step(dict(action="ReleaseObject"))

        if self.env.is_object_at_low_level_hand(object_id):
            self.object_picked_up = True
            self.eplen_pickup = (
                self._num_steps_taken + 1
            )  # plus one because this step has not been counted yet

    if self.object_picked_up:
        object_state = self.env.get_object_by_id(object_id)
        goal_state = self.task_info["target_location"]
        goal_achieved = self.object_picked_up and self.obj_state_aproximity(
            object_state, goal_state
        )
        if goal_achieved:
            self._took_end_action = True
            self.last_action_success = goal_achieved
            self._success = goal_achieved

    step_result = RLStepResult(
        observation=self.get_observations(),
        reward=self.judge(),
        done=self.is_done(),
        info={"last_action_success": self.last_action_success},
    )
    return step_result
def transport_wrapper(controller, target_object, target_location):
    """Place an object at a position and then advance the physics simulation.

    Args:
        controller: Either an ``IThorEnvironment`` (or subclass), whose
            ``step`` takes one action dict, or an
            ``ai2thor.controller.Controller`` (or subclass), whose ``step``
            takes keyword arguments.
        target_object: ``objectId`` of the object to move.
        target_location: Value passed as ``position`` to
            ``PlaceObjectAtPoint``.

    Returns:
        The event returned by the ``PlaceObjectAtPoint`` step (not the one
        from the subsequent ``AdvancePhysicsStep``).

    Raises:
        NotImplementedError: If ``controller`` is of neither supported type.
    """
    transport_detail = dict(
        action="PlaceObjectAtPoint",
        objectId=target_object,
        position=target_location,
        forceKinematic=True,
    )
    advance_detail = dict(action="AdvancePhysicsStep", simSeconds=1.0)

    # The environment wrapper is tested first, exactly as before; isinstance
    # also admits subclasses of the raw controller.
    if isinstance(controller, IThorEnvironment):
        event = controller.step(transport_detail)
        controller.step(advance_detail)
    elif isinstance(controller, ai2thor.controller.Controller):
        event = controller.step(**transport_detail)
        controller.step(**advance_detail)
    else:
        raise NotImplementedError(
            "Unsupported controller type: {}".format(type(controller))
        )
    return event
class LoggerVisualizer:
    """Base class for per-episode loggers/visualizers attached to a task.

    On construction the output directory ``<exp_name>/<log_dir>`` is created
    (defaulting to ``NoNameExp/<ClassName>``) and empty frame/action queues
    are set up. Subclasses implement ``log`` and may override the
    ``finish_episode*`` hooks, which are no-ops here.
    """

    def __init__(self, exp_name="", log_dir=""):
        if log_dir == "":
            log_dir = self.__class__.__name__
        if exp_name == "":
            exp_name = "NoNameExp"
        self.exp_name = exp_name
        log_dir = os.path.join(
            exp_name,
            log_dir,
        )
        self.log_dir = log_dir
        os.makedirs(self.log_dir, exist_ok=True)
        self.log_queue = []
        self.action_queue = []
        self.logger_index = 0

    def log(self, environment, action_str="", disturbance_str=""):
        """Record one step; must be provided by subclasses.

        The defaults mirror the signature used by the concrete visualizers
        in this module, so every call form they accept reaches this method
        instead of failing on argument binding.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Not Implemented")

    def is_empty(self):
        """Return True when no frame has been queued since the last reset."""
        return len(self.log_queue) == 0

    def finish_episode_metrics(self, episode_info, task_info, metric_results):
        """Hook called with the final metrics of an episode (no-op here)."""
        pass

    def finish_episode(self, environment, episode_info, task_info):
        """Hook called when an episode ends (no-op here)."""
        pass
["ep_length", "reward", "success"]: self.total_metric_dict.setdefault(k, []) self.total_metric_dict[k].append(metric_results[k]) print( "total", len(self.total_metric_dict["success"]), "average test metric", self.average_dict(), ) # save the task info and all the action queue and results log_dict = { "logger_number": self.logger_index, "action_sequence": self.action_queue, "disturbance_sequence": self.disturbance_distance_queue, "task_info_metrics": metric_results, } self.logger_index += 1 self.metric_log_file.write(str(log_dict)) self.metric_log_file.write("\n") self.metric_log_file.flush() print("Logging to", self.metric_log_file.name) self.action_queue = [] self.disturbance_distance_queue = [] def log(self, environment, action_str="", disturbance_str=""): # We can add agent arm and state location if needed self.action_queue.append(action_str) self.disturbance_distance_queue.append(disturbance_str) class BringObjImageVisualizer(LoggerVisualizer): def finish_episode(self, environment, episode_info, task_info): now = datetime.now() time_to_write = now.strftime("%Y_%m_%d_%H_%M_%S_%f") time_to_write += "log_ind_{}".format(self.logger_index) self.logger_index += 1 print("Loggigng", time_to_write, "len", len(self.log_queue)) source_object_id = task_info["source_object_id"] goal_object_id = task_info["goal_object_id"] pickup_success = episode_info.object_picked_up episode_success = episode_info._success # Put back if you want the images # for i, img in enumerate(self.log_queue): # image_dir = os.path.join(self.log_dir, time_to_write + '_seq{}.png'.format(str(i))) # cv2.imwrite(image_dir, img[:,:,[2,1,0]]) episode_success_offset = "succ" if episode_success else "fail" pickup_success_offset = "succ" if pickup_success else "fail" gif_name = ( time_to_write + "_from_" + source_object_id.split("|")[0] + "_to_" + goal_object_id.split("|")[0] + "_pickup_" + pickup_success_offset + "_episode_" + episode_success_offset + ".gif" ) concat_all_images = 
def log(self, environment, action_str="", disturbance_str=""):
    """Queue the environment's current frame together with the action name.

    Args:
        environment: Object exposing ``current_frame``.
        action_str: Name of the action that led to this frame. Defaults to
            ``""`` so the method can be called before any action was taken.
        disturbance_str: Accepted so this visualizer can be driven with the
            same keyword arguments as the other visualizers in this module;
            it is not stored.
    """
    image_tensor = environment.current_frame
    self.action_queue.append(action_str)
    self.log_queue.append(image_tensor)
SHOULD NEVER HAPPEN") event = transport_wrapper(this_controller, object_id, object_location) if not event.metadata["lastActionSuccess"]: print("ERROR: oh no could not transport in logging") event = this_controller.step( dict( action="TeleportFull", standing=True, x=agent_state["position"]["x"], y=agent_state["position"]["y"], z=agent_state["position"]["z"], rotation=dict( x=agent_state["rotation"]["x"], y=agent_state["rotation"]["y"], z=agent_state["rotation"]["z"], ), horizon=agent_state["cameraHorizon"], ) ) if not event.metadata["lastActionSuccess"]: print("ERROR: oh no could not teleport in logging") image_tensor = this_controller.last_event.frame image_dir = ( img_adr + "_obj_" + object_id.split("|")[0] + "_pickup_" + tag + ".png" ) cv2.imwrite(image_dir, image_tensor[:, :, [2, 1, 0]]) # Saving the mask target_object_id = task_info["object_id"] all_visible_masks = this_controller.last_event.instance_masks if target_object_id in all_visible_masks: mask_frame = all_visible_masks[target_object_id] else: mask_frame = np.zeros(env.controller.last_event.frame[:, :, 0].shape) mask_dir = ( img_adr + "_obj_" + object_id.split("|")[0] + "_pickup_" + tag + "_mask.png" ) cv2.imwrite(mask_dir, mask_frame.astype(float) * 255.0) class ImageVisualizer(LoggerVisualizer): def __init__( self, exp_name="", log_dir="", add_top_down_view: bool = False, add_depth_map: bool = False, ): super().__init__(exp_name=exp_name, log_dir=log_dir) self.add_top_down_view = add_top_down_view self.add_depth_map = add_depth_map if self.add_top_down_view: self.top_down_queue = [] self.disturbance_distance_queue = [] def finish_episode(self, environment, episode_info, task_info): time_to_write = "log_ind_{:03d}".format(self.logger_index) self.logger_index += 1 print("Logging", time_to_write, "len", len(self.log_queue)) object_id = task_info["objectId"] scene_name = task_info["source_location"]["scene_name"] source_countertop = task_info["source_location"]["countertop_id"] target_countertop = 
task_info["target_location"]["countertop_id"] pickup_success = episode_info.object_picked_up episode_success = episode_info._success # Put back if you want the images # for i, img in enumerate(self.log_queue): # image_dir = os.path.join(self.log_dir, time_to_write + '_seq{}.png'.format(str(i))) # cv2.imwrite(image_dir, img[:,:,[2,1,0]]) episode_success_offset = "succ" if episode_success else "fail" pickup_success_offset = "succ" if pickup_success else "fail" gif_name = ( time_to_write + "_pickup_" + pickup_success_offset + "_episode_" + episode_success_offset + "_" + scene_name.split("_")[0] + "_obj_" + object_id.split("|")[0] + "_from_" + source_countertop.split("|")[0] + "_to_" + target_countertop.split("|")[0] + ".gif" ) self.log_queue = put_annotation_on_image( self.log_queue, self.disturbance_distance_queue ) concat_all_images = np.expand_dims(np.stack(self.log_queue, axis=0), axis=1) if self.add_top_down_view: topdown_all_images = np.expand_dims( np.stack(self.top_down_queue, axis=0), axis=1 ) # (T, 1, H, W, 3) concat_all_images = np.concatenate( [concat_all_images, topdown_all_images], axis=1 ) # (T, 2, H, W, 3) save_image_list_to_gif(concat_all_images, gif_name, self.log_dir) self.log_start_goal( environment, task_info["visualization_source"], tag="start", img_adr=os.path.join(self.log_dir, time_to_write), ) self.log_start_goal( environment, task_info["visualization_target"], tag="goal", img_adr=os.path.join(self.log_dir, time_to_write), ) self.log_queue = [] self.action_queue = [] self.disturbance_distance_queue = [] if self.add_top_down_view: self.top_down_queue = [] def log(self, environment, action_str="", disturbance_str=""): self.action_queue.append(action_str) self.disturbance_distance_queue.append(disturbance_str) image_tensor = environment.current_frame self.log_queue.append(image_tensor) if self.add_top_down_view: # Reference: https://github.com/allenai/ai2thor/pull/814 event = environment.controller.step(action="GetMapViewCameraProperties") event 
def log_start_goal(self, env, task_info, tag, img_adr):
    """Re-stage a datapoint in the simulator and save a snapshot of it.

    The scene is reset, the arm is moved to its initial configuration, the
    object is transported to ``task_info["object_location"]`` and the agent
    is teleported to ``task_info["agent_pose"]``. The resulting frame is
    written to ``<img_adr>_<tag>.png``; when ``self.add_depth_map`` is set a
    viridis-coloured depth image is also written to
    ``<img_adr>_<tag>_depth.png``. Failed simulator actions are only reported
    with ``print``; the images are written regardless.

    Args:
        env: Environment whose ``controller`` is driven directly.
        task_info: Datapoint dict with ``"object_location"``,
            ``"object_id"`` and ``"agent_pose"``.
        tag: Label inserted into the file names (e.g. ``"start"``/``"goal"``).
        img_adr: Path prefix for the written files.
    """
    object_location = task_info["object_location"]
    object_id = task_info["object_id"]
    agent_state = task_info["agent_pose"]
    this_controller = env.controller
    scene = this_controller.last_event.metadata[
        "sceneName"
    ]  # maybe we need to reset env actually]
    reset_environment_and_additional_commands(this_controller, scene)
    # for start arm from high up as a cheating, this block is very important. never remove
    event1, event2, event3 = initialize_arm(this_controller)
    if not (
        event1.metadata["lastActionSuccess"]
        and event2.metadata["lastActionSuccess"]
        and event3.metadata["lastActionSuccess"]
    ):
        print("ERROR: ARM MOVEMENT FAILED in logging! SHOULD NEVER HAPPEN")

    event = transport_wrapper(this_controller, object_id, object_location)
    if not event.metadata["lastActionSuccess"]:
        print("ERROR: oh no could not transport in logging")

    event = this_controller.step(
        dict(
            action="TeleportFull",
            standing=True,
            x=agent_state["position"]["x"],
            y=agent_state["position"]["y"],
            z=agent_state["position"]["z"],
            rotation=dict(
                x=agent_state["rotation"]["x"],
                y=agent_state["rotation"]["y"],
                z=agent_state["rotation"]["z"],
            ),
            horizon=agent_state["cameraHorizon"],
        )
    )
    if not event.metadata["lastActionSuccess"]:
        print("ERROR: oh no could not teleport in logging")

    image_tensor = this_controller.last_event.frame
    image_dir = img_adr + "_" + tag + ".png"
    # Channel order is reversed before handing the frame to cv2.imwrite.
    cv2.imwrite(image_dir, image_tensor[:, :, [2, 1, 0]])

    if self.add_depth_map:
        depth = this_controller.last_event.depth_frame.copy()  # (H, W)
        depth[depth > 5.0] = 5.0  # clip far values so they do not dominate the scale
        norm = matplotlib.colors.Normalize(vmin=depth.min(), vmax=depth.max())
        # `plt.get_cmap` already returns the Colormap; the extra
        # `matplotlib.cm.get_cmap` wrapper is deprecated and absent from
        # recent Matplotlib releases.
        rgb = plt.get_cmap("viridis")(norm(depth))[:, :, :3]  # [0,1]
        rgb = (rgb * 255).astype(np.uint8)

        depth_dir = img_adr + "_" + tag + "_depth.png"
        cv2.imwrite(depth_dir, rgb[:, :, [2, 1, 0]])
def put_annotation_on_image(images, annotations):
    """Draw a text annotation near the top-left corner of each image.

    Images and annotations are paired positionally with ``zip``, so surplus
    entries of the longer input are ignored.

    Args:
        images: Iterable of arrays accepted by ``PIL.Image.fromarray``.
        annotations: Iterable of strings, one per image.

    Returns:
        A list of new numpy arrays, one per annotated image; the inputs are
        not modified.
    """
    # Kept as a function-local import, but executed once per call instead of
    # once per image.
    from PIL import Image, ImageDraw

    position = (10, 10)
    all_images = []
    for img, annot in zip(images, annotations):
        pil_img = Image.fromarray(img)
        draw = ImageDraw.Draw(pil_img)
        draw.text(position, annot, (0, 0, 0))
        all_images.append(np.array(pil_img))
    return all_images
class FastCrossing(CrossingEnv):
    """Similar to `CrossingEnv`, but to support faster task sampling as per
    `repeat_failed_task_for_min_steps` flag in MiniGridTaskSampler.

    The agent's initial pose is remembered on every `reset` so that
    `same_seed_reset` can restart the episode on the same grid without
    regenerating it.
    """

    def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, seed=None):
        # These attributes are assigned before the parent constructor runs so
        # that they already exist if the parent calls `reset` while
        # initialising.
        self.init_agent_pos: Optional[np.ndarray] = None
        self.init_agent_dir: Optional[int] = None
        self.step_count: Optional[int] = None
        super(FastCrossing, self).__init__(
            size=size,
            num_crossings=num_crossings,
            obstacle_type=obstacle_type,
            seed=seed,
        )

    def same_seed_reset(self):
        """Restart the episode on the current grid and return the first
        observation.

        Requires that `reset` has been called at least once.
        """
        assert self.init_agent_pos is not None

        # Current position and direction of the agent
        self.agent_pos = self.init_agent_pos
        self.agent_dir = self.init_agent_dir

        # Check that the agent doesn't overlap with an object
        start_cell = self.grid.get(*self.agent_pos)
        assert start_cell is None or start_cell.can_overlap()

        assert self.carrying is None

        # Step count since episode start
        self.step_count = 0

        # Return first observation
        obs = self.gen_obs()
        return obs

    def reset(self, partial_reset: bool = False):
        """Reset the environment, record the agent's start pose and return
        whatever the parent `reset` returned.

        `partial_reset` is accepted for interface compatibility and is not
        used. Previously the parent's return value was dropped and this method
        returned `None`.
        """
        obs = super(FastCrossing, self).reset()
        self.init_agent_pos = copy.deepcopy(self.agent_pos)
        self.init_agent_dir = self.agent_dir
        return obs
def reset(self, partial_reset: bool = False):
    """Reset the environment and the help/exploration bookkeeping.

    After the parent reset, the set of explored points is re-seeded with the
    agent's start cell, the start pose is stored for `same_seed_reset`, and
    the success and reveal flags are cleared.

    `partial_reset` is accepted for interface compatibility and is not used.

    Returns the first observation of the new episode (previously `None`). It
    is generated here, after `should_reveal_image` has been cleared, rather
    than taken from the parent reset, so it cannot reflect a reveal flag left
    over from the previous episode.
    """
    super(AskForHelpSimpleCrossing, self).reset()

    self.explored_points.clear()
    self.explored_points.add(tuple(self.agent_pos))

    self.init_agent_pos = copy.deepcopy(self.agent_pos)
    self.init_agent_dir = self.agent_dir

    self._was_successful = False
    self.should_reveal_image = False

    return self.gen_obs()
class MiniGridSimpleConvBase(ActorCriticModel[Distr], abc.ABC):
    """Shared encoder for the MiniGrid actor-critic models.

    Each channel of the `"minigrid_ego_image"` observation (object, colour,
    state) is passed through its own embedding table. The embeddings are
    concatenated and flattened, then handed to `self.actor_critic` under the
    key `self.ac_key`. Subclasses must assign `self.actor_critic`.
    """

    actor_critic: ActorCriticModel

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        num_objects: int,
        num_colors: int,
        num_states: int,
        object_embedding_dim: int = 8,
        **kwargs,
    ):
        """Create one embedding table per enabled channel.

        A channel is enabled when its vocabulary size (`num_objects`,
        `num_colors`, `num_states`) is positive. The number of enabled
        channels must equal the channel dimension of the observation space.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        self.num_objects = num_objects
        self.object_embedding_dim = object_embedding_dim

        vis_input_shape = observation_space["minigrid_ego_image"].shape
        agent_view_x, agent_view_y, view_channels = vis_input_shape
        assert agent_view_x == agent_view_y
        self.agent_view = agent_view_x
        self.view_channels = view_channels

        assert (np.array(vis_input_shape[:2]) >= 3).all(), (
            "MiniGridSimpleConvRNN requires "
            "that the input size be at least 3x3."
        )

        # Channels are numbered in the order in which they are enabled.
        self.num_channels = 0

        if self.num_objects > 0:
            # Object embedding
            self.object_embedding = nn.Embedding(
                num_embeddings=num_objects, embedding_dim=self.object_embedding_dim
            )
            self.object_channel = self.num_channels
            self.num_channels += 1

        self.num_colors = num_colors
        if self.num_colors > 0:
            # Same dimensionality used for colors and states
            self.color_embedding = nn.Embedding(
                num_embeddings=num_colors, embedding_dim=self.object_embedding_dim
            )
            self.color_channel = self.num_channels
            self.num_channels += 1

        self.num_states = num_states
        if self.num_states > 0:
            self.state_embedding = nn.Embedding(
                num_embeddings=num_states, embedding_dim=self.object_embedding_dim
            )
            self.state_channel = self.num_channels
            self.num_channels += 1

        assert self.num_channels == self.view_channels > 0

        self.ac_key = "enc"
        self.observations_for_ac: Dict[str, Optional[torch.Tensor]] = {
            self.ac_key: None
        }

        self.num_agents = 1

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Embed the egocentric image and run the wrapped actor-critic.

        Returns whatever `self.actor_critic` returns for the embedded
        observation.
        """
        minigrid_ego_image = cast(torch.Tensor, observations["minigrid_ego_image"])
        # A 6-dimensional image carries an explicit agent dimension. The rank
        # must be tested here: comparing the shape itself with `6` is always
        # False, which silently disabled the agent-aware branch below.
        use_agent = len(minigrid_ego_image.shape) == 6
        nrow, ncol, nchannels = minigrid_ego_image.shape[-3:]
        nsteps, nsamplers, nagents = masks.shape[:3]

        assert nrow == ncol == self.agent_view
        assert nchannels == self.view_channels == self.num_channels

        embed_list = []
        if self.num_objects > 0:
            ego_object_embeds = self.object_embedding(
                minigrid_ego_image[..., self.object_channel].long()
            )
            embed_list.append(ego_object_embeds)
        if self.num_colors > 0:
            ego_color_embeds = self.color_embedding(
                minigrid_ego_image[..., self.color_channel].long()
            )
            embed_list.append(ego_color_embeds)
        if self.num_states > 0:
            ego_state_embeds = self.state_embedding(
                minigrid_ego_image[..., self.state_channel].long()
            )
            embed_list.append(ego_state_embeds)
        ego_embeds = torch.cat(embed_list, dim=-1)

        if use_agent:
            self.observations_for_ac[self.ac_key] = ego_embeds.view(
                nsteps, nsamplers, nagents, -1
            )
        else:
            self.observations_for_ac[self.ac_key] = ego_embeds.view(
                nsteps, nsamplers * nagents, -1
            )

        # noinspection PyCallingNonCallable
        out, mem_return = self.actor_critic(
            observations=self.observations_for_ac,
            memory=memory,
            prev_actions=prev_actions,
            masks=masks,
        )

        # Drop the reference so the embedded batch is not kept alive
        # between calls.
        self.observations_for_ac[self.ac_key] = None

        return out, mem_return
class MiniGridOffPolicyExpertCELoss(GenericAbstractLoss):
    """Cross-entropy (behaviour cloning) loss against recorded expert actions."""

    def __init__(self, total_episodes_in_epoch: Optional[int] = None):
        super().__init__()
        # Stored for callers; not read by `loss`.
        self.total_episodes_in_epoch = total_episodes_in_epoch

    def loss(  # type: ignore
        self,
        *,  # No positional arguments
        model: ModelType,
        batch: ObservationType,
        batch_memory: Memory,
        stream_memory: Memory,
    ) -> LossOutput:
        """Compute the negative mean log-probability of the expert actions.

        `batch` must contain `"minigrid_ego_image"` (its first two dimensions
        are read as rollout length and number of rollouts), `"masks"` and
        `"expert_action"`. An empty `stream_memory` is filled with zero
        tensors shaped according to the model's recurrent memory
        specification.

        Returns a `LossOutput` holding the loss, the `"expert_ce"` scalar, the
        unchanged `batch_memory`, and the memory returned by the model.
        """
        rollout_len, nrollouts = cast(torch.Tensor, batch["minigrid_ego_image"]).shape[
            :2
        ]

        # Initialize Memory if empty
        if len(stream_memory) == 0:
            # A model without recurrent memory may report no specification at
            # all (`None`); treat that the same as an empty specification
            # instead of failing when iterating it.
            spec = model.recurrent_memory_specification or {}
            for key in spec:
                dims_template, dtype = spec[key]
                # get sampler_dim and all_dims from dims_template (and nrollouts)

                dim_names = [d[0] for d in dims_template]
                sampler_dim = dim_names.index("sampler")

                all_dims = [d[1] for d in dims_template]
                all_dims[sampler_dim] = nrollouts

                stream_memory.check_append(
                    key=key,
                    tensor=torch.zeros(
                        *all_dims,
                        dtype=dtype,
                        device=cast(torch.Tensor, batch["minigrid_ego_image"]).device,
                    ),
                    sampler_dim=sampler_dim,
                )

        # Forward data (through the actor and critic)
        ac_out, stream_memory = model.forward(
            observations=batch,
            memory=stream_memory,
            prev_actions=None,  # type:ignore
            masks=cast(torch.FloatTensor, batch["masks"]),
        )

        # Compute the loss from the actor's output and expert action
        expert_ce_loss = -ac_out.distributions.log_prob(batch["expert_action"]).mean()

        info = {"expert_ce": expert_ce_loss.item()}

        return LossOutput(
            value=expert_ce_loss,
            info=info,
            per_epoch_info={},
            batch_memory=batch_memory,
            stream_memory=stream_memory,
            bsize=rollout_len * nrollouts,
        )
and unpickle all_images = pickle.loads(pickled_array) n_observations = all_images.shape[0] assert ( len(directions) == len(actions) == n_observations ), "error transforming demos" for i in range(n_observations): obs = { "image": all_images[i], "direction": directions[i], "mission": mission, } action = actions[i] done = i == n_observations - 1 new_demo.append((obs, action, done)) new_demos.append(new_demo) return new_demos class MiniGridExpertTrajectoryStorage(ExperienceStorage, StreamingStorageMixin): def __init__( self, data_path: str, num_samplers: int, rollout_len: int, instr_len: Optional[int], restrict_max_steps_in_dataset: Optional[int] = None, device: torch.device = torch.device("cpu"), ): super(MiniGridExpertTrajectoryStorage, self).__init__() self.data_path = data_path self._data: Optional[ List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]] ] = None self.restrict_max_steps_in_dataset = restrict_max_steps_in_dataset self.original_num_samplers = num_samplers self.num_samplers = num_samplers self.rollout_len = rollout_len self.instr_len = instr_len self.current_worker = 0 self.num_workers = 1 self.minigrid_mission_sensor: Optional[MiniGridMissionSensor] = None if instr_len is not None: self.minigrid_mission_sensor = MiniGridMissionSensor(instr_len) self.rollout_queues = [] self._remaining_inds = [] self.sampler_to_num_steps_in_queue = [] self._total_experiences = 0 self.device = device @property def data(self) -> List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]: if self._data is None: if self.data_path not in _DATASET_CACHE: get_logger().info( f"Loading minigrid dataset from {self.data_path} for first time..." 
def set_partition(self, index: int, num_parts: int):
    """Assign this storage the `index`-th of `num_parts` dataset partitions.

    The number of samplers becomes `ceil(original_num_samplers / num_parts)`.
    All cached state derived from the previous partition is discarded; the
    data slice and rollout queues are rebuilt on the next access to `data`.
    """
    self.current_worker = index
    self.num_workers = num_parts

    self.num_samplers = int(math.ceil(self.original_num_samplers / num_parts))

    self._data = None
    # Indices still pending from the previous partition refer to the old data
    # slice and may be out of range for the new one, so start a fresh shuffle.
    self._remaining_inds = []

    # Empty the existing queues before dropping them, in case something else
    # still holds a reference to one of them.
    for q in self.rollout_queues:
        try:
            while True:
                q.get_nowait()
        except queue.Empty:
            pass
    self.rollout_queues = []
def get_data_for_rollout_ind(self, sampler_ind: int) -> Dict[str, torch.Tensor]:
    """Pop `rollout_len` steps from one sampler's queue and batch them.

    The queue is refilled through `_fill_rollout_queue` whenever it runs
    empty.

    Returns a dict of tensors on `self.device` with keys `"masks"`
    (`rollout_len x 1`, 0 at the first step of a trajectory and 1 elsewhere),
    `"minigrid_ego_image"` (stacked step images) and `"expert_action"`
    (`rollout_len` int64 values). `"minigrid_mission"` is added when a
    mission sensor is configured.
    """
    masks: List[bool] = []
    minigrid_ego_image = []
    minigrid_mission = []
    expert_actions = []
    q = self.rollout_queues[sampler_ind]
    while len(masks) != self.rollout_len:
        if q.empty():
            # The step counter and the queue must agree before refilling.
            assert self.sampler_to_num_steps_in_queue[sampler_ind] == 0
            self._fill_rollout_queue(q, sampler_ind)

        # Queue items are (obs, action, done, is_first_obs); `done` is unused.
        obs, expert_action, _, is_first_obs = cast(
            Tuple[
                Dict[str, Union[np.array, int, str]],
                MiniGridEnv.Actions,
                bool,
                bool,
            ],
            q.get_nowait(),
        )

        self.sampler_to_num_steps_in_queue[sampler_ind] -= 1

        # The mask is False (0.0) exactly on the first observation of a
        # trajectory.
        masks.append(not is_first_obs)
        minigrid_ego_image.append(obs["image"])
        if self.minigrid_mission_sensor is not None:
            # noinspection PyTypeChecker
            minigrid_mission.append(
                self.minigrid_mission_sensor.get_observation(
                    env=None, task=None, minigrid_output_obs=obs
                )
            )
        expert_actions.append([expert_action])

    to_return = {
        "masks": torch.tensor(masks, device=self.device, dtype=torch.float32).view(
            self.rollout_len, 1  # steps x mask
        ),
        "minigrid_ego_image": torch.stack(
            [torch.tensor(img, device=self.device) for img in minigrid_ego_image],
            dim=0,
        ),  # steps x height x width x channels
        "expert_action": torch.tensor(
            expert_actions, device=self.device, dtype=torch.int64
        ).view(
            self.rollout_len  # steps
        ),
    }
    if self.minigrid_mission_sensor is not None:
        to_return["minigrid_mission"] = torch.stack(
            [torch.tensor(m, device=self.device) for m in minigrid_mission], dim=0
        )  # steps x mission_dims
    return to_return
range(self.num_samplers): data_for_ind = self.get_data_for_rollout_ind(sampler_ind=rollout_ind) for key in data_for_ind: all_data[key].append(data_for_ind[key]) self._total_experiences += self.num_samplers * self.rollout_len return { key: torch.stack( all_data[key], dim=1, ) # new sampler dim for key in all_data } ================================================ FILE: allenact_plugins/minigrid_plugin/minigrid_sensors.py ================================================ from typing import Optional, Any, cast import gym import gym_minigrid.minigrid import numpy as np import torch from babyai.utils.format import InstructionsPreprocessor from gym_minigrid.minigrid import MiniGridEnv from allenact.base_abstractions.sensor import Sensor, prepare_locals_for_super from allenact.base_abstractions.task import Task, SubTaskType # fmt: off ALL_VOCAB_TOKENS = [ "a", "after", "and", "ball", "behind", "blue", "box", "door", "front", "go", "green", "grey", "in", "key", "left", "next", "of", "on", "open", "pick", "purple", "put", "red", "right", "the", "then", "to", "up", "yellow", "you", "your", ] # fmt: on class EgocentricMiniGridSensor(Sensor[MiniGridEnv, Task[MiniGridEnv]]): def __init__( self, agent_view_size: int, view_channels: int = 1, uuid: str = "minigrid_ego_image", **kwargs: Any ): self.agent_view_size = agent_view_size self.view_channels = view_channels self.num_objects = ( cast( int, max(map(abs, gym_minigrid.minigrid.OBJECT_TO_IDX.values())) # type: ignore ) + 1 ) self.num_colors = ( cast(int, max(map(abs, gym_minigrid.minigrid.COLOR_TO_IDX.values()))) # type: ignore + 1 ) self.num_states = ( cast(int, max(map(abs, gym_minigrid.minigrid.STATE_TO_IDX.values()))) # type: ignore + 1 ) observation_space = self._get_observation_space() super().__init__(**prepare_locals_for_super(locals())) def _get_observation_space(self) -> gym.Space: return gym.spaces.Box( low=0, high=max(self.num_objects, self.num_colors, self.num_states) - 1, shape=(self.agent_view_size, 
def get_observation(
    self,
    env: MiniGridEnv,
    task: Optional[SubTaskType],
    *args,
    minigrid_output_obs: Optional[np.ndarray] = None,
    **kwargs: Any
) -> Any:
    """Return the first `view_channels` channels of the egocentric image.

    If `minigrid_output_obs` is given and its `"image"` entry already has the
    sensor's spatial size, it is reused (despite the annotation, the value is
    indexed with the key `"image"`). Otherwise `env.agent_view_size` is set to
    the sensor's view size and a fresh observation is generated from `env`.

    The returned image must have dtype `uint8`.
    """
    # Only the two spatial dimensions are compared: the image also has a
    # channel dimension, so comparing its full shape with a 2-tuple could
    # never match and the supplied observation was never reused.
    if minigrid_output_obs is not None and minigrid_output_obs["image"].shape[:2] == (
        self.agent_view_size,
        self.agent_view_size,
    ):
        img = minigrid_output_obs["image"][:, :, : self.view_channels]
    else:
        env.agent_view_size = self.agent_view_size
        img = env.gen_obs()["image"][:, :, : self.view_channels]

    assert img.dtype == np.uint8
    return img
def get_observation(
    self,
    env: MiniGridEnv,
    task: Optional[SubTaskType],
    *args,
    minigrid_output_obs: Optional[np.ndarray] = None,
    **kwargs: Any
) -> Any:
    """Encode the mission as an integer array of exactly `instr_len` entries.

    The observation (generated from `env` when none is supplied) is run
    through `self.instr_preprocessor` and flattened. Longer encodings are
    truncated and shorter ones are right-padded with zeros.
    """
    if minigrid_output_obs is None:
        minigrid_output_obs = env.gen_obs()

    tokens = self.instr_preprocessor([minigrid_output_obs]).view(-1)

    # Positive: tokens to pad; negative: tokens to drop.
    shortfall = self.instr_len - tokens.shape[0]
    if shortfall < 0:
        tokens = tokens[: self.instr_len]
    elif shortfall > 0:
        tokens = torch.nn.functional.pad(
            input=tokens,
            pad=[0, shortfall],
            value=0,
        )

    return tokens.long().numpy()
def _step(self, action: int) -> RLStepResult:
    """Apply one task action to the MiniGrid environment.

    `action` indexes `_ACTION_NAMES` and is translated to the environment's
    own action id through `_ACTION_IND_TO_MINIGRID_IND`. It must be a plain
    Python `int`; anything else fails the assertion below.

    Returns an `RLStepResult` with the sensor observations built from the
    environment's step output, the environment's reward and info, and the
    value of `self.is_done()`.
    """
    assert isinstance(action, int)
    action = cast(int, action)

    # The environment's done flag is stored on the task; it is what
    # `reached_terminal_state` reports.
    minigrid_obs, reward, self._minigrid_done, info = self.env.step(
        action=self._ACTION_IND_TO_MINIGRID_IND[action]
    )

    # self.env.render()

    return RLStepResult(
        observation=self.get_observations(minigrid_output_obs=minigrid_obs),
        reward=reward,
        done=self.is_done(),
        info=info,
    )
@property
def graph(self):
    """The navigation graph of the environment, built on first access.

    When the task has a cache uid, the graph is shared through
    `_CACHED_GRAPHS` and generated only if that uid has no entry yet.
    Without a uid, a graph is generated for this task alone.
    """
    if self._graph is not None:
        return self._graph

    cache_uid = self._task_cache_uid
    if cache_uid is None:
        self._graph = self.generate_graph()
    else:
        if cache_uid not in self._CACHED_GRAPHS:
            self._CACHED_GRAPHS[cache_uid] = self.generate_graph()
        self._graph = self._CACHED_GRAPHS[cache_uid]

    return self._graph
def _add_node_to_graph(
    self,
    graph: nx.DiGraph,
    s: Tuple[int, int, int],
    valid_node_types: Tuple[str, ...],
    attr_dict: Dict[Any, Any] = None,
    include_rotation_free_leaves: bool = False,
):
    """Add state `s = (x, y, rotation)` to `graph` and connect it to its
    neighbors.

    Nothing happens if `s` is already in the graph. Edges to and from
    neighboring states are only considered when both this node's `"type"`
    attribute and the neighbor's are in `valid_node_types`.

    If `attr_dict` is `None`, a warning is logged and the node is added
    without attributes (and so without edges). Previously this case raised a
    `TypeError` right after the warning.

    With `include_rotation_free_leaves`, a node `(x, y, None)` is also added
    and linked from `s` with the action `"NA"`.
    """
    if s in graph:
        return

    if attr_dict is None:
        get_logger().warning("adding a node with neighbor checks and no attributes")
        attr_dict = {}
    graph.add_node(s, **attr_dict)

    if include_rotation_free_leaves:
        rot_free_leaf = (*s[:-1], None)
        if rot_free_leaf not in graph:
            graph.add_node(rot_free_leaf)
        graph.add_edge(s, rot_free_leaf, action="NA")

    # `.get` keeps nodes without a "type" attribute (e.g. rotation-free
    # leaves) from raising; they are simply never connected.
    if attr_dict.get("type") in valid_node_types:
        for o in self.possible_neighbor_offsets():
            t = (s[0] + o[0], s[1] + o[1], (s[2] + o[2]) % 4)
            if t in graph and graph.nodes[t].get("type") in valid_node_types:
                self._add_from_to_edge(graph, s, t)
                self._add_from_to_edge(graph, t, s)
env: environment to generate the graph over """ image = self.env.grid.encode() width, height, _ = image.shape graph = nx.DiGraph() # In fully observable grid, there shouldn't be any "unseen" # Currently dealing with "empty", "wall", "goal", "lava" valid_object_ids = np.sort( [OBJECT_TO_IDX[o] for o in ["empty", "wall", "lava", "goal"]] ) assert np.all(np.union1d(image[:, :, 0], valid_object_ids) == valid_object_ids) # Grid to nodes for x in range(width): for y in range(height): for rotation in range(4): type, color, state = image[x, y] self._add_node_to_graph( graph, (x, y, rotation), attr_dict={ "type": IDX_TO_OBJECT[type], "color": color, "state": state, }, valid_node_types=("empty", "goal"), ) if IDX_TO_OBJECT[type] == "goal": if not graph.has_node("unified_goal"): graph.add_node("unified_goal") graph.add_edge((x, y, rotation), "unified_goal") return graph def query_expert(self, **kwargs) -> Tuple[int, bool]: if self._minigrid_done: get_logger().warning("Episode is completed, but expert is still queried.") return -1, False paths = [] agent_x, agent_y = self.env.agent_pos agent_rot = self.env.agent_dir source_state_key = (agent_x, agent_y, agent_rot) assert source_state_key in self.graph paths.append(nx.shortest_path(self.graph, source_state_key, "unified_goal")) if len(paths) == 0: return -1, False shortest_path_ind = int(np.argmin([len(p) for p in paths])) if self.closest_agent_has_been_to_goal is None: self.closest_agent_has_been_to_goal = len(paths[shortest_path_ind]) - 1 else: self.closest_agent_has_been_to_goal = min( len(paths[shortest_path_ind]) - 1, self.closest_agent_has_been_to_goal ) if ( self.corrupt_expert_within_actions_of_goal is not None and self.corrupt_expert_within_actions_of_goal >= self.closest_agent_has_been_to_goal ): return ( int(self.env.np_random.randint(0, len(self.class_action_names()))), True, ) if len(paths[shortest_path_ind]) == 2: # Since "unified_goal" is 1 step away from actual goals # if a path like [actual_goal, unified_goal] 
class MiniGridTaskSampler(TaskSampler):
    """Task sampler that repeatedly re-seeds and resets one MiniGrid environment.

    A single environment instance is created at construction time. Every call
    to `next_task` picks a seed (from a fixed list of seeds, or freshly drawn
    from the sampler's random generator), seeds and resets the environment, and
    wraps it in a new `task_class` instance.
    """

    def __init__(
        self,
        env_class: Callable[..., Union[MiniGridEnv]],
        sensors: Union[SensorSuite, List[Sensor]],
        env_info: Optional[Dict[str, Any]] = None,
        max_tasks: Optional[int] = None,
        num_unique_seeds: Optional[int] = None,
        task_seeds_list: Optional[List[int]] = None,
        deterministic_sampling: bool = False,
        cache_graphs: Optional[bool] = False,
        task_class: Callable[..., MiniGridTask] = MiniGridTask,
        repeat_failed_task_for_min_steps: int = 0,
        extra_task_kwargs: Optional[Dict] = None,
        **kwargs,
    ):
        """Validate the sampling configuration and build the environment.

        # Parameters

        env_class : Callable used to create the environment; it is invoked
            with the entries of `env_info` as keyword arguments.
        sensors : A `SensorSuite`, or a list of sensors that is wrapped in one.
        env_info : Keyword arguments for `env_class`. `None` means "no
            keyword arguments".
        max_tasks : Number of tasks after which `next_task` returns `None`.
            `None` makes the sampler's `length` infinite.
        num_unique_seeds : If given (and `task_seeds_list` is not), the seeds
            `0, ..., num_unique_seeds - 1` are used.
        task_seeds_list : Explicit list of environment seeds to sample from.
        deterministic_sampling : If `True`, seeds are visited in order
            (cycling); otherwise they are drawn with the sampler's generator.
        cache_graphs : If truthy, each task receives `str(seed)` as its
            `task_cache_uid`.
        task_class : Callable used to create each task.
        repeat_failed_task_for_min_steps : Only used when no fixed set of
            seeds exists: the previous seed is reused while the previous task
            has zero cumulative reward and fewer than this many steps have
            been accumulated on that seed.
        extra_task_kwargs : Additional keyword arguments passed to
            `task_class`.
        kwargs : Accepted and ignored.
        """
        super(MiniGridTaskSampler, self).__init__()
        self.sensors = (
            SensorSuite(sensors) if not isinstance(sensors, SensorSuite) else sensors
        )
        self.max_tasks = max_tasks
        self.num_unique_seeds = num_unique_seeds
        self.cache_graphs = cache_graphs
        self.deterministic_sampling = deterministic_sampling
        self.repeat_failed_task_for_min_steps = repeat_failed_task_for_min_steps
        self.extra_task_kwargs = (
            extra_task_kwargs if extra_task_kwargs is not None else {}
        )

        # Bookkeeping used by `next_task` to decide whether to repeat a seed.
        self._last_env_seed: Optional[int] = None
        self._last_task: Optional[MiniGridTask] = None
        self._number_of_steps_taken_with_task_seed = 0

        assert (not deterministic_sampling) or repeat_failed_task_for_min_steps <= 0, (
            "If `deterministic_sampling` is True then we require"
            " `repeat_failed_task_for_min_steps <= 0`"
        )
        assert (not self.cache_graphs) or self.num_unique_seeds is not None, (
            "When caching graphs you must specify"
            " a number of unique tasks to sample from."
        )
        assert (self.num_unique_seeds is None) or (
            0 < self.num_unique_seeds
        ), "`num_unique_seeds` must be a positive integer."

        # Reconcile `num_unique_seeds` with `task_seeds_list`: whichever one is
        # given determines the other.
        self.task_seeds_list = task_seeds_list
        if self.task_seeds_list is not None:
            if self.num_unique_seeds is not None:
                assert self.num_unique_seeds == len(
                    self.task_seeds_list
                ), "`num_unique_seeds` must equal the length of `task_seeds_list` if both specified."
            self.num_unique_seeds = len(self.task_seeds_list)
        elif self.num_unique_seeds is not None:
            self.task_seeds_list = list(range(self.num_unique_seeds))

        if num_unique_seeds is not None and repeat_failed_task_for_min_steps > 0:
            raise NotImplementedError(
                "`repeat_failed_task_for_min_steps` must be <=0 if number"
                " of unique seeds is not None."
            )

        assert (
            not self.cache_graphs
        ) or self.num_unique_seeds <= 1000, "Too many tasks (graphs) to cache"
        assert (not deterministic_sampling) or (
            self.num_unique_seeds is not None
        ), "Cannot use deterministic sampling when `num_unique_seeds` is `None`."

        if (not deterministic_sampling) and self.max_tasks:
            get_logger().warning(
                "`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,"
                " this might be a mistake when running testing."
            )

        # `env_info` defaults to `None`; unpacking `None` with `**` raises a
        # `TypeError`, so fall back to an empty mapping.
        self.env = env_class(**(env_info if env_info is not None else {}))
        self.task_class = task_class

        self.np_seeded_random_gen, _ = seeding.np_random(random.randint(0, 2**31 - 1))

        self.num_tasks_generated = 0

    @property
    def length(self) -> Union[int, float]:
        """Number of tasks still to be sampled (`inf` if `max_tasks` is `None`)."""
        return (
            float("inf")
            if self.max_tasks is None
            else self.max_tasks - self.num_tasks_generated
        )

    @property
    def total_unique(self) -> Optional[Union[int, float]]:
        """Number of unique seeds, or `None` when seeds are not restricted."""
        return None if self.num_unique_seeds is None else self.num_unique_seeds

    @property
    def last_sampled_task(self) -> Optional[Task]:
        """Not supported by this sampler; always raises `NotImplementedError`."""
        raise NotImplementedError

    def next_task(self, force_advance_scene: bool = False) -> Optional[MiniGridTask]:
        """Seed/reset the environment and return a new task wrapping it.

        Returns `None` once `length` is no longer positive. The
        `force_advance_scene` argument is not used.
        """
        if self.length <= 0:
            return None

        task_cache_uid = None
        repeating = False
        if self.num_unique_seeds is not None:
            # Fixed set of seeds: cycle through them or draw one at random.
            if self.deterministic_sampling:
                self._last_env_seed = self.task_seeds_list[
                    self.num_tasks_generated % len(self.task_seeds_list)
                ]
            else:
                self._last_env_seed = self.np_seeded_random_gen.choice(
                    self.task_seeds_list
                )
        else:
            if self._last_task is not None:
                self._number_of_steps_taken_with_task_seed += (
                    self._last_task.num_steps_taken()
                )

            if (
                self._last_env_seed is not None
                and self._number_of_steps_taken_with_task_seed
                < self.repeat_failed_task_for_min_steps
                and self._last_task.cumulative_reward == 0
            ):
                # Previous task got no reward and the step budget for this
                # seed is not yet used up: keep the same seed.
                repeating = True
            else:
                self._number_of_steps_taken_with_task_seed = 0
                self._last_env_seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)

        task_has_same_seed_reset = hasattr(self.env, "same_seed_reset")

        if self.cache_graphs:
            task_cache_uid = str(self._last_env_seed)

        if repeating and task_has_same_seed_reset:
            # noinspection PyUnresolvedReferences
            self.env.same_seed_reset()
        else:
            self.env.seed(self._last_env_seed)
            self.env.saved_seed = self._last_env_seed
            self.env.reset()

        self.num_tasks_generated += 1
        task = self.task_class(
            **dict(
                env=self.env,
                sensors=self.sensors,
                task_info={},
                max_steps=self.env.max_steps,
                task_cache_uid=task_cache_uid,
            ),
            **self.extra_task_kwargs,
        )

        # When repeating a seed, reuse the graph the previous task already built.
        if repeating and self._last_task.graph_created:
            task.graph = self._last_task.graph

        self._last_task = task
        return task

    def close(self) -> None:
        """Close the underlying environment."""
        self.env.close()

    @property
    def all_observation_spaces_equal(self) -> bool:
        """Always `True` for this sampler."""
        return True

    def reset(self) -> None:
        """Restart the task count and reset the environment."""
        self.num_tasks_generated = 0
        self.env.reset()

    def set_seed(self, seed: int) -> None:
        """Re-create the sampler's random generator from `seed`."""
        self.np_seeded_random_gen, _ = seeding.np_random(seed)
""" from typing import Optional, List, Dict, cast, Tuple, Sequence import gym import torch import torch.nn as nn from gym.spaces import Dict as SpaceDict from allenact.algorithms.onpolicy_sync.policy import ObservationType from allenact.embodiedai.models import resnet as resnet from allenact.embodiedai.models.basic_models import SimpleCNN from allenact.embodiedai.models.visual_nav_models import ( VisualNavActorCritic, FusionType, ) class CatObservations(nn.Module): def __init__(self, ordered_uuids: Sequence[str], dim: int): super().__init__() assert len(ordered_uuids) != 0 self.ordered_uuids = ordered_uuids self.dim = dim def forward(self, observations: ObservationType): if len(self.ordered_uuids) == 1: return observations[self.ordered_uuids[0]] return torch.cat( [observations[uuid] for uuid in self.ordered_uuids], dim=self.dim ) class ObjectNavActorCritic(VisualNavActorCritic): """Baseline recurrent actor critic model for object-navigation. # Attributes action_space : The space of actions available to the agent. Currently only discrete actions are allowed (so this space will always be of type `gym.spaces.Discrete`). observation_space : The observation space expected by the agent. This observation space should include (optionally) 'rgb' images and 'depth' images and is required to have a component corresponding to the goal `goal_sensor_uuid`. goal_sensor_uuid : The uuid of the sensor of the goal object. See `GoalObjectTypeThorSensor` as an example of such a sensor. hidden_size : The hidden size of the GRU RNN. object_type_embedding_dim: The dimensionality of the embedding corresponding to the goal object type. 
""" def __init__( self, action_space: gym.spaces.Discrete, observation_space: SpaceDict, goal_sensor_uuid: str, # RNN hidden_size=512, num_rnn_layers=1, rnn_type="GRU", add_prev_actions=False, add_prev_action_null_token=False, action_embed_size=6, # Aux loss multiple_beliefs=False, beliefs_fusion: Optional[FusionType] = None, auxiliary_uuids: Optional[Sequence[str]] = None, # below are custom params rgb_uuid: Optional[str] = None, depth_uuid: Optional[str] = None, object_type_embedding_dim=8, trainable_masked_hidden_state: bool = False, # perception backbone params, backbone="gnresnet18", resnet_baseplanes=32, ): """Initializer. See class documentation for parameter definitions. """ super().__init__( action_space=action_space, observation_space=observation_space, hidden_size=hidden_size, multiple_beliefs=multiple_beliefs, beliefs_fusion=beliefs_fusion, auxiliary_uuids=auxiliary_uuids, ) self.rgb_uuid = rgb_uuid self.depth_uuid = depth_uuid self.goal_sensor_uuid = goal_sensor_uuid self._n_object_types = self.observation_space.spaces[self.goal_sensor_uuid].n self.object_type_embedding_size = object_type_embedding_dim self.backbone = backbone if backbone == "simple_cnn": self.visual_encoder = SimpleCNN( observation_space=observation_space, output_size=hidden_size, rgb_uuid=rgb_uuid, depth_uuid=depth_uuid, ) self.visual_encoder_output_size = hidden_size assert self.is_blind == self.visual_encoder.is_blind elif backbone == "gnresnet18": # resnet family self.visual_encoder = resnet.GroupNormResNetEncoder( observation_space=observation_space, output_size=hidden_size, rgb_uuid=rgb_uuid, depth_uuid=depth_uuid, baseplanes=resnet_baseplanes, ngroups=resnet_baseplanes // 2, make_backbone=getattr(resnet, backbone), ) self.visual_encoder_output_size = hidden_size assert self.is_blind == self.visual_encoder.is_blind elif backbone in ["identity", "projection"]: good_uuids = [ uuid for uuid in [self.rgb_uuid, self.depth_uuid] if uuid is not None ] cat_model = CatObservations( 
ordered_uuids=good_uuids, dim=-1, ) after_cat_size = sum( observation_space[uuid].shape[-1] for uuid in good_uuids ) if backbone == "identity": self.visual_encoder = cat_model self.visual_encoder_output_size = after_cat_size else: self.visual_encoder = nn.Sequential( cat_model, nn.Linear(after_cat_size, hidden_size), nn.ReLU(True) ) self.visual_encoder_output_size = hidden_size else: raise NotImplementedError self.create_state_encoders( obs_embed_size=self.goal_visual_encoder_output_dims, num_rnn_layers=num_rnn_layers, rnn_type=rnn_type, add_prev_actions=add_prev_actions, add_prev_action_null_token=add_prev_action_null_token, prev_action_embed_size=action_embed_size, trainable_masked_hidden_state=trainable_masked_hidden_state, ) self.create_actorcritic_head() self.create_aux_models( obs_embed_size=self.goal_visual_encoder_output_dims, action_embed_size=action_embed_size, ) self.object_type_embedding = nn.Embedding( num_embeddings=self._n_object_types, embedding_dim=object_type_embedding_dim, ) self.train() @property def is_blind(self) -> bool: """True if the model is blind (e.g. 
class ResnetTensorNavActorCritic(VisualNavActorCritic):
    """Navigation actor-critic whose observations are encoded by a ResNet-tensor goal encoder.

    When both an RGB and a depth preprocessor uuid are supplied a
    `ResnetDualTensorGoalEncoder` is used; otherwise a
    `ResnetTensorGoalEncoder` is built for whichever uuid was supplied.
    """

    def __init__(
        # base params
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        goal_sensor_uuid: str,
        hidden_size=512,
        num_rnn_layers=1,
        rnn_type="GRU",
        add_prev_actions=False,
        add_prev_action_null_token=False,
        action_embed_size=6,
        multiple_beliefs=False,
        beliefs_fusion: Optional[FusionType] = None,
        auxiliary_uuids: Optional[List[str]] = None,
        # custom params
        rgb_resnet_preprocessor_uuid: Optional[str] = None,
        depth_resnet_preprocessor_uuid: Optional[str] = None,
        goal_dims: int = 32,
        resnet_compressor_hidden_out_dims: Tuple[int, int] = (128, 32),
        combiner_hidden_out_dims: Tuple[int, int] = (128, 32),
        **kwargs,
    ):
        """Build the goal/visual encoder, state encoders, heads and aux models.

        Extra `kwargs` are forwarded to the parent class initializer.
        """
        super().__init__(
            action_space=action_space,
            observation_space=observation_space,
            hidden_size=hidden_size,
            multiple_beliefs=multiple_beliefs,
            beliefs_fusion=beliefs_fusion,
            auxiliary_uuids=auxiliary_uuids,
            **kwargs,
        )

        # Trailing positional arguments shared by both encoder types.
        encoder_tail_args = (
            goal_dims,
            resnet_compressor_hidden_out_dims,
            combiner_hidden_out_dims,
        )
        has_rgb = rgb_resnet_preprocessor_uuid is not None
        has_depth = depth_resnet_preprocessor_uuid is not None

        if has_rgb and has_depth:
            self.goal_visual_encoder = ResnetDualTensorGoalEncoder(
                self.observation_space,
                goal_sensor_uuid,
                rgb_resnet_preprocessor_uuid,
                depth_resnet_preprocessor_uuid,
                *encoder_tail_args,
            )
        else:
            # At most one modality: prefer RGB, fall back to depth (may be None).
            if has_rgb:
                single_uuid = rgb_resnet_preprocessor_uuid
            else:
                single_uuid = depth_resnet_preprocessor_uuid
            self.goal_visual_encoder = ResnetTensorGoalEncoder(
                self.observation_space,
                goal_sensor_uuid,
                single_uuid,
                *encoder_tail_args,
            )

        encoder_output_dims = self.goal_visual_encoder.output_dims

        self.create_state_encoders(
            obs_embed_size=encoder_output_dims,
            num_rnn_layers=num_rnn_layers,
            rnn_type=rnn_type,
            add_prev_actions=add_prev_actions,
            add_prev_action_null_token=add_prev_action_null_token,
            prev_action_embed_size=action_embed_size,
        )

        self.create_actorcritic_head()

        self.create_aux_models(
            obs_embed_size=encoder_output_dims,
            action_embed_size=action_embed_size,
        )

        self.train()

    @property
    def is_blind(self) -> bool:
        """Whether the underlying goal/visual encoder reports itself as blind."""
        return self.goal_visual_encoder.is_blind

    def forward_encoder(self, observations: ObservationType) -> torch.FloatTensor:
        """Encode `observations` with the goal/visual encoder."""
        encoded = self.goal_visual_encoder(observations)
        return encoded
class ResnetDualTensorGoalEncoder(nn.Module):
    """Goal-conditioned encoder over a pair of (RGB, depth) ResNet feature tensors.

    Each modality is compressed with 1x1 convolutions, concatenated with the
    spatially-broadcast goal embedding, combined with further 1x1
    convolutions, and finally the two modalities are concatenated and
    flattened. The encoder is "blind" when either preprocessor uuid is absent
    from the observation space; in that case only the goal embedding is
    returned.
    """

    def __init__(
        self,
        observation_spaces: SpaceDict,
        goal_sensor_uuid: str,
        rgb_resnet_preprocessor_uuid: str,
        depth_resnet_preprocessor_uuid: str,
        goal_embed_dims: int = 32,
        resnet_compressor_hidden_out_dims: Tuple[int, int] = (128, 32),
        combiner_hidden_out_dims: Tuple[int, int] = (128, 32),
    ) -> None:
        """Create the goal embedding and, unless blind, the per-modality networks.

        Raises `NotImplementedError` if the goal space is neither
        `gym.spaces.Discrete` nor `gym.spaces.Box`.
        """
        super().__init__()
        self.goal_uuid = goal_sensor_uuid
        self.rgb_resnet_uuid = rgb_resnet_preprocessor_uuid
        self.depth_resnet_uuid = depth_resnet_preprocessor_uuid
        self.goal_embed_dims = goal_embed_dims
        self.resnet_hid_out_dims = resnet_compressor_hidden_out_dims
        self.combine_hid_out_dims = combiner_hidden_out_dims

        self.goal_space = observation_spaces.spaces[self.goal_uuid]
        if isinstance(self.goal_space, gym.spaces.Discrete):
            self.embed_goal = nn.Embedding(
                num_embeddings=self.goal_space.n,
                embedding_dim=self.goal_embed_dims,
            )
        elif isinstance(self.goal_space, gym.spaces.Box):
            self.embed_goal = nn.Linear(self.goal_space.shape[-1], self.goal_embed_dims)
        else:
            raise NotImplementedError

        self.blind = (
            self.rgb_resnet_uuid not in observation_spaces.spaces
            or self.depth_resnet_uuid not in observation_spaces.spaces
        )
        if not self.blind:
            # The RGB tensor's shape is used for both modalities.
            self.resnet_tensor_shape = observation_spaces.spaces[
                self.rgb_resnet_uuid
            ].shape
            self.rgb_resnet_compressor = nn.Sequential(
                nn.Conv2d(self.resnet_tensor_shape[0], self.resnet_hid_out_dims[0], 1),
                nn.ReLU(),
                nn.Conv2d(*self.resnet_hid_out_dims[0:2], 1),
                nn.ReLU(),
            )
            self.depth_resnet_compressor = nn.Sequential(
                nn.Conv2d(self.resnet_tensor_shape[0], self.resnet_hid_out_dims[0], 1),
                nn.ReLU(),
                nn.Conv2d(*self.resnet_hid_out_dims[0:2], 1),
                nn.ReLU(),
            )
            self.rgb_target_obs_combiner = nn.Sequential(
                nn.Conv2d(
                    self.resnet_hid_out_dims[1] + self.goal_embed_dims,
                    self.combine_hid_out_dims[0],
                    1,
                ),
                nn.ReLU(),
                nn.Conv2d(*self.combine_hid_out_dims[0:2], 1),
            )
            self.depth_target_obs_combiner = nn.Sequential(
                nn.Conv2d(
                    self.resnet_hid_out_dims[1] + self.goal_embed_dims,
                    self.combine_hid_out_dims[0],
                    1,
                ),
                nn.ReLU(),
                nn.Conv2d(*self.combine_hid_out_dims[0:2], 1),
            )

    @property
    def is_blind(self):
        """`True` if either visual uuid was missing from the observation space."""
        return self.blind

    @property
    def output_dims(self):
        """Size of the last dimension of the encoder's output."""
        if self.blind:
            return self.goal_embed_dims
        else:
            return (
                2
                * self.combine_hid_out_dims[-1]
                * self.resnet_tensor_shape[1]
                * self.resnet_tensor_shape[2]
            )

    def get_object_type_encoding(
        self, observations: Dict[str, torch.FloatTensor]
    ) -> torch.FloatTensor:
        """Get the object type encoding from input batched observations."""
        return cast(
            torch.FloatTensor,
            self.embed_goal(observations[self.goal_uuid].to(torch.int64)),
        )

    def compress_rgb_resnet(self, observations):
        """Apply the RGB compressor to the RGB ResNet tensor."""
        return self.rgb_resnet_compressor(observations[self.rgb_resnet_uuid])

    def compress_depth_resnet(self, observations):
        """Apply the depth compressor to the depth ResNet tensor."""
        return self.depth_resnet_compressor(observations[self.depth_resnet_uuid])

    def distribute_target(self, observations):
        """Embed the goal and broadcast it over the spatial dims of the ResNet tensor."""
        target_emb = self.embed_goal(observations[self.goal_uuid])
        return target_emb.view(-1, self.goal_embed_dims, 1, 1).expand(
            -1, -1, self.resnet_tensor_shape[-2], self.resnet_tensor_shape[-1]
        )

    def adapt_input(self, observations):
        """Flatten the leading (step, sampler[, agent]) dims of the inputs.

        Returns a new dict (the caller's dict is left untouched) together with
        `use_agent`, `nstep`, `nsampler`, and `nagent`, which `adapt_output`
        needs to restore the leading dimensions.
        """
        # Shallow copy so that the reshaped tensors do not replace the entries
        # of the dict owned by the caller.
        observations = {**observations}

        rgb = observations[self.rgb_resnet_uuid]
        depth = observations[self.depth_resnet_uuid]

        use_agent = False
        nagent = 1

        # A 6-dimensional tensor carries an extra agent dimension.
        if len(rgb.shape) == 6:
            use_agent = True
            nstep, nsampler, nagent = rgb.shape[:3]
        else:
            nstep, nsampler = rgb.shape[:2]

        observations[self.rgb_resnet_uuid] = rgb.view(-1, *rgb.shape[-3:])
        observations[self.depth_resnet_uuid] = depth.view(-1, *depth.shape[-3:])
        observations[self.goal_uuid] = observations[self.goal_uuid].view(-1, 1)

        return observations, use_agent, nstep, nsampler, nagent

    @staticmethod
    def adapt_output(x, use_agent, nstep, nsampler, nagent):
        """Restore the leading dims that `adapt_input` flattened."""
        if use_agent:
            return x.view(nstep, nsampler, nagent, -1)
        return x.view(nstep, nsampler * nagent, -1)

    def forward(self, observations):
        """Encode the observations; returns only the goal embedding when blind."""
        if self.blind:
            # Checked before `adapt_input`, which indexes the visual uuids and
            # would otherwise fail when those observations are absent.
            return self.embed_goal(observations[self.goal_uuid])

        observations, use_agent, nstep, nsampler, nagent = self.adapt_input(
            observations
        )

        # The broadcast goal embedding is identical for both modalities.
        target = self.distribute_target(observations)

        rgb_x = self.rgb_target_obs_combiner(
            torch.cat([self.compress_rgb_resnet(observations), target], dim=1)
        )
        depth_x = self.depth_target_obs_combiner(
            torch.cat([self.compress_depth_resnet(observations), target], dim=1)
        )

        x = torch.cat([rgb_x, depth_x], dim=1)
        x = x.reshape(x.shape[0], -1)  # flatten

        return self.adapt_output(x, use_agent, nstep, nsampler, nagent)
= coordinate_embedding_dim else: self.coordinate_embedding_size = coordinate_dims self.sensor_fusion = False if rgb_uuid is not None and depth_uuid is not None: self.sensor_fuser = nn.Linear(hidden_size * 2, hidden_size) self.sensor_fusion = True self.backbone = backbone if backbone == "simple_cnn": self.visual_encoder = SimpleCNN( observation_space=observation_space, output_size=hidden_size, rgb_uuid=rgb_uuid, depth_uuid=depth_uuid, ) else: # resnet family self.visual_encoder = resnet.GroupNormResNetEncoder( observation_space=observation_space, output_size=hidden_size, rgb_uuid=rgb_uuid, depth_uuid=depth_uuid, baseplanes=resnet_baseplanes, ngroups=resnet_baseplanes // 2, make_backbone=getattr(resnet, backbone), ) if self.embed_coordinates: self.coordinate_embedding = nn.Linear( coordinate_dims, coordinate_embedding_dim ) self.create_state_encoders( obs_embed_size=self.goal_visual_encoder_output_dims, num_rnn_layers=num_rnn_layers, rnn_type=rnn_type, add_prev_actions=add_prev_actions, add_prev_action_null_token=add_prev_action_null_token, prev_action_embed_size=action_embed_size, ) self.create_actorcritic_head() self.create_aux_models( obs_embed_size=self.goal_visual_encoder_output_dims, action_embed_size=action_embed_size, ) self.train() @property def is_blind(self): return self.visual_encoder.is_blind @property def goal_visual_encoder_output_dims(self): dims = self.coordinate_embedding_size if self.is_blind: return dims return dims + self.recurrent_hidden_state_size def get_target_coordinates_encoding(self, observations): if self.embed_coordinates: return self.coordinate_embedding( observations[self.goal_sensor_uuid].to(torch.float32) ) else: return observations[self.goal_sensor_uuid].to(torch.float32) def forward_encoder(self, observations: ObservationType) -> torch.FloatTensor: target_encoding = self.get_target_coordinates_encoding(observations) obs_embeds: Union[torch.Tensor, List[torch.Tensor]] obs_embeds = [target_encoding] if not self.is_blind: 
class TupleCategoricalDistr(Distr):
    """Distribution over tuples of actions, backed by one `CategoricalDistr`.

    The individual actions are stacked along / split from the last dimension
    of the wrapped distribution.
    """

    def __init__(self, probs=None, logits=None, validate_args=None):
        """Wrap a `CategoricalDistr` built from the given arguments."""
        self.dists = CategoricalDistr(
            probs=probs, logits=logits, validate_args=validate_args
        )

    @staticmethod
    def _unstack_last_dim(stacked: torch.Tensor) -> Tuple[torch.LongTensor, ...]:
        """Split along the last dim and reshape each piece to its first two dims."""
        pieces = stacked.split(1, dim=-1)
        return tuple(piece.view(piece.shape[:2]) for piece in pieces)

    def log_prob(self, actions: Tuple[torch.LongTensor, ...]) -> torch.FloatTensor:
        """Log-probability of the actions, stacked along a new last dimension."""
        # flattened output [steps, samplers, num_agents]
        stacked_actions = torch.stack(actions, dim=-1)
        return self.dists.log_prob(stacked_actions)

    def entropy(self) -> torch.FloatTensor:
        """Entropy of the wrapped distribution."""
        # flattened output [steps, samplers, num_agents]
        return self.dists.entropy()

    def sample(self, sample_shape=torch.Size()) -> Tuple[torch.LongTensor, ...]:
        """Sample from the wrapped distribution and return one tensor per slice."""
        # split and remove trailing singleton dim
        return self._unstack_last_dim(self.dists.sample(sample_shape))

    def mode(self) -> Tuple[torch.LongTensor, ...]:
        """Mode of the wrapped distribution, returned as one tensor per slice."""
        # split and remove trailing singleton dim
        return self._unstack_last_dim(self.dists.mode())
def initialize_grid_dimensions(
    self, reachable_points: Collection[Dict[str, float]]
) -> Tuple[int, int, int, int]:
    """Return `(xmin, xmax, zmin, zmax)` of the reachable points in grid cells.

    Each point's `"x"` and `"z"` entries are divided by the configured
    `gridSize` and rounded. An `AssertionError` is raised if two points fall
    into the same cell.
    """
    grid_size = self.config["gridSize"]

    # Distinct quantized (x, z) cells.
    cells = set()
    for point in reachable_points:
        cells.add((round(point["x"] / grid_size), round(point["z"] / grid_size)))

    assert len(reachable_points) == len(cells)

    xmin = min(cell_x for cell_x, _ in cells)
    xmax = max(cell_x for cell_x, _ in cells)
    zmin = min(cell_z for _, cell_z in cells)
    zmax = max(cell_z for _, cell_z in cells)

    return xmin, xmax, zmin, zmax
def path_from_point_to_object_type(
    self, point: Dict[str, float], object_type: str, allowed_error: float
) -> Optional[List[Dict[str, float]]]:
    """Query the controller for a shortest path from `point` to an object type.

    # Parameters

    point : Start position passed as `position` to the controller.
    object_type : Object type passed as `objectType` to the controller.
    allowed_error : Value passed as `allowedError` to the controller.

    # Returns

    The `"corners"` entry of the action return when the action succeeded,
    otherwise `None` (a debug message is logged in that case).
    """
    query = dict(
        action="GetShortestPath",
        objectType=object_type,
        position=point,
        allowedError=allowed_error,
    )
    metadata = self.controller.step(**query).metadata

    if not metadata["lastActionSuccess"]:
        get_logger().debug(
            "Failed to find path for {} in {}. Start point {}, agent state {}.".format(
                object_type,
                self.controller.last_event.metadata["sceneName"],
                point,
                self.agent_state(),
            )
        )
        return None

    return metadata["actionReturn"]["corners"]
def path_from_point_to_point(
    self, position: Dict[str, float], target: Dict[str, float], allowedError: float
) -> Optional[List[Dict[str, float]]]:
    """Query the controller for a shortest path between two points.

    # Parameters

    position : Start position forwarded to the controller.
    target : Goal position forwarded to the controller.
    allowedError : Error tolerance forwarded to the controller.

    # Returns

    The `"corners"` entry of the action return, or `None` when the query or
    the extraction of the corners fails (a debug message is logged).

    # Raises

    ValueError : Propagated unchanged when raised during the query.
    """
    try:
        event = self.controller.step(
            action="GetShortestPathToPoint",
            position=position,
            target=target,
            allowedError=allowedError,
        )
        return event.metadata["actionReturn"]["corners"]
    except Exception as failure:
        # `ValueError`s are not treated as "no path found"; let them through.
        if isinstance(failure, ValueError):
            raise
        get_logger().debug(
            "Failed to find path for {} in {}. Start point {}, agent state {}.".format(
                target,
                self.controller.last_event.metadata["sceneName"],
                position,
                self.agent_state(),
            )
        )
        return None
def agent_state(self, agent_id: int = 0) -> Dict:
    """Return agent position, rotation and horizon.

    # Parameters

    agent_id : Index of the agent whose state is read from the last event.

    # Returns

    A dict holding the agent's position entries as floats, a `"rotation"`
    dict of floats and a `"horizon"` float rounded to one decimal.
    """
    assert 0 <= agent_id < self.agent_count

    meta = self.last_event.events[agent_id].metadata["agent"]

    state = {axis: float(value) for axis, value in meta["position"].items()}
    state["rotation"] = {
        axis: float(value) for axis, value in meta["rotation"].items()
    }
    state["horizon"] = round(float(meta["cameraHorizon"]), 1)
    return state
def randomize_agent_location(
    self,
    seed: Optional[int] = None,
    partial_position: Optional[Dict[str, float]] = None,
    agent_id: int = 0,
) -> Dict[str, Union[Dict[str, float], float]]:
    """Teleports the agent to a random reachable location in the scene.

    Up to 10 random states are drawn with `self.random_reachable_state` and
    tried in turn with a `"TeleportFull"` action. If none of them succeeds,
    a warning is logged and the last drawn state is applied once more with
    `force_action=True`.

    # Parameters

    seed : Optional seed for the first random draw. Later retries are drawn
        without re-seeding; re-seeding on every attempt would reproduce the
        exact same (already failed) state each time.
    partial_position : Entries that override the randomly drawn state
        (e.g. to pin some coordinates). Defaults to no overrides.
    agent_id : Index of the agent to teleport.

    # Returns

    The state of the agent (as given by `self.agent_state`) after teleporting.
    """
    assert 0 <= agent_id < self.agent_count

    if partial_position is None:
        partial_position = {}

    max_attempts = 10
    state: Optional[Dict] = None
    for attempt in range(max_attempts):
        # Seed only the first draw so that retries explore new states while
        # the whole sequence remains reproducible for a given seed.
        draw_seed = seed if attempt == 0 else None
        state = {**self.random_reachable_state(seed=draw_seed), **partial_position}
        self.controller.step("TeleportFull", **state, agentId=agent_id)
        if self.last_action_success:
            break

    if not self.last_action_success:
        get_logger().warning(
            (
                "Randomize agent location in scene {} and current random state {}"
                " with seed {} and partial position {} failed in "
                "10 attempts. Forcing the action."
            ).format(self.scene_name, state, seed, partial_position)
        )
        self.controller.step("TeleportFull", **state, force_action=True, agentId=agent_id)  # type: ignore
        assert self.last_action_success, "Force action failed with {}".format(state)

    return self.agent_state(agent_id=agent_id)
@property
def current_frames(self) -> List[np.ndarray]:
    """Returns rgb images corresponding to the agents' egocentric views.

    One frame is read per agent index in `range(self.agent_count)` from the
    per-agent events of the controller's last event.
    """
    frames = []
    for agent_index in range(self.agent_count):
        agent_event = self.controller.last_event.events[agent_index]
        frames.append(agent_event.frame)
    return frames
def step(
    self,
    action_dict: Optional[Dict[str, Union[str, int, float, Dict]]] = None,
    **kwargs: Union[str, int, float, Dict],
) -> ai2thor.server.Event:
    """Take a step in the ai2thor environment.

    # Parameters

    action_dict : Optional keyword arguments for the controller's `step`,
        given as a dict. It is left unmodified.
    kwargs : Additional keyword arguments; on key clashes these take
        precedence over the entries of `action_dict`.

    # Returns

    Whatever the controller's `step` returns for the merged arguments.
    """
    # Merge into a fresh dict: updating `action_dict` in place would leak the
    # extra `kwargs` into a dict the caller may reuse for later steps.
    merged_action = {**(action_dict or {}), **kwargs}
    return self.controller.step(**merged_action)
class NavToPartnerActorCriticSimpleConvRNN(ActorCriticModel[TupleCategoricalDistr]):
    """Recurrent actor-critic producing a `TupleCategoricalDistr` over actions.

    Observations are embedded with a `SimpleCNN`, passed through an
    `RNNStateEncoder` and mapped to a distribution and values by a
    `TupleLinearActorCriticHead`. The action space is a `gym.spaces.Tuple`
    whose length is taken as the number of agents.
    """

    action_space: gym.spaces.Tuple

    def __init__(
        self,
        action_space: gym.spaces.Tuple,
        observation_space: SpaceDict,
        rgb_uuid: Optional[str] = "rgb",
        hidden_size=512,
        num_rnn_layers=1,
        rnn_type="GRU",
    ):
        """Build the visual encoder, the recurrent encoder and the head.

        # Parameters

        action_space : Tuple action space; the head is sized with the `n`
            of its first entry.
        observation_space : Observation space handed to the visual encoder.
        rgb_uuid : Uuid of the RGB observation used by the visual encoder.
        hidden_size : Size of the visual embedding and of the RNN state.
        num_rnn_layers : Number of layers of the recurrent encoder.
        rnn_type : Type of recurrent unit used by the recurrent encoder.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        self._hidden_size = hidden_size
        self.rgb_uuid = rgb_uuid

        self.visual_encoder = SimpleCNN(
            observation_space=observation_space,
            output_size=hidden_size,
            rgb_uuid=self.rgb_uuid,
            depth_uuid=None,
        )

        # A blind model has no visual embedding to feed into the RNN.
        if self.is_blind:
            rnn_input_size = 0
        else:
            rnn_input_size = self.recurrent_hidden_state_size
        self.state_encoder = RNNStateEncoder(
            rnn_input_size,
            self._hidden_size,
            num_layers=num_rnn_layers,
            rnn_type=rnn_type,
        )

        num_actions = action_space[0].n
        self.actor_critic = TupleLinearActorCriticHead(self._hidden_size, num_actions)

        self.train()

    @property
    def output_size(self):
        """Size of the features fed to the actor-critic head."""
        return self._hidden_size

    @property
    def is_blind(self):
        """Whether the visual encoder reports being blind."""
        return self.visual_encoder.is_blind

    @property
    def num_recurrent_layers(self):
        """Number of recurrent layers of the state encoder."""
        return self.state_encoder.num_recurrent_layers

    @property
    def recurrent_hidden_state_size(self):
        """Size of the recurrent hidden state."""
        return self._hidden_size

    @property
    def num_agents(self):
        """Number of agents, i.e. the length of the tuple action space."""
        return len(self.action_space)

    def _recurrent_memory_specification(self):
        """Describe the `"rnn"` memory: named dimensions and dtype."""
        rnn_dims = (
            ("layer", self.num_recurrent_layers),
            ("sampler", None),
            ("agent", self.num_agents),
            ("hidden", self.recurrent_hidden_state_size),
        )
        return {"rnn": (rnn_dims, torch.float32)}

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Compute distributions and values and update the `"rnn"` memory.

        # Parameters

        observations : Observations passed to the visual encoder.
        memory : Memory whose `"rnn"` tensor holds the recurrent state.
        prev_actions : Unused by this model.
        masks : Masks forwarded to the recurrent encoder.

        # Returns

        The actor-critic output (with empty extras) and the memory with its
        `"rnn"` tensor replaced by the new recurrent state.

        # Raises

        NotImplementedError : If the model is blind.
        """
        if self.is_blind:
            # TODO manage blindness for all agents simultaneously or separate?
            raise NotImplementedError()

        perception_embed = self.visual_encoder(observations)

        # TODO alternative where all agents consume all observations
        encoded, new_rnn_state = self.state_encoder(
            perception_embed, memory.tensor("rnn"), masks
        )

        distributions, values = self.actor_critic(encoded)

        output = ActorCriticOutput(
            distributions=distributions,
            values=values,
            extras={},
        )
        return output, memory.set_tensor("rnn", new_rnn_state)
def detector_tensor(self, boxes, classes, scores, aspect_ratio=1.0):
    """Arrange detections on a `res x res` grid with `maxdets` slots per cell.

    Each detection is assigned to the cell that contains its box centre
    (horizontal bin edges are scaled by `aspect_ratio`). Within a cell the
    detections are sorted in decreasing order and the first `maxdets` kept.

    # Parameters

    boxes : Indexed as `boxes[i, 0..3]`, read as x0, y0, x1, y1.
    classes : Class index of every detection.
    scores : Indexed as `scores[i][classes[i]]` for the detection's score.
    aspect_ratio : Divisor applied to horizontal box quantities.

    # Returns

    `(res_classes, res_boxes)`: an int64 tensor of shape
    `(1, maxdets, res, res)` (0 where a slot is empty) and a tensor of shape
    `(1, 5 * maxdets, res, res)` holding prob, width, height, x, y per slot
    (-1 where a slot is empty).
    """
    grid, keep = self.res, self.maxdets

    # Interior bin edges 1/res, ..., (res - 1)/res.
    edges = np.arange(1, grid) / grid
    x_edges = aspect_ratio * edges

    res_classes = torch.zeros(grid, grid, keep, dtype=torch.int64)  # 0 is background
    # regular range is [0, 1] (vert) or [0, aspect_ratio] (horiz)
    res_boxes = torch.full((grid, grid, keep, 5), -1.0)

    cells = [[[] for _ in range(grid)] for _ in range(grid)]  # grid of lists

    for idx in range(classes.shape[0]):
        x0, y0, x1, y1 = boxes[idx, 0], boxes[idx, 1], boxes[idx, 2], boxes[idx, 3]
        label = classes[idx]

        centre_x = (x0.item() + x1.item()) / 2
        centre_y = (y0.item() + y1.item()) / 2
        col = np.digitize(centre_x, bins=x_edges).item()
        row = np.digitize(centre_y, bins=edges).item()

        cells[row][col].append(
            (
                scores[idx][label].item(),  # prob
                (x1 - x0).item() / aspect_ratio,  # width
                (y1 - y0).item(),  # height
                x0.item() / aspect_ratio,  # x
                y0.item(),  # y
                label.item(),  # class
            )
        )

    for row, row_cells in enumerate(cells):
        for col, detections in enumerate(row_cells):
            ranked = sorted(detections, reverse=True)[:keep]
            for slot, detection in enumerate(ranked):
                *box_features, label = detection
                res_classes[row, col, slot] = label
                # prob, size, top left
                res_boxes[row, col, slot, :] = torch.tensor(box_features)

    res_classes = res_classes.permute(2, 0, 1).unsqueeze(0).contiguous()
    res_boxes = (
        res_boxes.view(grid, grid, -1).permute(2, 0, 1).unsqueeze(0).contiguous()
    )

    return res_classes, res_boxes
class FasterRCNNPreProcessorRoboThor(Preprocessor):
    """Preprocess RGB images with a Faster R-CNN detector.

    The detector is a `BatchedFasterRCNN`; its outputs are exposed under the
    keys `"frcnn_classes"` and `"frcnn_boxes"`.
    """

    # Class names, shared with the wrapped detector.
    COCO_INSTANCE_CATEGORY_NAMES = BatchedFasterRCNN.COCO_INSTANCE_CATEGORY_NAMES

    def __init__(
        self,
        input_uuids: List[str],
        output_uuid: str,
        input_height: int,
        input_width: int,
        max_dets: int,
        detector_spatial_res: int,
        detector_thres: float,
        device: Optional[torch.device] = None,
        device_ids: Optional[List[torch.device]] = None,
        **kwargs: Any,
    ):
        """Create the detector and the observation space of its outputs.

        # Parameters

        input_uuids : Uuids of the consumed observations; exactly one is
            allowed.
        output_uuid : Uuid of the produced observation.
        input_height : Stored as `self.input_height`.
        input_width : Stored as `self.input_width`.
        max_dets : Passed to the detector as `maxdets`.
        detector_spatial_res : Passed to the detector as `res`.
        detector_thres : Passed to the detector as `thres`.
        device : Device to use; the CPU when `None`.
        device_ids : Device ids; when empty or `None`, the indices of all
            CUDA devices reported by torch are used.
        kwargs : Not read here directly.
        """
        # NOTE(review): the local variables of this method are handed to the
        # superclass via `locals()` at the end, so adding or renaming locals
        # may change what the superclass receives — confirm against
        # `prepare_locals_for_super` before refactoring.
        self.input_height = input_height
        self.input_width = input_width
        self.max_dets = max_dets
        self.detector_spatial_res = detector_spatial_res
        self.detector_thres = detector_thres
        self.device = torch.device("cpu") if device is None else device
        self.device_ids = device_ids or cast(
            List[torch.device], list(range(torch.cuda.device_count()))
        )

        self.frcnn: BatchedFasterRCNN = BatchedFasterRCNN(
            thres=self.detector_thres,
            maxdets=self.max_dets,
            res=self.detector_spatial_res,
        )

        spaces: OrderedDict[str, gym.Space] = OrderedDict()
        # One class index per detection slot and grid cell.
        shape = (self.max_dets, self.detector_spatial_res, self.detector_spatial_res)
        spaces["frcnn_classes"] = gym.spaces.Box(
            low=0,  # 0 is bg
            high=len(self.COCO_INSTANCE_CATEGORY_NAMES) - 1,
            shape=shape,
            dtype=np.int64,
        )
        # Five values per detection slot and grid cell.
        shape = (
            self.max_dets * 5,
            self.detector_spatial_res,
            self.detector_spatial_res,
        )
        spaces["frcnn_boxes"] = gym.spaces.Box(low=-np.inf, high=np.inf, shape=shape)

        assert (
            len(input_uuids) == 1
        ), "fasterrcnn preprocessor can only consume one observation type"

        observation_space = SpaceDict(spaces=spaces)

        super().__init__(**prepare_locals_for_super(locals()))

    def to(self, device: torch.device) -> "FasterRCNNPreProcessorRoboThor":
        """Move the detector to `device`, record the device and return self."""
        self.frcnn = self.frcnn.to(device)
        self.device = device
        return self

    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:
        """Run the detector on the single input observation.

        # Parameters

        obs : Observations; the entry under `self.input_uuids[0]` is moved to
            `self.device` and has its last dimension moved to position 1.

        # Returns

        A dict with the detector outputs under `"frcnn_classes"` and
        `"frcnn_boxes"`.
        """
        frames_tensor = (
            obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2)
        )  # bhwc -> bchw (unnormalized)
        classes, boxes = self.frcnn(frames_tensor)

        return {"frcnn_classes": classes, "frcnn_boxes": boxes}
class RGBSensorMultiRoboThor(RGBSensor[RoboThorEnvironment, Task[RoboThorEnvironment]]):
    """Sensor for the RGB images of several agents in RoboTHOR.

    For a running RoboThorEnvironment instance, produces one observation per
    agent from that agent's current RGB frame and stacks them together.
    """

    def __init__(self, agent_count: int = 2, **kwargs):
        """Create the sensor.

        # Parameters

        agent_count : Number of agents to produce observations for.
        kwargs : Forwarded unchanged to the superclass constructor.
        """
        # TODO take all named args from superclass and pass with super().__init__(**prepare_locals_for_super(locals()))
        super().__init__(**kwargs)

        self.agent_count = agent_count
        # Index of the agent whose frame `frame_from_env` currently returns.
        self.agent_id = 0

    def frame_from_env(
        self, env: RoboThorEnvironment, task: Optional[Task[RoboThorEnvironment]]
    ) -> np.ndarray:
        """Return a copy of the current frame of agent `self.agent_id`."""
        frame = env.current_frames[self.agent_id]
        return frame.copy()

    def get_observation(
        self,
        env: RoboThorEnvironment,
        task: Task[RoboThorEnvironment],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Return the superclass observation of every agent, stacked.

        `self.agent_id` is set to each agent index in turn (and is left at
        the last index afterwards); the per-agent observations are stacked
        along a new leading axis.
        """
        per_agent = []
        for agent_index in range(self.agent_count):
            self.agent_id = agent_index
            per_agent.append(super().get_observation(env, task, *args, **kwargs))
        return np.stack(per_agent, axis=0)  # agents first
def get_observation(
    self,
    env: RoboThorEnvironment,
    task: Optional[PointNavTask],
    *args: Any,
    **kwargs: Any
) -> Any:
    """Return the goal of `task` relative to the agent's current pose.

    The agent's position and its rotation about the y axis are read from
    `env.agent_state()`, the goal from `task.task_info["target"]`; the
    result is whatever `self._compute_pointgoal` returns for them.
    """
    axes = ["x", "y", "z"]

    state = env.agent_state()
    position = np.array([state[axis] for axis in axes])
    heading = self.quaternion_from_y_angle(state["rotation"]["y"])

    goal = np.array([task.task_info["target"][axis] for axis in axes])

    return self._compute_pointgoal(position, heading, goal)
def frame_from_env(
    self, env: THOR_ENV_TYPE, task: Optional[THOR_TASK_TYPE]
) -> np.ndarray:
    """Return the depth frame of the last event.

    `env` may be an ai2thor `Controller` itself or an object exposing one as
    its `controller` attribute.
    """
    if isinstance(env, ai2thor.controller.Controller):
        controller = env
    else:
        controller = env.controller
    return controller.last_event.depth_frame
def __init__(
    self,
    scenes: Union[List[str], str],
    object_types: List[str],
    sensors: List[Sensor],
    max_steps: int,
    env_args: Dict[str, Any],
    action_space: gym.Space,
    rewards_config: Dict,
    scene_period: Optional[Union[int, str]] = None,
    max_tasks: Optional[int] = None,
    seed: Optional[int] = None,
    deterministic_cudnn: bool = False,
    allow_flipping: bool = False,
    dataset_first: int = -1,
    dataset_last: int = -1,
    **kwargs,
) -> None:
    """Set up the sampler in "scene list" or "dataset" mode.

    Dataset mode is selected when `dataset_first` or `dataset_last` is
    non-negative; `scenes` must then be the name of a json file of episodes.
    Otherwise `scenes` must be a list.

    # Parameters

    scenes : List of scenes, or the json file name in dataset mode.
    object_types : Stored as `self.object_types`.
    sensors : Stored as `self.sensors`.
    max_steps : Stored as `self.max_steps`.
    env_args : Stored as `self.env_args`.
    action_space : Stored as `self._action_space`.
    rewards_config : Stored as `self.rewards_config`.
    scene_period : Scene switching policy (scene list mode only).
    max_tasks : Number of tasks restored on reset (scene list mode only).
    seed : Passed to `self.set_seed`.
    deterministic_cudnn : If true, `set_deterministic_cudnn()` is called.
    allow_flipping : Stored as `self.allow_flipping`.
    dataset_first : First episode index in dataset mode (0 if negative).
    dataset_last : Last episode index in dataset mode (the last loaded
        episode if negative).
    kwargs : Ignored.
    """
    self.rewards_config = rewards_config
    self.env_args = env_args
    self.scenes = scenes
    self.object_types = object_types

    self.env: Optional[RoboThorEnvironment] = None
    self.sensors = sensors
    self.max_steps = max_steps
    self._action_space = action_space
    self.allow_flipping = allow_flipping

    self.scenes_is_dataset = (dataset_first >= 0) or (dataset_last >= 0)

    if self.scenes_is_dataset:
        assert isinstance(
            self.scenes, str
        ), f"When using a dataset, scenes ({self.scenes}) must be a json file name string"
        with open(self.scenes, "r") as episodes_file:
            self.dataset_episodes = json.load(episodes_file)

        last_available = len(self.dataset_episodes) - 1
        self.dataset_first = dataset_first if dataset_first >= 0 else 0
        self.dataset_last = dataset_last if dataset_last >= 0 else last_available
        assert (
            0 <= self.dataset_first <= self.dataset_last
        ), f"dataset_last {dataset_last} must be >= dataset_first {dataset_first} >= 0"

        # Both ends of the range are inclusive.
        self.reset_tasks = self.dataset_last - self.dataset_first + 1
    else:
        assert isinstance(
            self.scenes, List
        ), f"When not using a dataset, scenes ({self.scenes}) must be a list"
        self.scene_counter: Optional[int] = None
        self.scene_order: Optional[List[str]] = None
        self.scene_id: Optional[int] = None
        # default makes a random choice
        self.scene_period: Optional[Union[str, int]] = scene_period
        self.max_tasks: Optional[int] = None
        self.reset_tasks = max_tasks

    self._last_sampled_task: Optional[ObjectNavTask] = None

    self.seed: Optional[int] = None
    self.set_seed(seed)

    if deterministic_cudnn:
        set_deterministic_cudnn()

    self.reset()
def sample_scene(self, force_advance_scene: bool):
    """Choose the scene for the next task and update the sampling state.

    The behavior depends on `self.scene_period`:

    * `None`: a scene index is drawn uniformly at random.
    * `"manual"`: the scene only changes through `force_advance_scene`.
    * an `int`: the current scene is kept until `self.scene_counter` reaches
      the period, then the next scene in `self.scene_order` is used (the
      order is reshuffled after its last entry).

    If `self.max_tasks` is not `None` it is decremented by one.

    # Parameters

    force_advance_scene : If true, first advance `self.scene_id` by one
        (wrapping around and reshuffling `self.scene_order` on wrap). A
        warning is logged if `self.scene_period` is not `"manual"`.

    # Returns

    The entry of `self.scenes` selected by `self.scene_order[self.scene_id]`.

    # Raises

    NotImplementedError : If `self.scene_period` is not `None`, `"manual"`
        or an `int`.
    """
    if force_advance_scene:
        if self.scene_period != "manual":
            get_logger().warning(
                "When sampling scene, have `force_advance_scene == True`"
                " but `self.scene_period` is not equal to 'manual',"
                " this may cause unexpected behavior."
            )
        self.scene_id = (1 + self.scene_id) % len(self.scenes)
        if self.scene_id == 0:
            random.shuffle(self.scene_order)

    if self.scene_period is None:
        # Random scene
        self.scene_id = random.randint(0, len(self.scenes) - 1)
    elif self.scene_period == "manual":
        pass
    elif isinstance(self.scene_period, int):
        # The type is checked before comparing so that an unsupported period
        # (e.g. an unknown string) reaches the error below instead of failing
        # inside the comparison.
        if self.scene_counter >= self.scene_period:
            if self.scene_id == len(self.scene_order) - 1:
                # Randomize scene order for next iteration
                random.shuffle(self.scene_order)
                # Move to next scene
                self.scene_id = 0
            else:
                # Move to next scene
                self.scene_id += 1
            # Reset scene counter
            self.scene_counter = 1
        else:
            # Stay in current scene
            self.scene_counter += 1
    else:
        raise NotImplementedError(
            "Invalid scene_period {}".format(self.scene_period)
        )

    if self.max_tasks is not None:
        self.max_tasks -= 1

    return self.scenes[int(self.scene_order[self.scene_id])]
[o["objectType"] for o in self.env.last_event.metadata["objects"]] ) task_info = {"scene": scene} for ot in random.sample(self.object_types, len(self.object_types)): if ot in object_types_in_scene: task_info["object_type"] = ot break if len(task_info) == 0: get_logger().warning( "Scene {} does not contain any" " objects of any of the types {}.".format(scene, self.object_types) ) task_info["initial_position"] = {k: pose[k] for k in ["x", "y", "z"]} task_info["initial_orientation"] = cast(Dict[str, float], pose["rotation"])[ "y" ] else: assert self.max_tasks is not None next_task_id = self.dataset_first + self.max_tasks - 1 # get_logger().debug("task {}".format(next_task_id)) assert ( self.dataset_first <= next_task_id <= self.dataset_last ), "wrong task_id {} for min {} max {}".format( next_task_id, self.dataset_first, self.dataset_last ) task_info = copy.deepcopy(self.dataset_episodes[next_task_id]) scene = task_info["scene"] if self.env is not None: if scene.replace("_physics", "") != self.env.scene_name.replace( "_physics", "" ): self.env.reset(scene_name=scene) else: self.env = self._create_environment() self.env.reset(scene_name=scene) self.env.step( { "action": "TeleportFull", **{k: float(v) for k, v in task_info["initial_position"].items()}, "rotation": { "x": 0.0, "y": float(task_info["initial_orientation"]), "z": 0.0, }, "horizon": 0.0, "standing": True, } ) assert self.env.last_action_success, "Failed to reset agent for {}".format( task_info ) self.max_tasks -= 1 # task_info["actions"] = [] # TODO populated by Task(Generic[EnvType]).step(...) 
but unused if self.allow_flipping and random.random() > 0.5: task_info["mirrored"] = True else: task_info["mirrored"] = False self._last_sampled_task = ObjectNavTask( env=self.env, sensors=self.sensors, task_info=task_info, max_steps=self.max_steps, action_space=self._action_space, reward_configs=self.rewards_config, ) return self._last_sampled_task def reset(self): if not self.scenes_is_dataset: self.scene_counter = 0 self.scene_order = list(range(len(self.scenes))) random.shuffle(self.scene_order) self.scene_id = 0 self.max_tasks = self.reset_tasks def set_seed(self, seed: int): self.seed = seed if seed is not None: set_seed(seed) class ObjectNavDatasetTaskSampler(TaskSampler): def __init__( self, scenes: List[str], scene_directory: str, sensors: List[Sensor], max_steps: int, env_args: Dict[str, Any], action_space: gym.Space, rewards_config: Dict, seed: Optional[int] = None, deterministic_cudnn: bool = False, loop_dataset: bool = True, allow_flipping=False, env_class=RoboThorEnvironment, randomize_materials_in_training: bool = False, **kwargs, ) -> None: self.rewards_config = rewards_config self.env_args = env_args self.scenes = scenes self.episodes = { scene: ObjectNavDatasetTaskSampler.load_dataset( scene, scene_directory + "/episodes" ) for scene in scenes } # Only keep episodes containing desired objects if "object_types" in kwargs: self.episodes = { scene: [ ep for ep in episodes if ep["object_type"] in kwargs["object_types"] ] for scene, episodes in self.episodes.items() } self.episodes = { scene: episodes for scene, episodes in self.episodes.items() if len(episodes) > 0 } self.scenes = [scene for scene in self.scenes if scene in self.episodes] self.env_class = env_class self.object_types = [ ep["object_type"] for scene in self.episodes for ep in self.episodes[scene] ] self.env: Optional[RoboThorEnvironment] = None self.sensors = sensors self.max_steps = max_steps self._action_space = action_space self.allow_flipping = allow_flipping self.scene_counter: 
@staticmethod
def load_dataset(scene: str, base_directory: str) -> List[Dict]:
    """Load and shuffle the episodes stored for a scene.

    Reads the gzip-compressed, UTF-8 encoded JSON file
    `<base_directory>/<scene>.json.gz`.

    # Parameters

    scene : Scene name, used as the base name of the file.
    base_directory : Directory holding the episode files, with or without a
        trailing "/". An empty string refers to the current working directory.

    # Returns

    The decoded JSON content, shuffled in place with `random.shuffle`.
    """
    # Only insert a separator when the directory does not already supply one;
    # `endswith` (unlike indexing with [-1]) is safe for an empty string.
    separator = "" if not base_directory or base_directory.endswith("/") else "/"
    filename = "".join([base_directory, separator, scene, ".json.gz"])
    # The context manager closes the file even if reading or decoding fails.
    with gzip.GzipFile(filename, "r") as fin:
        data = json.loads(fin.read().decode("utf-8"))
    random.shuffle(data)
    return data


@staticmethod
def load_distance_cache_from_file(scene: str, base_directory: str) -> Dict:
    """Load the JSON content stored for a scene without shuffling it.

    Reads the gzip-compressed, UTF-8 encoded JSON file
    `<base_directory>/<scene>.json.gz`.

    # Parameters

    scene : Scene name, used as the base name of the file.
    base_directory : Directory holding the files, with or without a trailing
        "/". An empty string refers to the current working directory.

    # Returns

    The decoded JSON content.
    """
    separator = "" if not base_directory or base_directory.endswith("/") else "/"
    filename = "".join([base_directory, separator, scene, ".json.gz"])
    # The context manager closes the file even if reading or decoding fails.
    with gzip.GzipFile(filename, "r") as fin:
        return json.loads(fin.read().decode("utf-8"))
""" return float("inf") if self.max_tasks is None else self.max_tasks @property def total_unique(self) -> Optional[Union[int, float]]: return self.reset_tasks @property def last_sampled_task(self) -> Optional[ObjectNavTask]: return self._last_sampled_task def close(self) -> None: if self.env is not None: self.env.stop() @property def all_observation_spaces_equal(self) -> bool: """Check if observation spaces equal. # Returns True if all Tasks that can be sampled by this sampler have the same observation space. Otherwise False. """ return True @property def length(self) -> Union[int, float]: """Length. # Returns Number of total tasks remaining that can be sampled. Can be float('inf'). """ return float("inf") if self.max_tasks is None else self.max_tasks def next_task(self, force_advance_scene: bool = False) -> Optional[ObjectNavTask]: if self.max_tasks is not None and self.max_tasks <= 0: return None if self.episode_index >= len(self.episodes[self.scenes[self.scene_index]]): self.scene_index = (self.scene_index + 1) % len(self.scenes) # shuffle the new list of episodes to train on random.shuffle(self.episodes[self.scenes[self.scene_index]]) self.episode_index = 0 scene = self.scenes[self.scene_index] episode = self.episodes[scene][self.episode_index] if self.env is None: self.env = self._create_environment() if scene.replace("_physics", "") != self.env.scene_name.replace("_physics", ""): self.env.reset(scene_name=scene) else: self.env.reset_object_filter() self.env.set_object_filter( object_ids=[ o["objectId"] for o in self.env.last_event.metadata["objects"] if o["objectType"] == episode["object_type"] ] ) # only randomize materials in train scenes were_materials_randomized = False if self.randomize_materials_in_training: if ( "Train" in scene or int(scene.replace("FloorPlan", "").replace("_physics", "")) % 100 < 21 ): were_materials_randomized = True self.env.controller.step(action="RandomizeMaterials") task_info = { "scene": scene, "object_type": 
episode["object_type"], "materials_randomized": were_materials_randomized, } if len(task_info) == 0: get_logger().warning( "Scene {} does not contain any" " objects of any of the types {}.".format(scene, self.object_types) ) task_info["initial_position"] = episode["initial_position"] task_info["initial_orientation"] = episode["initial_orientation"] task_info["initial_horizon"] = episode.get("initial_horizon", 0) task_info["distance_to_target"] = episode.get("shortest_path_length") task_info["path_to_target"] = episode.get("shortest_path") task_info["object_type"] = episode["object_type"] task_info["id"] = episode["id"] if self.allow_flipping and random.random() > 0.5: task_info["mirrored"] = True else: task_info["mirrored"] = False self.episode_index += 1 if self.max_tasks is not None: self.max_tasks -= 1 if not self.env.teleport( pose=episode["initial_position"], rotation=episode["initial_orientation"], horizon=episode.get("initial_horizon", 0), ): return self.next_task() self._last_sampled_task = ObjectNavTask( env=self.env, sensors=self.sensors, task_info=task_info, max_steps=self.max_steps, action_space=self._action_space, reward_configs=self.rewards_config, ) return self._last_sampled_task def reset(self): self.episode_index = 0 self.scene_index = 0 self.max_tasks = self.reset_tasks def set_seed(self, seed: int): self.seed = seed if seed is not None: set_seed(seed) class PointNavTaskSampler(TaskSampler): def __init__( self, scenes: List[str], # object_types: List[str], # scene_to_episodes: List[Dict[str, Any]], sensors: List[Sensor], max_steps: int, env_args: Dict[str, Any], action_space: gym.Space, rewards_config: Dict, scene_period: Optional[Union[int, str]] = None, max_tasks: Optional[int] = None, seed: Optional[int] = None, deterministic_cudnn: bool = False, **kwargs, ) -> None: self.rewards_config = rewards_config self.env_args = env_args self.scenes = scenes # self.object_types = object_types # self.scene_to_episodes = scene_to_episodes # 
def sample_scene(self, force_advance_scene: bool):
    """Choose the scene for the next task and update the scene bookkeeping.

    The behavior depends on `self.scene_period`:

    * `None`: a position in `self.scene_order` is drawn uniformly at random.
    * `"manual"`: the position only changes when `force_advance_scene` is
      true.
    * an integer: the sampler stays at a position until `self.scene_counter`
      reaches the period and then moves to the next one, reshuffling
      `self.scene_order` when it wraps around.

    `self.max_tasks` is decremented when it is not `None`.

    # Parameters

    force_advance_scene : If true, first move to the next position in
        `self.scene_order` (reshuffling on wrap-around). A warning is logged
        when this is requested while `self.scene_period` is not "manual".

    # Returns

    The entry of `self.scenes` selected by the current position in
    `self.scene_order`.

    # Raises

    NotImplementedError : If `self.scene_period` is neither `None`,
        "manual" nor an integer.
    """
    if force_advance_scene:
        if self.scene_period != "manual":
            get_logger().warning(
                "When sampling scene, have `force_advance_scene == True`"
                "but `self.scene_period` is not equal to 'manual',"
                "this may cause unexpected behavior."
            )
        self.scene_id = (1 + self.scene_id) % len(self.scenes)
        if self.scene_id == 0:
            random.shuffle(self.scene_order)

    period = self.scene_period
    if period is None:
        # Random scene
        self.scene_id = random.randint(0, len(self.scenes) - 1)
    elif period == "manual":
        pass
    elif isinstance(period, int):
        # The type is checked before comparing so that an unsupported value
        # reaches the explicit error below instead of failing inside `>=`.
        if self.scene_counter >= period:
            if self.scene_id == len(self.scene_order) - 1:
                # Randomize scene order for next iteration
                random.shuffle(self.scene_order)
                # Move to next scene
                self.scene_id = 0
            else:
                # Move to next scene
                self.scene_id += 1
            # Reset scene counter
            self.scene_counter = 1
        else:
            # Stay in current scene
            self.scene_counter += 1
    else:
        raise NotImplementedError(
            "Invalid scene_period {}".format(self.scene_period)
        )

    if self.max_tasks is not None:
        self.max_tasks -= 1

    return self.scenes[int(self.scene_order[self.scene_id])]
def reset(self):
    """Restore the sampler to its initial state.

    The task budget is reset to `self.reset_tasks`, the scene position and
    counter go back to zero, and a freshly shuffled ordering of all scene
    indices is stored in `self.scene_order`.
    """
    self.max_tasks = self.reset_tasks
    self.scene_id = 0
    self.scene_counter = 0

    shuffled_indices = list(range(len(self.scenes)))
    random.shuffle(shuffled_indices)
    self.scene_order = shuffled_indices
def close(self) -> None:
    """Stop the environment if one has been created."""
    env = self.env
    if env is None:
        return
    env.stop()
""" return True def next_task(self, force_advance_scene: bool = False) -> Optional[PointNavTask]: if self.max_tasks is not None and self.max_tasks <= 0: return None if self.episode_index >= len(self.episodes[self.scenes[self.scene_index]]): self.scene_index = (self.scene_index + 1) % len(self.scenes) # shuffle the new list of episodes to train on if self.shuffle_dataset: random.shuffle(self.episodes[self.scenes[self.scene_index]]) self.episode_index = 0 scene = self.scenes[self.scene_index] episode = self.episodes[scene][self.episode_index] if self.env is not None: if scene.replace("_physics", "") != self.env.scene_name.replace( "_physics", "" ): self.env.reset(scene_name=scene, filtered_objects=[]) else: self.env = self._create_environment() self.env.reset(scene_name=scene, filtered_objects=[]) def to_pos(s): if isinstance(s, (Dict, Tuple)): return s if isinstance(s, float): return {"x": 0, "y": s, "z": 0} return str_to_pos_for_cache(s) for k in ["initial_position", "initial_orientation", "target_position"]: episode[k] = to_pos(episode[k]) task_info = { "scene": scene, "initial_position": episode["initial_position"], "initial_orientation": episode["initial_orientation"], "target": episode["target_position"], "shortest_path": episode["shortest_path"], "distance_to_target": episode["shortest_path_length"], "id": episode["id"], } if self.allow_flipping and random.random() > 0.5: task_info["mirrored"] = True else: task_info["mirrored"] = False self.episode_index += 1 if self.max_tasks is not None: self.max_tasks -= 1 if not self.env.teleport( pose=episode["initial_position"], rotation=episode["initial_orientation"] ): return self.next_task() self._last_sampled_task = PointNavTask( env=self.env, sensors=self.sensors, task_info=task_info, max_steps=self.max_steps, action_space=self._action_space, reward_configs=self.rewards_config, ) return self._last_sampled_task def reset(self): self.episode_index = 0 self.scene_index = 0 self.max_tasks = self.reset_tasks def 
@property
def length(self) -> Union[int, float]:
    """Length.

    # Returns

    Number of total tasks remaining that can be sampled. This is
    float('inf') when `self.max_tasks` is `None`.
    """
    remaining = self.max_tasks
    if remaining is None:
        return float("inf")
    return remaining
""" return float("inf") if self.max_tasks is None else self.max_tasks @property def total_unique(self) -> Optional[Union[int, float]]: return self.reset_tasks @property def last_sampled_task(self) -> Optional[NavToPartnerTask]: return self._last_sampled_task def close(self) -> None: if self.env is not None: self.env.stop() @property def all_observation_spaces_equal(self) -> bool: """Check if observation spaces equal. # Returns True if all Tasks that can be sampled by this sampler have the same observation space. Otherwise False. """ return True def sample_scene(self, force_advance_scene: bool): if force_advance_scene: if self.scene_period != "manual": get_logger().warning( "When sampling scene, have `force_advance_scene == True`" "but `self.scene_period` is not equal to 'manual'," "this may cause unexpected behavior." ) self.scene_id = (1 + self.scene_id) % len(self.scenes) if self.scene_id == 0: random.shuffle(self.scene_order) if self.scene_period is None: # Random scene self.scene_id = random.randint(0, len(self.scenes) - 1) elif self.scene_period == "manual": pass elif self.scene_counter >= cast(int, self.scene_period): if self.scene_id == len(self.scene_order) - 1: # Randomize scene order for next iteration random.shuffle(self.scene_order) # Move to next scene self.scene_id = 0 else: # Move to next scene self.scene_id += 1 # Reset scene counter self.scene_counter = 1 elif isinstance(self.scene_period, int): # Stay in current scene self.scene_counter += 1 else: raise NotImplementedError( "Invalid scene_period {}".format(self.scene_period) ) if self.max_tasks is not None: self.max_tasks -= 1 return self.scenes[int(self.scene_order[self.scene_id])] def next_task( self, force_advance_scene: bool = False ) -> Optional[NavToPartnerTask]: if self.max_tasks is not None and self.max_tasks <= 0: return None scene = self.sample_scene(force_advance_scene) if self.env is not None: if scene.replace("_physics", "") != self.env.scene_name.replace( "_physics", "" ): 
def reset(self):
    """Restore the sampler to its initial state.

    A new random ordering of all scene indices is stored in
    `self.scene_order`, the scene position and counter are set to zero, and
    the task budget is restored from `self.reset_tasks`.
    """
    order = list(range(len(self.scenes)))
    random.shuffle(order)

    self.scene_order = order
    self.scene_counter = 0
    self.scene_id = 0
    self.max_tasks = self.reset_tasks
def spl_metric(
    success: bool, optimal_distance: float, travelled_distance: float
) -> Optional[float]:
    """Compute success weighted by path length for a single episode.

    # Parameters

    success : Whether the episode was successful.
    optimal_distance : Length of the shortest path; a negative value means
        that no such length is available.
    travelled_distance : Length of the path actually followed.

    # Returns

    `0.0` for a failed episode; `None` for a successful episode with a
    negative `optimal_distance`; for a zero `optimal_distance`, `1.0` if
    nothing was travelled and `0.0` otherwise; in all remaining cases
    `optimal_distance` divided by the larger of the two distances.
    """
    if not success:
        return 0.0
    if optimal_distance < 0:
        return None
    if optimal_distance == 0:
        return 1.0 if travelled_distance == 0 else 0.0
    # Never credit a path shorter than the optimal one.
    effective_length = max(travelled_distance, optimal_distance)
    return optimal_distance / effective_length
def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray:
    """Return the environment's current image for the requested mode.

    # Parameters

    mode : "rgb" for `self.env.current_frame` or "depth" for
        `self.env.current_depth`; any other value fails the assertion.

    # Returns

    The attribute of `self.env` that corresponds to `mode`.
    """
    assert mode in ["rgb", "depth"], "only rgb and depth rendering is implemented"
    env = self.env
    if mode == "depth":
        return env.current_depth
    if mode == "rgb":
        return env.current_frame
def judge(self) -> float:
    """Judge the last event.

    The reward is the step penalty plus the shaping term. If the end action
    was taken and `self._success` is not `None`, the success or failure
    reward is added; otherwise, if this step reaches `self.max_steps`, the
    optional "reached_max_steps_reward" is added.

    # Returns

    The reward as a `float`; the same value is appended to `self._rewards`.
    """
    config = self.reward_configs
    total = config["step_penalty"]
    total += self.shaping()

    if self._took_end_action:
        if self._success is not None:
            outcome_key = (
                "goal_success_reward" if self._success else "failed_stop_reward"
            )
            total += config[outcome_key]
    elif self.num_steps_taken() + 1 >= self.max_steps:
        total += config.get("reached_max_steps_reward", 0.0)

    total = float(total)
    self._rewards.append(total)
    return total
[self.env.agent_state()] self.task_info["taken_actions"] = [] self.task_info["action_names"] = self.class_action_names() if self._all_metadata_available: self.last_geodesic_distance = self.env.distance_to_object_type( self.task_info["object_type"] ) self.optimal_distance = self.last_geodesic_distance self.closest_geo_distance = self.last_geodesic_distance self.last_expert_action: Optional[int] = None self.last_action_success = False @property def action_space(self): return gym.spaces.Discrete(len(self._actions)) def reached_terminal_state(self) -> bool: return self._took_end_action @classmethod def class_action_names(cls, **kwargs) -> Tuple[str, ...]: return cls._actions def close(self) -> None: self.env.stop() def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult: assert isinstance(action, int) action = cast(int, action) action_str = self.class_action_names()[action] if self.mirror: if action_str == ROTATE_RIGHT: action_str = ROTATE_LEFT elif action_str == ROTATE_LEFT: action_str = ROTATE_RIGHT self.task_info["taken_actions"].append(action_str) if action_str == END: self._took_end_action = True self._success = self._is_goal_in_range() self.last_action_success = self._success else: self.env.step({"action": action_str}) self.last_action_success = self.env.last_action_success pose = self.env.agent_state() self.path.append({k: pose[k] for k in ["x", "y", "z"]}) self.task_info["followed_path"].append(pose) if len(self.path) > 1: self.travelled_distance += IThorEnvironment.position_dist( p0=self.path[-1], p1=self.path[-2], ignore_y=True ) step_result = RLStepResult( observation=self.get_observations(), reward=self.judge(), done=self.is_done(), info={"last_action_success": self.last_action_success, "action": action}, ) return step_result def render(self, mode: str = "rgb", *args, **kwargs) -> np.ndarray: assert mode in ["rgb", "depth"], "only rgb and depth rendering is implemented" if mode == "rgb": frame = self.env.current_frame.copy() elif mode == "depth": 
frame = self.env.current_depth.copy() else: raise NotImplementedError(f"Mode '{mode}' is not supported.") if self.mirror: frame = frame[:, ::-1, :].copy() # horizontal flip # print("mirrored render") return frame def _is_goal_in_range(self) -> bool: return any( o["objectType"] == self.task_info["object_type"] for o in self.env.visible_objects() ) def shaping(self) -> float: rew = 0.0 if self.reward_configs["shaping_weight"] == 0.0: return rew geodesic_distance = self.env.distance_to_object_type( self.task_info["object_type"] ) # Ensuring the reward magnitude is not greater than the total distance moved max_reward_mag = 0.0 if len(self.path) >= 2: p0, p1 = self.path[-2:] max_reward_mag = math.sqrt( (p0["x"] - p1["x"]) ** 2 + (p0["z"] - p1["z"]) ** 2 ) if self.reward_configs.get("positive_only_reward", False): if geodesic_distance > 0.5: rew = max(self.closest_geo_distance - geodesic_distance, 0) else: if ( self.last_geodesic_distance > -0.5 and geodesic_distance > -0.5 ): # (robothor limits) rew += self.last_geodesic_distance - geodesic_distance self.last_geodesic_distance = geodesic_distance self.closest_geo_distance = min(self.closest_geo_distance, geodesic_distance) return ( max( min(rew, max_reward_mag), -max_reward_mag, ) * self.reward_configs["shaping_weight"] ) def judge(self) -> float: """Judge the last event.""" reward = self.reward_configs["step_penalty"] reward += self.shaping() if self._took_end_action: if self._success: reward += self.reward_configs["goal_success_reward"] else: reward += self.reward_configs["failed_stop_reward"] elif self.num_steps_taken() + 1 >= self.max_steps: reward += self.reward_configs.get("reached_max_steps_reward", 0.0) self._rewards.append(float(reward)) return float(reward) def get_observations(self, **kwargs) -> Any: obs = self.sensor_suite.get_observations(env=self.env, task=self) if self.mirror: for o in obs: if ("rgb" in o or "depth" in o) and isinstance(obs[o], np.ndarray): if ( len(obs[o].shape) == 3 ): # heuristic to 
determine this is a visual sensor obs[o] = obs[o][:, ::-1, :].copy() # horizontal flip elif len(obs[o].shape) == 2: # perhaps only two axes for depth? obs[o] = obs[o][:, ::-1].copy() # horizontal flip return obs def metrics(self) -> Dict[str, Any]: if not self.is_done(): return {} metrics = super(ObjectNavTask, self).metrics() if self._all_metadata_available: dist2tget = self.env.distance_to_object_type(self.task_info["object_type"]) spl = spl_metric( success=self._success, optimal_distance=self.optimal_distance, travelled_distance=self.travelled_distance, ) metrics = { **metrics, "success": self._success, "total_reward": np.sum(self._rewards), "dist_to_target": dist2tget, "spl": 0 if spl is None else spl, } return metrics def query_expert(self, end_action_only: bool = False, **kwargs) -> Tuple[int, bool]: if self._is_goal_in_range(): return self.class_action_names().index(END), True if end_action_only: return 0, False else: try: self.env.step( { "action": "ObjectNavExpertAction", "objectType": self.task_info["object_type"], } ) except ValueError: raise RuntimeError( "Attempting to use the action `ObjectNavExpertAction` which is not supported by your version of" " AI2-THOR. The action `ObjectNavExpertAction` is experimental. In order" " to enable this action, please install the (in development) version of AI2-THOR. Through pip" " this can be done with the command" " `pip install -e git+https://github.com/allenai/ai2thor.git@7d914cec13aae62298f5a6a816adb8ac6946c61f#egg=ai2thor`." ) if self.env.last_action_success: expert_action: Optional[str] = self.env.last_event.metadata[ "actionReturn" ] if isinstance(expert_action, str): if self.mirror: if expert_action == "RotateLeft": expert_action = "RotateRight" elif expert_action == "RotateRight": expert_action = "RotateLeft" return self.class_action_names().index(expert_action), True else: # This should have been caught by self._is_goal_in_range()... 
return 0, False else: return 0, False class NavToPartnerTask(Task[RoboThorEnvironment]): _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT) def __init__( self, env: RoboThorEnvironment, sensors: List[Sensor], task_info: Dict[str, Any], max_steps: int, reward_configs: Dict[str, Any], **kwargs, ) -> None: super().__init__( env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs ) self.reward_configs = reward_configs assert self.env.agent_count == 2, "NavToPartnerTask only defined for 2 agents!" pose1 = self.env.agent_state(0) pose2 = self.env.agent_state(1) self.last_geodesic_distance = self.env.distance_cache.find_distance( self.env.scene_name, {k: pose1[k] for k in ["x", "y", "z"]}, {k: pose2[k] for k in ["x", "y", "z"]}, self.env.distance_from_point_to_point, ) self.task_info["followed_path1"] = [pose1] self.task_info["followed_path2"] = [pose2] self.task_info["action_names"] = self.class_action_names() @property def action_space(self): return gym.spaces.Tuple( [ gym.spaces.Discrete(len(self._actions)), gym.spaces.Discrete(len(self._actions)), ] ) def reached_terminal_state(self) -> bool: return ( self.last_geodesic_distance <= self.reward_configs["max_success_distance"] ) @classmethod def class_action_names(cls, **kwargs) -> Tuple[str, ...]: return cls._actions def close(self) -> None: self.env.stop() def _step(self, action: Tuple[int, int]) -> RLStepResult: assert isinstance(action, tuple) action_str1 = self.class_action_names()[action[0]] action_str2 = self.class_action_names()[action[1]] self.env.step({"action": action_str1, "agentId": 0}) self.last_action_success1 = self.env.last_action_success self.env.step({"action": action_str2, "agentId": 1}) self.last_action_success2 = self.env.last_action_success pose1 = self.env.agent_state(0) self.task_info["followed_path1"].append(pose1) pose2 = self.env.agent_state(1) self.task_info["followed_path2"].append(pose2) self.last_geodesic_distance = self.env.distance_cache.find_distance( 
class ThorPositionTo2DFrameTranslator(object):
    """Convert THOR world positions into (row, col) indices of a top-down
    frame rendered by an orthographic camera."""

    def __init__(
        self,
        frame_shape_rows_cols: Tuple[int, int],
        cam_position: Sequence[float],
        orth_size: float,
    ):
        """
        # Parameters

        frame_shape_rows_cols : Frame size as (rows, cols).
        cam_position : Camera position as (x, y, z); only x and z are used.
        orth_size : Orthographic half-extent; the frame spans `2 * orth_size`
            world units along each axis.
        """
        self.frame_shape = frame_shape_rows_cols
        cam_x, cam_z = cam_position[0], cam_position[2]
        self.lower_left = np.array((cam_x, cam_z)) - orth_size
        self.span = 2 * orth_size

    def __call__(self, position: Sequence[float]):
        """Return the integer (row, col) array for `position`, given either
        as (x, y, z) or as (x, z)."""
        if len(position) == 3:
            x, _, z = position
        else:
            x, z = position

        # Normalized coordinates relative to the lower-left frame corner.
        frac_x, frac_z = (np.array((x, z)) - self.lower_left) / self.span

        # Rows grow downwards in the image while z grows upwards, hence 1 - z.
        row = round(self.frame_shape[0] * (1.0 - frac_z))
        col = round(self.frame_shape[1] * frac_x)
        return np.array((row, col), dtype=int)
Sequence[Tuple[str, int, int, int, int]] ) -> Generator[str, None, None]: for scenes in all_scenes: for wall in range(scenes[1], scenes[2] + 1): for furniture in range(scenes[3], scenes[4] + 1): roomname = scenes[0].format(wall, furniture) yield roomname def cached_map_data_path(self, roomname: str) -> str: return os.path.join(self.room_path, "map_data__{}.json".format(roomname)) def get_translator(self) -> Dict[str, Any]: roomname = list(ThorViz.iterate_scenes(self.scenes))[0] json_file = self.cached_map_data_path(roomname) if not os.path.exists(json_file): self.make_controller() self.controller.reset(roomname) map_data = self.get_agent_map_data() get_logger().info("Dumping {}".format(json_file)) with open(json_file, "w") as f: json.dump(map_data, f, indent=4, sort_keys=True) else: with open(json_file, "r") as f: map_data = json.load(f) pos_translator = ThorPositionTo2DFrameTranslator( self.viz_rows_cols, self.position_to_tuple(map_data["cam_position"]), map_data["cam_orth_size"], ) map_data["pos_translator"] = pos_translator get_logger().debug("Using map_data {}".format(map_data)) return map_data def cached_image_path(self, roomname: str) -> str: return os.path.join( self.room_path, "{}__r{}_c{}.png".format(roomname, *self.viz_rows_cols) ) def make_top_down_views(self) -> Dict[str, np.ndarray]: top_downs = {} for roomname in self.iterate_scenes(self.scenes): fname = self.cached_image_path(roomname) if not os.path.exists(fname): self.make_controller() self.dump_top_down_view(roomname, fname) top_downs[roomname] = cv2.imread(fname) return top_downs def crop_viz_image(self, viz_image: np.ndarray) -> np.ndarray: # Top-down view of room spans vertically near the center of the frame in RoboTHOR: y_min = int(self.viz_rows_cols[0] * 0.3) y_max = int(self.viz_rows_cols[0] * 0.8) # But it covers approximately the entire width: x_min = 0 x_max = self.viz_rows_cols[1] cropped_viz_image = viz_image[y_min:y_max, x_min:x_max, :] return cropped_viz_image def 
make_controller(self): if self.controller is None: self.controller = Controller() self.controller.step({"action": "ChangeQuality", "quality": "Very High"}) self.controller.step( { "action": "ChangeResolution", "x": self.viz_rows_cols[1], "y": self.viz_rows_cols[0], } ) def get_agent_map_data(self): self.controller.step({"action": "ToggleMapView"}) cam_position = self.controller.last_event.metadata["cameraPosition"] cam_orth_size = self.controller.last_event.metadata["cameraOrthSize"] to_return = { "cam_position": cam_position, "cam_orth_size": cam_orth_size, } self.controller.step({"action": "ToggleMapView"}) return to_return @staticmethod def position_to_tuple(position: Dict[str, float]) -> Tuple[float, float, float]: return position["x"], position["y"], position["z"] @staticmethod def add_lines_to_map( ps: Sequence[Any], frame: np.ndarray, pos_translator: ThorPositionTo2DFrameTranslator, opacity: float, color: Optional[Tuple[int, ...]] = None, ) -> np.ndarray: if len(ps) <= 1: return frame if color is None: color = (255, 0, 0) img1 = Image.fromarray(frame.astype("uint8"), "RGB").convert("RGBA") img2 = Image.new("RGBA", frame.shape[:-1]) # Use RGBA opacity = int(round(255 * opacity)) # Define transparency for the triangle. draw = ImageDraw.Draw(img2) for i in range(len(ps) - 1): draw.line( tuple(reversed(pos_translator(ps[i]))) + tuple(reversed(pos_translator(ps[i + 1]))), fill=color + (opacity,), width=int(frame.shape[0] / 100), ) img = Image.alpha_composite(img1, img2) return np.array(img.convert("RGB")) @staticmethod def add_line_to_map( p0: Any, p1: Any, frame: np.ndarray, pos_translator: ThorPositionTo2DFrameTranslator, opacity: float, color: Optional[Tuple[int, ...]] = None, ) -> np.ndarray: if p0 == p1: return frame if color is None: color = (255, 0, 0) img1 = Image.fromarray(frame.astype("uint8"), "RGB").convert("RGBA") img2 = Image.new("RGBA", frame.shape[:-1]) # Use RGBA opacity = int(round(255 * opacity)) # Define transparency for the triangle. 
draw = ImageDraw.Draw(img2) draw.line( tuple(reversed(pos_translator(p0))) + tuple(reversed(pos_translator(p1))), fill=color + (opacity,), width=int(frame.shape[0] / 100), ) img = Image.alpha_composite(img1, img2) return np.array(img.convert("RGB")) @staticmethod def add_agent_view_triangle( position: Any, rotation: Dict[str, float], frame: np.ndarray, pos_translator: ThorPositionTo2DFrameTranslator, scale: float = 1.0, opacity: float = 0.1, ) -> np.ndarray: p0 = np.array((position[0], position[2])) p1 = copy.copy(p0) p2 = copy.copy(p0) theta = -2 * math.pi * (rotation["y"] / 360.0) rotation_mat = np.array( [[math.cos(theta), -math.sin(theta)], [math.sin(theta), math.cos(theta)]] ) offset1 = scale * np.array([-1 / 2.0, 1]) offset2 = scale * np.array([1 / 2.0, 1]) p1 += np.matmul(rotation_mat, offset1) p2 += np.matmul(rotation_mat, offset2) img1 = Image.fromarray(frame.astype("uint8"), "RGB").convert("RGBA") img2 = Image.new("RGBA", frame.shape[:-1]) # Use RGBA opacity = int(round(255 * opacity)) # Define transparency for the triangle. 
points = [tuple(reversed(pos_translator(p))) for p in [p0, p1, p2]] draw = ImageDraw.Draw(img2) draw.polygon(points, fill=(255, 255, 255, opacity)) img = Image.alpha_composite(img1, img2) return np.array(img.convert("RGB")) @staticmethod def visualize_agent_path( positions: Sequence[Any], frame: np.ndarray, pos_translator: ThorPositionTo2DFrameTranslator, single_color: bool = False, view_triangle_only_on_last: bool = False, disable_view_triangle: bool = False, line_opacity: float = 1.0, trajectory_start_end_color_str: Tuple[str, str] = ("red", "green"), ) -> np.ndarray: if single_color: frame = ThorViz.add_lines_to_map( list(map(ThorViz.position_to_tuple, positions)), frame, pos_translator, line_opacity, tuple( map( lambda x: int(round(255 * x)), col.Color(trajectory_start_end_color_str[0]).rgb, ) ), ) else: if len(positions) > 1: colors = list( col.Color(trajectory_start_end_color_str[0]).range_to( col.Color(trajectory_start_end_color_str[1]), len(positions) - 1 ) ) for i in range(len(positions) - 1): frame = ThorViz.add_line_to_map( ThorViz.position_to_tuple(positions[i]), ThorViz.position_to_tuple(positions[i + 1]), frame, pos_translator, opacity=line_opacity, color=tuple(map(lambda x: int(round(255 * x)), colors[i].rgb)), ) if view_triangle_only_on_last: positions = [positions[-1]] if disable_view_triangle: positions = [] for position in positions: frame = ThorViz.add_agent_view_triangle( ThorViz.position_to_tuple(position), rotation=position["rotation"], frame=frame, pos_translator=pos_translator, opacity=0.05 + view_triangle_only_on_last * 0.2, ) return frame def dump_top_down_view(self, room_name: str, image_path: str): get_logger().debug("Dumping {}".format(image_path)) self.controller.reset(room_name) self.controller.step( {"action": "Initialize", "gridSize": 0.1, "makeAgentsVisible": False} ) self.controller.step({"action": "ToggleMapView"}) top_down_view = self.controller.last_event.cv2img cv2.imwrite(image_path, top_down_view) def make_fig(self, 
class ThorMultiViz(ThorViz):
    """Top-down visualization that overlays the trajectories of several
    agents, each drawn with its own start/end color gradient."""

    def __init__(
        self,
        path_to_trajectory_prefix: Sequence[str] = ("task_info", "followed_path"),
        agent_suffixes: Sequence[str] = ("1", "2"),
        label: str = "thor_trajectories",
        trajectory_start_end_color_strs: Sequence[Tuple[str, str]] = (
            ("red", "green"),
            ("cyan", "purple"),
        ),
        **kwargs
    ):
        """
        # Parameters

        path_to_trajectory_prefix : Access path to a trajectory; the last
            element is completed with each agent suffix.
        agent_suffixes : One suffix per agent.
        label : Label forwarded to `ThorViz`.
        trajectory_start_end_color_strs : One (start, end) color name pair
            per agent.
        kwargs : Remaining `ThorViz` arguments.
        """
        super().__init__(label=label, **kwargs)

        self.path_to_trajectory_prefix = list(path_to_trajectory_prefix)
        self.agent_suffixes = list(agent_suffixes)
        self.trajectory_start_end_color_strs = list(trajectory_start_end_color_strs)

    def make_fig(self, episode: Any, episode_id: str) -> Figure:
        """Render all agents' trajectories of `episode` onto the top-down view
        of its room and return the resulting figure."""
        if self.thor_top_downs is None:
            self.init_top_down_render()

        # The room name is the first three "_"-separated fields of the id.
        roomname = "_".join(episode_id.split("_")[:3])
        canvas = self.thor_top_downs[roomname]

        agent_settings = zip(self.agent_suffixes, self.trajectory_start_end_color_strs)
        for suffix, color_pair in agent_settings:
            prefix = self.path_to_trajectory_prefix
            agent_path = prefix[:-1] + [prefix[-1] + suffix]
            agent_trajectory = self._access(episode, agent_path)

            # Each agent is drawn on top of the result of the previous one.
            canvas = self.visualize_agent_path(
                agent_trajectory,
                canvas,
                self.map_data["pos_translator"],
                single_color=self.single_color,
                view_triangle_only_on_last=self.view_triangle_only_on_last,
                disable_view_triangle=self.disable_view_triangle,
                line_opacity=self.line_opacity,
                trajectory_start_end_color_str=color_pair,
            )

        fig, ax = plt.subplots(figsize=self.figsize)
        ax.set_title(episode_id, fontsize=self.fontsize)
        # Reverse the channel axis before handing the image to matplotlib.
        ax.imshow(self.crop_viz_image(canvas)[:, :, ::-1])
        ax.axis("off")

        return fig
def parse_req_file(fname, initial=None):
    """Reads requires.txt file generated by setuptools and outputs a
    new/updated dict of extras as keys and corresponding lists of dependencies
    as values.

    The input file's contents are similar to a `ConfigParser` file, e.g.

    pkg_1
    pkg_2
    pkg_3

    [extras1]
    pkg_4
    pkg_5

    [extras2]
    pkg_6
    pkg_7

    # Parameters

    fname : Path of the requires.txt file.
    initial : Optional dict to update in place; a new dict is created when
        omitted.

    # Returns

    The dict mapping each extras name to its list of dependencies.
    Dependencies listed before the first `[section]` are ignored.
    """
    reqs = {} if initial is None else initial
    cline = None
    with open(fname, "r") as f:
        for raw_line in f:
            # `strip` removes the newline when present; slicing off the last
            # character would truncate a final line lacking a newline.
            line = raw_line.strip()
            if not line:
                continue
            if line[0] == "[":
                # Add new key for current extras (if missing in dict)
                cline = line[1:-1].strip()
                reqs.setdefault(cline, [])
            elif cline is not None:
                # Only keep dependencies from extras
                reqs[cline].append(line)
    return reqs


def get_version(fname):
    """Reads PKG-INFO file generated by setuptools and extracts the Version
    number.

    # Parameters

    fname : Path of the PKG-INFO file.

    # Returns

    The version string from the first line starting with `Version:`.

    # Raises

    ValueError : If no such line exists or its value is empty.
    """
    res = "UNK"
    with open(fname, "r") as f:
        for raw_line in f:
            # Only drop the line terminator, never a payload character.
            line = raw_line.rstrip("\n")
            if line.startswith("Version:"):
                res = line.replace("Version:", "").strip()
                break
    if res in ["UNK", ""]:
        raise ValueError(f"Missing Version number in {fname}")
    return res
version=__version__, description="Plugins for the AllenAct framework", long_description=( "A collection of plugins/extensions for use within the AllenAct framework." ), classifiers=[ "Intended Audience :: Science/Research", "Development Status :: 3 - Alpha", "License :: OSI Approved :: MIT License", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Programming Language :: Python", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", ], keywords=["reinforcement learning", "embodied-AI", "AI", "RL", "SLAM"], url="https://github.com/allenai/allenact", author="Allen Institute for Artificial Intelligence", author_email="lucaw@allenai.org", license="MIT", packages=find_packages(include=["allenact_plugins", "allenact_plugins.*"]), install_requires=[f"allenact=={__version__}"], setup_requires=["pytest-runner"], tests_require=["pytest", "pytest-cov"], extras_require=extras, ) if __name__ == "__main__": run_setup() ================================================ FILE: conda/environment-10.1.yml ================================================ channels: - defaults - pytorch dependencies: - cudatoolkit=10.1 - pytorch>=1.6.0,!=1.8.0 - torchvision>=0.7.0,<0.10.0 ================================================ FILE: conda/environment-10.2.yml ================================================ channels: - defaults - pytorch dependencies: - cudatoolkit=10.2 - pytorch>=1.6.0,!=1.8.0 - torchvision>=0.7.0,<0.10.0 ================================================ FILE: conda/environment-11.1.yml ================================================ channels: - defaults - pytorch - nvidia dependencies: - cudatoolkit=11.1 - pytorch>=1.6.0,!=1.8.0 - torchvision>=0.7.0 ================================================ FILE: conda/environment-9.2.yml ================================================ channels: - defaults - pytorch 
# Absolute path of the directory that contains this file.
ABS_PATH_OF_TOP_LEVEL_DIR = os.path.abspath(Path(__file__).parent)
# Absolute path of the "docs" folder located directly inside that directory.
ABS_PATH_OF_DOCS_DIR = os.path.join(ABS_PATH_OF_TOP_LEVEL_DIR, "docs")
def get_habitat_download_info(allow_create: bool = False):
    """Get a dictionary giving a specification of where habitat data lives
    online.

    # Parameters

    allow_create: Whether or not we should try to regenerate the json file that represents
        the above dictionary. This is potentially unsafe so please only set this to `True`
        if you're sure it will download what you want.

    # Returns

    The contents of `.habitat_datasets_download_info.json`: a dict mapping a
    dataset id to a dict with the keys `link`, `rel_path` and `config_url`.

    # Raises

    RuntimeError : If regeneration is requested but no dataset table can be
        found in the downloaded README.
    """
    json_save_path = os.path.join(DATASET_DIR, ".habitat_datasets_download_info.json")
    if allow_create and not os.path.exists(json_save_path):
        url = "https://raw.githubusercontent.com/facebookresearch/habitat-lab/master/README.md"
        # Close the HTTP response deterministically once it has been read.
        with urlopen(url) as response:
            output = response.read().decode("utf-8")

        lines = [line.strip() for line in output.split("\n")]

        # Collect the markdown table whose header mentions "Task" and "Link";
        # the table ends at the first non-table line that follows it.
        task_table_started = False
        table_lines = []
        for line in lines:
            if line.count("|") > 3 and line[0] == line[-1] == "|":
                if task_table_started:
                    table_lines.append(line)
                elif "Task" in line and "Link" in line:
                    task_table_started = True
                    table_lines.append(line)
            elif task_table_started:
                break

        # Raw string: `\[` and `\(` are regex escapes, not string escapes.
        url_pat = re.compile(r"\[.*\]\((.*)\)")

        def get_url(in_str: str):
            """Return the target of a markdown link, or the input unchanged."""
            match = re.match(pattern=url_pat, string=in_str)
            if match:
                return match.group(1)
            else:
                return in_str

        header = None
        rows = []
        for i, line in enumerate(table_lines):
            line = line.strip("|")
            entries = [get_url(e.strip().replace("`", "")) for e in line.split("|")]

            if i == 0:
                header = [e.lower().replace(" ", "_") for e in entries]
            elif not all_equal(entries):
                # Rows whose cells are all equal (e.g. separators) are skipped.
                rows.append(entries)

        if header is None:
            raise RuntimeError(
                f"Could not find the dataset table in the README downloaded from {url}."
            )

        # `list.index` raises ValueError when a column is missing.
        link_ind = header.index("link")
        extract_ind = header.index("extract_path")
        config_ind = header.index("config_to_use")

        data_info = {}
        for row in rows:
            # E.g. ".../pointnav_gibson_v1.zip" becomes "pointnav-gibson-v1".
            dataset_id = (
                row[link_ind].split("/")[-1].replace(".zip", "").replace("_", "-")
            )
            data_info[dataset_id] = {
                "link": row[link_ind],
                "rel_path": row[extract_ind],
                "config_url": row[config_ind],
            }

        with open(json_save_path, "w") as f:
            json.dump(data_info, f)

    with open(json_save_path, "r") as f:
        return json.load(f)
download_info = get_habitat_download_info(allow_create=False) if len(sys.argv) != 2 or sys.argv[1] not in download_info: print( "Incorrect input, expects a single input where this input is one of " f" {['test-scenes', *sorted(download_info.keys())]}." ) quit(1) task_key = sys.argv[1] task_dl_info = download_info[task_key] output_archive_name = "__TO_OVERWRITE__.zip" deletable_dir_name = "__TO_DELETE__" cmd = f"wget {task_dl_info['link']} -O {output_archive_name}" if os.system(cmd): print(f"ERROR: `{cmd}` failed.") quit(1) cmd = f"unzip {output_archive_name} -d {deletable_dir_name}" if os.system(cmd): print(f"ERROR: `{cmd}` failed.") quit(1) download_to_path = task_dl_info["rel_path"].replace("data/", "") if download_to_path[-1] == "/": download_to_path = download_to_path[:-1] os.makedirs(download_to_path, exist_ok=True) cmd = f"rsync -avz {deletable_dir_name}/ {download_to_path}/" if os.system(cmd): print(f"ERROR: `{cmd}` failed.") quit(1) os.remove(output_archive_name) shutil.rmtree(deletable_dir_name) ================================================ FILE: datasets/download_habitat_datasets.sh ================================================ #!/bin/bash # Move to the directory containing this file cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" || exit mkdir -p habitat mkdir -p habitat/scene_datasets mkdir -p habitat/datasets mkdir -p habitat/configs cd habitat || exit output_archive_name=__TO_OVERWRITE__.zip deletable_dir_name=__TO_DELETE__ install_test_scenes_and_data() { if ! wget http://dl.fbaipublicfiles.com/habitat/habitat-test-scenes.zip -O $output_archive_name; then echo "Could not unzip download test scenes from http://dl.fbaipublicfiles.com/habitat/habitat-test-scenes.zip" exit 1 fi if ! unzip $output_archive_name -d $deletable_dir_name; then echo "Could not unzip $output_archive_name to $deletable_dir_name" exit 1 fi rsync -avz $deletable_dir_name/data/datasets . && \ rsync -avz $deletable_dir_name/data/scene_datasets . 
&& \ rm $output_archive_name && \ rm -r $deletable_dir_name } install_scene_data() { python3 ../.habitat_downloader_helper.py "$1" } if [ "$1" = "test-scenes" ] then install_test_scenes_and_data else install_scene_data $1 fi ================================================ FILE: datasets/download_navigation_datasets.sh ================================================ #!/bin/bash # Move to the directory containing this file cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" || exit install_dataset() { dataset_name="$1" download_suffix="$2" if ! mkdir "$dataset_name" ; then echo "Could not create directory " $(pwd)/$dataset_name "Does it already exist? If so, delete it." exit 1 fi url_archive_name=$dataset_name$download_suffix.tar.gz output_archive_name=__TO_OVERWRITE__.tar.gz wget https://prior-datasets.s3.us-east-2.amazonaws.com/embodied-ai/navigation/$url_archive_name -O $output_archive_name tar -xf "$output_archive_name" -C "$dataset_name" --strip-components=1 && rm $output_archive_name echo "saved folder: "$dataset_name"" } # Download, Unzip, and Remove zip if [ "$1" = "robothor-pointnav" ] then echo "Downloading RoboTHOR PointNav Dataset ..." install_dataset "$1" "-v0" cd .. echo "Generating RoboTHOR PointNav Debug Dataset ..." PYTHONPATH=. python ./allenact_plugins/robothor_plugin/scripts/make_pointnav_debug_dataset.py elif [ "$1" = "robothor-objectnav" ] then echo "Downloading RoboTHOR ObjectNav Dataset ..." install_dataset "$1" "-challenge-2021" cd .. echo "Generating RoboTHOR ObjectNav Debug Dataset ..." PYTHONPATH=. python ./allenact_plugins/robothor_plugin/scripts/make_objectnav_debug_dataset.py elif [ "$1" = "ithor-pointnav" ] then echo "Downloading iTHOR PointNav Dataset ..." install_dataset "$1" "-v0" cd .. echo "Generating iTHOR PointNav Debug Dataset ..." PYTHONPATH=. python ./allenact_plugins/ithor_plugin/scripts/make_pointnav_debug_dataset.py elif [ "$1" = "ithor-objectnav" ] then echo "Downloading iTHOR ObjectNav Dataset ..." 
install_dataset "$1" "-v0" cd .. echo "Generating iTHOR ObjectNav Debug Dataset ..." PYTHONPATH=. python ./allenact_plugins/ithor_plugin/scripts/make_objectnav_debug_dataset.py elif [ "$1" = "all-thor" ] then bash download_navigation_datasets.sh "robothor-pointnav" bash download_navigation_datasets.sh "robothor-objectnav" bash download_navigation_datasets.sh "ithor-pointnav" bash download_navigation_datasets.sh "ithor-objectnav" else echo "\nFailed: Usage download_navigation_datasets.sh robothor-pointnav | robothor-objectnav | ithor-pointnav | ithor-objectnav | all-thor" exit 1 fi ================================================ FILE: dev_requirements.txt ================================================ black==24.2.0 ruamel.yaml gitpython markdown==3.3 mypy pytest docformatter==1.3.1 docstr-coverage==1.2.0 mkdocs==1.1.2 mkdocs-material==5.5.3 pre-commit pydoc-markdown==3.4.0 compress-pickle==1.2.0 ================================================ FILE: docs/.gitignore ================================================ api/ ================================================ FILE: docs/CNAME ================================================ allenact.org ================================================ FILE: docs/FAQ.md ================================================ # FAQ ## How do I file a bug regarding the code or documentation? Please file bugs by submitting an [issue](https://github.com/allenai/allenact/issues). We also welcome contributions from the community, including new features and bugfixes on existing functionality. Please refer to our [contribution guidelines](CONTRIBUTING.md). ## How do I generate documentation? Documentation is generated using [mkdocs](https://www.mkdocs.org/) and [pydoc-markdown](https://pypi.org/project/pydoc-markdown/). ### Building documentation locally The `mkdocs` command used to build our documentation relies on all documentation existing as subdirectories of the `docs` folder.
To ensure that all relevant markdown files are placed into this directory, you should always run ```bash bash scripts/build_docs.sh ``` from the top-level project directory before running any of the `mkdocs` commands below. If you have made no changes to the documentation and only wish to build documentation on your local machine, run the following from within the `allenact` root directory. Note: This will generate HTML documentation within the `site` folder ```bash mkdocs build ``` ### Serving documentation locally If you have made no changes to the documentation and only wish to serve documentation on your local machine (with live reloading of modified documentation), run the following from within the `allenact` root directory. ```bash mkdocs serve ``` Then navigate to [http://127.0.0.1:8000/](http://127.0.0.1:8000/) ### Modifying and serving documentation locally If you have made changes to the documentation, you will need to run a documentation builder script before you serve it on your local machine. ```bash bash scripts/build_docs.sh mkdocs serve ``` Then navigate to [http://127.0.0.1:8000/](http://127.0.0.1:8000/) Alternatively, the `site` directory (once built) can be served as a static webpage on your local machine without installing any dependencies by running `python -m http.server 8000` from within the `site` directory. ================================================ FILE: docs/css/extra.css ================================================ /* Allow word-breaks in headers */ h1 { word-wrap: break-word; } /* Don't have the edit button as it's broken for us */ .md-content__button { display: none; } ================================================ FILE: docs/getting_started/abstractions.md ================================================ # Primary abstractions Our package relies on a collection of fundamental abstractions to define how, and in what task, an agent should be trained and evaluated. 
A subset of these abstractions are described in plain language below. Each of the sections below ends with a link to the (formal) documentation of the abstraction as well as a link to an example implementation of the abstraction (if relevant). The following provides a high-level illustration of how these abstractions interact. ![abstractions-overview](../img/abstractions.png) ## Experiment configuration In `allenact`, experiments are defined by implementing the abstract `ExperimentConfig` class. The methods of this implementation are then called during training/inference to properly set up the desired experiment. For example, the `ExperimentConfig.create_model` method will be called at the beginning of training to create the model to be trained. See either the ["designing your first minigrid experiment"](/tutorials/minigrid-tutorial) or the ["designing an experiment for point navigation"](/tutorials/training-a-pointnav-model) tutorials to get an in-depth description of how these experiment configurations are defined in practice. See also the [abstract `ExperimentConfig` class](/api/allenact/base_abstractions/experiment_config#experimentconfig) and an [example implementation](/api/allenact_plugins/ithor_plugin/ithor_environment/#ithorenvironment). ## Task sampler A task sampler is responsible for generating a sequence of tasks for agents to solve. The sequence of tasks can be randomly generated (e.g. in training) or extracted from an ordered pool (e.g. in validation or testing). See the [abstract `TaskSampler` class](/api/allenact/base_abstractions/task/#tasksampler) and an [example implementation](/api/allenact_plugins/ithor_plugin/ithor_task_samplers/#objectnavtasksampler). ## Task Tasks define the scope of the interaction between agents and an environment (including the action types agents are allowed to execute), as well as metrics to evaluate the agents' performance.
For example, we might define a task `ObjectNaviThorGridTask` in which agents receive observations obtained from the environment (e.g. RGB images) or directly from the task (e.g. a target object class) and are allowed to execute actions such as `MoveAhead`, `RotateRight`, `RotateLeft`, and `End` whenever agents determine they have reached their target. The metrics might include a success indicator or some quantitative metric on the optimality of the followed path. See the [abstract `Task` class](/api/allenact/base_abstractions/task/#task) and an [example implementation](/api/allenact_plugins/robothor_plugin/robothor_tasks/#objectnavtask). ## Sensor Sensors provide observations extracted from an environment (e.g. RGB or depth images) or directly from a task (e.g. the end point in point navigation or target object class in semantic navigation) that can be directly consumed by agents. See the [abstract `Sensor` class](/api/allenact/base_abstractions/sensor/#sensor) and an [example implementation](/api/allenact_plugins/ithor_plugin/ithor_sensors/#rgbsensorthor). ## Actor critic model The actor-critic agent is responsible for computing batched action probabilities and state values given the observations provided by sensors, internal state representations, previous actions, and potentially other inputs. See the [abstract `ActorCriticModel` class](/api/allenact/algorithms/onpolicy_sync/policy/#ActorCriticModel) and an [example implementation](/api/projects/objectnav_baselines/models/object_nav_models#ObjectNavBaselineActorCritic). ## Training pipeline The training pipeline, defined in the [`ExperimentConfig`'s `training_pipeline` method](/api/allenact/base_abstractions/experiment_config/#training_pipeline), contains one or more training stages where different [losses can be combined or sequentially applied](/howtos/defining-a-new-training-pipeline). 
## Losses Actor-critic losses compute a combination of action loss and value loss out of collected experience that can be used to train actor-critic models with back-propagation, e.g. PPO or A2C. See the [`AbstractActorCriticLoss` class](/api/allenact/algorithms/onpolicy_sync/losses/abstract_loss#abstractactorcriticloss) and an [example implementation](/api/allenact/algorithms/onpolicy_sync/losses/ppo/#ppo). Off-policy losses implement generic training iterations in which a batch of data is run through a model (that can be a subgraph of an [`ActorCriticModel`](#actor-critic-model)) and a loss is computed on the model's output. See the [`AbstractOffPolicyLoss` class](/api/allenact/algorithms/offpolicy_sync/losses/abstract_offpolicy_loss#abstractoffpolicyloss) and an [example implementation](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy/#MiniGridOffPolicyExpertCELoss). ================================================ FILE: docs/getting_started/running-your-first-experiment.md ================================================ # Running your first experiment Assuming you have [installed the full library](../installation/installation-allenact.md#full-library), you can run your first experiment by calling ```bash PYTHONPATH=. python allenact/main.py minigrid_tutorial -b projects/tutorials -m 8 -o experiment_output/minigrid -s 12345 ``` from the `allenact` root directory. * With `-b projects/tutorials` we tell `allenact` that `minigrid_tutorial` experiment config file will be found in the `projects/tutorials` directory. * With `-m 8` we limit the number of subprocesses to 8 (each subprocess will run 16 of the 128 training task samplers). * With `-o experiment_output/minigrid` we set the output folder into which results and logs will be saved. * With `-s 12345` we set the random seed. 
If everything was installed correctly, a simple model will be trained (and validated) in the MiniGrid environment and a new folder `experiment_output/minigrid` will be created containing: * a `checkpoints/MiniGridTutorial/LOCAL_TIME_STR/` subfolder with model weight checkpoints, * a `used_configs/MiniGridTutorial/LOCAL_TIME_STR/` subfolder with all used configuration files, * and a tensorboard log file under `tb/MiniGridTutorial/LOCAL_TIME_STR/`. Here `LOCAL_TIME_STR` is a string that records the time when the experiment was started (e.g. the string `"2020-08-21_18-19-47"` corresponds to an experiment started on August 21st 2020, 47 seconds past 6:19pm). If we have Tensorboard installed, we can track training progress with ```bash tensorboard --logdir experiment_output/minigrid/tb ``` which will default to the URL [http://localhost:6006/](http://localhost:6006/). After 150,000 steps, the script will terminate and several checkpoints will be saved in the output folder. The training curves should look similar to: ![training curves](../img/minigrid_train.png) If everything went well, the `valid` success rate should converge to 1 and the mean episode length to a value below 4. (For perfectly uniform sampling and complete observation, the expectation for the optimal policy is 3.75 steps.) In the not-so-unlikely event of the run failing to converge to a near-optimal policy, we can just try to re-run (for example with a different random seed). The validation curves should look similar to: ![validation curves](../img/minigrid_valid.png) A detailed tutorial describing how the `minigrid_tutorial` experiment configuration was created can be found [here](../tutorials/minigrid-tutorial.md). To run your own custom experiment simply define a new experiment configuration in a file `projects/YOUR_PROJECT_NAME/experiments/my_custom_experiment.py` after which you may run it with `PYTHONPATH=. python allenact/main.py my_custom_experiment -b projects/YOUR_PROJECT_NAME/experiments`.
================================================ FILE: docs/getting_started/structure.md ================================================ # Structure of the codebase The codebase consists of the following directories: `allenact`, `datasets`, `docs`, `overrides`, `allenact_plugins`, `pretrained_model_ckpts`, `projects`, `scripts`, and `tests`. Below, we explain the overall structure and how different components of the codebase are organized. ## [`allenact` directory](https://github.com/allenai/allenact/tree/master/allenact) Contains runtime algorithms for on-policy and off-policy training and inference, base abstractions used throughout the code base and basic models to be used as building blocks in future models. * `allenact.algorithms` includes on-policy and off-policy training and inference algorithms and abstractions for losses, policies, rollout storage, etc. * `allenact.base_abstractions` includes the base `ExperimentConfig`, distributions, base `Sensor`, `TaskSampler`, `Task`, etc. * `allenact.embodiedai` includes basic CNN, and RNN state encoders, besides basic `ActorCriticModel` implementations for embodied AI tasks. ## [`datasets` directory](https://github.com/allenai/allenact/tree/master/datasets) A directory made to store task-specific datasets. For example, the script `datasets/download_navigation_datasets.sh` can be used to automatically download task dataset files for Point Navigation within the RoboTHOR environment and it will place these files into a new `datasets/robothor-pointnav` directory. ## [`docs` directory](https://github.com/allenai/allenact/tree/master/docs) Contains documentation for the framework, including guides for installation and first experiments, how-to's for the definition and usage of different abstractions, tutorials and per-project documentation.
## [`overrides` directory](https://github.com/allenai/allenact/tree/master/overrides) Files within this directory are used to customize the look and structure of the documentation generated when running `mkdocs`. See our [FAQ](../FAQ.md) for information on how to generate this documentation for yourself. ## [`allenact_plugins` directory](https://github.com/allenai/allenact/tree/master/allenact_plugins) Contains implementations of `ActorCriticModel`s and `Task`s in different environments. Each plugin folder is named as `{environment}_plugin` and contains three subfolders: 1. `configs` to host useful configuration for the environment or tasks. 1. `data` to store data to be consumed by the environment or tasks. 1. `scripts` to set up the plugin or gather and process data. ## [`pretrained_model_ckpts` directory](https://github.com/allenai/allenact/tree/master/pretrained_model_ckpts) Directory into which pretrained model checkpoints will be saved. See also the `pretrained_model_ckpts/download_navigation_model_ckpts.sh` which can be used to download such checkpoints. ## [`projects` directory](https://github.com/allenai/allenact/tree/master/projects) Contains project-specific code like experiment configurations and scripts to process results, generate visualizations or prepare data. ## [`scripts` directory](https://github.com/allenai/allenact/tree/master/scripts) Includes framework-wide scripts to build the documentation, format code, run_tests and start an xserver. The latter can be used for OpenGL-based environments having super-user privileges in Linux, assuming NVIDIA drivers and `xserver-xorg` are installed. ## [`tests` directory](https://github.com/allenai/allenact/tree/master/tests) Includes unit tests for `allenact`.
## [`allenact.utils` directory](https://github.com/allenai/allenact/tree/master/allenact/utils) It includes different types of utilities, mainly divided into: * `allenact.utils.experiment_utils`, including the `TrainingPipeline`, `PipelineStage` and other utilities to configure an experiment. * `allenact.utils.model_utils`, including generic CNN creation, forward-pass helpers and other utilities. * `allenact.utils.tensor_utils`, including functions to batch observations, convert tensors into video, scale image tensors, etc. * `allenact.utils.viz_utils`, including a `VizSuite` class that can be instantiated with different visualization plugins during inference. * `allenact.utils.system`, including logging and networking helpers. Other utils files, including `allenact.utils.misc_utils`, contain a number of helper functions for different purposes. ================================================ FILE: docs/howtos/changing-rewards-and-losses.md ================================================ # Changing rewards and losses In order to train actor-critic agents, we need to specify * `rewards` at the task level, and * `losses` at the training pipeline level. ## Rewards We will use the [object navigation task in `iTHOR`](/api/allenact_plugins/ithor_plugin/ithor_tasks/#objectnavtask) as a running example. We can see how the `ObjectNaviThorGridTask._step(self, action: int) -> RLStepResult` method computes the reward for the latest action by invoking a function like: ```python def judge(self) -> float: reward = -0.01 if not self.last_action_success: reward += -0.03 if self._took_end_action: reward += 1.0 if self._success else -1.0 return float(reward) ``` Any reward shaping can be easily added by e.g. 
modifying the definition of an existing class: ```python class NavigationWithShaping(allenact_plugins.ithor_plugin.ithor_tasks.ObjectNaviThorGridTask): def judge(self) -> float: reward = super().judge() if self.previous_state is not None: reward += float(my_reward_shaping_function( self.previous_state, self.current_state, )) self.previous_state = self.current_state return reward ``` ## Losses We support [A2C](/api/allenact/algorithms/onpolicy_sync/losses/a2cacktr#a2c), [PPO](/api/allenact/algorithms/onpolicy_sync/losses/ppo#ppo), and [imitation](/api/allenact/algorithms/onpolicy_sync/losses/imitation#imitation) losses amongst others. We can easily include [DAgger](https://www.cs.cmu.edu/~sross1/publications/Ross-AIStats11-NoRegret.pdf) or variations thereof by assuming the availability of an expert providing optimal actions to agents and combining imitation and PPO losses in different ways through multiple stages: ```python class MyExperimentConfig(allenact.base_abstractions.experiment_config.ExperimentConfig): ... @classmethod def training_pipeline(cls, **kwargs): dagger_steps = int(3e4) ppo_steps = int(3e4) ppo_steps2 = int(1e6) ... return allenact.utils.experiment_utils.TrainingPipeline( named_losses={ "imitation_loss": allenact.algorithms.onpolicy_sync.losses.imitation.Imitation(), "ppo_loss": allenact.algorithms.onpolicy_sync.losses.ppo.PPO( **allenact.algorithms.onpolicy_sync.losses.ppo.PPOConfig, ), }, ... 
pipeline_stages=[ allenact.utils.experiment_utils.PipelineStage( loss_names=["imitation_loss", "ppo_loss"], teacher_forcing=allenact.utils.experiment_utils.LinearDecay( startp=1.0, endp=0.0, steps=dagger_steps, ), max_stage_steps=dagger_steps, ), allenact.utils.experiment_utils.PipelineStage( loss_names=["ppo_loss", "imitation_loss"], max_stage_steps=ppo_steps ), allenact.utils.experiment_utils.PipelineStage( loss_names=["ppo_loss"], max_stage_steps=ppo_steps2, ), ], ) ``` ================================================ FILE: docs/howtos/defining-a-new-model.md ================================================ # Defining a new model All actor-critic models must implement the interface described by the [ActorCriticModel class](/api/allenact/algorithms/onpolicy_sync/policy/#actorcriticmodel). This interface includes two methods that need to be implemented: * `recurrent_memory_specification`, returning a description of the model's recurrent memory; and * `forward`, returning an [ActorCriticOutput](/api/allenact/base_abstractions/misc/#actorcriticoutput) given the current observation, hidden state and previous actions. For convenience, we provide a [recurrent network module](/api/allenact/embodiedai/models/basic_models/#rnnstateencoder) and [a simple CNN module](/api/allenact/embodiedai/models/basic_models/#simplecnn) from the Habitat baseline navigation models, that will be used in this example. ### Actor-critic model interface As an example, let's build an object navigation agent. ```python class ObjectNavBaselineActorCritic(ActorCriticModel[CategoricalDistr]): """Baseline recurrent actor critic model for object-navigation. # Attributes action_space : The space of actions available to the agent. Currently only discrete actions are allowed (so this space will always be of type `gym.spaces.Discrete`). observation_space : The observation space expected by the agent. 
This observation space should include (optionally) 'rgb' images and 'depth' images and is required to have a component corresponding to the goal `goal_sensor_uuid`. goal_sensor_uuid : The uuid of the sensor of the goal object. See `GoalObjectTypeThorSensor` as an example of such a sensor. hidden_size : The hidden size of the GRU RNN. object_type_embedding_dim: The dimensionality of the embedding corresponding to the goal object type. """ def __init__( self, action_space: gym.spaces.Discrete, observation_space: SpaceDict, goal_sensor_uuid: str, rgb_uuid: Optional[str], depth_uuid: Optional[str], hidden_size=512, object_type_embedding_dim=8, trainable_masked_hidden_state: bool = False, num_rnn_layers=1, rnn_type="GRU", ): """Initializer. See class documentation for parameter definitions. """ super().__init__(action_space=action_space, observation_space=observation_space) self.goal_sensor_uuid = goal_sensor_uuid self._n_object_types = self.observation_space.spaces[self.goal_sensor_uuid].n self._hidden_size = hidden_size self.object_type_embedding_size = object_type_embedding_dim self.visual_encoder = SimpleCNN( observation_space=self.observation_space, output_size=self._hidden_size, rgb_uuid=rgb_uuid, depth_uuid=depth_uuid, ) self.state_encoder = RNNStateEncoder( (0 if self.is_blind else self._hidden_size) + object_type_embedding_dim, self._hidden_size, trainable_masked_hidden_state=trainable_masked_hidden_state, num_layers=num_rnn_layers, rnn_type=rnn_type, ) self.actor = LinearActorHead(self._hidden_size, action_space.n) self.critic = LinearCriticHead(self._hidden_size) self.object_type_embedding = nn.Embedding( num_embeddings=self._n_object_types, embedding_dim=object_type_embedding_dim, ) self.train() @property def recurrent_hidden_state_size(self) -> int: """The recurrent hidden state size of the model.""" return self._hidden_size @property def is_blind(self) -> bool: """True if the model is blind (e.g. 
neither 'depth' or 'rgb' is an input observation type).""" return self.visual_encoder.is_blind @property def num_recurrent_layers(self) -> int: """Number of recurrent hidden layers.""" return self.state_encoder.num_recurrent_layers def _recurrent_memory_specification(self): return dict( rnn=( ( ("layer", self.num_recurrent_layers), ("sampler", None), ("hidden", self.recurrent_hidden_state_size), ), torch.float32, ) ) def get_object_type_encoding( self, observations: Dict[str, torch.FloatTensor] ) -> torch.FloatTensor: """Get the object type encoding from input batched observations.""" # noinspection PyTypeChecker return self.object_type_embedding( # type:ignore observations[self.goal_sensor_uuid].to(torch.int64) ) def forward( # type:ignore self, observations: ObservationType, memory: Memory, prev_actions: torch.Tensor, masks: torch.FloatTensor, ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]: """Processes input batched observations to produce new actor and critic values. Processes input batched observations (along with prior hidden states, previous actions, and masks denoting which recurrent hidden states should be masked) and returns an `ActorCriticOutput` object containing the model's policy (distribution over actions) and evaluation of the current state (value). # Parameters observations : Batched input observations. memory : `Memory` containing the hidden states from initial timepoints. prev_actions : Tensor of previous actions taken. masks : Masks applied to hidden states. See `RNNStateEncoder`. # Returns Tuple of the `ActorCriticOutput` and recurrent hidden state. 
""" target_encoding = self.get_object_type_encoding( cast(Dict[str, torch.FloatTensor], observations) ) x = [target_encoding] if not self.is_blind: perception_embed = self.visual_encoder(observations) x = [perception_embed] + x x_cat = torch.cat(x, dim=-1) # type: ignore x_out, rnn_hidden_states = self.state_encoder( x_cat, memory.tensor("rnn"), masks ) return ( ActorCriticOutput( distributions=self.actor(x_out), values=self.critic(x_out), extras={} ), memory.set_tensor("rnn", rnn_hidden_states), ) ``` ================================================ FILE: docs/howtos/defining-a-new-task.md ================================================ # Defining a new task In order to use new tasks in our experiments, we need to define two classes: * A [Task](/api/allenact/base_abstractions/task#task), including, among others, a `step` implementation providing a [RLStepResult](/api/allenact/base_abstractions/misc#rlstepresult), a `metrics` method providing quantitative performance measurements for agents and, optionally, a `query_expert` method that can be used e.g. with an [imitation loss](/api/allenact/algorithms/onpolicy_sync/losses/imitation#imitation) during training. * A [TaskSampler](/api/allenact/base_abstractions/task#tasksampler), that allows instantiating new Tasks for the agents to solve during training, validation and testing. ## Task Let's define a semantic navigation task, where agents have to navigate from a starting point in an environment to an object of a specific class using a minimal amount of steps and deciding when the goal has been reached. We need to define the methods `action_space`, `render`, `_step`, `reached_terminal_state`, `class_action_names`, `close`, `metrics`, and `query_expert` from the base `Task` definition. ### Initialization, action space and termination Let's start with the definition of the action space and task initialization: ```python ... 
from allenact_plugins.ithor_plugin.ithor_constants import ( MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, LOOK_DOWN, LOOK_UP, END, ) ... class ObjectNaviThorGridTask(Task[IThorEnvironment]): _actions = (MOVE_AHEAD, ROTATE_LEFT, ROTATE_RIGHT, LOOK_DOWN, LOOK_UP, END) def __init__( self, env: IThorEnvironment, sensors: List[Sensor], task_info: Dict[str, Any], max_steps: int, **kwargs ) -> None: super().__init__( env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs ) self._took_end_action: bool = False self._success: Optional[bool] = False @property def action_space(self): return gym.spaces.Discrete(len(self._actions)) @classmethod def class_action_names(cls) -> Tuple[str, ...]: return cls._actions def reached_terminal_state(self) -> bool: return self._took_end_action def close(self) -> None: self.env.stop() ... ``` ### Step method Next, we define the main method `_step` that will be called every time the agent produces a new action: ```python class ObjectNaviThorGridTask(Task[IThorEnvironment]): ... def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult: assert isinstance(action, int) action = cast(int, action) action_str = self.class_action_names()[action] if action_str == END: self._took_end_action = True self._success = self.is_goal_object_visible() self.last_action_success = self._success else: self.env.step({"action": action_str}) self.last_action_success = self.env.last_action_success step_result = RLStepResult( observation=self.get_observations(), reward=self.judge(), done=self.is_done(), info={"last_action_success": self.last_action_success}, ) return step_result ... 
def is_goal_object_visible(self) -> bool: return any( o["objectType"] == self.task_info["object_type"] for o in self.env.visible_objects() ) def judge(self) -> float: reward = -0.01 if not self.last_action_success: reward += -0.03 if self._took_end_action: reward += 1.0 if self._success else -1.0 return float(reward) ``` ### Metrics, rendering and expert actions Finally, we define methods to render and evaluate the current task, and optionally generate expert actions to be used e.g. for DAgger training. ```python def render(self, mode: str = "rgb", *args, **kwargs) -> numpy.ndarray: assert mode == "rgb", "only rgb rendering is implemented" return self.env.current_frame def metrics(self) -> Dict[str, Any]: if not self.is_done(): return {} else: return {"success": self._success, "ep_length": self.num_steps_taken()} def query_expert(self, **kwargs) -> Tuple[int, bool]: return my_objnav_expert_implementation(self) ``` ## TaskSampler We also need to define the corresponding TaskSampler, which must contain implementations for methods `__len__`, `total_unique`, `last_sampled_task`, `next_task`, `close`, `reset`, and `set_seed`. Currently, an additional method `all_observation_spaces_equal` is used to ensure compatibility with the current [RolloutBlockStorage](/api/allenact/algorithms/onpolicy_sync/storage#rolloutblockstorage). Let's define a tasks sampler able to provide an infinite number of object navigation tasks for AI2-THOR. 
### Initialization and termination ```python class ObjectNavTaskSampler(TaskSampler): def __init__( self, scenes: List[str], object_types: str, sensors: List[Sensor], max_steps: int, env_args: Dict[str, Any], action_space: gym.Space, seed: Optional[int] = None, deterministic_cudnn: bool = False, *args, **kwargs ) -> None: self.env_args = env_args self.scenes = scenes self.object_types = object_types self.grid_size = 0.25 self.env: Optional[IThorEnvironment] = None self.sensors = sensors self.max_steps = max_steps self._action_sapce = action_space self.scene_id: Optional[int] = None self._last_sampled_task: Optional[ObjectNaviThorGridTask] = None set_seed(seed) self.reset() def close(self) -> None: if self.env is not None: self.env.stop() def reset(self): self.scene_id = 0 def _create_environment(self) -> IThorEnvironment: env = IThorEnvironment( make_agents_visible=False, object_open_speed=0.05, restrict_to_initially_reachable_points=True, **self.env_args, ) return env ``` ### Task sampling Finally, we need to define methods to determine the number of available tasks (possibly infinite) and sample tasks: ```python @property def length(self) -> Union[int, float]: return float("inf") @property def total_unique(self) -> Optional[Union[int, float]]: return None @property def last_sampled_task(self) -> Optional[ObjectNaviThorGridTask]: return self._last_sampled_task @property def all_observation_spaces_equal(self) -> bool: return True def next_task(self) -> Optional[ObjectNaviThorGridTask]: self.scene_id = random.randint(0, len(self.scenes) - 1) self.scene = self.scenes[self.scene_id] if self.env is not None: if scene != self.env.scene_name: self.env.reset(scene) else: self.env = self._create_environment() self.env.reset(scene_name=scene) self.env.randomize_agent_location() task_info = {"object_type": random.sample(self.object_types, 1)} self._last_sampled_task = ObjectNaviThorGridTask( env=self.env, sensors=self.sensors, task_info=task_info, max_steps=self.max_steps, 
action_space=self._action_sapce, ) return self._last_sampled_task ``` ================================================ FILE: docs/howtos/defining-a-new-training-pipeline.md ================================================ # Defining a new training pipeline Defining a new training pipeline, or even new learning algorithms, is straightforward with the modular design in `AllenAct`. A convenience [Builder](/api/allenact/utils/experiment_utils#builder) object allows us to defer the instantiation of objects of the class passed as their first argument while allowing passing additional keyword arguments to their initializers. ## On-policy We can implement a training pipeline which trains with a single stage using PPO: ```python class ObjectNavThorPPOExperimentConfig(ExperimentConfig): ... @classmethod def training_pipeline(cls, **kwargs): ppo_steps = int(1e6) lr = 2.5e-4 num_mini_batch = 2 if not torch.cuda.is_available() else 6 update_repeats = 4 num_steps = 128 metric_accumulate_interval = cls.MAX_STEPS * 10 # Log every 10 max length tasks save_interval = 10000 gamma = 0.99 use_gae = True gae_lambda = 1.0 max_grad_norm = 0.5 return TrainingPipeline( save_interval=save_interval, metric_accumulate_interval=metric_accumulate_interval, optimizer_builder=Builder(optim.Adam, dict(lr=lr)), num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, named_losses={ "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig), }, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD, pipeline_stages=[ PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps,), ], lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)} ), ) ... ``` Alternatively, we could use a more complex pipeline that includes dataset aggregation ([DAgger](https://www.cs.cmu.edu/~sross1/publications/Ross-AIStats11-NoRegret.pdf)). 
This requires the existence of an expert (implemented in the task definition) that provides optimal actions to agents. We have implemented such a pipeline by extending the above configuration as follows: ```python class ObjectNavThorDaggerThenPPOExperimentConfig(ExperimentConfig): ... SENSORS = [ ... ExpertActionSensor(nactions=6), # Notice that we have added # an expert action sensor. ] ... @classmethod def training_pipeline(cls, **kwargs): dagger_steps = int(1e4) # Much smaller number of steps as we're using imitation learning ppo_steps = int(1e6) lr = 2.5e-4 num_mini_batch = 1 if not torch.cuda.is_available() else 6 update_repeats = 4 num_steps = 128 metric_accumulate_interval = cls.MAX_STEPS * 10 # Log every 10 max length tasks save_interval = 10000 gamma = 0.99 use_gae = True gae_lambda = 1.0 max_grad_norm = 0.5 return TrainingPipeline( save_interval=save_interval, metric_accumulate_interval=metric_accumulate_interval, optimizer_builder=Builder(optim.Adam, dict(lr=lr)), num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, named_losses={ "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig), "imitation_loss": Imitation(), # We add an imitation loss. }, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD, pipeline_stages=[ # The pipeline now has two stages, in the first # we use DAgger (imitation loss + teacher forcing). # In the second stage we no longer use teacher # forcing and add in the ppo loss. 
PipelineStage( loss_names=["imitation_loss"], teacher_forcing=LinearDecay( startp=1.0, endp=0.0, steps=dagger_steps, ), max_stage_steps=dagger_steps, ), PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps,), ], lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)} ), ) ``` ## Off-policy We can also define off-policy stages where an external dataset is used, in this case, for Behavior Cloning: ```python class BCOffPolicyBabyAIGoToLocalExperimentConfig(ExperimentConfig): ... @classmethod def training_pipeline(cls, **kwargs): total_train_steps = int(1e7) num_steps=128 return TrainingPipeline( save_interval=10000, # Save every 10000 steps (approximately) metric_accumulate_interval=1, optimizer_builder=Builder(optim.Adam, dict(lr=2.5e-4)), num_mini_batch=0, # no on-policy training update_repeats=0, # no on-policy training num_steps=num_steps // 4, # rollouts from environment tasks named_losses={ "offpolicy_expert_ce_loss": MiniGridOffPolicyExpertCELoss( total_episodes_in_epoch=int(1e6) # dataset contains 1M episodes ), }, gamma=0.99, use_gae=True, gae_lambda=1.0, max_grad_norm=0.5, advance_scene_rollout_period=None, pipeline_stages=[ PipelineStage( loss_names=[], # no on-policy losses max_stage_steps=total_train_steps, # We only train from off-policy data: offpolicy_component=OffPolicyPipelineComponent( data_iterator_builder=lambda **kwargs: create_minigrid_offpolicy_data_iterator( path=DATASET_PATH, # external dataset nrollouts=128, # per trainer batch size rollout_len=num_steps, # For truncated-BPTT instr_len=5, **kwargs, ), loss_names=["offpolicy_expert_ce_loss"], # off-policy losses updates=16, # 16 batches per rollout ), ), ], ) ``` Note that, in this example, `128 / 4 = 32` steps will be sampled from tasks in a MiniGrid environment (which can be useful to track the agent's performance), while a subgraph of the model (in this case the entire Actor) is trained from batches of 128-step truncated episodes sampled from an 
offline dataset stored under `DATASET_PATH`. ================================================ FILE: docs/howtos/defining-an-experiment.md ================================================ # Defining an experiment Let's look at an example experiment configuration for an object navigation example with an actor-critic agent observing RGB images from the environment and target object classes from the task. This is a simplified example where the agent is confined to a single `iTHOR` scene (`FloorPlan1`) and needs to find a single object (a tomato). To see how one might running a "full"/"hard" version of navigation within AI2-THOR, see our tutorials [PointNav in RoboTHOR](../tutorials/training-a-pointnav-model.md) and [Swapping in a new environment](../tutorials/transfering-to-a-different-environment-framework.md). The interface to be implemented by the experiment specification is defined in [allenact.base_abstractions.experiment_config](/api/allenact/base_abstractions/experiment_config#experimentconfig). If you'd like to skip ahead and see the finished configuration, [see here](https://github.com/allenai/allenact/blob/master/projects/tutorials/object_nav_ithor_ppo_one_object.py). 
We begin by making the following imports: ```python from math import ceil from typing import Dict, Any, List, Optional import gym import numpy as np import torch import torch.nn as nn import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR from allenact.algorithms.onpolicy_sync.losses import PPO from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig from allenact.base_abstractions.experiment_config import ExperimentConfig from allenact.base_abstractions.sensor import SensorSuite from allenact.base_abstractions.task import TaskSampler from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor, GoalObjectTypeThorSensor from allenact_plugins.ithor_plugin.ithor_task_samplers import ObjectNavTaskSampler from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask from projects.objectnav_baselines.models.object_nav_models import ( ObjectNavBaselineActorCritic, ) from allenact.utils.experiment_utils import Builder, PipelineStage, TrainingPipeline, LinearDecay ``` Now first method to implement is `tag`, which provides a string identifying the experiment: ```python class ObjectNavThorPPOExperimentConfig(ExperimentConfig): ... @classmethod def tag(cls): return "ObjectNavThorPPO" ... ``` ## Model creation Next, `create_model` will be used to instantiate an [baseline object navigation actor-critic model](/api/projects/objectnav_baselines/models/object_nav_models#ObjectNavBaselineActorCritic): ```python class ObjectNavThorExperimentConfig(ExperimentConfig): ... 
# A simple setting, train/valid/test are all the same single scene # and we're looking for a single object OBJECT_TYPES = ["Tomato"] TRAIN_SCENES = ["FloorPlan1_physics"] VALID_SCENES = ["FloorPlan1_physics"] TEST_SCENES = ["FloorPlan1_physics"] # Setting up sensors and basic environment details SCREEN_SIZE = 224 SENSORS = [ RGBSensorThor( height=SCREEN_SIZE, width=SCREEN_SIZE, use_resnet_normalization=True, ), GoalObjectTypeThorSensor(object_types=OBJECT_TYPES), ] ... @classmethod def create_model(cls, **kwargs) -> nn.Module: return ObjectNavBaselineActorCritic( action_space=gym.spaces.Discrete(len(ObjectNaviThorGridTask.class_action_names())), observation_space=SensorSuite(cls.SENSORS).observation_spaces, rgb_uuid=cls.SENSORS[0].uuid, depth_uuid=None, goal_sensor_uuid="goal_object_type_ind", hidden_size=512, object_type_embedding_dim=8, ) ... ``` ## Training pipeline We now implement a training pipeline which trains with a single stage using PPO. In the below we use [Builder](/api/allenact/utils/experiment_utils#builder) objects, which allow us to defer the instantiation of objects of the class passed as their first argument while allowing passing additional keyword arguments to their initializers. This is necessary when instantiating things like PyTorch optimizers who take as input the list of parameters associated with our agent's model (something we can't know until the `create_model` function has been called). ```python class ObjectNavThorPPOExperimentConfig(ExperimentConfig): ... 
@classmethod def training_pipeline(cls, **kwargs): ppo_steps = int(1e6) lr = 2.5e-4 num_mini_batch = 2 if not torch.cuda.is_available() else 6 update_repeats = 4 num_steps = 128 metric_accumulate_interval = cls.MAX_STEPS * 10 # Log every 10 max length tasks save_interval = 10000 gamma = 0.99 use_gae = True gae_lambda = 1.0 max_grad_norm = 0.5 return TrainingPipeline( save_interval=save_interval, metric_accumulate_interval=metric_accumulate_interval, optimizer_builder=Builder(optim.Adam, dict(lr=lr)), num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, named_losses={ "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig), }, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD, pipeline_stages=[ PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps,), ], lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)} ), ) ... ``` Alternatively, we could use a more sophisticated pipeline that begins training with dataset aggregation ([DAgger](https://www.cs.cmu.edu/~sross1/publications/Ross-AIStats11-NoRegret.pdf)) before moving to training with PPO. This requires the existence of an expert (implemented in the task definition) that provides optimal actions to agents. We have implemented such a pipeline by extending the above configuration as follows ```python class ObjectNavThorDaggerThenPPOExperimentConfig(ObjectNavThorPPOExperimentConfig): ... SENSORS = [ RGBSensorThor( height=SCREEN_SIZE, width=SCREEN_SIZE, use_resnet_normalization=True, ), GoalObjectTypeThorSensor(object_types=OBJECT_TYPES), ExpertActionSensor(nactions=6), # Notice that we have added an expert action sensor. ] ... 
@classmethod def training_pipeline(cls, **kwargs): dagger_steps = int(1e4) # Much smaller number of steps as we're using imitation learning ppo_steps = int(1e6) lr = 2.5e-4 num_mini_batch = 1 if not torch.cuda.is_available() else 6 update_repeats = 4 num_steps = 128 metric_accumulate_interval = cls.MAX_STEPS * 10 # Log every 10 max length tasks save_interval = 10000 gamma = 0.99 use_gae = True gae_lambda = 1.0 max_grad_norm = 0.5 return TrainingPipeline( save_interval=save_interval, metric_accumulate_interval=metric_accumulate_interval, optimizer_builder=Builder(optim.Adam, dict(lr=lr)), num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, named_losses={ "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig), "imitation_loss": Imitation(), # We add an imitation loss. }, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD, pipeline_stages=[ # The pipeline now has two stages, in the first # we use DAgger (imitation loss + teacher forcing). # In the second stage we no longer use teacher # forcing and add in the ppo loss. PipelineStage( loss_names=["imitation_loss"], teacher_forcing=LinearDecay( startp=1.0, endp=0.0, steps=dagger_steps, ), max_stage_steps=dagger_steps, ), PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps,), ], lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)} ), ) ``` A version of our experiment config file for which we have implemented this two-stage training can be found [here](https://github.com/allenai/allenact/blob/master/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object.py). This two-stage configuration `ObjectNavThorDaggerThenPPOExperimentConfig` is actually implemented _as a subclass of `ObjectNavThorPPOExperimentConfig`_. 
This is a common pattern used in AllenAct and lets one skip a great deal of boilerplate when defining a new experiment as a slight modification of an old one. Of course one must then be careful: changes to the superclass configuration will propagate to all subclassed configurations. ## Machine configuration In `machine_params` we define machine configuration parameters that will be used for training, validation and test: ```python class ObjectNavThorPPOExperimentConfig(allenact.base_abstractions.experiment_config.ExperimentConfig): ... @classmethod def machine_params(cls, mode="train", **kwargs): num_gpus = torch.cuda.device_count() has_gpu = num_gpus != 0 if mode == "train": nprocesses = 20 if has_gpu else 4 gpu_ids = [0] if has_gpu else [] elif mode == "valid": nprocesses = 1 gpu_ids = [1 % num_gpus] if has_gpu else [] elif mode == "test": nprocesses = 1 gpu_ids = [0] if has_gpu else [] else: raise NotImplementedError("mode must be 'train', 'valid', or 'test'.") return {"nprocesses": nprocesses, "gpu_ids": gpu_ids} ... ``` In the above we use the availability of cuda (`torch.cuda.device_count() != 0`) to determine whether we should use parameters appropriate for local machines or for a server. We might optionally add a list of `sampler_devices` to assign devices (likely those not used for running our agent) to task sampling workers. ## Task sampling The above has defined the model we'd like to use, the types of losses we wish to use during training, and the machine specific parameters that should be used during training. Critically we have not yet defined which task we wish to train our agent to complete. This is done by implementing the `ExperimentConfig.make_sampler_fn` function ```python class ObjectNavThorPPOExperimentConfig(ExperimentConfig): ... @classmethod def make_sampler_fn(cls, **kwargs) -> TaskSampler: return ObjectNavTaskSampler(**kwargs) ... 
``` Now, before training starts, our trainer will know to generate a collection of task samplers using `make_sampler_fn` for training (and possibly validation or testing). The `kwargs` parameters used in the above function call can be different for each training process, we implement such differences using the `ExperimentConfig.train_task_sampler_args` function ```python class ObjectNavThorPPOExperimentConfig(ExperimentConfig): ... def train_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: res = self._get_sampler_args_for_scene_split( self.TRAIN_SCENES, process_ind, total_processes, seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) res["scene_period"] = "manual" res["env_args"] = {} res["env_args"].update(self.ENV_ARGS) res["env_args"]["x_display"] = ( ("0.%d" % devices[process_ind % len(devices)]) if devices is not None and len(devices) > 0 else None ) return res ... ``` Now training process `i` out of `n` total processes will be instantiated with the parameters `ObjectNavThorPPOExperimentConfig.train_task_sampler_args(i, n, ...)`. Similar functions (`valid_task_sampler_args` and `test_task_sampler_args`) exist for generating validation and test parameters. Note also that with this function we can assign devices to run our environment for each worker. See the documentation of `ExperimentConfig` for more information. 
## Running the experiment We are now in the position to run the experiment (with seed 12345) using the command ```bash python main.py object_nav_ithor_ppo_one_object -b projects/tutorials -s 12345 ``` ================================================ FILE: docs/howtos/running-a-multi-agent-experiment.md ================================================ # To-do ================================================ FILE: docs/howtos/visualizing-results.md ================================================ # To-do ================================================ FILE: docs/installation/download-datasets.md ================================================ # Downloading datasets **Note:** These instructions assume you have [installed the full library](../installation/installation-allenact.md#full-library) and, generally, [installed specific plugin requirements](../installation/installation-allenact.md#plugins-extra-requirements). The below provides instructions on how to download datasets necessary for defining the train, validation, and test sets used within the ObjectNav/PointNav tasks in the `iTHOR` and `RoboTHOR` environments. ## Point Navigation (PointNav) ### RoboTHOR To get the PointNav dataset for `RoboTHOR` run the following command: ```bash bash datasets/download_navigation_datasets.sh robothor-pointnav ``` This will download the dataset into `datasets/robothor-pointnav`. ### iTHOR To get the PointNav dataset for `iTHOR` run the following command: ```bash bash datasets/download_navigation_datasets.sh ithor-pointnav ``` This will download the dataset into `datasets/ithor-pointnav`. ## Object Navigation (ObjectNav) ### RoboTHOR To get the ObjectNav dataset for `RoboTHOR` run the following command: ```bash bash datasets/download_navigation_datasets.sh robothor-objectnav ``` This will download the dataset into `datasets/robothor-objectnav`. 
### iTHOR To get the ObjectNav dataset for `iTHOR` run the following command: ```bash bash datasets/download_navigation_datasets.sh ithor-objectnav ``` This will download the dataset into `datasets/ithor-objectnav`. ================================================ FILE: docs/installation/installation-allenact.md ================================================ # Installation of AllenAct **Note 1:** This library has been tested *only in python 3.6.*/3.7.*. The following assumes you have a working version of *python 3.6/3.7* installed locally. **Note 2:** If you are installing `allenact` intending to use a GPU for training/inference and your current machine uses an older version of CUDA you may need to manually install the version of PyTorch that supports your CUDA version. In such a case, after installing the below requirements, you should follow the directions for installing PyTorch with older versions of CUDA available on the [PyTorch homepage](https://pytorch.org/). In order to install `allenact` and/or its requirements we recommend creating a new [python virtual environment](https://docs.python.org/3/tutorial/venv.html) and installing all of the below requirements into this virtual environment. Alternatively, we also document how to [install a conda environment](#installing-a-conda-environment) with all the requirements, which is especially useful if you plan to train models in [Habitat](https://aihabitat.org/). ## Different ways to use `allenact` There are three main installation paths depending on how you wish to use `allenact`. 1. You want to use the `allenact` abstractions and training engine for your own task/environment and don't really care about using any of our plugins that offer additional support (in the form of models, sensors, task samplers, etc.) for select tasks/environments like AI2-THOR, Habitat, and MiniGrid. - If this sounds like you, install the [standalone framework](#standalone-framework). 1. 
You want to use `allenact` as above but would also like to use some of our additional plugins. - If this sounds like you, install the [framework and plugins](#framework-and-plugins). 1. You want full access to everything in `allenact` (including all plugins and all of our projects and baselines) and want to have the option to edit the internal implementation of `allenact` to suit your desire. - If this sounds like you, install the [full library](#full-library). ## Standalone framework You can install `allenact` easily using pip: ```bash pip install allenact ``` If you'd like to install the latest development version of `allenact` (possibly unstable) directly from GitHub see the next section. ### Bleeding edge pip install To install the latest `allenact` framework, you can use ```bash pip install -e "git+https://github.com/allenai/allenact.git@main#egg=allenact&subdirectory=allenact" ``` and, similarly, you can also use ```bash pip install -e "git+https://github.com/allenai/allenact.git@main#egg=allenact_plugins[all]&subdirectory=allenact_plugins" ``` to install all plugins. Depending on your machine configuration, you may need to use `pip3` instead of `pip` in the commands above. ## Framework and plugins To install `allenact` and all available plugins, run ```bash pip install allenact allenact_plugins[all] ``` which will install `allenact` and `allenact_plugins` packages along with the requirements for _all_ of the plugins (when possible). If you only want to install the requirements for some subset of plugins, you can specify these plugins with the `allenact_plugins[plugin1,plugin2]` notation. For instance, to install requirements for the `ithor_plugin` and the `minigrid_plugin`, simply run: ```bash pip install allenact allenact_plugins[ithor,minigrid] ``` A list of all available plugins can be found [here](https://github.com/allenai/allenact/tree/master/allenact_plugins). 
## Full library Clone the `allenact` repository to your local machine and move into the top-level directory ```bash git clone git@github.com:allenai/allenact.git cd allenact ``` Below we describe two alternative ways to install all dependencies via `pip` or `conda`. ### Installing requirements with `pip` All requirements for `allenact` (not including plugin requirements) may be installed by running the following command: ```bash pip install -r requirements.txt; pip install -r dev_requirements.txt ``` To install plugin requirements, see below. #### Plugins extra requirements To install the specific requirements of each plugin, we need to additionally call ```bash pip install -r allenact_plugins/_plugin/extra_requirements.txt ``` from the top-level directory. ### Installing a `conda` environment _If you are unfamiliar with Conda, please familiarize yourself with their [introductory documentation](https://docs.conda.io/projects/conda/en/latest/). If you have not already, you will need to first [install Conda (i.e. Anaconda or Miniconda)](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) on your machine. We suggest installing [Miniconda](https://docs.conda.io/projects/conda/en/latest/glossary.html#miniconda-glossary) as it's relatively lightweight._ The `conda` folder contains YAML files specifying [Conda environments](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-from-an-environment-yml-file) compatible with AllenAct. These environment files include: * `environment-base.yml` - A base environment file to be used on all machines (it includes [PyTorch](https://pytorch.org/) with the latest `cudatoolkit`). * `environment-dev.yml` - Additional dev dependencies. * `environment-.yml` - Additional dependencies, where `` is the CUDA version used on your machine (if you are using linux, you might find this version by running `/usr/local/cuda/bin/nvcc --version`). 
* `environment-cpu.yml` - Additional dependencies to be used on machines where GPU support is not needed (everything will be run on the CPU). For the moment let's assume you're using `environment-base.yml` above. To install a conda environment with name `allenact` using this file you can simply run the following (*this will take a few minutes*): ```bash conda env create --file ./conda/environment-base.yml --name allenact ``` The above is very simple but has the side effect of creating a new `src` directory where it will place some of AllenAct's dependencies. To get around this, instead of running the above you can instead run the commands: ```bash export MY_ENV_NAME=allenact export CONDA_BASE="$(dirname $(dirname "${CONDA_EXE}"))" export PIP_SRC="${CONDA_BASE}/envs/${MY_ENV_NAME}/pipsrc" conda env create --file ./conda/environment-base.yml --name $MY_ENV_NAME ``` These additional commands tell conda to place these dependencies under the `${CONDA_BASE}/envs/${MY_ENV_NAME}/pipsrc` directory rather than under `src`, this is more in line with where we'd expect dependencies to be placed when running `pip install ...`. If needed, you can use one of the `environment-.yml` environment files to install the proper version of the `cudatoolkit` by running: ```bash conda env update --file ./conda/environment-.yml --name allenact ``` or the CPU-only version: ```bash conda env update --file ./conda/environment-cpu.yml --name allenact ``` #### Using the `conda` environment Now that you've installed the conda environment as above, you can activate it by running: ```bash conda activate allenact ``` after which you can run everything as you would normally. #### Installing supported environments with `conda` Each supported plugin contains a YAML environment file that can be applied upon the existing `allenact` environment. 
To install the specific requirements of each plugin, we need to additionally call ```bash conda env update --file allenact_plugins/<PLUGIN_NAME>_plugin/extra_environment.yml --name $MY_ENV_NAME ``` from the top-level directory.
This can be done with the following command: ```bash sudo python scripts/startx.py & ``` Notice that you need to run the command with `sudo` (i.e. administrator privileges). If you do not have `sudo` access (for example if you are running this on a shared university machine) you can ask your administrator to run it for you. You only need to run it once (as long as you do not turn off your machine). ## Installation of RoboTHOR (`robothor` plugin) `RoboTHOR` is installed in the same way as `iTHOR`. For more information see the above section on installing `iTHOR`. ## Installation of Habitat Installing habitat requires 1. Installing the `habitat-lab` and `habitat-sim` packages. - This may be done by either following the [directions provided by Habitat themselves](https://github.com/facebookresearch/habitat-lab#installation) or by using our `conda` installation instructions below. 1. Downloading the scene assets (i.e. the Gibson or Matterport scene files) relevant to whichever task you're interested in. - Unfortunately we cannot legally distribute these files to you directly. Instead you will need to download these yourself. See [here](https://github.com/facebookresearch/habitat-lab#Gibson) for how you can download the Gibson files and [here](https://github.com/facebookresearch/habitat-lab#matterport3d) for directions on how to download the Matterport flies. 1. Downloading the dataset files for the task you're interested in (e.g. PointNav, ObjectNav, etc). - See [here](https://github.com/facebookresearch/habitat-lab#task-datasets) for links to these dataset files. ### Using `conda` Habitat has recently released the option to install their simulator using `conda` which avoids having to manually build dependencies or use Docker. This does not guarantee that the installation process is completely painless (it is difficult to avoid all possible build issues) but we've found it to be a nice alternative to using Docker. 
To use this installation option please first install an AllenAct `conda` environment using the instructions available [here](../installation/installation-allenact.md#installing-a-conda-environment). After installing this environment, you can then install `habitat-sim` and `habitat-lab` by running: If you are on a machine with an attached display: ```bash export MY_ENV_NAME=allenact export CONDA_BASE="$(dirname $(dirname "${CONDA_EXE}"))" export PIP_SRC="${CONDA_BASE}/envs/${MY_ENV_NAME}/pipsrc" conda env update --file allenact_plugins/habitat_plugin/extra_environment.yml --name $MY_ENV_NAME ``` If you are on a machine without an attached display (e.g. a server), replace the last command by: ```bash conda env update --file allenact_plugins/habitat_plugin/extra_environment_headless.yml --name $MY_ENV_NAME ``` After these steps, feel free to proceed to download the required scene assets and task-specific dataset files as described above. ================================================ FILE: docs/javascripts/extra.js ================================================ // The below can be used to open all nav links in the documentation, code found at // from https://github.com/squidfunk/mkdocs-material/issues/767#issuecomment-384558269 // from the user Akkadius. 
/* document.addEventListener("DOMContentLoaded", function() { load_navpane(); }); function load_navpane() { var width = window.innerWidth; if (width <= 1200) { return; } var nav = document.getElementsByClassName("md-nav"); for (var i = 0; i < nav.length; i++) { if (typeof nav.item(i).style === "undefined") { continue; } if (nav.item(i).getAttribute("data-md-level") && nav.item(i).getAttribute("data-md-component")) { nav.item(i).style.display = 'block'; nav.item(i).style.overflow = 'visible'; } } var nav = document.getElementsByClassName("md-nav__toggle"); for(var i = 0; i < nav.length; i++) { nav.item(i).checked = true; } } */ ================================================ FILE: docs/notebooks/firstbook.md ================================================ # To-do ================================================ FILE: docs/projects/advisor_2020/README.md ================================================ # Experiments for Advisor ## TODO: 1. Add details taken from https://unnat.github.io/advisor/. 2. Cite the arxiv paper. 3. Give a list of things you can run with bash commands. 4. Ideally be able to recreate a large set of experiments. ================================================ FILE: docs/projects/babyai_baselines/README.md ================================================ # Baseline experiments for the BabyAI environment We perform a collection of baseline experiments within the BabyAI environment on the GoToLocal task, see the `projects/babyai_baselines/experiments/go_to_local` directory. For instance, to train a model using PPO, run ```bash python main.py go_to_local.ppo --experiment_base projects/babyai_baselines/experiments ``` Note that these experiments will be quite slow when not using a GPU as the BabyAI model architecture is surprisingly large. Specifying a GPU (if available) can be done from the command line using hooks we created using [gin-config](https://github.com/google/gin-config). E.g. 
to train using the 0th GPU device, add ```bash --gp "machine_params.gpu_id = 0" ``` to the above command. ================================================ FILE: docs/projects/gym_baselines/README.md ================================================ # Baseline models Gym (for MuJoCo environments) This project contains the code for training baseline models for the tasks under the [MuJoCo](https://gym.openai.com/envs/#mujoco) group of Gym environments, including ["Ant-v2"](https://gym.openai.com/envs/Ant-v2/), ["HalfCheetah-v2"](https://gym.openai.com/envs/HalfCheetah-v2/), ["Hopper-v2"](https://gym.openai.com/envs/Hopper-v2/), ["Humanoid-v2"](https://gym.openai.com/envs/Humanoid-v2/), ["InvertedDoublePendulum-v2"](https://gym.openai.com/envs/InvertedDoublePendulum-v2/), ["InvertedPendulum-v2"](https://gym.openai.com/envs/InvertedPendulum-v2/), ["Reacher-v2"](https://gym.openai.com/envs/Reacher-v2/), ["Swimmer-v2"](https://gym.openai.com/envs/Swimmer-v2/), and ["Walker2d-v2"](https://gym.openai.com/envs/Walker2d-v2/). Provided are experiment configs for training a lightweight implementation with separate MLPs for actors and critic, [MemorylessActorCritic](https://allenact.org/api/allenact_plugins/gym_plugin/gym_models/#memorylessactorcritic), with a [Gaussian distribution](https://allenact.org/api/allenact_plugins/gym_plugin/gym_distributions/#gaussiandistr) to sample actions for all continuous-control environments under the `MuJoCo` group of `Gym` environments. The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf) Reinforcement Learning Algorithm. To train an experiment run the following command from the `allenact` root directory: ```bash python main.py <PATH_TO_EXPERIMENT_CONFIG> -o <PATH_TO_OUTPUT> ``` Where `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights and logs to be stored and `<PATH_TO_EXPERIMENT_CONFIG>` is the path to the python file containing the experiment configuration.
An example usage of this command would be: ```bash python main.py projects/gym_baselines/experiments/mujoco/gym_mujoco_ant_ddppo.py -o /YOUR/DESIRED/MUJOCO/OUTPUT/SAVE/PATH/gym_mujoco_ant_ddppo ``` This trains a lightweight implementation with separate MLPs for actors and critic with a Gaussian distribution to sample actions in the "Ant-v2" environment, and stores the model weights and logs to `/YOUR/DESIRED/MUJOCO/OUTPUT/SAVE/PATH/gym_mujoco_ant_ddppo`. ## Results In our experiments, the rewards for MuJoCo environments we obtained after training using PPO are similar to those reported by OpenAI Gym Baselines (1M steps). For the Humanoid environment, we instead compare with the original PPO paper, which trains for 50M steps using PPO. Due to time constraints, we have only tested our baseline across two seeds so far. | Environment | Gym Baseline Reward | Ours Reward | | ----------- | ------------------- | ----------- | |[Ant-v2](https://gym.openai.com/envs/Ant-v2/)| 1083.2 |1098.6(reached 4719 in 25M steps) | | [HalfCheetah-v2](https://gym.openai.com/envs/HalfCheetah-v2/) | 1795.43 | 1741(reached 4019 in 18M steps) | |[Hopper-v2](https://gym.openai.com/envs/Hopper-v2/)|2316.16|2266| |[Humanoid-v2](https://gym.openai.com/envs/Humanoid-v2/)|4000+|4500+(reached 6500 in 70M steps)| | [InvertedPendulum-v2](https://gym.openai.com/envs/InvertedPendulum-v2/) | 809.43 | 1000 | |[Reacher-v2](https://gym.openai.com/envs/Reacher-v2/)|-6.71|-7.045| |[Swimmer-v2](https://gym.openai.com/envs/Swimmer-v2/)|111.19|124.7| |[Walker2d](https://gym.openai.com/envs/Walker2d-v2/)|3424.95|2723 in 10M steps| ================================================ FILE: docs/projects/objectnav_baselines/README.md ================================================ # Baseline models ObjectNav (for RoboTHOR/iTHOR) This project contains the code for training baseline models for the ObjectNav task.
In ObjectNav, the agent spawns at a location in an environment and is tasked to explore the environment until it finds an object of a certain type (such as TV or Basketball). Once the agent is confident that it has the object within sight it executes the `END` action which terminates the episode. If the agent is within a set distance to the target (in our case 1.0 meters) and the target is visible within its observation frame the agent succeeded, otherwise it failed. Provided are experiment configs for training a simple convolutional model with a GRU using `RGB`, `Depth` or `RGB-D` (i.e. `RGB+Depth`) as inputs in [RoboTHOR](https://ai2thor.allenai.org/robothor/) and [iTHOR](https://ai2thor.allenai.org/ithor/). The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf) Reinforcement Learning Algorithm. For the RoboTHOR environment we also have an experiment (`objectnav_robothor_rgb_resnetgru_dagger.py`) showing how a model can be trained using DAgger, a form of imitation learning. To train an experiment run the following command from the `allenact` root directory: ```bash python main.py <PATH_TO_EXPERIMENT_CONFIG> -o <PATH_TO_OUTPUT> -c ``` Where `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights and logs to be stored and `<PATH_TO_EXPERIMENT_CONFIG>` is the path to the python file containing the experiment configuration. An example usage of this command would be: ```bash python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet_ddppo.py -o storage/objectnav-robothor-rgb ``` This trains a simple convolutional neural network with a GRU using RGB input passed through a pretrained ResNet-18 visual encoder on the ObjectNav task in the RoboTHOR environment and stores the model weights and logs to `storage/objectnav-robothor-rgb`.
## RoboTHOR ObjectNav 2021 Challenge The experiment configs found under the `projects/objectnav_baselines/experiments/robothor` directory are designed to conform to the requirements of the [RoboTHOR ObjectNav 2021 Challenge](https://ai2thor.allenai.org/robothor/cvpr-2021-challenge). ### Training a baseline To train a baseline ResNet->GRU model taking RGB-D inputs, run the following command ```bash python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnet_ddppo.py -o storage/objectnav-robothor-rgbd ``` By default, when using a machine with a GPU, the above experiment will attempt to train using 60 parallel processes across all available GPUs. See the `TRAIN_GPU_IDS` constant in `experiments/objectnav_thor_base.py` and the `NUM_PROCESSES` constant in `experiments/robothor/objectnav_robothor_base.py` if you'd like to change which GPUs are used or how many processes are run respectively. ### Downloading our pretrained model checkpoint We provide a pretrained model obtained by allowing the above command to run for all 300M training steps and then selecting the model checkpoint with best validation-set performance (for us occurring at ~170M training steps). You can download this model checkpoint by running ```bash bash pretrained_model_ckpts/download_navigation_model_ckpts.sh robothor-objectnav-challenge-2021 ``` from the top-level directory.
This will download the pretrained model weights and save them at the path ```bash pretrained_model_ckpts/robothor-objectnav-challenge-2021/Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO/2021-02-09_22-35-15/exp_Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO_0.2.0a_300M__stage_00__steps_000170207237.pt ``` ### Running inference on the pretrained model You can run inference on the above pretrained model (on the test dataset) by running ```bash export SAVED_MODEL_PATH=pretrained_model_ckpts/robothor-objectnav-challenge-2021/Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO/2021-02-09_22-35-15/exp_Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO_0.2.0a_300M__stage_00__steps_000170207237.pt python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnetgru_ddppo.py -c $SAVED_MODEL_PATH --eval ``` To discourage "cheating", the test dataset has been scrubbed of the information needed to actually compute the success rate / SPL of your model and so running the above will only save the trajectories your models take. To evaluate these trajectories you will have to submit them to our leaderboard, see [here for more details](https://github.com/allenai/robothor-challenge/). If you'd like to get a sense of if your model is doing well before submitting to the leaderboard, you can obtain the success rate / SPL of it on our validation dataset. To do this, you can simply comment-out the line ```python TEST_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-objectnav/test") ``` within the `projects/objectnav_baselines/experiments/robothor/objectnav_robothor_base.py` file and rerun the above `python main.py ...` command (when the test dataset is not given, the code defaults to using the validation set). 
================================================ FILE: docs/projects/pointnav_baselines/README.md ================================================ # Baseline models for the Point Navigation task in the Habitat, RoboTHOR and iTHOR environments This project contains the code for training baseline models on the PointNav task. In this setting the agent spawns at a location in an environment and is tasked to move to another location. The agent is given a "compass" that tells it the distance and bearing to the target position at every frame. Once the agent is confident that it has reached the end it executes the `END` action which terminates the episode. If the agent is within a set distance to the target (in our case 0.2 meters) the agent succeeded, else it failed. Provided are experiment configs for training a simple convolutional model with a GRU using `RGB`, `Depth` or `RGBD` as inputs in [Habitat](https://github.com/facebookresearch/habitat-sim), [RoboTHOR](https://ai2thor.allenai.org/robothor/) and [iTHOR](https://ai2thor.allenai.org/ithor/). The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf) Reinforcement Learning Algorithm. To train an experiment run the following command from the `allenact` root directory: ```bash python main.py -o <PATH_TO_OUTPUT> -c -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> <EXPERIMENT_NAME> ``` Where `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights and logs to be stored, `<BASE_DIRECTORY_OF_YOUR_EXPERIMENT>` is the directory where our experiment file is located and `<EXPERIMENT_NAME>` is the name of the python module containing the experiment. An example usage of this command would be: ```bash python main.py -o storage/pointnav-robothor-depth -b projects/pointnav_baselines/experiments/robothor/ pointnav_robothor_depth_simpleconvgru_ddppo ``` This trains a simple convolutional neural network with a GRU using Depth input on the PointNav task in the RoboTHOR environment and stores the model weights and logs to `storage/pointnav-robothor-depth`.
================================================ FILE: docs/projects/two_body_problem_2019/README.md ================================================ # Experiments for the Two Body Problem paper ## TODO: 1. Add details taken from https://prior.allenai.org/projects/two-body-problem 2. Cite the CVPR paper. 3. Give a list of things you can run with bash commands. 4. At least a subset of the experiments. ================================================ FILE: docs/tutorials/distributed-objectnav-tutorial.md ================================================ # Tutorial: Distributed training across multiple nodes. **Note** The provided commands to execute in this tutorial include a configuration script to [clone the full library](../installation/installation-allenact.md#full-library). Setting up headless THOR might require superuser privileges. We also assume [NCCL](https://developer.nvidia.com/nccl) is available for communication across computation nodes and all nodes have a running `ssh` server. The experimental tools and commands for distributed training introduced below assume a Linux OS (tested on Ubuntu 18.04). In this tutorial, we: 1. Introduce the available API for training across multiple nodes, as well as experimental scripts for distributed configuration, training start and termination, and remote command execution. 1. Introduce the headless mode for [AI2-THOR](https://ai2thor.allenai.org/) in `AllenAct`. Note that, in contrast with previous tutorials using AI2-THOR, this time we don't require an xserver (in Linux) to be active. 1. Show a training example for RoboTHOR ObjectNav on a cluster, with each node having sufficient GPUs and GPU memory to host 60 experience samplers collecting rollout data.
Thanks to the massive parallelization of experience collection and model training enabled by [DD-PPO](https://arxiv.org/abs/1911.00357), we can greatly speed up training by scaling across multiple nodes: ![training speedup](../img/multinode_training.jpg) ## The task: ObjectNav In ObjectNav, the goal for the agent is to navigate to an object (possibly unseen during training) of a known given class and signal task completion when it determines it has reached the goal. ## Implementation For this tutorial, we'll use the readily available `objectnav_baselines` project, which includes configurations for a wide variety of object navigation experiments for both iTHOR and RoboTHOR. Since those configuration files are defined for a single-node setup, we will mainly focus on the changes required in the `machine_params` and `training_pipeline` methods. Note that, in order to use the headless version of AI2-THOR, we currently need to install a specific THOR commit, different from the default one in `robothor_plugin`. 
Note that this command is included in the configuration script below, so **we don't need to run this**: ```bash pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+91139c909576f3bf95a187c5b02c6fd455d06b48 ``` The experiment config starts as follows: ```python import math from typing import Optional, Sequence import torch import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR from allenact.algorithms.onpolicy_sync.losses import PPO from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig from allenact.utils.experiment_utils import ( Builder, LinearDecay, MultiLinearDecay, TrainingPipeline, PipelineStage, ) from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_rgb_resnet18gru_ddppo import ( ObjectNavRoboThorRGBPPOExperimentConfig as BaseConfig, ) class DistributedObjectNavRoboThorRGBPPOExperimentConfig(BaseConfig): def tag(self) -> str: return "DistributedObjectNavRoboThorRGBPPO" ``` We override ObjectNavRoboThorBaseConfig's THOR_COMMIT_ID to match the installed headless one: ```python THOR_COMMIT_ID = "91139c909576f3bf95a187c5b02c6fd455d06b48" ``` Also indicate that we're using headless THOR (for `task_sampler_args` methods): ```python THOR_IS_HEADLESS = True ``` **Temporary hack** Disable the `commit_id` argument passed to the THOR `Controller`'s `init` method: ```python def env_args(self): res = super().env_args() res.pop("commit_id", None) return res ``` And, of course, define the number of nodes. This will be used by `machine_params` and `training_pipeline` below. 
We override the existing `ExperimentConfig`'s `init` method to include control on the number of nodes: ```python def __init__( self, distributed_nodes: int = 1, num_train_processes: Optional[int] = None, train_gpu_ids: Optional[Sequence[int]] = None, val_gpu_ids: Optional[Sequence[int]] = None, test_gpu_ids: Optional[Sequence[int]] = None, ): super().__init__( num_train_processes=num_train_processes, train_gpu_ids=train_gpu_ids, val_gpu_ids=val_gpu_ids, test_gpu_ids=test_gpu_ids, ) self.distributed_nodes = distributed_nodes ``` ### Machine parameters **Note:** We assume that all nodes are identical (same number and model of GPUs and drivers). The `machine_params` method will be invoked by `runner.py` with different arguments, e.g. to determine the configuration for validation or training. When working in distributed settings, `AllenAct` needs to know the total number of trainers across all nodes as well as the local number of trainers. This is accomplished through the introduction of a `machine_id` keyword argument, which will be used to define the training parameters as follows: ```python def machine_params(self, mode="train", **kwargs): params = super().machine_params(mode, **kwargs) if mode == "train": params.devices = params.devices * self.distributed_nodes params.nprocesses = params.nprocesses * self.distributed_nodes params.sampler_devices = params.sampler_devices * self.distributed_nodes if "machine_id" in kwargs: machine_id = kwargs["machine_id"] assert ( 0 <= machine_id < self.distributed_nodes ), f"machine_id {machine_id} out of range [0, {self.distributed_nodes - 1}]" local_worker_ids = list( range( len(self.train_gpu_ids) * machine_id, len(self.train_gpu_ids) * (machine_id + 1), ) ) params.set_local_worker_ids(local_worker_ids) # Confirm we're setting up train params nicely: print( f"devices {params.devices}" f"\nnprocesses {params.nprocesses}" f"\nsampler_devices {params.sampler_devices}" f"\nlocal_worker_ids {params.local_worker_ids}" ) elif mode == 
"valid": # Use all GPUs at their maximum capacity for training # (you may run validation in a separate machine) params.nprocesses = (0,) return params ``` In summary, we need to specify which indices in `devices`, `nprocesses` and `sampler_devices` correspond to the local `machine_id` node (whenever a `machine_id` is given as a keyword argument), otherwise we specify the global configuration. ### Training pipeline In preliminary ObjectNav experiments, we observe that small batches are useful during the initial training steps in terms of sample efficiency, whereas large batches are preferred during the rest of training. In order to scale to the larger amount of collected data in multi-node settings, we will proceed with a two-stage pipeline: 1. In the first stage, we'll enforce a number of updates per amount of collected data similar to the configuration with a single node by enforcing more batches per rollout (for about 30 million steps). 1. In the second stage we'll switch to a configuration with larger learning rate and batch size to be used up to the grand total of 300 million experience steps. 
We first define a helper method to generate a learning rate curve with decay for each stage: ```python @staticmethod def lr_scheduler(small_batch_steps, transition_steps, ppo_steps, lr_scaling): safe_small_batch_steps = int(small_batch_steps * 1.02) large_batch_and_lr_steps = ppo_steps - safe_small_batch_steps - transition_steps # Learning rate after small batch steps (assuming decay to 0) break1 = 1.0 - safe_small_batch_steps / ppo_steps # Initial learning rate for large batch (after transition from initial to large learning rate) break2 = lr_scaling * ( 1.0 - (safe_small_batch_steps + transition_steps) / ppo_steps ) return MultiLinearDecay( [ # Base learning rate phase for small batch (with linear decay towards 0) LinearDecay(steps=safe_small_batch_steps, startp=1.0, endp=break1,), # Allow the optimizer to adapt its statistics to the changes with a larger learning rate LinearDecay(steps=transition_steps, startp=break1, endp=break2,), # Scaled learning rate phase for large batch (with linear decay towards 0) LinearDecay(steps=large_batch_and_lr_steps, startp=break2, endp=0,), ] ) ``` The training pipeline looks like: ```python def training_pipeline(self, **kwargs): # These params are identical to the baseline configuration for 60 samplers (1 machine) ppo_steps = int(300e6) lr = 3e-4 num_mini_batch = 1 update_repeats = 4 num_steps = 128 save_interval = 5000000 log_interval = 10000 if torch.cuda.is_available() else 1 gamma = 0.99 use_gae = True gae_lambda = 0.95 max_grad_norm = 0.5 # We add 30 million steps for small batch learning small_batch_steps = int(30e6) # And a short transition phase towards large learning rate # (see comment in the `lr_scheduler` helper method transition_steps = int(2 / 3 * self.distributed_nodes * 1e6) # Find exact number of samplers per GPU assert ( self.num_train_processes % len(self.train_gpu_ids) == 0 ), "Expected uniform number of samplers per GPU" samplers_per_gpu = self.num_train_processes // len(self.train_gpu_ids) # Multiply 
num_mini_batch by the largest divisor of # samplers_per_gpu to keep all batches of same size: num_mini_batch_multiplier = [ i for i in reversed( range(1, min(samplers_per_gpu // 2, self.distributed_nodes) + 1) ) if samplers_per_gpu % i == 0 ][0] # Multiply update_repeats so that the product of this factor and # num_mini_batch_multiplier is >= self.distributed_nodes: update_repeats_multiplier = int( math.ceil(self.distributed_nodes / num_mini_batch_multiplier) ) return TrainingPipeline( save_interval=save_interval, metric_accumulate_interval=log_interval, optimizer_builder=Builder(optim.Adam, dict(lr=lr)), num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, named_losses={"ppo_loss": PPO(**PPOConfig, show_ratios=False)}, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD, pipeline_stages=[ # We increase the number of batches for the first stage to reach an # equivalent number of updates per collected rollout data as in the # 1 node/60 samplers setting PipelineStage( loss_names=["ppo_loss"], max_stage_steps=small_batch_steps, num_mini_batch=num_mini_batch * num_mini_batch_multiplier, update_repeats=update_repeats * update_repeats_multiplier, ), # The we proceed with the base configuration (leading to larger # batches due to the increased number of samplers) PipelineStage( loss_names=["ppo_loss"], max_stage_steps=ppo_steps - small_batch_steps, ), ], # We use the MultiLinearDecay curve defined by the helper function, # setting the learning rate scaling as the square root of the number # of nodes. Linear scaling might also works, but we leave that # check to the reader. 
lr_scheduler_builder=Builder( LambdaLR, { "lr_lambda": self.lr_scheduler( small_batch_steps=small_batch_steps, transition_steps=transition_steps, ppo_steps=ppo_steps, lr_scaling=math.sqrt(self.distributed_nodes), ) }, ), ) ``` ## Multi-node configuration **Note:** In the following, we'll assume you don't have an available setup for distributed execution, such as [slurm](https://slurm.schedmd.com/documentation.html). If you do have access to a better alternative to setup and run distributed processes, we encourage you to use that. The experimental distributed tools included here are intended for a rather basic usage pattern that might not suit your needs. If we haven't set up AllenAct with the headless version of AI2-THOR in our nodes, we can define a configuration script similar to: ```bash #!/bin/bash # Prepare a virtualenv for allenact sudo apt-get install -y python3-venv python3 -mvenv ~/allenact_venv source ~/allenact_venv/bin/activate pip install -U pip wheel # Install AllenAct cd ~ git clone https://github.com/allenai/allenact.git cd allenact # Install AllenaAct + RoboTHOR plugin dependencies pip install -r requirements.txt pip install -r allenact_plugins/robothor_plugin/extra_requirements.txt # Download + setup datasets bash datasets/download_navigation_datasets.sh robothor-objectnav # Install headless AI2-THOR and required libvulkan1 sudo apt-get install -y libvulkan1 pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+91139c909576f3bf95a187c5b02c6fd455d06b48 # Download AI2-THOR binaries python -c "from ai2thor.controller import Controller; c=Controller(); c.stop()" echo DONE ``` and save it as `headless_robothor_config.sh`. Note that some of the configuration steps in the script assume you have superuser privileges. 
Then, we can just copy this file to the first node in our cluster and run it with: ```bash source <PATH/TO/headless_robothor_config.sh> ``` If everything went well, we should be able to ```bash cd ~/allenact && source ~/allenact_venv/bin/activate ``` Note that we might need to install `libvulkan1` in each node (even if the AllenAct setup is shared across nodes) if it is not already available. ### Local filesystems If our cluster does not use a shared filesystem, we'll need to propagate the setup to the rest of the nodes. Assuming we can just `ssh` with the current user to all nodes, we can propagate our config with ```bash scripts/dconfig.py --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \ --config_script <PATH/TO/headless_robothor_config.sh> ``` and we can check the state of the installation with the `scripts/dcommand.py` tool: ```bash scripts/dcommand.py --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \ --command 'tail -n 5 ~/log_allenact_distributed_config' ``` If everything went fine, all requirements are ready to start running our experiment. ## Run your experiment **Note:** In this section, we again assume you don't have an available setup for distributed execution, such as [slurm](https://slurm.schedmd.com/documentation.html). If you do have access to a better alternative to setup/run distributed processes, we encourage you to use that. The experimental distributed tools included here are intended for a rather basic usage pattern that might not suit your needs. Our experimental extension to AllenAct's `main.py` script allows using practically identical commands to the ones used in a single-node setup to start our experiments.
From the root `allenact` directory, we can simply invoke ```bash scripts/dmain.py projects/tutorials/distributed_objectnav_tutorial.py \ --config_kwargs '{"distributed_nodes":3}' \ --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \ --env_activate_path ~/allenact_venv/bin/activate \ --allenact_path ~/allenact \ --distributed_ip_and_port <FIRST_IP_ADDRESS_IN_RUNS_ON_LIST>:<FREE_PORT_NUMBER_FOR_THIS_IP_ADDRESS> ``` This script will do several things for you, including synchronization of the changes in the `allenact` directory to all machines, enabling virtual environments in each node, sharing the same random seed for all `main.py` instances, assigning `--machine_id` parameters required for multi-node training, and redirecting the process output to a log file under the output results folder. Note that by changing the value associated with the `distributed_nodes` key in the `config_kwargs` map and the `runs_on` list of IPs, we can easily scale our training to e.g. 1, 3, or 8 nodes as shown in the chart above. Note that for this call to work unmodified, you should have sufficient GPUs/GPU memory to host 60 samplers per node. ## Track and stop your experiment You might have noticed that, when your experiment started with the above command, a file was created under `~/.allenact`. This file includes IP addresses and screen session IDs for all nodes. It can be used by the already introduced `scripts/dcommand.py` script, if we omit the `--runs_on` argument, to call a command on each node via ssh; but most importantly it is used by the `scripts/dkill.py` script to terminate all screen sessions hosting our training processes. ### Experiment tracking A simple way to check all machines are training, assuming you have `nvidia-smi` installed in all nodes, is to just call ```bash scripts/dcommand.py ``` from the root `allenact` directory. If everything is working well, the GPU usage stats from `nvidia-smi` should reflect ongoing activity. You can also add different commands to be executed by each node.
It is of course also possible to run tensorboard on any of the nodes, if that's your preference. ### Experiment termination Just call ```bash scripts/dkill.py ``` After killing all involved screen sessions, you will be asked about whether you also want to delete the "killfile" stored under the `~/.allenact` directory (which might be your preferred option once all processes are terminated). We hope this tutorial will help you start quickly testing new ideas! Even if we've only explored moderate settings of up to 480 experience samplers, you might want to consider some additional changes (like the [choice for the optimizer](https://arxiv.org/abs/2103.07013)) if you plan to run at larger scale. ================================================ FILE: docs/tutorials/gym-mujoco-tutorial.md ================================================ # Tutorial: OpenAI gym MuJoCo environment. **Note** The provided commands to execute in this tutorial assume you have [installed the full library](../installation/installation-allenact.md#full-library) and the requirements for the `gym_plugin`. The latter can be installed by ```bash pip install -r allenact_plugins/gym_plugin/extra_requirements.txt ``` The environments for this tutorial use [MuJoCo](http://www.mujoco.org/)(**Mu**lti-**Jo**int dynamics in **Co**ntact) physics simulator, which is also required to be installed properly with instructions [here](https://github.com/openai/mujoco-py). ## The task For this tutorial, we'll focus on one of the continuous-control environments under the `mujoco` group of `gym` environments: [Ant-v2](https://gym.openai.com/envs/Ant-v2/). In this task, the goal is to make a four-legged creature, "ant", walk forward as fast as possible. A random agent of "Ant-v2" is shown below. ![The Ant-v2 task](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_random.gif).
To achieve the goal, we need to provide continuous control so that the four-legged agent moves forward with an `x` velocity as high as possible for at most 1000 episode steps. The agent fails, i.e. the episode is done, if the `z` position is out of the range [0.2, 1.0]. The dimension of the action space is 8 and the dimension of the observation space is 111; the observations map to different body parts, including the 3D position `(x,y,z)` and orientation (quaternion `x`,`y`,`z`,`w`) of the torso, the joint angles, the 3D velocity `(x,y,z)`, the 3D angular velocity `(x,y,z)`, and the joint velocities. The reward for the agent "ant" is composed of a forward reward, a healthy reward, a control cost, and a contact cost. ## Implementation For this tutorial, we'll use the readily available `gym_plugin`, which includes a [wrapper for `gym` environments](../api/allenact_plugins/gym_plugin/gym_environment.md#gymenvironment), a [task sampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler) and [task definition](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymcontinuousbox2dtask), a [sensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to wrap the observations provided by the `gym` environment, and a simple [model](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic).
The experiment config, similar to the one used for the [Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md), is defined as follows: ```python from typing import Dict, Optional, List, Any, cast import gym import torch.nn as nn import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR from allenact.algorithms.onpolicy_sync.losses.ppo import PPO from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler from allenact.base_abstractions.sensor import SensorSuite from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler from allenact.utils.experiment_utils import ( TrainingPipeline, Builder, PipelineStage, LinearDecay, ) from allenact.utils.viz_utils import VizSuite, AgentViewViz class HandManipulateTutorialExperimentConfig(ExperimentConfig): @classmethod def tag(cls) -> str: return "GymMuJoCoTutorial" ``` ### Sensors and Model As mentioned above, we'll use a [GymMuJoCoSensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymmujocosensor) to provide full observations from the state of the `gym` environment to our model. ```python SENSORS = [ GymMuJoCoSensor("Ant-v2", uuid="gym_mujoco_data"), ] ``` We define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic, [MemorylessActorCritic](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). Since this is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]` instead of `ActorCriticModel[CategoricalDistr]`, since we'll use a [Gaussian distribution](../api/allenact_plugins/gym_plugin/gym_distributions.md#gaussiandistr) to sample actions.
```python @classmethod def create_model(cls, **kwargs) -> nn.Module: """We define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic, MemorylessActorCritic. Since this is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]` instead of `ActorCriticModel[CategoricalDistr]`, since we'll use a Gaussian distribution to sample actions. """ return MemorylessActorCritic( input_uuid="gym_mujoco_data", action_space=gym.spaces.Box( -3.0, 3.0, (8,), "float32" ), # 8 actors, each in the range [-3.0, 3.0] observation_space=SensorSuite(cls.SENSORS).observation_spaces, action_std=0.5, ) ``` ### Task samplers We use an available `TaskSampler` implementation for `gym` environments that allows to sample [GymTasks](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtask): [GymTaskSampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler). Even though it is possible to let the task sampler instantiate the proper sensor for the chosen task name (by passing `None`), we use the sensors we created above, which contain a custom identifier for the actual observation space (`gym_mujoco_data`) also used by the model. 
```python @classmethod def make_sampler_fn(cls, **kwargs) -> TaskSampler: return GymTaskSampler(gym_env_type="Ant-v2", **kwargs) ``` For convenience, we will use a `_get_sampler_args` method to generate the task sampler arguments for all three modes, `train, valid, test`: ```python def train_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args( process_ind=process_ind, mode="train", seeds=seeds ) def valid_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args( process_ind=process_ind, mode="valid", seeds=seeds ) def test_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args(process_ind=process_ind, mode="test", seeds=seeds) ``` Similarly to what we do in the Minigrid navigation tutorial, the task sampler samples random tasks for ever, while, during testing (or validation), we sample a fixed number of tasks. ```python def _get_sampler_args( self, process_ind: int, mode: str, seeds: List[int] ) -> Dict[str, Any]: """Generate initialization arguments for train, valid, and test TaskSamplers. # Parameters process_ind : index of the current task sampler mode: one of `train`, `valid`, or `test` """ if mode == "train": max_tasks = None # infinite training tasks task_seeds_list = None # no predefined random seeds for training deterministic_sampling = False # randomly sample tasks in training else: max_tasks = 4 # one seed for each task to sample: # - ensures different seeds for each sampler, and # - ensures a deterministic set of sampled tasks. 
task_seeds_list = list( range(process_ind * max_tasks, (process_ind + 1) * max_tasks) ) deterministic_sampling = ( True # deterministically sample task in validation/testing ) return dict( gym_env_types=["Ant-v2"], sensors=self.SENSORS, # sensors used to return observations to the agent max_tasks=max_tasks, # see above task_seeds_list=task_seeds_list, # see above deterministic_sampling=deterministic_sampling, # see above seed=seeds[process_ind], ) ``` Note that we just sample 4 tasks for validation and testing in this case, which suffice to illustrate the model's success. ### Machine parameters In this tutorial, we just train the model on the CPU. We allocate a larger number of samplers for training (8) than for validation or testing (just 1), and we default to CPU usage by returning an empty list of `devices`. We also include a video visualizer (`AgentViewViz`) in test mode. ```python @classmethod def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]: visualizer = None if mode == "test": visualizer = VizSuite( mode=mode, video_viz=AgentViewViz( label="episode_vid", max_clip_length=400, vector_task_source=("render", {"mode": "rgb_array"}), fps=30, ), ) return { "nprocesses": 8 if mode == "train" else 1, # rollout "devices": [], "visualizer": visualizer, } ``` ### Training pipeline The last definition is the training pipeline. In this case, we use a PPO stage with linearly decaying learning rate and 10 single-batch update repeats per rollout. The reward should exceed 4,000 in 20M steps in the test. In order to make the "ant" run with an obvious fast speed, we train the agents using PPO with 3e7 steps. 
```python @classmethod def training_pipeline(cls, **kwargs) -> TrainingPipeline: lr = 3e-4 ppo_steps = int(3e7) clip_param = 0.2 value_loss_coef = 0.5 entropy_coef = 0.0 num_mini_batch = 4 # optimal 64 update_repeats = 10 max_grad_norm = 0.5 num_steps = 2048 gamma = 0.99 use_gae = True gae_lambda = 0.95 advance_scene_rollout_period = None save_interval = 200000 metric_accumulate_interval = 50000 return TrainingPipeline( named_losses=dict( ppo_loss=PPO( clip_param=clip_param, value_loss_coef=value_loss_coef, entropy_coef=entropy_coef, ), ), # type:ignore pipeline_stages=[ PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps), ], optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)), num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=advance_scene_rollout_period, save_interval=save_interval, metric_accumulate_interval=metric_accumulate_interval, lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps, startp=1, endp=0)}, ), ) ``` ## Training and validation We have a complete implementation of this experiment's configuration class in `projects/tutorials/gym_mujoco_tutorial.py`. To start training from scratch, we just need to invoke ```bash PYTHONPATH=. python allenact/main.py gym_mujoco_tutorial -b projects/tutorials -m 8 -o /PATH/TO/gym_mujoco_output -s 0 -e ``` from the `allenact` root directory. Note that we include `-e` to enforce deterministic evaluation. Please refer to the [Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md) if in doubt of the meaning of the rest of parameters. If we have Tensorboard installed, we can track progress with ```bash tensorboard --logdir /PATH/TO/gym_mujoco_output ``` which will default to the URL [http://localhost:6006/](http://localhost:6006/). After 30,000,000 steps, the script will terminate. 
If everything went well, the `valid` success rate should be 1 and the mean reward should rise above 4,000 within 20,000,000 steps, while the average episode length should stay at or a little below 1,000. ## Testing The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the subfolders in the path to the checkpoints, saved under the output folder. In order to evaluate (i.e. test) a collection of checkpoints, we need to pass the `--eval` flag and specify the directory containing the checkpoints with the `--checkpoint CHECKPOINT_DIR` option: ```bash PYTHONPATH=. python allenact/main.py gym_mujoco_tutorial \ -b projects/tutorials \ -m 1 \ -o /PATH/TO/gym_mujoco_output \ -s 0 \ -e \ --eval \ --checkpoint /PATH/TO/gym_mujoco_output/checkpoints/GymMuJoCoTutorial/YOUR_START_DATE ``` If everything went well, the `test` success rate should converge to 1 and the mean reward should rise above 4,000 within 20,000,000 steps, while the average episode length should stay at or a little below 1,000. The output should be something like this: ![results](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_test.png). The `gif` results can be seen in the image tab of Tensorboard while testing. ![mp4 demo](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_test.gif) If the test command fails with `pyglet.canvas.xlib.NoSuchDisplayException: Cannot connect to "None"`, e.g. when running remotely, try prepending `DISPLAY=:0.0` to the command above, assuming you have an xserver running with such display available: ```bash DISPLAY=:0.0 PYTHONPATH=.
python allenact/main.py gym_mujoco_tutorial \ -b projects/tutorials \ -m 1 \ -o /PATH/TO/gym_mujoco_output \ -s 0 \ -e \ --eval \ --checkpoint /PATH/TO/gym_mujoco_output/checkpoints/GymMuJoCoTutorial/YOUR_START_DATE ``` ================================================ FILE: docs/tutorials/gym-tutorial.md ================================================ # Tutorial: OpenAI gym for continuous control. **Note** The provided commands to execute in this tutorial assume you have [installed the full library](../installation/installation-allenact.md#full-library) and the requirements for the `gym_plugin`. The latter can be installed by ```bash pip install -r allenact_plugins/gym_plugin/extra_requirements.txt ``` In this tutorial, we: 1. Introduce the `gym_plugin`, which enables some of the tasks in [OpenAI's gym](https://gym.openai.com/) for training and inference within AllenAct. 1. Show an example of continuous control with an arbitrary action space covering 2 policies for one of the `gym` tasks. ## The task For this tutorial, we'll focus on one of the continuous-control environments under the `Box2D` group of `gym` environments: [LunarLanderContinuous-v2](https://gym.openai.com/envs/LunarLanderContinuous-v2/). In this task, the goal is to smoothly land a lunar module in a landing pad, as shown below. ![The LunarLanderContinuous-v2 task](../img/lunar_lander_continuous_demo.png). To achieve this goal, we need to provide continuous control for a main engine and directional one (2 real values). In order to solve the task, the expected reward is of at least 200 points. The controls for main and directional engines are both in the range [-1.0, 1.0] and the observation space is composed of 8 scalars indicating `x` and `y` positions, `x` and `y` velocities, lander angle and angular velocity, and left and right ground contact. Note that these 8 scalars provide a full observation of the state. 
## Implementation For this tutorial, we'll use the readily available `gym_plugin`, which includes a [wrapper for `gym` environments](../api/allenact_plugins/gym_plugin/gym_environment.md#gymenvironment), a [task sampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler) and [task definition](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymcontinuousbox2dtask), a [sensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to wrap the observations provided by the `gym` environment, and a simple [model](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). The experiment config, similar to the one used for the [Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md), is defined as follows: ```python from typing import Dict, Optional, List, Any, cast import gym import torch.nn as nn import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR from allenact.algorithms.onpolicy_sync.losses.ppo import PPO from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler from allenact.base_abstractions.sensor import SensorSuite from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic from allenact_plugins.gym_plugin.gym_sensors import GymBox2DSensor from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler from allenact.utils.experiment_utils import ( TrainingPipeline, Builder, PipelineStage, LinearDecay, ) from allenact.utils.viz_utils import VizSuite, AgentViewViz class GymTutorialExperimentConfig(ExperimentConfig): @classmethod def tag(cls) -> str: return "GymTutorial" ``` ### Sensors and Model As mentioned above, we'll use a [GymBox2DSensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to provide full observations from the state of the `gym` environment to our model. 
```python SENSORS = [ GymBox2DSensor("LunarLanderContinuous-v2", uuid="gym_box_data"), ] ``` We define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic, [MemorylessActorCritic](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). Since this is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]` instead of `ActorCriticModel[CategoricalDistr]`, since we'll use a [Gaussian distribution](../api/allenact_plugins/gym_plugin/gym_distributions.md#gaussiandistr) to sample actions. ```python @classmethod def create_model(cls, **kwargs) -> nn.Module: return MemorylessActorCritic( input_uuid="gym_box_data", action_space=gym.spaces.Box( -1.0, 1.0, (2,) ), # 2 actors, each in the range [-1.0, 1.0] observation_space=SensorSuite(cls.SENSORS).observation_spaces, action_std=0.5, ) ``` ### Task samplers We use an available `TaskSampler` implementation for `gym` environments that allows to sample [GymTasks](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtask): [GymTaskSampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler). Even though it is possible to let the task sampler instantiate the proper sensor for the chosen task name (by passing `None`), we use the sensors we created above, which contain a custom identifier for the actual observation space (`gym_box_data`) also used by the model. 
```python @classmethod def make_sampler_fn(cls, **kwargs) -> TaskSampler: return GymTaskSampler(**kwargs) ``` For convenience, we will use a `_get_sampler_args` method to generate the task sampler arguments for all three modes, `train, valid, test`: ```python def train_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args( process_ind=process_ind, mode="train", seeds=seeds ) def valid_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args( process_ind=process_ind, mode="valid", seeds=seeds ) def test_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args(process_ind=process_ind, mode="test", seeds=seeds) ``` Similarly to what we do in the Minigrid navigation tutorial, the task sampler samples random tasks for ever, while, during testing (or validation), we sample a fixed number of tasks. ```python def _get_sampler_args( self, process_ind: int, mode: str, seeds: List[int] ) -> Dict[str, Any]: """Generate initialization arguments for train, valid, and test TaskSamplers. # Parameters process_ind : index of the current task sampler mode: one of `train`, `valid`, or `test` """ if mode == "train": max_tasks = None # infinite training tasks task_seeds_list = None # no predefined random seeds for training deterministic_sampling = False # randomly sample tasks in training else: max_tasks = 3 # one seed for each task to sample: # - ensures different seeds for each sampler, and # - ensures a deterministic set of sampled tasks. 
task_seeds_list = list( range(process_ind * max_tasks, (process_ind + 1) * max_tasks) ) deterministic_sampling = ( True # deterministically sample task in validation/testing ) return dict( gym_env_types=["LunarLanderContinuous-v2"], sensors=self.SENSORS, # sensors used to return observations to the agent max_tasks=max_tasks, # see above task_seeds_list=task_seeds_list, # see above deterministic_sampling=deterministic_sampling, # see above seed=seeds[process_ind], ) ``` Note that we just sample 3 tasks for validation and testing in this case, which suffice to illustrate the model's success. ### Machine parameters Given the simplicity of the task and model, we can just train the model on the CPU. During training, success should reach 100% in less than 10 minutes, whereas solving the task (evaluation reward > 200) might take about 20 minutes (on a laptop CPU). We allocate a larger number of samplers for training (8) than for validation or testing (just 1), and we default to CPU usage by returning an empty list of `devices`. We also include a video visualizer (`AgentViewViz`) in test mode. ```python @classmethod def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]: visualizer = None if mode == "test": visualizer = VizSuite( mode=mode, video_viz=AgentViewViz( label="episode_vid", max_clip_length=400, vector_task_source=("render", {"mode": "rgb_array"}), fps=30, ), ) return { "nprocesses": 8 if mode == "train" else 1, "devices": [], "visualizer": visualizer, } ``` ### Training pipeline The last definition is the training pipeline. 
In this case, we use a PPO stage with linearly decaying learning rate and 80 single-batch update repeats per rollout: ```python @classmethod def training_pipeline(cls, **kwargs) -> TrainingPipeline: ppo_steps = int(1.2e6) return TrainingPipeline( named_losses=dict( ppo_loss=PPO(clip_param=0.2, value_loss_coef=0.5, entropy_coef=0.0,), ), # type:ignore pipeline_stages=[ PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps), ], optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-3)), num_mini_batch=1, update_repeats=80, max_grad_norm=100, num_steps=2000, gamma=0.99, use_gae=False, gae_lambda=0.95, advance_scene_rollout_period=None, save_interval=200000, metric_accumulate_interval=50000, lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}, # type:ignore ), ) ``` ## Training and validation We have a complete implementation of this experiment's configuration class in `projects/tutorials/gym_tutorial.py`. To start training from scratch, we just need to invoke ```bash PYTHONPATH=. python allenact/main.py gym_tutorial -b projects/tutorials -m 8 -o /PATH/TO/gym_output -s 54321 -e ``` from the `allenact` root directory. Note that we include `-e` to enforce deterministic evaluation. Please refer to the [Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md) if in doubt of the meaning of the rest of parameters. If we have Tensorboard installed, we can track progress with ```bash tensorboard --logdir /PATH/TO/gym_output ``` which will default to the URL [http://localhost:6006/](http://localhost:6006/). After 1,200,000 steps, the script will terminate. If everything went well, the `valid` success rate should quickly converge to 1 and the mean reward to above 250, while the average episode length should stay below or near 300. 
## Testing The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the subfolders in the path to the checkpoints, saved under the output folder. In order to evaluate (i.e. test) a collection of checkpoints, we need to pass the `--eval` flag and specify the directory containing the checkpoints with the `--checkpoint CHECKPOINT_DIR` option: ```bash PYTHONPATH=. python allenact/main.py gym_tutorial \ -b projects/tutorials \ -m 1 \ -o /PATH/TO/gym_output \ -s 54321 \ -e \ --eval \ --checkpoint /PATH/TO/gym_output/checkpoints/GymTutorial/YOUR_START_DATE \ --approx_ckpt_step_interval 800000 # Skip some checkpoints ``` The option `--approx_ckpt_step_interval 800000` tells AllenAct that we only want to evaluate checkpoints which were saved every ~800000 steps, this lets us avoid evaluating every saved checkpoint. If everything went well, the `test` success rate should converge to 1, the episode length below or near 300 steps, and the mean reward to above 250. The images tab in tensorboard will contain videos for the sampled test episodes. ![video_results](../img/lunar_lander_continuous_test.png). If the test command fails with `pyglet.canvas.xlib.NoSuchDisplayException: Cannot connect to "None"`, e.g. when running remotely, try prepending `DISPLAY=:0.0` to the command above, assuming you have an xserver running with such display available: ```bash DISPLAY=:0.0 PYTHONPATH=. 
python allenact/main.py gym_tutorial \ -b projects/tutorials \ -m 1 \ -o /PATH/TO/gym_output \ -s 54321 \ -e \ --eval \ --checkpoint /PATH/TO/gym_output/checkpoints/GymTutorial/YOUR_START_DATE \ --approx_ckpt_step_interval 800000 ``` ================================================ FILE: docs/tutorials/index.md ================================================ # AllenAct Tutorials **Note** The provided commands to execute these tutorials assume you have [installed the full library](../installation/installation-allenact.md#full-library) and the specific requirements for each used plugin. We provide several tutorials to help ramp up researchers to the field of Embodied-AI as well as to the AllenAct framework. ## [Navigation in MiniGrid](../tutorials/minigrid-tutorial.md) ![MiniGridEmptyRandom5x5 task example](../img/minigrid_environment.png) We train an agent to complete the `MiniGrid-Empty-Random-5x5-v0` task within the [MiniGrid](https://github.com/maximecb/gym-minigrid) environment. This tutorial presents: * Writing an experiment configuration file with a simple training pipeline from scratch. * Using one of the supported environments with minimal user effort. * Training, validation and testing your experiment from the command line. [Follow the tutorial here.](../tutorials/minigrid-tutorial.md) ## [PointNav in RoboTHOR](../tutorials/training-a-pointnav-model.md) ![RoboTHOR Robot](../img/RoboTHOR_robot.jpg) We train an agent on the Point Navigation task within the RoboTHOR Embodied-AI environment. This tutorial presents: * The basics of the Point Navigation task, a common task in Embodied AI * Using an external dataset * Writing an experiment configuration file with a simple training pipeline from scratch. * Use one of the supported environments with minimal user effort. * Train, validate and test your experiment from the command line. 
* Testing a pre-trained model [Follow the tutorial here.](../tutorials/training-a-pointnav-model.md) ## [Swapping in a new environment](../tutorials/transfering-to-a-different-environment-framework.md) ![Environment Transfer](../img/env_transfer.jpg) This tutorial demonstrates how easy it is to modify the experiment config created in the RoboTHOR PointNav tutorial to work with the iTHOR and Habitat environments. [Follow the tutorial here.](../tutorials/transfering-to-a-different-environment-framework.md) ## [Using a pretrained model](../tutorials/running-inference-on-a-pretrained-model.md) ![Pretrained inference](../img/viz_pretrained_2videos.jpg) This tutorial shows how to run inference on one or more checkpoints of a pretrained model and generate visualizations of different types. [Follow the tutorial here.](../tutorials/running-inference-on-a-pretrained-model.md) ## [Off-policy training](../tutorials/offpolicy-tutorial.md) This tutorial shows how to train an Actor using an off-policy dataset with expert actions. [Follow the tutorial here.](../tutorials/offpolicy-tutorial.md) ## [OpenAI gym for continuous control](../tutorials/gym-tutorial.md) ![gym task example](../img/lunar_lander_continuous_demo.png) We train an agent to complete the `LunarLanderContinuous-v2` task from [OpenAI gym](https://gym.openai.com/envs/LunarLanderContinuous-v2). This tutorial presents: * A `gym` plugin for `AllenAct`. * A continuous control example with multiple actors using PPO. [Follow the tutorial here.](../tutorials/gym-tutorial.md) ## [Multi-node training for RoboTHOR ObjectNav](../tutorials/distributed-objectnav-tutorial.md) ![training speedup](../img/multinode_training.jpg) We train an agent to navigate to an object in a fraction of the time required for training in one node by distributing training across multiple nodes. This tutorial presents: 1.
The AllenAct API for training across multiple nodes, as well as experimental scripts for distributed configuration, training start and termination, and remote command execution. 2. The introduction of the headless mode for [AI2-THOR](https://ai2thor.allenai.org/) in `AllenAct`. [Follow the tutorial here.](../tutorials/distributed-objectnav-tutorial.md) ================================================ FILE: docs/tutorials/minigrid-tutorial.md ================================================ # Tutorial: Navigation in MiniGrid. In this tutorial, we will train an agent to complete the `MiniGrid-Empty-Random-5x5-v0` task within the [MiniGrid](https://github.com/maximecb/gym-minigrid) environment. We will demonstrate how to: * Write an experiment configuration file with a simple training pipeline from scratch. * Use one of the supported environments with minimal user effort. * Train, validate and test your experiment from the command line. This tutorial assumes the [installation instructions](../installation/installation-allenact.md) have already been followed and that, to some extent, this framework's [abstractions](../getting_started/abstractions.md) are known. The `extra_requirements` for `minigrid_plugin` and `babyai_plugin` can be installed with: ```bash pip install -r allenact_plugins/minigrid_plugin/extra_requirements.txt; pip install -r allenact_plugins/babyai_plugin/extra_requirements.txt ``` ## The task A `MiniGrid-Empty-Random-5x5-v0` task consists of a grid of dimensions 5x5 where an agent spawned at a random location and orientation has to navigate to the visitable bottom right corner cell of the grid by sequences of three possible actions (rotate left/right and move forward).
A visualization of the environment with expert steps in a random `MiniGrid-Empty-Random-5x5-v0` task looks like ![MiniGridEmptyRandom5x5 task example](../img/minigrid_environment.png) The observation for the agent is a subset of the entire grid, simulating a simplified limited field of view, as depicted by the highlighted rectangle (observed subset of the grid) around the agent (red arrow). Gray cells correspond to walls. ## Experiment configuration file Our complete experiment consists of: * Training a basic actor-critic agent with memory to solve randomly sampled navigation tasks. * Validation on a fixed set of tasks (running in parallel with training). * A second stage where we test saved checkpoints with a larger fixed set of tasks. The entire configuration for the experiment, including training, validation, and testing, is encapsulated in a single class implementing the `ExperimentConfig` abstraction. For this tutorial, we will follow the config under `projects/tutorials/minigrid_tutorial.py`. The `ExperimentConfig` abstraction is used by the [OnPolicyTrainer](../api/allenact/algorithms/onpolicy_sync/engine.md#onpolicytrainer) class (for training) and the [OnPolicyInference](../api/allenact/algorithms/onpolicy_sync/engine.md#onpolicyinference) class (for validation and testing) invoked through the entry script `main.py` that calls an orchestrating [OnPolicyRunner](../api/allenact/algorithms/onpolicy_sync/runner.md#onpolicyrunner) class. It includes: * A `tag` method to identify the experiment. * A `create_model` method to instantiate actor-critic models. * A `make_sampler_fn` method to instantiate task samplers. * Three `{train,valid,test}_task_sampler_args` methods describing initialization parameters for task samplers used in training, validation, and testing; including assignment of workers to devices for simulation. * A `machine_params` method with configuration parameters that will be used for training, validation, and testing. 
* A `training_pipeline` method describing a possibly multi-staged training pipeline with different types of losses, an optimizer, and other parameters like learning rates, batch sizes, etc. ### Preliminaries We first import everything we'll need to define our experiment. ```python from typing import Dict, Optional, List, Any, cast import gym from gym_minigrid.envs import EmptyRandomEnv5x5 import torch.nn as nn import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR from allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler from allenact.base_abstractions.sensor import SensorSuite from allenact.utils.experiment_utils import ( TrainingPipeline, Builder, PipelineStage, LinearDecay, ) from allenact_plugins.minigrid_plugin.minigrid_models import MiniGridSimpleConvRNN from allenact_plugins.minigrid_plugin.minigrid_sensors import EgocentricMiniGridSensor from allenact_plugins.minigrid_plugin.minigrid_tasks import ( MiniGridTaskSampler, MiniGridTask, ) ``` We now create the `MiniGridTutorialExperimentConfig` class which we will use to define our experiment. For pedagogical reasons, we will add methods to this class one at a time below with a description of what these classes do. ```python class MiniGridTutorialExperimentConfig(ExperimentConfig): ``` An experiment is identified by a `tag`. 
```python @classmethod def tag(cls) -> str: return "MiniGridTutorial" ``` ### Sensors and Model A readily available Sensor type for MiniGrid, [EgocentricMiniGridSensor](../api/allenact_plugins/minigrid_plugin/minigrid_sensors.md#egocentricminigridsensor), allows us to extract observations in a format consumable by an `ActorCriticModel` agent: ```python SENSORS = [ EgocentricMiniGridSensor(agent_view_size=5, view_channels=3), ] ``` The three `view_channels` include objects, colors and states corresponding to a partial observation of the environment as an image tensor, equivalent to that from `ImgObsWrapper` in [MiniGrid](https://github.com/maximecb/gym-minigrid#wrappers). The relatively large `agent_view_size` means the view will only be clipped by the environment walls in the forward and lateral directions with respect to the agent's orientation. We define our `ActorCriticModel` agent using a lightweight implementation with recurrent memory for MiniGrid environments, [MiniGridSimpleConvRNN](../api/allenact_plugins/minigrid_plugin/minigrid_models.md#minigridsimpleconvrnn): ```python @classmethod def create_model(cls, **kwargs) -> nn.Module: return MiniGridSimpleConvRNN( action_space=gym.spaces.Discrete(len(MiniGridTask.class_action_names())), observation_space=SensorSuite(cls.SENSORS).observation_spaces, num_objects=cls.SENSORS[0].num_objects, num_colors=cls.SENSORS[0].num_colors, num_states=cls.SENSORS[0].num_states, ) ``` ### Task samplers We use an available TaskSampler implementation for MiniGrid environments that allows us to sample both random and deterministic `MiniGridTasks`, [MiniGridTaskSampler](../api/allenact_plugins/minigrid_plugin/minigrid_tasks.md#minigridtasksampler): ```python @classmethod def make_sampler_fn(cls, **kwargs) -> TaskSampler: return MiniGridTaskSampler(**kwargs) ``` During training (or validation/testing), this task sampler will randomly initialize new tasks for the agent to complete.
While it is not quite as important for this task type (as we test our agent in the same setting it is trained on) there are a lot of good reasons we would like to sample tasks differently during training than during validation or testing. One good reason, that is applicable in this tutorial, is that, during training, we would like to be able to sample tasks forever while, during testing, we would like to sample a fixed number of tasks (as otherwise we would never finish testing!). In `allenact` this is made possible by defining different arguments for the task sampler: ```python def train_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args(process_ind=process_ind, mode="train") def valid_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args(process_ind=process_ind, mode="valid") def test_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args(process_ind=process_ind, mode="test") ``` where, for convenience, we have defined a `_get_sampler_args` method: ```python def _get_sampler_args(self, process_ind: int, mode: str) -> Dict[str, Any]: """Generate initialization arguments for train, valid, and test TaskSamplers. 
# Parameters process_ind : index of the current task sampler mode: one of `train`, `valid`, or `test` """ if mode == "train": max_tasks = None # infinite training tasks task_seeds_list = None # no predefined random seeds for training deterministic_sampling = False # randomly sample tasks in training else: max_tasks = 20 + 20 * (mode == "test") # 20 tasks for valid, 40 for test # one seed for each task to sample: # - ensures different seeds for each sampler, and # - ensures a deterministic set of sampled tasks. task_seeds_list = list( range(process_ind * max_tasks, (process_ind + 1) * max_tasks) ) deterministic_sampling = ( True # deterministically sample task in validation/testing ) return dict( max_tasks=max_tasks, # see above env_class=self.make_env, # builder for third-party environment (defined below) sensors=self.SENSORS, # sensors used to return observations to the agent env_info=dict(), # parameters for environment builder (none for now) task_seeds_list=task_seeds_list, # see above deterministic_sampling=deterministic_sampling, # see above ) @staticmethod def make_env(*args, **kwargs): return EmptyRandomEnv5x5() ``` Note that the `env_class` argument to the Task Sampler is the one determining which task type we are going to train the model for (in this case, `MiniGrid-Empty-Random-5x5-v0` from [gym-minigrid](https://github.com/maximecb/gym-minigrid#empty-environment)) . The sparse reward is [given by the environment](https://github.com/maximecb/gym-minigrid/blob/6e22a44dc67414b647063692258a4f95ce789161/gym_minigrid/minigrid.py#L819) , and the maximum task length is 100. For training, we opt for a default random sampling, whereas for validation and test we define fixed sets of randomly sampled tasks without needing to explicitly define a dataset. In this toy example, the maximum number of different tasks is 32. 
For validation we sample 320 tasks using 16 samplers, or 640 for testing, so we can be fairly sure that all possible tasks are visited at least once during evaluation. ### Machine parameters Given the simplicity of the task and model, we can quickly train the model on the CPU: ```python @classmethod def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]: return { "nprocesses": 128 if mode == "train" else 16, "devices": [], } ``` We allocate a larger number of samplers for training (128) than for validation or testing (16), and we default to CPU usage by returning an empty list of `devices`. ### Training pipeline The last definition required before starting to train is a training pipeline. In this case, we just use a single PPO stage with linearly decaying learning rate: ```python @classmethod def training_pipeline(cls, **kwargs) -> TrainingPipeline: ppo_steps = int(150000) return TrainingPipeline( named_losses=dict(ppo_loss=PPO(**PPOConfig)), # type:ignore pipeline_stages=[ PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps) ], optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4)), num_mini_batch=4, update_repeats=3, max_grad_norm=0.5, num_steps=16, gamma=0.99, use_gae=True, gae_lambda=0.95, advance_scene_rollout_period=None, save_interval=10000, metric_accumulate_interval=1, lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)} # type:ignore ), ) ``` You can see that we use a `Builder` class to postpone the construction of some of the elements, like the optimizer, for which the model weights need to be known. ## Training and validation We have a complete implementation of this experiment's configuration class in `projects/tutorials/minigrid_tutorial.py`. To start training from scratch, we just need to invoke ```bash PYTHONPATH=. python allenact/main.py minigrid_tutorial -b projects/tutorials -m 8 -o /PATH/TO/minigrid_output -s 12345 ``` from the `allenact` root directory. 
* With `-b projects/tutorials` we tell `allenact` that the `minigrid_tutorial` experiment config file will be found in the `projects/tutorials` directory. * With `-m 8` we limit the number of subprocesses to 8 (each subprocess will run 16 of the 128 training task samplers). * With `-o /PATH/TO/minigrid_output` we set the output folder into which results and logs will be saved. * With `-s 12345` we set the random seed. If we have Tensorboard installed, we can track progress with ```bash tensorboard --logdir /PATH/TO/minigrid_output ``` which will default to the URL [http://localhost:6006/](http://localhost:6006/). After 150,000 steps, the script will terminate and several checkpoints will be saved in the output folder. The training curves should look similar to: ![training curves](../img/minigrid_train.png) If everything went well, the `valid` success rate should converge to 1 and the mean episode length to a value below 4. (For perfectly uniform sampling and complete observation, the expectation for the optimal policy is 3.75 steps.) In the not-so-unlikely event of the run failing to converge to a near-optimal policy, we can just try to re-run (for example with a different random seed). The validation curves should look similar to: ![validation curves](../img/minigrid_valid.png) ## Testing The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the subfolders in the path to the checkpoints, saved under the output folder. In order to evaluate (i.e. test) a particular checkpoint, we need to pass the `--eval` flag and specify the checkpoint with the `--checkpoint CHECKPOINT_PATH` option: ```bash PYTHONPATH=.
python allenact/main.py minigrid_tutorial \ -b projects/tutorials \ -m 1 \ -o /PATH/TO/minigrid_output \ -s 12345 \ --eval \ --checkpoint /PATH/TO/minigrid_output/checkpoints/MiniGridTutorial/YOUR_START_DATE/exp_MiniGridTutorial__stage_00__steps_000000151552.pt ``` Again, if everything went well, the `test` success rate should converge to 1 and the mean episode length to a value below 4. Detailed results are saved under a `metrics` subfolder in the output folder. The test curves should look similar to: ![test curves](../img/minigrid_test.png) ================================================ FILE: docs/tutorials/offpolicy-tutorial.md ================================================ # Tutorial: Off-policy training. **Note** The provided commands to execute in this tutorial assume you have [installed the full library](../installation/installation-allenact.md#full-library) and the `extra_requirements` for the `babyai_plugin` and `minigrid_plugin`. The latter can be installed with: ```bash pip install -r allenact_plugins/babyai_plugin/extra_requirements.txt; pip install -r allenact_plugins/minigrid_plugin/extra_requirements.txt ``` In this tutorial we'll learn how to train an agent from an external dataset by imitating expert actions via Behavior Cloning. We'll use a [BabyAI agent](/api/allenact_plugins/babyai_plugin/babyai_models#BabyAIRecurrentACModel) to solve `GoToLocal` tasks on [MiniGrid](https://github.com/maximecb/gym-minigrid); see the `projects/babyai_baselines/experiments/go_to_local` directory for more details. This tutorial assumes `AllenAct`'s [abstractions](../getting_started/abstractions.md) are known. ## The task In a `GoToLocal` task, the agent immersed in a grid world has to navigate to a specific object in the presence of multiple distractors, requiring the agent to understand `go to` instructions like "go to the red ball". For further details, please consult the [original paper](https://arxiv.org/abs/1810.08272). 
## Getting the dataset We will use a large dataset (**more than 4 GB**) including expert demonstrations for `GoToLocal` tasks. To download the data we'll run ```bash PYTHONPATH=. python allenact_plugins/babyai_plugin/scripts/download_babyai_expert_demos.py GoToLocal ``` from the project's root directory, which will download `BabyAI-GoToLocal-v0.pkl` and `BabyAI-GoToLocal-v0_valid.pkl` to the `allenact_plugins/babyai_plugin/data/demos` directory. We will also generate small versions of the datasets, which will be useful if running on CPU, by calling ```bash PYTHONPATH=. python allenact_plugins/babyai_plugin/scripts/truncate_expert_demos.py ``` from the project's root directory, which will generate `BabyAI-GoToLocal-v0-small.pkl` under the same `allenact_plugins/babyai_plugin/data/demos` directory. ## Data storage In order to train with an off-policy dataset, we need to define an `ExperienceStorage`. In AllenAct, an `ExperienceStorage` object has two primary functions: 1. It stores/manages relevant data (e.g. similarly to the `Dataset` class in PyTorch). 2. It loads stored data into batches that will be used for loss computation (e.g. similarly to the `Dataloader` class in PyTorch). Unlike a PyTorch `Dataset` however, an `ExperienceStorage` object can build its dataset **at runtime** by processing rollouts from the agent. This flexibility allows us to, for example, implement the experience replay data structure used in deep Q-learning. For this tutorial we won't need this additional functionality as our off-policy dataset is a fixed collection of expert trajectories. An example of an `ExperienceStorage` for BabyAI expert demos might look as follows: ```python class MiniGridExpertTrajectoryStorage(ExperienceStorage, StreamingStorageMixin): def __init__( self, data_path: str, num_samplers: int, rollout_len: int, instr_len: Optional[int], restrict_max_steps_in_dataset: Optional[int] = None, device: torch.device = torch.device("cpu"), ): ...
def data(self) -> List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]: ... def set_partition(self, index: int, num_parts: int): ... def initialize(self, *, observations: ObservationType, **kwargs): ... def add( self, observations: ObservationType, memory: Optional[Memory], actions: torch.Tensor, action_log_probs: torch.Tensor, value_preds: torch.Tensor, rewards: torch.Tensor, masks: torch.Tensor, ): ... def to(self, device: torch.device): ... def total_experiences(self) -> int: ... def reset_stream(self): ... def empty(self) -> bool: ... def _get_next_ind(self): ... def _fill_rollout_queue(self, q: queue.Queue, sampler: int): ... def get_data_for_rollout_ind(self, sampler_ind: int) -> Dict[str, np.ndarray]: ... def next_batch(self) -> Dict[str, torch.Tensor]: ... ``` A complete example can be found in [MiniGridExpertTrajectoryStorage](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy#MiniGridExpertTrajectoryStorage). ## Loss function Off-policy losses must implement the [`GenericAbstractLoss`](/api/allenact/base_abstractions/misc/#genericabstractloss) interface. 
In this case, we minimize the cross-entropy between the actor's policy and the expert action: ```python class MiniGridOffPolicyExpertCELoss(GenericAbstractLoss): def __init__(self, total_episodes_in_epoch: Optional[int] = None): super().__init__() self.total_episodes_in_epoch = total_episodes_in_epoch def loss( # type: ignore self, *, # No positional arguments model: ModelType, batch: ObservationType, batch_memory: Memory, stream_memory: Memory, ) -> LossOutput: rollout_len, nrollouts = cast(torch.Tensor, batch["minigrid_ego_image"]).shape[ :2 ] # Initialize Memory if empty if len(stream_memory) == 0: spec = model.recurrent_memory_specification for key in spec: dims_template, dtype = spec[key] # get sampler_dim and all_dims from dims_template (and nrollouts) dim_names = [d[0] for d in dims_template] sampler_dim = dim_names.index("sampler") all_dims = [d[1] for d in dims_template] all_dims[sampler_dim] = nrollouts stream_memory.check_append( key=key, tensor=torch.zeros( *all_dims, dtype=dtype, device=cast(torch.Tensor, batch["minigrid_ego_image"]).device, ), sampler_dim=sampler_dim, ) # Forward data (through the actor and critic) ac_out, stream_memory = model.forward( observations=batch, memory=stream_memory, prev_actions=None, # type:ignore masks=cast(torch.FloatTensor, batch["masks"]), ) # Compute the loss from the actor's output and expert action expert_ce_loss = -ac_out.distributions.log_prob(batch["expert_action"]).mean() info = {"expert_ce": expert_ce_loss.item()} return LossOutput( value=expert_ce_loss, info=info, per_epoch_info={}, batch_memory=batch_memory, stream_memory=stream_memory, bsize=rollout_len * nrollouts, ) ``` A complete example can be found in [MiniGridOffPolicyExpertCELoss](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy#MiniGridOffPolicyExpertCELoss). Note that in this case we train the entire actor, but it would also be possible to forward data through a different subgraph of the ActorCriticModel. 
## Experiment configuration For the experiment configuration, we'll build on top of an existing [base BabyAI GoToLocal Experiment Config](/api/projects/babyai_baselines/experiments/go_to_local/base/#basebabyaigotolocalexperimentconfig). The complete `ExperimentConfig` file for off-policy training is [here](/api/projects/tutorials/minigrid_offpolicy_tutorial/#bcoffpolicybabyaigotolocalexperimentconfig), but let's focus on the most relevant aspect to enable this type of training: providing an [OffPolicyPipelineComponent](/api/allenact/utils/experiment_utils/#offpolicypipelinecomponent) object as input to a `PipelineStage` when instantiating the `TrainingPipeline` in the `training_pipeline` method. ```python class BCOffPolicyBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig): """BC Off-policy imitation.""" DATASET: Optional[List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]] = None GPU_ID = 0 if torch.cuda.is_available() else None @classmethod def tag(cls): return "BabyAIGoToLocalBCOffPolicy" @classmethod def METRIC_ACCUMULATE_INTERVAL(cls): # See BaseBabyAIGoToLocalExperimentConfig for how this is used. 
return 1 @classmethod def training_pipeline(cls, **kwargs): total_train_steps = cls.TOTAL_IL_TRAIN_STEPS ppo_info = cls.rl_loss_default("ppo", steps=-1) num_mini_batch = ppo_info["num_mini_batch"] update_repeats = ppo_info["update_repeats"] # fmt: off return cls._training_pipeline( named_losses={ "offpolicy_expert_ce_loss": MiniGridOffPolicyExpertCELoss( total_episodes_in_epoch=int(1e6) ), }, named_storages={ "onpolicy": RolloutBlockStorage(), "minigrid_offpolicy_expert": MiniGridExpertTrajectoryStorage( data_path=os.path.join( BABYAI_EXPERT_TRAJECTORIES_DIR, "BabyAI-GoToLocal-v0{}.pkl".format( "" if torch.cuda.is_available() else "-small" ), ), num_samplers=cls.NUM_TRAIN_SAMPLERS, rollout_len=cls.ROLLOUT_STEPS, instr_len=cls.INSTR_LEN, ), }, pipeline_stages=[ # Single stage, only with off-policy training PipelineStage( loss_names=["offpolicy_expert_ce_loss"], # no on-policy losses max_stage_steps=total_train_steps, # keep sampling episodes in the stage stage_components=[ StageComponent( uuid="offpolicy", storage_uuid="minigrid_offpolicy_expert", loss_names=["offpolicy_expert_ce_loss"], training_settings=TrainingSettings( update_repeats=num_mini_batch * update_repeats, num_mini_batch=1, ) ) ], ), ], # As we don't have any on-policy losses, we set the next # two values to zero to ensure we don't attempt to # compute gradients for on-policy rollouts: num_mini_batch=0, update_repeats=0, total_train_steps=total_train_steps, ) # fmt: on ``` You'll have noted that it is possible to combine on-policy and off-policy training in the same stage, even though here we apply pure off-policy training. ## Training We recommend using a machine with a CUDA-capable GPU for this experiment. In order to start training, we just need to invoke ```bash PYTHONPATH=. python allenact/main.py -b projects/tutorials minigrid_offpolicy_tutorial -m 8 -o <OUTPUT_PATH> ``` Note that with the `-m 8` option we limit to 8 the number of on-policy task sampling processes used between off-policy updates.
If everything goes well, the training success should quickly reach values around 0.7-0.8 on GPU and converge to values close to 1 if given sufficient time to train. If running tensorboard, you'll notice a separate group of scalars named `train-offpolicy-losses` and `train-offpolicy-misc` with losses, approximate "experiences per second" (i.e. the number of off-policy experiences/steps being used to update the model per second), and other tracked values in addition to the standard `train-onpolicy-*` used for on-policy training. In the `train-metrics` and `train-misc` sections you'll find the metrics quantifying the performance of the agent throughout training and some other plots showing training details. *Note that the x-axis for these plots is different from that of the `train-offpolicy-*` sections*. This is because these plots use the number of rollout steps as the x-axis (i.e. steps that the trained agent takes interactively) while the `train-offpolicy-*` plots use the number of off-policy "experiences" that have been shown to the agent. A view of the training progress about 5 hours after starting on a CUDA-capable GPU should look similar to the below (note that training reached >99% success after about 50 minutes). ![off-policy progress](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/minigrid-offpolicy/minigrid-offpolicy-tutorial-tb.png) ================================================ FILE: docs/tutorials/running-inference-on-a-pretrained-model.md ================================================ # Tutorial: Inference with a pre-trained model. In this tutorial we will run inference on a pre-trained model for the PointNav task in the RoboTHOR environment. In this task the agent is tasked with going to a specific location within a realistic 3D environment.
For information on how to train a PointNav Model see [this tutorial](training-a-pointnav-model.md). We will need to [install the full AllenAct library](../installation/installation-allenact.md#full-library), the `robothor_plugin` requirements via ```bash pip install -r allenact_plugins/robothor_plugin/extra_requirements.txt ``` and [download the RoboTHOR Pointnav dataset](../installation/download-datasets.md) before we get started. For this tutorial we will download the weights of a model trained on the debug dataset. This can be done with a handy script in the `pretrained_model_ckpts` directory: ```bash bash pretrained_model_ckpts/download_navigation_model_ckpts.sh robothor-pointnav-rgb-resnet ``` This will download the weights for an RGB model that has been trained on the PointNav task in RoboTHOR to `pretrained_model_ckpts/robothor-pointnav-rgb-resnet`. Next we need to run the inference, using the PointNav experiment config from the [tutorial on making a PointNav experiment](training-a-pointnav-model.md). We can do this with the following command: ```bash PYTHONPATH=. python allenact/main.py <EXPERIMENT_NAME> -o <OUTPUT_PATH> -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> -c <PATH_TO_CHECKPOINT> --eval ``` Where `<EXPERIMENT_NAME>` is the name of the experiment config to run, `<OUTPUT_PATH>` is the location where the results of the test will be dumped, `<PATH_TO_CHECKPOINT>` is the location of the downloaded model weights, and `<BASE_DIRECTORY_OF_YOUR_EXPERIMENT>` is a path to the directory where our experiment definition is stored. For our current setup the following command would work: ```bash PYTHONPATH=. python allenact/main.py \ training_a_pointnav_model \ -o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \ -b projects/tutorials \ -c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30/exp_PointNavRobothorRGBPPO__stage_00__steps_000039031200.pt \ --eval ``` For testing on all saved checkpoints we pass a directory to `--checkpoint` rather than just a single file: ```bash PYTHONPATH=.
python allenact/main.py \ training_a_pointnav_model \ -o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \ -b projects/tutorials \ -c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30 --eval ``` ## Visualization We also show examples of visualizations that can be extracted from the `"valid"` and `"test"` modes. Currently, visualization is still undergoing design changes and does not support multi-agent tasks, but the available functionality is sufficient for pointnav in RoboThor. Following up on the example above, we can make a specialized pointnav `ExperimentConfig` where we instantiate the base visualization class, `VizSuite`, defined in [`allenact.utils.viz_utils`](https://github.com/allenai/allenact/tree/master/allenact/utils/viz_utils.py), when in `test` mode. Each visualization type can be thought of as a plugin to the base `VizSuite`. For example, all `episode_ids` passed to `VizSuite` will be processed with each of the instantiated visualization types (possibly with the exception of the `AgentViewViz`). In the example below we show how to instantiate different visualization types from 4 different data sources. The data sources available to `VizSuite` are: * Task output (e.g. 2D trajectories) * Vector task (e.g. egocentric views) * Rollout storage (e.g. recurrent memory, taken action logprobs...) * `ActorCriticOutput` (e.g. action probabilities) The visualization types included below are: * `TrajectoryViz`: Generic 2D trajectory view. * `AgentViewViz`: RGB egocentric view. * `ActorViz`: Action probabilities from `ActorCriticOutput[CategoricalDistr]`. * `TensorViz1D`: Evolution of a point from RolloutStorage over time. * `TensorViz2D`: Evolution of a vector from RolloutStorage over time. * `ThorViz`: Specialized 2D trajectory view [for RoboThor](https://github.com/allenai/allenact/tree/master/allenact_plugins/robothor_plugin/robothor_viz.py).
Note that we need to explicitly set the `episode_ids` that we wish to visualize. For `AgentViewViz` we have the option of using a different (typically shorter) list of episodes or enforce the ones used for the rest of visualizations. ```python class PointNavRoboThorRGBPPOVizExperimentConfig(PointNavRoboThorRGBPPOExperimentConfig): """ExperimentConfig used to demonstrate how to set up visualization code. # Attributes viz_ep_ids : Scene names that will be visualized. viz_video_ids : Scene names that will have videos visualizations associated with them. """ viz_ep_ids = [ "FloorPlan_Train1_1_3", "FloorPlan_Train1_1_4", "FloorPlan_Train1_1_5", "FloorPlan_Train1_1_6", ] viz_video_ids = [["FloorPlan_Train1_1_3"], ["FloorPlan_Train1_1_4"]] viz: Optional[VizSuite] = None def get_viz(self, mode): if self.viz is not None: return self.viz self.viz = VizSuite( episode_ids=self.viz_ep_ids, mode=mode, # Basic 2D trajectory visualizer (task output source): base_trajectory=TrajectoryViz( path_to_target_location=("task_info", "target",), ), # Egocentric view visualizer (vector task source): egeocentric=AgentViewViz( max_video_length=100, episode_ids=self.viz_video_ids ), # Default action probability visualizer (actor critic output source): action_probs=ActorViz(figsize=(3.25, 10), fontsize=18), # Default taken action logprob visualizer (rollout storage source): taken_action_logprobs=TensorViz1D(), # Same episode mask visualizer (rollout storage source): episode_mask=TensorViz1D(rollout_source=("masks",)), # Default recurrent memory visualizer (rollout storage source): rnn_memory=TensorViz2D(rollout_source=("memory", "single_belief")), # Specialized 2D trajectory visualizer (task output source): thor_trajectory=ThorViz( figsize=(16, 8), viz_rows_cols=(448, 448), scenes=("FloorPlan_Train{}_{}", 1, 1, 1, 1), ), ) return self.viz def machine_params(self, mode="train", **kwargs): res = super().machine_params(mode, **kwargs) if mode == "test": res.set_visualizer(self.get_viz(mode)) 
return res ``` Running test on the same downloaded models, but using the visualization-enabled `ExperimentConfig` with ```bash PYTHONPATH=. python allenact/main.py \ running_inference_tutorial \ -o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \ -b projects/tutorials \ -c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30/exp_PointNavRobothorRGBPPO__stage_00__steps_000039031200.pt \ --eval ``` generates different types of visualization and logs them in tensorboard. If everything is properly set up and tensorboard includes the `robothor-pointnav-rgb-resnet` folder, under the `IMAGES` tab, we should see something similar to ![Visualization example](../img/viz_pretrained_2videos.jpg) ================================================ FILE: docs/tutorials/training-a-pointnav-model.md ================================================ # Tutorial: PointNav in RoboTHOR. ![RoboTHOR Robot](../img/RoboTHOR_robot.jpg) ## Introduction One of the most obvious tasks that an embodied agent should master is navigating the world it inhabits. Before we can teach a robot to cook or clean it first needs to be able to move around. The simplest way to formulate "moving around" into a task is by making your agent find a beacon somewhere in the environment. This beacon transmits its location, such that at any time, the agent can get the direction and Euclidean distance to the beacon. This particular task is often called Point Navigation, or **PointNav** for short. #### PointNav At first glance, this task seems trivial. If the agent is given the direction and distance of the target at all times, can it not simply follow this signal directly? The answer is no, because agents are often trained on this task in environments that emulate real-world buildings which are not wide-open spaces, but rather contain many smaller rooms.
Because of this, the agent has to learn to navigate human spaces and use doors and hallways to efficiently navigate from one side of the building to the other. This task becomes particularly difficult when the agent is tested in an environment that it is not trained in. If the agent does not know how the floor plan of an environment looks, it has to learn to predict the design of man-made structures, to efficiently navigate across them, much like how people instinctively know how to move around a building they have never seen before based on their experience navigating similar buildings. #### What is an environment anyways? Environments are worlds in which embodied agents exist. If our embodied agent is simply a neural network that is being trained in a simulator, then that simulator is its environment. Similarly, if our agent is a physical robot then its environment is the real world. The agent interacts with the environment by taking one of several available actions (such as "move forward", or "turn left"). After each action, the environment produces a new frame that the agent can analyze to determine its next step. For many tasks, including PointNav the agent also has a special "stop" action which indicates that the agent thinks it has reached the target. After this action is called the agent will be reset to a new location, regardless if it reached the target. The hope is that after enough training the agent will learn to correctly assess that it has successfully navigated to the target. ![RoboTHOR Sim vs. Real](../img/RoboTHOR_sim_real.jpg) There are many simulators designed for the training of embodied agents. In this tutorial, we will be using a simulator called [RoboTHOR](https://ai2thor.allenai.org/robothor/), which is designed specifically to train models that can easily be transferred to a real robot, by providing a photo-realistic virtual environment and a real-world replica of the environment that researchers can have access to. 
RoboTHOR contains 60 different virtual scenes with different floor plans and furniture and 15 validation scenes. It is also important to mention that **AllenAct** has a class abstraction called Environment. This is not the actual simulator game engine or robotics controller, but rather a shallow wrapper that provides a uniform interface to the actual environment. #### Learning algorithm Finally, let us briefly touch on the algorithm that we will use to train our embodied agent to navigate. While *AllenAct* offers us great flexibility to train models using complex pipelines, we will be using a simple pure reinforcement learning approach for this tutorial. More specifically, we will be using DD-PPO, a decentralized and distributed variant of the ubiquitous PPO algorithm. For those unfamiliar with Reinforcement Learning we highly recommend [this tutorial](http://karpathy.github.io/2016/05/31/rl/) by Andrej Karpathy, and [this book](http://www.incompleteideas.net/book/the-book-2nd.html) by Sutton and Barto. Essentially, what we are doing is letting our agent explore the environment on its own, rewarding it for taking actions that bring it closer to its goal and penalizing it for actions that take it away from its goal. We then optimize the agent's model to maximize this reward. ## Requirements To train the model on the PointNav task, we need to [install the RoboTHOR environment](../installation/installation-framework.md) and [download the RoboTHOR PointNav dataset](../installation/download-datasets.md). The dataset contains a list of episodes with thousands of randomly generated starting positions and target locations for each of the scenes, as well as a precomputed cache of distances containing the shortest path from each point in a scene to every other point in that scene. This is used to reward the agent for moving closer to the target in terms of geodesic distance - the actual path distance (as opposed to a straight line distance).
## Config File Setup Now comes the most important part of the tutorial, we are going to write an experiment config file. If this is your first experience with experiment config files in AllenAct, we suggest that you first see our how-to on [defining an experiment](../howtos/defining-an-experiment.md) which will walk you through creating a simplified experiment config file. Unlike a library that can be imported into python, **AllenAct** is structured as a framework with a runner script called `main.py` which will run the experiment specified in a config file. This design forces us to keep meticulous records of exactly which settings were used to produce a particular result, which can be very useful given how expensive RL models are to train. The `projects/` directory is home to different projects using `AllenAct`. Currently it is populated with baselines of popular tasks and tutorials. We already have all the code for this tutorial stored in `projects/tutorials/training_a_pointnav_model.py`. We will be using this file to run our experiments, but you can create a new directory in `projects/` and start writing your experiment there. 
We start off by importing everything we will need: ```python import glob import os from math import ceil from typing import Dict, Any, List, Optional, Sequence import gym import numpy as np import torch import torch.nn as nn import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR from torchvision import models from allenact.algorithms.onpolicy_sync.losses import PPO from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph from allenact.base_abstractions.sensor import SensorSuite from allenact.base_abstractions.task import TaskSampler from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor from allenact.utils.experiment_utils import ( Builder, PipelineStage, TrainingPipeline, LinearDecay, evenly_distribute_count_into_bins, ) from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor from allenact_plugins.robothor_plugin.robothor_task_samplers import ( PointNavDatasetTaskSampler, ) from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask from projects.pointnav_baselines.models.point_nav_models import ( ResnetTensorPointNavActorCritic, ) ``` Next we define a new experiment config class: ```python class PointNavRoboThorRGBPPOExperimentConfig(ExperimentConfig): """A Point Navigation experiment configuration in RoboThor.""" ``` We then define the task parameters. For PointNav, these include the maximum number of steps our agent can take before being reset (this prevents the agent from wandering on forever), and a configuration for the reward function that we will be using. 
```python # Task Parameters MAX_STEPS = 500 REWARD_CONFIG = { "step_penalty": -0.01, "goal_success_reward": 10.0, "failed_stop_reward": 0.0, "shaping_weight": 1.0, } ``` In this case, we set the maximum number of steps to 500. We give the agent a reward of -0.01 for each action that it takes (this is to encourage it to reach the goal in as few actions as possible), and a reward of 10.0 if the agent manages to successfully reach its destination. If the agent selects the `stop` action without reaching the target we do not punish it (although this is sometimes useful for preventing the agent from stopping prematurely). Finally, our agent gets rewarded if it moves closer to the target and gets punished if it moves further away. `shaping_weight` controls how strong this signal should be and is here set to 1.0. These parameters work well for training an agent on PointNav, but feel free to play around with them. Next, we set the parameters of the simulator itself. Here we select a resolution at which the engine will render every frame (640 by 480) and a resolution at which the image will be fed into the neural network (here it is set to a 224 by 224 box). ```python # Simulator Parameters CAMERA_WIDTH = 640 CAMERA_HEIGHT = 480 SCREEN_SIZE = 224 ``` Next, we set the hardware parameters for the training engine. `NUM_PROCESSES` sets the total number of parallel processes that will be used to train the model. In general, more processes result in faster training, but since each process is a unique instance of the environment in which we are training they can take up a lot of memory. Depending on the size of the model, the environment, and the hardware we are using, we may need to adjust this number, but for a setup with 8 GTX Titans, 60 processes work fine. 60 also happens to be the number of training scenes in RoboTHOR, which allows each process to load only a single scene into memory, saving time and space. 
`TRAINING_GPUS` takes the ids of the GPUs on which the model should be trained. Similarly `VALIDATION_GPUS` and `TESTING_GPUS` hold the ids of the GPUs on which the validation and testing will occur. During training, a validation process is constantly running and evaluating the current model, to show the progress on the validation set, so reserving a GPU for validation can be a good idea. If our hardware setup does not include a GPU, these fields can be set to empty lists, as the codebase will default to running everything on the CPU with only 1 process. ```python ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None NUM_PROCESSES = 20 TRAINING_GPUS: Sequence[int] = [0] VALIDATION_GPUS: Sequence[int] = [0] TESTING_GPUS: Sequence[int] = [0] ``` Since we are using a dataset to train our model we need to define the path to where we have stored it. If we downloaded the dataset as instructed above, we can define the path as follows: ```python TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/debug") VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/debug") ``` Next, we define the sensors. `RGBSensorThor` is the environment's implementation of an RGB sensor. It takes the raw image outputted by the simulator and resizes it to the input dimensions for our neural network that we specified above. It also performs normalization if we want. `GPSCompassSensorRoboThor` is a sensor that tracks the point our agent needs to move to. It tells us the direction and distance to our goal at every time step. ```python SENSORS = [ RGBSensorThor( height=SCREEN_SIZE, width=SCREEN_SIZE, use_resnet_normalization=True, uuid="rgb_lowres", ), GPSCompassSensorRoboThor(), ] ``` For the sake of this example, we are also going to be using a preprocessor with our model. In *AllenAct* the preprocessor abstraction is designed with large models with frozen weights in mind.
These models often hail from the ResNet family and transform the raw pixels that our agent observes in the environment, into a complex embedding, which then gets stored and used as input to our trainable model instead of the original image. Most other preprocessing work is done in the sensor classes (as we just saw with the RGB sensor scaling and normalizing our input), but for the sake of efficiency, all neural network preprocessing should use this abstraction. ```python PREPROCESSORS = [ Builder( ResNetPreprocessor, { "input_height": SCREEN_SIZE, "input_width": SCREEN_SIZE, "output_width": 7, "output_height": 7, "output_dims": 512, "pool": False, "torchvision_resnet_model": models.resnet18, "input_uuids": ["rgb_lowres"], "output_uuid": "rgb_resnet", }, ), ] ``` Next, we must define all of the observation inputs that our model will use. These are just the hardcoded ids of the sensors we are using in the experiment. ```python OBSERVATIONS = [ "rgb_resnet", "target_coordinates_ind", ] ``` Finally, we must define the settings of our simulator. We set the camera dimensions to the values we defined earlier. We set rotateStepDegrees to 30 degrees, which means that every time the agent takes a turn action, they will rotate by 30 degrees. We set grid size to 0.25 which means that every time the agent moves forward, it will do so by 0.25 meters. ```python ENV_ARGS = dict( width=CAMERA_WIDTH, height=CAMERA_HEIGHT, rotateStepDegrees=30.0, visibilityDistance=1.0, gridSize=0.25, ) ``` Now we move on to the methods that we must define to finish implementing an experiment config. Firstly we have a simple method that just returns the name of the experiment. ```python @classmethod def tag(cls): return "PointNavRobothorRGBPPO" ``` Next, we define the training pipeline. In this function, we specify exactly which algorithm or algorithms we will use to train our model. In this simple example, we are using the PPO loss with a learning rate of 3e-4. 
We specify 250 million steps of training and a rollout length of 30 with the `ppo_steps` and `num_steps` parameters respectively. All the other standard PPO parameters are also present in this function. `metric_accumulate_interval` sets the frequency at which data is accumulated from all the processes and logged while `save_interval` sets how often we save the model weights and run validation on them. ```python @classmethod def training_pipeline(cls, **kwargs): ppo_steps = int(250000000) lr = 3e-4 num_mini_batch = 1 update_repeats = 3 num_steps = 30 save_interval = 5000000 log_interval = 1000 gamma = 0.99 use_gae = True gae_lambda = 0.95 max_grad_norm = 0.5 return TrainingPipeline( save_interval=save_interval, metric_accumulate_interval=log_interval, optimizer_builder=Builder(optim.Adam, dict(lr=lr)), num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, named_losses={"ppo_loss": PPO(**PPOConfig)}, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD, pipeline_stages=[ PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps) ], lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)} ), ) ``` The `machine_params` method returns the hardware parameters of each process, based on the list of devices we defined above. 
```python def machine_params(self, mode="train", **kwargs): sampler_devices: List[int] = [] if mode == "train": workers_per_device = 1 gpu_ids = ( [] if not torch.cuda.is_available() else list(self.TRAINING_GPUS) * workers_per_device ) nprocesses = ( 8 if not torch.cuda.is_available() else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids)) ) sampler_devices = list(self.TRAINING_GPUS) elif mode == "valid": nprocesses = 1 gpu_ids = [] if not torch.cuda.is_available() else self.VALIDATION_GPUS elif mode == "test": nprocesses = 1 gpu_ids = [] if not torch.cuda.is_available() else self.TESTING_GPUS else: raise NotImplementedError("mode must be 'train', 'valid', or 'test'.") sensor_preprocessor_graph = ( SensorPreprocessorGraph( source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces, preprocessors=self.PREPROCESSORS, ) if mode == "train" or ( (isinstance(nprocesses, int) and nprocesses > 0) or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0) ) else None ) return MachineParams( nprocesses=nprocesses, devices=gpu_ids, sampler_devices=sampler_devices if mode == "train" else gpu_ids, # ignored with > 1 gpu_ids sensor_preprocessor_graph=sensor_preprocessor_graph, ) ``` Now we define the actual model that we will be using. **AllenAct** offers first-class support for PyTorch, so any PyTorch model that implements the provided `ActorCriticModel` class will work here. Here we borrow a model from the `pointnav_baselines` project (which unsurprisingly contains several PointNav baselines). It is a small convolutional network that expects the output of a ResNet as its rgb input followed by a single-layered GRU. The model accepts as input the number of different actions our agent can perform in the environment through the `action_space` parameter, which we get from the task definition.
We also define the shape of the inputs we are going to be passing to the model with `observation_space`. We specify the names of our sensors with `goal_sensor_uuid` and `rgb_resnet_preprocessor_uuid`. Finally, we define the size of our RNN with `hidden_size` and the size of the embedding of our goal sensor data (the direction and distance to the target) with `goal_dims`. ```python @classmethod def create_model(cls, **kwargs) -> nn.Module: return ResnetTensorPointNavActorCritic( action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())), observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, goal_sensor_uuid="target_coordinates_ind", rgb_resnet_preprocessor_uuid="rgb_resnet", hidden_size=512, goal_dims=32, ) ``` We also need to define the task sampler that we will be using. This is a piece of code that generates instances of tasks for our agent to perform (essentially starting locations and targets for PointNav). Since we are getting our tasks from a dataset, the task sampler is a very simple piece of code that just reads the specified file and sets the agent to the next starting locations whenever the agent exceeds the maximum number of steps or selects the `stop` action. ```python @classmethod def make_sampler_fn(cls, **kwargs) -> TaskSampler: return PointNavDatasetTaskSampler(**kwargs) ``` You might notice that we did not specify the task sampler's arguments, but are rather passing them in. The reason for this is that each process will have its own task sampler, and we need to specify exactly which scenes each process should work with. If we have several GPUs and many scenes this process of distributing the work can be rather complicated so we define a few helper functions to do just this.
```python @staticmethod def _partition_inds(n: int, num_parts: int): return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype( np.int32 ) def _get_sampler_args_for_scene_split( self, scenes_dir: str, process_ind: int, total_processes: int, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: path = os.path.join(scenes_dir, "*.json.gz") scenes = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)] if len(scenes) == 0: raise RuntimeError( ( "Could find no scene dataset information in directory {}." " Are you sure you've downloaded them? " " If not, see https://allenact.org/installation/download-datasets/ information" " on how this can be done." ).format(scenes_dir) ) if total_processes > len(scenes): # oversample some scenes -> bias if total_processes % len(scenes) != 0: print( "Warning: oversampling some of the scenes to feed all processes." " You can avoid this by setting a number of workers divisible by the number of scenes" ) scenes = scenes * int(ceil(total_processes / len(scenes))) scenes = scenes[: total_processes * (len(scenes) // total_processes)] else: if len(scenes) % total_processes != 0: print( "Warning: oversampling some of the scenes to feed all processes." " You can avoid this by setting a number of workers divisor of the number of scenes" ) inds = self._partition_inds(len(scenes), total_processes) return { "scenes": scenes[inds[process_ind] : inds[process_ind + 1]], "max_steps": self.MAX_STEPS, "sensors": self.SENSORS, "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())), "seed": seeds[process_ind] if seeds is not None else None, "deterministic_cudnn": deterministic_cudnn, "rewards_config": self.REWARD_CONFIG, } ``` The very last things we need to define are the sampler arguments themselves. We define them separately for a train, validation, and test sampler, but in this case, they are almost the same. 
The arguments need to include the location of the dataset and distance cache as well as the environment arguments for our simulator, both of which we defined above and are just referencing here. The only consequential differences between these task samplers are the path to the dataset we are using (train or validation) and whether we want to loop over the dataset or not (we want this for training since we want to train for several epochs, but we do not need this for validation and testing). Since the test scenes of RoboTHOR are private we are also testing on our validation set. ```python def train_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: res = self._get_sampler_args_for_scene_split( os.path.join(self.TRAIN_DATASET_DIR, "episodes"), process_ind, total_processes, seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) res["scene_directory"] = self.TRAIN_DATASET_DIR res["loop_dataset"] = True res["env_args"] = {} res["env_args"].update(self.ENV_ARGS) res["env_args"]["x_display"] = ( ("0.%d" % devices[process_ind % len(devices)]) if devices is not None and len(devices) > 0 else None ) res["allow_flipping"] = True return res def valid_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: res = self._get_sampler_args_for_scene_split( os.path.join(self.VAL_DATASET_DIR, "episodes"), process_ind, total_processes, seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) res["scene_directory"] = self.VAL_DATASET_DIR res["loop_dataset"] = False res["env_args"] = {} res["env_args"].update(self.ENV_ARGS) res["env_args"]["x_display"] = ( ("0.%d" % devices[process_ind % len(devices)]) if devices is not None and len(devices) > 0 else None ) return res def test_task_sampler_args( self, process_ind: 
int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: res = self._get_sampler_args_for_scene_split( os.path.join(self.VAL_DATASET_DIR, "episodes"), process_ind, total_processes, seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) res["scene_directory"] = self.VAL_DATASET_DIR res["loop_dataset"] = False res["env_args"] = {} res["env_args"].update(self.ENV_ARGS) return res ``` This is it! If we copy all of the code into a file we should be able to run our experiment! ## Training Model On Debug Dataset We can test if our installation worked properly by training our model on a small dataset of 4 episodes. This should take about 20 minutes on a computer with an NVIDIA GPU. We can now train a model by running: ```bash PYTHONPATH=. python allenact/main.py -o -c -b ``` If using the same configuration as we have set up, the following command should work: ```bash PYTHONPATH=. python allenact/main.py training_a_pointnav_model -o storage/robothor-pointnav-rgb-resnet-resnet -b projects/tutorials ``` If we start up a tensorboard server during training and specify that `output_dir=storage` the output should look something like this: ![tensorboard output](../img/point-nav-baseline-tb.png) ## Training Model On Full Dataset We can also train the model on the full dataset by changing back our dataset path and running the same command as above. But be aware, training this takes nearly 2 days on a machine with 8 GPUs. ## Testing Model To test the performance of a model please refer to [this tutorial](running-inference-on-a-pretrained-model.md). ## Conclusion In this tutorial, we learned how to create a new PointNav experiment using **AllenAct**.
There are many simple and obvious ways to modify the experiment from here - changing the model, the learning algorithm and the environment each requires very few lines of code changed in the above file, allowing us to explore our embodied ai research ideas across different frameworks with ease. ================================================ FILE: docs/tutorials/training-pipelines.md ================================================ # Tutorial: IL to RL with a training pipeline ================================================ FILE: docs/tutorials/transfering-to-a-different-environment-framework.md ================================================ # Tutorial: Swapping in a new environment **Note** The provided paths in this tutorial assume you have [installed the full library](../installation/installation-allenact.md#full-library). ## Introduction This tutorial was designed as a continuation of the `Robothor PointNav Tutorial` and explains how to modify the experiment config created in that tutorial to work with the iTHOR and Habitat environments. Cross-platform support is one of the key design goals of `allenact`. This is achieved through a total decoupling of the environment code from the engine, model and algorithm code, so that swapping in a new environment is as plug and play as possible. Crucially we will be able to run a model on different environments without touching the model code at all, which will allow us to train neural networks in one environment and test them in another. ## RoboTHOR to iTHOR ![iTHOR Framework](../img/iTHOR_framework.jpg) Since both the `RoboTHOR` and the `iTHOR` environment stem from the same family and are developed by the same organization, switching between the two is incredibly easy. We only have to change the path parameter to point to an iTHOR dataset rather than the RoboTHOR one. 
```python # Dataset Parameters TRAIN_DATASET_DIR = "datasets/ithor-pointnav/train" VAL_DATASET_DIR = "datasets/ithor-pointnav/val" ``` We also have to download the `iTHOR-PointNav` dataset, following [these instructions](../installation/download-datasets.md). We might also want to modify the `tag` method to accurately reflect our config but this will not change the behavior at all and is merely a bookkeeping convenience. ```python @classmethod def tag(cls): return "PointNavRobothorRGBPPO" ``` ## RoboTHOR to Habitat ![Habitat Framework](../img/habitat_framework.jpg) To train experiments using the Habitat framework we need to install it following [these instructions](../installation/installation-framework.md). Since the roboTHOR and Habitat simulators are sufficiently different and have different parameters to configure this transformation takes a bit more effort, but we only need to modify the environment config and TaskSampler (we have to change the former because the habitat simulator accepts a different format of configuration and the latter because the habitat dataset is formatted differently and thus needs to be parsed differently.) As part of our environment modification, we need to switch from using RoboTHOR sensors to using Habitat sensors. 
The sensor implementations we provide offer a uniform interface across all the environments so we simply have to swap out our sensor classes: ```python SENSORS = [ DepthSensorHabitat( height=SCREEN_SIZE, width=SCREEN_SIZE, use_normalization=True, ), TargetCoordinatesSensorHabitat(coordinate_dims=2), ] ``` Next we need to define the simulator config: ```python CONFIG = get_habitat_config("configs/gibson.yaml") CONFIG.defrost() CONFIG.NUM_PROCESSES = NUM_PROCESSES CONFIG.SIMULATOR_GPU_IDS = TRAIN_GPUS CONFIG.DATASET.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR CONFIG.DATASET.POINTNAVV1.CONTENT_SCENES = ["*"] CONFIG.DATASET.DATA_PATH = TRAIN_SCENES CONFIG.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR"] CONFIG.SIMULATOR.RGB_SENSOR.WIDTH = CAMERA_WIDTH CONFIG.SIMULATOR.RGB_SENSOR.HEIGHT = CAMERA_HEIGHT CONFIG.SIMULATOR.TURN_ANGLE = 30 CONFIG.SIMULATOR.FORWARD_STEP_SIZE = 0.25 CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = MAX_STEPS CONFIG.TASK.TYPE = "Nav-v0" CONFIG.TASK.SUCCESS_DISTANCE = 0.2 CONFIG.TASK.SENSORS = ["POINTGOAL_WITH_GPS_COMPASS_SENSOR"] CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.GOAL_FORMAT = "POLAR" CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.DIMENSIONALITY = 2 CONFIG.TASK.GOAL_SENSOR_UUID = "pointgoal_with_gps_compass" CONFIG.TASK.MEASUREMENTS = ["DISTANCE_TO_GOAL", "SUCCESS", "SPL"] CONFIG.TASK.SPL.TYPE = "SPL" CONFIG.TASK.SPL.SUCCESS_DISTANCE = 0.2 CONFIG.TASK.SUCCESS.SUCCESS_DISTANCE = 0.2 CONFIG.MODE = "train" ``` This `CONFIG` object holds very similar values to the ones `ENV_ARGS` held in the RoboTHOR example. We decided to leave this way of passing in configurations exposed to the user to offer maximum customization of the underlying environment.
Finally we need to replace the task sampler and its argument generating functions: ```python # Define Task Sampler from allenact_plugins.habitat_plugin.habitat_task_samplers import PointNavTaskSampler @classmethod def make_sampler_fn(cls, **kwargs) -> TaskSampler: return PointNavTaskSampler(**kwargs) def train_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: config = self.TRAIN_CONFIGS_PER_PROCESS[process_ind] return { "env_config": config, "max_steps": self.MAX_STEPS, "sensors": self.SENSORS, "action_space": gym.spaces.Discrete(len(PointNavTask.action_names())), "distance_to_goal": self.DISTANCE_TO_GOAL, } def valid_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: config = self.CONFIG.clone() config.defrost() config.DATASET.DATA_PATH = self.VALID_SCENES_PATH config.MODE = "validate" config.freeze() return { "env_config": config, "max_steps": self.MAX_STEPS, "sensors": self.SENSORS, "action_space": gym.spaces.Discrete(len(PointNavTask.action_names())), "distance_to_goal": self.DISTANCE_TO_GOAL, } def test_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: config = self.TEST_CONFIGS[process_ind] return { "env_config": config, "max_steps": self.MAX_STEPS, "sensors": self.SENSORS, "action_space": gym.spaces.Discrete(len(PointNavTask.action_names())), "distance_to_goal": self.DISTANCE_TO_GOAL, } ``` As we can see this code looks very similar as well, we simply need to pass slightly different parameters. ## Conclusion In this tutorial, we learned how to modify our experiment configurations to work with different environments. 
By providing a high level of modularity and out-of-the-box support for both `Habitat` and `THOR`, two of the most popular embodied frameworks out there **AllenAct** hopes to give researchers the ability to validate their results across many platforms and help guide them towards genuine progress. The source code for this tutorial can be found in `/projects/framework_transfer_tutorial`. ================================================ FILE: main.py ================================================ #!/usr/bin/env python3 """Entry point to training/validating/testing for a user given experiment name.""" import allenact.main if __name__ == "__main__": allenact.main.main() ================================================ FILE: mkdocs.yml ================================================ site_name: AllenAct site_description: An open source framework for research in Embodied-AI from AI2 site_url: https://allenact.org theme: name: material custom_dir: overrides palette: primary: blue accent: grey logo: img/AI2_Avatar_White.png favicon: img/AllenAct_A.svg highlightjs: true hljs_languages: - python - typescript - json extra_css: - css/extra.css google_analytics: [UA-120916510-8, allenact.org] repo_name: allenai/allenact repo_url: https://github.com/allenai/allenact docs_dir: docs nav: - Overview: index.md - Installation: - Install AllenAct: installation/installation-allenact.md - Install environments: installation/installation-framework.md - Download datasets: installation/download-datasets.md - Getting started: - Run your first experiment: getting_started/running-your-first-experiment.md - Primary abstractions: getting_started/abstractions.md - Structure of the codebase: getting_started/structure.md - Tutorials: - AllenAct Tutorials: tutorials/index.md - Navigation in Minigrid: tutorials/minigrid-tutorial.md - PointNav in RoboTHOR: tutorials/training-a-pointnav-model.md - Swapping environments: tutorials/transfering-to-a-different-environment-framework.md - Using a pre-trained 
model: tutorials/running-inference-on-a-pretrained-model.md - Off-policy training: tutorials/offpolicy-tutorial.md - OpenAI gym for continuous control: tutorials/gym-tutorial.md - Multi-node ObjectNav training: tutorials/distributed-objectnav-tutorial.md - OpenAI gym for MuJoCo tasks: tutorials/gym-mujoco-tutorial.md # - IL to RL with pipelines: tutorials/training-pipelines.md - HowTos: - Define an experiment: howtos/defining-an-experiment.md - Change rewards and losses: howtos/changing-rewards-and-losses.md - Define a new model: howtos/defining-a-new-model.md - Define a new task: howtos/defining-a-new-task.md - Define a new training pipeline: howtos/defining-a-new-training-pipeline.md # - Visualize results: howtos/visualizing-results.md # - Run a multi-agent experiment: howtos/running-a-multi-agent-experiment.md - Projects: - BabyAI baselines: projects/babyai_baselines/README.md - PointNav baselines: projects/pointnav_baselines/README.md - ObjectNav baselines: projects/objectnav_baselines/README.md # - Advisor code: projects/advisor_2020/README.md # - Two Body Problem code: projects/two_body_problem_2019/README.md - FAQ: FAQ.md - Contributing: CONTRIBUTING.md - Licence: LICENSE.md - API: - allenact: - _constants: api/allenact/_constants.md - embodiedai: - mapping: - mapping_utils: - map_builders: api/allenact/embodiedai/mapping/mapping_utils/map_builders.md - point_cloud_utils: api/allenact/embodiedai/mapping/mapping_utils/point_cloud_utils.md - mapping_losses: api/allenact/embodiedai/mapping/mapping_losses.md - mapping_models: - active_neural_slam: api/allenact/embodiedai/mapping/mapping_models/active_neural_slam.md - preprocessors: - resnet: api/allenact/embodiedai/preprocessors/resnet.md - sensors: - vision_sensors: api/allenact/embodiedai/sensors/vision_sensors.md - models: - aux_models: api/allenact/embodiedai/models/aux_models.md - basic_models: api/allenact/embodiedai/models/basic_models.md - resnet: api/allenact/embodiedai/models/resnet.md - fusion_models: 
api/allenact/embodiedai/models/fusion_models.md - visual_nav_models: api/allenact/embodiedai/models/visual_nav_models.md - storage: - vdr_storage: api/allenact/embodiedai/storage/vdr_storage.md - aux_losses: - losses: api/allenact/embodiedai/aux_losses/losses.md - base_abstractions: - experiment_config: api/allenact/base_abstractions/experiment_config.md - misc: api/allenact/base_abstractions/misc.md - task: api/allenact/base_abstractions/task.md - sensor: api/allenact/base_abstractions/sensor.md - preprocessor: api/allenact/base_abstractions/preprocessor.md - distributions: api/allenact/base_abstractions/distributions.md - algorithms: - onpolicy_sync: - losses: - grouped_action_imitation: api/allenact/algorithms/onpolicy_sync/losses/grouped_action_imitation.md - imitation: api/allenact/algorithms/onpolicy_sync/losses/imitation.md - abstract_loss: api/allenact/algorithms/onpolicy_sync/losses/abstract_loss.md - ppo: api/allenact/algorithms/onpolicy_sync/losses/ppo.md - a2cacktr: api/allenact/algorithms/onpolicy_sync/losses/a2cacktr.md - misc: api/allenact/algorithms/onpolicy_sync/misc.md - runner: api/allenact/algorithms/onpolicy_sync/runner.md - policy: api/allenact/algorithms/onpolicy_sync/policy.md - engine: api/allenact/algorithms/onpolicy_sync/engine.md - vector_sampled_tasks: api/allenact/algorithms/onpolicy_sync/vector_sampled_tasks.md - storage: api/allenact/algorithms/onpolicy_sync/storage.md - offpolicy_sync: - losses: - abstract_offpolicy_loss: api/allenact/algorithms/offpolicy_sync/losses/abstract_offpolicy_loss.md - utils: - model_utils: api/allenact/utils/model_utils.md - experiment_utils: api/allenact/utils/experiment_utils.md - spaces_utils: api/allenact/utils/spaces_utils.md - system: api/allenact/utils/system.md - cacheless_frcnn: api/allenact/utils/cacheless_frcnn.md - misc_utils: api/allenact/utils/misc_utils.md - multi_agent_viz_utils: api/allenact/utils/multi_agent_viz_utils.md - viz_utils: api/allenact/utils/viz_utils.md - tensor_utils: 
api/allenact/utils/tensor_utils.md - cache_utils: api/allenact/utils/cache_utils.md - allenact_plugins: - habitat_plugin: - habitat_constants: api/allenact_plugins/habitat_plugin/habitat_constants.md - habitat_tasks: api/allenact_plugins/habitat_plugin/habitat_tasks.md - habitat_sensors: api/allenact_plugins/habitat_plugin/habitat_sensors.md - habitat_environment: api/allenact_plugins/habitat_plugin/habitat_environment.md - habitat_preprocessors: api/allenact_plugins/habitat_plugin/habitat_preprocessors.md - habitat_task_samplers: api/allenact_plugins/habitat_plugin/habitat_task_samplers.md - scripts: - agent_demo: api/allenact_plugins/habitat_plugin/scripts/agent_demo.md - make_map: api/allenact_plugins/habitat_plugin/scripts/make_map.md - habitat_utils: api/allenact_plugins/habitat_plugin/habitat_utils.md - lighthouse_plugin: - lighthouse_models: api/allenact_plugins/lighthouse_plugin/lighthouse_models.md - lighthouse_environment: api/allenact_plugins/lighthouse_plugin/lighthouse_environment.md - lighthouse_tasks: api/allenact_plugins/lighthouse_plugin/lighthouse_tasks.md - lighthouse_sensors: api/allenact_plugins/lighthouse_plugin/lighthouse_sensors.md - lighthouse_util: api/allenact_plugins/lighthouse_plugin/lighthouse_util.md - babyai_plugin: - babyai_constants: api/allenact_plugins/babyai_plugin/babyai_constants.md - babyai_models: api/allenact_plugins/babyai_plugin/babyai_models.md - scripts: - truncate_expert_demos: api/allenact_plugins/babyai_plugin/scripts/truncate_expert_demos.md - get_instr_length_percentiles: api/allenact_plugins/babyai_plugin/scripts/get_instr_length_percentiles.md - download_babyai_expert_demos: api/allenact_plugins/babyai_plugin/scripts/download_babyai_expert_demos.md - babyai_tasks: api/allenact_plugins/babyai_plugin/babyai_tasks.md - ithor_plugin: - ithor_tasks: api/allenact_plugins/ithor_plugin/ithor_tasks.md - ithor_environment: api/allenact_plugins/ithor_plugin/ithor_environment.md - ithor_constants: 
api/allenact_plugins/ithor_plugin/ithor_constants.md - ithor_util: api/allenact_plugins/ithor_plugin/ithor_util.md - ithor_sensors: api/allenact_plugins/ithor_plugin/ithor_sensors.md - scripts: - make_objectnav_debug_dataset: api/allenact_plugins/ithor_plugin/scripts/make_objectnav_debug_dataset.md - make_pointnav_debug_dataset: api/allenact_plugins/ithor_plugin/scripts/make_pointnav_debug_dataset.md - ithor_viz: api/allenact_plugins/ithor_plugin/ithor_viz.md - ithor_task_samplers: api/allenact_plugins/ithor_plugin/ithor_task_samplers.md - robothor_plugin: - robothor_preprocessors: api/allenact_plugins/robothor_plugin/robothor_preprocessors.md - robothor_task_samplers: api/allenact_plugins/robothor_plugin/robothor_task_samplers.md - robothor_environment: api/allenact_plugins/robothor_plugin/robothor_environment.md - robothor_constants: api/allenact_plugins/robothor_plugin/robothor_constants.md - robothor_distributions: api/allenact_plugins/robothor_plugin/robothor_distributions.md - robothor_models: api/allenact_plugins/robothor_plugin/robothor_models.md - robothor_tasks: api/allenact_plugins/robothor_plugin/robothor_tasks.md - scripts: - make_objectnav_debug_dataset: api/allenact_plugins/robothor_plugin/scripts/make_objectnav_debug_dataset.md - make_pointnav_debug_dataset: api/allenact_plugins/robothor_plugin/scripts/make_pointnav_debug_dataset.md - robothor_sensors: api/allenact_plugins/robothor_plugin/robothor_sensors.md - robothor_viz: api/allenact_plugins/robothor_plugin/robothor_viz.md - minigrid_plugin: - minigrid_tasks: api/allenact_plugins/minigrid_plugin/minigrid_tasks.md - minigrid_environments: api/allenact_plugins/minigrid_plugin/minigrid_environments.md - minigrid_offpolicy: api/allenact_plugins/minigrid_plugin/minigrid_offpolicy.md - minigrid_sensors: api/allenact_plugins/minigrid_plugin/minigrid_sensors.md - configs: - minigrid_nomemory: api/allenact_plugins/minigrid_plugin/configs/minigrid_nomemory.md - minigrid_models: 
api/allenact_plugins/minigrid_plugin/minigrid_models.md - manipulathor_plugin: - manipulathor_viz: api/allenact_plugins/manipulathor_plugin/manipulathor_viz.md - manipulathor_tasks: api/allenact_plugins/manipulathor_plugin/manipulathor_tasks.md - manipulathor_task_samplers: api/allenact_plugins/manipulathor_plugin/manipulathor_task_samplers.md - manipulathor_constants: api/allenact_plugins/manipulathor_plugin/manipulathor_constants.md - armpointnav_constants: api/allenact_plugins/manipulathor_plugin/armpointnav_constants.md - manipulathor_sensors: api/allenact_plugins/manipulathor_plugin/manipulathor_sensors.md - arm_calculation_utils: api/allenact_plugins/manipulathor_plugin/arm_calculation_utils.md - manipulathor_utils: api/allenact_plugins/manipulathor_plugin/manipulathor_utils.md - manipulathor_environment: api/allenact_plugins/manipulathor_plugin/manipulathor_environment.md - gym_plugin: - gym_environment: api/allenact_plugins/gym_plugin/gym_environment.md - gym_sensors: api/allenact_plugins/gym_plugin/gym_sensors.md - gym_distributions: api/allenact_plugins/gym_plugin/gym_distributions.md - gym_models: api/allenact_plugins/gym_plugin/gym_models.md - gym_tasks: api/allenact_plugins/gym_plugin/gym_tasks.md - constants: api/constants.md - projects: - gym_baselines: - experiments: - gym_base: api/projects/gym_baselines/experiments/gym_base.md - gym_humanoid_base: api/projects/gym_baselines/experiments/gym_humanoid_base.md - gym_mujoco_base: api/projects/gym_baselines/experiments/gym_mujoco_base.md - gym_humanoid_ddppo: api/projects/gym_baselines/experiments/gym_humanoid_ddppo.md - mujoco: - gym_mujoco_swimmer_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_swimmer_ddppo.md - gym_mujoco_reacher_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_reacher_ddppo.md - gym_mujoco_walker2d_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_walker2d_ddppo.md - gym_mujoco_halfcheetah_ddppo: 
api/projects/gym_baselines/experiments/mujoco/gym_mujoco_halfcheetah_ddppo.md - gym_mujoco_humanoid_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_humanoid_ddppo.md - gym_mujoco_inverteddoublependulum_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_inverteddoublependulum_ddppo.md - gym_mujoco_ant_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_ant_ddppo.md - gym_mujoco_hopper_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_hopper_ddppo.md - gym_mujoco_invertedpendulum_ddppo: api/projects/gym_baselines/experiments/mujoco/gym_mujoco_invertedpendulum_ddppo.md - gym_mujoco_ddppo: api/projects/gym_baselines/experiments/gym_mujoco_ddppo.md - models: - gym_models: api/projects/gym_baselines/models/gym_models.md - objectnav_baselines: - experiments: - robothor: - objectnav_robothor_base: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_base.md - objectnav_robothor_rgb_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnetgru_ddppo.md - objectnav_robothor_rgb_resnetgru_dagger: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnetgru_dagger.md - objectnav_robothor_rgbd_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnetgru_ddppo.md - objectnav_robothor_rgb_resnetgru_ddppo_and_gbc: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnetgru_ddppo_and_gbc.md - objectnav_robothor_rgb_unfrozenresnet_gru_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_unfrozenresnet_gru_ddppo.md - objectnav_robothor_rgb_unfrozenresnet_gru_vdr_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_unfrozenresnet_gru_vdr_ddppo.md - objectnav_robothor_depth_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_depth_resnetgru_ddppo.md - 
objectnav_mixin_unfrozenresnet_gru: api/projects/objectnav_baselines/experiments/objectnav_mixin_unfrozenresnet_gru.md - ithor: - objectnav_ithor_rgbd_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgbd_resnetgru_ddppo.md - objectnav_ithor_depth_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_depth_resnetgru_ddppo.md - objectnav_ithor_rgb_resnetgru_ddppo: api/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgb_resnetgru_ddppo.md - objectnav_ithor_base: api/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_base.md - objectnav_thor_mixin_ddppo_and_gbc: api/projects/objectnav_baselines/experiments/objectnav_thor_mixin_ddppo_and_gbc.md - objectnav_thor_base: api/projects/objectnav_baselines/experiments/objectnav_thor_base.md - objectnav_mixin_resnetgru: api/projects/objectnav_baselines/experiments/objectnav_mixin_resnetgru.md - objectnav_base: api/projects/objectnav_baselines/experiments/objectnav_base.md - objectnav_mixin_ddppo: api/projects/objectnav_baselines/experiments/objectnav_mixin_ddppo.md - objectnav_mixin_dagger: api/projects/objectnav_baselines/experiments/objectnav_mixin_dagger.md - models: - object_nav_models: api/projects/objectnav_baselines/models/object_nav_models.md - babyai_baselines: - experiments: - go_to_local: - bc: api/projects/babyai_baselines/experiments/go_to_local/bc.md - distributed_bc_offpolicy: api/projects/babyai_baselines/experiments/go_to_local/distributed_bc_offpolicy.md - dagger: api/projects/babyai_baselines/experiments/go_to_local/dagger.md - bc_teacher_forcing: api/projects/babyai_baselines/experiments/go_to_local/bc_teacher_forcing.md - distributed_bc_teacher_forcing: api/projects/babyai_baselines/experiments/go_to_local/distributed_bc_teacher_forcing.md - ppo: api/projects/babyai_baselines/experiments/go_to_local/ppo.md - a2c: api/projects/babyai_baselines/experiments/go_to_local/a2c.md - base: 
api/projects/babyai_baselines/experiments/go_to_local/base.md - go_to_obj: - bc: api/projects/babyai_baselines/experiments/go_to_obj/bc.md - dagger: api/projects/babyai_baselines/experiments/go_to_obj/dagger.md - bc_teacher_forcing: api/projects/babyai_baselines/experiments/go_to_obj/bc_teacher_forcing.md - ppo: api/projects/babyai_baselines/experiments/go_to_obj/ppo.md - a2c: api/projects/babyai_baselines/experiments/go_to_obj/a2c.md - base: api/projects/babyai_baselines/experiments/go_to_obj/base.md - base: api/projects/babyai_baselines/experiments/base.md - pointnav_baselines: - experiments: - robothor: - pointnav_robothor_rgbd_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgbd_simpleconvgru_ddppo.md - pointnav_robothor_depth_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_depth_simpleconvgru_ddppo.md - pointnav_robothor_rgb_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo.md - pointnav_robothor_rgb_simpleconvgru_ddppo_and_gbc: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo_and_gbc.md - pointnav_robothor_base: api/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_base.md - habitat: - pointnav_habitat_rgb_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgb_simpleconvgru_ddppo.md - pointnav_habitat_rgbd_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgbd_simpleconvgru_ddppo.md - pointnav_habitat_base: api/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_base.md - debug_pointnav_habitat_rgbd_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/debug_pointnav_habitat_rgbd_simpleconvgru_ddppo.md - debug_pointnav_habitat_rgb_simpleconvgru_ddppo: 
api/projects/pointnav_baselines/experiments/habitat/debug_pointnav_habitat_rgb_simpleconvgru_ddppo.md - debug_pointnav_habitat_rgb_simpleconvgru_bc: api/projects/pointnav_baselines/experiments/habitat/debug_pointnav_habitat_rgb_simpleconvgru_bc.md - pointnav_habitat_depth_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_depth_simpleconvgru_ddppo.md - debug_pointnav_habitat_base: api/projects/pointnav_baselines/experiments/habitat/debug_pointnav_habitat_base.md - pointnav_base: api/projects/pointnav_baselines/experiments/pointnav_base.md - pointnav_habitat_mixin_ddppo: api/projects/pointnav_baselines/experiments/pointnav_habitat_mixin_ddppo.md - ithor: - pointnav_ithor_rgbd_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgbd_simpleconvgru_ddppo.md - pointnav_ithor_depth_simpleconvgru_ddppo_and_gbc: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_depth_simpleconvgru_ddppo_and_gbc.md - pointnav_ithor_rgb_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgb_simpleconvgru_ddppo.md - pointnav_ithor_base: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_base.md - pointnav_ithor_depth_simpleconvgru_ddppo: api/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_depth_simpleconvgru_ddppo.md - pointnav_mixin_simpleconvgru: api/projects/pointnav_baselines/experiments/pointnav_mixin_simpleconvgru.md - pointnav_thor_mixin_ddppo: api/projects/pointnav_baselines/experiments/pointnav_thor_mixin_ddppo.md - pointnav_thor_mixin_ddppo_and_gbc: api/projects/pointnav_baselines/experiments/pointnav_thor_mixin_ddppo_and_gbc.md - pointnav_thor_base: api/projects/pointnav_baselines/experiments/pointnav_thor_base.md - models: - point_nav_models: api/projects/pointnav_baselines/models/point_nav_models.md - tutorials: - pointnav_habitat_rgb_ddppo: api/projects/tutorials/pointnav_habitat_rgb_ddppo.md - 
object_nav_ithor_dagger_then_ppo_one_object: api/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object.md - minigrid_offpolicy_tutorial: api/projects/tutorials/minigrid_offpolicy_tutorial.md - navtopartner_robothor_rgb_ppo: api/projects/tutorials/navtopartner_robothor_rgb_ppo.md - pointnav_ithor_rgb_ddppo: api/projects/tutorials/pointnav_ithor_rgb_ddppo.md - object_nav_ithor_dagger_then_ppo_one_object_viz: api/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object_viz.md - object_nav_ithor_ppo_one_object: api/projects/tutorials/object_nav_ithor_ppo_one_object.md - minigrid_tutorial_conds: api/projects/tutorials/minigrid_tutorial_conds.md - manipulathor_baselines: - armpointnav_baselines: - experiments: - armpointnav_thor_base: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_thor_base.md - armpointnav_base: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_base.md - armpointnav_mixin_ddppo: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_mixin_ddppo.md - ithor: - armpointnav_no_vision: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_no_vision.md - armpointnav_ithor_base: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_ithor_base.md - armpointnav_depth: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_depth.md - armpointnav_rgb: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_rgb.md - armpointnav_rgbdepth: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_rgbdepth.md - armpointnav_disjoint_depth: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/ithor/armpointnav_disjoint_depth.md - armpointnav_mixin_simplegru: api/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_mixin_simplegru.md - models: - 
arm_pointnav_models: api/projects/manipulathor_baselines/armpointnav_baselines/models/arm_pointnav_models.md - base_models: api/projects/manipulathor_baselines/armpointnav_baselines/models/base_models.md - disjoint_arm_pointnav_models: api/projects/manipulathor_baselines/armpointnav_baselines/models/disjoint_arm_pointnav_models.md - manipulathor_net_utils: api/projects/manipulathor_baselines/armpointnav_baselines/models/manipulathor_net_utils.md - tests: - mapping: - test_ai2thor_mapping: api/tests/mapping/test_ai2thor_mapping.md - multiprocessing: - test_frozen_attribs: api/tests/multiprocessing/test_frozen_attribs.md - utils: - test_spaces: api/tests/utils/test_spaces.md - vision: - test_pillow_rescaling: api/tests/vision/test_pillow_rescaling.md - sync_algs_cpu: - test_to_to_obj_trains: api/tests/sync_algs_cpu/test_to_to_obj_trains.md - manipulathor_plugin: - test_utils: api/tests/manipulathor_plugin/test_utils.md - hierarchical_policies: - test_minigrid_conditional: api/tests/hierarchical_policies/test_minigrid_conditional.md markdown_extensions: - toc: permalink: '#' - markdown.extensions.codehilite: guess_lang: true - meta - admonition - codehilite # extra_javascript: # - javascripts/extra.js #plugins: # - search # - mkpdfs ================================================ FILE: mypy.ini ================================================ [mypy] python_version = 3.7 follow_imports = skip ignore_missing_imports = True strict_optional = False [mypy-demo.*] ignore_errors = True ================================================ FILE: overrides/main.html ================================================ {% extends "base.html" %} {% block extrahead %} {% set title = config.site_name %} {% if page and page.meta and page.meta.title %} {% set title = title ~ " - " ~ page.meta.title %} {% elif page and page.title and not page.is_homepage %} {% set title = title ~ " - " ~ page.title | striptags %} {% endif %} {% endblock %} ================================================ 
#!/bin/bash
# Download a pretrained navigation checkpoint archive into the directory that
# contains this script, unpack it, and delete the archive.
#
# Usage: download_navigation_model_ckpts.sh <model-name>
#   where <model-name> is one of:
#     robothor-pointnav-rgb-resnet
#     robothor-objectnav-challenge-2021

# Move to the directory containing this file
cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" || exit

BASE_URL="https://prior-model-weights.s3.us-east-2.amazonaws.com/embodied-ai/navigation"

# Validate the requested model name before touching the network.
case "$1" in
  robothor-pointnav-rgb-resnet)
    echo "Downloading pretrained RoboTHOR PointNav model..."
    ;;
  robothor-objectnav-challenge-2021)
    echo "Downloading pretrained RoboTHOR ObjectNav model..."
    ;;
  *)
    # List every accepted name; the old message mentioned only one of them.
    echo "Failed: Usage download_navigation_model_ckpts.sh [robothor-pointnav-rgb-resnet|robothor-objectnav-challenge-2021]"
    exit 1
    ;;
esac

# Download, Unzip, and Remove zip. Stop with a non-zero status on the first
# failure so that "saved folder" is only reported when the folder exists.
if ! wget "${BASE_URL}/$1.tar.gz"
then
  echo "Failed: could not download $1.tar.gz"
  exit 1
fi

if ! tar -xf "$1.tar.gz"
then
  echo "Failed: could not extract $1.tar.gz"
  exit 1
fi

rm "$1.tar.gz"
echo "saved folder: $1"
For instance, to train a model using PPO, run ```bash python main.py go_to_local.ppo --experiment_base projects/babyai_baselines/experiments ``` Note that these experiments will be quite slow when not using a GPU as the BabyAI model architecture is surprisingly large. Specifying a GPU (if available) can be done from the command line using hooks we created using [gin-config](https://github.com/google/gin-config). E.g. to train using the 0th GPU device, add ```bash --gp "machine_params.gpu_id = 0" ``` to the above command. ================================================ FILE: projects/babyai_baselines/__init__.py ================================================ ================================================ FILE: projects/babyai_baselines/experiments/__init__.py ================================================ ================================================ FILE: projects/babyai_baselines/experiments/base.py ================================================ from abc import ABC from typing import Dict, Any, List, Optional, Union, Sequence, cast import gym import torch import torch.nn as nn import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR from allenact.algorithms.onpolicy_sync.losses import PPO, A2C from allenact.algorithms.onpolicy_sync.losses.a2cacktr import A2CConfig from allenact.algorithms.onpolicy_sync.losses.imitation import Imitation from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams from allenact.base_abstractions.misc import Loss from allenact.base_abstractions.sensor import SensorSuite, Sensor, ExpertActionSensor from allenact.base_abstractions.task import TaskSampler from allenact.utils.experiment_utils import ( Builder, LinearDecay, PipelineStage, TrainingPipeline, ) from allenact_plugins.babyai_plugin.babyai_models import BabyAIRecurrentACModel from allenact_plugins.babyai_plugin.babyai_tasks import BabyAITask, 
class BaseBabyAIExperimentConfig(ExperimentConfig, ABC):
    """Base experimental config for the BabyAI baselines.

    Subclasses are expected to fill in the `Optional` class attributes
    below (they default to `None` here) and to provide `tag` and
    `training_pipeline`.
    """

    LEVEL: Optional[str] = None
    TOTAL_RL_TRAIN_STEPS: Optional[int] = None
    AGENT_VIEW_SIZE: int = 7
    ROLLOUT_STEPS: Optional[int] = None
    NUM_TRAIN_SAMPLERS: Optional[int] = None
    NUM_TEST_TASKS: Optional[int] = None
    INSTR_LEN: Optional[int] = None
    USE_INSTR: Optional[bool] = None
    GPU_ID: Optional[int] = None
    USE_EXPERT = False
    SHOULD_LOG = True
    PPO_NUM_MINI_BATCH = 2
    ARCH: Optional[str] = None
    NUM_CKPTS_TO_SAVE = 50

    TEST_SEED_OFFSET = 0

    DEFAULT_LR = 1e-3

    @classmethod
    def METRIC_ACCUMULATE_INTERVAL(cls):
        """Return the metric accumulation interval (`NUM_TRAIN_SAMPLERS * 1000`)."""
        return cls.NUM_TRAIN_SAMPLERS * 1000

    @classmethod
    def get_sensors(cls) -> Sequence[Sensor]:
        """Build the sensor list for this configuration.

        Always includes an `EgocentricMiniGridSensor`; adds a
        `MiniGridMissionSensor` when `USE_INSTR` is truthy and an
        `ExpertActionSensor` when `USE_EXPERT` is truthy.
        """
        assert cls.USE_INSTR is not None

        sensors: List[Sensor] = [
            EgocentricMiniGridSensor(
                agent_view_size=cls.AGENT_VIEW_SIZE, view_channels=3
            ),
        ]
        if cls.USE_INSTR:
            sensors.append(
                MiniGridMissionSensor(instr_len=cls.INSTR_LEN)  # type:ignore
            )
        if cls.USE_EXPERT:
            sensors.append(
                ExpertActionSensor(  # type: ignore
                    nactions=len(BabyAITask.class_action_names())
                )
            )
        return sensors

    @classmethod
    def rl_loss_default(cls, alg: str, steps: Optional[int] = None):
        """Return default loss settings for the algorithm named `alg`.

        # Parameters

        alg : One of `"ppo"`, `"a2c"`, or `"imitation"`.
        steps : Number of steps passed to the `LinearDecay` used for the PPO
            `clip_decay`; must not be `None` when `alg == "ppo"`.

        # Returns

        A dict with keys `"loss"`, `"num_mini_batch"`, and `"update_repeats"`.

        # Raises

        NotImplementedError : If `alg` is not one of the supported names.
        """
        if alg == "ppo":
            assert steps is not None
            return {
                "loss": Builder(
                    PPO,
                    kwargs={"clip_decay": LinearDecay(steps)},
                    default=PPOConfig,
                ),
                "num_mini_batch": cls.PPO_NUM_MINI_BATCH,
                "update_repeats": 4,
            }
        elif alg == "a2c":
            return {
                "loss": A2C(**A2CConfig),
                "num_mini_batch": 1,
                "update_repeats": 1,
            }
        elif alg == "imitation":
            return {
                "loss": Imitation(),
                "num_mini_batch": cls.PPO_NUM_MINI_BATCH,
                "update_repeats": 4,
            }
        else:
            raise NotImplementedError(
                f"Unknown algorithm {alg!r}; expected 'ppo', 'a2c', or 'imitation'."
            )

    @classmethod
    def _training_pipeline(
        cls,
        named_losses: Dict[str, Union[Loss, Builder]],
        pipeline_stages: List[PipelineStage],
        num_mini_batch: int,
        update_repeats: int,
        total_train_steps: int,
        lr: Optional[float] = None,
    ):
        """Assemble a `TrainingPipeline` from the given losses and stages.

        # Parameters

        named_losses : Mapping from loss name to loss (or loss builder).
        pipeline_stages : Stages of the pipeline.
        num_mini_batch : Number of mini batches per update.
        update_repeats : Number of update repeats per rollout.
        total_train_steps : Total number of training steps; determines the
            checkpoint save interval and the horizon of the linear learning
            rate decay.
        lr : Learning rate for Adam; `DEFAULT_LR` is used when `None`.
        """
        lr = cls.DEFAULT_LR if lr is None else lr
        num_steps = cls.ROLLOUT_STEPS
        metric_accumulate_interval = cls.METRIC_ACCUMULATE_INTERVAL()
        # Derive the schedule from the `total_train_steps` argument rather
        # than `cls.TOTAL_RL_TRAIN_STEPS`, so that the parameter is honored
        # (e.g. by imitation-only pipelines with a different step budget).
        save_interval = int(total_train_steps / cls.NUM_CKPTS_TO_SAVE)
        gamma = 0.99

        use_gae = "reinforce_loss" not in named_losses
        gae_lambda = 0.99
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses=named_losses,
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=None,
            should_log=cls.SHOULD_LOG,
            pipeline_stages=pipeline_stages,
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=total_train_steps)}  # type: ignore
            ),
        )

    @classmethod
    def machine_params(
        cls, mode="train", gpu_id="default", n_train_processes="default", **kwargs
    ):
        """Return the `MachineParams` (process count and devices) for `mode`.

        # Parameters

        mode : `"train"`, `"valid"`, or `"test"`.
        gpu_id : Device to use, or `"default"` to fall back on `GPU_ID`
            (no devices when `GPU_ID` is `None`).
        n_train_processes : Number of training processes, or `"default"` to
            use `NUM_TRAIN_SAMPLERS`.

        # Raises

        NotImplementedError : If `mode` is not one of the supported values.
        """
        if mode == "train":
            if n_train_processes == "default":
                nprocesses = cls.NUM_TRAIN_SAMPLERS
            else:
                nprocesses = n_train_processes
        elif mode == "valid":
            nprocesses = 0
        elif mode == "test":
            nprocesses = min(
                100 if torch.cuda.is_available() else 8, cls.NUM_TEST_TASKS
            )
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        if gpu_id == "default":
            devices = [] if cls.GPU_ID is None else [cls.GPU_ID]
        else:
            devices = [gpu_id]

        return MachineParams(nprocesses=nprocesses, devices=devices)

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the `BabyAIRecurrentACModel` matching this config's sensors."""
        sensors = cls.get_sensors()
        return BabyAIRecurrentACModel(
            action_space=gym.spaces.Discrete(len(BabyAITask.class_action_names())),
            observation_space=SensorSuite(sensors).observation_spaces,
            use_instr=cls.USE_INSTR,
            use_memory=True,
            arch=cls.ARCH,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `BabyAITaskSampler` from the given keyword arguments."""
        return BabyAITaskSampler(**kwargs)

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return task sampler kwargs for training process `process_ind`."""
        return {
            "env_builder": self.LEVEL,
            "sensors": self.get_sensors(),
            "seed": seeds[process_ind] if seeds is not None else None,
        }

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Validation is not supported by this config; always raises.

        # Raises

        RuntimeError : Always.
        """
        raise RuntimeError("No validation processes for these tasks")

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return task sampler kwargs for test process `process_ind`.

        The `NUM_TEST_TASKS` tasks are split across `total_processes`
        processes (the first `NUM_TEST_TASKS % total_processes` processes get
        one extra task) and each task receives a distinct, fixed seed.
        Sensors whose type name contains `"Expert"` are removed.
        """
        # Adding the boolean gives the remainder tasks to the lowest-indexed
        # processes.
        max_tasks = self.NUM_TEST_TASKS // total_processes + (
            process_ind < (self.NUM_TEST_TASKS % total_processes)
        )
        # Interleave seeds across processes so no two processes share one.
        task_seeds_list = [
            2**31 - 1 + self.TEST_SEED_OFFSET + process_ind + total_processes * i
            for i in range(max_tasks)
        ]

        assert len(task_seeds_list) == 0 or (
            min(task_seeds_list) >= 0 and max(task_seeds_list) <= 2**32 - 1
        )

        train_sampler_args = self.train_task_sampler_args(
            process_ind=process_ind,
            total_processes=total_processes,
            devices=devices,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        return {
            **train_sampler_args,
            "task_seeds_list": task_seeds_list,
            "max_tasks": max_tasks,
            "deterministic_sampling": True,
            "sensors": [
                s for s in train_sampler_args["sensors"] if "Expert" not in str(type(s))
            ],
        }
class BaseBabyAIGoToLocalExperimentConfig(BaseBabyAIExperimentConfig, ABC):
    """Base experimental config for the BabyAI GoToLocal level."""

    LEVEL: Optional[str] = "BabyAI-GoToLocal-v0"
    TOTAL_RL_TRAIN_STEPS = int(15e6)
    TOTAL_IL_TRAIN_STEPS = int(7.5e6)
    ROLLOUT_STEPS: int = 128
    NUM_TRAIN_SAMPLERS: int = 128 if torch.cuda.is_available() else 4
    PPO_NUM_MINI_BATCH = 4
    NUM_CKPTS_TO_SAVE = 20
    NUM_TEST_TASKS: int = 1000
    USE_LR_DECAY: bool = True

    # ARCH = "cnn1"
    # ARCH = "cnn2"
    ARCH = "expert_filmcnn"
    USE_INSTR = True
    INSTR_LEN: int = 5

    INCLUDE_AUXILIARY_HEAD = False

    @classmethod
    def METRIC_ACCUMULATE_INTERVAL(cls):
        """Return the metric accumulation interval (`NUM_TRAIN_SAMPLERS * 64`)."""
        return cls.NUM_TRAIN_SAMPLERS * 64

    @classmethod
    def _training_pipeline(  # type:ignore
        cls,
        named_losses: Dict[str, Union[Loss, Builder]],
        pipeline_stages: List[PipelineStage],
        num_mini_batch: int,
        update_repeats: int,
        total_train_steps: int,
        lr: Optional[float] = None,
        named_storages: Optional[Dict[str, Union[ExperienceStorage, Builder]]] = None,
    ):
        """Assemble a `TrainingPipeline` from the given losses and stages.

        # Parameters

        named_losses : Mapping from loss name to loss (or loss builder).
        pipeline_stages : Stages of the pipeline.
        num_mini_batch : Number of mini batches per update.
        update_repeats : Number of update repeats per rollout.
        total_train_steps : Total number of training steps; determines the
            checkpoint save interval and, when `USE_LR_DECAY` is set, the
            horizon of the linear learning rate decay.
        lr : Learning rate for Adam; `DEFAULT_LR` is used when `None`.
        named_storages : Optional mapping from storage name to storage (or
            storage builder), forwarded to the `TrainingPipeline`.
        """
        # Honor an explicitly supplied learning rate (previously it was
        # silently overwritten by `DEFAULT_LR`), matching the parent class.
        lr = cls.DEFAULT_LR if lr is None else lr
        num_steps = cls.ROLLOUT_STEPS
        metric_accumulate_interval = cls.METRIC_ACCUMULATE_INTERVAL()
        save_interval = int(total_train_steps / cls.NUM_CKPTS_TO_SAVE)
        gamma = 0.99

        use_gae = "reinforce_loss" not in named_losses
        gae_lambda = 0.99
        max_grad_norm = 0.5

        # The scheduler is optional so subclasses can train at a constant rate.
        if cls.USE_LR_DECAY:
            lr_scheduler_builder = Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=total_train_steps)}  # type: ignore
            )
        else:
            lr_scheduler_builder = None

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses=named_losses,
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=None,
            should_log=cls.SHOULD_LOG,
            pipeline_stages=pipeline_stages,
            named_storages=named_storages,
            lr_scheduler_builder=lr_scheduler_builder,
        )

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the `BabyAIRecurrentACModel` used for GoToLocal experiments."""
        sensors = cls.get_sensors()
        return BabyAIRecurrentACModel(
            action_space=gym.spaces.Discrete(len(BabyAITask.class_action_names())),
            observation_space=SensorSuite(sensors).observation_spaces,
            use_instr=cls.USE_INSTR,
            use_memory=True,
            arch=cls.ARCH,
            instr_dim=256,
            lang_model="attgru",
            memory_dim=2048,
            include_auxiliary_head=cls.INCLUDE_AUXILIARY_HEAD,
        )

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Validation is not supported by this config; always raises.

        # Raises

        RuntimeError : Always.
        """
        raise RuntimeError("No validation processes for these tasks")
class DaggerBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):
    """Imitation learning on BabyAI GoToLocal, trained with DAgger.

    A single imitation-loss stage whose teacher-forcing schedule is
    `LinearDecay(startp=1.0, endp=0.0)` spanning half of the
    imitation-learning step budget.
    """

    USE_EXPERT = True

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "BabyAIGoToLocalDagger"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build the one-stage DAgger training pipeline."""
        n_steps = cls.TOTAL_IL_TRAIN_STEPS
        il_defaults = cls.rl_loss_default("imitation")

        forcing_schedule = LinearDecay(
            startp=1.0,
            endp=0.0,
            steps=n_steps // 2,
        )
        dagger_stage = PipelineStage(
            loss_names=["imitation_loss"],
            teacher_forcing=forcing_schedule,
            max_stage_steps=n_steps,
        )

        return cls._training_pipeline(
            named_losses={"imitation_loss": il_defaults["loss"]},
            pipeline_stages=[dagger_stage],
            num_mini_batch=il_defaults["num_mini_batch"],
            update_repeats=il_defaults["update_repeats"],
            total_train_steps=n_steps,
        )
@classmethod
def training_pipeline(cls, **kwargs):
    """Build the off-policy behavior-cloning pipeline.

    The single stage trains `offpolicy_expert_ce_loss` from the
    `minigrid_offpolicy_expert` storage. The pipeline-level
    `num_mini_batch` and `update_repeats` are both 0; the stage
    component instead uses one mini-batch with
    `num_mini_batch * update_repeats` repeats, where both factors come
    from the PPO defaults.
    """
    n_steps = cls.TOTAL_IL_TRAIN_STEPS

    ppo_defaults = cls.rl_loss_default("ppo", steps=-1)
    offpolicy_repeats = (
        ppo_defaults["num_mini_batch"] * ppo_defaults["update_repeats"]
    )

    # Objects are created in the same order as before: loss, then the
    # two storages, then the stage description.
    losses = {
        "offpolicy_expert_ce_loss": MiniGridOffPolicyExpertCELoss(
            total_episodes_in_epoch=int(1e6)
        ),
    }

    # The "-small" trajectory file is selected when CUDA is unavailable.
    file_suffix = "" if torch.cuda.is_available() else "-small"
    expert_data_path = os.path.join(
        BABYAI_EXPERT_TRAJECTORIES_DIR,
        "BabyAI-GoToLocal-v0{}.pkl".format(file_suffix),
    )
    storages = {
        "onpolicy": RolloutBlockStorage(),
        "minigrid_offpolicy_expert": MiniGridExpertTrajectoryStorage(
            data_path=expert_data_path,
            num_samplers=cls.NUM_TRAIN_SAMPLERS,
            rollout_len=cls.ROLLOUT_STEPS,
            instr_len=cls.INSTR_LEN,
        ),
    }

    offpolicy_component = StageComponent(
        uuid="offpolicy",
        storage_uuid="minigrid_offpolicy_expert",
        loss_names=["offpolicy_expert_ce_loss"],
        training_settings=TrainingSettings(
            update_repeats=offpolicy_repeats,
            num_mini_batch=1,
        ),
    )
    stage = PipelineStage(
        loss_names=["offpolicy_expert_ce_loss"],
        max_stage_steps=n_steps,
        stage_components=[offpolicy_component],
    )

    return cls._training_pipeline(
        named_losses=losses,
        named_storages=storages,
        pipeline_stages=[stage],
        num_mini_batch=0,
        update_repeats=0,
        total_train_steps=n_steps,
    )
class PPOBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig):
    """BabyAI GoToLocal trained with a PPO loss only."""

    # 128 * 12 samplers when CUDA is available, otherwise the base
    # config's sampler count.
    NUM_TRAIN_SAMPLERS: int = (
        BaseBabyAIGoToLocalExperimentConfig.NUM_TRAIN_SAMPLERS
        if not torch.cuda.is_available()
        else 128 * 12
    )
    ROLLOUT_STEPS: int = 32
    USE_LR_DECAY = False
    DEFAULT_LR = 1e-4

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "BabyAIGoToLocalPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a one-stage pipeline that trains with `ppo_loss`."""
        n_steps = cls.TOTAL_RL_TRAIN_STEPS
        ppo_defaults = cls.rl_loss_default("ppo", steps=n_steps)

        ppo_stage = PipelineStage(
            loss_names=["ppo_loss"],
            max_stage_steps=n_steps,
        )
        return cls._training_pipeline(
            named_losses={"ppo_loss": ppo_defaults["loss"]},
            pipeline_stages=[ppo_stage],
            num_mini_batch=ppo_defaults["num_mini_batch"],
            update_repeats=ppo_defaults["update_repeats"],
            total_train_steps=n_steps,
        )
class A2CBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):
    """BabyAI GoToObj trained with an A2C loss only."""

    TOTAL_RL_TRAIN_STEPS = int(1e5)

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "BabyAIGoToObjA2C"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a one-stage pipeline that trains with `a2c_loss`."""
        n_steps = cls.TOTAL_RL_TRAIN_STEPS
        a2c_defaults = cls.rl_loss_default("a2c", steps=n_steps)

        pipeline_kwargs = dict(
            named_losses={"a2c_loss": a2c_defaults["loss"]},
            pipeline_stages=[
                PipelineStage(loss_names=["a2c_loss"], max_stage_steps=n_steps),
            ],
            num_mini_batch=a2c_defaults["num_mini_batch"],
            update_repeats=a2c_defaults["update_repeats"],
            total_train_steps=n_steps,
        )
        return cls._training_pipeline(**pipeline_kwargs)
@classmethod
def _training_pipeline(  # type:ignore
    cls,
    named_losses: Dict[str, Union[Loss, Builder]],
    pipeline_stages: List[PipelineStage],
    num_mini_batch: int,
    update_repeats: int,
    total_train_steps: int,
    lr: Optional[float] = None,
    **kwargs,
):
    """Assemble the `TrainingPipeline` shared by the GoToObj experiments.

    # Parameters

    named_losses : Mapping from loss name to a loss (or loss builder).
    pipeline_stages : Stages forwarded to `TrainingPipeline`.
    num_mini_batch : Forwarded to `TrainingPipeline`.
    update_repeats : Forwarded to `TrainingPipeline`.
    total_train_steps : Step count handed to the `LinearDecay`
        learning-rate schedule; only used when `cls.USE_LR_DECAY` is true.
    lr : Learning rate for the Adam optimizer. When `None` (the default),
        `cls.DEFAULT_LR` is used.
    kwargs : Extra keyword arguments forwarded to `TrainingPipeline`.

    # Returns

    The configured `TrainingPipeline`.
    """
    # Previously the argument was always overwritten by the class default,
    # so an explicitly requested learning rate was silently ignored.
    lr = cls.DEFAULT_LR if lr is None else lr

    num_steps = cls.ROLLOUT_STEPS
    metric_accumulate_interval = cls.METRIC_ACCUMULATE_INTERVAL()
    # NOTE(review): presumably large enough that periodic checkpointing
    # never triggers during these runs -- confirm.
    save_interval = 2**31
    gamma = 0.99
    # GAE is switched off when a loss named "reinforce_loss" is present.
    use_gae = "reinforce_loss" not in named_losses
    gae_lambda = 0.99
    max_grad_norm = 0.5

    # A learning-rate schedule is only attached when the config asks for it.
    lr_scheduler_builder = (
        Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=total_train_steps)}  # type: ignore
        )
        if cls.USE_LR_DECAY
        else None
    )

    return TrainingPipeline(
        save_interval=save_interval,
        metric_accumulate_interval=metric_accumulate_interval,
        optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),
        num_mini_batch=num_mini_batch,
        update_repeats=update_repeats,
        max_grad_norm=max_grad_norm,
        num_steps=num_steps,
        named_losses=named_losses,
        gamma=gamma,
        use_gae=use_gae,
        gae_lambda=gae_lambda,
        advance_scene_rollout_period=None,
        should_log=cls.SHOULD_LOG,
        pipeline_stages=pipeline_stages,
        lr_scheduler_builder=lr_scheduler_builder,
        **kwargs,
    )
class PPOBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):
    """Behavior cloning with teacher forcing on BabyAI GoToObj.

    Only an imitation loss is optimized. The teacher-forcing schedule is
    `LinearDecay(startp=1.0, endp=1.0)`, i.e. its start and end values
    are both 1.0. The PPO defaults are only consulted when choosing the
    mini-batch and update-repeat settings.
    """

    USE_EXPERT = True

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "BabyAIGoToObjBCTeacherForcing"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build a one-stage, teacher-forced imitation pipeline."""
        n_steps = cls.TOTAL_IL_TRAIN_STEPS

        # Lookup order is kept: PPO defaults first, then imitation defaults.
        defaults = [
            cls.rl_loss_default("ppo", steps=-1),
            cls.rl_loss_default("imitation"),
        ]
        imitation_defaults = defaults[1]

        always_forced = LinearDecay(
            startp=1.0,
            endp=1.0,
            steps=n_steps,
        )
        bc_stage = PipelineStage(
            loss_names=["imitation_loss"],
            teacher_forcing=always_forced,
            max_stage_steps=n_steps,
        )

        return cls._training_pipeline(
            named_losses={"imitation_loss": imitation_defaults["loss"]},
            pipeline_stages=[bc_stage],
            num_mini_batch=min(d["num_mini_batch"] for d in defaults),
            update_repeats=min(d["update_repeats"] for d in defaults),
            total_train_steps=n_steps,
        )
class DaggerBabyAIGoToObjExperimentConfig(BaseBabyAIGoToObjExperimentConfig):
    """Imitation learning on BabyAI GoToObj, trained with DAgger.

    The single imitation-loss stage uses a teacher-forcing schedule of
    `LinearDecay(startp=1.0, endp=0.0)` spanning half of the
    imitation-learning step budget.
    """

    USE_EXPERT = True

    @classmethod
    def tag(cls):
        """Return the tag identifying this experiment."""
        return "BabyAIGoToObjDagger"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build the one-stage DAgger training pipeline."""
        n_steps = cls.TOTAL_IL_TRAIN_STEPS
        il_defaults = cls.rl_loss_default("imitation")

        stages = [
            PipelineStage(
                loss_names=["imitation_loss"],
                teacher_forcing=LinearDecay(
                    startp=1.0, endp=0.0, steps=n_steps // 2
                ),
                max_stage_steps=n_steps,
            )
        ]
        pipeline_kwargs = {
            "named_losses": {"imitation_loss": il_defaults["loss"]},
            "pipeline_stages": stages,
            "num_mini_batch": il_defaults["num_mini_batch"],
            "update_repeats": il_defaults["update_repeats"],
            "total_train_steps": n_steps,
        }
        return cls._training_pipeline(**pipeline_kwargs)
================================================ FILE: projects/gym_baselines/README.md ================================================ # Baseline models for Gym (MuJoCo environments) This project contains the code for training baseline models for the tasks under the [MuJoCo](https://gym.openai.com/envs/#mujoco) group of Gym environments, including ["Ant-v2"](https://gym.openai.com/envs/Ant-v2/), ["HalfCheetah-v2"](https://gym.openai.com/envs/HalfCheetah-v2/), ["Hopper-v2"](https://gym.openai.com/envs/Hopper-v2/), ["Humanoid-v2"](https://gym.openai.com/envs/Humanoid-v2/), ["InvertedDoublePendulum-v2"](https://gym.openai.com/envs/InvertedDoublePendulum-v2/), ["InvertedPendulum-v2"](https://gym.openai.com/envs/InvertedPendulum-v2/), ["Reacher-v2"](https://gym.openai.com/envs/Reacher-v2/), ["Swimmer-v2"](https://gym.openai.com/envs/Swimmer-v2/), and ["Walker2d-v2"](https://gym.openai.com/envs/Walker2d-v2/). Provided are experiment configs for training a lightweight implementation with separate MLPs for the actor and critic, [MemorylessActorCritic](https://allenact.org/api/allenact_plugins/gym_plugin/gym_models/#memorylessactorcritic), with a [Gaussian distribution](https://allenact.org/api/allenact_plugins/gym_plugin/gym_distributions/#gaussiandistr) to sample actions for all continuous-control environments under the `MuJoCo` group of `Gym` environments. The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf) Reinforcement Learning Algorithm. To train an experiment, run the following command from the `allenact` root directory: ```bash python main.py PATH_TO_EXPERIMENT_CONFIG -o PATH_TO_OUTPUT ``` Where `PATH_TO_OUTPUT` is the path of the directory where we want the model weights and logs to be stored and `PATH_TO_EXPERIMENT_CONFIG` is the path to the python file containing the experiment configuration.
An example usage of this command would be: ```bash python main.py projects/gym_baselines/experiments/mujoco/gym_mujoco_ant_ddppo.py -o /YOUR/DESIRED/MUJOCO/OUTPUT/SAVE/PATH/gym_mujoco_ant_ddppo ``` This trains a lightweight implementation with separate MLPs for the actor and critic, with a Gaussian distribution to sample actions in the "Ant-v2" environment, and stores the model weights and logs to `/YOUR/DESIRED/MUJOCO/OUTPUT/SAVE/PATH/gym_mujoco_ant_ddppo`. ## Results In our experiments, the rewards we obtained on the MuJoCo environments after training with PPO are similar to those reported by OpenAI Gym Baselines (1M steps). For the Humanoid environment, we instead compare against the original PPO paper, which trained for 50M steps using PPO. Due to time constraints, we have only tested our baselines across two seeds so far. | Environment | Gym Baseline Reward | Our Reward | | ----------- | ------------------- | ----------- | |[Ant-v2](https://gym.openai.com/envs/Ant-v2/)| 1083.2 |1098.6 (reached 4719 in 25M steps) | | [HalfCheetah-v2](https://gym.openai.com/envs/HalfCheetah-v2/) | 1795.43 | 1741 (reached 4019 in 18M steps) | |[Hopper-v2](https://gym.openai.com/envs/Hopper-v2/)|2316.16|2266| |[Humanoid-v2](https://gym.openai.com/envs/Humanoid-v2/)|4000+|4500+ (reached 6500 in 70M steps)| | [InvertedPendulum-v2](https://gym.openai.com/envs/InvertedPendulum-v2/) | 809.43 | 1000 | |[Reacher-v2](https://gym.openai.com/envs/Reacher-v2/)|-6.71|-7.045| |[Swimmer-v2](https://gym.openai.com/envs/Swimmer-v2/)|111.19|124.7| |[Walker2d-v2](https://gym.openai.com/envs/Walker2d-v2/)|3424.95|2723 in 10M steps| ================================================ FILE: projects/gym_baselines/__init__.py ================================================ ================================================ FILE: projects/gym_baselines/experiments/__init__.py ================================================ ================================================ FILE: projects/gym_baselines/experiments/gym_base.py
class GymBaseConfig(ExperimentConfig, ABC):
    """Shared base for the Gym experiment configs.

    The train / valid / test sampler-argument methods all delegate to
    `_get_sampler_args`, differing only in the `mode` string they pass;
    subclasses provide `_get_sampler_args`.
    """

    # Sensors used by the experiment; `None` until a subclass sets them.
    SENSORS: Optional[Sequence[Sensor]] = None

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Return task-sampler arguments for `mode`; must be overridden."""
        raise NotImplementedError

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for `mode="train"`."""
        forwarded = dict(process_ind=process_ind, mode="train", seeds=seeds)
        return self._get_sampler_args(**forwarded)

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for `mode="valid"`."""
        forwarded = dict(process_ind=process_ind, mode="valid", seeds=seeds)
        return self._get_sampler_args(**forwarded)

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for `mode="test"`."""
        forwarded = dict(process_ind=process_ind, mode="test", seeds=seeds)
        return self._get_sampler_args(**forwarded)
"nprocesses": 8 if mode == "train" else 1, # rollout "devices": [], "visualizer": visualizer, } ================================================ FILE: projects/gym_baselines/experiments/gym_humanoid_ddppo.py ================================================ from abc import ABC from typing import cast import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR from allenact.algorithms.onpolicy_sync.losses.ppo import PPO from allenact.utils.experiment_utils import ( TrainingPipeline, Builder, PipelineStage, LinearDecay, ) from projects.gym_baselines.experiments.gym_humanoid_base import GymHumanoidBaseConfig class GymHumanoidPPOConfig(GymHumanoidBaseConfig, ABC): @classmethod def training_pipeline(cls, **kwargs) -> TrainingPipeline: lr = 1e-4 ppo_steps = int(8e7) # convergence may be after 1e8 clip_param = 0.1 value_loss_coef = 0.5 entropy_coef = 0.0 num_mini_batch = 4 # optimal 64 update_repeats = 10 max_grad_norm = 0.5 num_steps = 2048 gamma = 0.99 use_gae = True gae_lambda = 0.95 advance_scene_rollout_period = None save_interval = 200000 metric_accumulate_interval = 50000 return TrainingPipeline( named_losses=dict( ppo_loss=PPO( clip_param=clip_param, value_loss_coef=value_loss_coef, entropy_coef=entropy_coef, ), ), # type:ignore pipeline_stages=[ PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps), ], optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)), num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=advance_scene_rollout_period, save_interval=save_interval, metric_accumulate_interval=metric_accumulate_interval, lr_scheduler_builder=Builder( LambdaLR, { "lr_lambda": LinearDecay(steps=ppo_steps, startp=1, endp=1) }, # constant learning rate ), ) ================================================ FILE: projects/gym_baselines/experiments/gym_mujoco_base.py 
class GymMoJoCoBaseConfig(GymBaseConfig, ABC):
    """Machine parameters shared by the MuJoCo Gym experiment configs."""

    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        """Return process count, devices and visualizer for `mode`.

        Training uses 8 processes and every other mode uses 1. A video
        visualizer is only created when `mode == "test"`.
        """
        if mode != "test":
            visualizer = None
        else:
            episode_video = AgentViewViz(
                label="episode_vid",
                max_clip_length=400,
                vector_task_source=("render", {"mode": "rgb_array"}),
                fps=30,
            )
            visualizer = VizSuite(mode=mode, video_viz=episode_video)

        nprocesses = 1
        if mode == "train":
            nprocesses = 8  # rollout

        return dict(
            nprocesses=nprocesses,
            devices=[],
            visualizer=visualizer,
        )
class GymMuJoCoAntConfig(GymMuJoCoPPOConfig):
    """PPO experiment configuration for the `Ant-v2` Gym MuJoCo environment."""

    SENSORS = [
        GymMuJoCoSensor(gym_env_name="Ant-v2", uuid="gym_mujoco_data"),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """We define our `ActorCriticModel` agent using a lightweight
        implementation with separate MLPs for actors and critic,
        MemorylessActorCritic.

        Since this is a model for continuous control, note that the
        superclass of our model is `ActorCriticModel[GaussianDistr]`
        instead of `ActorCriticModel[CategoricalDistr]`, since we'll use
        a Gaussian distribution to sample actions.
        """
        # 8-dimensional continuous action space bounded to [-3, 3].
        action_space = gym.spaces.Box(-3.0, 3.0, (8,), "float32")
        return MemorylessActorCritic(
            input_uuid="gym_mujoco_data",
            action_space=action_space,  # specific action_space
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `GymTaskSampler` for `Ant-v2`, forwarding `kwargs`."""
        return GymTaskSampler(gym_env_type="Ant-v2", **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Generate initialization arguments for train, valid, and test
        TaskSamplers.

        # Parameters
        process_ind : index of the current task sampler
        mode: one of `train`, `valid`, or `test`
        seeds: per-process seeds, indexed by `process_ind`. May be `None`
            (the default of the public `*_task_sampler_args` methods), in
            which case the returned `seed` is `None`.
        """
        if mode == "train":
            max_tasks = None  # infinite training tasks
            task_seeds_list = None  # no predefined random seeds for training
            deterministic_sampling = False  # randomly sample tasks in training
        else:
            max_tasks = 4

            # one seed for each task to sample:
            # - ensures different seeds for each sampler, and
            # - ensures a deterministic set of sampled tasks.
            task_seeds_list = list(
                range(process_ind * max_tasks, (process_ind + 1) * max_tasks)
            )

            deterministic_sampling = (
                True  # deterministically sample task in validation/testing
            )

        # Indexing `None` used to raise a TypeError when no seeds were given.
        seed = None if seeds is None else seeds[process_ind]

        return dict(
            gym_env_types=["Ant-v2"],
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            max_tasks=max_tasks,  # see above
            task_seeds_list=task_seeds_list,  # see above
            deterministic_sampling=deterministic_sampling,  # see above
            seed=seed,
        )

    @classmethod
    def tag(cls) -> str:
        """Return the tag identifying this experiment."""
        return "Gym-MuJoCo-Ant-v2-PPO"
""" action_space = gym.spaces.Box(-1.0, 1.0, (6,), "float32") return MemorylessActorCritic( input_uuid="gym_mujoco_data", action_space=action_space, # specific action_space observation_space=SensorSuite(cls.SENSORS).observation_spaces, action_std=0.5, ) @classmethod def make_sampler_fn(cls, **kwargs) -> TaskSampler: return GymTaskSampler(gym_env_type="HalfCheetah-v2", **kwargs) def _get_sampler_args( self, process_ind: int, mode: str, seeds: List[int] ) -> Dict[str, Any]: """Generate initialization arguments for train, valid, and test TaskSamplers. # Parameters process_ind : index of the current task sampler mode: one of `train`, `valid`, or `test` """ if mode == "train": max_tasks = None # infinite training tasks task_seeds_list = None # no predefined random seeds for training deterministic_sampling = False # randomly sample tasks in training else: max_tasks = 4 # one seed for each task to sample: # - ensures different seeds for each sampler, and # - ensures a deterministic set of sampled tasks. 
class GymMuJoCoHopperConfig(GymMuJoCoPPOConfig):
    """PPO experiment configuration for the `Hopper-v2` MuJoCo gym environment."""

    # One sensor; its observations are published under the "gym_mujoco_data" uuid.
    SENSORS = [
        GymMuJoCoSensor(gym_env_name="Hopper-v2", uuid="gym_mujoco_data"),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the `MemorylessActorCritic` agent for this environment.

        Following the original author's notes, this is a lightweight
        actor-critic with separate MLPs for actor and critic that samples
        continuous actions from a Gaussian distribution (i.e. an
        `ActorCriticModel[GaussianDistr]` rather than an
        `ActorCriticModel[CategoricalDistr]`).

        # Returns

        The model, reading the "gym_mujoco_data" observation and acting in a
        3-dimensional float32 `Box` with bounds [-1, 1].
        """
        sensor_uuid = "gym_mujoco_data"
        # Action space is hard-coded for this specific environment.
        box = gym.spaces.Box(-1.0, 1.0, (3,), "float32")
        observation_spaces = SensorSuite(cls.SENSORS).observation_spaces
        return MemorylessActorCritic(
            input_uuid=sensor_uuid,
            action_space=box,
            observation_space=observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `GymTaskSampler` for `Hopper-v2`, forwarding `kwargs` unchanged."""
        env_name = "Hopper-v2"
        return GymTaskSampler(gym_env_type=env_name, **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Assemble the TaskSampler initialization arguments for one process.

        # Parameters

        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : per-process seeds; entry `process_ind` is used for this sampler

        # Returns

        A dict of keyword arguments for the task sampler.
        """
        is_training = mode == "train"
        if is_training:
            # Unlimited, randomly sampled tasks with no predefined seeds.
            max_tasks, task_seeds_list = None, None
        else:
            # A fixed block of 4 consecutive seeds per sampler: disjoint across
            # samplers and identical from run to run.
            max_tasks = 4
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))

        return {
            "gym_env_types": ["Hopper-v2"],
            "sensors": self.SENSORS,  # sensors used to return observations to the agent
            "max_tasks": max_tasks,
            "task_seeds_list": task_seeds_list,
            "deterministic_sampling": not is_training,  # deterministic in valid/test only
            "seed": seeds[process_ind],
        }

    @classmethod
    def tag(cls) -> str:
        """Return the experiment tag."""
        return "Gym-MuJoCo-Hopper-v2-PPO"
@classmethod
def make_sampler_fn(cls, **kwargs) -> TaskSampler:
    """Create the task sampler for the `Humanoid-v2` gym environment.

    # Parameters

    kwargs : sampler arguments, forwarded unchanged to `GymTaskSampler`.

    # Returns

    A `GymTaskSampler` built with `gym_env_type="Humanoid-v2"`.
    """
    env_name = "Humanoid-v2"
    return GymTaskSampler(gym_env_type=env_name, **kwargs)
class GymMuJoInvertedDoublePendulumConfig(GymMuJoCoPPOConfig):
    """PPO experiment configuration for the `InvertedDoublePendulum-v2` MuJoCo
    gym environment."""

    # One sensor; its observations are published under the "gym_mujoco_data" uuid.
    SENSORS = [
        GymMuJoCoSensor(
            gym_env_name="InvertedDoublePendulum-v2", uuid="gym_mujoco_data"
        ),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the `MemorylessActorCritic` agent for this environment.

        Following the original author's notes, this is a lightweight
        actor-critic with separate MLPs for actor and critic that samples
        continuous actions from a Gaussian distribution (i.e. an
        `ActorCriticModel[GaussianDistr]` rather than an
        `ActorCriticModel[CategoricalDistr]`).

        # Returns

        The model, reading the "gym_mujoco_data" observation and acting in a
        1-dimensional float32 `Box` with bounds [-1, 1].
        """
        sensor_uuid = "gym_mujoco_data"
        # Action space is hard-coded for this specific environment.
        box = gym.spaces.Box(-1.0, 1.0, (1,), "float32")
        observation_spaces = SensorSuite(cls.SENSORS).observation_spaces
        return MemorylessActorCritic(
            input_uuid=sensor_uuid,
            action_space=box,
            observation_space=observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `GymTaskSampler` for `InvertedDoublePendulum-v2`, forwarding
        `kwargs` unchanged."""
        env_name = "InvertedDoublePendulum-v2"
        return GymTaskSampler(gym_env_type=env_name, **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Assemble the TaskSampler initialization arguments for one process.

        # Parameters

        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : per-process seeds; entry `process_ind` is used for this sampler

        # Returns

        A dict of keyword arguments for the task sampler.
        """
        is_training = mode == "train"
        if is_training:
            # Unlimited, randomly sampled tasks with no predefined seeds.
            max_tasks, task_seeds_list = None, None
        else:
            # A fixed block of 4 consecutive seeds per sampler: disjoint across
            # samplers and identical from run to run.
            max_tasks = 4
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))

        return {
            "gym_env_types": ["InvertedDoublePendulum-v2"],
            "sensors": self.SENSORS,  # sensors used to return observations to the agent
            "max_tasks": max_tasks,
            "task_seeds_list": task_seeds_list,
            "deterministic_sampling": not is_training,  # deterministic in valid/test only
            "seed": seeds[process_ind],
        }

    @classmethod
    def tag(cls) -> str:
        """Return the experiment tag."""
        return "Gym-MuJoCo-InvertedDoublePendulum-v2-PPO"
@classmethod
def make_sampler_fn(cls, **kwargs) -> TaskSampler:
    """Create the task sampler for the `InvertedPendulum-v2` gym environment.

    # Parameters

    kwargs : sampler arguments, forwarded unchanged to `GymTaskSampler`.

    # Returns

    A `GymTaskSampler` built with `gym_env_type="InvertedPendulum-v2"`.
    """
    env_name = "InvertedPendulum-v2"
    return GymTaskSampler(gym_env_type=env_name, **kwargs)
class GymMuJoCoReacherConfig(GymMuJoCoPPOConfig):
    """PPO experiment configuration for the `Reacher-v2` MuJoCo gym environment."""

    # One sensor; its observations are published under the "gym_mujoco_data" uuid.
    SENSORS = [
        GymMuJoCoSensor(gym_env_name="Reacher-v2", uuid="gym_mujoco_data"),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the `MemorylessActorCritic` agent for this environment.

        Following the original author's notes, this is a lightweight
        actor-critic with separate MLPs for actor and critic that samples
        continuous actions from a Gaussian distribution (i.e. an
        `ActorCriticModel[GaussianDistr]` rather than an
        `ActorCriticModel[CategoricalDistr]`).

        # Returns

        The model, reading the "gym_mujoco_data" observation and acting in a
        2-dimensional float32 `Box` with bounds [-1, 1].
        """
        sensor_uuid = "gym_mujoco_data"
        # Action space is hard-coded for this specific environment.
        box = gym.spaces.Box(-1.0, 1.0, (2,), "float32")
        observation_spaces = SensorSuite(cls.SENSORS).observation_spaces
        return MemorylessActorCritic(
            input_uuid=sensor_uuid,
            action_space=box,
            observation_space=observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `GymTaskSampler` for `Reacher-v2`, forwarding `kwargs` unchanged."""
        env_name = "Reacher-v2"
        return GymTaskSampler(gym_env_type=env_name, **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Assemble the TaskSampler initialization arguments for one process.

        # Parameters

        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : per-process seeds; entry `process_ind` is used for this sampler

        # Returns

        A dict of keyword arguments for the task sampler.
        """
        is_training = mode == "train"
        if is_training:
            # Unlimited, randomly sampled tasks with no predefined seeds.
            max_tasks, task_seeds_list = None, None
        else:
            # A fixed block of 4 consecutive seeds per sampler: disjoint across
            # samplers and identical from run to run.
            max_tasks = 4
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))

        return {
            "gym_env_types": ["Reacher-v2"],
            "sensors": self.SENSORS,  # sensors used to return observations to the agent
            "max_tasks": max_tasks,
            "task_seeds_list": task_seeds_list,
            "deterministic_sampling": not is_training,  # deterministic in valid/test only
            "seed": seeds[process_ind],
        }

    @classmethod
    def tag(cls) -> str:
        """Return the experiment tag."""
        return "Gym-MuJoCo-Reacher-v2-PPO"
@classmethod
def make_sampler_fn(cls, **kwargs) -> TaskSampler:
    """Create the task sampler for the `Swimmer-v2` gym environment.

    # Parameters

    kwargs : sampler arguments, forwarded unchanged to `GymTaskSampler`.

    # Returns

    A `GymTaskSampler` built with `gym_env_type="Swimmer-v2"`.
    """
    env_name = "Swimmer-v2"
    return GymTaskSampler(gym_env_type=env_name, **kwargs)
class GymMuJoCoWalkerConfig(GymMuJoCoPPOConfig):
    """PPO experiment configuration for the `Walker2d-v2` MuJoCo gym environment."""

    # One sensor; its observations are published under the "gym_mujoco_data" uuid.
    SENSORS = [
        GymMuJoCoSensor(gym_env_name="Walker2d-v2", uuid="gym_mujoco_data"),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the `MemorylessActorCritic` agent for this environment.

        Following the original author's notes, this is a lightweight
        actor-critic with separate MLPs for actor and critic that samples
        continuous actions from a Gaussian distribution (i.e. an
        `ActorCriticModel[GaussianDistr]` rather than an
        `ActorCriticModel[CategoricalDistr]`).

        # Returns

        The model, reading the "gym_mujoco_data" observation and acting in a
        6-dimensional float32 `Box` with bounds [-1, 1].
        """
        sensor_uuid = "gym_mujoco_data"
        # Action space is hard-coded for this specific environment.
        box = gym.spaces.Box(-1.0, 1.0, (6,), "float32")
        observation_spaces = SensorSuite(cls.SENSORS).observation_spaces
        return MemorylessActorCritic(
            input_uuid=sensor_uuid,
            action_space=box,
            observation_space=observation_spaces,
            action_std=0.5,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create a `GymTaskSampler` for `Walker2d-v2`, forwarding `kwargs` unchanged."""
        env_name = "Walker2d-v2"
        return GymTaskSampler(gym_env_type=env_name, **kwargs)

    def _get_sampler_args(
        self, process_ind: int, mode: str, seeds: List[int]
    ) -> Dict[str, Any]:
        """Assemble the TaskSampler initialization arguments for one process.

        # Parameters

        process_ind : index of the current task sampler
        mode : one of `train`, `valid`, or `test`
        seeds : per-process seeds; entry `process_ind` is used for this sampler

        # Returns

        A dict of keyword arguments for the task sampler.
        """
        is_training = mode == "train"
        if is_training:
            # Unlimited, randomly sampled tasks with no predefined seeds.
            max_tasks, task_seeds_list = None, None
        else:
            # A fixed block of 4 consecutive seeds per sampler: disjoint across
            # samplers and identical from run to run.
            max_tasks = 4
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))

        return {
            "gym_env_types": ["Walker2d-v2"],
            "sensors": self.SENSORS,  # sensors used to return observations to the agent
            "max_tasks": max_tasks,
            "task_seeds_list": task_seeds_list,
            "deterministic_sampling": not is_training,  # deterministic in valid/test only
            "seed": seeds[process_ind],
        }

    @classmethod
    def tag(cls) -> str:
        """Return the experiment tag."""
        return "Gym-MuJoCo-Walker2d-v2-PPO"
class ArmPointNavBaseConfig(ExperimentConfig, ABC):
    """Shared base settings for the ArmPointNav experiment configurations.

    Declares the agent/camera constants as class attributes and attaches a
    reward configuration to every instance. `SENSORS` and
    `ADVANCE_SCENE_ROLLOUT_PERIOD` are left as `None` at this level.
    """

    # Left unset at this level.
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    SENSORS: Optional[Sequence[Sensor]] = None

    # Agent movement / visibility constants.
    STEP_SIZE = 0.25
    ROTATION_DEGREES = 45.0
    VISIBILITY_DISTANCE = 1.0
    STOCHASTIC = False

    # Camera and screen resolution.
    CAMERA_WIDTH = 224
    CAMERA_HEIGHT = 224
    SCREEN_SIZE = 224

    # Step budget (also used as the rollout length by the PPO mixin's
    # `num_steps = self.MAX_STEPS`).
    MAX_STEPS = 200

    def __init__(self):
        """Attach the reward configuration to the instance."""
        self.REWARD_CONFIG = dict(
            step_penalty=-0.01,
            goal_success_reward=10.0,
            pickup_success_reward=5.0,
            failed_stop_reward=0.0,
            shaping_weight=1.0,  # we are not using this
            failed_action_penalty=-0.03,
        )

    @classmethod
    def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors to apply; none at this level (empty tuple)."""
        return ()
class ArmPointNavThorBaseConfig(ArmPointNavBaseConfig, ABC):
    """The base config for all ArmPointNav experiments run in THOR.

    Provides machine parameters, task-sampler construction, and the
    per-process sampler arguments for training, validation and testing.
    Subclasses are expected to set the scene lists, `OBJECT_TYPES`,
    `NUM_PROCESSES` and `SENSORS`.
    """

    TASK_SAMPLER = SimpleArmPointNavGeneralSampler
    # Visualization is only switched on when running on macOS ("Darwin").
    VISUALIZE = False
    if platform.system() == "Darwin":
        VISUALIZE = True

    NUM_PROCESSES: Optional[int] = None
    TRAIN_GPU_IDS = list(range(torch.cuda.device_count()))
    SAMPLER_GPU_IDS = TRAIN_GPU_IDS
    # Validation and testing use the last visible GPU (only consulted when
    # CUDA is available, see `machine_params`).
    VALID_GPU_IDS = [torch.cuda.device_count() - 1]
    TEST_GPU_IDS = [torch.cuda.device_count() - 1]

    TRAIN_DATASET_DIR: Optional[str] = None
    VAL_DATASET_DIR: Optional[str] = None

    CAP_TRAINING = None

    TRAIN_SCENES: Optional[List[str]] = None
    # `VALID_SCENES` is the attribute actually read by
    # `valid_task_sampler_args`; `VAL_SCENES` is kept for backward
    # compatibility and used as a fallback when `VALID_SCENES` is unset.
    VALID_SCENES: Optional[List[str]] = None
    VAL_SCENES: Optional[List[str]] = None
    TEST_SCENES: Optional[List[str]] = None

    OBJECT_TYPES: Optional[Sequence[str]] = None
    VALID_SAMPLES_IN_SCENE = 1
    TEST_SAMPLES_IN_SCENE = 1

    NUMBER_OF_TEST_PROCESS = 10

    def __init__(self):
        """Validate the camera/motion constants and set the environment args."""
        super().__init__()

        assert (
            self.CAMERA_WIDTH == 224
            and self.CAMERA_HEIGHT == 224
            and self.VISIBILITY_DISTANCE == 1
            and self.STEP_SIZE == 0.25
        )
        self.ENV_ARGS = ENV_ARGS

    def machine_params(self, mode="train", **kwargs):
        """Return the `MachineParams` (processes, devices, preprocessor graph).

        # Parameters

        mode : one of `train`, `valid`, or `test`.

        # Returns

        A `MachineParams` instance for the requested mode.

        # Raises

        NotImplementedError : if `mode` is not one of the three known modes.
        """
        has_cuda = torch.cuda.is_available()
        sampler_devices: Sequence[int] = []
        if mode == "train":
            workers_per_device = 1
            gpu_ids = [] if not has_cuda else self.TRAIN_GPU_IDS * workers_per_device
            # Without CUDA a single process is used; otherwise NUM_PROCESSES is
            # spread over the training GPUs.
            nprocesses = (
                1
                if not has_cuda
                else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))
            )
            sampler_devices = self.SAMPLER_GPU_IDS
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = [] if not has_cuda else self.VALID_GPU_IDS
        elif mode == "test":
            nprocesses = self.NUMBER_OF_TEST_PROCESS if has_cuda else 1
            gpu_ids = [] if not has_cuda else self.TEST_GPU_IDS
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        sensors = [*self.SENSORS]
        if mode != "train":
            # Expert-action sensors are dropped outside of training.
            sensors = [s for s in sensors if not isinstance(s, ExpertActionSensor)]

        # The graph is always built for training; for other modes only when at
        # least one process is requested.
        sensor_preprocessor_graph = (
            SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(sensors).observation_spaces,
                preprocessors=self.preprocessors(),
            )
            if mode == "train"
            or (
                (isinstance(nprocesses, int) and nprocesses > 0)
                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
            )
            else None
        )

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sampler_devices=(
                sampler_devices if mode == "train" else gpu_ids
            ),  # ignored with > 1 gpu_ids
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate `cls.TASK_SAMPLER` with `kwargs` plus experiment metadata.

        Adds `objects` (from `OBJECT_TYPES`), a timestamped `exp_name`, and,
        when `VISUALIZE` is set, a list of `visualizers`.
        """
        from datetime import datetime

        now = datetime.now()
        exp_name_w_time = cls.__name__ + "_" + now.strftime("%m_%d_%Y_%H_%M_%S_%f")
        if cls.VISUALIZE:
            visualizers = [
                ImageVisualizer(exp_name=exp_name_w_time),
                TestMetricLogger(exp_name=exp_name_w_time),
            ]

            kwargs["visualizers"] = visualizers
        kwargs["objects"] = cls.OBJECT_TYPES
        kwargs["exp_name"] = exp_name_w_time
        return cls.TASK_SAMPLER(**kwargs)

    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        """Return `num_parts + 1` rounded, evenly spaced int32 boundaries over [0, n]."""
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    @staticmethod
    def _x_display(devices: Optional[List[int]], process_ind: int) -> Optional[str]:
        """Return the `"0.<device>"` display string for a process.

        Devices are assigned round-robin by `process_ind`. Returns `None`
        when `devices` is `None` or empty, instead of failing on
        `len(None)`.
        """
        if devices is None or len(devices) == 0:
            return None
        return "0.%d" % devices[process_ind % len(devices)]

    def _get_sampler_args_for_scene_split(
        self,
        scenes: List[str],
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Build the sampler arguments shared by all modes for one process.

        # Parameters

        scenes : all scenes of the split; this process receives one slice.
        process_ind : index of the current task sampler.
        total_processes : number of task samplers sharing `scenes`.
        seeds : optional per-process seeds.
        deterministic_cudnn : forwarded to the sampler unchanged.

        # Returns

        A dict of keyword arguments for the task sampler.
        """
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisible by the number of scenes"
                )
            # Repeat the scene list until it covers every process, then trim
            # it to a multiple of `total_processes`.
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        else:
            if len(scenes) % total_processes != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisor of the number of scenes"
                )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "env_args": self.ENV_ARGS,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(
                len(self.TASK_SAMPLER._TASK_TYPE.class_action_names())
            ),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task-sampler arguments for one training process.

        `devices` may be `None` or empty, in which case `x_display` is `None`.
        """
        res = self._get_sampler_args_for_scene_split(
            self.TRAIN_SCENES,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_period"] = "manual"
        res["sampler_mode"] = "train"
        res["cap_training"] = self.CAP_TRAINING
        # Copy the env args so the per-process x_display does not leak into
        # the shared `self.ENV_ARGS` dict.
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        res["env_args"]["x_display"] = self._x_display(devices, process_ind)
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]],
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task-sampler arguments for one validation process.

        Scenes come from `VALID_SCENES`, falling back to the legacy
        `VAL_SCENES` attribute when `VALID_SCENES` is unset. `devices` may be
        `None` or empty, in which case `x_display` is `None`.
        """
        valid_scenes = (
            self.VALID_SCENES if self.VALID_SCENES is not None else self.VAL_SCENES
        )
        res = self._get_sampler_args_for_scene_split(
            valid_scenes,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_period"] = self.VALID_SAMPLES_IN_SCENE
        res["sampler_mode"] = "val"
        res["cap_training"] = self.CAP_TRAINING
        res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * len(res["scenes"])
        # Copy the env args so the per-process x_display does not leak into
        # the shared `self.ENV_ARGS` dict.
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        res["env_args"]["x_display"] = self._x_display(devices, process_ind)
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]],
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the task-sampler arguments for one test process.

        `devices` may be `None` or empty, in which case `x_display` is `None`.
        """
        res = self._get_sampler_args_for_scene_split(
            self.TEST_SCENES,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_period"] = self.TEST_SAMPLES_IN_SCENE
        res["sampler_mode"] = "test"
        # Copy the env args so the per-process x_display does not leak into
        # the shared `self.ENV_ARGS` dict.
        res["env_args"] = {}
        res["cap_training"] = self.CAP_TRAINING
        res["env_args"].update(self.ENV_ARGS)
        res["env_args"]["x_display"] = self._x_display(devices, process_ind)
        return res
class ArmPointNavDisjointDepth(ArmPointNavDepth):
    """ArmPointNav depth experiment using the disjoint actor-critic model.

    Inherits everything from `ArmPointNavDepth` and only replaces the model
    with `DisjointArmPointNavBaselineActorCritic`.
    """

    TASK_SAMPLER = ArmPointNavTaskSampler

    def __init__(self):
        super().__init__()

        # Sanity checks on the inherited simulator settings.
        assert self.CAMERA_WIDTH == 224
        assert self.CAMERA_HEIGHT == 224
        assert self.VISIBILITY_DISTANCE == 1
        assert self.STEP_SIZE == 0.25

        # Depth rendering must be enabled for the depth sensor.
        self.ENV_ARGS = dict(ENV_ARGS)
        self.ENV_ARGS["renderDepthImage"] = True

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Instantiate the disjoint actor-critic for this experiment.

        Requires `kwargs["sensor_preprocessor_graph"]`, whose
        `observation_spaces` become the model's observation space.
        """
        num_actions = len(cls.TASK_SAMPLER._TASK_TYPE.class_action_names())
        action_space = gym.spaces.Discrete(num_actions)
        observation_space = kwargs["sensor_preprocessor_graph"].observation_spaces
        return DisjointArmPointNavBaselineActorCritic(
            action_space=action_space,
            observation_space=observation_space,
            hidden_size=512,
        )

    @classmethod
    def tag(cls):
        """Return the experiment tag, i.e. the class name."""
        return cls.__name__
class ArmPointNaviThorBaseConfig(ArmPointNavThorBaseConfig, ABC):
    """Base config shared by the iTHOR ArmPointNav experiments.

    Defines the number of processes, the train / validation / test scene
    splits and the seen / unseen object types.
    """

    NUM_PROCESSES = 40
    # add all the arguments here
    TOTAL_NUMBER_SCENES = 30

    # Scene indices are split by residue modulo 6:
    #   0, 1, 3, 4 -> train (FloorPlan28 is left out), 2 -> test, 5 -> valid.
    # NOTE: only the outermost iterable of a comprehension may reference
    # class-level names, so the filters below use plain literals.
    TRAIN_SCENES = [
        "FloorPlan{}_physics".format(i)
        for i in range(1, TOTAL_NUMBER_SCENES + 1)
        if i % 3 in (0, 1) and i != 28
    ]  # last scenes are really bad

    TEST_SCENES = [
        "FloorPlan{}_physics".format(i)
        for i in range(1, TOTAL_NUMBER_SCENES + 1)
        if i % 6 == 2
    ]

    VALID_SCENES = [
        "FloorPlan{}_physics".format(i)
        for i in range(1, TOTAL_NUMBER_SCENES + 1)
        if i % 6 == 5
    ]

    ALL_SCENES = TRAIN_SCENES + TEST_SCENES + VALID_SCENES

    # Every scene except the excluded one appears exactly once.
    assert len(ALL_SCENES) == TOTAL_NUMBER_SCENES - 1
    assert len(set(ALL_SCENES)) == TOTAL_NUMBER_SCENES - 1

    OBJECT_TYPES = tuple(sorted(TRAIN_OBJECTS))

    UNSEEN_OBJECT_TYPES = tuple(sorted(TEST_OBJECTS))
class ArmPointNavNoVision(
    ArmPointNaviThorBaseConfig,
    ArmPointNavMixInPPOConfig,
    ArmPointNavMixInSimpleGRUConfig,
):
    """ArmPointNav experiment configuration in iTHOR without visual input.

    The visual slot is filled by `NoVisionSensorThor`, registered under the
    `rgb_lowres` uuid.
    """

    SENSORS = [
        NoVisionSensorThor(
            height=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            width=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=False,
            uuid="rgb_lowres",
        ),
        RelativeAgentArmToObjectSensor(),
        RelativeObjectToGoalSensor(),
        PickedUpObjSensor(),
    ]

    MAX_STEPS = 200
    TASK_SAMPLER = ArmPointNavTaskSampler

    def __init__(self):
        super().__init__()

        # Sanity checks on the inherited simulator settings.
        assert self.CAMERA_WIDTH == 224
        assert self.CAMERA_HEIGHT == 224
        assert self.VISIBILITY_DISTANCE == 1
        assert self.STEP_SIZE == 0.25

        # No depth image is needed for this configuration.
        self.ENV_ARGS = dict(ENV_ARGS)
        self.ENV_ARGS["renderDepthImage"] = False

    @classmethod
    def tag(cls):
        """Return the experiment tag, i.e. the class name."""
        return cls.__name__
class ArmPointNavRGB(
    ArmPointNaviThorBaseConfig,
    ArmPointNavMixInPPOConfig,
    ArmPointNavMixInSimpleGRUConfig,
):
    """ArmPointNav experiment configuration in iTHOR with RGB input."""

    SENSORS = [
        RGBSensorThor(
            height=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            width=ArmPointNaviThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        RelativeAgentArmToObjectSensor(),
        RelativeObjectToGoalSensor(),
        PickedUpObjSensor(),
    ]

    MAX_STEPS = 200
    TASK_SAMPLER = ArmPointNavTaskSampler

    def __init__(self):
        super().__init__()

        # Sanity checks on the inherited simulator settings.
        assert self.CAMERA_WIDTH == 224
        assert self.CAMERA_HEIGHT == 224
        assert self.VISIBILITY_DISTANCE == 1
        assert self.STEP_SIZE == 0.25

        # Per-instance copy of the shared environment arguments.
        self.ENV_ARGS = dict(ENV_ARGS)

    @classmethod
    def tag(cls):
        """Return the experiment tag, i.e. the class name."""
        return cls.__name__
ArmPointNavMixInPPOConfig, ArmPointNavMixInSimpleGRUConfig, ): """An Object Navigation experiment configuration in iThor with RGB input.""" SENSORS = [ DepthSensorThor( height=ArmPointNaviThorBaseConfig.SCREEN_SIZE, width=ArmPointNaviThorBaseConfig.SCREEN_SIZE, use_normalization=True, uuid="depth_lowres", ), RGBSensorThor( height=ArmPointNaviThorBaseConfig.SCREEN_SIZE, width=ArmPointNaviThorBaseConfig.SCREEN_SIZE, use_resnet_normalization=True, uuid="rgb_lowres", ), RelativeAgentArmToObjectSensor(), RelativeObjectToGoalSensor(), PickedUpObjSensor(), ] MAX_STEPS = 200 TASK_SAMPLER = ArmPointNavTaskSampler # def __init__(self): super().__init__() assert ( self.CAMERA_WIDTH == 224 and self.CAMERA_HEIGHT == 224 and self.VISIBILITY_DISTANCE == 1 and self.STEP_SIZE == 0.25 ) self.ENV_ARGS = {**ENV_ARGS, "renderDepthImage": True} @classmethod def tag(cls): return cls.__name__ ================================================ FILE: projects/manipulathor_baselines/armpointnav_baselines/models/__init__.py ================================================ ================================================ FILE: projects/manipulathor_baselines/armpointnav_baselines/models/arm_pointnav_models.py ================================================ """Baseline models for use in the Arm Point Navigation task. Arm Point Navigation is currently available as a Task in ManipulaTHOR. 
class ArmPointNavBaselineActorCritic(ActorCriticModel[CategoricalDistr]):
    """Baseline recurrent actor critic model for armpointnav task.

    # Attributes
    action_space : The space of actions available to the agent. Currently only discrete
        actions are allowed (so this space will always be of type `gym.spaces.Discrete`).
    observation_space : The observation space expected by the agent. It must contain at
        least one of the `rgb_lowres` / `depth_lowres` visual sensors; `forward`
        additionally reads `relative_agent_arm_to_obj`, `relative_obj_to_goal` and
        `pickedup_object`.
    hidden_size : The hidden size of the GRU RNN.
    obj_state_embedding_size : The dimensionality of the embedding produced from the
        3-dimensional relative-distance inputs.
    """

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        hidden_size=512,
        obj_state_embedding_size=512,
        trainable_masked_hidden_state: bool = False,
        num_rnn_layers=1,
        rnn_type="GRU",
    ):
        """Initializer.

        See class documentation for parameter definitions.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        self._hidden_size = hidden_size
        self.object_type_embedding_size = obj_state_embedding_size

        sensor_names = self.observation_space.spaces.keys()
        has_rgb = "rgb_lowres" in sensor_names
        has_depth = "depth_lowres" in sensor_names

        self.visual_encoder = SimpleCNN(
            self.observation_space,
            self._hidden_size,
            rgb_uuid="rgb_lowres" if has_rgb else None,
            depth_uuid="depth_lowres" if has_depth else None,
        )

        # One `hidden_size`-wide feature block is budgeted per visual modality.
        input_visual_feature_num = int(has_rgb) + int(has_depth)
        if input_visual_feature_num == 0:
            raise NotImplementedError(
                "The observation space must contain an 'rgb_lowres' and/or a"
                " 'depth_lowres' sensor."
            )

        self.state_encoder = RNNStateEncoder(
            self._hidden_size * input_visual_feature_num + obj_state_embedding_size,
            self._hidden_size,
            trainable_masked_hidden_state=trainable_masked_hidden_state,
            num_layers=num_rnn_layers,
            rnn_type=rnn_type,
        )

        self.actor = LinearActorHead(self._hidden_size, action_space.n)
        self.critic = LinearCriticHead(self._hidden_size)

        # MLP layer sizes: 3-d relative position -> 100 -> embedding.
        self.relative_dist_embedding = input_embedding_net(
            [3, 100, int(obj_state_embedding_size)], dropout=0
        )

        self.train()

    @property
    def recurrent_hidden_state_size(self) -> int:
        """The recurrent hidden state size of the model."""
        return self._hidden_size

    @property
    def num_recurrent_layers(self) -> int:
        """Number of recurrent hidden layers."""
        return self.state_encoder.num_recurrent_layers

    def _recurrent_memory_specification(self):
        """Describe the single `rnn` memory tensor (layer x sampler x hidden)."""
        return dict(
            rnn=(
                (
                    ("layer", self.num_recurrent_layers),
                    ("sampler", None),
                    ("hidden", self.recurrent_hidden_state_size),
                ),
                torch.float32,
            )
        )

    def get_relative_distance_embedding(
        self, state_tensor: torch.Tensor
    ) -> torch.FloatTensor:
        """Embed a relative-distance observation with the shared MLP."""
        return self.relative_dist_embedding(state_tensor)

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Processes input batched observations to produce new actor and critic
        values. Processes input batched observations (along with prior hidden
        states, previous actions, and masks denoting which recurrent hidden
        states should be masked) and returns an `ActorCriticOutput` object
        containing the model's policy (distribution over actions) and
        evaluation of the current state (value).

        # Parameters
        observations : Batched input observations.
        memory : `Memory` containing the hidden states from initial timepoints.
        prev_actions : Tensor of previous actions taken (not used by this model).
        masks : Masks applied to hidden states. See `RNNStateEncoder`.
        # Returns
        Tuple of the `ActorCriticOutput` and recurrent hidden state.
        """
        arm2obj_dist = self.get_relative_distance_embedding(
            observations["relative_agent_arm_to_obj"]
        )
        obj2goal_dist = self.get_relative_distance_embedding(
            observations["relative_obj_to_goal"]
        )

        perception_embed = self.visual_encoder(observations)

        # Before pickup the agent is guided by the arm-to-object embedding;
        # once the object is picked up, by the object-to-goal embedding.
        after_pickup = observations["pickedup_object"] == 1
        distances = arm2obj_dist  # alias: overwritten in place below
        distances[after_pickup] = obj2goal_dist[after_pickup]

        x_cat = torch.cat([distances, perception_embed], dim=-1)
        x_out, rnn_hidden_states = self.state_encoder(
            x_cat, memory.tensor("rnn"), masks
        )

        actor_critic_output = ActorCriticOutput(
            distributions=self.actor(x_out), values=self.critic(x_out), extras={}
        )

        updated_memory = memory.set_tensor("rnn", rnn_hidden_states)

        return (
            actor_critic_output,
            updated_memory,
        )
class DisjointArmPointNavBaselineActorCritic(ActorCriticModel[CategoricalDistr]):
    """Disjoint Baseline recurrent actor critic model for armpointnav.

    Separate ("pick" / "drop") visual encoders, distance embeddings and
    actor / critic heads are kept for the two phases of the task; the
    `pickedup_object` observation selects which one is used per entry. The
    recurrent state encoder is shared.

    # Attributes
    action_space : The space of actions available to the agent. Currently only discrete
        actions are allowed (so this space will always be of type `gym.spaces.Discrete`).
    observation_space : The observation space expected by the agent. It must contain the
        `depth_lowres` sensor; `forward` additionally reads `relative_agent_arm_to_obj`,
        `relative_obj_to_goal` and `pickedup_object`.
    hidden_size : The hidden size of the GRU RNN.
    obj_state_embedding_size : The dimensionality of the embedding produced from the
        3-dimensional relative-distance inputs.
    """

    def __init__(
        self,
        action_space: gym.spaces.Discrete,
        observation_space: SpaceDict,
        hidden_size=512,
        obj_state_embedding_size=512,
        trainable_masked_hidden_state: bool = False,
        num_rnn_layers=1,
        rnn_type="GRU",
    ):
        """Initializer.

        See class documentation for parameter definitions.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)

        self._hidden_size = hidden_size
        self.object_type_embedding_size = obj_state_embedding_size

        self.visual_encoder_pick = SimpleCNN(
            self.observation_space,
            self._hidden_size,
            rgb_uuid=None,
            depth_uuid="depth_lowres",
        )
        self.visual_encoder_drop = SimpleCNN(
            self.observation_space,
            self._hidden_size,
            rgb_uuid=None,
            depth_uuid="depth_lowres",
        )

        self.state_encoder = RNNStateEncoder(
            self._hidden_size + obj_state_embedding_size,
            self._hidden_size,
            trainable_masked_hidden_state=trainable_masked_hidden_state,
            num_layers=num_rnn_layers,
            rnn_type=rnn_type,
        )

        # The heads return raw logits; the categorical distribution is built
        # in `forward` after the pick / drop logits have been merged.
        self.actor_pick = LinearActorHeadNoCategory(self._hidden_size, action_space.n)
        self.critic_pick = LinearCriticHead(self._hidden_size)
        self.actor_drop = LinearActorHeadNoCategory(self._hidden_size, action_space.n)
        self.critic_drop = LinearCriticHead(self._hidden_size)

        # MLP layer sizes: 3-d relative position -> 100 -> embedding.
        embedding_layer_sizes = [3, 100, int(obj_state_embedding_size)]
        self.relative_dist_embedding_pick = input_embedding_net(
            list(embedding_layer_sizes), dropout=0
        )
        self.relative_dist_embedding_drop = input_embedding_net(
            list(embedding_layer_sizes), dropout=0
        )

        self.train()

    @property
    def recurrent_hidden_state_size(self) -> int:
        """The recurrent hidden state size of the model."""
        return self._hidden_size

    @property
    def num_recurrent_layers(self) -> int:
        """Number of recurrent hidden layers."""
        return self.state_encoder.num_recurrent_layers

    def _recurrent_memory_specification(self):
        """Describe the single `rnn` memory tensor (layer x sampler x hidden)."""
        return dict(
            rnn=(
                (
                    ("layer", self.num_recurrent_layers),
                    ("sampler", None),
                    ("hidden", self.recurrent_hidden_state_size),
                ),
                torch.float32,
            )
        )

    @staticmethod
    def _use_drop_after_pickup(
        pick: torch.Tensor, drop: torch.Tensor, after_pickup: torch.Tensor
    ) -> torch.Tensor:
        """Overwrite `pick` in place with `drop` where `after_pickup` holds."""
        pick[after_pickup] = drop[after_pickup]
        return pick

    def forward(  # type:ignore
        self,
        observations: ObservationType,
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Processes input batched observations to produce new actor and critic
        values. Processes input batched observations (along with prior hidden
        states, previous actions, and masks denoting which recurrent hidden
        states should be masked) and returns an `ActorCriticOutput` object
        containing the model's policy (distribution over actions) and
        evaluation of the current state (value).

        # Parameters
        observations : Batched input observations.
        memory : `Memory` containing the hidden states from initial timepoints.
        prev_actions : Tensor of previous actions taken (not used by this model).
        masks : Masks applied to hidden states. See `RNNStateEncoder`.
        # Returns
        Tuple of the `ActorCriticOutput` and recurrent hidden state.
        """
        arm2obj_dist = self.relative_dist_embedding_pick(
            observations["relative_agent_arm_to_obj"]
        )
        obj2goal_dist = self.relative_dist_embedding_drop(
            observations["relative_obj_to_goal"]
        )

        perception_embed_pick = self.visual_encoder_pick(observations)
        perception_embed_drop = self.visual_encoder_drop(observations)

        # Entries where the object is already held use the "drop" branch.
        after_pickup = observations["pickedup_object"] == 1

        distances = self._use_drop_after_pickup(
            arm2obj_dist, obj2goal_dist, after_pickup
        )
        perception_embed = self._use_drop_after_pickup(
            perception_embed_pick, perception_embed_drop, after_pickup
        )

        x_cat = torch.cat([distances, perception_embed], dim=-1)  # type: ignore
        x_out, rnn_hidden_states = self.state_encoder(
            x_cat, memory.tensor("rnn"), masks
        )

        # All four heads are evaluated (in the original order) and then
        # merged per entry according to the task phase.
        actor_out_pick = self.actor_pick(x_out)
        critic_out_pick = self.critic_pick(x_out)
        actor_out_drop = self.actor_drop(x_out)
        critic_out_drop = self.critic_drop(x_out)

        actor_logits = self._use_drop_after_pickup(
            actor_out_pick, actor_out_drop, after_pickup
        )
        critic_out = self._use_drop_after_pickup(
            critic_out_pick, critic_out_drop, after_pickup
        )

        actor_critic_output = ActorCriticOutput(
            distributions=CategoricalDistr(logits=actor_logits),
            values=critic_out,
            extras={},
        )

        updated_memory = memory.set_tensor("rnn", rnn_hidden_states)

        return (
            actor_critic_output,
            updated_memory,
        )
def combine_block_w_bn(in_planes, out_planes):
    """Return a 1x1 convolution followed by BatchNorm2d and LeakyReLU."""
    return nn.Sequential(
        nn.Conv2d(in_planes, out_planes, 1, 1),
        nn.BatchNorm2d(out_planes),
        nn.LeakyReLU(),
    )


def conv2d_block(in_planes, out_planes, kernel_size, stride=1, padding=1):
    """Return conv -> BN -> LeakyReLU -> 3x3 conv -> BN.

    Only the first convolution uses `kernel_size`, `stride` and `padding`;
    the second is always 3x3 with stride 1 and padding 1.
    """
    return nn.Sequential(
        nn.Conv2d(in_planes, out_planes, kernel_size, stride=stride, padding=padding),
        nn.BatchNorm2d(out_planes),
        nn.LeakyReLU(),
        nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(out_planes),
    )


def combine_block_w_do(in_planes, out_planes, dropout=0.0):
    """Return a 1x1 convolution followed by LeakyReLU and Dropout."""
    return nn.Sequential(
        nn.Conv2d(in_planes, out_planes, 1, 1),
        nn.LeakyReLU(),
        nn.Dropout(dropout),
    )


def combine_block_no_do(in_planes, out_planes):
    """Return a 1x1 convolution followed by LeakyReLU."""
    return nn.Sequential(
        nn.Conv2d(in_planes, out_planes, 1, 1),
        nn.LeakyReLU(),
    )


def linear_block(in_features, out_features, dropout=0.0):
    """Return a linear layer followed by LeakyReLU and Dropout."""
    return nn.Sequential(
        nn.Linear(in_features, out_features),
        nn.LeakyReLU(),
        nn.Dropout(dropout),
    )


def linear_block_norelu(in_features, out_features):
    """Return a linear layer wrapped in a `nn.Sequential` (no activation)."""
    return nn.Sequential(
        nn.Linear(in_features, out_features),
    )


def input_embedding_net(list_of_feature_sizes, dropout=0.0):
    """Build an MLP whose layer widths are given by `list_of_feature_sizes`.

    Consecutive sizes define one linear block each. Every block but the last
    is followed by LeakyReLU and Dropout; the last block is purely linear.
    A list with fewer than two sizes yields an empty `nn.Sequential`.
    """
    num_layers = len(list_of_feature_sizes) - 1
    modules = []
    for i, (input_size, output_size) in enumerate(
        zip(list_of_feature_sizes[:-1], list_of_feature_sizes[1:])
    ):
        if i == num_layers - 1:
            modules.append(linear_block_norelu(input_size, output_size))
        else:
            modules.append(linear_block(input_size, output_size, dropout=dropout))
    return nn.Sequential(*modules)


def _upsample_add(x, y):
    """Bilinearly resize `x` to the spatial size of `y` and add the two."""
    _, _, H, W = y.size()
    # `F.upsample` is deprecated in favour of `F.interpolate`;
    # `align_corners=False` is the behaviour the old call defaulted to.
    return F.interpolate(x, size=(H, W), mode="bilinear", align_corners=False) + y


def replace_all_relu_w_leakyrelu(model):
    """Recursively replace every `nn.ReLU` in `model` by `nn.LeakyReLU`.

    The model is modified in place and also returned. A leftover
    `pdb.set_trace()` breakpoint, which blocked non-interactive runs, has
    been removed; the cautionary message is still printed.
    """
    print("Not sure if using this is a good idea")
    # Snapshot the items so the mapping can be safely reassigned in the loop.
    for name, module in list(model._modules.items()):
        if isinstance(module, nn.ReLU):
            model._modules[name] = nn.LeakyReLU()
        elif isinstance(module, nn.Module):
            model._modules[name] = replace_all_relu_w_leakyrelu(module)
    return model


def replace_all_leakyrelu_w_relu(model):
    """Recursively replace every `nn.LeakyReLU` in `model` by `nn.ReLU`.

    The model is modified in place and also returned.
    """
    for name, module in list(model._modules.items()):
        if isinstance(module, nn.LeakyReLU):
            model._modules[name] = nn.ReLU()
        elif isinstance(module, nn.Module):
            model._modules[name] = replace_all_leakyrelu_w_relu(module)
    return model


def replace_all_bn_w_groupnorm(model):
    """Recursively replace 1d / 2d batch norm layers by 32-group `GroupNorm`.

    The model is modified in place and also returned. `nn.BatchNorm3d` is
    not supported and raises. `nn.GroupNorm` itself rejects layers whose
    feature count is not divisible by 32. A leftover `pdb.set_trace()`
    breakpoint, which blocked non-interactive runs, has been removed; the
    cautionary message is still printed.
    """
    print("Not sure if using this is a good idea")
    for name, module in list(model._modules.items()):
        if isinstance(module, (nn.BatchNorm2d, nn.BatchNorm1d)):
            model._modules[name] = nn.GroupNorm(32, module.num_features)
        elif isinstance(module, nn.BatchNorm3d):
            raise Exception("Not implemented")
        elif isinstance(module, nn.Module):
            model._modules[name] = replace_all_bn_w_groupnorm(module)
    return model


def flat_temporal(tensor, batch_size, sequence_length):
    """Merge the leading (batch, sequence) dimensions of `tensor` into one."""
    tensor_shape = list(tensor.shape)
    assert tensor_shape[0] == batch_size and tensor_shape[1] == sequence_length
    result_shape = [batch_size * sequence_length] + tensor_shape[2:]
    return tensor.contiguous().view(result_shape)


def unflat_temporal(tensor, batch_size, sequence_length):
    """Split the leading dimension of `tensor` back into (batch, sequence)."""
    tensor_shape = list(tensor.shape)
    assert tensor_shape[0] == batch_size * sequence_length
    result_shape = [batch_size, sequence_length] + tensor_shape[1:]
    return tensor.contiguous().view(result_shape)
If the agent is within a set distance to the target (in our case 1.0 meters) and the target is visible within its observation frame, the agent has succeeded; otherwise, it has failed.

Provided are experiment configs for training a simple convolutional model with a GRU using `RGB`, `Depth` or `RGB-D` (i.e. `RGB+Depth`) as inputs in [RoboTHOR](https://ai2thor.allenai.org/robothor/) and [iTHOR](https://ai2thor.allenai.org/ithor/).

The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf) Reinforcement Learning Algorithm. For the RoboTHOR environment we also have an experiment (`objectnav_robothor_rgb_resnetgru_dagger.py`) showing how a model can be trained using DAgger, a form of imitation learning.

To train an experiment run the following command from the `allenact` root directory:

```bash
python main.py <PATH_TO_EXPERIMENT_CONFIG> -o <PATH_TO_OUTPUT> -c
```

Where `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights and logs to be stored and `<PATH_TO_EXPERIMENT_CONFIG>` is the path to the python file containing the experiment configuration. An example usage of this command would be:

```bash
python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet_ddppo.py -o storage/objectnav-robothor-rgb
```

This trains a simple convolutional neural network with a GRU using RGB input passed through a pretrained ResNet-18 visual encoder on the ObjectNav task in the RoboTHOR environment and stores the model weights and logs to `storage/objectnav-robothor-rgb`.

## RoboTHOR ObjectNav 2021 Challenge

The experiment configs found under the `projects/objectnav_baselines/experiments/robothor` directory are designed to conform to the requirements of the [RoboTHOR ObjectNav 2021 Challenge](https://ai2thor.allenai.org/robothor/cvpr-2021-challenge).

### Training a baseline

To train a baseline ResNet->GRU model taking RGB-D inputs, run the following command:

```bash
python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnet_ddppo.py -o storage/objectnav-robothor-rgbd
```

By default, when using a machine with a GPU, the above experiment will attempt to train using 60 parallel processes across all available GPUs. See the `TRAIN_GPU_IDS` constant in `experiments/objectnav_thor_base.py` and the `NUM_PROCESSES` constant in `experiments/robothor/objectnav_robothor_base.py` if you'd like to change which GPUs are used or how many processes are run, respectively.

### Downloading our pretrained model checkpoint

We provide a pretrained model obtained by allowing the above command to run for all 300M training steps and then selecting the model checkpoint with the best validation-set performance (for us occurring at ~170M training steps). You can download this model checkpoint by running

```bash
bash pretrained_model_ckpts/download_navigation_model_ckpts.sh robothor-objectnav-challenge-2021
```

from the top-level directory.
This will download the pretrained model weights and save them at the path

```bash
pretrained_model_ckpts/robothor-objectnav-challenge-2021/Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO/2021-02-09_22-35-15/exp_Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO_0.2.0a_300M__stage_00__steps_000170207237.pt
```

### Running inference on the pretrained model

You can run inference on the above pretrained model (on the test dataset) by running

```bash
export SAVED_MODEL_PATH=pretrained_model_ckpts/robothor-objectnav-challenge-2021/Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO/2021-02-09_22-35-15/exp_Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO_0.2.0a_300M__stage_00__steps_000170207237.pt
python main.py projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgbd_resnetgru_ddppo.py -c $SAVED_MODEL_PATH --eval
```

To discourage "cheating", the test dataset has been scrubbed of the information needed to actually compute the success rate / SPL of your model, and so running the above will only save the trajectories your model takes. To evaluate these trajectories you will have to submit them to our leaderboard, see [here for more details](https://github.com/allenai/robothor-challenge/). If you'd like to get a sense of whether your model is doing well before submitting to the leaderboard, you can obtain its success rate / SPL on our validation dataset. To do this, you can simply comment out the line

```python
TEST_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-objectnav/test")
```

within the `projects/objectnav_baselines/experiments/robothor/objectnav_robothor_base.py` file and rerun the above `python main.py ...` command (when the test dataset is not given, the code defaults to using the validation set).
================================================ FILE: projects/objectnav_baselines/__init__.py ================================================ ================================================ FILE: projects/objectnav_baselines/experiments/__init__.py ================================================ ================================================ FILE: projects/objectnav_baselines/experiments/clip/__init__.py ================================================ ================================================ FILE: projects/objectnav_baselines/experiments/clip/mixins.py ================================================ from typing import Sequence, Union, Type, Tuple, Optional, Dict, Any import attr import gym import numpy as np import torch import torch.nn as nn from allenact.base_abstractions.distributions import CategoricalDistr from allenact.base_abstractions.misc import ( ObservationType, Memory, ActorCriticOutput, DistributionType, ) from allenact.base_abstractions.preprocessor import Preprocessor from allenact.base_abstractions.sensor import Sensor from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor from allenact.utils.experiment_utils import Builder from allenact.utils.misc_utils import prepare_locals_for_super from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor from allenact_plugins.navigation_plugin.objectnav.models import ( ResnetTensorNavActorCritic, ) class LookDownFirstResnetTensorNavActorCritic(ResnetTensorNavActorCritic): def __init__(self, look_down_action_index: int, **kwargs): super().__init__(**kwargs) self.look_down_action_index = look_down_action_index self.register_buffer( "look_down_delta", torch.zeros(1, 1, self.action_space.n), persistent=False ) self.look_down_delta[0, 0, self.look_down_action_index] = 99999 def forward( # type:ignore self, observations: ObservationType, memory: Memory, prev_actions: torch.Tensor, masks: torch.FloatTensor, ) -> Tuple[ActorCriticOutput[DistributionType], 
def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
    """Create one CLIP ResNet preprocessor per visual sensor in `self.sensors`.

    The first `RGBSensor` found (if any) yields a preprocessor writing to
    `"rgb_clip_resnet"`; the first `DepthSensor` found (if any) yields one
    writing to `"depth_clip_resnet"`.

    Returns:
        The list of preprocessors; empty when no RGB or depth sensor exists.

    Raises:
        AssertionError: If an RGB sensor is present but its normalization
            means / standard deviations differ from the CLIP constants
            declared on `ClipResNetPreprocessor`.
    """
    preprocessors = []

    rgb_sensor = next((s for s in self.sensors if isinstance(s, RGBSensor)), None)
    if rgb_sensor is not None:
        # The normalization checks only make sense when an RGB sensor exists.
        # Running them unconditionally crashed with an AttributeError on
        # `None` for depth-only sensor suites.
        assert (
            np.linalg.norm(
                np.array(rgb_sensor._norm_means)
                - np.array(ClipResNetPreprocessor.CLIP_RGB_MEANS)
            )
            < 1e-5
        ), "RGB sensor normalization means must match the CLIP RGB means."
        assert (
            np.linalg.norm(
                np.array(rgb_sensor._norm_sds)
                - np.array(ClipResNetPreprocessor.CLIP_RGB_STDS)
            )
            < 1e-5
        ), "RGB sensor normalization stds must match the CLIP RGB stds."

        preprocessors.append(
            ClipResNetPreprocessor(
                rgb_input_uuid=rgb_sensor.uuid,
                clip_model_type=self.clip_model_type,
                pool=self.pool,
                output_uuid="rgb_clip_resnet",
                input_img_height_width=(rgb_sensor.height, rgb_sensor.width),
            )
        )

    depth_sensor = next(
        (s for s in self.sensors if isinstance(s, DepthSensor)), None
    )
    if depth_sensor is not None:
        # The depth image goes through the same preprocessor class; its uuid
        # is passed via the `rgb_input_uuid` parameter.
        preprocessors.append(
            ClipResNetPreprocessor(
                rgb_input_uuid=depth_sensor.uuid,
                clip_model_type=self.clip_model_type,
                pool=self.pool,
                output_uuid="depth_clip_resnet",
                input_img_height_width=(depth_sensor.height, depth_sensor.width),
            )
        )

    return preprocessors
any(isinstance(s, DepthSensor) for s in self.sensors) goal_sensor_uuid = next( (s.uuid for s in self.sensors if isinstance(s, self.goal_sensor_type)), None, ) if model_kwargs is None: model_kwargs = {} model_kwargs = dict( action_space=gym.spaces.Discrete(num_actions), observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, goal_sensor_uuid=goal_sensor_uuid, rgb_resnet_preprocessor_uuid="rgb_clip_resnet" if has_rgb else None, depth_resnet_preprocessor_uuid="depth_clip_resnet" if has_depth else None, hidden_size=hidden_size, goal_dims=32, add_prev_actions=add_prev_actions, rnn_type=rnn_type, **model_kwargs ) if not look_down_first: return ResnetTensorNavActorCritic(**model_kwargs) else: return LookDownFirstResnetTensorNavActorCritic( look_down_action_index=look_down_action_index, **model_kwargs ) ================================================ FILE: projects/objectnav_baselines/experiments/habitat/__init__.py ================================================ ================================================ FILE: projects/objectnav_baselines/experiments/habitat/clip/__init__.py ================================================ ================================================ FILE: projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo.py ================================================ from typing import Sequence, Union import torch.nn as nn from torch.distributions.utils import lazy_property from allenact.base_abstractions.preprocessor import Preprocessor from allenact.utils.experiment_utils import Builder, TrainingPipeline from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor from allenact_plugins.habitat_plugin.habitat_sensors import ( RGBSensorHabitat, TargetObjectSensorHabitat, ) from projects.objectnav_baselines.experiments.clip.mixins import ( ClipResNetPreprocessGRUActorCriticMixin, ) from projects.objectnav_baselines.experiments.habitat.objectnav_habitat_base import ( 
class ObjectNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig(
    ObjectNavHabitatBaseConfig
):
    """Habitat ObjectNav experiment: RGB input, CLIP ResNet-50 features, GRU policy.

    Preprocessor and model construction are delegated to a
    `ClipResNetPreprocessGRUActorCriticMixin` instance created in `__init__`.
    """

    CLIP_MODEL_TYPE = "RN50"

    def __init__(self, lr: float, **kwargs):
        """Store the learning rate and build the preprocessing/model helper.

        Args:
            lr: Learning rate handed to the training pipeline and shown in `tag`.
            **kwargs: Forwarded to `ObjectNavHabitatBaseConfig.__init__`.
        """
        # The base initializer must run first: `SENSORS` is computed lazily
        # from state that it sets.
        super().__init__(**kwargs)
        self.lr = lr

        mixin_settings = dict(
            sensors=self.SENSORS,
            clip_model_type=self.CLIP_MODEL_TYPE,
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=TargetObjectSensorHabitat,
        )
        self.preprocessing_and_model = ClipResNetPreprocessGRUActorCriticMixin(
            **mixin_settings
        )

    @lazy_property
    def SENSORS(self):
        """Return the RGB sensor (CLIP-normalized) and the target-object sensor."""
        side = ObjectNavHabitatBaseConfig.SCREEN_SIZE
        rgb = RGBSensorHabitat(
            height=side,
            width=side,
            use_resnet_normalization=True,
            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
        )
        goal = TargetObjectSensorHabitat(len(self.DEFAULT_OBJECT_CATEGORIES_TO_IND))
        return [rgb, goal]

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the pipeline built by `ObjectNavPPOMixin` (`kwargs` are unused)."""
        pipeline_settings = dict(
            lr=self.lr,
            auxiliary_uuids=self.auxiliary_uuids,
            multiple_beliefs=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )
        return ObjectNavPPOMixin.training_pipeline(**pipeline_settings)

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors produced by the preprocessing/model helper."""
        helper = self.preprocessing_and_model
        return helper.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the actor-critic through the preprocessing/model helper."""
        helper = self.preprocessing_and_model
        return helper.create_model(
            num_actions=self.ACTION_SPACE.n,
            add_prev_actions=self.add_prev_actions,
            auxiliary_uuids=self.auxiliary_uuids,
            **kwargs,
        )

    def tag(self):
        """Return the base tag extended with the model name and learning rate."""
        base_tag = super().tag()
        return f"{base_tag}-RGB-ClipResNet50GRU-DDPPO-lr{self.lr}"
class ObjectNavHabitatRGBClipResNet50GRUDDPPOIncreasingLengthExpConfig(
    ObjectNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig
):
    """Variant of the CLIP ResNet-50 GRU config whose rollouts grow over training.

    Training runs in three stages using 32, 64 and then 128 steps per rollout.
    """

    def __init__(self, lr=1e-4, **kwargs):
        """Initialize the parent config with a default learning rate of 1e-4."""
        super().__init__(lr, **kwargs)
        self.lr = lr

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Build the three-stage PPO pipeline with increasing rollout length.

        `kwargs` are accepted for interface compatibility and are not used.
        """
        cuda_available = torch.cuda.is_available()

        def log_interval(rollout_len: int, multiplier: int) -> int:
            # Without CUDA, metrics are accumulated at every step.
            if not cuda_available:
                return 1
            return self.num_train_processes * rollout_len * multiplier

        first_stage_steps = int(10e6)
        second_stage_steps = int(10e6)
        # The last stage takes whatever remains of the 1e9 total step budget.
        final_stage_steps = int(1e9) - first_stage_steps - second_stage_steps

        # One row per stage: (steps per rollout, stage step budget, log interval).
        schedule = (
            (32, first_stage_steps, log_interval(32, 10)),
            (64, second_stage_steps, log_interval(64, 5)),
            (128, final_stage_steps, log_interval(128, 5)),
        )

        named_losses = update_with_auxiliary_losses(
            named_losses={
                "ppo_loss": (PPO(**PPOConfig, normalize_advantage=False), 1.0)
            },
            auxiliary_uuids=self.auxiliary_uuids,
            multiple_beliefs=False,
        )

        stages = [
            PipelineStage(
                loss_names=list(named_losses.keys()),
                max_stage_steps=stage_steps,
                training_settings=TrainingSettings(
                    num_steps=rollout_len,
                    metric_accumulate_interval=interval,
                ),
            )
            for rollout_len, stage_steps, interval in schedule
        ]

        return TrainingPipeline(
            save_interval=5000000,
            optimizer_builder=Builder(optim.Adam, dict(lr=self.lr)),
            num_mini_batch=1,
            update_repeats=4,
            max_grad_norm=0.5,
            # Each value is a (loss, weight) pair; only the loss is passed here.
            named_losses={name: pair[0] for name, pair in named_losses.items()},
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=stages,
            lr_scheduler_builder=None,
        )

    def tag(self):
        """Return the parent tag with an `IncRollouts` marker before the lr."""
        parent_tag = super().tag()
        return parent_tag.replace("-DDPPO-lr", "-DDPPO-IncRollouts-lr")
def create_objectnav_config(
    config_yaml_path: str,
    mode: str,
    scenes_path: str,
    simulator_gpu_ids: Sequence[int],
    rotation_degrees: float,
    step_size: float,
    max_steps: int,
    num_processes: int,
    camera_width: int,
    camera_height: int,
    using_rgb: bool,
    using_depth: bool,
    training: bool,
    num_episode_sample: int,
    horizontal_fov: Optional[int] = None,
) -> habitat.Config:
    """Load a Habitat ObjectNav YAML config and specialize it for one run mode.

    The loaded config is defrosted, updated in place, frozen again and returned.

    Args:
        config_yaml_path: Path of the base YAML file to load.
        mode: Stored verbatim in `config.MODE`.
        scenes_path: Stored in `config.DATASET.DATA_PATH`.
        simulator_gpu_ids: Stored in `config.SIMULATOR_GPU_IDS`.
        rotation_degrees: Must equal the YAML's `SIMULATOR.TURN_ANGLE`.
        step_size: Must equal the YAML's `SIMULATOR.FORWARD_STEP_SIZE`.
        max_steps: Must equal the YAML's `ENVIRONMENT.MAX_EPISODE_STEPS`.
        num_processes: Stored in `config.NUM_PROCESSES`.
        camera_width: Width applied to the RGB, depth and semantic sensors.
        camera_height: Height applied to the RGB, depth and semantic sensors.
        using_rgb: Whether `"RGB_SENSOR"` is added to the agent's sensors.
        using_depth: Whether `"DEPTH_SENSOR"` is added to the agent's sensors.
        training: When false, the seed is fixed to 0 and episode shuffling is
            switched off.
        num_episode_sample: When positive, stored as the iterator's
            `NUM_EPISODE_SAMPLE`.
        horizontal_fov: Optional HFOV applied to all three camera sensors.

    Raises:
        AssertionError: If the YAML disagrees with the requested motion
            parameters or does not describe the expected ObjectNav task.
    """
    config = get_habitat_config(config_yaml_path)
    config.defrost()

    config.NUM_PROCESSES = num_processes
    config.SIMULATOR_GPU_IDS = simulator_gpu_ids
    config.DATASET.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR
    config.DATASET.DATA_PATH = scenes_path

    # Only the requested visual sensors are attached to the agent.
    config.SIMULATOR.AGENT_0.SENSORS = [
        sensor_name
        for enabled, sensor_name in (
            (using_rgb, "RGB_SENSOR"),
            (using_depth, "DEPTH_SENSOR"),
        )
        if enabled
    ]

    # Resolution (and optionally field of view) is set on every camera sensor,
    # whether or not that sensor was attached above.
    camera_sensor_names = ("RGB_SENSOR", "DEPTH_SENSOR", "SEMANTIC_SENSOR")
    for sensor_name in camera_sensor_names:
        sensor_cfg = getattr(config.SIMULATOR, sensor_name)
        sensor_cfg.WIDTH = camera_width
        sensor_cfg.HEIGHT = camera_height
    if horizontal_fov is not None:
        for sensor_name in camera_sensor_names:
            getattr(config.SIMULATOR, sensor_name).HFOV = horizontal_fov

    # The experiment constants have to agree with what the YAML declares.
    assert rotation_degrees == config.SIMULATOR.TURN_ANGLE
    assert step_size == config.SIMULATOR.FORWARD_STEP_SIZE
    assert max_steps == config.ENVIRONMENT.MAX_EPISODE_STEPS
    config.SIMULATOR.MAX_EPISODE_STEPS = max_steps

    task_cfg = config.TASK
    assert task_cfg.TYPE == "ObjectNav-v1"
    assert task_cfg.SUCCESS.SUCCESS_DISTANCE == 0.1
    assert task_cfg.DISTANCE_TO_GOAL.DISTANCE_TO == "VIEW_POINTS"

    config.TASK.SENSORS = ["OBJECTGOAL_SENSOR", "COMPASS_SENSOR", "GPS_SENSOR"]
    config.TASK.GOAL_SENSOR_UUID = "objectgoal"
    config.TASK.MEASUREMENTS = ["DISTANCE_TO_GOAL", "SUCCESS", "SPL", "SOFT_SPL"]

    if not training:
        config.SEED = 0
        config.ENVIRONMENT.ITERATOR_OPTIONS.SHUFFLE = False

    if num_episode_sample > 0:
        config.ENVIRONMENT.ITERATOR_OPTIONS.NUM_EPISODE_SAMPLE = num_episode_sample

    config.MODE = mode
    config.freeze()

    return config
def _create_config(
    self,
    mode: str,
    scenes_path: str,
    num_processes: int,
    simulator_gpu_ids: Sequence[int],
    training: bool = True,
    num_episode_sample: int = -1,
):
    """Build a Habitat config for this experiment via `create_objectnav_config`.

    Per-call values come from the arguments; everything else (YAML path,
    motion constants, camera geometry, which visual sensors are in use) is
    read from this experiment config.

    Args:
        mode: Run mode forwarded to `create_objectnav_config`.
        scenes_path: Dataset path forwarded to `create_objectnav_config`.
        num_processes: Number of processes forwarded.
        simulator_gpu_ids: GPU ids forwarded for the simulator.
        training: Whether the config is for training.
        num_episode_sample: Episode sample count; defaults to -1.
    """
    settings = dict(
        config_yaml_path=self.BASE_CONFIG_YAML_PATH,
        mode=mode,
        scenes_path=scenes_path,
        simulator_gpu_ids=simulator_gpu_ids,
        rotation_degrees=self.ROTATION_DEGREES,
        step_size=self.STEP_SIZE,
        max_steps=self.MAX_STEPS,
        num_processes=num_processes,
        camera_width=self.CAMERA_WIDTH,
        camera_height=self.CAMERA_HEIGHT,
        horizontal_fov=self.HORIZONTAL_FIELD_OF_VIEW,
        # RGB / depth rendering is requested only if a matching sensor exists.
        using_rgb=any(isinstance(sensor, RGBSensor) for sensor in self.SENSORS),
        using_depth=any(isinstance(sensor, DepthSensor) for sensor in self.SENSORS),
        training=training,
        num_episode_sample=num_episode_sample,
    )
    return create_objectnav_config(**settings)
@lazy_property
def TRAIN_CONFIGS_PER_PROCESS(self):
    """Return one Habitat config per training process.

    With two or more training GPUs, configs are assigned greedily to devices
    in order of increasing on-disk scene size: each goes to the device with
    the smallest accumulated size, and a device stops receiving configs once
    it holds `ceil(num_configs / num_devices)` of them. The result is the
    per-device groups, ordered by group length, concatenated.

    In debug mode every config is restricted to a single fixed scene.
    """
    configs = construct_env_configs(self.TRAIN_CONFIG, allow_scene_repeat=True)

    num_devices = len(self.train_gpu_ids)
    if num_devices >= 2:
        scenes_root = configs[0].DATASET.SCENES_DIR
        # Number of files expected per scene: 4 for mp3d, 2 otherwise.
        expected_num_files = 4 if self.scene_dataset == "mp3d" else 2

        scene_sizes = []
        for cfg in configs:
            content_scenes = cfg.DATASET.CONTENT_SCENES
            assert len(content_scenes) == 1, content_scenes

            pattern = os.path.join(
                scenes_root, self.scene_dataset, "**", f"{content_scenes[0]}.*"
            )
            scene_files = glob.glob(pattern, recursive=True)
            assert len(scene_files) == expected_num_files

            scene_sizes.append(sum(os.path.getsize(f) for f in scene_files))

        per_device_cap = math.ceil(len(configs) / num_devices)
        load = np.zeros(num_devices)
        buckets = [[] for _ in range(num_devices)]

        for size, cfg in sorted(zip(scene_sizes, configs), key=lambda pair: pair[0]):
            slot = int(np.argmin(load))

            cfg.defrost()
            cfg.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = self.train_gpu_ids[slot]
            cfg.freeze()

            bucket = buckets[slot]
            bucket.append(cfg)

            # A full device gets an infinite load so argmin never picks it again.
            if len(bucket) >= per_device_cap:
                load[slot] = float("inf")
            else:
                load[slot] += size

        buckets.sort(key=len)
        configs = [cfg for bucket in buckets for cfg in bucket]

    if self.debug:
        warnings.warn(
            "IN DEBUG MODE, WILL ONLY USE `1LXtFkjw3qL` SCENE IN MP3D OR `1S7LAXRdDqK` scene in HM3D!!!"
        )
        debug_scene_for = {"mp3d": "1LXtFkjw3qL", "hm3d": "1S7LAXRdDqK"}
        for cfg in configs:
            cfg.defrost()
            if self.scene_dataset not in debug_scene_for:
                raise NotImplementedError
            cfg.DATASET.CONTENT_SCENES = [debug_scene_for[self.scene_dataset]]
            cfg.freeze()

    return configs
def valid_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Return the task-sampler keyword arguments used for validation.

    Only `total_processes` is inspected; the other parameters are accepted
    for interface compatibility.

    Raises:
        NotImplementedError: If `total_processes` is not exactly 1.
    """
    if total_processes == 1:
        return dict(
            env_config=self.VALID_CONFIG,
            max_steps=self.MAX_STEPS,
            sensors=self.SENSORS,
            action_space=gym.spaces.Discrete(
                len(ObjectNavTask.class_action_names())
            ),
        )

    raise NotImplementedError(
        "In validation, `total_processes` must equal 1 for habitat tasks"
    )
class ObjectNaviThorBaseConfig(ObjectNavThorBaseConfig, ABC):
    """Shared settings for every iTHOR ObjectNav experiment.

    Pins the AI2-THOR commit, the agent mode, the default number of training
    processes, the dataset directories and the set of target object types.
    """

    AGENT_MODE = "default"
    THOR_COMMIT_ID = "9549791ce2e7f472063a10abb1fb7664159fec23"

    # A single process is used when CUDA is unavailable.
    DEFAULT_NUM_TRAIN_PROCESSES = 1 if not torch.cuda.is_available() else 40

    # Dataset directories are resolved against the working directory in effect
    # when this module is imported.
    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-objectnav/train")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val")

    # Kept sorted so the order is independent of how the names are listed.
    TARGET_TYPES = tuple(
        sorted(
            (
                "AlarmClock",
                "Apple",
                "Book",
                "Bowl",
                "Box",
                "Candle",
                "GarbageCan",
                "HousePlant",
                "Laptop",
                "SoapBottle",
                "Television",
                "Toaster",
            )
        )
    )
class ObjectNaviThorDepthPPOExperimentConfig(ObjectNaviThorBaseConfig):
    """iTHOR ObjectNav experiment that uses a depth sensor plus the goal sensor.

    Preprocessor and model construction are delegated to a
    `ResNetPreprocessGRUActorCriticMixin` instance created in `__init__`.
    """

    SENSORS = (
        DepthSensorThor(
            height=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            width=ObjectNaviThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNaviThorBaseConfig.TARGET_TYPES,
        ),
    )

    def __init__(self, **kwargs):
        """Initialize the base config and build the preprocessing/model helper."""
        super().__init__(**kwargs)

        mixin_settings = dict(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )
        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            **mixin_settings
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the pipeline built by `ObjectNavPPOMixin` (`kwargs` are unused)."""
        rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD
        return ObjectNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=rollout_period,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors produced by the preprocessing/model helper."""
        helper = self.preprocessing_and_model
        return helper.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the actor-critic through the preprocessing/model helper."""
        helper = self.preprocessing_and_model
        return helper.create_model(num_actions=self.ACTION_SPACE.n, **kwargs)

    def tag(self):
        """Return the fixed experiment tag."""
        return "ObjectNav-iTHOR-Depth-ResNet18GRU-DDPPO"
def create_model(self, **kwargs) -> nn.Module:
    """Create the actor-critic through the preprocessing/model helper.

    Args:
        **kwargs: Forwarded unchanged to the helper's `create_model`.

    Returns:
        Whatever the helper's `create_model` returns, built with
        `num_actions` set to the size of this config's action space.
    """
    helper = self.preprocessing_and_model
    build = helper.create_model
    num_actions = self.ACTION_SPACE.n
    return build(num_actions=num_actions, **kwargs)
class ObjectNavBaseConfig(ExperimentConfig, ABC):
    """Constants and defaults shared by all object-navigation experiments."""

    # Agent motion and visibility settings.
    STEP_SIZE = 0.25
    ROTATION_DEGREES = 30.0
    VISIBILITY_DISTANCE = 1.0
    STOCHASTIC = True

    # Camera geometry and the size of the images fed to the model.
    HORIZONTAL_FIELD_OF_VIEW = 79
    CAMERA_WIDTH = 400
    CAMERA_HEIGHT = 300
    SCREEN_SIZE = 224

    # Episode step limit.
    MAX_STEPS = 500

    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None

    # Subclasses are expected to provide their own sensors.
    SENSORS: Sequence[Sensor] = []

    def __init__(self):
        """Set the reward configuration for this experiment instance."""
        self.REWARD_CONFIG = dict(
            step_penalty=-0.01,
            goal_success_reward=10.0,
            failed_stop_reward=0.0,
            shaping_weight=1.0,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return no preprocessors by default."""
        return ()
def __init__(
    self,
    num_train_processes: Optional[int] = None,
    num_test_processes: Optional[int] = None,
    test_on_validation: bool = False,
    train_gpu_ids: Optional[Sequence[int]] = None,
    val_gpu_ids: Optional[Sequence[int]] = None,
    test_gpu_ids: Optional[Sequence[int]] = None,
    randomize_train_materials: bool = False,
    headless: Optional[bool] = None,
):
    """Resolve process counts, device ids and rendering options.

    Every argument left as `None` falls back to a default: the matching
    `DEFAULT_*` class attribute, except `num_test_processes`, which falls
    back to 10 when CUDA is available and 1 otherwise.

    Args:
        num_train_processes: Number of training processes.
        num_test_processes: Number of test processes.
        test_on_validation: Stored as-is on the instance.
        train_gpu_ids: Device ids used for training.
        val_gpu_ids: Device ids used for validation.
        test_gpu_ids: Device ids used for testing.
        randomize_train_materials: Stored as-is on the instance.
        headless: Whether to run AI2-THOR headless. `None` (the default)
            defers to `DEFAULT_THOR_IS_HEADLESS`; an explicit `True` or
            `False` is used unchanged.
    """
    super().__init__()

    def v_or_default(v, default):
        return v if v is not None else default

    self.num_train_processes = v_or_default(
        num_train_processes, self.DEFAULT_NUM_TRAIN_PROCESSES
    )
    self.num_test_processes = v_or_default(
        num_test_processes, (10 if torch.cuda.is_available() else 1)
    )
    self.test_on_validation = test_on_validation
    self.train_gpu_ids = v_or_default(train_gpu_ids, self.DEFAULT_TRAIN_GPU_IDS)
    self.val_gpu_ids = v_or_default(val_gpu_ids, self.DEFAULT_VALID_GPU_IDS)
    self.test_gpu_ids = v_or_default(test_gpu_ids, self.DEFAULT_TEST_GPU_IDS)

    # `headless` used to default to `False`, which is never `None`, so the
    # class-level default below could not take effect. Defaulting to `None`
    # lets subclasses that override `DEFAULT_THOR_IS_HEADLESS` be honoured.
    self.headless = v_or_default(headless, self.DEFAULT_THOR_IS_HEADLESS)

    self.sampler_devices = self.train_gpu_ids
    self.randomize_train_materials = randomize_train_materials
stochastic=True, continuousMode=True, applyActionNoise=self.STOCHASTIC, rotateStepDegrees=self.ROTATION_DEGREES, visibilityDistance=self.VISIBILITY_DISTANCE, gridSize=self.STEP_SIZE, snapToGrid=False, agentMode=self.AGENT_MODE, fieldOfView=horizontal_to_vertical_fov( horizontal_fov_in_degrees=self.HORIZONTAL_FIELD_OF_VIEW, width=self.CAMERA_WIDTH, height=self.CAMERA_HEIGHT, ), include_private_scenes=False, renderDepthImage=any(isinstance(s, DepthSensorThor) for s in self.SENSORS), ) def machine_params(self, mode="train", **kwargs): sampler_devices: Sequence[torch.device] = [] devices: Sequence[torch.device] if mode == "train": workers_per_device = 1 devices = ( [torch.device("cpu")] if not torch.cuda.is_available() else cast(Tuple, self.train_gpu_ids) * workers_per_device ) nprocesses = evenly_distribute_count_into_bins( self.num_train_processes, max(len(devices), 1) ) sampler_devices = self.sampler_devices elif mode == "valid": nprocesses = 1 devices = ( [torch.device("cpu")] if not torch.cuda.is_available() else self.val_gpu_ids ) elif mode == "test": devices = ( [torch.device("cpu")] if not torch.cuda.is_available() else self.test_gpu_ids ) nprocesses = evenly_distribute_count_into_bins( self.num_test_processes, max(len(devices), 1) ) else: raise NotImplementedError("mode must be 'train', 'valid', or 'test'.") sensors = [*self.SENSORS] if mode != "train": sensors = [s for s in sensors if not isinstance(s, ExpertActionSensor)] sensor_preprocessor_graph = ( SensorPreprocessorGraph( source_observation_spaces=SensorSuite(sensors).observation_spaces, preprocessors=self.preprocessors(), ) if mode == "train" or ( (isinstance(nprocesses, int) and nprocesses > 0) or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0) ) else None ) return MachineParams( nprocesses=nprocesses, devices=devices, sampler_devices=( sampler_devices if mode == "train" else devices ), # ignored with > 1 gpu_ids sensor_preprocessor_graph=sensor_preprocessor_graph, ) @classmethod def 
make_sampler_fn(cls, **kwargs) -> TaskSampler: return ObjectNavDatasetTaskSampler(**kwargs) @staticmethod def _partition_inds(n: int, num_parts: int): return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype( np.int32 ) def _get_sampler_args_for_scene_split( self, scenes_dir: str, process_ind: int, total_processes: int, devices: Optional[List[int]], seeds: Optional[List[int]], deterministic_cudnn: bool, include_expert_sensor: bool = True, allow_oversample: bool = False, ) -> Dict[str, Any]: path = os.path.join(scenes_dir, "*.json.gz") scenes = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)] if len(scenes) == 0: raise RuntimeError( ( "Could find no scene dataset information in directory {}." " Are you sure you've downloaded them? " " If not, see https://allenact.org/installation/download-datasets/ information" " on how this can be done." ).format(scenes_dir) ) oversample_warning = ( f"Warning: oversampling some of the scenes ({scenes}) to feed all processes ({total_processes})." " You can avoid this by setting a number of workers divisible by the number of scenes" ) if total_processes > len(scenes): # oversample some scenes -> bias if not allow_oversample: raise RuntimeError( f"Cannot have `total_processes > len(scenes)`" f" ({total_processes} > {len(scenes)}) when `allow_oversample` is `False`." 
) if total_processes % len(scenes) != 0: get_logger().warning(oversample_warning) scenes = scenes * int(ceil(total_processes / len(scenes))) scenes = scenes[: total_processes * (len(scenes) // total_processes)] elif len(scenes) % total_processes != 0: get_logger().warning(oversample_warning) inds = self._partition_inds(len(scenes), total_processes) if not self.headless: x_display: Optional[str] = None if platform.system() == "Linux": x_displays = get_open_x_displays(throw_error_if_empty=True) if len([d for d in devices if d != torch.device("cpu")]) > len( x_displays ): get_logger().warning( f"More GPU devices found than X-displays (devices: `{x_displays}`, x_displays: `{x_displays}`)." f" This is not necessarily a bad thing but may mean that you're not using GPU memory as" f" efficiently as possible. Consider following the instructions here:" f" https://allenact.org/installation/installation-framework/#installation-of-ithor-ithor-plugin" f" describing how to start an X-display on every GPU." 
) x_display = x_displays[process_ind % len(x_displays)] device_dict = dict(x_display=x_display) else: device_dict = dict( gpu_device=devices[process_ind % len(devices)], platform=ai2thor.platform.CloudRendering, ) return { "scenes": scenes[inds[process_ind] : inds[process_ind + 1]], "object_types": self.TARGET_TYPES, "max_steps": self.MAX_STEPS, "sensors": [ s for s in self.SENSORS if (include_expert_sensor or not isinstance(s, ExpertActionSensor)) ], "action_space": self.ACTION_SPACE, "seed": seeds[process_ind] if seeds is not None else None, "deterministic_cudnn": deterministic_cudnn, "rewards_config": self.REWARD_CONFIG, "env_args": {**self.env_args(), **device_dict}, } def train_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: res = self._get_sampler_args_for_scene_split( scenes_dir=os.path.join(self.TRAIN_DATASET_DIR, "episodes"), process_ind=process_ind, total_processes=total_processes, devices=devices, seeds=seeds, deterministic_cudnn=deterministic_cudnn, allow_oversample=True, ) res["scene_directory"] = self.TRAIN_DATASET_DIR res["loop_dataset"] = True res["allow_flipping"] = True res["randomize_materials_in_training"] = self.randomize_train_materials return res def valid_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: res = self._get_sampler_args_for_scene_split( scenes_dir=os.path.join(self.VAL_DATASET_DIR, "episodes"), process_ind=process_ind, total_processes=total_processes, devices=devices, seeds=seeds, deterministic_cudnn=deterministic_cudnn, include_expert_sensor=False, allow_oversample=False, ) res["scene_directory"] = self.VAL_DATASET_DIR res["loop_dataset"] = False return res def test_task_sampler_args( self, process_ind: int, total_processes: int, devices: 
Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: if self.test_on_validation or self.TEST_DATASET_DIR is None: if not self.test_on_validation: get_logger().warning( "`test_on_validation` is set to `True` and thus we will run evaluation on the validation set instead." " Be careful as the saved metrics json and tensorboard files **will still be labeled as" " 'test' rather than 'valid'**." ) else: get_logger().warning( "No test dataset dir detected, running test on validation set instead." " Be careful as the saved metrics json and tensorboard files *will still be labeled as" " 'test' rather than 'valid'**." ) return self.valid_task_sampler_args( process_ind=process_ind, total_processes=total_processes, devices=devices, seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) else: res = self._get_sampler_args_for_scene_split( scenes_dir=os.path.join(self.TEST_DATASET_DIR, "episodes"), process_ind=process_ind, total_processes=total_processes, devices=devices, seeds=seeds, deterministic_cudnn=deterministic_cudnn, include_expert_sensor=False, allow_oversample=False, ) res["env_args"]["all_metadata_available"] = False res["rewards_config"] = {**res["rewards_config"], "shaping_weight": 0} res["scene_directory"] = self.TEST_DATASET_DIR res["loop_dataset"] = False return res ================================================ FILE: projects/objectnav_baselines/experiments/robothor/__init__.py ================================================ ================================================ FILE: projects/objectnav_baselines/experiments/robothor/beta/README.md ================================================ # Beta experiments This folder contains "beta" experiments, e.g. training experiments meant to be used to test new features. These experiments may have bugs or not train well. 
class ObjectNavRoboThorResNet18GRURGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """RoboTHOR ObjectNav experiment with RGB input, trained with PPO plus a
    grouped-action imitation loss on the END / non-END action groups."""

    SENSORS = (  # type:ignore
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
        TakeEndActionThorNavSensor(
            nactions=len(ObjectNavTask.class_action_names()), uuid="expert_group_action"
        ),
    )

    def __init__(self, **kwargs):
        """Forward `kwargs` to the base config and set up the ResNet18
        preprocessing/model mixin."""
        super().__init__(**kwargs)

        mixin_kwargs = dict(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )
        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            **mixin_kwargs
        )

    def preprocessors(self):
        """Return the preprocessors defined by the mixin."""
        mixin = self.preprocessing_and_model
        return mixin.preprocessors()

    def create_model(self, **kwargs):
        """Create the model via the mixin with one output per action."""
        n_actions = self.ACTION_SPACE.n
        return self.preprocessing_and_model.create_model(
            num_actions=n_actions, **kwargs
        )

    def training_pipeline(self, **kwargs):
        """Return a single-stage pipeline combining PPO with grouped-action
        imitation, using a linearly decayed learning rate."""
        total_steps = int(300000000)

        # Split the action indices into "everything but END" and "END".
        action_names = ObjectNavTask.class_action_names()
        non_end_inds = set(
            ind
            for ind, name in enumerate(action_names)
            if name != robothor_constants.END
        )
        end_inds = {action_names.index(robothor_constants.END)}

        losses = {
            "ppo_loss": PPO(**PPOConfig),
            "grouped_action_imitation": GroupedActionImitation(
                nactions=len(ObjectNavTask.class_action_names()),
                action_groups=[non_end_inds, end_inds],
            ),
        }
        stage = PipelineStage(
            loss_names=["ppo_loss", "grouped_action_imitation"],
            max_stage_steps=total_steps,
        )
        scheduler = Builder(LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)})

        return TrainingPipeline(
            save_interval=5000000,
            metric_accumulate_interval=10000,
            optimizer_builder=Builder(optim.Adam, dict(lr=3e-4)),
            num_mini_batch=1,
            update_repeats=4,
            max_grad_norm=0.5,
            num_steps=128,
            named_losses=losses,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[stage],
            lr_scheduler_builder=scheduler,
        )

    def tag(self):
        """Return the experiment tag."""
        return "ObjectNav-RoboTHOR-RGB-ResNet18GRU-DDPPOAndGBC"
def compute_inv_dyn_action_logits(
    model,
    img0,
    img1,
):
    """Return `model.inv_dyn_mlp` applied to the concatenated encodings of
    `img0` and `img1`.

    Each image tensor gets a leading dimension added, is passed through
    `model.visual_encoder` (keyed by the encoder's `rgb_uuid`), and has that
    leading dimension squeezed out again before the two encodings are
    concatenated along dimension 1.
    """
    encoder = model.visual_encoder
    obs_key = encoder.rgb_uuid

    encodings = [
        encoder({obs_key: frame.unsqueeze(0)}).squeeze(0) for frame in (img0, img1)
    ]
    return model.inv_dyn_mlp(torch.cat(encodings, dim=1))
class VisibleObjectTypesSensor(
    Sensor[
        Union[IThorEnvironment, RoboThorEnvironment],
        Union[Task[IThorEnvironment], Task[RoboThorEnvironment]],
    ]
):
    """Sensor returning a 0/1 vector over the RoboTHOR target object types,
    with a 1 for every type that has a visible object in the last event."""

    def __init__(self, uuid: str = "visible_objects", **kwargs: Any):
        """Create the sensor with one observation entry per target type."""
        target_types = ObjectNavRoboThorBaseConfig.TARGET_TYPES
        space = gym.spaces.Box(low=0, high=1, shape=(len(target_types),))
        super().__init__(uuid=uuid, observation_space=space, **kwargs)

        # Position of each target type in the observation vector.
        self.type_to_index = dict(zip(target_types, range(len(target_types))))

    def get_observation(
        self,
        env: Union[IThorEnvironment, RoboThorEnvironment],
        task: Optional[Task],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Return the visibility vector built from the controller's last
        event metadata."""
        visible_flags = np.zeros((len(self.type_to_index),))
        for obj in env.controller.last_event.metadata["objects"]:
            if not obj["visible"]:
                continue
            slot = self.type_to_index.get(obj["objectType"])
            if slot is not None:
                visible_flags[slot] = 1.0
        return visible_flags
multiple_beliefs=multiple_beliefs, ) default_ts = TrainingSettings( num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD, save_interval=save_interval, metric_accumulate_interval=log_interval, ) named_losses = { **named_losses, "inv_dyn_vdr": ( InverseDynamicsVDRLoss( compute_action_logits_fn=compute_inv_dyn_action_logits, img0_key="img0", img1_key="img1", action_key="action", ), 1.0, ), } sorted_loss_names = list(sorted(named_losses.keys())) return TrainingPipeline( training_settings=default_ts, optimizer_builder=Builder(optim.Adam, dict(lr=lr)), named_losses={k: v[0] for k, v in named_losses.items()}, named_storages={ "onpolicy": RolloutBlockStorage(init_size=num_steps), "discrete_vdr": DiscreteVisualDynamicsReplayStorage( image_uuid="rgb_lowres", action_success_uuid="last_action_success", extra_targets=["visible_objects"], nactions=6, num_to_store_per_action=200 if torch.cuda.is_available() else 10, max_to_save_per_episode=6, target_batch_size=256 if torch.cuda.is_available() else 128, ), }, pipeline_stages=[ PipelineStage( loss_names=sorted_loss_names, max_stage_steps=ppo_steps, loss_weights=[ named_losses[loss_name][1] for loss_name in sorted_loss_names ], stage_components=[ StageComponent( uuid="onpolicy", storage_uuid="onpolicy", loss_names=[ ln for ln in sorted_loss_names if ln != "inv_dyn_vdr" ], ), StageComponent( uuid="vdr", storage_uuid="discrete_vdr", loss_names=["inv_dyn_vdr"], training_settings=TrainingSettings( num_mini_batch=1, update_repeats=1, ), ), ], ) ], lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)} ), ) def create_model(self, **kwargs) -> nn.Module: model = self.model_creation_handler.create_model(**kwargs) model.inv_dyn_mlp = nn.Sequential( nn.Linear(1024, 256), nn.ReLU(inplace=True), nn.Linear(256, 6), ) return model def tag(self): 
class ObjectNavRoboThorClipRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """RoboTHOR ObjectNav experiment with RGB input preprocessed by a CLIP
    ResNet-50 and trained with PPO."""

    CLIP_MODEL_TYPE = "RN50"

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
            # Normalize with the statistics exposed by the CLIP preprocessor.
            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
    ]

    def __init__(self, add_prev_actions: bool = False, **kwargs):
        """Forward `kwargs` to the base config and set up the CLIP mixin.

        # Parameters

        add_prev_actions : Passed on to `create_model` of the mixin.
        """
        super().__init__(**kwargs)

        mixin_kwargs = dict(
            sensors=self.SENSORS,
            clip_model_type=self.CLIP_MODEL_TYPE,
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )
        self.preprocessing_and_model = ClipResNetPreprocessGRUActorCriticMixin(
            **mixin_kwargs
        )
        self.add_prev_actions = add_prev_actions

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline without auxiliary losses."""
        rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD
        return ObjectNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=rollout_period,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors defined by the mixin."""
        mixin = self.preprocessing_and_model
        return mixin.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model via the mixin with one output per action."""
        mixin = self.preprocessing_and_model
        return mixin.create_model(
            num_actions=self.ACTION_SPACE.n,
            add_prev_actions=self.add_prev_actions,
            **kwargs
        )

    @classmethod
    def tag(cls):
        """Return the experiment tag."""
        return "ObjectNav-RoboTHOR-RGB-ClipResNet50GRU-DDPPO"
class ObjectNavRoboThorBaseConfig(ObjectNavThorBaseConfig, ABC):
    """The base config for all RoboTHOR ObjectNav experiments."""

    THOR_COMMIT_ID = "bad5bc2b250615cb766ffb45d455c211329af17e"
    # Commit substituted for training when material randomization is enabled.
    THOR_COMMIT_ID_FOR_RAND_MATERIALS = "9549791ce2e7f472063a10abb1fb7664159fec23"

    AGENT_MODE = "locobot"

    DEFAULT_NUM_TRAIN_PROCESSES = 60 if torch.cuda.is_available() else 1

    # Dataset locations are resolved against the current working directory.
    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-objectnav/train")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-objectnav/val")
    TEST_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-objectnav/test")

    TARGET_TYPES = tuple(
        sorted(
            (
                "AlarmClock",
                "Apple",
                "BaseballBat",
                "BasketBall",
                "Bowl",
                "GarbageCan",
                "HousePlant",
                "Laptop",
                "Mug",
                "SprayBottle",
                "Television",
                "Vase",
            )
        )
    )

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the parent's training sampler arguments, switching the
        environment's `commit_id` to `THOR_COMMIT_ID_FOR_RAND_MATERIALS` when
        `randomize_train_materials` is set."""
        # `locals()` must be captured before any other local is bound so that
        # only this method's arguments are forwarded.
        forwarded = prepare_locals_for_super(locals())
        sampler_args = super(
            ObjectNavRoboThorBaseConfig, self
        ).train_task_sampler_args(**forwarded)

        if not self.randomize_train_materials:
            return sampler_args

        sampler_args["env_args"]["commit_id"] = self.THOR_COMMIT_ID_FOR_RAND_MATERIALS
        return sampler_args
class ObjectNavRoboThorRGBDAggerExperimentConfig(ObjectNavRoboThorBaseConfig):
    """An Object Navigation experiment configuration in RoboThor with RGB
    input, trained with DAgger."""

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
        ExpertActionSensor(
            nactions=len(ObjectNavTask.class_action_names()),
        ),
    ]

    def __init__(self, **kwargs):
        """Forward `kwargs` to the base config, disable reward shaping and set
        up the ResNet18 preprocessing/model mixin."""
        super().__init__(**kwargs)

        # The reward config's shaping entry is keyed "shaping_weight"; writing
        # to any other key leaves shaping enabled.
        self.REWARD_CONFIG["shaping_weight"] = 0

        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the DAgger training pipeline."""
        return ObjectNavDAggerMixin.training_pipeline(
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors defined by the mixin."""
        return self.preprocessing_and_model.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model via the mixin with one output per action."""
        return self.preprocessing_and_model.create_model(
            num_actions=self.ACTION_SPACE.n, **kwargs
        )

    @classmethod
    def tag(cls):
        """Return the experiment tag."""
        return "ObjectNav-RoboTHOR-RGB-ResNet18GRU-DAgger"
class ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """RoboTHOR ObjectNav experiment with RGB input preprocessed by a
    ResNet-50 and trained with PPO."""

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
    ]

    def __init__(self, **kwargs):
        """Forward `kwargs` to the base config and set up the ResNet50
        preprocessing/model mixin."""
        super().__init__(**kwargs)

        mixin_kwargs = dict(
            sensors=self.SENSORS,
            resnet_type="RN50",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )
        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            **mixin_kwargs
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO training pipeline without auxiliary losses."""
        rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD
        return ObjectNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=rollout_period,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors defined by the mixin."""
        mixin = self.preprocessing_and_model
        return mixin.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model via the mixin with one output per action."""
        n_actions = self.ACTION_SPACE.n
        return self.preprocessing_and_model.create_model(
            num_actions=n_actions, **kwargs
        )

    def tag(self):
        """Return the experiment tag."""
        return "ObjectNav-RoboTHOR-RGB-ResNet50GRU-DDPPO"
class ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
    """An Object Navigation experiment configuration in RoboThor with RGBD
    input.

    Both the RGB and the depth sensor are handled by a
    `ResNetPreprocessGRUActorCriticMixin` configured with `resnet_type="RN18"`;
    the training pipeline comes from `ObjectNavPPOMixin`.
    """

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        DepthSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
    ]

    def __init__(self, **kwargs):
        """Forward `kwargs` to the base config and build the model helper."""
        super().__init__(**kwargs)

        self.preprocessing_and_model = ResNetPreprocessGRUActorCriticMixin(
            sensors=self.SENSORS,
            resnet_type="RN18",
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=GoalObjectTypeThorSensor,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO pipeline without auxiliary losses (`kwargs` unused)."""
        return ObjectNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors produced by the model helper."""
        return self.preprocessing_and_model.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model via the helper, passing along `kwargs`."""
        return self.preprocessing_and_model.create_model(
            num_actions=self.ACTION_SPACE.n, **kwargs
        )

    # A classmethod so the tag can be read from the class as well as from an
    # instance; existing `config.tag()` instance calls keep working.
    @classmethod
    def tag(cls):
        """Return the experiment tag string."""
        return "ObjectNav-RoboTHOR-RGBD-ResNet18GRU-DDPPO"
@attr.s(kw_only=True)
class ResNetPreprocessGRUActorCriticMixin:
    """Builds ResNet preprocessors and the actor-critic model consuming them.

    All fields are keyword-only `attr.ib` attributes:

    * `sensors`: the experiment's sensors; the first `RGBSensor`, the first
      `DepthSensor` and the first instance of `goal_sensor_type` are used.
    * `resnet_type`: one of "RN18", "RN34", "RN50", "RN101" or "RN152".
    * `screen_size`: used as both input height and width of each
      `ResNetPreprocessor`.
    * `goal_sensor_type`: sensor class whose uuid identifies the goal input.
    """

    sensors: Sequence[Sensor] = attr.ib()
    resnet_type: str = attr.ib()
    screen_size: int = attr.ib()
    goal_sensor_type: Type[Sensor] = attr.ib()

    def _resnet_output_shape(self) -> Tuple[int, int, int]:
        """Return the `(dims, height, width)` feature shape for `resnet_type`.

        Raises:
            NotImplementedError: if `resnet_type` is not a supported value.
        """
        if self.resnet_type in ("RN18", "RN34"):
            return 512, 7, 7
        if self.resnet_type in ("RN50", "RN101", "RN152"):
            return 2048, 7, 7
        raise NotImplementedError(
            "`resnet_type` must be 'RNx' with x equaling one of"
            f" 18, 34, 50, 101, or 152; got {self.resnet_type!r}."
        )

    def _resnet_preprocessor_for(
        self,
        sensor_type: Type[Sensor],
        output_uuid: str,
        output_shape: Tuple[int, int, int],
    ) -> Optional[ResNetPreprocessor]:
        """Build a preprocessor for the first sensor of `sensor_type`, if any."""
        sensor = next((s for s in self.sensors if isinstance(s, sensor_type)), None)
        if sensor is None:
            return None

        return ResNetPreprocessor(
            input_height=self.screen_size,
            input_width=self.screen_size,
            output_width=output_shape[2],
            output_height=output_shape[1],
            output_dims=output_shape[0],
            pool=False,
            # "RN50" -> torchvision's `models.resnet50`, and so on.
            torchvision_resnet_model=getattr(
                models, f"resnet{self.resnet_type.replace('RN', '')}"
            ),
            input_uuids=[sensor.uuid],
            output_uuid=output_uuid,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return one ResNet preprocessor per available RGB / depth sensor.

        The RGB preprocessor (if any) comes first, followed by the depth one.

        Raises:
            NotImplementedError: if `resnet_type` is not a supported value.
        """
        # Validate the ResNet type up front, even if no visual sensor exists.
        output_shape = self._resnet_output_shape()

        candidates = (
            self._resnet_preprocessor_for(
                RGBSensor, "rgb_resnet_imagenet", output_shape
            ),
            self._resnet_preprocessor_for(
                DepthSensor, "depth_resnet_imagenet", output_shape
            ),
        )
        return [p for p in candidates if p is not None]

    def create_model(self, **kwargs) -> nn.Module:
        """Create the `ResnetTensorNavActorCritic` for the configured sensors.

        Only `kwargs["sensor_preprocessor_graph"]` is read; any other keyword
        argument is accepted and ignored. The action space always has one
        action per entry of `ObjectNavTask.class_action_names()`.
        """
        has_rgb = any(isinstance(s, RGBSensor) for s in self.sensors)
        has_depth = any(isinstance(s, DepthSensor) for s in self.sensors)

        # `None` when no sensor of the goal type is present.
        goal_sensor_uuid = next(
            (s.uuid for s in self.sensors if isinstance(s, self.goal_sensor_type)),
            None,
        )

        return ResnetTensorNavActorCritic(
            action_space=gym.spaces.Discrete(len(ObjectNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            goal_sensor_uuid=goal_sensor_uuid,
            rgb_resnet_preprocessor_uuid="rgb_resnet_imagenet" if has_rgb else None,
            depth_resnet_preprocessor_uuid=(
                "depth_resnet_imagenet" if has_depth else None
            ),
            hidden_size=512,
            goal_dims=32,
        )
def update_with_auxiliary_losses(
    named_losses: Dict[str, Tuple[AbstractActorCriticLoss, float]],
    auxiliary_uuids: Sequence[str],
    multiple_beliefs: bool,
) -> Dict[str, Tuple[AbstractActorCriticLoss, float]]:
    """Add the requested auxiliary losses to `named_losses` and return it.

    `named_losses` is updated in place. Every uuid in `auxiliary_uuids` must
    be the `UUID` of one of the auxiliary losses built below, otherwise a
    `KeyError` is raised before `named_losses` is touched. When
    `multiple_beliefs` is true a `MultiAuxTaskNegEntropyLoss` over
    `auxiliary_uuids` is added as well, with weight 0.01.
    """
    # Common scale applied to every auxiliary loss weight.
    aux_loss_total_weight = 2.0

    available: Dict[str, Tuple[AbstractActorCriticLoss, float]] = {
        InverseDynamicsLoss.UUID: (
            InverseDynamicsLoss(
                subsample_rate=0.2,
                subsample_min_num=10,  # TODO: test its effects
            ),
            0.05 * aux_loss_total_weight,
        ),
        TemporalDistanceLoss.UUID: (
            TemporalDistanceLoss(
                num_pairs=8,
                epsiode_len_min=5,  # TODO: test its effects
            ),
            0.2 * aux_loss_total_weight,
        ),
    }

    # CPC|A variants only differ in class; the plain ones subsample at 0.2
    # (TODO: test its effects), the softmax ones use every sample.
    cpca_families = (
        ((CPCA1Loss, CPCA2Loss, CPCA4Loss, CPCA8Loss, CPCA16Loss), 0.2),
        (
            (
                CPCA1SoftMaxLoss,
                CPCA2SoftMaxLoss,
                CPCA4SoftMaxLoss,
                CPCA8SoftMaxLoss,
                CPCA16SoftMaxLoss,
            ),
            1.0,
        ),
    )
    for loss_classes, rate in cpca_families:
        for loss_cls in loss_classes:
            available[loss_cls.UUID] = (
                loss_cls(subsample_rate=rate),
                0.05 * aux_loss_total_weight,
            )

    # Resolve every requested uuid first so a bad uuid leaves the input intact.
    selected = {uuid: available[uuid] for uuid in auxiliary_uuids}
    named_losses.update(selected)

    if multiple_beliefs:
        # The entropy loss over belief weights is added automatically.
        named_losses[MultiAuxTaskNegEntropyLoss.UUID] = (
            MultiAuxTaskNegEntropyLoss(auxiliary_uuids),
            0.01,
        )

    return named_losses
extra_losses), } named_losses = update_with_auxiliary_losses( named_losses=named_losses, auxiliary_uuids=auxiliary_uuids, multiple_beliefs=multiple_beliefs, ) return TrainingPipeline( save_interval=save_interval, metric_accumulate_interval=log_interval, optimizer_builder=Builder(optim.Adam, dict(lr=lr)), num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, named_losses={key: val[0] for key, val in named_losses.items()}, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=advance_scene_rollout_period, pipeline_stages=[ PipelineStage( loss_names=list(named_losses.keys()), max_stage_steps=ppo_steps, loss_weights=[val[1] for val in named_losses.values()], ) ], lr_scheduler_builder=( Builder(LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}) if anneal_lr else None ), ) ================================================ FILE: projects/pointnav_baselines/README.md ================================================ # Baseline models for the Point Navigation task in the Habitat, RoboTHOR and iTHOR environments This project contains the code for training baseline models on the PointNav task. In this setting the agent spawns at a location in an environment and is tasked to move to another location. The agent is given a "compass" that tells it the distance and bearing to the target position at every frame. Once the agent is confident that it has reached the end it executes the `END` action which terminates the episode. If the agent is within a set distance to the target (in our case 0.2 meters) the agent succeeded, else it failed. Provided are experiment configs for training a simple convolutional model with an GRU using `RGB`, `Depth` or `RGBD` as inputs in [Habitat](https://github.com/facebookresearch/habitat-sim), [RoboTHOR](https://ai2thor.allenai.org/robothor/) and [iTHOR](https://ai2thor.allenai.org/ithor/). 
The experiments are set up to train models using the [DD-PPO](https://arxiv.org/pdf/1911.00357.pdf) Reinforcement Learning Algorithm. To train an experiment, run the following command from the `allenact` root directory: ```bash python main.py -o <PATH_TO_OUTPUT> -c -b <BASE_DIRECTORY_OF_YOUR_EXPERIMENT> <EXPERIMENT_NAME> ``` Where `<PATH_TO_OUTPUT>` is the path of the directory where we want the model weights and logs to be stored, `<BASE_DIRECTORY_OF_YOUR_EXPERIMENT>` is the directory where our experiment file is located and `<EXPERIMENT_NAME>` is the name of the python module containing the experiment. An example usage of this command would be: ```bash python main.py -o storage/pointnav-robothor-depth -b projects/pointnav_baselines/experiments/robothor/ pointnav_robothor_depth_simpleconvgru_ddppo ``` This trains a simple convolutional neural network with a GRU using Depth input on the PointNav task in the RoboTHOR environment and stores the model weights and logs to `storage/pointnav-robothor-depth`. ================================================ FILE: projects/pointnav_baselines/__init__.py ================================================ ================================================ FILE: projects/pointnav_baselines/experiments/__init__.py ================================================ ================================================ FILE: projects/pointnav_baselines/experiments/habitat/__init__.py ================================================ ================================================ FILE: projects/pointnav_baselines/experiments/habitat/clip/__init__.py ================================================ ================================================ FILE: projects/pointnav_baselines/experiments/habitat/clip/pointnav_habitat_rgb_clipresnet50gru_ddppo.py ================================================ from typing import Sequence, Union import torch.nn as nn from allenact.base_abstractions.preprocessor import Preprocessor from allenact.utils.experiment_utils import Builder, TrainingPipeline from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor from
class PointNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig(PointNavHabitatBaseConfig):
    """A Point Navigation experiment configuration in Habitat with RGB
    input.

    The RGB sensor is normalized with the CLIP means / standard deviations
    exposed by `ClipResNetPreprocessor`, and the model helper is a
    `ClipResNetPreprocessGRUActorCriticMixin` using `CLIP_MODEL_TYPE`.
    """

    CLIP_MODEL_TYPE = "RN50"

    SENSORS = [
        RGBSensorHabitat(
            height=PointNavHabitatBaseConfig.SCREEN_SIZE,
            width=PointNavHabitatBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
    ]

    def __init__(self, add_prev_actions: bool = False, **kwargs):
        """Build the config.

        Args:
            add_prev_actions: forwarded to the model helper's `create_model`.
            **kwargs: forwarded unchanged to the base config's `__init__`.
        """
        super().__init__(**kwargs)

        self.preprocessing_and_model = ClipResNetPreprocessGRUActorCriticMixin(
            sensors=self.SENSORS,
            clip_model_type=self.CLIP_MODEL_TYPE,
            screen_size=self.SCREEN_SIZE,
            goal_sensor_type=TargetCoordinatesSensorHabitat,
        )
        self.add_prev_actions = add_prev_actions

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO pipeline without auxiliary losses and without
        advantage normalization (`kwargs` unused)."""
        return PointNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=False,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
        """Return the preprocessors produced by the model helper."""
        return self.preprocessing_and_model.preprocessors()

    def create_model(self, **kwargs) -> nn.Module:
        """Create the model via the helper, passing along `kwargs`."""
        return self.preprocessing_and_model.create_model(
            num_actions=self.ACTION_SPACE.n,
            add_prev_actions=self.add_prev_actions,
            **kwargs,
        )

    @classmethod
    def tag(cls):
        """Return the experiment tag string."""
        return "PointNav-Habitat-RGB-ClipResNet50GRU-DDPPO"
def create_pointnav_config(
    config_yaml_path: str,
    mode: str,
    scenes_path: str,
    simulator_gpu_ids: Sequence[int],
    distance_to_goal: float,
    rotation_degrees: float,
    step_size: float,
    max_steps: int,
    num_processes: int,
    camera_width: int,
    camera_height: int,
    using_rgb: bool,
    using_depth: bool,
    training: bool,
    num_episode_sample: int,
) -> habitat.Config:
    """Load a Habitat config from `config_yaml_path` and specialize it for
    PointNav.

    The config is defrosted, populated from the arguments, frozen again and
    returned. When `training` is false the seed is fixed to 0 and episode
    shuffling is disabled. `num_episode_sample` is only applied when it is
    positive.
    """
    config = get_habitat_config(config_yaml_path)
    config.defrost()

    # Top-level process / device settings.
    config.NUM_PROCESSES = num_processes
    config.SIMULATOR_GPU_IDS = simulator_gpu_ids

    # Dataset location.
    dataset = config.DATASET
    dataset.SCENES_DIR = HABITAT_SCENE_DATASETS_DIR
    dataset.DATA_PATH = scenes_path

    # Simulator: enabled sensors (RGB first, then depth), camera and motion.
    simulator = config.SIMULATOR
    enabled_sensors = []
    if using_rgb:
        enabled_sensors.append("RGB_SENSOR")
    if using_depth:
        enabled_sensors.append("DEPTH_SENSOR")
    simulator.AGENT_0.SENSORS = enabled_sensors

    for camera in (simulator.RGB_SENSOR, simulator.DEPTH_SENSOR):
        camera.WIDTH = camera_width
        camera.HEIGHT = camera_height

    simulator.TURN_ANGLE = rotation_degrees
    simulator.FORWARD_STEP_SIZE = step_size

    config.ENVIRONMENT.MAX_EPISODE_STEPS = max_steps

    # Task definition: polar 2D point goal, success within `distance_to_goal`.
    task = config.TASK
    task.TYPE = "Nav-v0"
    task.SUCCESS_DISTANCE = distance_to_goal
    task.SENSORS = ["POINTGOAL_WITH_GPS_COMPASS_SENSOR"]
    task.POINTGOAL_WITH_GPS_COMPASS_SENSOR.GOAL_FORMAT = "POLAR"
    task.POINTGOAL_WITH_GPS_COMPASS_SENSOR.DIMENSIONALITY = 2
    task.GOAL_SENSOR_UUID = "pointgoal_with_gps_compass"
    task.MEASUREMENTS = ["DISTANCE_TO_GOAL", "SUCCESS", "SPL"]
    task.SPL.TYPE = "SPL"
    task.SPL.SUCCESS_DISTANCE = distance_to_goal
    task.SUCCESS.SUCCESS_DISTANCE = distance_to_goal

    if not training:
        # Evaluation runs use a fixed seed and an unshuffled episode order.
        config.SEED = 0
        config.ENVIRONMENT.ITERATOR_OPTIONS.SHUFFLE = False

    if num_episode_sample > 0:
        config.ENVIRONMENT.ITERATOR_OPTIONS.NUM_EPISODE_SAMPLE = num_episode_sample

    config.MODE = mode
    config.freeze()

    return config
DEFAULT_TRAIN_GPU_IDS = tuple(range(torch.cuda.device_count())) DEFAULT_VALID_GPU_IDS = [torch.cuda.device_count() - 1] DEFAULT_TEST_GPU_IDS = [torch.cuda.device_count() - 1] def __init__( self, debug: bool = False, num_train_processes: Optional[int] = None, num_test_processes: Optional[int] = None, test_on_validation: bool = False, run_valid: bool = True, train_gpu_ids: Optional[Sequence[int]] = None, val_gpu_ids: Optional[Sequence[int]] = None, test_gpu_ids: Optional[Sequence[int]] = None, **kwargs, ): super().__init__(**kwargs) def v_or_default(v, default): return v if v is not None else default self.num_train_processes = v_or_default( num_train_processes, self.DEFAULT_NUM_TRAIN_PROCESSES ) self.num_test_processes = v_or_default( num_test_processes, (10 if torch.cuda.is_available() else 1) ) self.test_on_validation = test_on_validation self.run_valid = run_valid self.train_gpu_ids = v_or_default(train_gpu_ids, self.DEFAULT_TRAIN_GPU_IDS) self.val_gpu_ids = v_or_default( val_gpu_ids, self.DEFAULT_VALID_GPU_IDS if run_valid else [] ) self.test_gpu_ids = v_or_default(test_gpu_ids, self.DEFAULT_TEST_GPU_IDS) def create_config( mode: str, scenes_path: str, num_processes: int, simulator_gpu_ids: Sequence[int], training: bool = True, num_episode_sample: int = -1, ): return create_pointnav_config( config_yaml_path=self.BASE_CONFIG_YAML_PATH, mode=mode, scenes_path=scenes_path, simulator_gpu_ids=simulator_gpu_ids, distance_to_goal=self.DISTANCE_TO_GOAL, rotation_degrees=self.ROTATION_DEGREES, step_size=self.STEP_SIZE, max_steps=self.MAX_STEPS, num_processes=num_processes, camera_width=self.CAMERA_WIDTH, camera_height=self.CAMERA_HEIGHT, using_rgb=any(isinstance(s, RGBSensor) for s in self.SENSORS), using_depth=any(isinstance(s, DepthSensor) for s in self.SENSORS), training=training, num_episode_sample=num_episode_sample, ) self.TRAIN_CONFIG = create_config( mode="train", scenes_path=self.train_scenes_path(), num_processes=self.num_train_processes, 
simulator_gpu_ids=self.train_gpu_ids, training=True, ) self.VALID_CONFIG = create_config( mode="validate", scenes_path=self.valid_scenes_path(), num_processes=1, simulator_gpu_ids=self.val_gpu_ids, training=False, num_episode_sample=200, ) self.TEST_CONFIG = create_config( mode="validate", scenes_path=self.test_scenes_path(), num_processes=self.num_test_processes, simulator_gpu_ids=self.test_gpu_ids, training=False, ) self.TRAIN_CONFIGS_PER_PROCESS = construct_env_configs( self.TRAIN_CONFIG, allow_scene_repeat=True ) if debug: get_logger().warning("IN DEBUG MODE, WILL ONLY USE `Adrian` SCENE!!!") for config in self.TRAIN_CONFIGS_PER_PROCESS: config.defrost() config.DATASET.CONTENT_SCENES = ["Adrian"] config.freeze() self.TEST_CONFIG_PER_PROCESS = construct_env_configs( self.TEST_CONFIG, allow_scene_repeat=False ) def train_scenes_path(self): return self.TASK_DATA_DIR_TEMPLATE.format(*(["train"] * 2)) def valid_scenes_path(self): return self.TASK_DATA_DIR_TEMPLATE.format(*(["val"] * 2)) def test_scenes_path(self): get_logger().warning("Running tests on the validation set!") return self.TASK_DATA_DIR_TEMPLATE.format(*(["val"] * 2)) # return self.TASK_DATA_DIR_TEMPLATE.format(*(["test"] * 2)) @classmethod def tag(cls): return "PointNav" def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: return tuple() def machine_params(self, mode="train", **kwargs): has_gpus = torch.cuda.is_available() if not has_gpus: gpu_ids = [] nprocesses = 1 elif mode == "train": gpu_ids = self.train_gpu_ids nprocesses = self.num_train_processes elif mode == "valid": gpu_ids = self.val_gpu_ids nprocesses = 1 if self.run_valid else 0 elif mode == "test": gpu_ids = self.test_gpu_ids nprocesses = self.num_test_processes else: raise NotImplementedError("mode must be 'train', 'valid', or 'test'.") if has_gpus: nprocesses = evenly_distribute_count_into_bins(nprocesses, len(gpu_ids)) sensor_preprocessor_graph = ( SensorPreprocessorGraph( 
source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces, preprocessors=self.preprocessors(), ) if mode == "train" or ( (isinstance(nprocesses, int) and nprocesses > 0) or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0) ) else None ) return MachineParams( nprocesses=nprocesses, devices=gpu_ids, sensor_preprocessor_graph=sensor_preprocessor_graph, ) @classmethod def make_sampler_fn(cls, **kwargs) -> TaskSampler: return PointNavTaskSampler( **{"failed_end_reward": cls.FAILED_END_REWARD, **kwargs} # type: ignore ) def train_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: config = self.TRAIN_CONFIGS_PER_PROCESS[process_ind] return { "env_config": config, "max_steps": self.MAX_STEPS, "sensors": self.SENSORS, "action_space": self.ACTION_SPACE, "distance_to_goal": self.DISTANCE_TO_GOAL, } def valid_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: if total_processes != 1: raise NotImplementedError( "In validation, `total_processes` must equal 1 for habitat tasks" ) return { "env_config": self.VALID_CONFIG, "max_steps": self.MAX_STEPS, "sensors": self.SENSORS, "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())), "distance_to_goal": self.DISTANCE_TO_GOAL, } def test_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: config = self.TEST_CONFIG_PER_PROCESS[process_ind] return { "env_config": config, "max_steps": self.MAX_STEPS, "sensors": self.SENSORS, "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())), "distance_to_goal": self.DISTANCE_TO_GOAL, } 
class PointNavHabitatDepthDeterministiSimpleConvGRUDDPPOExperimentConfig(
    PointNavHabitatBaseConfig,
):
    """A Point Navigation experiment configuration in Habitat with Depth
    input.

    The model is created by a `PointNavUnfrozenResNetWithGRUActorCriticMixin`
    with the `"simple_cnn"` backbone.
    """

    SENSORS = [
        DepthSensorHabitat(
            height=PointNavHabitatBaseConfig.SCREEN_SIZE,
            width=PointNavHabitatBaseConfig.SCREEN_SIZE,
            use_normalization=True,
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
    ]

    def __init__(self, **kwargs):
        """Build the config.

        `kwargs` are forwarded to `PointNavHabitatBaseConfig.__init__` so its
        options (e.g. `debug`, `num_train_processes`, `train_gpu_ids`) can be
        set; calling with no arguments behaves as before.
        """
        super().__init__(**kwargs)

        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO pipeline without auxiliary losses (`kwargs` unused)."""
        return PointNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def create_model(self, **kwargs):
        """Create the model via the helper, passing along `kwargs`."""
        return self.model_creation_handler.create_model(**kwargs)

    # The base config declares `tag` as a classmethod; keep the override
    # callable on the class too. Instance calls are unaffected.
    @classmethod
    def tag(cls):
        """Return the experiment tag string."""
        return "PointNav-Habitat-Depth-SimpleConv-DDPPO"
# NOTE: the class name mentions "Depth" although the sensors are RGB; the
# name is kept unchanged so existing references keep working.
class PointNavHabitatDepthDeterministiSimpleConvGRUDDPPOExperimentConfig(
    PointNavHabitatBaseConfig
):
    """A Point Navigation experiment configuration in Habitat with RGB
    input.

    The model is created by a `PointNavUnfrozenResNetWithGRUActorCriticMixin`
    with the `"simple_cnn"` backbone.
    """

    SENSORS = [
        RGBSensorHabitat(
            height=PointNavHabitatBaseConfig.SCREEN_SIZE,
            width=PointNavHabitatBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
    ]

    def __init__(self, **kwargs):
        """Build the config.

        `kwargs` are forwarded to `PointNavHabitatBaseConfig.__init__` so its
        options (e.g. `debug`, `num_train_processes`, `train_gpu_ids`) can be
        set; calling with no arguments behaves as before.
        """
        super().__init__(**kwargs)

        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the PPO pipeline without auxiliary losses (`kwargs` unused)."""
        return PointNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def create_model(self, **kwargs):
        """Create the model via the helper, passing along `kwargs`."""
        return self.model_creation_handler.create_model(**kwargs)

    @classmethod
    def tag(cls):
        """Return the experiment tag string."""
        return "PointNav-Habitat-RGB-SimpleConv-DDPPO"
import ( PointNavHabitatBaseConfig, ) from projects.pointnav_baselines.mixins import PointNavPPOMixin from projects.pointnav_baselines.mixins import ( PointNavUnfrozenResNetWithGRUActorCriticMixin, ) class PointNavHabitatDepthDeterministiSimpleConvGRUDDPPOExperimentConfig( PointNavHabitatBaseConfig ): """An Point Navigation experiment configuration in Habitat with RGBD input.""" SENSORS = [ RGBSensorHabitat( height=PointNavHabitatBaseConfig.SCREEN_SIZE, width=PointNavHabitatBaseConfig.SCREEN_SIZE, use_resnet_normalization=True, ), DepthSensorHabitat( height=PointNavHabitatBaseConfig.SCREEN_SIZE, width=PointNavHabitatBaseConfig.SCREEN_SIZE, use_normalization=True, ), TargetCoordinatesSensorHabitat(coordinate_dims=2), ] def __init__(self): super().__init__() self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin( backbone="simple_cnn", sensors=self.SENSORS, auxiliary_uuids=[], add_prev_actions=True, multiple_beliefs=False, belief_fusion=None, ) def training_pipeline(self, **kwargs) -> TrainingPipeline: return PointNavPPOMixin.training_pipeline( auxiliary_uuids=[], multiple_beliefs=False, normalize_advantage=True, advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD, ) def create_model(self, **kwargs): return self.model_creation_handler.create_model(**kwargs) def tag(self): return "PointNav-Habitat-RGBD-SimpleConv-DDPPO" ================================================ FILE: projects/pointnav_baselines/experiments/ithor/__init__.py ================================================ ================================================ FILE: projects/pointnav_baselines/experiments/ithor/pointnav_ithor_base.py ================================================ import os from abc import ABC from projects.pointnav_baselines.experiments.pointnav_thor_base import ( PointNavThorBaseConfig, ) class PointNaviThorBaseConfig(PointNavThorBaseConfig, ABC): """The base config for all iTHOR PointNav experiments.""" NUM_PROCESSES = 40 TRAIN_DATASET_DIR = 
os.path.join(os.getcwd(), "datasets/ithor-pointnav/train") VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-pointnav/val") ================================================ FILE: projects/pointnav_baselines/experiments/ithor/pointnav_ithor_depth_simpleconvgru_ddppo.py ================================================ from allenact.utils.experiment_utils import TrainingPipeline from allenact_plugins.robothor_plugin.robothor_sensors import ( DepthSensorThor, GPSCompassSensorRoboThor, ) from projects.pointnav_baselines.mixins import ( PointNavUnfrozenResNetWithGRUActorCriticMixin, ) from projects.pointnav_baselines.experiments.ithor.pointnav_ithor_base import ( PointNaviThorBaseConfig, ) from projects.pointnav_baselines.mixins import PointNavPPOMixin class PointNaviThorDepthPPOExperimentConfig(PointNaviThorBaseConfig): """An Point Navigation experiment configuration in iThor with Depth input.""" SENSORS = [ DepthSensorThor( height=PointNaviThorBaseConfig.SCREEN_SIZE, width=PointNaviThorBaseConfig.SCREEN_SIZE, use_normalization=True, uuid="depth_lowres", ), GPSCompassSensorRoboThor(), ] def __init__(self): super().__init__() self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin( backbone="simple_cnn", sensors=self.SENSORS, auxiliary_uuids=[], add_prev_actions=True, multiple_beliefs=False, belief_fusion=None, ) def training_pipeline(self, **kwargs) -> TrainingPipeline: return PointNavPPOMixin.training_pipeline( auxiliary_uuids=[], multiple_beliefs=False, normalize_advantage=True, advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD, ) def create_model(self, **kwargs): return self.model_creation_handler.create_model(**kwargs) def tag(self): return "PointNav-iTHOR-Depth-SimpleConv-DDPPO" ================================================ FILE: projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgb_simpleconvgru_ddppo.py ================================================ from allenact.utils.experiment_utils import 
TrainingPipeline from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor from projects.pointnav_baselines.mixins import ( PointNavUnfrozenResNetWithGRUActorCriticMixin, ) from projects.pointnav_baselines.experiments.ithor.pointnav_ithor_base import ( PointNaviThorBaseConfig, ) from projects.pointnav_baselines.mixins import PointNavPPOMixin class PointNaviThorRGBPPOExperimentConfig(PointNaviThorBaseConfig): """An Point Navigation experiment configuration in iThor with RGB input.""" SENSORS = [ RGBSensorThor( height=PointNaviThorBaseConfig.SCREEN_SIZE, width=PointNaviThorBaseConfig.SCREEN_SIZE, use_resnet_normalization=True, uuid="rgb_lowres", ), GPSCompassSensorRoboThor(), ] def __init__(self): super().__init__() self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin( backbone="simple_cnn", sensors=self.SENSORS, auxiliary_uuids=[], add_prev_actions=True, multiple_beliefs=False, belief_fusion=None, ) def training_pipeline(self, **kwargs) -> TrainingPipeline: return PointNavPPOMixin.training_pipeline( auxiliary_uuids=[], multiple_beliefs=False, normalize_advantage=True, advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD, ) def create_model(self, **kwargs): return self.model_creation_handler.create_model(**kwargs) def tag(self): return "PointNav-iTHOR-RGB-SimpleConv-DDPPO" ================================================ FILE: projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgbd_simpleconvgru_ddppo.py ================================================ from allenact.utils.experiment_utils import TrainingPipeline from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor from projects.pointnav_baselines.mixins import ( 
PointNavUnfrozenResNetWithGRUActorCriticMixin, ) from projects.pointnav_baselines.experiments.ithor.pointnav_ithor_base import ( PointNaviThorBaseConfig, ) from projects.pointnav_baselines.mixins import PointNavPPOMixin class PointNaviThorRGBDPPOExperimentConfig(PointNaviThorBaseConfig): """An Point Navigation experiment configuration in iThor with RGBD input.""" SENSORS = [ RGBSensorThor( height=PointNaviThorBaseConfig.SCREEN_SIZE, width=PointNaviThorBaseConfig.SCREEN_SIZE, use_resnet_normalization=True, uuid="rgb_lowres", ), DepthSensorThor( height=PointNaviThorBaseConfig.SCREEN_SIZE, width=PointNaviThorBaseConfig.SCREEN_SIZE, use_normalization=True, uuid="depth_lowres", ), GPSCompassSensorRoboThor(), ] def __init__(self): super().__init__() self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin( backbone="simple_cnn", sensors=self.SENSORS, auxiliary_uuids=[], add_prev_actions=True, multiple_beliefs=False, belief_fusion=None, ) def training_pipeline(self, **kwargs) -> TrainingPipeline: return PointNavPPOMixin.training_pipeline( auxiliary_uuids=[], multiple_beliefs=False, normalize_advantage=True, advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD, ) def create_model(self, **kwargs): return self.model_creation_handler.create_model(**kwargs) def tag(self): return "PointNav-iTHOR-RGBD-SimpleConv-DDPPO" ================================================ FILE: projects/pointnav_baselines/experiments/pointnav_base.py ================================================ from abc import ABC from typing import Optional, Sequence from allenact.base_abstractions.experiment_config import ExperimentConfig from allenact.base_abstractions.sensor import Sensor class PointNavBaseConfig(ExperimentConfig, ABC): """An Object Navigation experiment configuration in iThor.""" ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None SENSORS: Optional[Sequence[Sensor]] = None STEP_SIZE = 0.25 ROTATION_DEGREES = 30.0 DISTANCE_TO_GOAL = 0.2 STOCHASTIC = True 
CAMERA_WIDTH = 400 CAMERA_HEIGHT = 300 SCREEN_SIZE = 224 MAX_STEPS = 500 def __init__(self): self.REWARD_CONFIG = { "step_penalty": -0.01, "goal_success_reward": 10.0, "failed_stop_reward": 0.0, "reached_max_steps_reward": 0.0, "shaping_weight": 1.0, } ================================================ FILE: projects/pointnav_baselines/experiments/pointnav_thor_base.py ================================================ import glob import os import platform from abc import ABC from math import ceil from typing import Dict, Any, List, Optional, Sequence import ai2thor import gym import numpy as np import torch from packaging import version from allenact.base_abstractions.experiment_config import MachineParams from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph from allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor from allenact.base_abstractions.task import TaskSampler from allenact.utils.experiment_utils import evenly_distribute_count_into_bins from allenact.utils.system import get_logger from allenact_plugins.ithor_plugin.ithor_util import get_open_x_displays from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor from allenact_plugins.robothor_plugin.robothor_task_samplers import ( PointNavDatasetTaskSampler, ) from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask from projects.pointnav_baselines.experiments.pointnav_base import PointNavBaseConfig if ai2thor.__version__ not in ["0.0.1", None] and version.parse( ai2thor.__version__ ) < version.parse("2.7.2"): raise ImportError( "To run the PointNav baseline experiments you must use" " ai2thor version 2.7.1 or higher." 
) class PointNavThorBaseConfig(PointNavBaseConfig, ABC): """The base config for all iTHOR PointNav experiments.""" NUM_PROCESSES: Optional[int] = None TRAIN_GPU_IDS = list(range(torch.cuda.device_count())) VALID_GPU_IDS = [torch.cuda.device_count() - 1] TEST_GPU_IDS = [torch.cuda.device_count() - 1] TRAIN_DATASET_DIR: Optional[str] = None VAL_DATASET_DIR: Optional[str] = None TARGET_TYPES: Optional[Sequence[str]] = None ACTION_SPACE = gym.spaces.Discrete(len(PointNavTask.class_action_names())) def __init__(self): super().__init__() self.ENV_ARGS = dict( width=self.CAMERA_WIDTH, height=self.CAMERA_HEIGHT, continuousMode=True, applyActionNoise=self.STOCHASTIC, rotateStepDegrees=self.ROTATION_DEGREES, gridSize=self.STEP_SIZE, snapToGrid=False, agentMode="bot", include_private_scenes=False, renderDepthImage=any(isinstance(s, DepthSensorThor) for s in self.SENSORS), ) def preprocessors(self): return tuple() def machine_params(self, mode="train", **kwargs): sampler_devices: Sequence[int] = [] if mode == "train": workers_per_device = 1 gpu_ids = ( [] if not torch.cuda.is_available() else self.TRAIN_GPU_IDS * workers_per_device ) nprocesses = ( 1 if not torch.cuda.is_available() else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids)) ) sampler_devices = self.TRAIN_GPU_IDS elif mode == "valid": nprocesses = 1 if torch.cuda.is_available() else 0 gpu_ids = [] if not torch.cuda.is_available() else self.VALID_GPU_IDS elif mode == "test": nprocesses = 10 gpu_ids = [] if not torch.cuda.is_available() else self.TEST_GPU_IDS else: raise NotImplementedError("mode must be 'train', 'valid', or 'test'.") sensor_preprocessor_graph = ( SensorPreprocessorGraph( source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces, preprocessors=self.preprocessors(), ) if mode == "train" or ( (isinstance(nprocesses, int) and nprocesses > 0) or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0) ) else None ) return MachineParams( nprocesses=nprocesses, 
devices=gpu_ids, sampler_devices=( sampler_devices if mode == "train" else gpu_ids ), # ignored with > 1 gpu_ids sensor_preprocessor_graph=sensor_preprocessor_graph, ) @classmethod def make_sampler_fn(cls, **kwargs) -> TaskSampler: return PointNavDatasetTaskSampler(**kwargs) @staticmethod def _partition_inds(n: int, num_parts: int): return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype( np.int32 ) def _get_sampler_args_for_scene_split( self, scenes_dir: str, process_ind: int, total_processes: int, devices: Optional[List[int]], seeds: Optional[List[int]], deterministic_cudnn: bool, include_expert_sensor: bool = True, ) -> Dict[str, Any]: path = os.path.join(scenes_dir, "*.json.gz") scenes = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)] if len(scenes) == 0: raise RuntimeError( ( "Could find no scene dataset information in directory {}." " Are you sure you've downloaded them? " " If not, see https://allenact.org/installation/download-datasets/ information" " on how this can be done." ).format(scenes_dir) ) oversample_warning = ( f"Warning: oversampling some of the scenes ({scenes}) to feed all processes ({total_processes})." 
" You can avoid this by setting a number of workers divisible by the number of scenes" ) if total_processes > len(scenes): # oversample some scenes -> bias if total_processes % len(scenes) != 0: get_logger().warning(oversample_warning) scenes = scenes * int(ceil(total_processes / len(scenes))) scenes = scenes[: total_processes * (len(scenes) // total_processes)] elif len(scenes) % total_processes != 0: get_logger().warning(oversample_warning) inds = self._partition_inds(len(scenes), total_processes) x_display: Optional[str] = None if platform.system() == "Linux": x_displays = get_open_x_displays(throw_error_if_empty=True) if len([d for d in devices if d != torch.device("cpu")]) > len(x_displays): get_logger().warning( f"More GPU devices found than X-displays (devices: `{x_displays}`, x_displays: `{x_displays}`)." f" This is not necessarily a bad thing but may mean that you're not using GPU memory as" f" efficiently as possible. Consider following the instructions here:" f" https://allenact.org/installation/installation-framework/#installation-of-ithor-ithor-plugin" f" describing how to start an X-display on every GPU." 
) x_display = x_displays[process_ind % len(x_displays)] return { "scenes": scenes[inds[process_ind] : inds[process_ind + 1]], "object_types": self.TARGET_TYPES, "max_steps": self.MAX_STEPS, "sensors": [ s for s in self.SENSORS if (include_expert_sensor or not isinstance(s, ExpertActionSensor)) ], "action_space": self.ACTION_SPACE, "seed": seeds[process_ind] if seeds is not None else None, "deterministic_cudnn": deterministic_cudnn, "rewards_config": self.REWARD_CONFIG, "env_args": { **self.ENV_ARGS, "x_display": x_display, }, } def train_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: res = self._get_sampler_args_for_scene_split( os.path.join(self.TRAIN_DATASET_DIR, "episodes"), process_ind, total_processes, devices=devices, seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) res["scene_directory"] = self.TRAIN_DATASET_DIR res["loop_dataset"] = True res["allow_flipping"] = True return res def valid_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: res = self._get_sampler_args_for_scene_split( os.path.join(self.VAL_DATASET_DIR, "episodes"), process_ind, total_processes, devices=devices, seeds=seeds, deterministic_cudnn=deterministic_cudnn, include_expert_sensor=False, ) res["scene_directory"] = self.VAL_DATASET_DIR res["loop_dataset"] = False return res def test_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self.valid_task_sampler_args( process_ind=process_ind, total_processes=total_processes, devices=devices, seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) ================================================ FILE: 
projects/pointnav_baselines/experiments/robothor/__init__.py
================================================

================================================
FILE: projects/pointnav_baselines/experiments/robothor/pointnav_robothor_base.py
================================================
import os
from abc import ABC

from projects.pointnav_baselines.experiments.pointnav_thor_base import (
    PointNavThorBaseConfig,
)


class PointNavRoboThorBaseConfig(PointNavThorBaseConfig, ABC):
    """The base config for all RoboTHOR PointNav experiments."""

    NUM_PROCESSES = 60

    # Dataset locations are resolved against the current working directory.
    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/train")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/val")

================================================
FILE: projects/pointnav_baselines/experiments/robothor/pointnav_robothor_depth_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.robothor_plugin.robothor_sensors import (
    DepthSensorThor,
    GPSCompassSensorRoboThor,
)
from projects.pointnav_baselines.mixins import (
    PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
from projects.pointnav_baselines.experiments.robothor.pointnav_robothor_base import (
    PointNavRoboThorBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin


# NOTE(review): the class name says "RGB" although this config uses a depth
# sensor; the name is left unchanged here.
class PointNavRoboThorRGBPPOExperimentConfig(
    PointNavRoboThorBaseConfig,
):
    """A Point Navigation experiment configuration in RoboTHOR with Depth input."""

    # One normalized depth camera plus the GPS/compass goal sensor.
    SENSORS = [
        DepthSensorThor(
            height=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            width=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    def __init__(self):
        """Initialise the base config and attach the model-creation helper."""
        super().__init__()

        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the pipeline built by `PointNavPPOMixin` (`kwargs` are unused)."""
        return PointNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def create_model(self, **kwargs):
        """Delegate model construction to `self.model_creation_handler`."""
        return self.model_creation_handler.create_model(**kwargs)

    def tag(self):
        """Return the tag string identifying this experiment."""
        return "PointNav-RoboTHOR-Depth-SimpleConv-DDPPO"

================================================
FILE: projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from projects.pointnav_baselines.mixins import (
    PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
from projects.pointnav_baselines.experiments.robothor.pointnav_robothor_base import (
    PointNavRoboThorBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin


class PointNavRoboThorRGBPPOExperimentConfig(
    PointNavRoboThorBaseConfig,
):
    """A Point Navigation experiment configuration in RoboTHOR with RGB input."""

    # One ResNet-normalized RGB camera plus the GPS/compass goal sensor.
    SENSORS = [
        RGBSensorThor(
            height=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            width=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    def __init__(self):
        """Initialise the base config and attach the model-creation helper."""
        super().__init__()

        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the pipeline built by `PointNavPPOMixin` (`kwargs` are unused)."""
        return PointNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def create_model(self, **kwargs):
        """Delegate model construction to `self.model_creation_handler`."""
        return self.model_creation_handler.create_model(**kwargs)

    def tag(self):
        """Return the tag string identifying this experiment."""
        return "PointNav-RoboTHOR-RGB-SimpleConv-DDPPO"

================================================
FILE: projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgbd_simpleconvgru_ddppo.py
================================================
from allenact.utils.experiment_utils import TrainingPipeline
from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
from allenact_plugins.robothor_plugin.robothor_sensors import DepthSensorThor
from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor
from projects.pointnav_baselines.mixins import (
    PointNavUnfrozenResNetWithGRUActorCriticMixin,
)
from projects.pointnav_baselines.experiments.robothor.pointnav_robothor_base import (
    PointNavRoboThorBaseConfig,
)
from projects.pointnav_baselines.mixins import PointNavPPOMixin


# NOTE(review): the class name says "RGB" although this config uses both RGB
# and depth sensors; the name is left unchanged here.
class PointNavRoboThorRGBPPOExperimentConfig(
    PointNavRoboThorBaseConfig,
):
    """A Point Navigation experiment configuration in RoboTHOR with RGBD input."""

    # RGB and depth cameras at the same resolution, plus the GPS/compass sensor.
    SENSORS = [
        RGBSensorThor(
            height=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            width=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        DepthSensorThor(
            height=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            width=PointNavRoboThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    def __init__(self):
        """Initialise the base config and attach the model-creation helper."""
        super().__init__()

        self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
            backbone="simple_cnn",
            sensors=self.SENSORS,
            auxiliary_uuids=[],
            add_prev_actions=True,
            multiple_beliefs=False,
            belief_fusion=None,
        )

    def training_pipeline(self, **kwargs) -> TrainingPipeline:
        """Return the pipeline built by `PointNavPPOMixin` (`kwargs` are unused)."""
        return PointNavPPOMixin.training_pipeline(
            auxiliary_uuids=[],
            multiple_beliefs=False,
            normalize_advantage=True,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        )

    def create_model(self, **kwargs):
        """Delegate model construction to `self.model_creation_handler`."""
        return self.model_creation_handler.create_model(**kwargs)

    def tag(self):
return "PointNav-RoboTHOR-RGBD-SimpleConv-DDPPO" ================================================ FILE: projects/pointnav_baselines/mixins.py ================================================ from typing import Optional from typing import Sequence import attr import gym import torch import torch.nn as nn import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR from allenact.algorithms.onpolicy_sync.losses import PPO from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig from allenact.base_abstractions.sensor import Sensor from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor from allenact.utils.experiment_utils import ( Builder, TrainingPipeline, PipelineStage, LinearDecay, ) from projects.objectnav_baselines.mixins import update_with_auxiliary_losses # fmt: off try: # Habitat may not be installed, just create a fake class here in that case from allenact_plugins.habitat_plugin.habitat_sensors import TargetCoordinatesSensorHabitat except ImportError: class TargetCoordinatesSensorHabitat: #type:ignore pass # fmt: on from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask from allenact_plugins.navigation_plugin.pointnav.models import PointNavActorCritic @attr.s(kw_only=True) class PointNavUnfrozenResNetWithGRUActorCriticMixin: backbone: str = attr.ib() sensors: Sequence[Sensor] = attr.ib() auxiliary_uuids: Sequence[str] = attr.ib() add_prev_actions: bool = attr.ib() multiple_beliefs: bool = attr.ib() belief_fusion: Optional[str] = attr.ib() def create_model(self, **kwargs) -> nn.Module: rgb_uuid = next( (s.uuid for s in self.sensors if isinstance(s, RGBSensor)), None ) depth_uuid = next( (s.uuid for s in self.sensors if isinstance(s, DepthSensor)), None ) goal_sensor_uuid = next( ( s.uuid for s in self.sensors if isinstance( s, (GPSCompassSensorRoboThor, TargetCoordinatesSensorHabitat) ) ) ) return 
PointNavActorCritic( # Env and Tak action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())), observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, rgb_uuid=rgb_uuid, depth_uuid=depth_uuid, goal_sensor_uuid=goal_sensor_uuid, # RNN hidden_size=( 228 if self.multiple_beliefs and len(self.auxiliary_uuids) > 1 else 512 ), num_rnn_layers=1, rnn_type="GRU", add_prev_actions=self.add_prev_actions, action_embed_size=4, # CNN backbone=self.backbone, resnet_baseplanes=32, embed_coordinates=False, coordinate_dims=2, # Aux auxiliary_uuids=self.auxiliary_uuids, multiple_beliefs=self.multiple_beliefs, beliefs_fusion=self.belief_fusion, ) class PointNavPPOMixin: @staticmethod def training_pipeline( auxiliary_uuids: Sequence[str], multiple_beliefs: bool, normalize_advantage: bool, advance_scene_rollout_period: Optional[int] = None, ) -> TrainingPipeline: ppo_steps = int(75000000) lr = 3e-4 num_mini_batch = 1 update_repeats = 4 num_steps = 128 save_interval = 5000000 log_interval = 10000 if torch.cuda.is_available() else 1 gamma = 0.99 use_gae = True gae_lambda = 0.95 max_grad_norm = 0.5 named_losses = { "ppo_loss": (PPO(**PPOConfig, normalize_advantage=normalize_advantage), 1.0) } named_losses = update_with_auxiliary_losses( named_losses=named_losses, auxiliary_uuids=auxiliary_uuids, multiple_beliefs=multiple_beliefs, ) return TrainingPipeline( save_interval=save_interval, metric_accumulate_interval=log_interval, optimizer_builder=Builder(optim.Adam, dict(lr=lr)), num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, named_losses={key: val[0] for key, val in named_losses.items()}, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=advance_scene_rollout_period, pipeline_stages=[ PipelineStage( loss_names=list(named_losses.keys()), max_stage_steps=ppo_steps, loss_weights=[val[1] for val in named_losses.values()], ) ], lr_scheduler_builder=Builder( LambdaLR, 
{"lr_lambda": LinearDecay(steps=ppo_steps)} ), ) ================================================ FILE: projects/tutorials/__init__.py ================================================ ================================================ FILE: projects/tutorials/distributed_objectnav_tutorial.py ================================================ # literate: tutorials/distributed-objectnav-tutorial.md # %% """# Tutorial: Distributed training across multiple nodes.""" # %% """ **Note** The commands provided in this tutorial include a configuration script to [clone the full library](../installation/installation-allenact.md#full-library). Setting up headless THOR might require superuser privileges. We also assume that [NCCL](https://developer.nvidia.com/nccl) is available for communication across computation nodes and that all nodes have a running `ssh` server. The experimental tools and commands for distributed training introduced below assume a Linux OS (tested on Ubuntu 18.04). In this tutorial, we: 1. Introduce the available API for training across multiple nodes, as well as experimental scripts for distributed configuration, training start and termination, and remote command execution. 1. Introduce the headless mode for [AI2-THOR](https://ai2thor.allenai.org/) in `AllenAct`. Note that, in contrast with previous tutorials using AI2-THOR, this time we don't require an xserver (in Linux) to be active. 1. Show a training example for RoboTHOR ObjectNav on a cluster, with each node having sufficient GPUs and GPU memory to host 60 experience samplers collecting rollout data.
Thanks to the massive parallelization of experience collection and model training enabled by [DD-PPO](https://arxiv.org/abs/1911.00357), we can greatly speed up training by scaling across multiple nodes: ![training speedup](../img/multinode_training.jpg) ## The task: ObjectNav In ObjectNav, the goal for the agent is to navigate to an object (possibly unseen during training) of a known given class and signal task completion when it determines it has reached the goal. ## Implementation For this tutorial, we'll use the readily available `objectnav_baselines` project, which includes configurations for a wide variety of object navigation experiments for both iTHOR and RoboTHOR. Since those configuration files are defined for a single-node setup, we will mainly focus on the changes required in the `machine_params` and `training_pipeline` methods. Note that, in order to use the headless version of AI2-THOR, we currently need to install a specific THOR commit, different from the default one in `robothor_plugin`. 
Note that this command is included in the configuration script below, so **we don't need to run this**: ```bash pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+91139c909576f3bf95a187c5b02c6fd455d06b48 ``` The experiment config starts as follows: """ # %% import math from typing import Optional, Sequence import torch import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR from allenact.algorithms.onpolicy_sync.losses import PPO from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig from allenact.utils.experiment_utils import ( Builder, LinearDecay, MultiLinearDecay, TrainingPipeline, PipelineStage, ) from projects.objectnav_baselines.experiments.robothor.objectnav_robothor_rgb_resnet18gru_ddppo import ( ObjectNavRoboThorRGBPPOExperimentConfig as BaseConfig, ) class DistributedObjectNavRoboThorRGBPPOExperimentConfig(BaseConfig): def tag(self) -> str: return "DistributedObjectNavRoboThorRGBPPO" # %% """We override ObjectNavRoboThorBaseConfig's THOR_COMMIT_ID to match the installed headless one:""" # %% THOR_COMMIT_ID = "91139c909576f3bf95a187c5b02c6fd455d06b48" # %% """Also indicate that we're using headless THOR (for `task_sampler_args` methods):""" # %% THOR_IS_HEADLESS = True # %% """**Temporary hack** Disable the `commit_id` argument passed to the THOR `Controller`'s `init` method:""" # %% def env_args(self): res = super().env_args() res.pop("commit_id", None) return res # %% """ And, of course, define the number of nodes. This will be used by `machine_params` and `training_pipeline` below. 
We override the existing `ExperimentConfig`'s `init` method to include control on the number of nodes: """ # %% def __init__( self, distributed_nodes: int = 1, num_train_processes: Optional[int] = None, train_gpu_ids: Optional[Sequence[int]] = None, val_gpu_ids: Optional[Sequence[int]] = None, test_gpu_ids: Optional[Sequence[int]] = None, ): super().__init__( num_train_processes=num_train_processes, train_gpu_ids=train_gpu_ids, val_gpu_ids=val_gpu_ids, test_gpu_ids=test_gpu_ids, ) self.distributed_nodes = distributed_nodes # %% """ ### Machine parameters **Note:** We assume that all nodes are identical (same number and model of GPUs and drivers). The `machine_params` method will be invoked by `runner.py` with different arguments, e.g. to determine the configuration for validation or training. When working in distributed settings, `AllenAct` needs to know the total number of trainers across all nodes as well as the local number of trainers. This is accomplished through the introduction of a `machine_id` keyword argument, which will be used to define the training parameters as follows: """ # %% def machine_params(self, mode="train", **kwargs): params = super().machine_params(mode, **kwargs) if mode == "train": params.devices = params.devices * self.distributed_nodes params.nprocesses = params.nprocesses * self.distributed_nodes params.sampler_devices = params.sampler_devices * self.distributed_nodes if "machine_id" in kwargs: machine_id = kwargs["machine_id"] assert ( 0 <= machine_id < self.distributed_nodes ), f"machine_id {machine_id} out of range [0, {self.distributed_nodes - 1}]" local_worker_ids = list( range( len(self.train_gpu_ids) * machine_id, len(self.train_gpu_ids) * (machine_id + 1), ) ) params.set_local_worker_ids(local_worker_ids) # Confirm we're setting up train params nicely: print( f"devices {params.devices}" f"\nnprocesses {params.nprocesses}" f"\nsampler_devices {params.sampler_devices}" f"\nlocal_worker_ids {params.local_worker_ids}" ) elif mode 
== "valid": # Use all GPUs at their maximum capacity for training # (you may run validation in a separate machine) params.nprocesses = (0,) return params # %% """ In summary, we need to specify which indices in `devices`, `nprocesses` and `sampler_devices` correspond to the local `machine_id` node (whenever a `machine_id` is given as a keyword argument), otherwise we specify the global configuration. ### Training pipeline In preliminary ObjectNav experiments, we observe that small batches are useful during the initial training steps in terms of sample efficiency, whereas large batches are preferred during the rest of training. In order to scale to the larger amount of collected data in multi-node settings, we will proceed with a two-stage pipeline: 1. In the first stage, we'll enforce a number of updates per amount of collected data similar to the configuration with a single node by enforcing more batches per rollout (for about 30 million steps). 1. In the second stage we'll switch to a configuration with larger learning rate and batch size to be used up to the grand total of 300 million experience steps. 
We first define a helper method to generate a learning rate curve with decay for each stage: """ # %% @staticmethod def lr_scheduler(small_batch_steps, transition_steps, ppo_steps, lr_scaling): safe_small_batch_steps = int(small_batch_steps * 1.02) large_batch_and_lr_steps = ppo_steps - safe_small_batch_steps - transition_steps # Learning rate after small batch steps (assuming decay to 0) break1 = 1.0 - safe_small_batch_steps / ppo_steps # Initial learning rate for large batch (after transition from initial to large learning rate) break2 = lr_scaling * ( 1.0 - (safe_small_batch_steps + transition_steps) / ppo_steps ) return MultiLinearDecay( [ # Base learning rate phase for small batch (with linear decay towards 0) LinearDecay( steps=safe_small_batch_steps, startp=1.0, endp=break1, ), # Allow the optimizer to adapt its statistics to the changes with a larger learning rate LinearDecay( steps=transition_steps, startp=break1, endp=break2, ), # Scaled learning rate phase for large batch (with linear decay towards 0) LinearDecay( steps=large_batch_and_lr_steps, startp=break2, endp=0, ), ] ) # %% """ The training pipeline looks like: """ # %% def training_pipeline(self, **kwargs): # These params are identical to the baseline configuration for 60 samplers (1 machine) ppo_steps = int(300e6) lr = 3e-4 num_mini_batch = 1 update_repeats = 4 num_steps = 128 save_interval = 5000000 log_interval = 10000 if torch.cuda.is_available() else 1 gamma = 0.99 use_gae = True gae_lambda = 0.95 max_grad_norm = 0.5 # We add 30 million steps for small batch learning small_batch_steps = int(30e6) # And a short transition phase towards large learning rate # (see comment in the `lr_scheduler` helper method transition_steps = int(2 / 3 * self.distributed_nodes * 1e6) # Find exact number of samplers per GPU assert ( self.num_train_processes % len(self.train_gpu_ids) == 0 ), "Expected uniform number of samplers per GPU" samplers_per_gpu = self.num_train_processes // len(self.train_gpu_ids) # 
Multiply num_mini_batch by the largest divisor of # samplers_per_gpu to keep all batches of same size: num_mini_batch_multiplier = [ i for i in reversed( range(1, min(samplers_per_gpu // 2, self.distributed_nodes) + 1) ) if samplers_per_gpu % i == 0 ][0] # Multiply update_repeats so that the product of this factor and # num_mini_batch_multiplier is >= self.distributed_nodes: update_repeats_multiplier = int( math.ceil(self.distributed_nodes / num_mini_batch_multiplier) ) return TrainingPipeline( save_interval=save_interval, metric_accumulate_interval=log_interval, optimizer_builder=Builder(optim.Adam, dict(lr=lr)), num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, named_losses={"ppo_loss": PPO(**PPOConfig, show_ratios=False)}, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD, pipeline_stages=[ # We increase the number of batches for the first stage to reach an # equivalent number of updates per collected rollout data as in the # 1 node/60 samplers setting PipelineStage( loss_names=["ppo_loss"], max_stage_steps=small_batch_steps, num_mini_batch=num_mini_batch * num_mini_batch_multiplier, update_repeats=update_repeats * update_repeats_multiplier, ), # The we proceed with the base configuration (leading to larger # batches due to the increased number of samplers) PipelineStage( loss_names=["ppo_loss"], max_stage_steps=ppo_steps - small_batch_steps, ), ], # We use the MultiLinearDecay curve defined by the helper function, # setting the learning rate scaling as the square root of the number # of nodes. Linear scaling might also works, but we leave that # check to the reader. 
lr_scheduler_builder=Builder( LambdaLR, { "lr_lambda": self.lr_scheduler( small_batch_steps=small_batch_steps, transition_steps=transition_steps, ppo_steps=ppo_steps, lr_scaling=math.sqrt(self.distributed_nodes), ) }, ), ) # %% """ ## Multi-node configuration **Note:** In the following, we'll assume you don't have an available setup for distributed execution, such as [slurm](https://slurm.schedmd.com/documentation.html). If you do have access to a better alternative to setup and run distributed processes, we encourage you to use that. The experimental distributed tools included here are intended for a rather basic usage pattern that might not suit your needs. If we haven't set up AllenAct with the headless version of Ai2-THOR in our nodes, we can define a configuration script similar to: ```bash #!/bin/bash # Prepare a virtualenv for allenact sudo apt-get install -y python3-venv python3 -mvenv ~/allenact_venv source ~/allenact_venv/bin/activate pip install -U pip wheel # Install AllenAct cd ~ git clone https://github.com/allenai/allenact.git cd allenact # Install AllenaAct + RoboTHOR plugin dependencies pip install -r requirements.txt pip install -r allenact_plugins/robothor_plugin/extra_requirements.txt # Download + setup datasets bash datasets/download_navigation_datasets.sh robothor-objectnav # Install headless AI2-THOR and required libvulkan1 sudo apt-get install -y libvulkan1 pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+91139c909576f3bf95a187c5b02c6fd455d06b48 # Download AI2-THOR binaries python -c "from ai2thor.controller import Controller; c=Controller(); c.stop()" echo DONE ``` and save it as `headless_robothor_config.sh`. Note that some of the configuration steps in the script assume you have superuser privileges. 
Then, we can just copy this file to the first node in our cluster and run it with:

```bash
source <PATH/TO/headless_robothor_config.sh>
```

If everything went well, we should be able to

```bash
cd ~/allenact && source ~/allenact_venv/bin/activate
```

Note that we might need to install `libvulkan1` in each node (even if the AllenAct setup is shared across nodes) if it is not already available.

### Local filesystems

If our cluster does not use a shared filesystem, we'll need to propagate the setup to the rest of the nodes. Assuming we can just `ssh` with the current user to all nodes, we can propagate our config with

```bash
scripts/dconfig.py --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \
    --config_script <PATH/TO/headless_robothor_config.sh>
```

and we can check the state of the installation with the `scripts/dcommand.py` tool:

```bash
scripts/dcommand.py --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \
    --command 'tail -n 5 ~/log_allenact_distributed_config'
```

If everything went fine, all requirements are ready to start running our experiment.

## Run your experiment

**Note:** In this section, we again assume you don't have an available setup for distributed execution, such as [slurm](https://slurm.schedmd.com/documentation.html). If you do have access to a better alternative to setup/run distributed processes, we encourage you to use that. The experimental distributed tools included here are intended for a rather basic usage pattern that might not suit your needs.

Our experimental extension to AllenAct's `main.py` script allows using practically identical commands to the ones used in a single-node setup to start our experiments.
From the root `allenact` directory, we can simply invoke

```bash
scripts/dmain.py projects/tutorials/distributed_objectnav_tutorial.py \
    --config_kwargs '{"distributed_nodes":3}' \
    --runs_on <COMMA_SEPARATED_LIST_OF_IP_ADDRESSES> \
    --env_activate_path ~/allenact_venv/bin/activate \
    --allenact_path ~/allenact \
    --distributed_ip_and_port <FIRST_IP_ADDRESS_IN_RUNS_ON_LIST>:<FREE_PORT_NUMBER_FOR_THIS_IP_ADDRESS>
```

This script will do several things for you, including synchronization of the changes in the `allenact` directory to all machines, enabling virtual environments in each node, sharing the same random seed for all `main.py` instances, assigning `--machine_id` parameters required for multi-node training, and redirecting the process output to a log file under the output results folder.

Note that by changing the value associated with the `distributed_nodes` key in the `config_kwargs` map and the `runs_on` list of IPs, we can easily scale our training to e.g. 1, 3, or 8 nodes as shown in the chart above. Note that for this call to work unmodified, you should have sufficient GPUs/GPU memory to host 60 samplers per node.

## Track and stop your experiment

You might have noticed that, when your experiment started with the above command, a file was created under `~/.allenact`. This file includes IP addresses and screen session IDs for all nodes. It can be used by the already introduced `scripts/dcommand.py` script, if we omit the `--runs_on` argument, to call a command on each node via ssh; but most importantly it is used by the `scripts/dkill.py` script to terminate all screen sessions hosting our training processes.

### Experiment tracking

A simple way to check all machines are training, assuming you have `nvidia-smi` installed in all nodes, is to just call

```bash
scripts/dcommand.py
```

from the root `allenact` directory. If everything is working well, the GPU usage stats from `nvidia-smi` should reflect ongoing activity. You can also add different commands to be executed by each node.
It is of course also possible to run tensorboard on any of the nodes, if that's your preference.

### Experiment termination

Just call

```bash
scripts/dkill.py
```

After killing all involved screen sessions, you will be asked about whether you also want to delete the "killfile" stored under the `~/.allenact` directory (which might be your preferred option once all processes are terminated).

We hope this tutorial will help you start quickly testing new ideas! Even if we've only explored moderate settings of up to 480 experience samplers, you might want to consider some additional changes (like the [choice for the optimizer](https://arxiv.org/abs/2103.07013)) if you plan to run at larger scale.
"""

================================================
FILE: projects/tutorials/gym_mujoco_tutorial.py
================================================
# literate: tutorials/gym-mujoco-tutorial.md
# %%
"""# Tutorial: OpenAI gym MuJoCo environment."""

# %%
"""
**Note** The provided commands to execute in this tutorial assume you have [installed the full library](../installation/installation-allenact.md#full-library) and the requirements for the `gym_plugin`. The latter can be installed by

```bash
pip install -r allenact_plugins/gym_plugin/extra_requirements.txt
```

The environments for this tutorial use the [MuJoCo](http://www.mujoco.org/) (**Mu**lti-**Jo**int dynamics in **Co**ntact) physics simulator, which is also required to be installed properly with instructions [here](https://github.com/openai/mujoco-py).

## The task

For this tutorial, we'll focus on one of the continuous-control environments under the `mujoco` group of `gym` environments: [Ant-v2](https://gym.openai.com/envs/Ant-v2/). In this task, the goal is to make a four-legged creature, "ant", walk forward as fast as possible. A random agent of "Ant-v2" is shown below.

![The Ant-v2 task](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_random.gif).
To achieve the goal, we need to provide continuous control so that the agent moves forward on its four legs with an `x` velocity as high as possible, for at most 1000 steps per episode. The episode fails (i.e. is done) if the `z` position is out of the range [0.2, 1.0]. The action space has 8 dimensions and the observation space has 111 dimensions, which map to different body parts and include the 3D position `(x,y,z)` and orientation (quaternion `x`,`y`,`z`,`w`) of the torso, the joint angles, the 3D velocity `(x,y,z)`, the 3D angular velocity `(x,y,z)`, and the joint velocities. The reward for the "ant" agent is composed of a forward reward, a healthy reward, a control cost, and a contact cost.

## Implementation

For this tutorial, we'll use the readily available `gym_plugin`, which includes a [wrapper for `gym` environments](../api/allenact_plugins/gym_plugin/gym_environment.md#gymenvironment), a [task sampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler) and [task definition](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymcontinuousbox2dtask), a [sensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to wrap the observations provided by the `gym` environment, and a simple [model](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic).
The experiment config, similar to the one used for the [Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md), is defined as follows: """ # %% from typing import Dict, Optional, List, Any, cast import gym import torch.nn as nn import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR from allenact.algorithms.onpolicy_sync.losses.ppo import PPO from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler from allenact.base_abstractions.sensor import SensorSuite from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic from allenact_plugins.gym_plugin.gym_sensors import GymMuJoCoSensor from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler from allenact.utils.experiment_utils import ( TrainingPipeline, Builder, PipelineStage, LinearDecay, ) from allenact.utils.viz_utils import VizSuite, AgentViewViz class HandManipulateTutorialExperimentConfig(ExperimentConfig): @classmethod def tag(cls) -> str: return "GymMuJoCoTutorial" # %% """ ### Sensors and Model As mentioned above, we'll use a [GymBox2DSensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to provide full observations from the state of the `gym` environment to our model. """ # %% SENSORS = [ GymMuJoCoSensor("Ant-v2", uuid="gym_mujoco_data"), ] # %% """ We define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic, [MemorylessActorCritic](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). Since this is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]` instead of `ActorCriticModel[CategoricalDistr]`, since we'll use a [Gaussian distribution](../api/allenact_plugins/gym_plugin/gym_distributions.md#gaussiandistr) to sample actions. 
""" # %% @classmethod def create_model(cls, **kwargs) -> nn.Module: """We define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic, MemorylessActorCritic. Since this is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]` instead of `ActorCriticModel[CategoricalDistr]`, since we'll use a Gaussian distribution to sample actions. """ return MemorylessActorCritic( input_uuid="gym_mujoco_data", action_space=gym.spaces.Box( -3.0, 3.0, (8,), "float32" ), # 8 actors, each in the range [-3.0, 3.0] observation_space=SensorSuite(cls.SENSORS).observation_spaces, action_std=0.5, ) # %% """ ### Task samplers We use an available `TaskSampler` implementation for `gym` environments that allows to sample [GymTasks](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtask): [GymTaskSampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler). Even though it is possible to let the task sampler instantiate the proper sensor for the chosen task name (by passing `None`), we use the sensors we created above, which contain a custom identifier for the actual observation space (`gym_mujoco_data`) also used by the model. 
""" # %% @classmethod def make_sampler_fn(cls, **kwargs) -> TaskSampler: return GymTaskSampler(gym_env_type="Ant-v2", **kwargs) # %% """ For convenience, we will use a `_get_sampler_args` method to generate the task sampler arguments for all three modes, `train, valid, test`: """ # %% def train_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args( process_ind=process_ind, mode="train", seeds=seeds ) def valid_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args( process_ind=process_ind, mode="valid", seeds=seeds ) def test_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args(process_ind=process_ind, mode="test", seeds=seeds) # %% """ Similarly to what we do in the Minigrid navigation tutorial, the task sampler samples random tasks for ever, while, during testing (or validation), we sample a fixed number of tasks. """ # %% def _get_sampler_args( self, process_ind: int, mode: str, seeds: List[int] ) -> Dict[str, Any]: """Generate initialization arguments for train, valid, and test TaskSamplers. # Parameters process_ind : index of the current task sampler mode: one of `train`, `valid`, or `test` """ if mode == "train": max_tasks = None # infinite training tasks task_seeds_list = None # no predefined random seeds for training deterministic_sampling = False # randomly sample tasks in training else: max_tasks = 4 # one seed for each task to sample: # - ensures different seeds for each sampler, and # - ensures a deterministic set of sampled tasks. 
task_seeds_list = list( range(process_ind * max_tasks, (process_ind + 1) * max_tasks) ) deterministic_sampling = ( True # deterministically sample task in validation/testing ) return dict( gym_env_types=["Ant-v2"], sensors=self.SENSORS, # sensors used to return observations to the agent max_tasks=max_tasks, # see above task_seeds_list=task_seeds_list, # see above deterministic_sampling=deterministic_sampling, # see above seed=seeds[process_ind], ) # %% """ Note that we just sample 4 tasks for validation and testing in this case, which suffice to illustrate the model's success. ### Machine parameters In this tutorial, we just train the model on the CPU. We allocate a larger number of samplers for training (8) than for validation or testing (just 1), and we default to CPU usage by returning an empty list of `devices`. We also include a video visualizer (`AgentViewViz`) in test mode. """ # %% @classmethod def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]: visualizer = None if mode == "test": visualizer = VizSuite( mode=mode, video_viz=AgentViewViz( label="episode_vid", max_clip_length=400, vector_task_source=("render", {"mode": "rgb_array"}), fps=30, ), ) return { "nprocesses": 8 if mode == "train" else 1, # rollout "devices": [], "visualizer": visualizer, } # %% """ ### Training pipeline The last definition is the training pipeline. In this case, we use a PPO stage with linearly decaying learning rate and 10 single-batch update repeats per rollout. The reward should exceed 4,000 in 20M steps in the test. In order to make the "ant" run with an obvious fast speed, we train the agents using PPO with 3e7 steps. 
""" # %% @classmethod def training_pipeline(cls, **kwargs) -> TrainingPipeline: lr = 3e-4 ppo_steps = int(3e7) clip_param = 0.2 value_loss_coef = 0.5 entropy_coef = 0.0 num_mini_batch = 4 # optimal 64 update_repeats = 10 max_grad_norm = 0.5 num_steps = 2048 gamma = 0.99 use_gae = True gae_lambda = 0.95 advance_scene_rollout_period = None save_interval = 200000 metric_accumulate_interval = 50000 return TrainingPipeline( named_losses=dict( ppo_loss=PPO( clip_param=clip_param, value_loss_coef=value_loss_coef, entropy_coef=entropy_coef, ), ), # type:ignore pipeline_stages=[ PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps), ], optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)), num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=advance_scene_rollout_period, save_interval=save_interval, metric_accumulate_interval=metric_accumulate_interval, lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps, startp=1, endp=0)}, ), ) # %% """ ## Training and validation We have a complete implementation of this experiment's configuration class in `projects/tutorials/gym_mujoco_tutorial.py`. To start training from scratch, we just need to invoke ```bash PYTHONPATH=. python allenact/main.py gym_mujoco_tutorial -b projects/tutorials -m 8 -o /PATH/TO/gym_mujoco_output -s 0 -e ``` from the `allenact` root directory. Note that we include `-e` to enforce deterministic evaluation. Please refer to the [Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md) if in doubt of the meaning of the rest of parameters. If we have Tensorboard installed, we can track progress with ```bash tensorboard --logdir /PATH/TO/gym_mujoco_output ``` which will default to the URL [http://localhost:6006/](http://localhost:6006/). After 30,000,000 steps, the script will terminate. 
If everything went well, the `valid` success rate should be 1 and the mean reward should rise above 4,000 within 20,000,000 steps, while the average episode length should stay at or a little below 1,000.

## Testing

The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the subfolders in the path to the checkpoints, saved under the output folder. In order to evaluate (i.e. test) a collection of checkpoints, we need to pass the `--eval` flag and specify the directory containing the checkpoints with the `--checkpoint CHECKPOINT_DIR` option:

```bash
PYTHONPATH=. python allenact/main.py gym_mujoco_tutorial \
    -b projects/tutorials \
    -m 1 \
    -o /PATH/TO/gym_mujoco_output \
    -s 0 \
    -e \
    --eval \
    --checkpoint /PATH/TO/gym_mujoco_output/checkpoints/GymMuJoCoTutorial/YOUR_START_DATE
```

If everything went well, the `test` success rate should be 1 and the mean reward should rise above 4,000 within 20,000,000 steps, while the average episode length should stay at or a little below 1,000. The `gif` results can be seen in the image tab of Tensorboard while testing.

The output should be something like this:

![results](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_test.png).

The `gif` results shown in the image tab of Tensorboard while testing should look like this:

![mp4 demo](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/gym-mujoco/ant_test.gif)

If the test command fails with `pyglet.canvas.xlib.NoSuchDisplayException: Cannot connect to "None"`, e.g. when running remotely, try prepending `DISPLAY=:0.0` to the command above, assuming you have an xserver running with such display available:

```bash
DISPLAY=:0.0 PYTHONPATH=.
python allenact/main.py gym_mujoco_tutorial \ -b projects/tutorials \ -m 1 \ -o /PATH/TO/gym_mujoco_output \ -s 0 \ -e \ --eval \ --checkpoint /PATH/TO/gym_mujoco_output/checkpoints/GymMuJoCoTutorial/YOUR_START_DATE ``` """ ================================================ FILE: projects/tutorials/gym_tutorial.py ================================================ # literate: tutorials/gym-tutorial.md # %% """# Tutorial: OpenAI gym for continuous control.""" # %% """ **Note** The provided commands to execute in this tutorial assume you have [installed the full library](../installation/installation-allenact.md#full-library) and the requirements for the `gym_plugin`. The latter can be installed by ```bash pip install -r allenact_plugins/gym_plugin/extra_requirements.txt ``` In this tutorial, we: 1. Introduce the `gym_plugin`, which enables some of the tasks in [OpenAI's gym](https://gym.openai.com/) for training and inference within AllenAct. 1. Show an example of continuous control with an arbitrary action space covering 2 policies for one of the `gym` tasks. ## The task For this tutorial, we'll focus on one of the continuous-control environments under the `Box2D` group of `gym` environments: [LunarLanderContinuous-v2](https://gym.openai.com/envs/LunarLanderContinuous-v2/). In this task, the goal is to smoothly land a lunar module in a landing pad, as shown below. ![The LunarLanderContinuous-v2 task](../img/lunar_lander_continuous_demo.png). To achieve this goal, we need to provide continuous control for a main engine and directional one (2 real values). In order to solve the task, the expected reward is of at least 200 points. The controls for main and directional engines are both in the range [-1.0, 1.0] and the observation space is composed of 8 scalars indicating `x` and `y` positions, `x` and `y` velocities, lander angle and angular velocity, and left and right ground contact. Note that these 8 scalars provide a full observation of the state. 
## Implementation For this tutorial, we'll use the readily available `gym_plugin`, which includes a [wrapper for `gym` environments](../api/allenact_plugins/gym_plugin/gym_environment.md#gymenvironment), a [task sampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler) and [task definition](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymcontinuousbox2dtask), a [sensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to wrap the observations provided by the `gym` environment, and a simple [model](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). The experiment config, similar to the one used for the [Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md), is defined as follows: """ # %% from typing import Dict, Optional, List, Any, cast import gym import torch.nn as nn import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR from allenact.algorithms.onpolicy_sync.losses.ppo import PPO from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler from allenact.base_abstractions.sensor import SensorSuite from allenact_plugins.gym_plugin.gym_models import MemorylessActorCritic from allenact_plugins.gym_plugin.gym_sensors import GymBox2DSensor from allenact_plugins.gym_plugin.gym_tasks import GymTaskSampler from allenact.utils.experiment_utils import ( TrainingPipeline, Builder, PipelineStage, LinearDecay, ) from allenact.utils.viz_utils import VizSuite, AgentViewViz class GymTutorialExperimentConfig(ExperimentConfig): @classmethod def tag(cls) -> str: return "GymTutorial" # %% """ ### Sensors and Model As mentioned above, we'll use a [GymBox2DSensor](../api/allenact_plugins/gym_plugin/gym_sensors.md#gymbox2dsensor) to provide full observations from the state of the `gym` environment to our model. 
""" # %% SENSORS = [ GymBox2DSensor("LunarLanderContinuous-v2", uuid="gym_box_data"), ] # %% """ We define our `ActorCriticModel` agent using a lightweight implementation with separate MLPs for actors and critic, [MemorylessActorCritic](../api/allenact_plugins/gym_plugin/gym_models.md#memorylessactorcritic). Since this is a model for continuous control, note that the superclass of our model is `ActorCriticModel[GaussianDistr]` instead of `ActorCriticModel[CategoricalDistr]`, since we'll use a [Gaussian distribution](../api/allenact_plugins/gym_plugin/gym_distributions.md#gaussiandistr) to sample actions. """ # %% @classmethod def create_model(cls, **kwargs) -> nn.Module: return MemorylessActorCritic( input_uuid="gym_box_data", action_space=gym.spaces.Box( -1.0, 1.0, (2,) ), # 2 actors, each in the range [-1.0, 1.0] observation_space=SensorSuite(cls.SENSORS).observation_spaces, action_std=0.5, ) # %% """ ### Task samplers We use an available `TaskSampler` implementation for `gym` environments that allows to sample [GymTasks](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtask): [GymTaskSampler](../api/allenact_plugins/gym_plugin/gym_tasks.md#gymtasksampler). Even though it is possible to let the task sampler instantiate the proper sensor for the chosen task name (by passing `None`), we use the sensors we created above, which contain a custom identifier for the actual observation space (`gym_box_data`) also used by the model. 
""" # %% @classmethod def make_sampler_fn(cls, **kwargs) -> TaskSampler: return GymTaskSampler(**kwargs) # %% """ For convenience, we will use a `_get_sampler_args` method to generate the task sampler arguments for all three modes, `train, valid, test`: """ # %% def train_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args( process_ind=process_ind, mode="train", seeds=seeds ) def valid_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args( process_ind=process_ind, mode="valid", seeds=seeds ) def test_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args(process_ind=process_ind, mode="test", seeds=seeds) # %% """ Similarly to what we do in the Minigrid navigation tutorial, the task sampler samples random tasks for ever, while, during testing (or validation), we sample a fixed number of tasks. """ # %% def _get_sampler_args( self, process_ind: int, mode: str, seeds: List[int] ) -> Dict[str, Any]: """Generate initialization arguments for train, valid, and test TaskSamplers. # Parameters process_ind : index of the current task sampler mode: one of `train`, `valid`, or `test` """ if mode == "train": max_tasks = None # infinite training tasks task_seeds_list = None # no predefined random seeds for training deterministic_sampling = False # randomly sample tasks in training else: max_tasks = 3 # one seed for each task to sample: # - ensures different seeds for each sampler, and # - ensures a deterministic set of sampled tasks. 
task_seeds_list = list( range(process_ind * max_tasks, (process_ind + 1) * max_tasks) ) deterministic_sampling = ( True # deterministically sample task in validation/testing ) return dict( gym_env_types=["LunarLanderContinuous-v2"], sensors=self.SENSORS, # sensors used to return observations to the agent max_tasks=max_tasks, # see above task_seeds_list=task_seeds_list, # see above deterministic_sampling=deterministic_sampling, # see above seed=seeds[process_ind], ) # %% """ Note that we just sample 3 tasks for validation and testing in this case, which suffice to illustrate the model's success. ### Machine parameters Given the simplicity of the task and model, we can just train the model on the CPU. During training, success should reach 100% in less than 10 minutes, whereas solving the task (evaluation reward > 200) might take about 20 minutes (on a laptop CPU). We allocate a larger number of samplers for training (8) than for validation or testing (just 1), and we default to CPU usage by returning an empty list of `devices`. We also include a video visualizer (`AgentViewViz`) in test mode. """ # %% @classmethod def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]: visualizer = None if mode == "test": visualizer = VizSuite( mode=mode, video_viz=AgentViewViz( label="episode_vid", max_clip_length=400, vector_task_source=("render", {"mode": "rgb_array"}), fps=30, ), ) return { "nprocesses": 8 if mode == "train" else 1, "devices": [], "visualizer": visualizer, } # %% """ ### Training pipeline The last definition is the training pipeline. 
In this case, we use a PPO stage with linearly decaying learning rate and 80 single-batch update repeats per rollout: """ # %% @classmethod def training_pipeline(cls, **kwargs) -> TrainingPipeline: ppo_steps = int(1.2e6) return TrainingPipeline( named_losses=dict( ppo_loss=PPO( clip_param=0.2, value_loss_coef=0.5, entropy_coef=0.0, ), ), # type:ignore pipeline_stages=[ PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps), ], optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-3)), num_mini_batch=1, update_repeats=80, max_grad_norm=100, num_steps=2000, gamma=0.99, use_gae=False, gae_lambda=0.95, advance_scene_rollout_period=None, save_interval=200000, metric_accumulate_interval=50000, lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}, # type:ignore ), ) # %% """ ## Training and validation We have a complete implementation of this experiment's configuration class in `projects/tutorials/gym_tutorial.py`. To start training from scratch, we just need to invoke ```bash PYTHONPATH=. python allenact/main.py gym_tutorial -b projects/tutorials -m 8 -o /PATH/TO/gym_output -s 54321 -e ``` from the `allenact` root directory. Note that we include `-e` to enforce deterministic evaluation. Please refer to the [Navigation in MiniGrid tutorial](../tutorials/minigrid-tutorial.md) if in doubt of the meaning of the rest of parameters. If we have Tensorboard installed, we can track progress with ```bash tensorboard --logdir /PATH/TO/gym_output ``` which will default to the URL [http://localhost:6006/](http://localhost:6006/). After 1,200,000 steps, the script will terminate. If everything went well, the `valid` success rate should quickly converge to 1 and the mean reward to above 250, while the average episode length should stay below or near 300. 
## Testing The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the subfolders in the path to the checkpoints, saved under the output folder. In order to evaluate (i.e. test) a collection of checkpoints, we need to pass the `--eval` flag and specify the directory containing the checkpoints with the `--checkpoint CHECKPOINT_DIR` option: ```bash PYTHONPATH=. python allenact/main.py gym_tutorial \ -b projects/tutorials \ -m 1 \ -o /PATH/TO/gym_output \ -s 54321 \ -e \ --eval \ --checkpoint /PATH/TO/gym_output/checkpoints/GymTutorial/YOUR_START_DATE \ --approx_ckpt_step_interval 800000 # Skip some checkpoints ``` The option `--approx_ckpt_step_interval 800000` tells AllenAct that we only want to evaluate checkpoints which were saved every ~800000 steps; this lets us avoid evaluating every saved checkpoint. If everything went well, the `test` success rate should converge to 1, the episode length below or near 300 steps, and the mean reward to above 250. The images tab in tensorboard will contain videos for the sampled test episodes. ![video_results](../img/lunar_lander_continuous_test.png). If the test command fails with `pyglet.canvas.xlib.NoSuchDisplayException: Cannot connect to "None"`, e.g. when running remotely, try prepending `DISPLAY=:0.0` to the command above, assuming you have an xserver running with such display available: ```bash DISPLAY=:0.0 PYTHONPATH=.
python allenact/main.py gym_tutorial \ -b projects/tutorials \ -m 1 \ -o /PATH/TO/gym_output \ -s 54321 \ -e \ --eval \ --checkpoint /PATH/TO/gym_output/checkpoints/GymTutorial/YOUR_START_DATE \ --approx_ckpt_step_interval 800000 ``` """ ================================================ FILE: projects/tutorials/minigrid_offpolicy_tutorial.py ================================================ # literate: tutorials/offpolicy-tutorial.md # %% """# Tutorial: Off-policy training.""" # %% """ **Note** The provided commands to execute in this tutorial assume you have [installed the full library](../installation/installation-allenact.md#full-library) and the `extra_requirements` for the `babyai_plugin` and `minigrid_plugin`. The latter can be installed with: ```bash pip install -r allenact_plugins/babyai_plugin/extra_requirements.txt; pip install -r allenact_plugins/minigrid_plugin/extra_requirements.txt ``` In this tutorial we'll learn how to train an agent from an external dataset by imitating expert actions via Behavior Cloning. We'll use a [BabyAI agent](/api/allenact_plugins/babyai_plugin/babyai_models#BabyAIRecurrentACModel) to solve `GoToLocal` tasks on [MiniGrid](https://github.com/maximecb/gym-minigrid); see the `projects/babyai_baselines/experiments/go_to_local` directory for more details. This tutorial assumes `AllenAct`'s [abstractions](../getting_started/abstractions.md) are known. ## The task In a `GoToLocal` task, the agent immersed in a grid world has to navigate to a specific object in the presence of multiple distractors, requiring the agent to understand `go to` instructions like "go to the red ball". For further details, please consult the [original paper](https://arxiv.org/abs/1810.08272). ## Getting the dataset We will use a large dataset (**more than 4 GB**) including expert demonstrations for `GoToLocal` tasks. To download the data we'll run ```bash PYTHONPATH=. 
python allenact_plugins/babyai_plugin/scripts/download_babyai_expert_demos.py GoToLocal ``` from the project's root directory, which will download `BabyAI-GoToLocal-v0.pkl` and `BabyAI-GoToLocal-v0_valid.pkl` to the `allenact_plugins/babyai_plugin/data/demos` directory. We will also generate small versions of the datasets, which will be useful if running on CPU, by calling ```bash PYTHONPATH=. python allenact_plugins/babyai_plugin/scripts/truncate_expert_demos.py ``` from the project's root directory, which will generate `BabyAI-GoToLocal-v0-small.pkl` under the same `allenact_plugins/babyai_plugin/data/demos` directory. ## Data storage In order to train with an off-policy dataset, we need to define an `ExperienceStorage`. In AllenAct, an `ExperienceStorage` object has two primary functions: 1. It stores/manages relevant data (e.g. similarly to the `Dataset` class in PyTorch). 2. It loads stored data into batches that will be used for loss computation (e.g. similarly to the `Dataloader` class in PyTorch). Unlike a PyTorch `Dataset` however, an `ExperienceStorage` object can build its dataset **at runtime** by processing rollouts from the agent. This flexibility allows us to, for example, implement the experience replay data structure used in deep Q-learning. For this tutorial we won't need this additional functionality as our off-policy dataset is a fixed collection of expert trajectories. An example of an `ExperienceStorage` for BabyAI expert demos might look as follows: """ # %% import_summary allenact_plugins.minigrid_plugin.minigrid_offpolicy.MiniGridExpertTrajectoryStorage # %% """ A complete example can be found in [MiniGridExpertTrajectoryStorage](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy#MiniGridExpertTrajectoryStorage). ## Loss function Off-policy losses must implement the [`GenericAbstractLoss`](/api/allenact/base_abstractions/misc/#genericabstractloss) interface.
In this case, we minimize the cross-entropy between the actor's policy and the expert action: """ # %% import allenact_plugins.minigrid_plugin.minigrid_offpolicy.MiniGridOffPolicyExpertCELoss # %% """ A complete example can be found in [MiniGridOffPolicyExpertCELoss](/api/allenact_plugins/minigrid_plugin/minigrid_offpolicy#MiniGridOffPolicyExpertCELoss). Note that in this case we train the entire actor, but it would also be possible to forward data through a different subgraph of the ActorCriticModel. ## Experiment configuration For the experiment configuration, we'll build on top of an existing [base BabyAI GoToLocal Experiment Config](/api/projects/babyai_baselines/experiments/go_to_local/base/#basebabyaigotolocalexperimentconfig). The complete `ExperimentConfig` file for off-policy training is [here](/api/projects/tutorials/minigrid_offpolicy_tutorial/#bcoffpolicybabyaigotolocalexperimentconfig), but let's focus on the most relevant aspect to enable this type of training: providing an [OffPolicyPipelineComponent](/api/allenact/utils/experiment_utils/#offpolicypipelinecomponent) object as input to a `PipelineStage` when instantiating the `TrainingPipeline` in the `training_pipeline` method. 
""" # %% hide import os from typing import Optional, List, Tuple import torch from gym_minigrid.minigrid import MiniGridEnv from allenact.algorithms.onpolicy_sync.storage import RolloutBlockStorage from allenact.utils.experiment_utils import ( PipelineStage, StageComponent, TrainingSettings, ) from allenact_plugins.babyai_plugin.babyai_constants import ( BABYAI_EXPERT_TRAJECTORIES_DIR, ) from allenact_plugins.minigrid_plugin.minigrid_offpolicy import ( MiniGridOffPolicyExpertCELoss, MiniGridExpertTrajectoryStorage, ) from projects.babyai_baselines.experiments.go_to_local.base import ( BaseBabyAIGoToLocalExperimentConfig, ) # %% class BCOffPolicyBabyAIGoToLocalExperimentConfig(BaseBabyAIGoToLocalExperimentConfig): """BC Off-policy imitation.""" DATASET: Optional[List[Tuple[str, bytes, List[int], MiniGridEnv.Actions]]] = None GPU_ID = 0 if torch.cuda.is_available() else None @classmethod def tag(cls): return "BabyAIGoToLocalBCOffPolicy" @classmethod def METRIC_ACCUMULATE_INTERVAL(cls): # See BaseBabyAIGoToLocalExperimentConfig for how this is used. 
return 1 @classmethod def training_pipeline(cls, **kwargs): total_train_steps = cls.TOTAL_IL_TRAIN_STEPS ppo_info = cls.rl_loss_default("ppo", steps=-1) num_mini_batch = ppo_info["num_mini_batch"] update_repeats = ppo_info["update_repeats"] # fmt: off return cls._training_pipeline( named_losses={ "offpolicy_expert_ce_loss": MiniGridOffPolicyExpertCELoss( total_episodes_in_epoch=int(1e6) ), }, named_storages={ "onpolicy": RolloutBlockStorage(), "minigrid_offpolicy_expert": MiniGridExpertTrajectoryStorage( data_path=os.path.join( BABYAI_EXPERT_TRAJECTORIES_DIR, "BabyAI-GoToLocal-v0{}.pkl".format( "" if torch.cuda.is_available() else "-small" ), ), num_samplers=cls.NUM_TRAIN_SAMPLERS, rollout_len=cls.ROLLOUT_STEPS, instr_len=cls.INSTR_LEN, ), }, pipeline_stages=[ # Single stage, only with off-policy training PipelineStage( loss_names=["offpolicy_expert_ce_loss"], # no on-policy losses max_stage_steps=total_train_steps, # keep sampling episodes in the stage stage_components=[ StageComponent( uuid="offpolicy", storage_uuid="minigrid_offpolicy_expert", loss_names=["offpolicy_expert_ce_loss"], training_settings=TrainingSettings( update_repeats=num_mini_batch * update_repeats, num_mini_batch=1, ) ) ], ), ], # As we don't have any on-policy losses, we set the next # two values to zero to ensure we don't attempt to # compute gradients for on-policy rollouts: num_mini_batch=0, update_repeats=0, total_train_steps=total_train_steps, ) # fmt: on # %% """ You'll have noted that it is possible to combine on-policy and off-policy training in the same stage, even though here we apply pure off-policy training. ## Training We recommend using a machine with a CUDA-capable GPU for this experiment. In order to start training, we just need to invoke ```bash PYTHONPATH=. python allenact/main.py -b projects/tutorials minigrid_offpolicy_tutorial -m 8 -o ``` Note that with the `-m 8` option we limit to 8 the number of on-policy task sampling processes used between off-policy updates. 
If everything goes well, the training success should quickly reach values around 0.7-0.8 on GPU and converge to values close to 1 if given sufficient time to train. If running tensorboard, you'll notice a separate group of scalars named `train-offpolicy-losses` and `train-offpolicy-misc` with losses, approximate "experiences per second" (i.e. the number of off-policy experiences/steps being used to update the model per second), and other tracked values in addition to the standard `train-onpolicy-*` used for on-policy training. In the `train-metrics` and `train-misc` sections you'll find the metrics quantifying the performance of the agent throughout training and some other plots showing training details. *Note that the x-axis for these plots is different than for the `train-offpolicy-*` sections*. This is because these plots use the number of rollout steps as the x-axis (i.e. steps that the trained agent takes interactively) while the `train-offpolicy-*` plots use the number of offpolicy "experiences" that have been shown to the agent. A view of the training progress about 5 hours after starting on a CUDA-capable GPU should look similar to the below (note that training reached >99% success after about 50 minutes). ![off-policy progress](https://ai2-prior-allenact-public-assets.s3.us-west-2.amazonaws.com/tutorials/minigrid-offpolicy/minigrid-offpolicy-tutorial-tb.png) """ ================================================ FILE: projects/tutorials/minigrid_tutorial.py ================================================ # literate: tutorials/minigrid-tutorial.md # %% """# Tutorial: Navigation in MiniGrid.""" # %% """ In this tutorial, we will train an agent to complete the `MiniGrid-Empty-Random-5x5-v0` task within the [MiniGrid](https://github.com/maximecb/gym-minigrid) environment. We will demonstrate how to: * Write an experiment configuration file with a simple training pipeline from scratch. * Use one of the supported environments with minimal user effort.
* Train, validate and test your experiment from the command line. This tutorial assumes the [installation instructions](../installation/installation-allenact.md) have already been followed and that, to some extent, this framework's [abstractions](../getting_started/abstractions.md) are known. The `extra_requirements` for `minigrid_plugin` and `babyai_plugin` can be installed with: ```bash pip install -r allenact_plugins/minigrid_plugin/extra_requirements.txt; pip install -r allenact_plugins/babyai_plugin/extra_requirements.txt ``` ## The task A `MiniGrid-Empty-Random-5x5-v0` task consists of a grid of dimensions 5x5 where an agent spawned at a random location and orientation has to navigate to the visitable bottom right corner cell of the grid by sequences of three possible actions (rotate left/right and move forward). A visualization of the environment with expert steps in a random `MiniGrid-Empty-Random-5x5-v0` task looks like ![MiniGridEmptyRandom5x5 task example](../img/minigrid_environment.png) The observation for the agent is a subset of the entire grid, simulating a simplified limited field of view, as depicted by the highlighted rectangle (observed subset of the grid) around the agent (red arrow). Gray cells correspond to walls. ## Experiment configuration file Our complete experiment consists of: * Training a basic actor-critic agent with memory to solve randomly sampled navigation tasks. * Validation on a fixed set of tasks (running in parallel with training). * A second stage where we test saved checkpoints with a larger fixed set of tasks. The entire configuration for the experiment, including training, validation, and testing, is encapsulated in a single class implementing the `ExperimentConfig` abstraction. For this tutorial, we will follow the config under `projects/tutorials/minigrid_tutorial.py`.
The `ExperimentConfig` abstraction is used by the [OnPolicyTrainer](../api/allenact/algorithms/onpolicy_sync/engine.md#onpolicytrainer) class (for training) and the [OnPolicyInference](../api/allenact/algorithms/onpolicy_sync/engine.md#onpolicyinference) class (for validation and testing) invoked through the entry script `main.py` that calls an orchestrating [OnPolicyRunner](../api/allenact/algorithms/onpolicy_sync/runner.md#onpolicyrunner) class. It includes: * A `tag` method to identify the experiment. * A `create_model` method to instantiate actor-critic models. * A `make_sampler_fn` method to instantiate task samplers. * Three `{train,valid,test}_task_sampler_args` methods describing initialization parameters for task samplers used in training, validation, and testing; including assignment of workers to devices for simulation. * A `machine_params` method with configuration parameters that will be used for training, validation, and testing. * A `training_pipeline` method describing a possibly multi-staged training pipeline with different types of losses, an optimizer, and other parameters like learning rates, batch sizes, etc. ### Preliminaries We first import everything we'll need to define our experiment. 
""" # %% from typing import Dict, Optional, List, Any, cast import gym from gym_minigrid.envs import EmptyRandomEnv5x5 import torch.nn as nn import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR from allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler from allenact.base_abstractions.sensor import SensorSuite from allenact.utils.experiment_utils import ( TrainingPipeline, Builder, PipelineStage, LinearDecay, ) from allenact_plugins.minigrid_plugin.minigrid_models import MiniGridSimpleConvRNN from allenact_plugins.minigrid_plugin.minigrid_sensors import EgocentricMiniGridSensor from allenact_plugins.minigrid_plugin.minigrid_tasks import ( MiniGridTaskSampler, MiniGridTask, ) # %% """ We now create the `MiniGridTutorialExperimentConfig` class which we will use to define our experiment. For pedagogical reasons, we will add methods to this class one at a time below with a description of what these classes do. """ # %% class MiniGridTutorialExperimentConfig(ExperimentConfig): # %% """An experiment is identified by a `tag`.""" # %% @classmethod def tag(cls) -> str: return "MiniGridTutorial" # %% """ ### Sensors and Model A readily available Sensor type for MiniGrid, [EgocentricMiniGridSensor](../api/allenact_plugins/minigrid_plugin/minigrid_sensors.md#egocentricminigridsensor), allows us to extract observations in a format consumable by an `ActorCriticModel` agent: """ # %% SENSORS = [ EgocentricMiniGridSensor(agent_view_size=5, view_channels=3), ] # %% """ The three `view_channels` include objects, colors and states corresponding to a partial observation of the environment as an image tensor, equivalent to that from `ImgObsWrapper` in [MiniGrid](https://github.com/maximecb/gym-minigrid#wrappers). 
The relatively large `agent_view_size` means the view will only be clipped by the environment walls in the forward and lateral directions with respect to the agent's orientation. We define our `ActorCriticModel` agent using a lightweight implementation with recurrent memory for MiniGrid environments, [MiniGridSimpleConvRNN](../api/allenact_plugins/minigrid_plugin/minigrid_models.md#minigridsimpleconvrnn): """ # %% @classmethod def create_model(cls, **kwargs) -> nn.Module: return MiniGridSimpleConvRNN( action_space=gym.spaces.Discrete(len(MiniGridTask.class_action_names())), observation_space=SensorSuite(cls.SENSORS).observation_spaces, num_objects=cls.SENSORS[0].num_objects, num_colors=cls.SENSORS[0].num_colors, num_states=cls.SENSORS[0].num_states, ) # %% """ ### Task samplers We use an available TaskSampler implementation for MiniGrid environments that allows to sample both random and deterministic `MiniGridTasks`, [MiniGridTaskSampler](../api/allenact_plugins/minigrid_plugin/minigrid_tasks.md#minigridtasksampler): """ # %% @classmethod def make_sampler_fn(cls, **kwargs) -> TaskSampler: return MiniGridTaskSampler(**kwargs) # %% """ This task sampler will during training (or validation/testing), randomly initialize new tasks for the agent to complete. While it is not quite as important for this task type (as we test our agent in the same setting it is trained on) there are a lot of good reasons we would like to sample tasks differently during training than during validation or testing. One good reason, that is applicable in this tutorial, is that, during training, we would like to be able to sample tasks forever while, during testing, we would like to sample a fixed number of tasks (as otherwise we would never finish testing!). 
In `allenact` this is made possible by defining different arguments for the task sampler: """ # %% def train_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args(process_ind=process_ind, mode="train") def valid_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args(process_ind=process_ind, mode="valid") def test_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args(process_ind=process_ind, mode="test") # %% """ where, for convenience, we have defined a `_get_sampler_args` method: """ # %% def _get_sampler_args(self, process_ind: int, mode: str) -> Dict[str, Any]: """Generate initialization arguments for train, valid, and test TaskSamplers. # Parameters process_ind : index of the current task sampler mode: one of `train`, `valid`, or `test` """ if mode == "train": max_tasks = None # infinite training tasks task_seeds_list = None # no predefined random seeds for training deterministic_sampling = False # randomly sample tasks in training else: max_tasks = 20 + 20 * (mode == "test") # 20 tasks for valid, 40 for test # one seed for each task to sample: # - ensures different seeds for each sampler, and # - ensures a deterministic set of sampled tasks. 
task_seeds_list = list( range(process_ind * max_tasks, (process_ind + 1) * max_tasks) ) deterministic_sampling = ( True # deterministically sample task in validation/testing ) return dict( max_tasks=max_tasks, # see above env_class=self.make_env, # builder for third-party environment (defined below) sensors=self.SENSORS, # sensors used to return observations to the agent env_info=dict(), # parameters for environment builder (none for now) task_seeds_list=task_seeds_list, # see above deterministic_sampling=deterministic_sampling, # see above ) @staticmethod def make_env(*args, **kwargs): return EmptyRandomEnv5x5() # %% """ Note that the `env_class` argument to the Task Sampler is the one determining which task type we are going to train the model for (in this case, `MiniGrid-Empty-Random-5x5-v0` from [gym-minigrid](https://github.com/maximecb/gym-minigrid#empty-environment)) . The sparse reward is [given by the environment](https://github.com/maximecb/gym-minigrid/blob/6e22a44dc67414b647063692258a4f95ce789161/gym_minigrid/minigrid.py#L819) , and the maximum task length is 100. For training, we opt for a default random sampling, whereas for validation and test we define fixed sets of randomly sampled tasks without needing to explicitly define a dataset. In this toy example, the maximum number of different tasks is 32. For validation we sample 320 tasks using 16 samplers, or 640 for testing, so we can be fairly sure that all possible tasks are visited at least once during evaluation. ### Machine parameters Given the simplicity of the task and model, we can quickly train the model on the CPU: """ # %% @classmethod def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]: return { "nprocesses": 128 if mode == "train" else 16, "devices": [], } # %% """ We allocate a larger number of samplers for training (128) than for validation or testing (16), and we default to CPU usage by returning an empty list of `devices`. 
### Training pipeline The last definition required before starting to train is a training pipeline. In this case, we just use a single PPO stage with linearly decaying learning rate: """ # %% @classmethod def training_pipeline(cls, **kwargs) -> TrainingPipeline: ppo_steps = int(150000) return TrainingPipeline( named_losses=dict(ppo_loss=PPO(**PPOConfig)), # type:ignore pipeline_stages=[ PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps) ], optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4)), num_mini_batch=4, update_repeats=3, max_grad_norm=0.5, num_steps=16, gamma=0.99, use_gae=True, gae_lambda=0.95, advance_scene_rollout_period=None, save_interval=10000, metric_accumulate_interval=1, lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)} # type:ignore ), ) # %% """ You can see that we use a `Builder` class to postpone the construction of some of the elements, like the optimizer, for which the model weights need to be known. ## Training and validation We have a complete implementation of this experiment's configuration class in `projects/tutorials/minigrid_tutorial.py`. To start training from scratch, we just need to invoke ```bash PYTHONPATH=. python allenact/main.py minigrid_tutorial -b projects/tutorials -m 8 -o /PATH/TO/minigrid_output -s 12345 ``` from the `allenact` root directory. * With `-b projects/tutorials` we tell `allenact` that `minigrid_tutorial` experiment config file will be found in the `projects/tutorials` directory. * With `-m 8` we limit the number of subprocesses to 8 (each subprocess will run 16 of the 128 training task samplers). * With `-o minigrid_output` we set the output folder into which results and logs will be saved. * With `-s 12345` we set the random seed. If we have Tensorboard installed, we can track progress with ```bash tensorboard --logdir /PATH/TO/minigrid_output ``` which will default to the URL [http://localhost:6006/](http://localhost:6006/). 
After 150,000 steps, the script will terminate and several checkpoints will be saved in the output folder. The training curves should look similar to: ![training curves](../img/minigrid_train.png) If everything went well, the `valid` success rate should converge to 1 and the mean episode length to a value below 4. (For perfectly uniform sampling and complete observation, the expectation for the optimal policy is 3.75 steps.) In the not-so-unlikely event of the run failing to converge to a near-optimal policy, we can just try to re-run (for example with a different random seed). The validation curves should look similar to: ![validation curves](../img/minigrid_valid.png) ## Testing The training start date for the experiment, in `YYYY-MM-DD_HH-MM-SS` format, is used as the name of one of the subfolders in the path to the checkpoints, saved under the output folder. In order to evaluate (i.e. test) a particular checkpoint, we need to pass the `--eval` flag and specify the checkpoint with the `--checkpoint CHECKPOINT_PATH` option: ```bash PYTHONPATH=. python allenact/main.py minigrid_tutorial \ -b projects/tutorials \ -m 1 \ -o /PATH/TO/minigrid_output \ -s 12345 \ --eval \ --checkpoint /PATH/TO/minigrid_output/checkpoints/MiniGridTutorial/YOUR_START_DATE/exp_MiniGridTutorial__stage_00__steps_000000151552.pt ``` Again, if everything went well, the `test` success rate should converge to 1 and the mean episode length to a value below 4. Detailed results are saved under a `metrics` subfolder in the output folder. 
The test curves should look similar to: ![test curves](../img/minigrid_test.png) """ ================================================ FILE: projects/tutorials/minigrid_tutorial_conds.py ================================================ from typing import Dict, Optional, List, Any, cast, Callable, Union, Tuple import gym import numpy as np import torch import torch.nn as nn import torch.optim as optim from gym_minigrid.envs import EmptyRandomEnv5x5 from gym_minigrid.minigrid import MiniGridEnv from torch.optim.lr_scheduler import LambdaLR from allenact.algorithms.onpolicy_sync.losses.imitation import Imitation from allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig from allenact.algorithms.onpolicy_sync.policy import ActorCriticModel, DistributionType from allenact.base_abstractions.distributions import ( CategoricalDistr, ConditionalDistr, SequentialDistr, ) from allenact.base_abstractions.experiment_config import ExperimentConfig, TaskSampler from allenact.base_abstractions.misc import ActorCriticOutput, Memory, RLStepResult from allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor from allenact.embodiedai.models.basic_models import RNNStateEncoder from allenact.utils.experiment_utils import ( TrainingPipeline, Builder, PipelineStage, LinearDecay, ) from allenact.utils.misc_utils import prepare_locals_for_super from allenact_plugins.minigrid_plugin.minigrid_models import MiniGridSimpleConvBase from allenact_plugins.minigrid_plugin.minigrid_sensors import EgocentricMiniGridSensor from allenact_plugins.minigrid_plugin.minigrid_tasks import ( MiniGridTaskSampler, MiniGridTask, ) class ConditionedLinearActorCriticHead(nn.Module): def __init__( self, input_size: int, master_actions: int = 2, subpolicy_actions: int = 2 ): super().__init__() self.input_size = input_size self.master_and_critic = nn.Linear(input_size, master_actions + 1) self.embed_higher = nn.Embedding(num_embeddings=2, embedding_dim=input_size) self.actor = nn.Linear(2 
* input_size, subpolicy_actions) nn.init.orthogonal_(self.master_and_critic.weight) nn.init.constant_(self.master_and_critic.bias, 0) nn.init.orthogonal_(self.actor.weight) nn.init.constant_(self.actor.bias, 0) def lower_policy(self, *args, **kwargs): assert "higher" in kwargs assert "state_embedding" in kwargs emb = self.embed_higher(kwargs["higher"]) logits = self.actor(torch.cat([emb, kwargs["state_embedding"]], dim=-1)) return CategoricalDistr(logits=logits) def forward(self, x): out = self.master_and_critic(x) master_logits = out[..., :-1] values = out[..., -1:] # noinspection PyArgumentList cond1 = ConditionalDistr( distr_conditioned_on_input_fn_or_instance=CategoricalDistr( logits=master_logits ), action_group_name="higher", ) cond2 = ConditionalDistr( distr_conditioned_on_input_fn_or_instance=lambda *args, **kwargs: ConditionedLinearActorCriticHead.lower_policy( self, *args, **kwargs ), action_group_name="lower", state_embedding=x, ) return ( SequentialDistr(cond1, cond2), values.view(*values.shape[:2], -1), # [steps, samplers, flattened] ) class ConditionedLinearActorCritic(ActorCriticModel[SequentialDistr]): def __init__( self, input_uuid: str, action_space: gym.spaces.Dict, observation_space: gym.spaces.Dict, ): super().__init__(action_space=action_space, observation_space=observation_space) assert ( input_uuid in observation_space.spaces ), "ConditionedLinearActorCritic expects only a single observational input." self.input_uuid = input_uuid box_space: gym.spaces.Box = observation_space[self.input_uuid] assert isinstance(box_space, gym.spaces.Box), ( "ConditionedLinearActorCritic requires that" "observation space corresponding to the input uuid is a Box space." 
class ConditionedRNNActorCritic(ActorCriticModel[SequentialDistr]):
    """Recurrent actor-critic that wraps a non-recurrent conditioned head.

    A single 1D `Box` observation (selected by `input_uuid`) is passed
    through an `RNNStateEncoder`. The encoder output is then handed, under
    the uuid `"rnn_<input_uuid>"`, to a head model built by `head_type`,
    which produces the actor-critic output.
    """

    def __init__(
        self,
        input_uuid: str,
        action_space: gym.spaces.Dict,
        observation_space: gym.spaces.Dict,
        hidden_size: int = 128,
        num_layers: int = 1,
        rnn_type: str = "GRU",
        head_type: Callable[
            ..., ActorCriticModel[SequentialDistr]
        ] = ConditionedLinearActorCritic,
    ):
        """Create the state encoder and the non-recurrent head.

        # Parameters

        input_uuid : key of the observation fed to the RNN; must be present in
            `observation_space` and map to a 1D `gym.spaces.Box`.
        action_space : action space forwarded to the base class and the head.
        observation_space : observation space of the raw inputs.
        hidden_size : RNN hidden size; also the input size of the head.
        num_layers : number of RNN layers.
        rnn_type : forwarded to `RNNStateEncoder` as `rnn_type`.
        head_type : callable building the head; it is called with keyword
            arguments `input_uuid`, `action_space` and `observation_space`.
        """
        super().__init__(action_space=action_space, observation_space=observation_space)
        self.hidden_size = hidden_size
        self.rnn_type = rnn_type

        # The messages below used to name other classes ("LinearActorCritic",
        # "RNNActorCritic"), and the second one lost a space where its two
        # literals are concatenated ("requires thatobservation space ...").
        assert (
            input_uuid in observation_space.spaces
        ), "ConditionedRNNActorCritic expects only a single observational input."
        self.input_uuid = input_uuid

        box_space: gym.spaces.Box = observation_space[self.input_uuid]

        assert isinstance(box_space, gym.spaces.Box), (
            "ConditionedRNNActorCritic requires that "
            "observation space corresponding to the input uuid is a Box space."
        )
        assert len(box_space.shape) == 1
        self.in_dim = box_space.shape[0]

        self.state_encoder = RNNStateEncoder(
            input_size=self.in_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            rnn_type=rnn_type,
            trainable_masked_hidden_state=True,
        )

        # The head reads the RNN output under its own, derived uuid.
        self.head_uuid = "{}_{}".format("rnn", input_uuid)

        self.ac_nonrecurrent_head: ActorCriticModel[SequentialDistr] = head_type(
            input_uuid=self.head_uuid,
            action_space=action_space,
            observation_space=gym.spaces.Dict(
                {
                    self.head_uuid: gym.spaces.Box(
                        low=np.float32(0.0), high=np.float32(1.0), shape=(hidden_size,)
                    )
                }
            ),
        )

        self.memory_key = "rnn"

    @property
    def recurrent_hidden_state_size(self) -> int:
        """Size of the RNN hidden state (the `hidden_size` given at init)."""
        return self.hidden_size

    @property
    def num_recurrent_layers(self) -> int:
        """Number of recurrent layers, as reported by the state encoder."""
        return self.state_encoder.num_recurrent_layers

    def _recurrent_memory_specification(self):
        """Describe the single memory tensor stored under `self.memory_key`.

        The value is a pair of named dimensions (`layer`, `sampler`,
        `hidden`) and the dtype `torch.float32`.
        """
        return {
            self.memory_key: (
                (
                    ("layer", self.num_recurrent_layers),
                    ("sampler", None),
                    ("hidden", self.recurrent_hidden_state_size),
                ),
                torch.float32,
            )
        }

    def forward(  # type:ignore
        self,
        observations: Dict[str, Union[torch.FloatTensor, Dict[str, Any]]],
        memory: Memory,
        prev_actions: torch.Tensor,
        masks: torch.FloatTensor,
    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
        """Encode the observation with the RNN and evaluate the head on it.

        # Parameters

        observations : must contain `self.input_uuid`.
        memory : provides the hidden state stored under `self.memory_key`.
        prev_actions : forwarded unchanged to the head.
        masks : forwarded to both the state encoder and the head.

        # Returns

        The head's actor-critic output together with `memory` after the new
        hidden state has been stored under `self.memory_key`. The memory
        value returned by the head is discarded.
        """
        rnn_out, mem_return = self.state_encoder(
            x=observations[self.input_uuid],
            hidden_states=memory.tensor(self.memory_key),
            masks=masks,
        )

        # noinspection PyCallingNonCallable
        out, _ = self.ac_nonrecurrent_head(
            observations={self.head_uuid: rnn_out},
            memory=None,
            prev_actions=prev_actions,
            masks=masks,
        )

        # noinspection PyArgumentList
        return (
            out,
            memory.set_tensor(self.memory_key, mem_return),
        )
class ConditionedMiniGridTask(MiniGridTask):
    """MiniGrid task whose four actions are factored into two binary choices.

    The flat action index is `lower + 2 * higher`, indexing `_ACTION_NAMES`:
    `higher == 0` selects between "left"/"right" and `higher == 1` between
    "forward"/"pickup".
    """

    _ACTION_NAMES = ("left", "right", "forward", "pickup")
    _ACTION_IND_TO_MINIGRID_IND = tuple(
        MiniGridEnv.Actions.__members__[name].value for name in _ACTION_NAMES
    )

    @property
    def action_space(self) -> gym.spaces.Dict:
        """Dict space with two binary sub-actions, "higher" and "lower"."""
        binary_groups = {
            "higher": gym.spaces.Discrete(2),
            "lower": gym.spaces.Discrete(2),
        }
        return gym.spaces.Dict(**binary_groups)

    def _step(self, action: Dict[str, int]) -> RLStepResult:
        """Translate the factored action to a MiniGrid action and step the env."""
        assert len(action) == 2, "got action={}".format(action)

        flat_action = action["lower"] + 2 * action["higher"]
        step_output = self.env.step(
            action=self._ACTION_IND_TO_MINIGRID_IND[flat_action]
        )
        minigrid_obs, reward, self._minigrid_done, info = step_output

        return RLStepResult(
            observation=self.get_observations(minigrid_output_obs=minigrid_obs),
            reward=reward,
            done=self.is_done(),
            info=info,
        )

    def query_expert(self, **kwargs) -> Tuple[int, bool]:
        """Return the expert's choice for the requested action group.

        For the "higher" group the parent expert is queried, its answer is
        cached on the task, and the higher component (`action // 2`) is
        returned. For any other group the cached answer is consumed (and the
        cache removed) to return the lower component (`action % 2`). When
        the cached expert answer is flagged as unsuccessful, `(0, False)`
        is returned.

        # Raises

        ValueError : if the "higher" group is queried after the episode is done.
        """
        if kwargs["expert_sensor_group_name"] == "higher":
            if self._minigrid_done:
                raise ValueError("Episode is completed, but expert is still queried.")

            self.cached_expert = super().query_expert(**kwargs)
            cached = self.cached_expert
            return (cached[0] // 2, True) if cached[1] else (0, False)

        # The "higher" query must have run first and left its answer behind.
        assert hasattr(self, "cached_expert")
        cached = self.cached_expert
        lower_expert = (cached[0] % 2, True) if cached[1] else (0, False)
        del self.cached_expert
        return lower_expert
@classmethod
def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
    """Return the sampler-process count and device list for `mode`.

    Training uses 128 sampler processes and every other mode uses 16. The
    `devices` entry is always an empty list. Extra keyword arguments are
    accepted and ignored.
    """
    sampler_count = 128 if mode == "train" else 16
    return dict(nprocesses=sampler_count, devices=[])
max_grad_norm=0.5, num_steps=16, gamma=0.99, use_gae=True, gae_lambda=0.95, advance_scene_rollout_period=None, save_interval=10000, metric_accumulate_interval=1, lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)} # type:ignore ), ) ================================================ FILE: projects/tutorials/navtopartner_robothor_rgb_ppo.py ================================================ from math import ceil from typing import Dict, Any, List, Optional import gym import numpy as np import torch import torch.nn as nn import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR from allenact.algorithms.onpolicy_sync.losses import PPO from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig from allenact.base_abstractions.experiment_config import ExperimentConfig from allenact.base_abstractions.sensor import SensorSuite from allenact.base_abstractions.task import TaskSampler from allenact.utils.experiment_utils import ( Builder, PipelineStage, TrainingPipeline, LinearDecay, ) from allenact.utils.multi_agent_viz_utils import MultiTrajectoryViz from allenact.utils.viz_utils import VizSuite, AgentViewViz from allenact_plugins.robothor_plugin.robothor_models import ( NavToPartnerActorCriticSimpleConvRNN, ) from allenact_plugins.robothor_plugin.robothor_sensors import RGBSensorMultiRoboThor from allenact_plugins.robothor_plugin.robothor_task_samplers import ( NavToPartnerTaskSampler, ) from allenact_plugins.robothor_plugin.robothor_tasks import NavToPartnerTask from allenact_plugins.robothor_plugin.robothor_viz import ThorMultiViz class NavToPartnerRoboThorRGBPPOExperimentConfig(ExperimentConfig): """A Multi-Agent Navigation experiment configuration in RoboThor.""" # Task Parameters MAX_STEPS = 500 REWARD_CONFIG = { "step_penalty": -0.01, "max_success_distance": 0.75, "success_reward": 5.0, } # Simulator Parameters CAMERA_WIDTH = 300 CAMERA_HEIGHT = 300 SCREEN_SIZE = 224 # Training Engine Parameters 
def split_num_processes(self, ndevices):
    """Distribute `self.NUM_PROCESSES` as evenly as possible over devices.

    # Parameters

    ndevices : number of devices to split the processes between; must be a
        positive integer no larger than `self.NUM_PROCESSES`.

    # Returns

    A list of length `ndevices` whose entries sum to `self.NUM_PROCESSES`.
    When the split is uneven, the leading entries receive one extra process.

    # Raises

    ValueError : if `ndevices` is smaller than 1. Previously a zero surfaced
        as an opaque `ZeroDivisionError` and a negative value as an
        `IndexError`.
    AssertionError : if there are fewer processes than devices.
    """
    if ndevices < 1:
        raise ValueError(
            "ndevices must be a positive integer, got {}".format(ndevices)
        )
    assert self.NUM_PROCESSES >= ndevices, "NUM_PROCESSES {} < ndevices {}".format(
        self.NUM_PROCESSES, ndevices
    )

    # Every device gets `per_device` processes; the first `remainder`
    # devices get one more, matching a round-robin assignment.
    per_device, remainder = divmod(self.NUM_PROCESSES, ndevices)
    return [
        per_device + 1 if device_ind < remainder else per_device
        for device_ind in range(ndevices)
    ]
def machine_params(self, mode="train", **kwargs):
    """Return process counts, devices and the visualizer for `mode`.

    Without CUDA every mode runs on `["cpu"]` and training uses 4
    processes. With CUDA the mode's GPU list is used and the training
    processes are split across devices by `self.split_num_processes`.
    Validation always reports 0 processes and testing 1; only test mode
    attaches a visualizer (from `self.get_viz`).

    # Raises

    NotImplementedError : if `mode` is not 'train', 'valid' or 'test'.
    """
    if mode not in ("train", "valid", "test"):
        raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

    cpu_only = not torch.cuda.is_available()
    visualizer = None

    if mode == "train":
        devices = ["cpu"] if cpu_only else list(self.TRAINING_GPUS)
        nprocesses = 4 if cpu_only else self.split_num_processes(len(devices))
    elif mode == "valid":
        devices = ["cpu"] if cpu_only else self.VALIDATION_GPUS
        nprocesses = 0
    else:  # mode == "test"
        devices = ["cpu"] if cpu_only else self.TESTING_GPUS
        nprocesses = 1
        visualizer = self.get_viz(mode=mode)

    return dict(nprocesses=nprocesses, devices=devices, visualizer=visualizer)
" You can avoid this by setting a number of workers divisible by the number of scenes" ) scenes = scenes * int(ceil(total_processes / len(scenes))) scenes = scenes[: total_processes * (len(scenes) // total_processes)] else: if len(scenes) % total_processes != 0: print( "Warning: oversampling some of the scenes to feed all processes." " You can avoid this by setting a number of workers divisor of the number of scenes" ) inds = self._partition_inds(len(scenes), total_processes) return { "scenes": scenes[inds[process_ind] : inds[process_ind + 1]], "max_steps": self.MAX_STEPS, "sensors": self.SENSORS, "action_space": gym.spaces.Tuple( [ gym.spaces.Discrete(len(NavToPartnerTask.class_action_names())), gym.spaces.Discrete(len(NavToPartnerTask.class_action_names())), ] ), "seed": seeds[process_ind] if seeds is not None else None, "deterministic_cudnn": deterministic_cudnn, "rewards_config": self.REWARD_CONFIG, } def train_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: scenes = ["FloorPlan_Train1_1"] res = self._get_sampler_args_for_scene_split( scenes, process_ind, total_processes, seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) res["env_args"] = { **self.ENV_ARGS, "x_display": ( ("0.%d" % devices[process_ind % len(devices)]) if devices is not None and len(devices) > 0 else None ), } return res def valid_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: scenes = ["FloorPlan_Train1_1"] res = self._get_sampler_args_for_scene_split( scenes, process_ind, total_processes, seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) res["env_args"] = { **self.ENV_ARGS, "x_display": ( ("0.%d" % devices[process_ind % len(devices)]) if devices is not None and len(devices) > 0 else None ), } 
class ObjectNavThorDaggerThenPPOExperimentConfig(ObjectNavThorPPOExperimentConfig):
    """A simple object navigation experiment in THOR.

    Extends the PPO-only configuration with an expert action sensor and
    trains with DAgger first and PPO afterwards.
    """

    # The parent's sensors plus an expert sensor sized to the task's actions.
    SENSORS = [
        *ObjectNavThorPPOExperimentConfig.SENSORS,
        ExpertActionSensor(
            action_space=len(ObjectNaviThorGridTask.class_action_names()),
        ),
    ]

    @classmethod
    def tag(cls):
        """Return the name identifying this experiment."""
        return "ObjectNavThorDaggerThenPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build the two-stage (imitation, then PPO) training pipeline.

        The first stage runs the imitation loss for 10,000 steps with a
        `LinearDecay(startp=1.0, endp=0.0)` teacher-forcing schedule over
        those steps; the second stage runs the PPO loss for 1,000,000 steps.
        The mini-batch count is 6 when CUDA is available and 2 otherwise.
        """
        dagger_steps = 10000
        ppo_steps = 1000000

        imitation_stage = dict(
            loss_names=["imitation_loss"],
            teacher_forcing=LinearDecay(
                startp=1.0,
                endp=0.0,
                steps=dagger_steps,
            ),
            max_stage_steps=dagger_steps,
        )
        ppo_stage = dict(
            loss_names=["ppo_loss"],
            max_stage_steps=ppo_steps,
        )

        return TrainingPipeline(
            save_interval=10000,
            # Log every 10 max length tasks
            metric_accumulate_interval=cls.MAX_STEPS * 10,
            optimizer_builder=Builder(optim.Adam, dict(lr=2.5e-4)),
            num_mini_batch=6 if torch.cuda.is_available() else 2,
            update_repeats=4,
            max_grad_norm=0.5,
            num_steps=128,
            named_losses={
                "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),
                # The imitation loss is what this config adds over its parent.
                "imitation_loss": Imitation(),
            },
            gamma=0.99,
            use_gae=True,
            gae_lambda=1.0,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(**imitation_stage),
                PipelineStage(**ppo_stage),
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )
def get_viz(self, mode):
    """Return the visualization suite, building it on first use.

    The suite is stored on `self.viz`; once set, it is returned as-is and
    `mode` is ignored.
    """
    if self.viz is None:
        self.viz = VizSuite(
            mode=mode,
            base_trajectory=TrajectoryViz(
                path_to_target_location=None,
                path_to_rot_degrees=("rotation",),
            ),
            egeocentric=AgentViewViz(max_video_length=100),
            action_probs=ActorViz(figsize=(3.25, 10), fontsize=18),
            taken_action_logprobs=TensorViz1D(),
            episode_mask=TensorViz1D(rollout_source=("masks",)),
            thor_trajectory=ThorViz(
                path_to_target_location=None,
                figsize=(8, 8),
                viz_rows_cols=(448, 448),
            ),
        )
    return self.viz
@classmethod
def training_pipeline(cls, **kwargs):
    """Build the single-stage PPO training pipeline.

    One `PipelineStage` runs the PPO loss for 1,000,000 steps; the same
    step count parameterizes the `LinearDecay` objects used for the PPO
    clip decay and the learning-rate lambda. The mini-batch count is 6
    when CUDA is available and 2 otherwise.
    """
    ppo_steps = 1000000
    ppo_stage = PipelineStage(
        loss_names=["ppo_loss"],
        max_stage_steps=ppo_steps,
    )

    return TrainingPipeline(
        save_interval=10000,
        # Log every 10 max length tasks
        metric_accumulate_interval=cls.MAX_STEPS * 10,
        optimizer_builder=Builder(optim.Adam, dict(lr=2.5e-4)),
        num_mini_batch=6 if torch.cuda.is_available() else 2,
        update_repeats=4,
        max_grad_norm=0.5,
        num_steps=128,
        named_losses={
            "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),
        },
        gamma=0.99,
        use_gae=True,
        gae_lambda=1.0,
        advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=[ppo_stage],
        lr_scheduler_builder=Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
        ),
    )
def _get_sampler_args_for_scene_split(
    self,
    scenes: List[str],
    process_ind: int,
    total_processes: int,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Build the task-sampler arguments for one process's share of `scenes`.

    When there are more processes than scenes, the scene list is repeated
    and truncated to a multiple of `total_processes` so that every process
    receives at least one scene. The (possibly extended) list is then cut
    at the boundaries given by `self._partition_inds`, and the slice for
    `process_ind` is returned with the shared settings.

    A warning is printed whenever the scenes cannot be divided evenly.

    # Parameters

    scenes : scene names to distribute.
    process_ind : index of the process whose arguments are built.
    total_processes : number of processes sharing `scenes`.
    seeds : optional per-process seeds, indexed by `process_ind`.
    deterministic_cudnn : passed through unchanged in the result.
    """
    if total_processes > len(scenes):
        # Fewer scenes than processes: oversample some scenes -> bias
        if total_processes % len(scenes) != 0:
            print(
                "Warning: oversampling some of the scenes to feed all processes."
                " You can avoid this by setting a number of workers divisible by the number of scenes"
            )
        replicated = scenes * int(ceil(total_processes / len(scenes)))
        usable = total_processes * (len(replicated) // total_processes)
        scenes = replicated[:usable]
    elif len(scenes) % total_processes != 0:
        print(
            "Warning: oversampling some of the scenes to feed all processes."
            " You can avoid this by setting a number of workers divisor of the number of scenes"
        )

    boundaries = self._partition_inds(len(scenes), total_processes)
    first, last = boundaries[process_ind], boundaries[process_ind + 1]

    return dict(
        scenes=scenes[first:last],
        object_types=self.OBJECT_TYPES,
        env_args=self.ENV_ARGS,
        max_steps=self.MAX_STEPS,
        sensors=self.SENSORS,
        action_space=gym.spaces.Discrete(
            len(ObjectNaviThorGridTask.class_action_names())
        ),
        seed=None if seeds is None else seeds[process_ind],
        deterministic_cudnn=deterministic_cudnn,
    )
= self._get_sampler_args_for_scene_split( self.TEST_SCENES, process_ind, total_processes, seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) res["scene_period"] = self.TEST_SAMPLES_IN_SCENE res["max_tasks"] = self.TEST_SAMPLES_IN_SCENE * len(res["scenes"]) res["env_args"] = {} res["env_args"].update(self.ENV_ARGS) res["env_args"]["x_display"] = ( ("0.%d" % devices[process_ind % len(devices)]) if devices is not None and len(devices) > 0 else None ) return res ================================================ FILE: projects/tutorials/pointnav_habitat_rgb_ddppo.py ================================================ import os from typing import Dict, Any, List, Optional, Sequence import gym import torch import torch.nn as nn import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR from torchvision import models from allenact.algorithms.onpolicy_sync.losses import PPO from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph from allenact.base_abstractions.sensor import SensorSuite from allenact.base_abstractions.task import TaskSampler from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor from allenact.utils.experiment_utils import ( Builder, PipelineStage, TrainingPipeline, LinearDecay, evenly_distribute_count_into_bins, ) from allenact_plugins.habitat_plugin.habitat_constants import ( HABITAT_DATASETS_DIR, HABITAT_CONFIGS_DIR, ) from allenact_plugins.habitat_plugin.habitat_sensors import ( RGBSensorHabitat, TargetCoordinatesSensorHabitat, ) from allenact_plugins.habitat_plugin.habitat_task_samplers import PointNavTaskSampler from allenact_plugins.habitat_plugin.habitat_utils import ( construct_env_configs, get_habitat_config, ) from allenact_plugins.navigation_plugin.objectnav.models import ( ResnetTensorNavActorCritic, ) from 
allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask class PointNavHabitatRGBPPOTutorialExperimentConfig(ExperimentConfig): """A Point Navigation experiment configuration in Habitat.""" # Task Parameters MAX_STEPS = 500 REWARD_CONFIG = { "step_penalty": -0.01, "goal_success_reward": 10.0, "failed_stop_reward": 0.0, "shaping_weight": 1.0, } DISTANCE_TO_GOAL = 0.2 # Simulator Parameters CAMERA_WIDTH = 640 CAMERA_HEIGHT = 480 SCREEN_SIZE = 224 # Training Engine Parameters ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None NUM_PROCESSES = max(5 * torch.cuda.device_count() - 1, 4) TRAINING_GPUS = list(range(torch.cuda.device_count())) VALIDATION_GPUS = [torch.cuda.device_count() - 1] TESTING_GPUS = [torch.cuda.device_count() - 1] task_data_dir_template = os.path.join( HABITAT_DATASETS_DIR, "pointnav/gibson/v1/{}/{}.json.gz" ) TRAIN_SCENES = task_data_dir_template.format(*(["train"] * 2)) VALID_SCENES = task_data_dir_template.format(*(["val"] * 2)) TEST_SCENES = task_data_dir_template.format(*(["test"] * 2)) CONFIG = get_habitat_config( os.path.join(HABITAT_CONFIGS_DIR, "tasks/pointnav_gibson.yaml") ) CONFIG.defrost() CONFIG.NUM_PROCESSES = NUM_PROCESSES CONFIG.SIMULATOR_GPU_IDS = TRAINING_GPUS CONFIG.DATASET.SCENES_DIR = "habitat/habitat-api/data/scene_datasets/" CONFIG.DATASET.POINTNAVV1.CONTENT_SCENES = ["*"] CONFIG.DATASET.DATA_PATH = TRAIN_SCENES CONFIG.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR"] CONFIG.SIMULATOR.RGB_SENSOR.WIDTH = CAMERA_WIDTH CONFIG.SIMULATOR.RGB_SENSOR.HEIGHT = CAMERA_HEIGHT CONFIG.SIMULATOR.TURN_ANGLE = 30 CONFIG.SIMULATOR.FORWARD_STEP_SIZE = 0.25 CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = MAX_STEPS CONFIG.TASK.TYPE = "Nav-v0" CONFIG.TASK.SUCCESS_DISTANCE = DISTANCE_TO_GOAL CONFIG.TASK.SENSORS = ["POINTGOAL_WITH_GPS_COMPASS_SENSOR"] CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.GOAL_FORMAT = "POLAR" CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.DIMENSIONALITY = 2 CONFIG.TASK.GOAL_SENSOR_UUID = "pointgoal_with_gps_compass" 
def machine_params(self, mode="train", **kwargs):
    """Return the `MachineParams` (processes, devices, preprocessors) for `mode`.

    Without CUDA every mode uses one process and an empty device list.
    With CUDA, training spreads `self.NUM_PROCESSES` over the training GPUs
    via `evenly_distribute_count_into_bins`, while validation and testing
    use one process on their respective GPU lists. A
    `SensorPreprocessorGraph` is attached in training mode, or whenever the
    process count is positive; otherwise it is `None`.

    # Raises

    NotImplementedError : if `mode` is not 'train', 'valid' or 'test'.
    """
    if mode not in ("train", "valid", "test"):
        raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

    cpu_only = not torch.cuda.is_available()

    if mode == "train":
        workers_per_device = 1
        gpu_ids = [] if cpu_only else self.TRAINING_GPUS * workers_per_device
        if cpu_only:
            nprocesses = 1
        else:
            nprocesses = evenly_distribute_count_into_bins(
                self.NUM_PROCESSES, len(gpu_ids)
            )
    elif mode == "valid":
        nprocesses = 1
        gpu_ids = [] if cpu_only else self.VALIDATION_GPUS
    else:  # mode == "test"
        nprocesses = 1
        gpu_ids = [] if cpu_only else self.TESTING_GPUS

    # `nprocesses` is either a single count or a per-device sequence of counts.
    needs_preprocessors = mode == "train" or (
        (isinstance(nprocesses, int) and nprocesses > 0)
        or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
    )
    if needs_preprocessors:
        sensor_preprocessor_graph = SensorPreprocessorGraph(
            source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
            preprocessors=self.PREPROCESSORS,
        )
    else:
        sensor_preprocessor_graph = None

    return MachineParams(
        nprocesses=nprocesses,
        devices=gpu_ids,
        sensor_preprocessor_graph=sensor_preprocessor_graph,
    )
"validate" config.freeze() return { "env_config": config, "max_steps": self.MAX_STEPS, "sensors": self.SENSORS, "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())), "distance_to_goal": self.DISTANCE_TO_GOAL, # type:ignore } def test_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: raise NotImplementedError("Testing not implemented for this tutorial.") ================================================ FILE: projects/tutorials/pointnav_ithor_rgb_ddppo.py ================================================ import glob import os from math import ceil from typing import Dict, Any, List, Optional, Sequence import gym import numpy as np import torch import torch.nn as nn import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR from torchvision import models from allenact.algorithms.onpolicy_sync.losses import PPO from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph from allenact.base_abstractions.sensor import SensorSuite from allenact.base_abstractions.task import TaskSampler from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor from allenact.utils.experiment_utils import ( Builder, PipelineStage, TrainingPipeline, LinearDecay, evenly_distribute_count_into_bins, ) from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor from allenact_plugins.navigation_plugin.objectnav.models import ( ResnetTensorNavActorCritic, ) from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor from allenact_plugins.robothor_plugin.robothor_task_samplers import ( PointNavDatasetTaskSampler, ) from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask class 
class PointNaviThorRGBPPOExperimentConfig(ExperimentConfig):
    """A Point Navigation experiment configuration in iTHOR.

    Class attributes hold the task, simulator, engine and dataset settings.
    The methods provide the PPO training pipeline, the per-mode machine
    layout, the model, and the task-sampler arguments, in which the scene
    files found on disk are split between the worker processes.
    """

    # Task Parameters
    MAX_STEPS = 500
    REWARD_CONFIG = {
        "step_penalty": -0.01,
        "goal_success_reward": 10.0,
        "failed_stop_reward": 0.0,
        "shaping_weight": 1.0,
    }

    # Simulator Parameters
    CAMERA_WIDTH = 640
    CAMERA_HEIGHT = 480
    SCREEN_SIZE = 224

    # Training Engine Parameters
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    NUM_PROCESSES = 60
    TRAINING_GPUS = list(range(torch.cuda.device_count()))
    # Validation and testing both use the last visible GPU.
    VALIDATION_GPUS = [torch.cuda.device_count() - 1]
    TESTING_GPUS = [torch.cuda.device_count() - 1]

    # Dataset Parameters
    # NOTE(review): these point at the `ithor-objectnav` dataset although the
    # experiment is PointNav -- confirm this is the intended dataset.
    TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-objectnav/train")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val")

    SENSORS = [
        RGBSensorThor(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    PREPROCESSORS = [
        Builder(
            ResNetPreprocessor,
            {
                "input_height": SCREEN_SIZE,
                "input_width": SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": ["rgb_lowres"],
                "output_uuid": "rgb_resnet",
            },
        ),
    ]

    OBSERVATIONS = [
        "rgb_resnet",
        "target_coordinates_ind",
    ]

    ENV_ARGS = dict(
        width=CAMERA_WIDTH,
        height=CAMERA_HEIGHT,
        rotateStepDegrees=30.0,
        visibilityDistance=1.0,
        gridSize=0.25,
    )

    @classmethod
    def tag(cls):
        """Return the name under which this experiment is logged."""
        return "PointNavithorRGBPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build the single-stage PPO pipeline with linearly decayed Adam lr."""
        ppo_steps = int(250000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 3
        num_steps = 30
        save_interval = 5000000
        log_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5
        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={"ppo_loss": PPO(**PPOConfig)},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps)
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )

    def machine_params(self, mode="train", **kwargs):
        """Return process counts, devices and the preprocessor graph for `mode`.

        # Parameters

        mode : One of `"train"`, `"valid"` or `"test"`.

        # Raises

        NotImplementedError : If `mode` is none of the values above.
        """
        sampler_devices: Sequence[int] = []
        if mode == "train":
            workers_per_device = 1
            gpu_ids = (
                []
                if not torch.cuda.is_available()
                else self.TRAINING_GPUS * workers_per_device
            )
            # Without CUDA, fall back to a single process.
            nprocesses = (
                1
                if not torch.cuda.is_available()
                else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))
            )
            sampler_devices = self.TRAINING_GPUS
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available() else self.VALIDATION_GPUS
        elif mode == "test":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available() else self.TESTING_GPUS
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        # The graph is built for training, or whenever at least one process runs.
        sensor_preprocessor_graph = (
            SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
                preprocessors=self.PREPROCESSORS,
            )
            if mode == "train"
            or (
                (isinstance(nprocesses, int) and nprocesses > 0)
                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
            )
            else None
        )

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sampler_devices=(
                sampler_devices if mode == "train" else gpu_ids
            ),  # ignored with > 1 gpu_ids
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    # Define Model
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the actor-critic model fed by the ResNet features and the goal.

        Expects `kwargs["sensor_preprocessor_graph"]` to expose
        `observation_spaces`.
        """
        return ResnetTensorNavActorCritic(
            action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
            goal_sensor_uuid="target_coordinates_ind",
            rgb_resnet_preprocessor_uuid="rgb_resnet",
            hidden_size=512,
            goal_dims=32,
        )

    # Define Task Sampler
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate the task sampler from the sampler-args dictionaries."""
        return PointNavDatasetTaskSampler(**kwargs)

    # Utility Functions for distributing scenes between GPUs
    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        """Return `num_parts + 1` int32 boundaries that split `range(n)`.

        Part `i` covers indices `[result[i], result[i + 1])`. The boundaries
        are evenly spaced between 0 and `n` and rounded to integers.
        """
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments shared by all modes for one process.

        Scene names are the `*.json.gz` file names in `scenes_dir` up to their
        first dot. They are split into `total_processes` contiguous parts and
        part `process_ind` is returned under the `"scenes"` key.

        # Parameters

        scenes_dir : Directory holding one `<scene>.json.gz` file per scene.
        process_ind : Index of the process the arguments are built for.
        total_processes : Number of processes sharing the scenes.
        seeds : Optional per-process seeds, indexed by `process_ind`.
        deterministic_cudnn : Passed through to the sampler.

        # Raises

        RuntimeError : If `scenes_dir` contains no `*.json.gz` file.
        """
        path = os.path.join(scenes_dir, "*.json.gz")
        # `glob` returns files in arbitrary order, so sort the names: every
        # call (one per process) must see the same list or the per-process
        # slices could overlap or miss scenes. `basename` handles whichever
        # path separator the platform uses.
        scenes = sorted(
            os.path.basename(scene_file).split(".")[0]
            for scene_file in glob.glob(path)
        )
        if not scenes:
            raise RuntimeError(
                (
                    "Could find no scene dataset information in directory {}."
                    " Are you sure you've downloaded them? "
                    " If not, see https://allenact.org/installation/download-datasets/ information"
                    " on how this can be done."
                ).format(scenes_dir)
            )
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisible by the number of scenes"
                )
            # Repeat the list until it covers every process, then trim it to a
            # multiple of `total_processes` so all processes get equal shares.
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        else:
            if len(scenes) % total_processes != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisor of the number of scenes"
                )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return sampler arguments for training process `process_ind`.

        Uses the training episodes, loops over the dataset and enables
        flipping. `x_display` is `"0.<device>"`, with devices assigned to
        processes round-robin, or `None` when no devices are given.
        """
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.TRAIN_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.TRAIN_DATASET_DIR
        res["loop_dataset"] = True
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        res["env_args"]["x_display"] = (
            ("0.%d" % devices[process_ind % len(devices)])
            if devices is not None and len(devices) > 0
            else None
        )
        res["allow_flipping"] = True
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return sampler arguments for validation process `process_ind`.

        Uses the validation episodes without looping over the dataset.
        `x_display` is chosen exactly as in training.
        """
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        res["env_args"]["x_display"] = (
            ("0.%d" % devices[process_ind % len(devices)])
            if devices is not None and len(devices) > 0
            else None
        )
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return sampler arguments for test process `process_ind`.

        Testing reuses the validation dataset directory. Unlike training and
        validation, no `x_display` entry is added to `env_args`.
        """
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        return res
python allenact/main.py <EXPERIMENT_NAME> -o <PATH_TO_OUTPUT> -b <BASE_DIRECTORY> -c <PATH_TO_CHECKPOINT> --eval ``` Where `<EXPERIMENT_NAME>` is the name of the experiment config module, `<PATH_TO_OUTPUT>` is the location where the results of the test will be dumped, `<PATH_TO_CHECKPOINT>` is the location of the downloaded model weights, and `<BASE_DIRECTORY>` is a path to the directory where our experiment definition is stored. For our current setup the following command would work: ```bash PYTHONPATH=. python allenact/main.py \ training_a_pointnav_model \ -o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \ -b projects/tutorials \ -c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30/exp_PointNavRobothorRGBPPO__stage_00__steps_000039031200.pt \ --eval ``` For testing on all saved checkpoints we pass a directory to `--checkpoint` rather than just a single file: ```bash PYTHONPATH=. python allenact/main.py \ training_a_pointnav_model \ -o pretrained_model_ckpts/robothor-pointnav-rgb-resnet/ \ -b projects/tutorials \ -c pretrained_model_ckpts/robothor-pointnav-rgb-resnet/checkpoints/PointNavRobothorRGBPPO/2020-08-31_12-13-30 --eval ``` ## Visualization We also show examples of visualizations that can be extracted from the `"valid"` and `"test"` modes. Currently, visualization is still undergoing design changes and does not support multi-agent tasks, but the available functionality is sufficient for pointnav in RoboThor. Following up on the example above, we can make a specialized pointnav `ExperimentConfig` where we instantiate the base visualization class, `VizSuite`, defined in [`allenact.utils.viz_utils`](https://github.com/allenai/allenact/tree/master/allenact/utils/viz_utils.py), when in `test` mode. Each visualization type can be thought of as a plugin to the base `VizSuite`. For example, all `episode_ids` passed to `VizSuite` will be processed with each of the instantiated visualization types (possibly with the exception of the `AgentViewViz`). In the example below we show how to instantiate different visualization types from 4 different data sources.
class PointNavRoboThorRGBPPOVizExperimentConfig(PointNavRoboThorRGBPPOExperimentConfig):
    """PointNav RoboTHOR config that additionally attaches visualizers in test mode.

    # Attributes

    viz_ep_ids : Scene names that will be visualized.
    viz_video_ids : Scene names that will have video visualizations
        associated with them.
    viz : The `VizSuite`, created on the first `get_viz` call and then reused.
    """

    viz_ep_ids = [
        "FloorPlan_Train1_1_3",
        "FloorPlan_Train1_1_4",
        "FloorPlan_Train1_1_5",
        "FloorPlan_Train1_1_6",
    ]
    viz_video_ids = [["FloorPlan_Train1_1_3"], ["FloorPlan_Train1_1_4"]]

    viz: Optional[VizSuite] = None

    def get_viz(self, mode):
        """Return the `VizSuite`, building it for `mode` on the first call.

        Later calls return the stored suite whatever `mode` they pass.
        """
        if self.viz is None:
            # Keyword name -> visualizer plugin, kept in the order in which
            # the plugins are handed to `VizSuite`.
            plugins = {
                # Basic 2D trajectory visualizer (task output source):
                "base_trajectory": TrajectoryViz(
                    path_to_target_location=("task_info", "target"),
                ),
                # Egocentric view visualizer (vector task source):
                "egeocentric": AgentViewViz(
                    max_video_length=100, episode_ids=self.viz_video_ids
                ),
                # Default action probability visualizer (actor critic output source):
                "action_probs": ActorViz(figsize=(3.25, 10), fontsize=18),
                # Default taken action logprob visualizer (rollout storage source):
                "taken_action_logprobs": TensorViz1D(),
                # Same episode mask visualizer (rollout storage source):
                "episode_mask": TensorViz1D(rollout_source=("masks",)),
                # Default recurrent memory visualizer (rollout storage source):
                "rnn_memory": TensorViz2D(
                    rollout_source=("memory_first_last", "single_belief")
                ),
                # Specialized 2D trajectory visualizer (task output source):
                "thor_trajectory": ThorViz(
                    figsize=(16, 8),
                    viz_rows_cols=(448, 448),
                    scenes=("FloorPlan_Train{}_{}", 1, 1, 1, 1),
                ),
            }
            self.viz = VizSuite(episode_ids=self.viz_ep_ids, mode=mode, **plugins)

        return self.viz

    def machine_params(self, mode="train", **kwargs):
        """Return the parent's machine params, with the visualizer set when testing."""
        params = super().machine_params(mode, **kwargs)
        if mode != "test":
            return params

        params.set_visualizer(self.get_viz(mode))
        return params
If everything is properly set up and tensorboard includes the `robothor-pointnav-rgb-resnet` folder, under the `IMAGES` tab, we should see something similar to ![Visualization example](../img/viz_pretrained_2videos.jpg) """ ================================================ FILE: projects/tutorials/training_a_pointnav_model.py ================================================ # literate: tutorials/training-a-pointnav-model.md # %% """# Tutorial: PointNav in RoboTHOR.""" # %% """ ![RoboTHOR Robot](../img/RoboTHOR_robot.jpg) ## Introduction One of the most obvious tasks that an embodied agent should master is navigating the world it inhabits. Before we can teach a robot to cook or clean it first needs to be able to move around. The simplest way to formulate "moving around" into a task is by making your agent find a beacon somewhere in the environment. This beacon transmits its location, such that at any time, the agent can get the direction and Euclidean distance to the beacon. This particular task is often called Point Navigation, or **PointNav** for short. #### PointNav At first glance, this task seems trivial. If the agent is given the direction and distance of the target at all times, can it not simply follow this signal directly? The answer is no, because agents are often trained on this task in environments that emulate real-world buildings which are not wide-open spaces, but rather contain many smaller rooms. Because of this, the agent has to learn to navigate human spaces and use doors and hallways to efficiently navigate from one side of the building to the other. This task becomes particularly difficult when the agent is tested in an environment that it is not trained in.
If the agent does not know how the floor plan of an environment looks, it has to learn to predict the design of man-made structures, to efficiently navigate across them, much like how people instinctively know how to move around a building they have never seen before based on their experience navigating similar buildings. #### What is an environment anyways? Environments are worlds in which embodied agents exist. If our embodied agent is simply a neural network that is being trained in a simulator, then that simulator is its environment. Similarly, if our agent is a physical robot then its environment is the real world. The agent interacts with the environment by taking one of several available actions (such as "move forward", or "turn left"). After each action, the environment produces a new frame that the agent can analyze to determine its next step. For many tasks, including PointNav the agent also has a special "stop" action which indicates that the agent thinks it has reached the target. After this action is called the agent will be reset to a new location, regardless if it reached the target. The hope is that after enough training the agent will learn to correctly assess that it has successfully navigated to the target. ![RoboTHOR Sim vs. Real](../img/RoboTHOR_sim_real.jpg) There are many simulators designed for the training of embodied agents. In this tutorial, we will be using a simulator called [RoboTHOR](https://ai2thor.allenai.org/robothor/), which is designed specifically to train models that can easily be transferred to a real robot, by providing a photo-realistic virtual environment and a real-world replica of the environment that researchers can have access to. RoboTHOR contains 60 different virtual scenes with different floor plans and furniture and 15 validation scenes. It is also important to mention that **AllenAct** has a class abstraction called Environment. 
This is not the actual simulator game engine or robotics controller, but rather a shallow wrapper that provides a uniform interface to the actual environment. #### Learning algorithm Finally, let us briefly touch on the algorithm that we will use to train our embodied agent to navigate. While *AllenAct* offers us great flexibility to train models using complex pipelines, we will be using a simple pure reinforcement learning approach for this tutorial. More specifically, we will be using DD-PPO, a decentralized and distributed variant of the ubiquitous PPO algorithm. For those unfamiliar with Reinforcement Learning we highly recommend [this tutorial](http://karpathy.github.io/2016/05/31/rl/) by Andrej Karpathy, and [this book](http://www.incompleteideas.net/book/the-book-2nd.html) by Sutton and Barto. Essentially what we are doing is letting our agent explore the environment on its own, rewarding it for taking actions that bring it closer to its goal and penalizing it for actions that take it away from its goal. We then optimize the agent's model to maximize this reward. ## Requirements To train the model on the PointNav task, we need to [install the RoboTHOR environment](../installation/installation-framework.md) and [download the RoboTHOR PointNav dataset](../installation/download-datasets.md) The dataset contains a list of episodes with thousands of randomly generated starting positions and target locations for each of the scenes as well as a precomputed cache of distances, containing the shortest path from each point in a scene, to every other point in that scene. This is used to reward the agent for moving closer to the target in terms of geodesic distance - the actual path distance (as opposed to a straight line distance). ## Config File Setup Now comes the most important part of the tutorial, we are going to write an experiment config file. 
If this is your first experience with experiment config files in AllenAct, we suggest that you first see our how-to on [defining an experiment](../howtos/defining-an-experiment.md) which will walk you through creating a simplified experiment config file. Unlike a library that can be imported into python, **AllenAct** is structured as a framework with a runner script called `main.py` which will run the experiment specified in a config file. This design forces us to keep meticulous records of exactly which settings were used to produce a particular result, which can be very useful given how expensive RL models are to train. The `projects/` directory is home to different projects using `AllenAct`. Currently it is populated with baselines of popular tasks and tutorials. We already have all the code for this tutorial stored in `projects/tutorials/training_a_pointnav_model.py`. We will be using this file to run our experiments, but you can create a new directory in `projects/` and start writing your experiment there. 
We start off by importing everything we will need: """ # %% import glob import os from math import ceil from typing import Dict, Any, List, Optional, Sequence import gym import numpy as np import torch import torch.nn as nn import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR from torchvision import models from allenact.algorithms.onpolicy_sync.losses import PPO from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph from allenact.base_abstractions.sensor import SensorSuite from allenact.base_abstractions.task import TaskSampler from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor from allenact.utils.experiment_utils import ( Builder, PipelineStage, TrainingPipeline, LinearDecay, evenly_distribute_count_into_bins, ) from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor from allenact_plugins.navigation_plugin.objectnav.models import ( ResnetTensorNavActorCritic, ) from allenact_plugins.robothor_plugin.robothor_sensors import GPSCompassSensorRoboThor from allenact_plugins.robothor_plugin.robothor_task_samplers import ( PointNavDatasetTaskSampler, ) from allenact_plugins.robothor_plugin.robothor_tasks import PointNavTask # %% """Next we define a new experiment config class:""" # %% class PointNavRoboThorRGBPPOExperimentConfig(ExperimentConfig): """A Point Navigation experiment configuration in RoboThor.""" # %% """ We then define the task parameters. For PointNav, these include the maximum number of steps our agent can take before being reset (this prevents the agent from wandering on forever), and a configuration for the reward function that we will be using. 
""" # %% # Task Parameters MAX_STEPS = 500 REWARD_CONFIG = { "step_penalty": -0.01, "goal_success_reward": 10.0, "failed_stop_reward": 0.0, "shaping_weight": 1.0, } # %% """ In this case, we set the maximum number of steps to 500. We give the agent a reward of -0.01 for each action that it takes (this is to encourage it to reach the goal in as few actions as possible), and a reward of 10.0 if the agent manages to successfully reach its destination. If the agent selects the `stop` action without reaching the target we do not punish it (although this is sometimes useful for preventing the agent from stopping prematurely). Finally, our agent gets rewarded if it moves closer to the target and gets punished if it moves further away. `shaping_weight` controls how strong this signal should be and is here set to 1.0. These parameters work well for training an agent on PointNav, but feel free to play around with them. Next, we set the parameters of the simulator itself. Here we select a resolution at which the engine will render every frame (640 by 480) and a resolution at which the image will be fed into the neural network (here it is set to a 224 by 224 box). """ # %% # Simulator Parameters CAMERA_WIDTH = 640 CAMERA_HEIGHT = 480 SCREEN_SIZE = 224 # %% """ Next, we set the hardware parameters for the training engine. `NUM_PROCESSES` sets the total number of parallel processes that will be used to train the model. In general, more processes result in faster training, but since each process is a unique instance of the environment in which we are training they can take up a lot of memory. Depending on the size of the model, the environment, and the hardware we are using, we may need to adjust this number, but for a setup with 8 GTX Titans, 60 processes work fine. 60 also happens to be the number of training scenes in RoboTHOR, which allows each process to load only a single scene into memory, saving time and space. 
`TRAINING_GPUS` takes the ids of the GPUS on which the model should be trained. Similarly `VALIDATION_GPUS` and `TESTING_GPUS` hold the ids of the GPUS on which the validation and testing will occur. During training, a validation process is constantly running and evaluating the current model, to show the progress on the validation set, so reserving a GPU for validation can be a good idea. If our hardware setup does not include a GPU, these fields can be set to empty lists, as the codebase will default to running everything on the CPU with only 1 process. """ # %% ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None NUM_PROCESSES = 20 TRAINING_GPUS: Sequence[int] = [0] VALIDATION_GPUS: Sequence[int] = [0] TESTING_GPUS: Sequence[int] = [0] # %% """ Since we are using a dataset to train our model we need to define the path to where we have stored it. If we download the dataset instructed above we can define the path as follows """ # %% TRAIN_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/debug") VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/robothor-pointnav/debug") # %% """ Next, we define the sensors. `RGBSensorThor` is the environment's implementation of an RGB sensor. It takes the raw image outputted by the simulator and resizes it, to the input dimensions for our neural network that we specified above. It also performs normalization if we want. `GPSCompassSensorRoboThor` is a sensor that tracks the point our agent needs to move to. It tells us the direction and distance to our goal at every time step. """ # %% SENSORS = [ RGBSensorThor( height=SCREEN_SIZE, width=SCREEN_SIZE, use_resnet_normalization=True, uuid="rgb_lowres", ), GPSCompassSensorRoboThor(), ] # %% """ For the sake of this example, we are also going to be using a preprocessor with our model. In *AllenAct* the preprocessor abstraction is designed with large models with frozen weights in mind. 
These models often hail from the ResNet family and transform the raw pixels that our agent observes in the environment, into a complex embedding, which then gets stored and used as input to our trainable model instead of the original image. Most other preprocessing work is done in the sensor classes (as we just saw with the RGB sensor scaling and normalizing our input), but for the sake of efficiency, all neural network preprocessing should use this abstraction. """ # %% PREPROCESSORS = [ Builder( ResNetPreprocessor, { "input_height": SCREEN_SIZE, "input_width": SCREEN_SIZE, "output_width": 7, "output_height": 7, "output_dims": 512, "pool": False, "torchvision_resnet_model": models.resnet18, "input_uuids": ["rgb_lowres"], "output_uuid": "rgb_resnet", }, ), ] # %% """ Next, we must define all of the observation inputs that our model will use. These are just the hardcoded ids of the sensors we are using in the experiment. """ # %% OBSERVATIONS = [ "rgb_resnet", "target_coordinates_ind", ] # %% """ Finally, we must define the settings of our simulator. We set the camera dimensions to the values we defined earlier. We set rotateStepDegrees to 30 degrees, which means that every time the agent takes a turn action, they will rotate by 30 degrees. We set grid size to 0.25 which means that every time the agent moves forward, it will do so by 0.25 meters. """ # %% ENV_ARGS = dict( width=CAMERA_WIDTH, height=CAMERA_HEIGHT, rotateStepDegrees=30.0, visibilityDistance=1.0, gridSize=0.25, agentMode="bot", ) # %% """ Now we move on to the methods that we must define to finish implementing an experiment config. Firstly we have a simple method that just returns the name of the experiment. """ # %% @classmethod def tag(cls): return "PointNavRobothorRGBPPO" # %% """ Next, we define the training pipeline. In this function, we specify exactly which algorithm or algorithms we will use to train our model. In this simple example, we are using the PPO loss with a learning rate of 3e-4. 
@classmethod
def training_pipeline(cls, **kwargs):
    """Assemble the single-stage PPO training pipeline for this experiment.

    The pipeline trains with one PPO loss for 250 million steps, using Adam
    and a learning rate that is scaled by `LinearDecay` over those steps.
    Extra keyword arguments are accepted but not used.
    """
    total_ppo_steps = 250_000_000

    # Optimisation settings.
    adam_builder = Builder(optim.Adam, {"lr": 3e-4})

    # The only loss used; it is referenced by name in the stage below.
    losses = {"ppo_loss": PPO(**PPOConfig)}

    stages = [PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_ppo_steps)]

    # The learning-rate schedule covers exactly the steps of the PPO stage.
    scheduler_builder = Builder(
        LambdaLR, {"lr_lambda": LinearDecay(steps=total_ppo_steps)}
    )

    return TrainingPipeline(
        # Checkpointing and logging frequencies (in steps).
        save_interval=5000000,
        metric_accumulate_interval=1000,
        optimizer_builder=adam_builder,
        # Rollout / update settings.
        num_mini_batch=1,
        update_repeats=3,
        max_grad_norm=0.5,
        num_steps=30,
        named_losses=losses,
        # Return / advantage estimation.
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
        advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=stages,
        lr_scheduler_builder=scheduler_builder,
    )
""" # %% def machine_params(self, mode="train", **kwargs): sampler_devices: List[int] = [] if mode == "train": workers_per_device = 1 gpu_ids = ( [] if not torch.cuda.is_available() else list(self.TRAINING_GPUS) * workers_per_device ) nprocesses = ( 8 if not torch.cuda.is_available() else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids)) ) sampler_devices = list(self.TRAINING_GPUS) elif mode == "valid": nprocesses = 1 gpu_ids = [] if not torch.cuda.is_available() else self.VALIDATION_GPUS elif mode == "test": nprocesses = 1 gpu_ids = [] if not torch.cuda.is_available() else self.TESTING_GPUS else: raise NotImplementedError("mode must be 'train', 'valid', or 'test'.") sensor_preprocessor_graph = ( SensorPreprocessorGraph( source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces, preprocessors=self.PREPROCESSORS, ) if mode == "train" or ( (isinstance(nprocesses, int) and nprocesses > 0) or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0) ) else None ) return MachineParams( nprocesses=nprocesses, devices=gpu_ids, sampler_devices=( sampler_devices if mode == "train" else gpu_ids ), # ignored with > 1 gpu_ids sensor_preprocessor_graph=sensor_preprocessor_graph, ) # %% """ Now we define the actual model that we will be using. **AllenAct** offers first-class support for PyTorch, so any PyTorch model that implements the provided `ActorCriticModel` class will work here. Here we borrow a modelfrom the `pointnav_baselines` project (which unsurprisingly contains several PointNav baselines). It is a small convolutional network that expects the output of a ResNet as its rgb input followed by a single-layered GRU. The model accepts as input the number of different actions our agent can perform in the environment through the `action_space` parameter, which we get from the task definition. 
We also define the shape of the inputs we are going to be passing to the model with `observation_space`. We specify the names of our sensors with `goal_sensor_uuid` and `rgb_resnet_preprocessor_uuid`. Finally, we define the size of our RNN with `hidden_size` and the size of the embedding of our goal sensor data (the direction and distance to the target) with `goal_dims`.
""" # %% @staticmethod def _partition_inds(n: int, num_parts: int): return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype( np.int32 ) def _get_sampler_args_for_scene_split( self, scenes_dir: str, process_ind: int, total_processes: int, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: path = os.path.join(scenes_dir, "*.json.gz") scenes = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)] if len(scenes) == 0: raise RuntimeError( ( "Could find no scene dataset information in directory {}." " Are you sure you've downloaded them? " " If not, see https://allenact.org/installation/download-datasets/ information" " on how this can be done." ).format(scenes_dir) ) if total_processes > len(scenes): # oversample some scenes -> bias if total_processes % len(scenes) != 0: print( "Warning: oversampling some of the scenes to feed all processes." " You can avoid this by setting a number of workers divisible by the number of scenes" ) scenes = scenes * int(ceil(total_processes / len(scenes))) scenes = scenes[: total_processes * (len(scenes) // total_processes)] else: if len(scenes) % total_processes != 0: print( "Warning: oversampling some of the scenes to feed all processes." " You can avoid this by setting a number of workers divisor of the number of scenes" ) inds = self._partition_inds(len(scenes), total_processes) return { "scenes": scenes[inds[process_ind] : inds[process_ind + 1]], "max_steps": self.MAX_STEPS, "sensors": self.SENSORS, "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())), "seed": seeds[process_ind] if seeds is not None else None, "deterministic_cudnn": deterministic_cudnn, "rewards_config": self.REWARD_CONFIG, } # %% """ The very last things we need to define are the sampler arguments themselves. We define them separately for a train, validation, and test sampler, but in this case, they are almost the same. 
def train_task_sampler_args(
    self,
    process_ind: int,
    total_processes: int,
    devices: Optional[List[int]] = None,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    """Return the task-sampler arguments for one training process.

    Starts from the scene split for `process_ind`, then adds the training
    dataset location, dataset looping, a copy of the simulator arguments
    extended with an `x_display`, and flipping.
    """
    sampler_args = self._get_sampler_args_for_scene_split(
        os.path.join(self.TRAIN_DATASET_DIR, "episodes"),
        process_ind,
        total_processes,
        seeds=seeds,
        deterministic_cudnn=deterministic_cudnn,
    )

    # Processes are assigned to the given devices round-robin; without any
    # device no display is set.
    if devices is not None and len(devices) > 0:
        x_display = "0.%d" % devices[process_ind % len(devices)]
    else:
        x_display = None

    # Copy the shared simulator settings so the class attribute is not mutated.
    simulator_args = dict(self.ENV_ARGS)
    simulator_args["x_display"] = x_display

    sampler_args["scene_directory"] = self.TRAIN_DATASET_DIR
    sampler_args["loop_dataset"] = True
    sampler_args["env_args"] = simulator_args
    sampler_args["allow_flipping"] = True
    return sampler_args
int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: res = self._get_sampler_args_for_scene_split( os.path.join(self.VAL_DATASET_DIR, "episodes"), process_ind, total_processes, seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) res["scene_directory"] = self.VAL_DATASET_DIR res["loop_dataset"] = False res["env_args"] = {} res["env_args"].update(self.ENV_ARGS) return res # %% """ This is it! If we copy all of the code into a file we should be able to run our experiment! ## Training Model On Debug Dataset We can test if our installation worked properly by training our model on a small dataset of 4 episodes. This should take about 20 minutes on a computer with a NVIDIA GPU. We can now train a model by running: ```bash PYTHONPATH=. python allenact/main.py -o -c -b ``` If using the same configuration as we have set up, the following command should work: ```bash PYTHONPATH=. python allenact/main.py training_a_pointnav_model -o storage/robothor-pointnav-rgb-resnet-resnet -b projects/tutorials ``` If we start up a tensorboard server during training and specify that `output_dir=storage` the output should look something like this: ![tensorboard output](../img/point-nav-baseline-tb.png) ## Training Model On Full Dataset We can also train the model on the full dataset by changing back our dataset path and running the same command as above. But be aware, training this takes nearly 2 days on a machine with 8 GPU. ## Testing Model To test the performance of a model please refer to [this tutorial](running-inference-on-a-pretrained-model.md). ## Conclusion In this tutorial, we learned how to create a new PointNav experiment using **AllenAct**. 
There are many simple and obvious ways to modify the experiment from here: changing the model, the learning algorithm, or the environment each requires changing very few lines of code in the above file, allowing us to explore our embodied AI research ideas across different frameworks with ease.
-name "*.py" | grep -v ^./src | grep -v ^./external_projects | grep -v used_configs | xargs docformatter --in-place -r echo DOCFORMATTER DONE echo ALL DONE ================================================ FILE: scripts/build_docs.py ================================================ import glob import os import shutil import sys from pathlib import Path from subprocess import check_output from threading import Thread from typing import Dict, Union, Optional, Set, List, Sequence, Mapping from git import Git from ruamel.yaml import YAML # type: ignore from constants import ABS_PATH_OF_TOP_LEVEL_DIR # TODO: the scripts directory shouldn't be a module (as it conflicts with # some local developmment workflows) but we do want to import scripts/literate.py. # Temporary solution is just to modify the sys.path when this script is run. sys.path.append(os.path.abspath(os.path.dirname(Path(__file__)))) from literate import literate_python_to_markdown class StringColors: HEADER = "\033[95m" OKBLUE = "\033[94m" OKGREEN = "\033[92m" WARNING = "\033[93m" FAIL = "\033[91m" ENDC = "\033[0m" BOLD = "\033[1m" UNDERLINE = "\033[4m" exclude_files = [ ".DS_Store", "__init__.py", "__init__.pyc", "README.md", "version.py", "run.py", "setup.py", "main.py", ] def render_file( relative_src_path: str, src_file: str, to_file: str, modifier="" ) -> None: """Shells out to pydocmd, which creates a .md file from the docstrings of python functions and classes in the file we specify. The modifer specifies the depth at which to generate docs for classes and functions in the file. 
More information here: https://pypi.org/project/pydoc-markdown/ """ # First try literate was_literate = False try: was_literate = literate_python_to_markdown( path=os.path.join(relative_src_path, src_file) ) except Exception as _: pass if was_literate: return # Now do standard pydocmd relative_src_namespace = relative_src_path.replace("/", ".") src_base = src_file.replace(".py", "") if relative_src_namespace == "": namespace = f"{src_base}{modifier}" else: namespace = f"{relative_src_namespace}.{src_base}{modifier}" pydoc_config = """'{ renderer: { type: markdown, code_headers: true, descriptive_class_title: false, add_method_class_prefix: true, source_linker: {type: github, repo: allenai/allenact}, header_level_by_type: { Module: 1, Class: 2, Method: 3, Function: 3, Data: 3, } } }'""" pydoc_config = " ".join(pydoc_config.split()) args = ["pydoc-markdown", "-m", namespace, pydoc_config] try: call_result = check_output([" ".join(args)], shell=True, env=os.environ).decode( "utf-8" ) # noinspection PyShadowingNames with open(to_file, "w") as f: doc_split = call_result.split("\n") # github_path = "https://github.com/allenai/allenact/tree/master/" # path = ( # github_path + namespace.replace(".", "/") + ".py" # ) # mdlink = "[[source]]({})".format(path) mdlink = "" # Removing the above source link for now. call_result = "\n".join([doc_split[0] + " " + mdlink] + doc_split[1:]) call_result = call_result.replace("_DOC_COLON_", ":") f.write(call_result) print( f"{StringColors.OKGREEN}[SUCCESS]{StringColors.ENDC} built docs for {src_file} -> {to_file}." ) except Exception as _: cmd = " ".join(args) print( f"{StringColors.WARNING}[SKIPPING]{StringColors.ENDC} could not" f" build docs for {src_file} (missing an import?). 
# noinspection PyShadowingNames
def build_docs(
    base_dir: Union[Path, str],
    root_path: Union[Path, str],
    docs_dir: Union[Path, str],
    threads: List,
    allowed_dirs: Optional[Set[str]] = None,
):
    """Recursively build docs for the python files under `root_path`.

    The directory structure below `base_dir` is mirrored below `docs_dir`.

    # Parameters

    base_dir : Directory that output paths are made relative to.
    root_path : Directory (inside `base_dir`) whose contents are processed.
    docs_dir : Directory in which the mirrored structure is created.
    threads : List to which the per-file rendering threads are appended.
    allowed_dirs : If given, sub-directories whose absolute path is not in
        this set are skipped.

    # Returns

    A list of navigation entries: `{directory_name: [entries...]}` for each
    sub-directory that produced entries, and the dictionary returned by
    `build_docs_for_file` for each documented python file.
    """
    base_dir, root_path, docs_dir = str(base_dir), str(root_path), str(docs_dir)

    # Only a relative path that *is* "." (root_path == base_dir) maps to the
    # empty string. Stripping every "." character would silently rename
    # directories such as "pkg.v1" or ".github".
    rel_root = os.path.relpath(root_path, base_dir)
    new_path = "" if rel_root == os.curdir else rel_root
    target_dir = os.path.join(docs_dir, new_path)

    nav_root = []
    # Sorted so the generated navigation does not depend on the order in
    # which the file system happens to list entries.
    for child in sorted(os.listdir(root_path)):
        relative_path = os.path.join(root_path, child)
        is_dir = os.path.isdir(relative_path)

        if (
            allowed_dirs is not None
            and is_dir
            and os.path.abspath(relative_path) not in allowed_dirs
        ):
            print("SKIPPING {}".format(relative_path))
            continue

        # Created lazily (only once a non-skipped child is found);
        # `exist_ok` makes repeated calls harmless.
        os.makedirs(target_dir, exist_ok=True)

        if is_dir:
            nav_subsection = build_docs(
                base_dir,
                relative_path,
                docs_dir,
                threads=threads,
                allowed_dirs=allowed_dirs,
            )
            if nav_subsection:
                nav_root.append({child: nav_subsection})
        elif child.endswith(".py") and child not in exclude_files:
            nav_root.append(
                build_docs_for_file(new_path, child, docs_dir, threads=threads)
            )

    return nav_root
def pruned_nav_entries(nav_entries):
    """Drop navigation entries whose markdown file does not exist.

    A string is treated as a path relative to the `docs` directory and is
    kept only if that file exists (otherwise `None` is returned). Sequences
    and mappings are pruned recursively, discarding every element whose
    pruned value is falsy (`None` or an empty container). Any other type
    raises `NotImplementedError`.
    """
    if isinstance(nav_entries, str):
        doc_exists = os.path.exists(os.path.join("docs", nav_entries))
        return nav_entries if doc_exists else None

    if isinstance(nav_entries, Sequence):
        pruned_items = (pruned_nav_entries(item) for item in nav_entries)
        return [item for item in pruned_items if item]

    if isinstance(nav_entries, Mapping):
        pruned_pairs = (
            (key, pruned_nav_entries(value)) for key, value in nav_entries.items()
        )
        return {key: value for key, value in pruned_pairs if value}

    raise NotImplementedError()
print("Copying CONTRIBUTING.md file to docs.") shutil.copy("CONTRIBUTING.md", "docs/CONTRIBUTING.md") # print("Copying CNAME file to docs.") # shutil.copy("CNAME", "docs/CNAME") print("Building the docs.") parent_folder_path = Path(__file__).parent.parent yaml_path = parent_folder_path / "mkdocs.yml" source_path = parent_folder_path docs_dir = str(parent_folder_path / "docs" / "api") if not os.path.exists(docs_dir): os.mkdir(docs_dir) # Adding project readmes to the yaml yaml = YAML() mkdocs_yaml = yaml.load(yaml_path) site_nav = mkdocs_yaml["nav"] # TODO Find a way to do the following in a way that results in nice titles. # projects_key = "Projects using allenact" # nav_obj = None # for obj in site_nav: # if projects_key in obj: # nav_obj = obj # break # nav_obj[projects_key] = project_readme_paths_to_nav_structure(project_readmes) with open(yaml_path, "w") as f: yaml.dump(mkdocs_yaml, f) # Get directories to ignore git_dirs = set( os.path.abspath(os.path.split(p)[0]) for p in Git(".").ls_files().split("\n") ) ignore_rel_dirs = [ "docs", "scripts", "experiments", "src", ".pip_src", "dist", "build", ] ignore_abs_dirs = set( os.path.abspath(os.path.join(str(parent_folder_path), rel_dir)) for rel_dir in ignore_rel_dirs ) for d in ignore_abs_dirs: if d in git_dirs: git_dirs.remove(d) threads: List = [] nav_entries = build_docs( parent_folder_path, source_path, docs_dir, threads=threads, allowed_dirs=git_dirs, ) nav_entries.sort(key=lambda x: list(x)[0], reverse=False) for thread in threads: thread.join() nav_entries = pruned_nav_entries(nav_entries) docs_key = "API" # Find the yaml corresponding to the API nav_obj = None for obj in site_nav: if docs_key in obj: nav_obj = obj break nav_obj[docs_key] = nav_entries with open(yaml_path, "w") as f: yaml.dump(mkdocs_yaml, f) if __name__ == "__main__": main() ================================================ FILE: scripts/build_docs.sh ================================================ #!/usr/bin/env bash set -e # Add allenact 
if __name__ == "__main__":
    # Imported here because this block is the only user of `shlex`.
    import shlex

    args = get_args()

    all_addresses = []
    if args.runs_on is not None:
        all_addresses = args.runs_on.split(",")
    else:
        # No explicit addresses: offer the addresses stored in the kill
        # files under `~/.allenact`, in reverse-sorted file-name order.
        allenact_dir = os.path.join(os.path.expanduser("~"), ".allenact")
        all_files = sorted(
            glob.glob(os.path.join(allenact_dir, "*.killfile")),
            reverse=True,
        )
        if len(all_files) == 0:
            print(f"No screen_ids_file found under {allenact_dir}")

        for killfile in all_files:
            with open(killfile, "r") as f:
                # Each non-blank line contains 'IP_address screen_ID'.
                # Splitting on whitespace (rather than chopping the last
                # character) also handles a final line without a trailing
                # newline, and blank lines are skipped instead of yielding
                # an empty address.
                nodes = [tuple(line.split()) for line in f if line.strip()]

            all_addresses.extend(node[0] for node in nodes)

            use_addresses = ""
            while use_addresses not in ["y", "n"]:
                use_addresses = input(
                    f"Run on {all_addresses} from {killfile}? [Y/n] "
                ).lower()
                if use_addresses == "":
                    use_addresses = "y"

            if use_addresses == "n":
                # Rejected: forget these addresses and try the next file.
                all_addresses.clear()
            else:
                break

    print(f"Running on IP addresses {all_addresses}")

    # Quote the remote command once so that it reaches the remote shell as a
    # single argument even if it contains single quotes.
    quoted_command = shlex.quote(args.command)

    for it, addr in enumerate(all_addresses):
        ssh_command = f"{args.ssh_cmd.format(addr=addr)} {quoted_command}"

        print(f"{it} {addr} SSH command {ssh_command}")
        os.system(ssh_command)

    print("DONE")
" "The option `-f` should be used, since we want a non-interactive session", ) parser.add_argument( "--distribute_public_rsa_key", dest="distribute_public_rsa_key", action="store_true", required=False, help="if you pass the `--distribute_public_rsa_key` flag, the manager node's public key will be added to the " "authorized keys of all workers (this is necessary in default-configured EC2 instances to use " "`scripts/dmain.py`)", ) parser.set_defaults(distribute_public_rsa_key=False) return parser def get_args(): """Creates the argument parser and parses any input arguments.""" parser = get_argument_parser() args = parser.parse_args() return args def wrap_double(text): return f'"{text}"' def wrap_single(text): return f"'{text}'" def wrap_single_nested(text, quote=r"'\''"): return f"{quote}{text}{quote}" if __name__ == "__main__": args = get_args() all_addresses = args.runs_on.split(",") print(f"Running on addresses {all_addresses}") remote_config_script = f"{args.config_script}.distributed" for it, addr in enumerate(all_addresses): if args.distribute_public_rsa_key: key_command = ( f"{args.ssh_cmd.format(addr=addr)} " f"{wrap_double('echo $(cat ~/.ssh/id_rsa.pub) >> ~/.ssh/authorized_keys')}" ) print(f"Key command {key_command}") os.system(f"{key_command}") scp_cmd = ( args.ssh_cmd.replace("ssh ", "scp ") .replace("-f", args.config_script) .format(addr=addr) ) print(f"SCP command {scp_cmd}:{remote_config_script}") os.system(f"{scp_cmd}:{remote_config_script}") screen_name = f"allenact_config_machine{it}" bash_command = wrap_single_nested( f"source {remote_config_script} &>> log_allenact_distributed_config" ) screen_command = wrap_single( f"screen -S {screen_name} -dm bash -c {bash_command}" ) ssh_command = f"{args.ssh_cmd.format(addr=addr)} {screen_command}" print(f"SSH command {ssh_command}") os.system(ssh_command) print(f"{addr} {screen_name}") print("DONE") ================================================ FILE: scripts/dkill.py 
def get_argument_parser():
    """Build the command-line parser of the `dkill` tool.

    Both options are optional strings; their defaults are shown in the help
    output by the defaults-aware formatter.
    """
    # (flag, default, help text) for every optional string option.
    optional_string_flags = (
        (
            "--screen_ids_file",
            None,
            "Path to file generated by dmain.py with IPs and screen ids for nodes running process."
            " If empty, the tool will scan the `~/.allenact` directory for `screen_ids_file`s.",
        ),
        (
            "--ssh_cmd",
            "ssh {addr}",
            "SSH command. Useful to utilize a pre-shared key with 'ssh -i mykey.pem ubuntu@{addr}'. ",
        ),
    )

    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="dkill",
    )
    for flag, default_value, help_text in optional_string_flags:
        parser.add_argument(
            flag, type=str, default=default_value, required=False, help=help_text
        )

    return parser
[y/N] " ).lower() if do_kill == "": do_kill = "n" if do_kill == "y": for it, node in enumerate(nodes): addr, screen_name = node print(f"Killing screen {screen_name} on {addr}") ssh_command = ( f"{args.ssh_cmd.format(addr=addr)} '" f"screen -S {screen_name} -p 0 -X quit ; " f"sleep 1 ; " f"echo Master processes left running: ; " f"ps aux | grep Master: | grep -v grep ; " f"echo ; " f"'" ) # print(f"SSH command {ssh_command}") os.system(ssh_command) do_delete = "" while do_delete not in ["y", "n"]: do_delete = input(f"Delete file {killfile}? [y/N] ").lower() if do_delete == "": do_delete = "n" if do_delete == "y": os.system(f"rm {killfile}") print(f"Deleted {killfile}") print("DONE") ================================================ FILE: scripts/dmain.py ================================================ #!/usr/bin/env python3 """Entry point to multi-node (distributed) training for a user given experiment name.""" import os import random import string import subprocess import sys import time from pathlib import Path from typing import Optional # Add to PYTHONPATH the path of the parent directory of the current file's directory sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(Path(__file__))))) from allenact.main import get_argument_parser as get_main_arg_parser from allenact.utils.system import init_logging, get_logger from constants import ABS_PATH_OF_TOP_LEVEL_DIR def get_argument_parser(): """Creates the argument parser.""" parser = get_main_arg_parser() parser.description = f"distributed {parser.description}" parser.add_argument( "--runs_on", required=True, type=str, help="Comma-separated IP addresses of machines", ) parser.add_argument( "--ssh_cmd", required=False, type=str, default="ssh -f {addr}", help="SSH command. Useful to utilize a pre-shared key with 'ssh -i mykey.pem -f ubuntu@{addr}'. 
" "The option `-f` should be used for non-interactive session", ) parser.add_argument( "--env_activate_path", required=True, type=str, help="Path to the virtual environment's `activate` script. It must be the same across all machines", ) parser.add_argument( "--allenact_path", required=False, type=str, default="allenact", help="Path to allenact top directory. It must be the same across all machines", ) # Required distributed_ip_and_port idx = [a.dest for a in parser._actions].index("distributed_ip_and_port") parser._actions[idx].required = True return parser def get_args(): """Creates the argument parser and parses any input arguments.""" parser = get_argument_parser() args = parser.parse_args() return args def get_raw_args(): raw_args = sys.argv[1:] filtered_args = [] remove: Optional[str] = None enclose_in_quotes: Optional[str] = None for arg in raw_args: if remove is not None: remove = None elif enclose_in_quotes is not None: # Within backslash expansion: close former single, open double, create single, close double, reopen single inner_quote = r"\'\"\'\"\'" # Convert double quotes into backslash double for later expansion filtered_args.append( inner_quote + arg.replace('"', r"\"").replace("'", r"\"") + inner_quote ) enclose_in_quotes = None elif arg in [ "--runs_on", "--ssh_cmd", "--env_activate_path", "--allenact_path", "--extra_tag", "--machine_id", ]: remove = arg elif arg == "--config_kwargs": enclose_in_quotes = arg filtered_args.append(arg) else: filtered_args.append(arg) return filtered_args def wrap_single(text): return f"'{text}'" def wrap_single_nested(text): # Close former single, start backslash expansion (via $), create new single quote for expansion: quote_enter = r"'$'\'" # New closing single quote for expansion, close backslash expansion, reopen former single: quote_leave = r"\'''" return f"{quote_enter}{text}{quote_leave}" def wrap_double(text): return f'"{text}"' def id_generator(size=4, chars=string.ascii_uppercase + string.digits): return 
# Assume we can ssh into each of the `runs_on` machines through port 22
if __name__ == "__main__":
    # Launcher flow:
    #   1. validate the working directory and parse arguments,
    #   2. make sure all workers share one random seed,
    #   3. for every address in `--runs_on`: rsync the code, then start
    #      `python main.py ...` inside a detached `screen` session over ssh,
    #   4. record `<address> <screen name>` pairs in a killfile under ~/.allenact.

    # Tool must be called from AllenAct project's root directory
    cwd = os.path.abspath(os.getcwd())
    assert cwd == ABS_PATH_OF_TOP_LEVEL_DIR, (
        f"`dmain.py` called from {cwd}."
        f"\nIt should be called from AllenAct's top level directory {ABS_PATH_OF_TOP_LEVEL_DIR}."
    )

    args = get_args()

    init_logging(args.log_level)

    # Arguments forwarded verbatim to `main.py` (launcher-only options removed).
    raw_args = get_raw_args()

    # If the user gave no seed, draw one here and pass it explicitly so that
    # every machine runs with the same seed.
    if args.seed is None:
        seed = random.randint(0, 2**31 - 1)
        raw_args.extend(["-s", f"{seed}"])
        get_logger().info(f"Using random seed {seed} in all workers (none was given)")

    all_addresses = args.runs_on.split(",")
    get_logger().info(f"Running on IP addresses {all_addresses}")

    # The host part of `--distributed_ip_and_port` must be one of the workers.
    assert args.distributed_ip_and_port.split(":")[0] in all_addresses, (
        f"Missing listener IP address {args.distributed_ip_and_port.split(':')[0]}"
        f" in list of worker addresses {all_addresses}"
    )

    time_str = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime(time.time()))

    # One id for the whole launch; each machine additionally gets its own `job_id`.
    global_job_id = id_generator()
    killfilename = os.path.join(
        os.path.expanduser("~"), ".allenact", f"{time_str}_{global_job_id}.killfile"
    )
    os.makedirs(os.path.dirname(killfilename), exist_ok=True)

    # The current directory (asserted above to be the project root) is what gets synced.
    code_src = "."

    with open(killfilename, "w") as killfile:
        for it, addr in enumerate(all_addresses):
            # Copy the local code to `--allenact_path` on the remote machine.
            # NOTE(review): the exit status of rsync is not checked, and the
            # target is built from the bare address rather than from `--ssh_cmd`.
            code_tget = f"{addr}:{args.allenact_path}/"
            get_logger().info(f"rsync {code_src} to {code_tget}")
            os.system(f"rsync -rz {code_src} {code_tget}")

            job_id = id_generator()

            # Per-machine command line: forwarded args plus a machine-specific
            # `--extra_tag` suffix and the machine index as `--machine_id`.
            command = " ".join(
                ["python", "main.py"]
                + raw_args
                + [
                    "--extra_tag",
                    f"{args.extra_tag}{'__' if len(args.extra_tag) > 0 else ''}machine{it}",
                ]
                + ["--machine_id", f"{it}"]
            )

            logfile = (
                f"{args.output_dir}/log_{time_str}_{global_job_id}_{job_id}_machine{it}"
            )

            # Shell snippet executed by `bash -c` on the remote machine.
            # The leading `for ... do : ; done` loop iterates over the words
            # printed by `route | grep default`, leaving NCCL_SOCKET_IFNAME set
            # to the last word, which is then exported. Every following step
            # appends both stdout and stderr (`&>>`) to `logfile`.
            env_and_command = wrap_single_nested(
                f"for NCCL_SOCKET_IFNAME in $(route | grep default) ; do : ; done && export NCCL_SOCKET_IFNAME"
                f" && cd {args.allenact_path}"
                f" && mkdir -p {args.output_dir}"
                f" && source {args.env_activate_path} &>> {logfile}"
                f" && echo pwd=$(pwd) &>> {logfile}"
                f" && echo output_dir={args.output_dir} &>> {logfile}"
                f" && echo python_version=$(python --version) &>> {logfile}"
                f" && echo python_path=$(which python) &>> {logfile}"
                f" && set | grep NCCL_SOCKET_IFNAME &>> {logfile}"
                f" && echo &>> {logfile}"
                f" && {command} &>> {logfile}"
            )

            # Run the snippet in a detached (`-dm`) screen session so that it
            # keeps running after the ssh connection is closed.
            screen_name = f"allenact_{time_str}_{global_job_id}_{job_id}_machine{it}"
            screen_command = wrap_single(
                f"screen -S {screen_name} -dm bash -c {env_and_command}"
            )

            ssh_command = f"{args.ssh_cmd.format(addr=addr)} {screen_command}"

            get_logger().debug(f"SSH command {ssh_command}")
            # bash is requested explicitly; the quoting built by
            # `wrap_single_nested` relies on `$'...'` expansion.
            subprocess.run(ssh_command, shell=True, executable="/bin/bash")
            get_logger().info(f"{addr} {screen_name}")

            # One `<address> <screen name>` line per started session.
            killfile.write(f"{addr} {screen_name}\n")

    get_logger().info("")
    get_logger().info(f"Running screen ids saved to {killfilename}")
    get_logger().info("")

    get_logger().info("DONE")
def get_literate_output_path(file: TextIO) -> Optional[str]:
    """Determine where the literate markdown for an open python file should go.

    Only the first non-blank line of `file` is inspected. It must start with
    `# literate` (or `#literate`, case-insensitive) for the file to be
    considered a literate source. Note that the file is consumed up to and
    including that line.

    # Parameters

    file : An open text file. Its `name` attribute is used when the header
        does not specify an output path.

    # Returns

    * `None` if the file is empty or its first non-blank line is not a
      literate header.
    * The file's own path with `.py` replaced by `.md` if the header has no
      `:` (e.g. `# literate`).
    * The absolute path of `<relative path>` inside the docs directory if the
      header has the form `# literate: <relative path>`.

    # Raises

    AssertionError : If the file is not a `*.py` file or if the requested
        output path would fall outside of the docs directory.
    NotImplementedError : If the header contains more than one `:`.
    """
    for raw_line in file:
        line = raw_line.strip()
        if line == "":
            continue

        if not line.lower().startswith(("# literate", "#literate")):
            # The first non-blank line decides; anything else means "not literate".
            return None

        parts = line.split(":")
        if len(parts) == 1:
            assert (
                file.name[-3:].lower() == ".py"
            ), "Can only run literate on python (*.py) files."
            return file.name[:-3] + ".md"

        if len(parts) == 2:
            rel_outpath = parts[1].strip()
            docs_dir = os.path.abspath(ABS_PATH_OF_DOCS_DIR)
            outpath = os.path.abspath(os.path.join(ABS_PATH_OF_DOCS_DIR, rel_outpath))
            # Compare whole path components: a plain string-prefix test would
            # also accept sibling directories such as `<docs dir>_other/`.
            assert (
                os.path.commonpath([docs_dir, outpath]) == docs_dir
            ), f"Path {outpath} is not allowed, must be within {ABS_PATH_OF_DOCS_DIR}."
            return outpath

        raise NotImplementedError(f"Line '{line}' is not of the correct format.")

    return None
else: return def _strip_empty_lines(lines: Sequence[str]) -> List[str]: lines = list(lines) if len(lines) == 0: return lines for i in range(len(lines)): if lines[i].strip() != "": lines = lines[i:] break for i in reversed(list(range(len(lines)))): if lines[i].strip() != "": lines = lines[: i + 1] break return lines def literate_python_to_markdown(path: str) -> bool: assert path[-3:].lower() == ".py", "Can only run literate on python (*.py) files." with open(path, "r") as file: output_path = get_literate_output_path(file) if output_path is None: return False output_lines = [ f" ", f"\n", ] md_lines: List[str] = [] code_lines = md_lines lines = file.readlines() mode = None for line in lines: line = line.rstrip() stripped_line = line.strip() if (mode is None or mode == "change") and line.strip() == "": continue if mode == "markdown": if stripped_line in ['"""', "'''"]: output_lines.extend(_strip_empty_lines(md_lines) + [""]) md_lines.clear() mode = None elif stripped_line.endswith(('"""', "'''")): output_lines.extend( _strip_empty_lines(md_lines) + [stripped_line[:-3]] ) md_lines.clear() mode = None # TODO: Does not account for the case where a string is ended with a comment. 
else: md_lines.append(line.strip()) elif stripped_line.startswith(("# %%", "#%%")): last_mode = mode mode = "change" if last_mode == "code": output_lines.extend( ["```python"] + _strip_empty_lines(code_lines) + ["```"] ) code_lines.clear() if " import " in stripped_line: path = stripped_line.split(" import ")[-1].strip() output_lines.append( "```python\n" + source_to_markdown(path) + "\n```" ) elif " import_summary " in stripped_line: path = stripped_line.split(" import_summary ")[-1].strip() output_lines.append( "```python\n" + source_to_markdown(path, summarize=True) + "\n```" ) elif " hide" in stripped_line: mode = "hide" elif mode == "hide": continue elif mode == "change": if stripped_line.startswith(('"""', "'''")): mode = "markdown" if len(stripped_line) != 3: if stripped_line.endswith(('"""', "'''")): output_lines.append(stripped_line[3:-3]) mode = "change" else: output_lines.append(stripped_line[3:]) else: mode = "code" code_lines.append(line) elif mode == "code": code_lines.append(line) else: raise NotImplementedError( f"mode {mode} is not implemented. 
def make_package(name, verbose=False):
    """Prepares sdist for allenact or allenact_plugins.

    The sdist produced by `<name>/setup.py sdist` is unpacked, patched and
    re-packed in place inside the repository's `dist/` directory:

    1. `setup.py` is copied to the top level of the package (required by
       `pip install`),
    2. a `_version.py` holding the version read from `.VERSION` is appended to
       the package sources and registered in `SOURCES.txt`,
    3. the archive is re-compressed and the temporary directory removed.

    The current working directory is changed while the function runs and is
    restored before returning, even if one of the steps raises.

    # Parameters

    name : Name of the package directory, e.g. `"allenact"` or `"allenact_plugins"`.
    verbose : If `True`, print the output of the shell commands (the two
        `printf` commands that patch the sources are never echoed).
    """
    orig_dir = os.getcwd()
    base_dir = os.path.join(os.path.abspath(os.path.dirname(Path(__file__))), "..")

    def run(command, echo=True):
        # `getoutput` captures stdout+stderr and ignores the exit status.
        output = getoutput(command)
        if echo and verbose:
            print(output)

    try:
        os.chdir(base_dir)

        with open(".VERSION", "r") as f:
            version = f.readline().strip()

        # generate sdist via setuptools
        run(f"{sys.executable} {name}/setup.py sdist")

        os.chdir(os.path.join(base_dir, "dist"))

        # uncompress the tar.gz sdist
        run(f"tar zxvf {name}-{version}.tar.gz")

        # copy setup.py to the top level of the package (required by pip install)
        run(f"cp {name}-{version}/{name}/setup.py {name}-{version}/setup.py")

        # create new source file with version
        run(
            f"printf '__version__ = \"{version}\"\n' >> {name}-{version}/{name}/_version.py",
            echo=False,
        )
        # include it in sources
        run(
            f'printf "\n{name}/_version.py" >> {name}-{version}/{name}.egg-info/SOURCES.txt',
            echo=False,
        )

        # recompress tar.gz
        run(f"tar zcvf {name}-{version}.tar.gz {name}-{version}/")

        # remove temporary directory
        run(f"rm -r {name}-{version}")
    finally:
        # Always go back to where the caller was, also when a step failed
        # (e.g. `.VERSION` or `dist/` is missing).
        os.chdir(orig_dir)
def startx(display=0):
    """Start an Xorg server on `display` driving every NVIDIA card found.

    A temporary xorg configuration covering all NVIDIA devices reported by
    `pci_records()` is written, Xorg is launched with it and this function
    then blocks until the Xorg process exits. The temporary configuration file
    is removed afterwards.

    # Parameters

    display : The X display number to start (passed to Xorg as `:<display>`).

    # Raises

    Exception : If not running on Linux or if no NVIDIA card was found.
    """
    if platform.system() != "Linux":
        raise Exception("Can only run startx on linux")

    gpu_classes = ["VGA compatible controller", "3D controller"]
    devices = []
    for record in pci_records():
        if record.get("Vendor", "") != "NVIDIA Corporation":
            continue
        if record["Class"] not in gpu_classes:
            continue
        # The slot is a sequence of hex fields separated by `:` / `.`; the
        # BusID written to the config uses the decimal value of each field.
        slot_fields = re.split(r"[:\.]", record["Slot"])
        devices.append("PCI:" + ":".join(str(int(field, 16)) for field in slot_fields))

    if not devices:
        raise Exception("no nvidia cards found")

    fd = None
    path = None
    try:
        fd, path = tempfile.mkstemp()
        with open(path, "w") as conf_file:
            conf_file.write(generate_xorg_conf(devices))

        xorg_cmd = "Xorg -noreset +extension GLX +extension RANDR +extension RENDER -config %s :%s" % (path, display)
        proc = subprocess.Popen(shlex.split(xorg_cmd))
        # Make sure Xorg does not outlive this interpreter.
        atexit.register(lambda: proc.poll() is None and proc.kill())
        proc.wait()
    finally:
        # `fd` is only set once mkstemp succeeded, in which case `path` is set too.
        if fd is not None:
            os.close(fd)
            os.unlink(path)
class MiniGridCondTestExperimentConfig(ExperimentConfig):
    """Small CPU-only experiment used to test training with a two-level
    (`higher`/`lower`) conditional action space on `EmptyRandomEnv5x5`."""

    @classmethod
    def tag(cls) -> str:
        return "MiniGridCondTest"

    # Index 0: egocentric MiniGrid view, index 1: expert action for both levels.
    SENSORS = [
        EgocentricMiniGridSensor(agent_view_size=5, view_channels=3),
        ExpertActionSensor(
            action_space=gym.spaces.Dict(
                higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)
            )
        ),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the conditioned conv-RNN model for the hierarchical action space."""
        hierarchical_actions = gym.spaces.Dict(
            higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)
        )
        obs_spaces = SensorSuite(cls.SENSORS).observation_spaces
        grid_sensor = cls.SENSORS[0]
        return ConditionedMiniGridSimpleConvRNN(
            action_space=hierarchical_actions,
            observation_space=obs_spaces,
            num_objects=grid_sensor.num_objects,
            num_colors=grid_sensor.num_colors,
            num_states=grid_sensor.num_states,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        return MiniGridTaskSampler(**kwargs)

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(mode="train", process_ind=process_ind)

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(mode="valid", process_ind=process_ind)

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(mode="test", process_ind=process_ind)

    def _get_sampler_args(self, process_ind: int, mode: str) -> Dict[str, Any]:
        """Generate initialization arguments for train, valid, and test
        TaskSamplers.

        # Parameters
        process_ind : index of the current task sampler
        mode:  one of `train`, `valid`, or `test`
        """
        training = mode == "train"
        if training:
            # Unlimited, randomly sampled tasks without predefined seeds.
            max_tasks = None
            task_seeds_list = None
        else:
            # 20 tasks for valid, 40 for test (per sampler).
            max_tasks = 40 if mode == "test" else 20
            # One seed per task; the ranges of different samplers do not
            # overlap, which gives a deterministic and disjoint set of tasks.
            first_seed = process_ind * max_tasks
            task_seeds_list = list(range(first_seed, first_seed + max_tasks))

        return dict(
            max_tasks=max_tasks,
            env_class=self.make_env,  # builder for third-party environment
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            env_info=dict(),  # parameters for environment builder (none for now)
            task_seeds_list=task_seeds_list,
            # tasks are sampled deterministically in validation/testing only
            deterministic_sampling=not training,
            task_class=ConditionedMiniGridTask,
        )

    @staticmethod
    def make_env(*args, **kwargs):
        return EmptyRandomEnv5x5()

    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        nprocesses = 4 if mode == "train" else 1
        return {
            "nprocesses": nprocesses,
            "devices": [],
        }

    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        """Single-stage pipeline mixing imitation and PPO for 512 steps, with
        teacher forcing decaying linearly to zero over the first half."""
        ppo_steps = int(512)

        named_losses = dict(
            imitation_loss=Imitation(
                cls.SENSORS[1]
            ),  # 0 is Minigrid, 1 is ExpertActionSensor
            ppo_loss=PPO(**PPOConfig, entropy_method_name="conditional_entropy"),
        )  # type:ignore

        single_stage = PipelineStage(
            teacher_forcing=LinearDecay(
                startp=1.0,
                endp=0.0,
                steps=ppo_steps // 2,
            ),
            loss_names=["imitation_loss", "ppo_loss"],
            max_stage_steps=ppo_steps,
        )

        return TrainingPipeline(
            named_losses=named_losses,
            pipeline_stages=[single_stage],
            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4)),
            num_mini_batch=4,
            update_repeats=3,
            max_grad_norm=0.5,
            num_steps=16,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=None,
            save_interval=10000,
            metric_accumulate_interval=1,
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}  # type:ignore
            ),
        )
class TestArmCalculationUtils(object):
    """Tests for the coordinate helpers in `arm_calculation_utils`."""

    def test_translation_functions(self):
        """An object at (0, 1, 0) seen from an agent at (1, 0, 2) that is
        rotated by -45 degrees around y must map to roughly (-2.12, 1.0, -0.70)."""
        agent_pose = dict(
            position=dict(x=1, y=0, z=2),
            rotation=dict(x=0, y=-45, z=0),
        )
        object_pose = dict(
            position=dict(x=0, y=1, z=0),
            rotation=dict(x=0, y=0, z=0),
        )

        in_agent_frame = world_coords_to_agent_coords(object_pose, agent_pose)

        tolerance = 0.01
        expected_position = {"x": -2.12, "y": 1.0, "z": -0.70}
        assert all(
            abs(in_agent_frame["position"][axis] - target) < tolerance
            for axis, target in expected_position.items()
        )
def test_binned_and_semantic_mapping(self, tmpdir):
    """Compare binned point-cloud and semantic map observations against
    stored reference observations.

    Five walkthrough tasks are sampled (seeds 0-4) and stepped with random
    actions; after every step the observations are compared recursively with
    the reference observations downloaded into `tmpdir`.

    The test returns early (without failing) when
    `setup_path_for_use_with_rearrangement_project` reports that the
    rearrangement project cannot be used.

    NOTE(review): the numeric tolerance assertions inside `compare_recursive`
    are commented out, so the only enforced check is that the observation is
    closer to the reference than its 90-degree rotation is.
    """
    try:
        if not self.setup_path_for_use_with_rearrangement_project():
            return

        from baseline_configs.rearrange_base import RearrangeBaseExperimentConfig
        from baseline_configs.walkthrough.walkthrough_rgb_base import (
            WalkthroughBaseExperimentConfig,
        )
        from rearrange.constants import (
            FOV,
            PICKUPABLE_OBJECTS,
            OPENABLE_OBJECTS,
        )
        from datagen.datagen_utils import get_scenes

        ORDERED_OBJECT_TYPES = list(sorted(PICKUPABLE_OBJECTS + OPENABLE_OBJECTS))

        map_range_sensor = ReachableBoundsTHORSensor(margin=1.0)
        map_info = dict(
            map_range_sensor=map_range_sensor,
            vision_range_in_cm=40 * 5,
            map_size_in_cm=1050,
            resolution_in_cm=5,
        )
        map_sensors = [
            RelativePositionChangeTHORSensor(),
            map_range_sensor,
            DepthSensorThor(
                height=224,
                width=224,
                use_normalization=False,
                uuid="depth",
            ),
            BinnedPointCloudMapTHORSensor(
                fov=FOV,
                ego_only=False,
                **map_info,
            ),
            SemanticMapTHORSensor(
                fov=FOV,
                ego_only=False,
                ordered_object_types=ORDERED_OBJECT_TYPES,
                **map_info,
            ),
        ]
        all_sensors = [*WalkthroughBaseExperimentConfig.SENSORS, *map_sensors]

        # Best effort: fall back to no explicit display if none can be listed.
        open_x_displays = []
        try:
            open_x_displays = get_open_x_displays()
        except (AssertionError, IOError):
            pass
        walkthrough_task_sampler = WalkthroughBaseExperimentConfig.make_sampler_fn(
            stage="train",
            sensors=all_sensors,
            scene_to_allowed_rearrange_inds={s: [0] for s in get_scenes("train")},
            force_cache_reset=True,
            allowed_scenes=None,
            seed=1,
            x_display=open_x_displays[0] if len(open_x_displays) != 0 else None,
            thor_controller_kwargs={
                **RearrangeBaseExperimentConfig.THOR_CONTROLLER_KWARGS,
                # "server_class": ai2thor.wsgi_server.WsgiServer,  # Only for debugging
            },
        )

        targets_path = os.path.join(tmpdir, "rearrange_mapping_examples.pkl.gz")
        urllib.request.urlretrieve(
            "https://ai2-prior-allenact-public-test.s3-us-west-2.amazonaws.com/ai2thor_mapping/rearrange_mapping_examples.pkl.gz",
            targets_path,
        )
        goal_obs_dict = compress_pickle.load(targets_path)

        def compare_recursive(obs, goal_obs, key_list: List):
            # Walk dicts / sequences in lockstep with the reference; `key_list`
            # records the path taken and is only used in failure messages.
            if isinstance(obs, Dict):
                for k in goal_obs:
                    compare_recursive(
                        obs=obs[k], goal_obs=goal_obs[k], key_list=key_list + [k]
                    )
            elif isinstance(obs, (List, Tuple)):
                for i in range(len(goal_obs)):
                    compare_recursive(
                        obs=obs[i], goal_obs=goal_obs[i], key_list=key_list + [i]
                    )
            else:
                # Should be a numpy array at this point
                assert isinstance(obs, np.ndarray) and isinstance(
                    goal_obs, np.ndarray
                ), f"After {key_list}, not numpy arrays, obs={obs}, goal_obs={goal_obs}"

                # Multiplying by 1.0 yields float copies, so the in-place NaN
                # masking below does not touch the caller's arrays.
                obs = 1.0 * obs
                goal_obs = 1.0 * goal_obs

                goal_where_nan = np.isnan(goal_obs)
                obs_where_nan = np.isnan(obs)

                where_nan_not_equal = (goal_where_nan != obs_where_nan).sum()
                # assert (
                #     where_nan_not_equal.sum() <= 1
                #     and where_nan_not_equal.mean() < 1e3
                # )

                where_nan = np.logical_or(goal_where_nan, obs_where_nan)
                obs[where_nan] = 0.0
                goal_obs[where_nan] = 0.0

                def special_mean(v):
                    # Sum out trailing axes until at most 2 remain, then average.
                    while len(v.shape) > 2:
                        v = v.sum(-1)
                    return v.mean()

                # Relative difference; the denominator is at least 1.
                numer = np.abs(obs - goal_obs)
                denom = np.abs(
                    np.stack((obs, goal_obs, np.ones_like(obs)), axis=0)
                ).max(0)
                difference = special_mean(numer / denom)
                # assert (
                #     difference < 1.2e-3
                # ), f"Difference of {np.abs(obs - goal_obs).mean()} at {key_list}."

                if (
                    len(obs.shape) >= 2
                    and obs.shape[0] == obs.shape[1]
                    and obs.shape[0] > 1
                ):
                    # Sanity check that rotating the observations makes them not-equal
                    rot_obs = np.rot90(obs)
                    numer = np.abs(rot_obs - goal_obs)
                    denom = np.abs(
                        np.stack((rot_obs, goal_obs, np.ones_like(obs)), axis=0)
                    ).max(0)
                    rot_difference = special_mean(numer / denom)
                    assert (
                        difference < rot_difference or (obs == rot_obs).all()
                    ), f"Too small a difference ({(numer / denom).mean()})."

        observations_dict = defaultdict(lambda: [])
        for i in range(5):  # Why 5, why not 5?
            set_seed(i)
            task = walkthrough_task_sampler.next_task()

            obs_list = observations_dict[i]
            obs_list.append(task.get_observations())
            k = 0
            compare_recursive(
                obs=obs_list[0], goal_obs=goal_obs_dict[i][0], key_list=[i, k]
            )
            while not task.is_done():
                # Movement/look actions are three times as likely as "done".
                obs = task.step(
                    action=task.action_names().index(
                        random.choice(
                            3
                            * [
                                "move_ahead",
                                "rotate_right",
                                "rotate_left",
                                "look_up",
                                "look_down",
                            ]
                            + ["done"]
                        )
                    )
                ).observation
                k += 1
                obs_list.append(obs)
                compare_recursive(
                    obs=obs,
                    goal_obs=goal_obs_dict[i][task.num_steps_taken()],
                    key_list=[i, k],
                )

                # Free space metric map in RGB using pointclouds coming from depth images. This
                # is built iteratively after every step.
                # R - is used to encode points at a height < 0.02m (i.e. the floor)
                # G - is used to encode points at a height between 0.02m and 2m, i.e. objects the agent would run into
                # B - is used to encode points higher than 2m, i.e. ceiling

                # Uncomment if you wish to visualize the observations:
                # import matplotlib.pyplot as plt
                # plt.imshow(
                #     np.flip(255 * (obs["binned_pc_map"]["map"] > 0), 0)
                # )  # np.flip because we expect "up" to be -row
                # plt.title("Free space map")
                # plt.show()
                # plt.close()

                # See also `obs["binned_pc_map"]["egocentric_update"]` to see
                # the metric map from the point of view of the agent before it is
                # rotated into the world-space coordinates and merged with past observations.

                # Semantic map in RGB which is iteratively revealed using depth maps to figure out what
                # parts of the scene the agent has seen so far.
                # This map has shape 210x210x72 with the 72 channels corresponding to the 72
                # object types in `ORDERED_OBJECT_TYPES`
                semantic_map = obs["semantic_map"]["map"]

                # We can't display all 72 channels in an RGB image so instead we randomly assign
                # each object a color and then just allow them to overlap each other
                colored_semantic_map = (
                    SemanticMapBuilder.randomly_color_semantic_map(semantic_map)
                )

                # Here's the full semantic map with nothing masked out because the agent
                # hasn't seen it yet
                colored_semantic_map_no_fog = (
                    SemanticMapBuilder.randomly_color_semantic_map(
                        map_sensors[
                            -1
                        ].semantic_map_builder.ground_truth_semantic_map
                    )
                )

                # Uncomment if you wish to visualize the observations:
                # import matplotlib.pyplot as plt
                # plt.imshow(
                #     np.flip(  # np.flip because we expect "up" to be -row
                #         np.concatenate(
                #             (
                #                 colored_semantic_map,
                #                 255 + 0 * colored_semantic_map[:, :10, :],
                #                 colored_semantic_map_no_fog,
                #             ),
                #             axis=1,
                #         ),
                #         0,
                #     )
                # )
                # plt.title("Semantic map with and without exploration fog")
                # plt.show()
                # plt.close()

                # See also
                # * `obs["semantic_map"]["egocentric_update"]`
                # * `obs["semantic_map"]["explored_mask"]`
                # * `obs["semantic_map"]["egocentric_mask"]`

        # To save observations for comparison against future runs, uncomment the below.
        # os.makedirs("tmp_out", exist_ok=True)
        # compress_pickle.dump(
        #     {**observations_dict}, "tmp_out/rearrange_mapping_examples.pkl.gz"
        # )
    finally:
        # The sampler does not exist if the test returned or failed before it
        # was created; in that case there is nothing to close.
        try:
            walkthrough_task_sampler.close()
        except NameError:
            pass
add_step_dim(input): if isinstance(input, torch.Tensor): return input.unsqueeze(0) elif isinstance(input, Dict): return {k: add_step_dim(v) for k, v in input.items()} else: raise NotImplementedError batch = add_step_dim(batch_observations([task.get_observations()])) while not task.is_done(): # noinspection PyTypeChecker ac_out, memory = cast( Tuple[ActorCriticOutput, Memory], walkthrough_model.forward( observations=batch, memory=memory, prev_actions=None, masks=masks, ), ) binned_map_losses.append( named_losses["binned_map_loss"] .loss( step_count=0, # Not used in this loss batch={"observations": batch}, actor_critic_output=ac_out, )[0] .item() ) assert ( binned_map_losses[-1] < 0.16 ), f"Binned map loss to large at ({i}, {task.num_steps_taken()})" semantic_map_losses.append( named_losses["semantic_map_loss"] .loss( step_count=0, # Not used in this loss batch={"observations": batch}, actor_critic_output=ac_out, )[0] .item() ) assert ( semantic_map_losses[-1] < 0.004 ), f"Semantic map loss to large at ({i}, {task.num_steps_taken()})" masks = masks.fill_(1.0) obs = task.step( action=ac_out.distributions.sample().item() ).observation batch = add_step_dim(batch_observations([obs])) if task.num_steps_taken() >= 10: break # To save observations for comparison against future runs, uncomment the below. 
class TestFrozenAttribs(object):
    """Checks that class variables of classes using `FrozenClassVariables`
    cannot be reassigned on the class, while instances may still shadow them."""

    def test_frozen_inheritance(self):
        """Class-level assignment must fail; instance-level assignment must
        work and must not leak to other instances."""
        from abc import abstractmethod
        from allenact.base_abstractions.experiment_config import FrozenClassVariables

        class SomeBase(metaclass=FrozenClassVariables):
            yar = 3

            @abstractmethod
            def use(self):
                raise NotImplementedError()

        class SomeDerived(SomeBase):
            yar = 33

            def use(self):
                return self.yar

        failed = False
        try:
            SomeDerived.yar = 6  # Error
        except Exception as _:
            failed = True
        assert failed

        inst = SomeDerived()
        inst2 = SomeDerived()
        inst.yar = 12  # No error
        assert inst.use() == 12
        assert inst2.use() == 33

    @staticmethod
    def my_func(config, val):
        """Process target: asserts (in the child) that `config.MY_VAR == val`."""
        config.my_var_is(val)

    def test_frozen_experiment_config(self):
        """`MyConfig` cannot be instantiated, class variables of both configs
        are frozen, and a value set on the `scfg` instance is still visible
        after the instance has been handed to a child process."""
        val = 5

        failed = False
        try:
            MyConfig()
        except (RuntimeError, TypeError):
            failed = True
        assert failed

        # Setting the value on the instance is allowed.
        scfg.MY_VAR = val
        scfg.my_var_is(val)

        failed = False
        try:
            MyConfig.MY_VAR = val
        except RuntimeError:
            failed = True
        assert failed

        failed = False
        try:
            MySpecConfig.MY_VAR = val
        except RuntimeError:
            failed = True
        assert failed

        for fork_method in ["forkserver", "fork"]:
            ctxt = mp.get_context(fork_method)
            p = ctxt.Process(target=self.my_func, kwargs=dict(config=scfg, val=val))
            p.start()
            p.join()
            # An assertion failing inside the child only shows up as a
            # non-zero exit code; without this check it would go unnoticed.
            assert (
                p.exitcode == 0
            ), f"Child process started with '{fork_method}' exited with code {p.exitcode}."
allenact.base_abstractions.misc import ( Memory, GenericAbstractLoss, ModelType, LossOutput, ) from allenact.utils.experiment_utils import PipelineStage, StageComponent from allenact.utils.misc_utils import prepare_locals_for_super from projects.babyai_baselines.experiments.go_to_obj.ppo import ( PPOBabyAIGoToObjExperimentConfig, ) SILLY_STORAGE_VALUES = [1.0, 2.0, 3.0, 4.0] SILLY_STORAGE_REPEATS = [1, 2, 3, 4] class FixedConstantLoss(AbstractActorCriticLoss): def __init__(self, name: str, value: float): super().__init__() self.name = name self.value = value def loss( # type: ignore self, *args, **kwargs, ): return self.value, {self.name: self.value} class SillyStorage(ExperienceStorage, StreamingStorageMixin): def __init__(self, values_to_return: List[float], repeats: List[int]): self.values_to_return = values_to_return self.repeats = repeats assert len(self.values_to_return) == len(self.repeats) self.index = 0 def initialize(self, *, observations: ObservationType, **kwargs): pass def add( self, observations: ObservationType, memory: Optional[Memory], actions: torch.Tensor, action_log_probs: torch.Tensor, value_preds: torch.Tensor, rewards: torch.Tensor, masks: torch.Tensor, ): pass def to(self, device: torch.device): pass def set_partition(self, index: int, num_parts: int): pass @property def total_experiences(self) -> int: return 0 @total_experiences.setter def total_experiences(self, value: int): pass def next_batch(self) -> Dict[str, Any]: if self.index >= len(self.values_to_return): raise EOFError to_return = { "value": torch.tensor( [self.values_to_return[self.index]] * self.repeats[self.index] ), } self.index += 1 return to_return def reset_stream(self): self.index = 0 def empty(self) -> bool: return len(self.values_to_return) == 0 class AverageBatchValueLoss(GenericAbstractLoss): def loss( self, *, model: ModelType, batch: ObservationType, batch_memory: Memory, stream_memory: Memory, ) -> LossOutput: v = batch["value"].mean() return LossOutput( value=v, 
info={"avg_batch_val": v}, per_epoch_info={}, batch_memory=batch_memory, stream_memory=stream_memory, bsize=batch["value"].shape[0], ) class PPOBabyAIGoToObjTestExperimentConfig(PPOBabyAIGoToObjExperimentConfig): NUM_CKPTS_TO_SAVE = 2 @classmethod def tag(cls): return "BabyAIGoToObjPPO-TESTING" @classmethod def machine_params(cls, mode="train", **kwargs): mp = super().machine_params(mode=mode, **kwargs) if mode == "valid": mp = MachineParams( nprocesses=1, devices=mp.devices, sensor_preprocessor_graph=mp.sensor_preprocessor_graph, sampler_devices=mp.sampler_devices, visualizer=mp.visualizer, local_worker_ids=mp.local_worker_ids, ) return mp @classmethod def training_pipeline(cls, **kwargs): total_train_steps = cls.TOTAL_RL_TRAIN_STEPS ppo_info = cls.rl_loss_default("ppo", steps=total_train_steps) tp = cls._training_pipeline( named_losses={ "ppo_loss": ppo_info["loss"], "3_loss": FixedConstantLoss("3_loss", 3.0), "avg_value_loss": AverageBatchValueLoss(), }, named_storages={ "onpolicy": RolloutBlockStorage(), "silly_storage": SillyStorage( values_to_return=SILLY_STORAGE_VALUES, repeats=SILLY_STORAGE_REPEATS ), }, pipeline_stages=[ PipelineStage( loss_names=["ppo_loss", "3_loss"], max_stage_steps=total_train_steps, stage_components=[ StageComponent( uuid="onpolicy", storage_uuid="onpolicy", loss_names=["ppo_loss", "3_loss"], ) ], ), ], num_mini_batch=ppo_info["num_mini_batch"], update_repeats=ppo_info["update_repeats"], total_train_steps=total_train_steps, valid_pipeline_stage=PipelineStage( loss_names=["ppo_loss", "3_loss"], max_stage_steps=-1, update_repeats=1, num_mini_batch=1, ), test_pipeline_stage=PipelineStage( loss_names=["avg_value_loss"], stage_components=[ StageComponent( uuid="debug", storage_uuid="silly_storage", loss_names=["avg_value_loss"], ), ], max_stage_steps=-1, update_repeats=1, num_mini_batch=1, ), ) tp.training_settings.save_interval = int( math.ceil(cls.TOTAL_RL_TRAIN_STEPS / cls.NUM_CKPTS_TO_SAVE) ) return tp def valid_task_sampler_args( 
self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: # Also run validation return self.test_task_sampler_args(**prepare_locals_for_super(locals())) # Wrapper context manager to redirect stdout and stderr to a file when potentially # using pytest capsys class RedirectOutput: def __init__(self, capsys: Optional, capfd: Optional): self.capsys = capsys self.capfd = capfd self.f = io.StringIO() self.redirect_stdout = redirect_stdout(self.f) self.redirect_stderr = redirect_stderr(self.f) self.capsys_output = "" self.capfd_output = "" # self.capsys_disabler = None def get_output(self): return self.f.getvalue() + self.capsys_output + self.capfd_output def __enter__(self): if self.capsys is not None: self.capsys.readouterr() # Clear out any existing output if self.capfd is not None: self.capfd.readouterr() # Clear out any existing output # self.capsys_disabler = self.capsys.disabled() # self.capsys_disabler.__enter__() self.redirect_stdout.__enter__() self.redirect_stderr.__enter__() def __exit__(self, *args): if self.capsys is not None: captured = self.capsys.readouterr() self.capsys_output = captured.out + captured.err # self.capsys_disabler.__exit__(*args) if self.capfd is not None: captured = self.capfd.readouterr() self.capfd_output = captured.out + captured.err self.redirect_stdout.__exit__(*args) self.redirect_stderr.__exit__(*args) class TestGoToObjTrains: def test_ppo_trains(self, capfd, tmpdir): cfg = PPOBabyAIGoToObjTestExperimentConfig() d = tmpdir / "test_ppo_trains" if isinstance(d, pathlib.Path): d.mkdir(parents=True, exist_ok=True) else: d.mkdir() output_dir = str(d) train_runner = OnPolicyRunner( config=cfg, output_dir=output_dir, loaded_config_src_files=None, seed=1, mode="train", deterministic_cudnn=True, ) output_redirector = RedirectOutput(capsys=None, capfd=capfd) with output_redirector: start_time_str = train_runner.start_train( 
max_sampler_processes_per_worker=1 ) s = output_redirector.get_output() def extract_final_metrics_from_log(s: str, mode: str): lines = s.splitlines() lines = [l for l in lines if mode.upper() in l] try: metrics_and_losses_list = ( lines[-1].split(")")[-1].split("[")[0].strip().split(" ") ) except IndexError: raise RuntimeError(f"Failed to parse log:\n{s}") def try_float(f): try: return float(f) except ValueError: return f metrics_and_losses_dict = { k: try_float(v) for k, v in zip( metrics_and_losses_list[::2], metrics_and_losses_list[1::2] ) } return metrics_and_losses_dict train_metrics = extract_final_metrics_from_log(s, "train") assert train_metrics["global_batch_size"] == 256 valid_metrics = extract_final_metrics_from_log(s, "valid") assert valid_metrics["3_loss/3_loss"] == 3, "Incorrect validation loss" assert ( valid_metrics["new_tasks_completed"] == cfg.NUM_TEST_TASKS ), "Incorrect number of tasks evaluated in validation" test_runner = OnPolicyRunner( config=cfg, output_dir=output_dir, loaded_config_src_files=None, seed=1, mode="test", deterministic_cudnn=True, ) test_results = test_runner.start_test( checkpoint_path_dir_or_pattern=os.path.join( output_dir, "checkpoints", "**", start_time_str, "*.pt" ), max_sampler_processes_per_worker=1, ) assert ( len(test_results) == 2 ), f"Too many or too few test results ({test_results})" tr = test_results[-1] assert ( tr["training_steps"] == round( math.ceil( cfg.TOTAL_RL_TRAIN_STEPS / (cfg.ROLLOUT_STEPS * cfg.NUM_TRAIN_SAMPLERS) ) ) * cfg.ROLLOUT_STEPS * cfg.NUM_TRAIN_SAMPLERS ), "Incorrect number of training steps" assert len(tr["tasks"]) == cfg.NUM_TEST_TASKS, "Incorrect number of test tasks" assert tr["test-metrics/success"] == sum( task["success"] for task in tr["tasks"] ) / len(tr["tasks"]), "Success counts don't seem to match" assert ( tr["test-metrics/success"] > 0.95 ), f"PPO did not seem to converge for the go_to_obj task (success {tr['success']})." 
assert tr["test-debug-losses/avg_value_loss/avg_batch_val"] == sum( ssv * ssr for ssv, ssr in zip(SILLY_STORAGE_VALUES, SILLY_STORAGE_REPEATS) ) / sum(SILLY_STORAGE_REPEATS) assert tr["test-debug-losses/avg_value_loss/avg_batch_val"] == sum( ssv * ssr for ssv, ssr in zip(SILLY_STORAGE_VALUES, SILLY_STORAGE_REPEATS) ) / sum(SILLY_STORAGE_REPEATS) assert tr["test-debug-misc/worker_batch_size"] == sum( SILLY_STORAGE_VALUES ) / len(SILLY_STORAGE_VALUES) if __name__ == "__main__": TestGoToObjTrains().test_ppo_trains( pathlib.Path("experiment_output/testing"), capsys=None, capfd=None ) # type:ignore ================================================ FILE: tests/utils/__init__.py ================================================ ================================================ FILE: tests/utils/test_inference_agent.py ================================================ from collections import Counter import torch from allenact.utils.experiment_utils import set_seed from allenact.utils.inference import InferenceAgent from projects.babyai_baselines.experiments.go_to_obj.ppo import ( PPOBabyAIGoToObjExperimentConfig, ) from packaging.version import parse if parse(torch.__version__) >= parse("2.0.0"): expected_results = [ { "ep_length": 39, "reward": 0.45999999999999996, "task_info": {}, "success": 1.0, }, {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0}, {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0}, {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0}, {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0}, {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0}, {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0}, {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0}, {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0}, {"ep_length": 64, "reward": 0.0, "task_info": {}, "success": 0.0}, ] else: expected_results = [ {"ep_length": 64, "reward": 0.0, "success": 0.0}, 
{"ep_length": 64, "reward": 0.0, "success": 0.0}, {"ep_length": 64, "reward": 0.0, "success": 0.0}, {"ep_length": 17, "reward": 0.7646153846153846, "success": 1.0}, {"ep_length": 22, "reward": 0.6953846153846154, "success": 1.0}, {"ep_length": 64, "reward": 0.0, "success": 0.0}, {"ep_length": 64, "reward": 0.0, "success": 0.0}, {"ep_length": 64, "reward": 0.0, "success": 0.0}, {"ep_length": 64, "reward": 0.0, "success": 0.0}, {"ep_length": 64, "reward": 0.0, "success": 0.0}, ] class TestInferenceAgent(object): def test_inference_agent_from_minigrid_config(self): set_seed(1) exp_config = PPOBabyAIGoToObjExperimentConfig() agent = InferenceAgent.from_experiment_config( exp_config=exp_config, device=torch.device("cpu"), ) task_sampler = exp_config.make_sampler_fn( **exp_config.test_task_sampler_args(process_ind=0, total_processes=1) ) all_actions = [] successes = 0 for ind, expected_result in zip(range(10), expected_results): agent.reset() task = task_sampler.next_task() observations = task.get_observations() actions = [] while not task.is_done(): action = agent.act(observations=observations) actions.append(action) observations = task.step(action).observation metrics = task.metrics() successes += metrics["success"] assert metrics["success"] == 0 or metrics["reward"] > 0 assert metrics["ep_length"] <= 64 # Random seeding seems to not work well when changing linux/mac and torch versions :( # assert all( # abs(v - expected_result[k]) < 1e-4 # for k, v in task.metrics().items() # if k != "task_info" # ), f"Failed on task {ind} with actions {actions} and metrics {task.metrics()} (expected={expected_result})." all_actions.append(actions) assert successes > 0, "At least one task should be successful hopefully..." assert min(Counter(sum(all_actions, [])).values()) >= len( sum(all_actions, []) ) * 1 / (7 + 3), ( "Statistically, all actions should be taken at around 1/7 * num_actions times. We add 3 to" " the denominator for unlikely settings." 
) if __name__ == "__main__": TestInferenceAgent().test_inference_agent_from_minigrid_config() ================================================ FILE: tests/utils/test_spaces.py ================================================ import warnings from collections import OrderedDict from typing import Tuple import numpy as np import torch from gym import spaces as gyms from allenact.utils import spaces_utils as su class TestSpaces(object): space = gyms.Dict( { "first": gyms.Tuple( [ gyms.Box(-10, 10, (3, 4)), gyms.MultiDiscrete([2, 3, 4]), gyms.Box(-1, 1, ()), ] ), "second": gyms.Tuple( [ gyms.Dict({"third": gyms.Discrete(11)}), gyms.MultiBinary(8), ] ), } ) @staticmethod def same(a, b, bidx=None): if isinstance(a, OrderedDict): for key in a: if not TestSpaces.same(a[key], b[key], bidx): return False return True elif isinstance(a, Tuple): for it in range(len(a)): if not TestSpaces.same(a[it], b[it], bidx): return False return True else: # np.array_equal also works for torch tensors and scalars if bidx is None: return np.array_equal(a, b) else: return np.array_equal(a, b[bidx]) def test_conversion(self): gsample = self.space.sample() asample = su.torch_point(self.space, gsample) back = su.numpy_point(self.space, asample) assert self.same(back, gsample) def test_flatten(self): # We flatten Discrete to 1 value assert su.flatdim(self.space) == 25 # gym flattens Discrete to one-hot assert gyms.flatdim(self.space) == 35 asample = su.torch_point(self.space, self.space.sample()) flattened = su.flatten(self.space, asample) unflattened = su.unflatten(self.space, flattened) assert self.same(asample, unflattened) # suppress `UserWarning: WARN: Box bound precision lowered by casting to float32` with warnings.catch_warnings(): warnings.simplefilter("ignore") flattened_space = su.flatten_space(self.space) assert flattened_space.shape == (25,) # The maximum comes from Discrete(11) assert flattened_space.high.max() == 11.0 assert flattened_space.low.min() == -10.0 gym_flattened_space = 
gyms.flatten_space(self.space) assert gym_flattened_space.shape == (35,) # The maximum comes from Box(-10, 10, (3, 4)) assert gym_flattened_space.high.max() == 10.0 assert gym_flattened_space.low.min() == -10.0 def test_batched(self): samples = [self.space.sample() for _ in range(10)] flattened = [ su.flatten(self.space, su.torch_point(self.space, sample)) for sample in samples ] stacked = torch.stack(flattened, dim=0) unflattened = su.unflatten(self.space, stacked) for bidx, refsample in enumerate(samples): # Compare each torch-ified sample to the corresponding unflattened from the stack assert self.same(su.torch_point(self.space, refsample), unflattened, bidx) assert self.same(su.flatten(self.space, unflattened), stacked) def test_tolist(self): space = gyms.MultiDiscrete([3, 3]) actions = su.torch_point(space, space.sample()) # single sampler actions = actions.unsqueeze(0).unsqueeze(0) # add [step, sampler] flat_actions = su.flatten(space, actions) al = su.action_list(space, flat_actions) assert len(al) == 1 assert len(al[0]) == 2 space = gyms.Tuple([gyms.MultiDiscrete([3, 3]), gyms.Discrete(2)]) actions = su.torch_point(space, space.sample()) # single sampler actions = ( actions[0].unsqueeze(0).unsqueeze(0), torch.tensor(actions[1]).unsqueeze(0).unsqueeze(0), ) # add [step, sampler] flat_actions = su.flatten(space, actions) al = su.action_list(space, flat_actions) assert len(al) == 1 assert len(al[0][0]) == 2 assert isinstance(al[0][1], int) space = gyms.Dict( {"tuple": gyms.MultiDiscrete([3, 3]), "scalar": gyms.Discrete(2)} ) actions = su.torch_point(space, space.sample()) # single sampler actions = OrderedDict( [ ("tuple", actions["tuple"].unsqueeze(0).unsqueeze(0)), ("scalar", torch.tensor(actions["scalar"]).unsqueeze(0).unsqueeze(0)), ] ) flat_actions = su.flatten(space, actions) al = su.action_list(space, flat_actions) assert len(al) == 1 assert len(al[0]["tuple"]) == 2 assert isinstance(al[0]["scalar"], int) if __name__ == "__main__": 
TestSpaces().test_conversion() # type:ignore TestSpaces().test_flatten() # type:ignore TestSpaces().test_batched() # type:ignore TestSpaces().test_tolist() # type:ignore ================================================ FILE: tests/vision/__init__.py ================================================ ================================================ FILE: tests/vision/test_pillow_rescaling.py ================================================ import hashlib import os import imageio import numpy as np from torchvision.transforms import transforms from allenact.utils.tensor_utils import ScaleBothSides from constants import ABS_PATH_OF_TOP_LEVEL_DIR to_pil = transforms.ToPILImage() # Same as used by the vision sensors class TestPillowRescaling(object): def _load_thor_img(self) -> np.ndarray: img_path = os.path.join( ABS_PATH_OF_TOP_LEVEL_DIR, "docs/img/iTHOR_framework.jpg" ) img = imageio.v2.imread(img_path) return img def _get_img_hash(self, img: np.ndarray) -> str: img_hash = hashlib.sha1(np.ascontiguousarray(img)) return img_hash.hexdigest() def _random_rgb_image(self, width: int, height: int, seed: int) -> np.ndarray: s = np.random.get_state() np.random.seed(seed) img = np.random.randint( low=0, high=256, size=(width, height, 3), dtype=np.uint8 ) np.random.set_state(s) return img def _random_depthmap( self, width: int, height: int, max_depth: float, seed: int ) -> np.ndarray: s = np.random.get_state() np.random.seed(seed) img = max_depth * np.random.rand(width, height, 1) np.random.set_state(s) return np.float32(img) def test_scaler_rgb_thor(self): thor_img_arr = np.uint8(self._load_thor_img()) assert self._get_img_hash(thor_img_arr) in [ "80ff8a342b4f74966796eee91babde31409d0457", "eb808b2218ccc2e56144131f9ef596a5c2ae3e2a", ] img = to_pil(thor_img_arr) scaler = ScaleBothSides(width=75, height=75) scaled_img = np.array(scaler(img)) assert self._get_img_hash(scaled_img) in [ "2c47057aa188240cb21b2edc39e0f269c1085bac", "b5df3cc03f181cb7be07ddd229cac8d1efd5d077", ] scaler 
= ScaleBothSides(width=500, height=600) scaled_img = np.array(scaler(img)) assert self._get_img_hash(scaled_img) in [ "faf0be2b9ec9bfd23a1b7b465c86ad961d03c259", "cccddd7f17b59434dcdd0006dceeffbe1a969dc8", ] def test_scaler_rgb_random(self): arr = self._random_rgb_image(width=100, height=100, seed=1) assert self._get_img_hash(arr) == "d01bd8ba151ab790fde9a8cc29aa8a3c63147334" img = to_pil(arr) scaler = ScaleBothSides(width=60, height=60) scaled_img = np.array(scaler(img)) assert ( self._get_img_hash(scaled_img) == "22473537e50d5e39abeeec4f92dbfde51c754010" ) scaler = ScaleBothSides(width=1000, height=800) scaled_img = np.array(scaler(img)) assert ( self._get_img_hash(scaled_img) == "5e5b955981e4ee3b5e22287536040d001a31fbd3" ) def test_scaler_depth_thor(self): thor_depth_arr = 5 * np.float32(self._load_thor_img()).sum(-1) thor_depth_arr /= thor_depth_arr.max() assert self._get_img_hash(thor_depth_arr) in [ "d3c1474400ba57ed78f52cf4ba6a4c2a1d90516c", "85a18befb2a174403079bf49d149630f829222c2", ] img = to_pil(thor_depth_arr) scaler = ScaleBothSides(width=75, height=75) scaled_img = np.array(scaler(img)) assert self._get_img_hash(scaled_img) in [ "6a879beb6bed49021e438c1e3af7a62c428a44d8", "868f1d2b32167bda524ba502158f1ee81c8a24d2", ] scaler = ScaleBothSides(width=500, height=600) scaled_img = np.array(scaler(img)) assert self._get_img_hash(scaled_img) in [ "79f11fb741ae638afca40125e4c501f54b22cc01", "2d3012e1cced2942f7368e84bf332241fcf9d7fe", ] def test_scaler_depth_random(self): depth_arr = self._random_depthmap(width=96, height=103, max_depth=5.0, seed=1) assert ( self._get_img_hash(depth_arr) == "cbd8ca127951ffafb6848536d9d731970a5397e9" ) img = to_pil(depth_arr) scaler = ScaleBothSides(width=60, height=60) scaled_img = np.array(scaler(img)) assert ( self._get_img_hash(scaled_img) == "5bed173f2d783fb2badcde9b43904ef85a1a5820" ) scaler = ScaleBothSides(width=1000, height=800) scaled_img = np.array(scaler(img)) assert ( self._get_img_hash(scaled_img) == 
"9dceb7f77d767888f24a84c00913c0cf4ccd9d49" ) if __name__ == "__main__": TestPillowRescaling().test_scaler_rgb_thor() TestPillowRescaling().test_scaler_rgb_random() TestPillowRescaling().test_scaler_depth_thor() TestPillowRescaling().test_scaler_depth_random()